Scripts: Fix extract_classes.py to better handle XML indentation (#26)

No need to special case codeblocks, we can just rely on textwrap.dedent and keep the original element text verbatim.
2025-12-31 17:48:32 +03:00 · 2025-06-06 14:17:34 +02:00
parent ad605a0900
commit b0cc5442f2
1 changed files with 7 additions and 71 deletions
--- a/scripts/extract_classes.py
+++ b/scripts/extract_classes.py
@@ -3,6 +3,7 @@
 import argparse
 import os
 import shutil
+import textwrap
 from collections import OrderedDict

 EXTRACT_ATTRIBS = ["deprecated", "experimental"]
@@ -180,69 +181,6 @@ def _collect_classes_file(path, classes):
        print_error("Unknown XML file {}, skipping".format(path))


-## regions are list of tuples with size 3 (start_index, end_index, indent)
-## indication in string where the codeblock starts, ends, and it's indent
-## if i inside the region returns the indent, else returns -1
-def _get_xml_indent(i, regions):
-    for region in regions:
-        if region[0] < i < region[1]:
-            return region[2]
-    return -1
-
-
-## find and build all regions of codeblock which we need later
-def _make_codeblock_regions(desc, path=""):
-    code_block_end = False
-    code_block_index = 0
-    code_block_regions = []
-    while not code_block_end:
-        code_block_index = desc.find("[codeblock]", code_block_index)
-        if code_block_index < 0:
-            break
-        xml_indent = 0
-        while True:
-            ## [codeblock] always have a trailing new line and some tabs
-            ## those tabs are belongs to xml indentations not code indent
-            if desc[code_block_index + len("[codeblock]\n") + xml_indent] == "\t":
-                xml_indent += 1
-            else:
-                break
-        end_index = desc.find("[/codeblock]", code_block_index)
-        if end_index < 0:
-            print_error("Non terminating codeblock: {}".format(path))
-            exit(1)
-        code_block_regions.append((code_block_index, end_index, xml_indent))
-        code_block_index += 1
-    return code_block_regions
-
-
-def _strip_and_split_desc(desc, code_block_regions):
-    desc_strip = ""  ## a stripped desc msg
-    total_indent = 0  ## code indent = total indent - xml indent
-    for i in range(len(desc)):
-        c = desc[i]
-        if c == "\n":
-            c = "\\n"
-        if c == '"':
-            c = '\\"'
-        if c == "\\":
-            c = "\\\\"  ## <element \> is invalid for msgmerge
-        if c == "\t":
-            xml_indent = _get_xml_indent(i, code_block_regions)
-            if xml_indent >= 0:
-                total_indent += 1
-                if xml_indent < total_indent:
-                    c = "\\t"
-                else:
-                    continue
-            else:
-                continue
-        desc_strip += c
-        if c == "\\n":
-            total_indent = 0
-    return desc_strip
-
-
 def _c_escape(string):
    result = ""
    for i in range(len(string)):
@@ -289,9 +227,9 @@ def _make_translation_catalog(classes):
                    continue

                line_no = elem._start_line_number if elem_text[0] != "\n" else elem._start_line_number + 1
-                desc_str = elem_text.strip()
-                code_block_regions = _make_codeblock_regions(desc_str, desc_list.path)
-                desc_msg = _strip_and_split_desc(desc_str, code_block_regions)
+                # The magic happens here, we remove XML indentation (keeping potential indentation in code blocks),
+                # and escape special characters. The actual clean POT format with wrapping is handled later with msgmerge.
+                desc_msg = _c_escape(textwrap.dedent(elem_text).strip())
                desc_obj = Desc(line_no, desc_msg, desc_list)
                desc_list.list.append(desc_obj)

@@ -329,11 +267,9 @@ def _generate_translation_catalog_file(unique_msgs, output, location_line=False)
            f.write('msgid "{}"\n'.format(msg))
            f.write('msgstr ""\n\n')

-    ## TODO: what if 'nt'?
-    if os.name == "posix":
-        print("Wrapping template at 80 characters for compatibility with Weblate.")
-        os.system("msgmerge -w80 {0} {0} > {0}.wrap".format(output))
-        shutil.move("{}.wrap".format(output), output)
+    print("Wrapping template at 80 characters for compatibility with Weblate.")
+    os.system("msgmerge -w80 {0} {0} > {0}.wrap".format(output))
+    shutil.move("{}.wrap".format(output), output)


 def main():