From b0cc5442f2ef1e3edbdcf4bb94f8fdcd16852e49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= <rverschelde@gmail.com>
Date: Fri, 6 Jun 2025 14:17:34 +0200
Subject: [PATCH] Scripts: Fix extract_classes.py to better handle XML
 indentation (#26)

No need to special case codeblocks, we can just rely on textwrap.dedent and keep
the original element text verbatim.
---
 scripts/extract_classes.py | 78 ++++----------------------------------
 1 file changed, 7 insertions(+), 71 deletions(-)
diff --git a/scripts/extract_classes.py b/scripts/extract_classes.py
index 1decbc0..3f4ada0 100644
--- a/scripts/extract_classes.py
+++ b/scripts/extract_classes.py
@@ -3,6 +3,7 @@
 import argparse
 import os
 import shutil
+import textwrap
 from collections import OrderedDict
 
 EXTRACT_ATTRIBS = ["deprecated", "experimental"]
@@ -180,69 +181,6 @@ def _collect_classes_file(path, classes):
         print_error("Unknown XML file {}, skipping".format(path))
 
 
-## regions are list of tuples with size 3 (start_index, end_index, indent)
-## indication in string where the codeblock starts, ends, and it's indent
-## if i inside the region returns the indent, else returns -1
-def _get_xml_indent(i, regions):
-    for region in regions:
-        if region[0] < i < region[1]:
-            return region[2]
-    return -1
-
-
-## find and build all regions of codeblock which we need later
-def _make_codeblock_regions(desc, path=""):
-    code_block_end = False
-    code_block_index = 0
-    code_block_regions = []
-    while not code_block_end:
-        code_block_index = desc.find("[codeblock]", code_block_index)
-        if code_block_index < 0:
-            break
-        xml_indent = 0
-        while True:
-            ## [codeblock] always have a trailing new line and some tabs
-            ## those tabs are belongs to xml indentations not code indent
-            if desc[code_block_index + len("[codeblock]\n") + xml_indent] == "\t":
-                xml_indent += 1
-            else:
-                break
-        end_index = desc.find("[/codeblock]", code_block_index)
-        if end_index < 0:
-            print_error("Non terminating codeblock: {}".format(path))
-            exit(1)
-        code_block_regions.append((code_block_index, end_index, xml_indent))
-        code_block_index += 1
-    return code_block_regions
-
-
-def _strip_and_split_desc(desc, code_block_regions):
-    desc_strip = ""  ## a stripped desc msg
-    total_indent = 0  ## code indent = total indent - xml indent
-    for i in range(len(desc)):
-        c = desc[i]
-        if c == "\n":
-            c = "\\n"
-        if c == '"':
-            c = '\\"'
-        if c == "\\":
-            c = "\\\\"  ## <element \> is invalid for msgmerge
-        if c == "\t":
-            xml_indent = _get_xml_indent(i, code_block_regions)
-            if xml_indent >= 0:
-                total_indent += 1
-                if xml_indent < total_indent:
-                    c = "\\t"
-                else:
-                    continue
-            else:
-                continue
-        desc_strip += c
-        if c == "\\n":
-            total_indent = 0
-    return desc_strip
-
-
 def _c_escape(string):
     result = ""
     for i in range(len(string)):
@@ -289,9 +227,9 @@ def _make_translation_catalog(classes):
                     continue
 
                 line_no = elem._start_line_number if elem_text[0] != "\n" else elem._start_line_number + 1
-                desc_str = elem_text.strip()
-                code_block_regions = _make_codeblock_regions(desc_str, desc_list.path)
-                desc_msg = _strip_and_split_desc(desc_str, code_block_regions)
+                # The magic happens here, we remove XML indentation (keeping potential indentation in code blocks),
+                # and escape special characters. The actual clean POT format with wrapping is handled later with msgmerge.
+                desc_msg = _c_escape(textwrap.dedent(elem_text).strip())
                 desc_obj = Desc(line_no, desc_msg, desc_list)
                 desc_list.list.append(desc_obj)
 
@@ -329,11 +267,9 @@ def _generate_translation_catalog_file(unique_msgs, output, location_line=False)
             f.write('msgid "{}"\n'.format(msg))
             f.write('msgstr ""\n\n')
 
-    ## TODO: what if 'nt'?
-    if os.name == "posix":
-        print("Wrapping template at 80 characters for compatibility with Weblate.")
-        os.system("msgmerge -w80 {0} {0} > {0}.wrap".format(output))
-        shutil.move("{}.wrap".format(output), output)
+    print("Wrapping template at 80 characters for compatibility with Weblate.")
+    os.system("msgmerge -w80 {0} {0} > {0}.wrap".format(output))
+    shutil.move("{}.wrap".format(output), output)
 
 
 def main():