Scripts: Fix extract_classes.py to better handle XML indentation (#26)

No need to special case codeblocks, we can just rely on textwrap.dedent and keep
the original element text verbatim.
This commit is contained in:
Rémi Verschelde
2025-06-06 14:17:34 +02:00
committed by GitHub
parent ad605a0900
commit b0cc5442f2

View File

@@ -3,6 +3,7 @@
import argparse
import os
import shutil
import textwrap
from collections import OrderedDict
EXTRACT_ATTRIBS = ["deprecated", "experimental"]
@@ -180,69 +181,6 @@ def _collect_classes_file(path, classes):
print_error("Unknown XML file {}, skipping".format(path))
## regions are list of tuples with size 3 (start_index, end_index, indent)
## indication in string where the codeblock starts, ends, and it's indent
## if i inside the region returns the indent, else returns -1
def _get_xml_indent(i, regions):
for region in regions:
if region[0] < i < region[1]:
return region[2]
return -1
## find and build all regions of codeblock which we need later
def _make_codeblock_regions(desc, path=""):
code_block_end = False
code_block_index = 0
code_block_regions = []
while not code_block_end:
code_block_index = desc.find("[codeblock]", code_block_index)
if code_block_index < 0:
break
xml_indent = 0
while True:
## [codeblock] always have a trailing new line and some tabs
## those tabs are belongs to xml indentations not code indent
if desc[code_block_index + len("[codeblock]\n") + xml_indent] == "\t":
xml_indent += 1
else:
break
end_index = desc.find("[/codeblock]", code_block_index)
if end_index < 0:
print_error("Non terminating codeblock: {}".format(path))
exit(1)
code_block_regions.append((code_block_index, end_index, xml_indent))
code_block_index += 1
return code_block_regions
def _strip_and_split_desc(desc, code_block_regions):
desc_strip = "" ## a stripped desc msg
total_indent = 0 ## code indent = total indent - xml indent
for i in range(len(desc)):
c = desc[i]
if c == "\n":
c = "\\n"
if c == '"':
c = '\\"'
if c == "\\":
c = "\\\\" ## <element \> is invalid for msgmerge
if c == "\t":
xml_indent = _get_xml_indent(i, code_block_regions)
if xml_indent >= 0:
total_indent += 1
if xml_indent < total_indent:
c = "\\t"
else:
continue
else:
continue
desc_strip += c
if c == "\\n":
total_indent = 0
return desc_strip
def _c_escape(string):
result = ""
for i in range(len(string)):
@@ -289,9 +227,9 @@ def _make_translation_catalog(classes):
continue
line_no = elem._start_line_number if elem_text[0] != "\n" else elem._start_line_number + 1
desc_str = elem_text.strip()
code_block_regions = _make_codeblock_regions(desc_str, desc_list.path)
desc_msg = _strip_and_split_desc(desc_str, code_block_regions)
# The magic happens here, we remove XML indentation (keeping potential indentation in code blocks),
# and escape special characters. The actual clean POT format with wrapping is handled later with msgmerge.
desc_msg = _c_escape(textwrap.dedent(elem_text).strip())
desc_obj = Desc(line_no, desc_msg, desc_list)
desc_list.list.append(desc_obj)
@@ -329,11 +267,9 @@ def _generate_translation_catalog_file(unique_msgs, output, location_line=False)
f.write('msgid "{}"\n'.format(msg))
f.write('msgstr ""\n\n')
## TODO: what if 'nt'?
if os.name == "posix":
print("Wrapping template at 80 characters for compatibility with Weblate.")
os.system("msgmerge -w80 {0} {0} > {0}.wrap".format(output))
shutil.move("{}.wrap".format(output), output)
print("Wrapping template at 80 characters for compatibility with Weblate.")
os.system("msgmerge -w80 {0} {0} > {0}.wrap".format(output))
shutil.move("{}.wrap".format(output), output)
def main():