Skip to content

Commit ac625cb

Browse files
committed
feat: Remove XML-incompatible control characters
1 parent b89faf5 commit ac625cb

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

hmdriver2/_xpath.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,26 @@ def __call__(self, xpath: str) -> '_XMLElement':
3434

3535
return _XMLElement(None, self._d)
3636

37+
@staticmethod
38+
def _sanitize_text(text: str) -> str:
39+
"""Remove ASCII control characters (U+0000-U+001F and U+007F), including tab, newline and carriage return."""
40+
return re.sub(r'[\x00-\x1F\x7F]', '', text)
41+
3742
@staticmethod
3843
def _json2xml(hierarchy: Dict) -> etree.Element:
44+
"""Convert JSON-like hierarchy to XML."""
3945
attributes = hierarchy.get("attributes", {})
40-
tag = attributes.get("type", "orgRoot") or "orgRoot"
41-
# Clean the "text" attribute to be compatible with XML format
42-
attributes["text"] = re.sub(u"[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+", "", attributes.get("text", ""))
43-
xml = etree.Element(tag, attrib=attributes)
46+
47+
# Sanitize every attribute value so that no illegal characters remain
48+
cleaned_attributes = {k: _XPath._sanitize_text(str(v)) for k, v in attributes.items()}
49+
50+
tag = cleaned_attributes.get("type", "orgRoot") or "orgRoot"
51+
xml = etree.Element(tag, attrib=cleaned_attributes)
4452

4553
children = hierarchy.get("children", [])
4654
for item in children:
4755
xml.append(_XPath._json2xml(item))
56+
4857
return xml
4958

5059

0 commit comments

Comments
 (0)