Skip to content

Commit d892000

Browse files
author
Steve Canny
committed
oxml: extract XmlString to oxml.xmlchemy
serialize_for_reading() had to go at the same time to avoid a circular import dependency.
1 parent 75c0302 commit d892000

File tree

4 files changed

+178
-149
lines changed

4 files changed

+178
-149
lines changed

docx/oxml/shared.py

Lines changed: 1 addition & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -8,96 +8,12 @@
88

99
from lxml import etree
1010

11-
import re
12-
1311
from . import OxmlElement
1412
from .exceptions import ValidationError
1513
from .ns import qn
14+
from .xmlchemy import serialize_for_reading
1615

1716

18-
# ===========================================================================
19-
# utility functions
20-
# ===========================================================================
21-
22-
23-
def serialize_for_reading(element):
24-
"""
25-
Serialize *element* to human-readable XML suitable for tests. No XML
26-
declaration.
27-
"""
28-
xml = etree.tostring(element, encoding='unicode', pretty_print=True)
29-
return XmlString(xml)
30-
31-
32-
class XmlString(str):
33-
"""
34-
Provides string comparison override suitable for serialized XML that is
35-
useful for tests.
36-
"""
37-
38-
# ' <w:xyz xmlns:a="http://ns/decl/a" attr_name="val">text</w:xyz>'
39-
# | | || |
40-
# +----------+------------------------------------------++-----------+
41-
# front attrs | text
42-
# close
43-
44-
_xml_elm_line_patt = re.compile(
45-
'( *</?[\w:]+)(.*?)(/?>)([^<]*</\w+>)?'
46-
)
47-
48-
def __eq__(self, other):
49-
lines = self.splitlines()
50-
lines_other = other.splitlines()
51-
if len(lines) != len(lines_other):
52-
return False
53-
for line, line_other in zip(lines, lines_other):
54-
if not self._eq_elm_strs(line, line_other):
55-
return False
56-
return True
57-
58-
def __ne__(self, other):
59-
return not self.__eq__(other)
60-
61-
def _attr_seq(self, attrs):
62-
"""
63-
Return a sequence of attribute strings parsed from *attrs*. Each
64-
attribute string is stripped of whitespace on both ends.
65-
"""
66-
attrs = attrs.strip()
67-
attr_lst = attrs.split()
68-
return sorted(attr_lst)
69-
70-
def _eq_elm_strs(self, line, line_2):
71-
"""
72-
Return True if the element in *line_2* is XML equivalent to the
73-
element in *line*.
74-
"""
75-
front, attrs, close, text = self._parse_line(line)
76-
front_2, attrs_2, close_2, text_2 = self._parse_line(line_2)
77-
if front != front_2:
78-
return False
79-
if self._attr_seq(attrs) != self._attr_seq(attrs_2):
80-
return False
81-
if close != close_2:
82-
return False
83-
if text != text_2:
84-
return False
85-
return True
86-
87-
def _parse_line(self, line):
88-
"""
89-
Return front, attrs, close, text 4-tuple result of parsing XML element
90-
string *line*.
91-
"""
92-
match = self._xml_elm_line_patt.match(line)
93-
front, attrs, close, text = [match.group(n) for n in range(1, 5)]
94-
return front, attrs, close, text
95-
96-
97-
# ===========================================================================
98-
# shared custom element classes
99-
# ===========================================================================
100-
10117
class OxmlBaseElement(etree.ElementBase):
10218
"""
10319
Base class for all custom element classes, to add standardized behavior

docx/oxml/xmlchemy.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# encoding: utf-8
2+
3+
"""
4+
Provides a wrapper around lxml that enables declarative definition of custom
5+
element classes.
6+
"""
7+
8+
from __future__ import absolute_import
9+
10+
from lxml import etree
11+
12+
import re
13+
14+
15+
def serialize_for_reading(element):
    """
    Return *element* rendered as pretty-printed XML text wrapped in an
    XmlString, so the result can be compared loosely in tests. The output
    carries no XML declaration.
    """
    return XmlString(
        etree.tostring(element, encoding='unicode', pretty_print=True)
    )
22+
23+
24+
class XmlString(str):
    """
    String subclass whose equality comparison is suited to serialized XML in
    tests: two values are equal when they have the same number of lines and
    each pair of lines describes the same element, regardless of the order in
    which attributes and namespace declarations appear.

    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable on Python 3.
    """

    # An element line such as
    #
    #     '    <w:xyz xmlns:a="http://ns/decl/a" attr_name="val">text</w:xyz>'
    #
    # is split by the pattern below into four groups:
    #
    #     front: '    <w:xyz'     indent, '<' or '</', and the tag name
    #     attrs: ' xmlns:a="http://ns/decl/a" attr_name="val"'
    #     close: '>'              either '>' or '/>'
    #     text:  'text</w:xyz>'   optional text plus closing tag, else None
    #
    # A raw string is used so that `\w` reaches the regex engine untouched
    # instead of being treated as an (invalid) string escape sequence.
    _xml_elm_line_patt = re.compile(
        r'( *</?[\w:]+)(.*?)(/?>)([^<]*</[\w:]+>)?$'
    )

    def __eq__(self, other):
        """
        Return True if *other* is line-for-line XML-equivalent to this
        string. Anything that cannot be split into lines (e.g. None or a
        number) is simply unequal rather than an error.
        """
        # Duck-typed check rather than isinstance(other, str) so text types
        # other than `str` are still accepted.
        if not hasattr(other, 'splitlines'):
            return False
        lines = self.splitlines()
        lines_other = other.splitlines()
        if len(lines) != len(lines_other):
            return False
        return all(
            self._eq_elm_strs(line, line_other)
            for line, line_other in zip(lines, lines_other)
        )

    def __ne__(self, other):
        """
        Return the negation of `__eq__` so `!=` uses the same XML-aware
        comparison.
        """
        return not self.__eq__(other)

    def _attr_seq(self, attrs):
        """
        Return a sorted list of the whitespace-separated attribute strings
        found in *attrs*, so attribute order does not affect comparison.
        Splitting is on whitespace, so an attribute value that itself
        contains whitespace is compared as several tokens.
        """
        return sorted(attrs.split())

    def _eq_elm_strs(self, line, line_2):
        """
        Return True if the element in *line_2* is XML equivalent to the
        element in *line*: same front, same set of attributes in any order,
        same close, and same trailing text.
        """
        front, attrs, close, text = self._parse_line(line)
        front_2, attrs_2, close_2, text_2 = self._parse_line(line_2)
        return (
            front == front_2
            and self._attr_seq(attrs) == self._attr_seq(attrs_2)
            and close == close_2
            and text == text_2
        )

    @classmethod
    def _parse_line(cls, line):
        """
        Return front, attrs, close, text 4-tuple result of parsing XML element
        string *line*. *text* is None when the line has no text-plus-closing
        tag part.

        A line that is not an element line (text-only content, a blank line,
        a processing instruction) is returned whole as *front* with empty
        *attrs* and *close*. Such a line then only compares equal to an
        identical line, and never to an element line because those always
        have a non-empty *close*.
        """
        match = cls._xml_elm_line_patt.match(line)
        if match is None:
            return line, '', '', None
        return match.groups()

tests/oxml/test_shared.py

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +0,0 @@
1-
# encoding: utf-8
2-
3-
"""
4-
Test suite for docx.oxml.shared
5-
"""
6-
7-
from __future__ import (
8-
absolute_import, division, print_function, unicode_literals
9-
)
10-
11-
import pytest
12-
13-
from docx.oxml.shared import XmlString
14-
15-
16-
class DescribeXmlString(object):
17-
18-
def it_knows_if_two_xml_lines_are_equivalent(self, xml_line_case):
19-
line, other, differs = xml_line_case
20-
xml = XmlString(line)
21-
assert xml == other
22-
assert xml != differs
23-
24-
# fixtures ---------------------------------------------
25-
26-
@pytest.fixture(params=[
27-
'simple_elm', 'nsp_tagname', 'indent', 'attrs', 'nsdecl_order',
28-
'closing_elm',
29-
])
30-
def xml_line_case(self, request):
31-
cases = {
32-
'simple_elm': (
33-
'<name/>',
34-
'<name/>',
35-
'<name>',
36-
),
37-
'nsp_tagname': (
38-
'<xyz:name/>',
39-
'<xyz:name/>',
40-
'<abc:name/>',
41-
),
42-
'indent': (
43-
' <xyz:name/>',
44-
' <xyz:name/>',
45-
'<xyz:name/>',
46-
),
47-
'attrs': (
48-
' <abc:Name foo="bar" bar="foo">',
49-
' <abc:Name bar="foo" foo="bar">',
50-
' <abc:Name far="boo" foo="bar">',
51-
),
52-
'nsdecl_order': (
53-
' <name xmlns:a="http://ns/1" xmlns:b="http://ns/2"/>',
54-
' <name xmlns:b="http://ns/2" xmlns:a="http://ns/1"/>',
55-
' <name xmlns:b="http://ns/2" xmlns:a="http://ns/1">',
56-
),
57-
'closing_elm': (
58-
'</xyz:name>',
59-
'</xyz:name>',
60-
'<xyz:name>',
61-
),
62-
}
63-
line, other, differs = cases[request.param]
64-
return line, other, differs

tests/oxml/test_xmlchemy.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# encoding: utf-8
2+
3+
"""
4+
Test suite for docx.oxml.xmlchemy
5+
"""
6+
7+
from __future__ import absolute_import, print_function, unicode_literals
8+
9+
import pytest
10+
11+
from docx.oxml.xmlchemy import XmlString
12+
13+
14+
class DescribeXmlString(object):
    """Behavior spec for XmlString line parsing and equivalence checks."""

    def it_parses_a_line_to_help_compare(self, parse_fixture):
        """
        The line parser is exercised on its own because a mis-parsed line
        can make two different XML strings compare as equal.
        """
        line, exp_front, exp_attrs, exp_close, exp_text = parse_fixture
        front, attrs, close, text = XmlString._parse_line(line)
        assert front == exp_front
        assert attrs == exp_attrs
        assert close == exp_close
        assert text == exp_text

    def it_knows_if_two_xml_lines_are_equivalent(self, xml_line_case):
        line, other, differs = xml_line_case
        xml = XmlString(line)
        assert xml == other
        assert xml != differs

    # fixtures ---------------------------------------------

    @pytest.fixture(params=[
        # (line, front, attrs, close, text)
        ('<a>text</a>', '<a', '', '>', 'text</a>'),
        ('<a:f/>', '<a:f', '', '/>', None),
        ('<a:f b="c"/>', '<a:f', ' b="c"', '/>', None),
        ('<a:f>t</a:f>', '<a:f', '', '>', 't</a:f>'),
        (
            '<dcterms:created xsi:type="dcterms:W3CDTF">'
            '2013-12-23T23:15:00Z</dcterms:created>',
            '<dcterms:created',
            ' xsi:type="dcterms:W3CDTF"',
            '>',
            '2013-12-23T23:15:00Z</dcterms:created>',
        ),
    ])
    def parse_fixture(self, request):
        """Provide one (line, front, attrs, close, text) parsing case."""
        line, front, attrs, close, text = request.param
        return line, front, attrs, close, text

    @pytest.fixture(params=[
        'simple_elm', 'nsp_tagname', 'indent', 'attrs', 'nsdecl_order',
        'closing_elm',
    ])
    def xml_line_case(self, request):
        """
        Provide one (line, equivalent line, non-equivalent line) triple,
        selected by case name.
        """
        cases = {
            'simple_elm': ('<name/>', '<name/>', '<name>'),
            'nsp_tagname': ('<xyz:name/>', '<xyz:name/>', '<abc:name/>'),
            'indent': (' <xyz:name/>', ' <xyz:name/>', '<xyz:name/>'),
            'attrs': (
                ' <abc:Name foo="bar" bar="foo">',
                ' <abc:Name bar="foo" foo="bar">',
                ' <abc:Name far="boo" foo="bar">',
            ),
            'nsdecl_order': (
                ' <name xmlns:a="http://ns/1" xmlns:b="http://ns/2"/>',
                ' <name xmlns:b="http://ns/2" xmlns:a="http://ns/1"/>',
                ' <name xmlns:b="http://ns/2" xmlns:a="http://ns/1">',
            ),
            'closing_elm': ('</xyz:name>', '</xyz:name>', '<xyz:name>'),
        }
        line, other, differs = cases[request.param]
        return line, other, differs

0 commit comments

Comments
 (0)