Skip to content

Commit eb45c8d

Browse files
authored
fix: Extra content at the end of the document (#161)
## Why?

XML with additional content at the end of the document is invalid.

https://www.w3.org/TR/2006/REC-xml11-20060816/#document

```
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Misc

```
[27] Misc ::= Comment | PI | S
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PI

```
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget

```
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
```
1 parent face9dd commit eb45c8d

File tree

6 files changed

+99
-7
lines changed

6 files changed

+99
-7
lines changed

lib/rexml/parsers/baseparser.rb

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -460,15 +460,24 @@ def pull_event
460460
@closed = tag
461461
@nsstack.shift
462462
else
463+
if @tags.empty? and @have_root
464+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
465+
end
463466
@tags.push( tag )
464467
end
468+
@have_root = true
465469
return [ :start_element, tag, attributes ]
466470
end
467471
else
468472
text = @source.read_until("<")
469473
if text.chomp!("<")
470474
@source.position -= "<".bytesize
471475
end
476+
if @tags.empty? and @have_root
477+
unless /\A\s*\z/.match?(text)
478+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
479+
end
480+
end
472481
return [ :text, text ]
473482
end
474483
rescue REXML::UndefinedNamespaceException

test/parse/test_comment.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,5 +105,17 @@ def test_after_doctype_malformed_comment_end
105105
DETAIL
106106
end
107107
end
108+
109+
def test_after_root
110+
parser = REXML::Parsers::BaseParser.new('<a></a><!-- ok comment -->')
111+
112+
events = {}
113+
while parser.has_next?
114+
event = parser.pull
115+
events[event[0]] = event[1]
116+
end
117+
118+
assert_equal(" ok comment ", events[:comment])
119+
end
108120
end
109121
end

test/parse/test_element.rb

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,40 @@ def test_garbage_less_than_slash_before_end_tag_at_line_start
8585
</ </x>
8686
DETAIL
8787
end
88+
89+
def test_after_root
90+
exception = assert_raise(REXML::ParseException) do
91+
parser = REXML::Parsers::BaseParser.new('<a></a><b>')
92+
while parser.has_next?
93+
parser.pull
94+
end
95+
end
96+
97+
assert_equal(<<~DETAIL.chomp, exception.to_s)
98+
Malformed XML: Extra tag at the end of the document (got '<b')
99+
Line: 1
100+
Position: 10
101+
Last 80 unconsumed characters:
102+
103+
DETAIL
104+
end
105+
106+
def test_after_empty_element_tag_root
107+
exception = assert_raise(REXML::ParseException) do
108+
parser = REXML::Parsers::BaseParser.new('<a/><b>')
109+
while parser.has_next?
110+
parser.pull
111+
end
112+
end
113+
114+
assert_equal(<<~DETAIL.chomp, exception.to_s)
115+
Malformed XML: Extra tag at the end of the document (got '<b')
116+
Line: 1
117+
Position: 7
118+
Last 80 unconsumed characters:
119+
120+
DETAIL
121+
end
88122
end
89123
end
90124
end

test/parse/test_processing_instruction.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,17 @@ def test_garbage_text
4040
])
4141
end
4242
end
43+
44+
def test_after_root
45+
parser = REXML::Parsers::BaseParser.new('<a></a><?abc version="1.0" ?>')
46+
47+
events = {}
48+
while parser.has_next?
49+
event = parser.pull
50+
events[event[0]] = event[1]
51+
end
52+
53+
assert_equal("abc", events[:processing_instruction])
54+
end
4355
end
4456
end

test/parse/test_text.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
require "test/unit"
2+
require 'rexml/parsers/baseparser'
3+
4+
module REXMLTests
5+
class TestParseText < Test::Unit::TestCase
6+
class TestInvalid < self
7+
def test_after_root
8+
exception = assert_raise(REXML::ParseException) do
9+
parser = REXML::Parsers::BaseParser.new('<a></a>c')
10+
while parser.has_next?
11+
parser.pull
12+
end
13+
end
14+
15+
assert_equal(<<~DETAIL.chomp, exception.to_s)
16+
Malformed XML: Extra content at the end of the document (got 'c')
17+
Line: 1
18+
Position: 8
19+
Last 80 unconsumed characters:
20+
21+
DETAIL
22+
end
23+
end
24+
end
25+
end

test/test_pullparser.rb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,23 +63,23 @@ def test_entity_replacement
6363
end
6464

6565
def test_character_references
66-
source = '<a>&#65;</a><b>&#x42;</b>'
66+
source = '<root><a>&#65;</a><b>&#x42;</b></root>'
6767
parser = REXML::Parsers::PullParser.new( source )
68+
69+
events = {}
6870
element_name = ''
6971
while parser.has_next?
7072
event = parser.pull
7173
case event.event_type
7274
when :start_element
7375
element_name = event[0]
7476
when :text
75-
case element_name
76-
when 'a'
77-
assert_equal('A', event[1])
78-
when 'b'
79-
assert_equal('B', event[1])
80-
end
77+
events[element_name] = event[1]
8178
end
8279
end
80+
81+
assert_equal('A', events['a'])
82+
assert_equal("B", events['b'])
8383
end
8484

8585
def test_text_content_with_line_breaks

0 commit comments

Comments
 (0)