Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/xml.etree.elementtree.rst
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ Functions
.. versionchanged:: 3.13
Added the :meth:`!close` method.

.. versionchanged:: next
A :exc:`ResourceWarning` is now emitted if the iterator opened a file
and is not explicitly closed.


.. function:: parse(source, parser=None)

Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1150,3 +1150,9 @@ that may require changes to your code.

* :meth:`~mmap.mmap.resize` has been removed on platforms that don't support the
underlying syscall, instead of raising a :exc:`SystemError`.

* A :exc:`ResourceWarning` is now emitted for an unclosed
:func:`xml.etree.ElementTree.iterparse` iterator if it opened a file.
Use its :meth:`!close` method or the :func:`contextlib.closing` context
manager to close it.
(Contributed by Osama Abdelkader and Serhiy Storchaka in :gh:`140601`.)
47 changes: 47 additions & 0 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1436,18 +1436,40 @@ def test_nonexistent_file(self):

def test_resource_warnings_not_exhausted(self):
# Not exhausting the iterator still closes the underlying file (bpo-43292)
# Not closing before del should emit ResourceWarning
it = ET.iterparse(SIMPLE_XMLFILE)
with warnings_helper.check_no_resource_warning(self):
it.close()
del it
gc_collect()

it = ET.iterparse(SIMPLE_XMLFILE)
with self.assertWarns(ResourceWarning) as wm:
del it
gc_collect()
# Not 'unclosed file'.
self.assertIn('unclosed iterparse iterator', str(wm.warning))
self.assertIn(repr(SIMPLE_XMLFILE), str(wm.warning))
self.assertEqual(wm.filename, __file__)

it = ET.iterparse(SIMPLE_XMLFILE)
with warnings_helper.check_no_resource_warning(self):
action, elem = next(it)
it.close()
self.assertEqual((action, elem.tag), ('end', 'element'))
del it, elem
gc_collect()

it = ET.iterparse(SIMPLE_XMLFILE)
with self.assertWarns(ResourceWarning) as wm:
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'element'))
del it, elem
gc_collect()
self.assertIn('unclosed iterparse iterator', str(wm.warning))
self.assertIn(repr(SIMPLE_XMLFILE), str(wm.warning))
self.assertEqual(wm.filename, __file__)

def test_resource_warnings_failed_iteration(self):
self.addCleanup(os_helper.unlink, TESTFN)
with open(TESTFN, "wb") as f:
Expand All @@ -1461,15 +1483,40 @@ def test_resource_warnings_failed_iteration(self):
next(it)
self.assertEqual(str(cm.exception),
'junk after document element: line 1, column 12')
it.close()
del cm, it
gc_collect()

it = ET.iterparse(TESTFN)
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'document'))
with self.assertWarns(ResourceWarning) as wm:
with self.assertRaises(ET.ParseError) as cm:
next(it)
self.assertEqual(str(cm.exception),
'junk after document element: line 1, column 12')
del cm, it
gc_collect()
self.assertIn('unclosed iterparse iterator', str(wm.warning))
self.assertIn(repr(TESTFN), str(wm.warning))
self.assertEqual(wm.filename, __file__)

def test_resource_warnings_exhausted(self):
it = ET.iterparse(SIMPLE_XMLFILE)
with warnings_helper.check_no_resource_warning(self):
list(it)
it.close()
del it
gc_collect()

it = ET.iterparse(SIMPLE_XMLFILE)
with self.assertWarns(ResourceWarning) as wm:
list(it)
del it
gc_collect()
self.assertIn('unclosed iterparse iterator', str(wm.warning))
self.assertIn(repr(SIMPLE_XMLFILE), str(wm.warning))
self.assertEqual(wm.filename, __file__)

def test_close_not_exhausted(self):
iterparse = ET.iterparse
Expand Down
12 changes: 8 additions & 4 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,16 +1261,20 @@ def iterator(source):
gen = iterator(source)
class IterParseIterator(collections.abc.Iterator):
__next__ = gen.__next__

def close(self):
nonlocal close_source
if close_source:
source.close()
close_source = False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this option be better here?

try:
  if close_source:
    close_source = False
    source.close()
finally:
  gen.close()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder. It seems that the only exception that can be raised here is a KeyboardInterrupt. The difference between calling and not calling gen.close() is whether you can continue iterating after iterparse().close() (cached elements can still be yielded). But if iterparse().close() fails, you do not have any guarantees. Anyway, the file is closed first, so there will be no leaks. I think there will be no practical difference, so we can keep the simplest code.

gen.close()

def __del__(self):
# TODO: Emit a ResourceWarning if it was not explicitly closed.
# (When the close() method will be supported in all maintained Python versions.)
def __del__(self, _warn=warnings.warn):
if close_source:
source.close()
try:
_warn(f"unclosed iterparse iterator {source.name!r}", ResourceWarning, stacklevel=2)
finally:
source.close()
Comment on lines +1274 to +1277
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this is better?

Suggested change
try:
_warn(f"unclosed iterparse iterator {source.name!r}", ResourceWarning, stacklevel=2)
finally:
source.close()
name = getattr(source, 'name', None)
if name:
_warn("unclosed iterparse iterator %r" % (name,),
ResourceWarning, stacklevel=2)
source.close()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

source.name always exists and is not empty. This code is only executed when iterparse() opened a file by name.

BTW, if the iterparse() argument was file descriptor 0, your variant would not work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right - I missed that close_source=True guarantees source.name exists, and if name: breaks on fd 0. Thanks for catching that!

The original code is correct. (% formatting might be slightly safer than an f-string in __del__, but that is not a blocker.)


it = IterParseIterator()
it.root = None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:func:`xml.etree.ElementTree.iterparse` now emits a :exc:`ResourceWarning`
when the iterator is not explicitly closed and was opened with a filename.
This helps developers identify and fix resource leaks. Patch by Osama
Abdelkader.
Loading