|
| 1 | +#! /usr/bin/env python |
| 2 | + |
| 3 | +""" |
| 4 | +combinerefs path |
| 5 | +
|
| 6 | +A helper for analyzing PYTHONDUMPREFS output. |
| 7 | +
|
| 8 | +When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown |
| 9 | +time Py_Finalize() prints the list of all live objects twice: first it |
| 10 | +prints the repr() of each object while the interpreter is still fully intact. |
| 11 | +After cleaning up everything it can, it prints all remaining live objects |
| 12 | +again, but the second time just prints their addresses, refcounts, and type |
| 13 | +names. |
| 14 | +
|
| 15 | +Save all this output into a file, then run this script passing the path to |
| 16 | +that file. The script finds both output chunks, combines them, then prints |
| 17 | +a line of output for each object still alive at the end: |
| 18 | +
|
| 19 | + address refcnt typename repr |
| 20 | +
|
| 21 | +address is the address of the object, in whatever format the platform C |
| 22 | +produces for a %p format code. |
| 23 | +
|
| 24 | +refcnt is of the form |
| 25 | +
|
| 26 | + "[" ref "]" |
| 27 | +
|
| 28 | +when the object's refcount is the same in both PYTHONDUMPREFS output blocks, |
| 29 | +or |
| 30 | +
|
| 31 | + "[" ref_before "->" ref_after "]" |
| 32 | +
|
| 33 | +if the refcount changed. |
| 34 | +
|
| 35 | +typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS |
| 36 | +output block. |
| 37 | +
|
| 38 | +repr is repr(object), extracted from the first PYTHONDUMPREFS output block. |
| 39 | +
|
| 40 | +The objects are listed in allocation order, with most-recently allocated |
| 41 | +printed first, and the first object allocated printed last. |
| 42 | +
|
| 43 | +
|
| 44 | +Simple examples: |
| 45 | +
|
| 46 | + 00857060 [14] str '__len__' |
| 47 | +
|
| 48 | +The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS |
| 49 | +output blocks said there were 14 references to it. This is probably due to |
| 50 | +C modules that intern the string "__len__" and keep a reference to it in a |
| 51 | +file static. |
| 52 | +
|
| 53 | + 00857038 [46->5] tuple () |
| 54 | +
|
| 55 | +46-5 = 41 references to the empty tuple were removed by the cleanup actions |
| 56 | +between the times PYTHONDUMPREFS produced output. |
| 57 | +
|
| 58 | + 00858028 [1025->1456] str '<dummy key>' |
| 59 | +
|
| 60 | +The string '<dummy key>', which is used in dictobject.c as the name of the |
| 61 | +dummy key that overwrites a real key that gets deleted, actually grew |
| 62 | +several hundred references during cleanup. It suggests that stuff did get |
| 63 | +removed from dicts by cleanup, but that the dicts themselves are staying |
| 64 | +alive for some reason. |
| 65 | +""" |
| 66 | + |
| 67 | +import re |
| 68 | +import sys |
| 69 | + |
def read(fileiter, pat, whilematch):
    """Pull a leading run of lines off fileiter and return them as a list.

    pat is a compiled regexp object; each line is tested with pat.match().
    If whilematch is true, lines are collected for as long as pat matches
    them.  If whilematch is false, lines are collected for as long as pat
    does not match them.

    In either case the first line that ends the run is consumed but not
    returned: it is lost, and (when fileiter is an iterator) fileiter will
    resume at the line following it.  If fileiter is exhausted before the
    run ends, every line read is returned.
    """
    collected = []
    for text in fileiter:
        matched = pat.match(text) is not None
        if matched != whilematch:
            # This line terminates the run.  It has already been taken off
            # fileiter, and it is deliberately dropped rather than returned.
            break
        collected.append(text)
    return collected
| 84 | + |
def combine(fname):
    """Combine the two PYTHONDUMPREFS output blocks saved in the file fname.

    The file is scanned for the line 'Remaining objects:'; the lines after it,
    up to the line 'Remaining object addresses:', form the first block
    (address, refcount, repr).  The lines after that which still match the
    'address [refcount] text' pattern form the second block (address,
    refcount, type name).

    For every object in the second block that was also in the first, one line
    is printed to stdout:

        address [refcnt] typename repr

    where refcnt is either the single refcount, or 'before->after' if the
    refcount differs between the two blocks.

    Lines of the first block that do not match the pattern are reported with
    a '??? skipped:' message.  Objects that appear only in the second block
    are reported with a '??? new object created while tearing down:' message;
    they are still included in the "after" count.  A final summary line gives
    the number of objects parsed from each block.

    If the expected header lines are not present, nothing is parsed and the
    summary reports 0 objects.

    Errors from opening or reading fname (e.g. IOError) propagate to the
    caller; the file is closed in every case.
    """
    first_header = re.compile(r'^Remaining objects:$')
    second_header = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    addr2rc = {}
    addr2guts = {}
    before = 0
    after = 0

    # open() rather than the Python-2-only file() builtin.  try/finally
    # (not a with statement) keeps this runnable on the old interpreters the
    # script was written for, while still closing the file if parsing fails.
    f = open(fname)
    try:
        fi = iter(f)

        # Throw away everything up to and including the first header line.
        read(fi, first_header, False)

        # First block: remember each object's refcount and repr by address.
        for line in read(fi, second_header, False):
            m = crack.match(line)
            if m:
                addr, rc, guts = m.groups()  # guts is the repr here
                addr2rc[addr] = rc
                addr2guts[addr] = guts
                before += 1
            else:
                # The line still carries its newline; strip it so the
                # message is not followed by a stray blank line.
                print('??? skipped: %s' % line.rstrip('\r\n'))

        # Second block: read() only hands back lines that match crack, so
        # the match below cannot fail.
        for line in read(fi, crack, True):
            after += 1
            addr, rc, guts = crack.match(line).groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down: %s'
                      % line.rstrip('\r\n'))
                continue
            if rc == addr2rc[addr]:
                refcnt = '[%s]' % rc
            else:
                refcnt = '[%s->%s]' % (addr2rc[addr], rc)
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            print('%s %s %s %s' % (addr, refcnt, guts, addr2guts[addr]))
    finally:
        f.close()

    print("%d objects before, %d after" % (before, after))
| 122 | + |
if __name__ == '__main__':
    # The script needs one argument: the path of the file holding the saved
    # PYTHONDUMPREFS output.  Without it, show the usage line (on stderr, with
    # a non-zero exit status) instead of dying with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: combinerefs path')
    combine(sys.argv[1])
0 commit comments