-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathpythoncapi.py
More file actions
330 lines (258 loc) · 9.15 KB
/
pythoncapi.py
File metadata and controls
330 lines (258 loc) · 9.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
import glob
import os.path
import re
import subprocess
# Base names of the header files which must never be parsed.
EXCLUDE_HEADERS = frozenset({
    # pthread_stubs.h is a special header file used by WASM
    'pthread_stubs.h',
    # dynamic_annotations.h is not included by Python.h
    'dynamic_annotations.h',
    # Include/pystats.h: its code is skipped unless Python is built with
    # --enable-pystats (that is, unless the Py_STATS macro is defined)
    'pystats.h',
})
# Checkout of the Python Git repository: URL to clone from, and location of
# the local clone ("cpython_git", next to the parent directory of this file).
CPYTHON_URL = 'https://github.com/python/cpython'
GIT_DIR = os.path.normpath(
    os.path.join(os.path.dirname(__file__), os.pardir, 'cpython_git'))

# Directories (relative paths) holding the header files of each API level
PATH_LIMITED_API = 'Include'
PATH_CPYTHON_API = os.path.join(PATH_LIMITED_API, 'cpython')
PATH_INTERNAL_API = os.path.join(PATH_LIMITED_API, 'internal')
# Extra header listed together with the internal API headers
POSIXMODULE_H = 'Modules/posixmodule.h'

# Regular expression fragment matching a C identifier
RE_IDENTIFIER = r'[A-Za-z_][A-Za-z0-9_]*'
# Start of a structure at the beginning of a line: "struct name {" or
# "typedef struct {"; group 1 is the optional structure tag.
RE_STRUCT_START = re.compile(
    r'^(?:typedef +)?struct(?: +([A-Za-z0-9_]+))? *{',
    re.MULTILINE)
# End of a structure at the beginning of a line: "};" or "} name;";
# group 1 is the optional name after the closing brace.
RE_STRUCT_END = re.compile(
    r'^}(?: +([A-Za-z0-9_]+))? *;',
    re.MULTILINE)

# Structure name found in a header => type name recorded instead of it
TYPEDEFS = {
    '_object': 'PyObject',
    '_longobject': 'PyLongObject',
    '_typeobject': 'PyTypeObject',
    'PyCodeObject': 'PyCodeObject',
    '_frame': 'PyFrameObject',
    '_ts': 'PyThreadState',
    '_is': 'PyInterpreterState',
    '_xid': '_PyCrossInterpreterData',
    '_traceback': 'PyTracebackObject',
}

# A name starting with one of these prefixes is considered public
PUBLIC_NAME_PREFIX = ("Py", "PY")
def run_command(cmd, cwd):
    """Run a command in the *cwd* directory, capturing its output.

    stdout and stderr are captured through pipes instead of being written
    to the terminal. Nothing is returned.

    Raise subprocess.CalledProcessError if the command exits with a
    non-zero exit code.
    """
    # capture_output=True is the shorthand for stdout=PIPE and stderr=PIPE
    subprocess.run(cmd, cwd=cwd, capture_output=True, check=True)
def git_clone():
    """Clone the CPython Git repository into GIT_DIR.

    Do nothing if GIT_DIR already exists. Otherwise, "git clone" is run
    from the parent directory of GIT_DIR.
    """
    if not os.path.exists(GIT_DIR):
        print(f"Clone CPython Git repository: {CPYTHON_URL}")
        # Clone from the parent directory, using the last path component
        # of GIT_DIR as the name of the destination directory.
        parent_dir, dst_name = os.path.split(GIT_DIR)
        run_command(['git', 'clone', CPYTHON_URL, dst_name], cwd=parent_dir)
# Module-level flags used by git_switch_branch(): the checkout is cleaned
# and fetched at most once.
_CLEANED = False
_FETCHED = False


def git_switch_branch(branch):
    """Switch the CPython checkout (GIT_DIR) to *branch*.

    The repository is cloned first if needed. On the first call, untracked
    files are removed ("git clean -fdx") and local changes are discarded
    ("git checkout .").

    The 'main' branch is selected with "git switch", fetched (only once)
    and then fast-forwarded with "git merge --ff". Any other branch is
    simply selected with "git checkout".
    """
    global _CLEANED, _FETCHED

    def git(*args):
        # Run a Git command inside the CPython checkout
        run_command(['git', *args], cwd=GIT_DIR)

    git_clone()

    if not _CLEANED:
        git('clean', '-fdx')
        git('checkout', '.')
        _CLEANED = True

    if branch != 'main':
        git('checkout', branch)
        return

    git('switch', branch)
    if not _FETCHED:
        print("Update the CPython Git repository (git fetch)")
        git('fetch')
        _FETCHED = True
    git('merge', '--ff')
def list_files(path):
    """List the header files (*.h) of the *path* directory.

    Return an empty list if *path* does not exist. POSIXMODULE_H is added
    to the result when *path* is PATH_INTERNAL_API. Files whose base name
    is in EXCLUDE_HEADERS are left out.
    """
    if not os.path.exists(path):
        return []

    candidates = glob.glob(os.path.join(path, '*.h'))
    if path == PATH_INTERNAL_API:
        candidates.append(POSIXMODULE_H)

    headers = []
    for header in candidates:
        if os.path.basename(header) in EXCLUDE_HEADERS:
            continue
        headers.append(header)
    return headers
def _get_types(filename, names):
    """Add the structure names declared in a header file to *names*.

    filename: path of the header file, read as UTF-8.
    names: set of type names, updated in place.

    Each structure start matched by RE_STRUCT_START is paired with the
    next RE_STRUCT_END match. The name written after the closing brace is
    used if there is one, otherwise the structure tag. The name is then
    translated through TYPEDEFS.

    Raise Exception if the end of a structure cannot be found, if a
    structure has no name at all, or if 'pthread_mutex_t' ends up in
    *names* (which is reported as pthread_stubs.h having been parsed).
    """
    with open(filename, encoding="utf-8") as fp:
        content = fp.read()

    for match in RE_STRUCT_START.finditer(content):
        struct_name = match.group(1)
        match2 = RE_STRUCT_END.search(content, match.end())
        if not match2:
            raise Exception(f"{filename}: cannot find end of: {match.group()}")

        # Prefer the name after '}', fall back to the structure tag
        name = match2.group(1) or struct_name
        if not name:
            # The message used to end with a stray, unbalanced ')'
            raise Exception(f"{filename}: structure has no name: {match.group()}")

        names.add(TYPEDEFS.get(name, name))

    if 'pthread_mutex_t' in names:
        raise Exception('pthread_stubs.h was parsed')
def get_types_path(directory):
    """Return the sorted list of the type names found in *directory*.

    Every header file returned by list_files(directory) is parsed with
    _get_types(); each name is listed once.
    """
    found = set()
    for header in list_files(directory):
        _get_types(header, found)
    return sorted(found)
def get_types():
    """Return the type names of each API level.

    The result is a (limited, cpython, internal) tuple: one sorted list of
    names for each of PATH_LIMITED_API, PATH_CPYTHON_API and
    PATH_INTERNAL_API.
    """
    directories = (PATH_LIMITED_API, PATH_CPYTHON_API, PATH_INTERNAL_API)
    return tuple(get_types_path(directory) for directory in directories)
def grep(regex, filenames, group=0):
    """Generate the matches of *regex* found in the given files.

    regex: compiled regular expression.
    filenames: paths of the files to search; each file is read entirely,
        as UTF-8.
    group: match group to yield (default: 0, the whole match).

    Matches are yielded file by file, in their order of appearance.
    """
    for path in filenames:
        with open(path, encoding='utf-8') as fp:
            text = fp.read()
        yield from (found.group(group) for found in regex.finditer(text))
def is_function_public(name):
    """Return True if *name* starts with one of the PUBLIC_NAME_PREFIX."""
    return any(name.startswith(prefix) for prefix in PUBLIC_NAME_PREFIX)
def get_macros_static_inline_funcs():
    """Find the function-like macros and the static inline functions.

    Only the header files of PATH_LIMITED_API and PATH_CPYTHON_API are
    searched.

    Return a (macros, funcs, private_macros, private_funcs) tuple of sets
    of names, where "private" means that is_function_public() is false
    for the name.
    """
    headers = list_files(PATH_LIMITED_API) + list_files(PATH_CPYTHON_API)

    # Match '#define func('
    # Don't match '#define constant (&obj)': space before '('
    # NOTE(review): the character class of the macro name has no digit, so
    # a macro whose name contains a digit is not matched -- confirm that
    # this is intended.
    macro_regex = re.compile(r'^ *# *define (_?P[Yy][A-Za-z_]+)\(',
                             re.MULTILINE)
    func_regex = re.compile(
        fr'^static inline [^(\n]+ ({RE_IDENTIFIER}) *\(',
        re.MULTILINE)
    all_macros = set(grep(macro_regex, headers, group=1))
    all_funcs = set(grep(func_regex, headers, group=1))

    # Ignore the macros which only exist to cast the arguments of a static
    # inline function having the same name. Like:
    #   "static inline void Py_INCREF(...) { ...}"
    #   "#define Py_INCREF(obj) Py_INCREF(_PyObject_CAST(obj))"
    # Only the static inline function is counted.
    #
    # Also ignore a macro "name" if there is a static inline function
    # called "_name". In Python 3.10, Py_INCREF() was wrapping the
    # _Py_INCREF() static inline function. In Python 3.11, the Py_NewRef()
    # macro just calls the _Py_NewRef() static inline function.
    all_macros = {name for name in all_macros - all_funcs
                  if f"_{name}" not in all_funcs}

    # Remove PyDTrace_xxx functions
    all_funcs = {name for name in all_funcs
                 if not name.startswith("PyDTrace_")}

    # Separate the private names from the public ones
    private_macros = {name for name in all_macros
                      if not is_function_public(name)}
    private_funcs = {name for name in all_funcs
                     if not is_function_public(name)}
    macros = all_macros - private_macros
    funcs = all_funcs - private_funcs
    return (macros, funcs, private_macros, private_funcs)
def get_functions():
    """Find the functions declared with PyAPI_FUNC() in the header files.

    Return a (public, private, internal) tuple of sets of function names:

    * public: names found in PATH_LIMITED_API or PATH_CPYTHON_API for
      which is_function_public() is true;
    * private: the other names found in these two directories;
    * internal: all the names found in PATH_INTERNAL_API.

    Raise Exception if 'pthread_create' is found, which is reported as
    pthread_stubs.h having been parsed.
    """
    # Two defects of the previous pattern are fixed here:
    #
    # * "{RE_IDENTIFIER}+" expanded to "...[A-Za-z0-9_]*+": an accidental
    #   possessive quantifier, rejected with re.error ("multiple repeat")
    #   before Python 3.11. Dropping the "+" matches the same text, since
    #   the identifier must be followed by a space or a newline anyway.
    # * "[ |\n]" was meant as "space or newline", but "|" is a literal
    #   character inside a character class.
    regex = re.compile(
        # Ignore "#define PyAPI_FUNC(RTYPE) ..." (pyport.h)
        r'(?<!define )'
        # 'PyAPI_FUNC(int) '
        r'PyAPI_FUNC\([^)]+\)[ \n]*'
        # '_Py_NO_RETURN '
        fr'(?:{RE_IDENTIFIER}[ \n]+)*'
        # 'PyLong_FromLong('
        fr'({RE_IDENTIFIER})[ \n]*\(')

    def get(path):
        files = list_files(path)
        return set(grep(regex, files, group=1))

    limited = get(PATH_LIMITED_API)
    cpython = get(PATH_CPYTHON_API)
    internal = get(PATH_INTERNAL_API)
    for names in (limited, cpython, internal):
        if 'pthread_create' in names:
            raise Exception('pthread_stubs.h was parsed')

    public = set()
    private = set()
    for name in limited | cpython:
        if is_function_public(name):
            public.add(name)
        else:
            private.add(name)
    return (public, private, internal)
def get_variables():
    """Find the variables declared with PyAPI_DATA() in the header files.

    Return a (public, private, internal) tuple of sets of variable names:

    * public: names found in PATH_LIMITED_API or PATH_CPYTHON_API for
      which is_function_public() is true;
    * private: the other names found in these two directories;
    * internal: all the names found in PATH_INTERNAL_API.

    Raise ValueError if a PyAPI_DATA() declaration cannot be parsed.
    """
    # First pass: find the declarations. Each one must fit on one line.
    declaration_regex = re.compile(
        # 'Py_DEPRECATED(3.13) '
        fr'^ *(?:Py_DEPRECATED\([^)]+\) *)?'
        # 'PyAPI_DATA' ... ';'
        fr'PyAPI_DATA.*;',
        re.MULTILINE)

    RE_VARIABLE = (
        # 'name'
        # 'name, name2'
        # 'name, *name2'
        fr'(?:const *)?'
        fr'({RE_IDENTIFIER}(?:, *\*? *{RE_IDENTIFIER})*)'
        # '[]', '[256]', '[PY_EXECUTABLE_KINDS+1]'
        fr'(?:\[[^]]*\])?'
    )
    RE_FUNC = (
        # '(*name) (' ... ')'
        # '*(*name) (' ... ')'
        # 'name (' ... ')'
        fr'(?:\*? *\( *\* *({RE_IDENTIFIER}) *\)|({RE_IDENTIFIER})) *\([^)]*\)'
    )
    # Second pass: extract the name(s) from one declaration
    names_regex = re.compile(
        # 'PyAPI_DATA(int) '
        fr'PyAPI_DATA\([^)]+\) +'
        # 'Py_VerboseFlag;'
        fr'(?:{RE_VARIABLE}|{RE_FUNC}) *;',
        re.MULTILINE)

    def get(path):
        found = set()
        for declaration in grep(declaration_regex, list_files(path)):
            match = names_regex.search(declaration)
            if match is None:
                raise ValueError(f'fail to parse PyAPI_DATA: {declaration!r}')

            # Group 1: variable name(s), groups 2 and 3: the two forms of
            # RE_FUNC. Only one of them is set.
            parts = match.group(1) or match.group(2) or match.group(3)
            found.update(part.strip() for part in parts.split(','))
        return found

    limited = get(PATH_LIMITED_API)
    cpython = get(PATH_CPYTHON_API)
    internal = get(PATH_INTERNAL_API)

    documented = limited | cpython
    public = {name for name in documented if is_function_public(name)}
    private = documented - public
    return (public, private, internal)
def get_line_number(filename):
    """Return the number of lines of a file, read as UTF-8."""
    with open(filename, encoding="utf-8") as fp:
        return sum(1 for _ in fp)
def get_line_numbers():
    """Return the total number of lines of the header files.

    The result is a (limited, cpython, internal) tuple: one total for each
    of PATH_LIMITED_API, PATH_CPYTHON_API and PATH_INTERNAL_API, computed
    on the files returned by list_files().
    """
    def count_lines(path):
        return sum(get_line_number(header) for header in list_files(path))

    directories = (PATH_LIMITED_API, PATH_CPYTHON_API, PATH_INTERNAL_API)
    return tuple(count_lines(directory) for directory in directories)
def get_file_numbers():
    """Return the number of header files.

    The result is a (limited, cpython, internal) tuple: the number of
    files returned by list_files() for each of PATH_LIMITED_API,
    PATH_CPYTHON_API and PATH_INTERNAL_API.
    """
    directories = (PATH_LIMITED_API, PATH_CPYTHON_API, PATH_INTERNAL_API)
    return tuple(len(list_files(directory)) for directory in directories)