Skip to content

Commit 07149cd

Browse files
authored
Implement multi-file compilation (mypyc/mypyc#510)
This implements an initial version of multi-file compilation. Each module is turned into its own .c file. A shared `__native.c` contains the globals initialization and global definitions.

The major changes needed to support this are:
1. Declarations used by multiple files are now all collected into a single header file (which is 27kloc for mypy).
2. Most of the order dependence in the code needed to be removed, so that different modules could be untangled.
3. This meant adding more forward declarations of things. `HeaderDeclaration` was generalized to contain both a declaration (that goes into the shared header) and an optional definition (that goes into a shared `.c` file).
4. The runtime finally needs its own `.c` file to prevent duplication of data structures it uses. As a TODO, more stuff should be moved into it from the header.

Compilation on Linux with clang is made slightly *slower* (though that could change if we made it parallelizable). On Windows, it seems to help some but not enormously: it is not enough to get the build to not OOM on a free Appveyor, but it seems like it might keep the build from intermittently OOMing on a "quad VM" Appveyor. Initial numbers showed a slowdown (like 5%), but I haven't dug into it too deeply yet.

I am inclined to merge this PR anyway, though, even if we don't turn on multi-file by default. The abstractions for generating multiple files are probably good, and I think most of the changes needed to make it work are also good.

We run multi-module tests in both modes. The default remains off.
1 parent 5823765 commit 07149cd

File tree

14 files changed

+228
-127
lines changed

14 files changed

+228
-127
lines changed

lib-rt/CPy.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include <stdbool.h>
2+
#include <Python.h>
3+
#include <frameobject.h>
4+
#include <assert.h>
5+
#include "CPy.h"
6+
7+
// TODO: Currently only the things that *need* to be defined a single time
8+
// instead of copied into every module live here. This is silly, and most
9+
// of the code in CPy.h and python_support.h should move here.
10+
11+
struct ExcDummyStruct _CPy_ExcDummyStruct = { PyObject_HEAD_INIT(NULL) };
12+
PyObject *_CPy_ExcDummy = (PyObject *)&_CPy_ExcDummyStruct;
13+
14+
// Because its dynamic linker is more restricted than linux/OS X,
15+
// Windows doesn't allow initializing globals with values from
16+
// other dynamic libraries. This means we need to initialize
17+
// things at load time.
18+
void CPy_Init(void) {
19+
_CPy_ExcDummyStruct.ob_base.ob_type = &PyBaseObject_Type;
20+
}

lib-rt/CPy.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -908,8 +908,8 @@ static void CPy_AddTraceback(const char *filename, const char *funcname, int lin
908908
// exception APIs that might want to return NULL pointers instead
909909
// return properly refcounted pointers to this dummy object.
910910
struct ExcDummyStruct { PyObject_HEAD };
911-
static struct ExcDummyStruct _CPy_ExcDummyStruct = { PyObject_HEAD_INIT(NULL) };
912-
static PyObject *_CPy_ExcDummy = (PyObject *)&_CPy_ExcDummyStruct;
911+
extern struct ExcDummyStruct _CPy_ExcDummyStruct;
912+
extern PyObject *_CPy_ExcDummy;
913913

914914
static inline void _CPy_ToDummy(PyObject **p) {
915915
if (*p == NULL) {
@@ -1025,13 +1025,8 @@ static void CPy_GetExcInfo(PyObject **p_type, PyObject **p_value, PyObject **p_t
10251025
_CPy_ToNone(p_traceback);
10261026
}
10271027

1028-
// Because its dynamic linker is more restricted than linux/OS X,
1029-
// Windows doesn't allow initializing globals with values from
1030-
// other dynamic libraries. This means we need to initialize
1031-
// things at load time.
1032-
static void CPy_Init(void) {
1033-
_CPy_ExcDummyStruct.ob_base.ob_type = &PyBaseObject_Type;
1034-
}
1028+
void CPy_Init(void);
1029+
10351030

10361031
#ifdef __cplusplus
10371032
}

mypyc/build.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import subprocess
2727
import hashlib
2828
import time
29+
import shutil
2930

3031
from typing import List, Tuple, Any, Optional, Union, Dict, cast
3132
MYPY = False
@@ -227,7 +228,8 @@ def include_dir() -> str:
227228

228229

229230
def generate_c(sources: List[BuildSource], options: Options,
230-
shared_lib_name: Optional[str]) -> Tuple[str, str]:
231+
multi_file: bool,
232+
shared_lib_name: Optional[str]) -> Tuple[List[Tuple[str, str]], str]:
231233
"""Drive the actual core compilation step.
232234
233235
Returns the generated C files as (file name, contents) pairs and (for debugging) the pretty printed IR.
@@ -247,7 +249,8 @@ def generate_c(sources: List[BuildSource], options: Options,
247249
print("Parsed and typechecked in {:.3f}s".format(t1 - t0))
248250

249251
ops = [] # type: List[str]
250-
ctext = emitmodule.compile_modules_to_c(result, module_names, shared_lib_name, ops=ops)
252+
ctext = emitmodule.compile_modules_to_c(result, module_names, shared_lib_name, multi_file,
253+
ops=ops)
251254

252255
t2 = time.time()
253256
print("Compiled to C in {:.3f}s".format(t2 - t1))
@@ -257,7 +260,7 @@ def generate_c(sources: List[BuildSource], options: Options,
257260

258261
def build_using_shared_lib(sources: List[BuildSource],
259262
lib_name: str,
260-
cfile: str,
263+
cfiles: List[str],
261264
build_dir: str,
262265
extra_compile_args: List[str],
263266
) -> List[MypycifyExtension]:
@@ -275,7 +278,7 @@ def build_using_shared_lib(sources: List[BuildSource],
275278
shared_lib = MypycifyExtension(
276279
'lib' + lib_name,
277280
is_mypyc_shared=True,
278-
sources=[cfile],
281+
sources=cfiles,
279282
include_dirs=[include_dir()],
280283
extra_compile_args=extra_compile_args,
281284
)
@@ -302,7 +305,7 @@ def build_using_shared_lib(sources: List[BuildSource],
302305

303306

304307
def build_single_module(sources: List[BuildSource],
305-
cfile: str,
308+
cfiles: List[str],
306309
extra_compile_args: List[str],
307310
) -> List[MypycifyExtension]:
308311
"""Produce the list of extension modules for a standalone extension.
@@ -311,7 +314,7 @@ def build_single_module(sources: List[BuildSource],
311314
"""
312315
return [MypycifyExtension(
313316
sources[0].module,
314-
sources=[cfile],
317+
sources=cfiles,
315318
include_dirs=[include_dir()],
316319
extra_compile_args=extra_compile_args,
317320
)]
@@ -320,6 +323,7 @@ def build_single_module(sources: List[BuildSource],
320323
def mypycify(paths: List[str],
321324
mypy_options: Optional[List[str]] = None,
322325
opt_level: str = '3',
326+
multi_file: bool = False,
323327
skip_cgen: bool = False) -> List[MypycifyExtension]:
324328
"""Main entry point to building using mypyc.
325329
@@ -356,20 +360,26 @@ def mypycify(paths: List[str],
356360
# of the modules are in package. (Because I didn't want to fuss
357361
# around with making the single module code handle packages.)
358362
use_shared_lib = len(sources) > 1 or any('.' in x.module for x in sources)
359-
cfile = os.path.join(build_dir, '__native.c')
360363

361364
lib_name = shared_lib_name([source.module for source in sources]) if use_shared_lib else None
362365

363366
# We let the test harness make us skip doing the full compilation
364367
# so that it can do a corner-cutting version without full stubs.
365368
# TODO: Be able to do this based on file mtimes?
366369
if not skip_cgen:
367-
ctext, ops_text = generate_c(sources, options, lib_name)
370+
cfiles, ops_text = generate_c(sources, options, multi_file, lib_name)
368371
# TODO: unique names?
369372
with open(os.path.join(build_dir, 'ops.txt'), 'w') as f:
370373
f.write(ops_text)
371-
with open(cfile, 'w', encoding='utf-8') as f:
372-
f.write(ctext)
374+
cfilenames = []
375+
for cfile, ctext in cfiles:
376+
cfile = os.path.join(build_dir, cfile)
377+
with open(cfile, 'w', encoding='utf-8') as f:
378+
f.write(ctext)
379+
if os.path.splitext(cfile)[1] == '.c':
380+
cfilenames.append(cfile)
381+
else:
382+
cfilenames = glob.glob(os.path.join(build_dir, '*.c'))
373383

374384
cflags = [] # type: List[str]
375385
if compiler.compiler_type == 'unix':
@@ -391,11 +401,16 @@ def mypycify(paths: List[str],
391401
'/wd4146', # negating unsigned int
392402
]
393403

404+
# Copy the runtime library in
405+
rt_file = os.path.join(build_dir, 'CPy.c')
406+
shutil.copyfile(os.path.join(include_dir(), 'CPy.c'), rt_file)
407+
cfilenames.append(rt_file)
408+
394409
if use_shared_lib:
395410
assert lib_name
396-
extensions = build_using_shared_lib(sources, lib_name, cfile, build_dir, cflags)
411+
extensions = build_using_shared_lib(sources, lib_name, cfilenames, build_dir, cflags)
397412
else:
398-
extensions = build_single_module(sources, cfile, cflags)
413+
extensions = build_single_module(sources, cfilenames, cflags)
399414

400415
return extensions
401416

mypyc/emit.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919

2020

2121
class HeaderDeclaration:
22-
def __init__(self, dependencies: Set[str], body: List[str]) -> None:
22+
def __init__(self,
23+
dependencies: Set[str], decl: List[str], defn: Optional[List[str]]) -> None:
2324
self.dependencies = dependencies
24-
self.body = body
25+
self.decl = decl
26+
self.defn = defn
2527

2628

2729
class EmitterContext:
@@ -34,7 +36,7 @@ def __init__(self, module_names: List[str]) -> None:
3436
# Map from tuple types to unique ids for them
3537
self.tuple_ids = {} # type: Dict[RTuple, str]
3638

37-
# The two maps below are used for generating declarations or
39+
# The map below is used for generating declarations and
3840
# definitions at the top of the C file. The main idea is that they can
3941
# be generated at any time during the emit phase.
4042

@@ -43,11 +45,6 @@ def __init__(self, module_names: List[str]) -> None:
4345
# The declaration contains the body of the struct.
4446
self.declarations = OrderedDict() # type: Dict[str, HeaderDeclaration]
4547

46-
# A map from C identifier to code that defined the C identifier. This
47-
# is similar to to 'declarations', but these may appear after the
48-
# declarations in the generated code.
49-
self.statics = OrderedDict() # type: Dict[str, str]
50-
5148

5249
class Emitter:
5350
"""Helper for C code generation."""
@@ -205,11 +202,13 @@ def tuple_undefined_value(self, rtuple: RTuple) -> str:
205202
context = self.context
206203
id = self.tuple_unique_id(rtuple)
207204
name = 'tuple_undefined_' + id
208-
if name not in context.statics:
205+
if name not in context.declarations:
209206
struct_name = self.tuple_struct_name(rtuple)
210207
values = self.tuple_undefined_value_helper(rtuple)
211-
init = 'struct {} {} = {{ {} }};'.format(struct_name, name, ''.join(values))
212-
context.statics[name] = init
208+
var = 'struct {} {}'.format(struct_name, name)
209+
decl = '{};'.format(var)
210+
init = '{} = {{ {} }};'.format(var, ''.join(values))
211+
context.declarations[name] = HeaderDeclaration(set([struct_name]), [decl], [init])
213212
return name
214213

215214
def tuple_undefined_value_helper(self, rtuple: RTuple) -> List[str]:
@@ -242,6 +241,7 @@ def declare_tuple_struct(self, tuple_type: RTuple) -> None:
242241
self.context.declarations[struct_name] = HeaderDeclaration(
243242
dependencies,
244243
self.tuple_c_declaration(tuple_type),
244+
None,
245245
)
246246

247247
def emit_inc_ref(self, dest: str, rtype: RType) -> None:

mypyc/emitclass.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,14 @@ def generate_slots(cl: ClassIR, table: SlotTable, emitter: Emitter) -> Dict[str,
7272

7373

7474
def generate_class_type_decl(cl: ClassIR, emitter: Emitter) -> None:
75-
emitter.emit_line('static PyTypeObject *{};'.format(emitter.type_struct_name(cl)))
75+
emitter.emit_line('PyTypeObject *{};'.format(emitter.type_struct_name(cl)))
76+
emitter.emit_line()
77+
generate_object_struct(cl, emitter)
78+
emitter.emit_line()
79+
declare_native_getters_and_setters(cl, emitter)
80+
generate_full = not cl.is_trait and not cl.builtin_base
81+
if generate_full:
82+
emitter.emit_line('{};'.format(native_function_header(cl.ctor, emitter)))
7683

7784

7885
def generate_class(cl: ClassIR, module: str, emitter: Emitter) -> None:
@@ -112,8 +119,6 @@ def emit_line() -> None:
112119
emitter.emit_line()
113120

114121
emit_line()
115-
generate_object_struct(cl, emitter)
116-
emit_line()
117122

118123
# If the class has a method to initialize default attribute
119124
# values, we need to call it during initialization.
@@ -224,6 +229,18 @@ def generate_object_struct(cl: ClassIR, emitter: Emitter) -> None:
224229
emitter.emit_line('}} {};'.format(cl.struct_name(emitter.names)))
225230

226231

232+
def declare_native_getters_and_setters(cl: ClassIR,
233+
emitter: Emitter) -> None:
234+
for attr, rtype in cl.attributes.items():
235+
emitter.emit_line('{}{}({} *self);'.format(emitter.ctype_spaced(rtype),
236+
native_getter_name(cl, attr, emitter.names),
237+
cl.struct_name(emitter.names)))
238+
emitter.emit_line(
239+
'bool {}({} *self, {}value);'.format(native_setter_name(cl, attr, emitter.names),
240+
cl.struct_name(emitter.names),
241+
emitter.ctype_spaced(rtype)))
242+
243+
227244
def generate_native_getters_and_setters(cl: ClassIR,
228245
emitter: Emitter) -> None:
229246
for attr, rtype in cl.attributes.items():

mypyc/emitfunc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def native_function_header(fn: FuncDecl, emitter: Emitter) -> str:
3939
for arg in fn.sig.args:
4040
args.append('{}{}{}'.format(emitter.ctype_spaced(arg.type), REG_PREFIX, arg.name))
4141

42-
return 'static {ret_type}{name}({args})'.format(
42+
return '{ret_type}{name}({args})'.format(
4343
ret_type=emitter.ctype_spaced(fn.sig.ret_type),
4444
name=emitter.native_function_name(fn),
4545
args=', '.join(args) or 'void')

0 commit comments

Comments
 (0)