Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
compile time cleandoc
  • Loading branch information
methane committed Jul 4, 2023
commit 469b3f764be7ed8b9ca634cb2f6b93f706bc8d54
2 changes: 2 additions & 0 deletions Include/internal/pycore_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);

/* Access compiler internals for unit testing */

PyAPI_FUNC(PyObject*) _PyCompile_CleanDoc(PyObject *doc);

PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
PyObject *ast,
PyObject *filename,
Expand Down
45 changes: 22 additions & 23 deletions Lib/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,29 +881,28 @@ def cleandoc(doc):

Any whitespace that can be uniformly removed from the second line
onwards is removed."""
try:
lines = doc.expandtabs().split('\n')
except UnicodeError:
return None
else:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this try-except-else block because Python 3 don't autodecode from bytes.
If doc is bytes, doc.split('\n') raises TypeError, not UnicodeError.

# Find minimum indentation of any non-blank lines after first line.
margin = sys.maxsize
for line in lines[1:]:
content = len(line.lstrip())
if content:
indent = len(line) - content
margin = min(margin, indent)
# Remove indentation.
if lines:
lines[0] = lines[0].lstrip()
if margin < sys.maxsize:
for i in range(1, len(lines)): lines[i] = lines[i][margin:]
# Remove any trailing or leading blank lines.
while lines and not lines[-1]:
lines.pop()
while lines and not lines[0]:
lines.pop(0)
return '\n'.join(lines)
lines = doc.expandtabs().split('\n')

# Find minimum indentation of any non-blank lines after first line.
margin = sys.maxsize
for line in lines[1:]:
content = len(line.lstrip(' '))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed from line.lstrip() to line.lstrip(' ') for better compatibility between inspect.cleandoc and compiler.cleandoc.

if content:
indent = len(line) - content
margin = min(margin, indent)
# Remove indentation.
if lines:
lines[0] = lines[0].lstrip(' ')
if margin < sys.maxsize:
for i in range(1, len(lines)):
lines[i] = lines[i][margin:]
# Remove any trailing or leading blank lines.
while lines and not lines[-1]:
lines.pop()
while lines and not lines[0]:
lines.pop(0)
return '\n'.join(lines)


def getfile(object):
"""Work out which source or compiled file an object was defined in."""
Expand Down
39 changes: 37 additions & 2 deletions Lib/test/test_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,9 +596,44 @@ def test_finddoc(self):
self.assertEqual(finddoc(int.from_bytes), int.from_bytes.__doc__)
self.assertEqual(finddoc(int.real), int.real.__doc__)

cleandoc_testdata = [
# first line should have different margin
(' An\n indented\n docstring.', 'An\nindented\n docstring.'),
# trailing whitespace are not removed.
(' An \n \n indented \n docstring. ',
'An \n \nindented \n docstring. '),
# NUL is not termination.
('doc\0string\n\n second\0line\n third\0line\0',
'doc\0string\n\nsecond\0line\nthird\0line\0'),
# first line is lstrip()-ped. other lines are kept when no margin.[w:
(' ', ''),
]

def test_cleandoc(self):
self.assertEqual(inspect.cleandoc('An\n indented\n docstring.'),
'An\nindented\ndocstring.')
func = inspect.cleandoc
testdata = self.cleandoc_testdata + [
# leading and trailing empty lines should be removed
('\n\n\n first paragraph\n\n second paragraph\n\n',
'first paragraph\n\n second paragraph'),
(' \n \n \n ', ' \n \n '),
]
for i, (input, expected) in enumerate(testdata):
with self.subTest(i=i):
self.assertEqual(func(input), expected)

@cpython_only
def test_c_cleandoc(self):
import _testinternalcapi
func = _testinternalcapi.compiler_cleandoc
testdata = self.cleandoc_testdata + [
# leading and trailing empty lines are not removed
('\n\n\n first paragraph\n\n second paragraph\n\n',
'\n\n\nfirst paragraph\n\n second paragraph\n\n'),
(' \n \n \n ', '\n \n \n '),
]
for i, (input, expected) in enumerate(self.cleandoc_testdata):
with self.subTest(i=i):
self.assertEqual(func(input), expected)

def test_getcomments(self):
self.assertEqual(inspect.getcomments(mod), '# line 1\n')
Expand Down
20 changes: 19 additions & 1 deletion Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "interpreteridobject.h" // _PyInterpreterID_LookUp()
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
#include "pycore_compile.h" // _PyCompile_CleanDoc, _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
#include "pycore_ceval.h" // _PyEval_AddPendingCall
#include "pycore_fileutils.h" // _Py_normpath
#include "pycore_frame.h" // _PyInterpreterFrame
Expand Down Expand Up @@ -591,6 +591,23 @@ set_eval_frame_record(PyObject *self, PyObject *list)
Py_RETURN_NONE;
}

/*[clinic input]

_testinternalcapi.compiler_cleandoc -> object

doc: unicode

C implementation of inspect.cleandoc().
[clinic start generated code]*/

static PyObject *
_testinternalcapi_compiler_cleandoc_impl(PyObject *module, PyObject *doc)
/*[clinic end generated code: output=2dd203a80feff5bc input=2de03fab931d9cdc]*/
{
return _PyCompile_CleanDoc(doc);
}


/*[clinic input]

_testinternalcapi.compiler_codegen -> object
Expand Down Expand Up @@ -1271,6 +1288,7 @@ static PyMethodDef module_functions[] = {
{"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
{"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL},
{"set_eval_frame_record", set_eval_frame_record, METH_O, NULL},
_TESTINTERNALCAPI_COMPILER_CLEANDOC_METHODDEF
_TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF
_TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF
Expand Down
61 changes: 60 additions & 1 deletion Modules/clinic/_testinternalcapi.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

91 changes: 90 additions & 1 deletion Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1672,10 +1672,16 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts)
if (c->c_optimize < 2) {
docstring = _PyAST_GetDocString(stmts);
if (docstring) {
PyObject *cleandoc = _PyCompile_CleanDoc(docstring);
if (cleandoc == NULL) {
return ERROR;
}
i = 1;
st = (stmt_ty)asdl_seq_GET(stmts, 0);
assert(st->kind == Expr_kind);
VISIT(c, expr, st->v.Expr.value);
location loc = LOC(st->v.Expr.value);
ADDOP_LOAD_CONST(c, loc, cleandoc);
Py_DECREF(cleandoc);
RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store));
}
}
Expand Down Expand Up @@ -2220,11 +2226,19 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f
/* if not -OO mode, add docstring */
if (c->c_optimize < 2) {
docstring = _PyAST_GetDocString(body);
if (docstring) {
docstring = _PyCompile_CleanDoc(docstring);
if (docstring == NULL) {
compiler_exit_scope(c);
return ERROR;
}
}
}
if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) {
compiler_exit_scope(c);
return ERROR;
}
Py_XDECREF(docstring);

c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args);
c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
Expand Down Expand Up @@ -7935,6 +7949,81 @@ cfg_to_instructions(cfg_builder *g)
return NULL;
}

// C implementation of inspect.cleandoc()
//
// Difference from inspect.cleandoc():
// - Do not remove leading and trailing blank lines to keep lineno.
PyObject *
_PyCompile_CleanDoc(PyObject *doc)
{
doc = PyObject_CallMethod(doc, "expandtabs", NULL);
if (doc == NULL) {
return NULL;
}

Py_ssize_t doc_size;
const char *doc_utf8 = PyUnicode_AsUTF8AndSize(doc, &doc_size);
if (doc_utf8 == NULL) {
Py_DECREF(doc);
return NULL;
}
const char *p = doc_utf8;
const char *pend = p + doc_size;

// First pass: find minimum indentation of any non-blank lines
// after first line.
while (p < pend && *p++ != '\n') {
}

Py_ssize_t margin = PY_SSIZE_T_MAX;
while (p < pend) {
const char *s = p;
while (*p == ' ') p++;
if (p < pend && *p != '\n') {
margin = Py_MIN(margin, p - s);
}
while (p < pend && *p++ != '\n') {
}
}
if (margin == PY_SSIZE_T_MAX) {
margin = 0;
}

// Second pass: write cleandoc into buff.
char *buff = PyMem_Malloc(doc_size + 1);
char *w = buff;
p = doc_utf8;

// copy firstline without indent.
while (*p == ' ') p++;
while (p < pend) {
int ch = *w++ = *p++;
if (ch == '\n') {
break;
}
}

// copy subsequent lines without margin.
while (p < pend) {
for (Py_ssize_t i = 0; i < margin; i++, p++) {
if (*p != ' ') {
assert(*p == '\n' || *p == '\0');
break;
}
}
while (p < pend) {
int ch = *w++ = *p++;
if (ch == '\n') {
break;
}
}
}

Py_DECREF(doc);
return PyUnicode_FromStringAndSize(buff, w - buff);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure the dedent logic belongs in compile.c.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this dedent logic is very specific for docstring.
We can not use this logic in other place except inspect.cleandoc.

If we reuse this logic in inspect.cleandoc, it would be:

doc = compiler.cleandoc(doc).strip('\n')



PyObject *
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags,
int optimize, int compile_mode)
Expand Down