Mercurial > p > roundup > code
changeset 5544:1a0498c1ed90
Avoid errors indexing binary uploads with Python 3.
If you upload a binary file for a FileClass whose content property is
set to be indexed (the default), an error of the form "'utf-8' codec
can't decode byte 0x89 in position 0: invalid start byte" can occur
when the code attempts to index the content of that file. (This error
occurs after the creation of the file, and of any issue etc. created at
the same time, has been committed; the page returned gives the impression
that the creation failed, but that's not the case.)
The indexing itself only happens for text/plain files, but that check
is in the indexers themselves, after this error occurs (and it's
entirely possible that a text/plain upload could actually have some
binary or non-UTF-8 content). The bytes objects holding the binary
contents get converted to str, with resulting errors when they are not
in fact UTF-8 text. This patch makes the places that might try indexing
binary content do the conversion to strings themselves, for Python 3,
with errors='ignore', so that at least no such exception occurs (and if
the file is not text/plain, the results of the conversion will then get
discarded in the indexers).
| author | Joseph Myers <jsm@polyomino.org.uk> |
|---|---|
| date | Sun, 16 Sep 2018 20:04:03 +0000 |
| parents | bc3e00a3d24b |
| children | 4523fe3cf04c |
| files | roundup/backends/back_anydbm.py roundup/backends/rdbms_common.py |
| diffstat | 2 files changed, 22 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/roundup/backends/back_anydbm.py Sun Sep 16 16:19:20 2018 +0000 +++ b/roundup/backends/back_anydbm.py Sun Sep 16 20:04:03 2018 +0000 @@ -2227,9 +2227,12 @@ # store and possibly index self.db.storefile(self.classname, itemid, None, bs2b(content)) if self.properties['content'].indexme: + index_content = content + if bytes != str and isinstance(content, bytes): + index_content = content.decode('utf-8', errors='ignore') mime_type = self.get(itemid, 'type', self.default_mime_type) self.db.indexer.add_text((self.classname, itemid, 'content'), - content, mime_type) + index_content, mime_type) propvalues['content'] = content # fire reactors @@ -2245,8 +2248,12 @@ for prop, propclass in self.getprops().items(): if prop == 'content' and propclass.indexme: mime_type = self.get(nodeid, 'type', self.default_mime_type) + index_content = self.get(nodeid, 'binary_content') + if bytes != str and isinstance(index_content, bytes): + index_content = index_content.decode('utf-8', + errors='ignore') self.db.indexer.add_text((self.classname, nodeid, 'content'), - str(self.get(nodeid, 'content')), mime_type) + index_content, mime_type) elif isinstance(propclass, hyperdb.String) and propclass.indexme: # index them under (classname, nodeid, property) try:
--- a/roundup/backends/rdbms_common.py Sun Sep 16 16:19:20 2018 +0000 +++ b/roundup/backends/rdbms_common.py Sun Sep 16 20:04:03 2018 +0000 @@ -3052,8 +3052,11 @@ # and index! if self.properties['content'].indexme: + index_content = content + if bytes != str and isinstance(content, bytes): + index_content = content.decode('utf-8', errors='ignore') self.db.indexer.add_text((self.classname, newid, 'content'), - content, mime_type) + index_content, mime_type) # store off the content as a file self.db.storefile(self.classname, newid, None, bs2b(content)) @@ -3105,8 +3108,11 @@ self.db.storefile(self.classname, itemid, None, bs2b(content)) if self.properties['content'].indexme: mime_type = self.get(itemid, 'type', self.default_mime_type) + index_content = content + if bytes != str and isinstance(content, bytes): + index_content = content.decode('utf-8', errors='ignore') self.db.indexer.add_text((self.classname, itemid, 'content'), - content, mime_type) + index_content, mime_type) propvalues['content'] = content # fire reactors @@ -3122,8 +3128,12 @@ for prop, propclass in self.getprops().items(): if prop == 'content' and propclass.indexme: mime_type = self.get(nodeid, 'type', self.default_mime_type) + index_content = self.get(nodeid, 'binary_content') + if bytes != str and isinstance(index_content, bytes): + index_content = index_content.decode('utf-8', + errors='ignore') self.db.indexer.add_text((self.classname, nodeid, 'content'), - str(self.get(nodeid, 'content')), mime_type) + index_content, mime_type) elif isinstance(propclass, hyperdb.String) and propclass.indexme: # index them under (classname, nodeid, property) try:
