Mercurial > p > roundup > code
comparison roundup/cgi/client.py @ 6458:8f1b91756457
issue2551147 - Enable compression of http responses in roundup.
On-the-fly compression/content-encoding with gzip (plus brotli/zstd
when the optional packages are installed) is enabled by default.
Roundup can also serve pre-compressed static assets if the client
accepts them.
Docs updated.
Also added example nginx config to installation.txt. The config allows
nginx to compress data on the fly. If the config is used, dynamic
compression in roundup can be disabled.
Dedicating this checkin to my father Paul Hector Rouillard 1930-2021.
I did much of the development in this changeset while sitting with him
as he slept/transitioned. Without his encouragement and example, my
desire to learn would not be what it is and I wouldn't be half the
person I am.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sat, 24 Jul 2021 16:31:36 -0400 |
| parents | 8f8f4988b856 |
| children | 679ec82798e9 |
comparison
equal
deleted
inserted
replaced
| 6457:dc59051807b6 | 6458:8f1b91756457 |
|---|---|
| 48 from email.mime.base import MIMEBase | 48 from email.mime.base import MIMEBase |
| 49 from email.mime.text import MIMEText | 49 from email.mime.text import MIMEText |
| 50 from email.mime.multipart import MIMEMultipart | 50 from email.mime.multipart import MIMEMultipart |
| 51 import roundup.anypy.email_ | 51 import roundup.anypy.email_ |
| 52 | 52 |
| 53 from roundup.anypy.strings import s2b, b2s, uchr, is_us | 53 from roundup.anypy.strings import s2b, b2s, bs2b, uchr, is_us |
| 54 | 54 |
| 55 def initialiseSecurity(security): | 55 def initialiseSecurity(security): |
| 56 '''Create some Permissions and Roles on the security object | 56 '''Create some Permissions and Roles on the security object |
| 57 | 57 |
| 58 This function is directly invoked by security.Security.__init__() | 58 This function is directly invoked by security.Security.__init__() |
| 334 # A connect or send request failed because the connected party | 334 # A connect or send request failed because the connected party |
| 335 # did not properly respond after a period of time. | 335 # did not properly respond after a period of time. |
| 336 errno.ETIMEDOUT, | 336 errno.ETIMEDOUT, |
| 337 ) | 337 ) |
| 338 | 338 |
| 339 # Cache_Control[key] = Cache-Control header value | |
| 340 # Key can be explicitly file basename - value applied to just that file | |
| 341 # takes precedence over mime type. | |
| 342 # Key can be mime type - all files of that mimetype will get the value | |
| 339 Cache_Control = {} | 343 Cache_Control = {} |
| 340 | 344 |
| 345 # list of valid http compression (Content-Encoding) algorithms | |
| 346 # we have available | |
| 347 compressors = [] | |
| 348 try: | |
| 349 # Only one provided by standard library | |
| 350 import gzip | |
| 351 compressors.append('gzip') | |
| 352 except ImportError: | |
| 353 pass | |
| 354 try: | |
| 355 import brotli | |
| 356 compressors.append('br') | |
| 357 except ImportError: | |
| 358 pass | |
| 359 try: | |
| 360 import zstd | |
| 361 compressors.append('zstd') | |
| 362 except ImportError: | |
| 363 pass | |
| 364 | |
| 365 # mime types of files that are already compressed and should not be | |
| 366 # compressed on the fly. Can be extended/reduced using interfaces.py. | |
| 367 # This excludes types from being compressed. Should we have a list | |
| 368 # of mime types we should compress? write_html() calls compress_encode | |
| 369 # which uses this without a content-type so that's an issue. | |
| 370 # Also for text based data, might have charset too so need to parse | |
| 371 # content-type. | |
| 372 precompressed_mime_types = [ "image/png", "image/jpeg" ] | |
| 373 | |
| 341 def __init__(self, instance, request, env, form=None, translator=None): | 374 def __init__(self, instance, request, env, form=None, translator=None): |
| 342 # re-seed the random number generator. If this is an instance of | 375 # re-seed the random number generator. If this is an instance of |
| 343 # random.SystemRandom it has no effect. | 376 # random.SystemRandom it has no effect. |
| 344 random_.seed() | 377 random_.seed() |
| 345 # So we also seed the pseudorandom random source obtained from | 378 # So we also seed the pseudorandom random source obtained from |
| 1742 self.additional_headers['Content-Type'] = mime_type | 1775 self.additional_headers['Content-Type'] = mime_type |
| 1743 self.additional_headers['Last-Modified'] = email.utils.formatdate(lmt) | 1776 self.additional_headers['Last-Modified'] = email.utils.formatdate(lmt) |
| 1744 | 1777 |
| 1745 ims = None | 1778 ims = None |
| 1746 # see if there's an if-modified-since... | 1779 # see if there's an if-modified-since... |
| 1747 # XXX see which interfaces set this | 1780 # used if this is run behind a non-caching http proxy |
| 1748 #if hasattr(self.request, 'headers'): | 1781 if hasattr(self.request, 'headers'): |
| 1749 #ims = self.request.headers.getheader('if-modified-since') | 1782 ims = self.request.headers.get('if-modified-since') |
| 1750 if 'HTTP_IF_MODIFIED_SINCE' in self.env: | 1783 elif 'HTTP_IF_MODIFIED_SINCE' in self.env: |
| 1751 # cgi will put the header in the env var | 1784 # cgi will put the header in the env var |
| 1752 ims = self.env['HTTP_IF_MODIFIED_SINCE'] | 1785 ims = self.env['HTTP_IF_MODIFIED_SINCE'] |
| 1753 if ims: | 1786 if ims: |
| 1754 ims = email.utils.parsedate(ims)[:6] | 1787 ims = email.utils.parsedate(ims)[:6] |
| 1755 lmtt = time.gmtime(lmt)[:6] | 1788 lmtt = time.gmtime(lmt)[:6] |
| 1756 if lmtt <= ims: | 1789 if lmtt <= ims: |
| 1790 if (self.determine_content_encoding()): | |
| 1791 # set vary header as though we were returning 200 | |
| 1792 # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Vary | |
| 1793 self.setVary("Accept-Encoding") | |
| 1757 raise NotModified | 1794 raise NotModified |
| 1758 | 1795 |
| 1759 if filename: | 1796 if filename: |
| 1760 self.write_file(filename) | 1797 self.write_file(filename) |
| 1761 else: | 1798 else: |
| 2021 # connection. There's no way to be certain that this is | 2058 # connection. There's no way to be certain that this is |
| 2022 # the situation that has occurred here, but that is the | 2059 # the situation that has occurred here, but that is the |
| 2023 # most likely case. | 2060 # most likely case. |
| 2024 pass | 2061 pass |
| 2025 | 2062 |
| 2063 def determine_content_encoding(self, list_all=False, precompressed=False): | |
| 2064 | |
| 2065 encoding_list = [] | |
| 2066 | |
| 2067 # FIXME: Should parse for q= values and properly order | |
| 2068 # the request encodings. Also should handle identity coding. | |
| 2069 # Then return first acceptable by q value. | |
| 2070 # This code always uses order: zstd, br, gzip. It will send identity | |
| 2071 # even if identity excluded rather than returning 406. | |
| 2072 accept_encoding = self.request.headers.get('accept-encoding') or [] | |
| 2073 | |
| 2074 if accept_encoding: | |
| 2075 for enc in ['zstd', 'br', 'gzip']: | |
| 2076 if ((enc in self.compressors) or precompressed) and \ | |
| 2077 (enc in accept_encoding): | |
| 2078 if not list_all: | |
| 2079 return enc | |
| 2080 else: | |
| 2081 encoding_list.append(enc) | |
| 2082 | |
| 2083 # Return value must evaluate to false in boolean context if no | |
| 2084 # acceptable encoding is found. If a (non-identity) encoding | |
| 2085 # is found the Vary header will include accept-encoding. | |
| 2086 # What to return if the identity encoding is unacceptable? | |
| 2087 # Maybe raise a 406 from here? | |
| 2088 if not list_all: | |
| 2089 return None | |
| 2090 else: | |
| 2091 return encoding_list | |
| 2092 | |
| 2093 def setVary(self, header): | |
| 2094 '''Vary header will include the new header. This will append | |
| 2095 if Vary exists.''' | |
| 2096 | |
| 2097 if ('Vary' in self.additional_headers): | |
| 2098 self.additional_headers['Vary'] += ", %s"%header | |
| 2099 else: | |
| 2100 self.additional_headers['Vary'] = header | |
| 2101 | |
| 2102 def compress_encode(self, byte_content, quality=4): | |
| 2103 | |
| 2104 if not self.db.config.WEB_DYNAMIC_COMPRESSION: | |
| 2105 # dynamic compression disabled. | |
| 2106 return byte_content | |
| 2107 | |
| 2108 # don't compress small content | |
| 2109 if len(byte_content) < 100: | |
| 2110 return byte_content | |
| 2111 | |
| 2112 # abort if already encoded (e.g. served from | |
| 2113 # precompressed file or cache on disk) | |
| 2114 if ('Content-Encoding' in self.additional_headers): | |
| 2115 return byte_content | |
| 2116 | |
| 2117 # abort if file-type already compressed | |
| 2118 if ('Content-Type' in self.additional_headers) and \ | |
| 2119 (self.additional_headers['Content-Type'] in \ | |
| 2120 self.precompressed_mime_types): | |
| 2121 return byte_content | |
| 2122 | |
| 2123 encoder = None | |
| 2124 # return same content if unable to compress | |
| 2125 new_content = byte_content | |
| 2126 | |
| 2127 | |
| 2128 encoder = self.determine_content_encoding() | |
| 2129 | |
| 2130 if encoder == 'zstd': | |
| 2131 new_content = self.zstd.ZSTD_compress(byte_content, 3) | |
| 2132 elif encoder == 'br': | |
| 2133 # lgblock=0 sets value from quality | |
| 2134 new_content = self.brotli.compress(byte_content, | |
| 2135 quality=quality, | |
| 2136 mode=1, | |
| 2137 lgblock=0) | |
| 2138 elif encoder == 'gzip': | |
| 2139 try: | |
| 2140 new_content = self.gzip.compress(byte_content, compresslevel=5) | |
| 2141 except AttributeError: | |
| 2142 try: | |
| 2143 from StringIO import cStringIO as IOBuff | |
| 2144 except ImportError: | |
| 2145 # python 3 | |
| 2146 # however this code should not be needed under python3 | |
| 2147 # since py3 gzip library has compress() method. | |
| 2148 from io import BytesIO as IOBuff | |
| 2149 | |
| 2150 out = IOBuff() | |
| 2151 # handle under python2 | |
| 2152 f = self.gzip.GzipFile(fileobj=out, mode='w', compresslevel=5) | |
| 2153 f.write(byte_content) | |
| 2154 f.close() | |
| 2155 new_content = out.getvalue() | |
| 2156 | |
| 2157 if encoder: | |
| 2158 # we changed the data, change existing content-length header | |
| 2159 # and add Content-Encoding and Vary header. | |
| 2160 self.additional_headers['Content-Length'] = str(len(new_content)) | |
| 2161 self.additional_headers['Content-Encoding'] = encoder | |
| 2162 self.setVary('Accept-Encoding') | |
| 2163 | |
| 2164 return new_content | |
| 2165 | |
| 2026 def write(self, content): | 2166 def write(self, content): |
| 2167 if not self.headers_done and self.env['REQUEST_METHOD'] != 'HEAD': | |
| 2168 # compress_encode modifies headers, must run before self.header() | |
| 2169 content = self.compress_encode(bs2b(content)) | |
| 2170 | |
| 2027 if not self.headers_done: | 2171 if not self.headers_done: |
| 2028 self.header() | 2172 self.header() |
| 2029 if self.env['REQUEST_METHOD'] != 'HEAD': | 2173 if self.env['REQUEST_METHOD'] != 'HEAD': |
| 2030 self._socket_op(self.request.wfile.write, content) | 2174 self._socket_op(self.request.wfile.write, content) |
| 2031 | 2175 |
| 2032 def write_html(self, content): | 2176 def write_html(self, content): |
| 2033 if not self.headers_done: | |
| 2034 # at this point, we are sure about Content-Type | |
| 2035 if 'Content-Type' not in self.additional_headers: | |
| 2036 self.additional_headers['Content-Type'] = \ | |
| 2037 'text/html; charset=%s' % self.charset | |
| 2038 self.header() | |
| 2039 | |
| 2040 if self.env['REQUEST_METHOD'] == 'HEAD': | |
| 2041 # client doesn't care about content | |
| 2042 return | |
| 2043 | |
| 2044 if sys.version_info[0] > 2: | 2177 if sys.version_info[0] > 2: |
| 2045 # An action setting appropriate headers for a non-HTML | 2178 # An action setting appropriate headers for a non-HTML |
| 2046 # response may return a bytes object directly. | 2179 # response may return a bytes object directly. |
| 2047 if not isinstance(content, bytes): | 2180 if not isinstance(content, bytes): |
| 2048 content = content.encode(self.charset, 'xmlcharrefreplace') | 2181 content = content.encode(self.charset, 'xmlcharrefreplace') |
| 2049 elif self.charset != self.STORAGE_CHARSET: | 2182 elif self.charset != self.STORAGE_CHARSET: |
| 2050 # recode output | 2183 # recode output |
| 2051 content = content.decode(self.STORAGE_CHARSET, 'replace') | 2184 content = content.decode(self.STORAGE_CHARSET, 'replace') |
| 2052 content = content.encode(self.charset, 'xmlcharrefreplace') | 2185 content = content.encode(self.charset, 'xmlcharrefreplace') |
| 2186 | |
| 2187 if self.env['REQUEST_METHOD'] != 'HEAD' and not self.headers_done: | |
| 2188 # compress_encode modifies headers, must run before self.header() | |
| 2189 content = self.compress_encode(bs2b(content)) | |
| 2190 | |
| 2191 if not self.headers_done: | |
| 2192 # at this point, we are sure about Content-Type | |
| 2193 if 'Content-Type' not in self.additional_headers: | |
| 2194 self.additional_headers['Content-Type'] = \ | |
| 2195 'text/html; charset=%s' % self.charset | |
| 2196 self.header() | |
| 2197 | |
| 2198 if self.env['REQUEST_METHOD'] == 'HEAD': | |
| 2199 # client doesn't care about content | |
| 2200 return | |
| 2053 | 2201 |
| 2054 # and write | 2202 # and write |
| 2055 self._socket_op(self.request.wfile.write, content) | 2203 self._socket_op(self.request.wfile.write, content) |
| 2056 | 2204 |
| 2057 def http_strip(self, content): | 2205 def http_strip(self, content): |
| 2221 self.setHeader("Content-Range", | 2369 self.setHeader("Content-Range", |
| 2222 "bytes %d-%d/%d" % (first, last, length)) | 2370 "bytes %d-%d/%d" % (first, last, length)) |
| 2223 return (first, last - first + 1) | 2371 return (first, last - first + 1) |
| 2224 | 2372 |
| 2225 def write_file(self, filename): | 2373 def write_file(self, filename): |
| 2226 """Send the contents of 'filename' to the user.""" | 2374 """Send the contents of 'filename' to the user. |
| 2227 | 2375 Send an acceptable pre-compressed version of the |
| 2228 # Determine the length of the file. | 2376 file if it is newer than the uncompressed version. |
| 2377 """ | |
| 2378 | |
| 2379 # Assume we will return the entire file. | |
| 2380 offset = 0 | |
| 2381 | |
| 2382 # initialize length from uncompressed file | |
| 2229 stat_info = os.stat(filename) | 2383 stat_info = os.stat(filename) |
| 2230 length = stat_info[stat.ST_SIZE] | 2384 length = stat_info[stat.ST_SIZE] |
| 2231 # Assume we will return the entire file. | 2385 |
| 2232 offset = 0 | 2386 # Determine if we are sending a range. If so, compress |
| 2387 # on the fly. Otherwise see if we have a suitable | |
| 2388 # pre-compressed/encoded file we can send. | |
| 2389 if not self.env.get("HTTP_RANGE"): | |
| 2390 # no range, search for file in list ordered | |
| 2391 # from best to worst alternative | |
| 2392 encoding_list = self.determine_content_encoding(list_all=True, | |
| 2393 precompressed=True) | |
| 2394 if encoding_list and self.db.config.WEB_USE_PRECOMPRESSED_FILES: | |
| 2395 # do we need to search through list? If best is not | |
| 2396 # precompressed, on the fly compress with best? | |
| 2397 # by searching list we will respond with precompressed | |
| 2398 # 2nd best or worse. | |
| 2399 for encoder in encoding_list: | |
| 2400 try: | |
| 2401 trial_filename = '%s.%s'%(filename,encoder) | |
| 2402 trial_stat_info = os.stat(trial_filename) | |
| 2403 if stat_info[stat.ST_MTIME] > \ | |
| 2404 trial_stat_info[stat.ST_MTIME]: | |
| 2405 # compressed file is obsolete | |
| 2406 # don't use it | |
| 2407 logger.warning(self._("Cache failure: " | |
| 2408 "compressed file %(compressed)s is " | |
| 2409 "older than its source file " | |
| 2410 "%(filename)s"%{'filename': filename, | |
| 2411 'compressed': trial_filename})) | |
| 2412 | |
| 2413 continue | |
| 2414 filename = trial_filename | |
| 2415 length = trial_stat_info[stat.ST_SIZE] | |
| 2416 self.setHeader('Content-Encoding', encoder) | |
| 2417 self.setVary('Accept-Encoding') | |
| 2418 break | |
| 2419 # except FileNotFoundError: py2/py3 | |
| 2420 # compatible version | |
| 2421 except EnvironmentError as e: | |
| 2422 if e.errno != errno.ENOENT: | |
| 2423 raise | |
| 2424 | |
| 2233 # If the headers have not already been finalized, | 2425 # If the headers have not already been finalized, |
| 2234 if not self.headers_done: | 2426 if not self.headers_done: |
| 2235 # RFC 2616 14.19: ETag | 2427 # RFC 2616 14.19: ETag |
| 2236 # | 2428 # |
| 2237 # Compute the entity tag, in a format similar to that | 2429 # Compute the entity tag, in a format similar to that |
| 2238 # used by Apache. | 2430 # used by Apache. |
| 2431 # | |
| 2432 # Tag does *not* change with Content-Encoding. | |
| 2433 # Header 'Vary: Accept-Encoding' is returned with response. | |
| 2434 # RFC2616 section 13.32 discusses etag and references | |
| 2435 # section 14.44 (Vary header) as being applicable to etag. | |
| 2436 # Hence the intermediate proxy should/must match | |
| 2437 # Accept-Encoding and ETag to determine whether to return | |
| 2438 # a 304 or report cache miss and fetch from origin server. | |
| 2239 etag = '"%x-%x-%x"' % (stat_info[stat.ST_INO], | 2439 etag = '"%x-%x-%x"' % (stat_info[stat.ST_INO], |
| 2240 length, | 2440 length, |
| 2241 stat_info[stat.ST_MTIME]) | 2441 stat_info[stat.ST_MTIME]) |
| 2242 self.setHeader("ETag", etag) | 2442 self.setHeader("ETag", etag) |
| 2243 # RFC 2616 14.5: Accept-Ranges | 2443 # RFC 2616 14.5: Accept-Ranges |
| 2253 offset, length = content_range | 2453 offset, length = content_range |
| 2254 # RFC 2616 14.13: Content-Length | 2454 # RFC 2616 14.13: Content-Length |
| 2255 # | 2455 # |
| 2256 # Tell the client how much data we are providing. | 2456 # Tell the client how much data we are providing. |
| 2257 self.setHeader("Content-Length", str(length)) | 2457 self.setHeader("Content-Length", str(length)) |
| 2258 # Send the HTTP header. | |
| 2259 self.header() | |
| 2260 # If the client doesn't actually want the body, or if we are | 2458 # If the client doesn't actually want the body, or if we are |
| 2261 # indicating an invalid range. | 2459 # indicating an invalid range. |
| 2262 if (self.env['REQUEST_METHOD'] == 'HEAD' | 2460 if (self.env['REQUEST_METHOD'] == 'HEAD' |
| 2263 or self.response_code == http_.client.REQUESTED_RANGE_NOT_SATISFIABLE): | 2461 or self.response_code == http_.client.REQUESTED_RANGE_NOT_SATISFIABLE): |
| 2264 return | 2462 return |
| 2265 # Use the optimized "sendfile" operation, if possible. | 2463 # Use the optimized "sendfile" operation, if possible. |
| 2266 if hasattr(self.request, "sendfile"): | 2464 if hasattr(self.request, "sendfile"): |
| 2465 self.header() | |
| 2267 self._socket_op(self.request.sendfile, filename, offset, length) | 2466 self._socket_op(self.request.sendfile, filename, offset, length) |
| 2268 return | 2467 return |
| 2269 # Fallback to the "write" operation. | 2468 # Fallback to the "write" operation. |
| 2270 f = open(filename, 'rb') | 2469 f = open(filename, 'rb') |
| 2271 try: | 2470 try: |
