comparison roundup/cgi/client.py @ 6458:8f1b91756457

issue2551147 - Enable compression of http responses in roundup. gzip, (brotli/zstd with optional packages) on the fly compression/content-encoding enabled by default. Can serve pre-compressed static assets as well if the client can accept it. Docs updated. Also added example nginx config to installation.txt. The config allows nginx to compress data on the fly. If the config is used, dynamic compression in roundup can be disabled. Dedicating this checkin to my father Paul Hector Rouillard 1930-2021. I did much of the development in this changeset while sitting with him as he slept/transitioned. Without his encouragement and example, my desire to learn would not be what it is and I wouldn't be half the person I am.
author John Rouillard <rouilj@ieee.org>
date Sat, 24 Jul 2021 16:31:36 -0400
parents 8f8f4988b856
children 679ec82798e9
comparison
equal deleted inserted replaced
6457:dc59051807b6 6458:8f1b91756457
48 from email.mime.base import MIMEBase 48 from email.mime.base import MIMEBase
49 from email.mime.text import MIMEText 49 from email.mime.text import MIMEText
50 from email.mime.multipart import MIMEMultipart 50 from email.mime.multipart import MIMEMultipart
51 import roundup.anypy.email_ 51 import roundup.anypy.email_
52 52
53 from roundup.anypy.strings import s2b, b2s, uchr, is_us 53 from roundup.anypy.strings import s2b, b2s, bs2b, uchr, is_us
54 54
55 def initialiseSecurity(security): 55 def initialiseSecurity(security):
56 '''Create some Permissions and Roles on the security object 56 '''Create some Permissions and Roles on the security object
57 57
58 This function is directly invoked by security.Security.__init__() 58 This function is directly invoked by security.Security.__init__()
334 # A connect or send request failed because the connected party 334 # A connect or send request failed because the connected party
335 # did not properly respond after a period of time. 335 # did not properly respond after a period of time.
336 errno.ETIMEDOUT, 336 errno.ETIMEDOUT,
337 ) 337 )
338 338
339 # Cache_Control[key] = Cache-Control header value
340 # Key can be explicitly file basename - value applied to just that file
341 # takes precedence over mime type.
342 # Key can be mime type - all files of that mimetype will get the value
339 Cache_Control = {} 343 Cache_Control = {}
340 344
345 # list of valid http compression (Content-Encoding) algorithms
346 # we have available
347 compressors = []
348 try:
349 # Only one provided by standard library
350 import gzip
351 compressors.append('gzip')
352 except ImportError:
353 pass
354 try:
355 import brotli
356 compressors.append('br')
357 except ImportError:
358 pass
359 try:
360 import zstd
361 compressors.append('zstd')
362 except ImportError:
363 pass
364
365 # mime types of files that are already compressed and should not be
366 # compressed on the fly. Can be extended/reduced using interfaces.py.
367 # This excludes types from being compressed. Should we have a list
368 # of mime types we should compress? write_html() calls compress_encode
369 # which uses this without a content-type so that's an issue.
370 # Also for text based data, might have charset too so need to parse
371 # content-type.
372 precompressed_mime_types = [ "image/png", "image/jpeg" ]
373
341 def __init__(self, instance, request, env, form=None, translator=None): 374 def __init__(self, instance, request, env, form=None, translator=None):
342 # re-seed the random number generator. If this is an instance of 375 # re-seed the random number generator. If this is an instance of
343 # random.SystemRandom it has no effect. 376 # random.SystemRandom it has no effect.
344 random_.seed() 377 random_.seed()
345 # So we also seed the pseudorandom random source obtained from 378 # So we also seed the pseudorandom random source obtained from
1742 self.additional_headers['Content-Type'] = mime_type 1775 self.additional_headers['Content-Type'] = mime_type
1743 self.additional_headers['Last-Modified'] = email.utils.formatdate(lmt) 1776 self.additional_headers['Last-Modified'] = email.utils.formatdate(lmt)
1744 1777
1745 ims = None 1778 ims = None
1746 # see if there's an if-modified-since... 1779 # see if there's an if-modified-since...
1747 # XXX see which interfaces set this 1780 # used if this is run behind a non-caching http proxy
1748 #if hasattr(self.request, 'headers'): 1781 if hasattr(self.request, 'headers'):
1749 #ims = self.request.headers.getheader('if-modified-since') 1782 ims = self.request.headers.get('if-modified-since')
1750 if 'HTTP_IF_MODIFIED_SINCE' in self.env: 1783 elif 'HTTP_IF_MODIFIED_SINCE' in self.env:
1751 # cgi will put the header in the env var 1784 # cgi will put the header in the env var
1752 ims = self.env['HTTP_IF_MODIFIED_SINCE'] 1785 ims = self.env['HTTP_IF_MODIFIED_SINCE']
1753 if ims: 1786 if ims:
1754 ims = email.utils.parsedate(ims)[:6] 1787 ims = email.utils.parsedate(ims)[:6]
1755 lmtt = time.gmtime(lmt)[:6] 1788 lmtt = time.gmtime(lmt)[:6]
1756 if lmtt <= ims: 1789 if lmtt <= ims:
1790 if (self.determine_content_encoding()):
1791 # set vary header as though we were returning 200
1792 # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Vary
1793 self.setVary("Accept-Encoding")
1757 raise NotModified 1794 raise NotModified
1758 1795
1759 if filename: 1796 if filename:
1760 self.write_file(filename) 1797 self.write_file(filename)
1761 else: 1798 else:
2021 # connection. There's no way to be certain that this is 2058 # connection. There's no way to be certain that this is
2022 # the situation that has occurred here, but that is the 2059 # the situation that has occurred here, but that is the
2023 # most likely case. 2060 # most likely case.
2024 pass 2061 pass
2025 2062
def determine_content_encoding(self, list_all=False, precompressed=False):
    '''Pick the Content-Encoding(s) usable for the current request.

    The request's Accept-Encoding header is split into its
    comma separated codings. A coding is usable when the client
    lists it with a non-zero q value and either this server has
    a compressor for it (it is in self.compressors) or
    ``precompressed`` is true (the caller only wants to look for
    files compressed ahead of time, so no compressor is needed).

    list_all -- when false (default) return the single preferred
                encoding name, or None if nothing is usable.
                When true return a list of every usable encoding,
                best first; the list is empty if nothing is usable.
    precompressed -- when true, do not require the encoding to be
                in self.compressors.

    The return value is always false in a boolean context when no
    encoding is usable; callers rely on that to decide whether to
    add Accept-Encoding to the Vary header.

    FIXME: the preference order is fixed (zstd, br, gzip); q values
    are only used to drop codings the client refuses (q=0), not to
    rank the rest. The identity coding is not handled: identity is
    sent even if the client excludes it rather than returning 406.
    '''
    accept_encoding = self.request.headers.get('accept-encoding') or ''

    # Codings the client is willing to receive. Matching whole,
    # lower-cased tokens (instead of a substring search over the
    # raw header) keeps 'gzip;q=0' from selecting gzip.
    accepted = set()
    for element in accept_encoding.split(','):
        pieces = element.split(';')
        coding = pieces[0].strip().lower()
        if not coding:
            continue
        if coding == 'x-gzip':
            # historical alias for gzip
            coding = 'gzip'

        qvalue = 1.0
        for parameter in pieces[1:]:
            name, _sep, value = parameter.partition('=')
            if name.strip().lower() == 'q':
                try:
                    qvalue = float(value.strip())
                except ValueError:
                    # malformed weight: be lenient and keep the coding
                    pass
        if qvalue > 0:
            accepted.add(coding)

    usable = [enc for enc in ('zstd', 'br', 'gzip')
              if enc in accepted
              and (precompressed or enc in self.compressors)]

    if list_all:
        return usable
    if usable:
        return usable[0]
    return None
2092
def setVary(self, header):
    '''Make sure the Vary response header includes ``header``.

    If no Vary header has been set yet it is created with
    ``header`` as its value. Otherwise ``header`` is appended to
    the existing comma separated list, unless the list already
    names it (compared case-insensitively), so repeated calls do
    not produce duplicate entries.
    '''
    if 'Vary' not in self.additional_headers:
        self.additional_headers['Vary'] = header
        return

    current = self.additional_headers['Vary']
    already_listed = [name.strip().lower() for name in current.split(',')]
    if header.strip().lower() not in already_listed:
        self.additional_headers['Vary'] = "%s, %s" % (current, header)
2101
def compress_encode(self, byte_content, quality=4):
    '''Compress a response body on the fly when that is appropriate.

    byte_content -- the response body as bytes.
    quality -- compression quality passed to the brotli compressor;
               the zstd and gzip branches use fixed levels (3 and 5).

    Returns the bytes to send. ``byte_content`` is returned
    unchanged when:
      * dynamic compression is disabled in the tracker config
        (WEB_DYNAMIC_COMPRESSION),
      * the body is shorter than 100 bytes,
      * a Content-Encoding header is already set (e.g. the body
        came from a precompressed file),
      * the Content-Type is listed in self.precompressed_mime_types,
      * determine_content_encoding() finds no usable encoding.

    When the body is compressed, the Content-Length,
    Content-Encoding and Vary entries of self.additional_headers
    are updated, so this must run before the headers are sent.
    '''
    if not self.db.config.WEB_DYNAMIC_COMPRESSION:
        # dynamic compression disabled.
        return byte_content

    # don't compress small content
    if len(byte_content) < 100:
        return byte_content

    # abort if already encoded (e.g. served from
    # precompressed file or cache on disk)
    if 'Content-Encoding' in self.additional_headers:
        return byte_content

    # abort if file-type already compressed. The header may carry
    # parameters ("type/subtype; charset=..."), so also compare the
    # bare media type rather than only the whole header value.
    if 'Content-Type' in self.additional_headers:
        content_type = self.additional_headers['Content-Type']
        media_type = content_type.split(';', 1)[0].strip().lower()
        if content_type in self.precompressed_mime_types or \
           media_type in self.precompressed_mime_types:
            return byte_content

    # return same content if unable to compress
    new_content = byte_content

    encoder = self.determine_content_encoding()

    if encoder == 'zstd':
        new_content = self.zstd.ZSTD_compress(byte_content, 3)
    elif encoder == 'br':
        # lgblock=0 sets value from quality
        new_content = self.brotli.compress(byte_content,
                                           quality=quality,
                                           mode=1,
                                           lgblock=0)
    elif encoder == 'gzip':
        one_shot = getattr(self.gzip, 'compress', None)
        if one_shot is not None:
            new_content = one_shot(byte_content, compresslevel=5)
        else:
            # python 2: the gzip module has no compress() function,
            # so write through a GzipFile into an in-memory buffer.
            from io import BytesIO
            out = BytesIO()
            f = self.gzip.GzipFile(fileobj=out, mode='w', compresslevel=5)
            try:
                f.write(byte_content)
            finally:
                # close() flushes the gzip trailer into the buffer
                f.close()
            new_content = out.getvalue()

    if encoder:
        # we changed the data, change existing content-length header
        # and add Content-Encoding and Vary header.
        self.additional_headers['Content-Length'] = str(len(new_content))
        self.additional_headers['Content-Encoding'] = encoder
        self.setVary('Accept-Encoding')

    return new_content
2165
2026 def write(self, content): 2166 def write(self, content):
2167 if not self.headers_done and self.env['REQUEST_METHOD'] != 'HEAD':
2168 # compress_encode modifies headers, must run before self.header()
2169 content = self.compress_encode(bs2b(content))
2170
2027 if not self.headers_done: 2171 if not self.headers_done:
2028 self.header() 2172 self.header()
2029 if self.env['REQUEST_METHOD'] != 'HEAD': 2173 if self.env['REQUEST_METHOD'] != 'HEAD':
2030 self._socket_op(self.request.wfile.write, content) 2174 self._socket_op(self.request.wfile.write, content)
2031 2175
2032 def write_html(self, content): 2176 def write_html(self, content):
2033 if not self.headers_done:
2034 # at this point, we are sure about Content-Type
2035 if 'Content-Type' not in self.additional_headers:
2036 self.additional_headers['Content-Type'] = \
2037 'text/html; charset=%s' % self.charset
2038 self.header()
2039
2040 if self.env['REQUEST_METHOD'] == 'HEAD':
2041 # client doesn't care about content
2042 return
2043
2044 if sys.version_info[0] > 2: 2177 if sys.version_info[0] > 2:
2045 # An action setting appropriate headers for a non-HTML 2178 # An action setting appropriate headers for a non-HTML
2046 # response may return a bytes object directly. 2179 # response may return a bytes object directly.
2047 if not isinstance(content, bytes): 2180 if not isinstance(content, bytes):
2048 content = content.encode(self.charset, 'xmlcharrefreplace') 2181 content = content.encode(self.charset, 'xmlcharrefreplace')
2049 elif self.charset != self.STORAGE_CHARSET: 2182 elif self.charset != self.STORAGE_CHARSET:
2050 # recode output 2183 # recode output
2051 content = content.decode(self.STORAGE_CHARSET, 'replace') 2184 content = content.decode(self.STORAGE_CHARSET, 'replace')
2052 content = content.encode(self.charset, 'xmlcharrefreplace') 2185 content = content.encode(self.charset, 'xmlcharrefreplace')
2186
2187 if self.env['REQUEST_METHOD'] != 'HEAD' and not self.headers_done:
2188 # compress_encode modifies headers, must run before self.header()
2189 content = self.compress_encode(bs2b(content))
2190
2191 if not self.headers_done:
2192 # at this point, we are sure about Content-Type
2193 if 'Content-Type' not in self.additional_headers:
2194 self.additional_headers['Content-Type'] = \
2195 'text/html; charset=%s' % self.charset
2196 self.header()
2197
2198 if self.env['REQUEST_METHOD'] == 'HEAD':
2199 # client doesn't care about content
2200 return
2053 2201
2054 # and write 2202 # and write
2055 self._socket_op(self.request.wfile.write, content) 2203 self._socket_op(self.request.wfile.write, content)
2056 2204
2057 def http_strip(self, content): 2205 def http_strip(self, content):
2221 self.setHeader("Content-Range", 2369 self.setHeader("Content-Range",
2222 "bytes %d-%d/%d" % (first, last, length)) 2370 "bytes %d-%d/%d" % (first, last, length))
2223 return (first, last - first + 1) 2371 return (first, last - first + 1)
2224 2372
2225 def write_file(self, filename): 2373 def write_file(self, filename):
2226 """Send the contents of 'filename' to the user.""" 2374 """Send the contents of 'filename' to the user.
2227 2375 Send an acceptable pre-compressed version of the
2228 # Determine the length of the file. 2376 file if it is newer than the uncompressed version.
2377 """
2378
2379 # Assume we will return the entire file.
2380 offset = 0
2381
2382 # initialize length from uncompressed file
2229 stat_info = os.stat(filename) 2383 stat_info = os.stat(filename)
2230 length = stat_info[stat.ST_SIZE] 2384 length = stat_info[stat.ST_SIZE]
2231 # Assume we will return the entire file. 2385
2232 offset = 0 2386 # Determine if we are sending a range. If so, compress
2387 # on the fly. Otherwise see if we have a suitable
2388 # pre-compressed/encoded file we can send.
2389 if not self.env.get("HTTP_RANGE"):
2390 # no range, search for file in list ordered
2391 # from best to worst alternative
2392 encoding_list = self.determine_content_encoding(list_all=True,
2393 precompressed=True)
2394 if encoding_list and self.db.config.WEB_USE_PRECOMPRESSED_FILES:
2395 # do we need to search through list? If best is not
2396 # precompressed, on the fly compress with best?
2397 # by searching list we will respond with precompressed
2398 # 2nd best or worse.
2399 for encoder in encoding_list:
2400 try:
2401 trial_filename = '%s.%s'%(filename,encoder)
2402 trial_stat_info = os.stat(trial_filename)
2403 if stat_info[stat.ST_MTIME] > \
2404 trial_stat_info[stat.ST_MTIME]:
2405 # compressed file is obsolete
2406 # don't use it
2407 logger.warning(self._("Cache failure: "
2408 "compressed file %(compressed)s is "
2409 "older than its source file "
2410 "%(filename)s"%{'filename': filename,
2411 'compressed': trial_filename}))
2412
2413 continue
2414 filename = trial_filename
2415 length = trial_stat_info[stat.ST_SIZE]
2416 self.setHeader('Content-Encoding', encoder)
2417 self.setVary('Accept-Encoding')
2418 break
2419 # except FileNotFoundError: py2/py3
2420 # compatible version
2421 except EnvironmentError as e:
2422 if e.errno != errno.ENOENT:
2423 raise
2424
2233 # If the headers have not already been finalized, 2425 # If the headers have not already been finalized,
2234 if not self.headers_done: 2426 if not self.headers_done:
2235 # RFC 2616 14.19: ETag 2427 # RFC 2616 14.19: ETag
2236 # 2428 #
2237 # Compute the entity tag, in a format similar to that 2429 # Compute the entity tag, in a format similar to that
2238 # used by Apache. 2430 # used by Apache.
2431 #
2432 # Tag does *not* change with Content-Encoding.
2433 # Header 'Vary: Accept-Encoding' is returned with response.
2434 # RFC2616 section 13.32 discusses etag and references
2435 # section 14.44 (Vary header) as being applicable to etag.
2436 # Hence the intermediate proxy should/must match
2437 # Accept-Encoding and ETag to determine whether to return
2438 # a 304 or report cache miss and fetch from origin server.
2239 etag = '"%x-%x-%x"' % (stat_info[stat.ST_INO], 2439 etag = '"%x-%x-%x"' % (stat_info[stat.ST_INO],
2240 length, 2440 length,
2241 stat_info[stat.ST_MTIME]) 2441 stat_info[stat.ST_MTIME])
2242 self.setHeader("ETag", etag) 2442 self.setHeader("ETag", etag)
2243 # RFC 2616 14.5: Accept-Ranges 2443 # RFC 2616 14.5: Accept-Ranges
2253 offset, length = content_range 2453 offset, length = content_range
2254 # RFC 2616 14.13: Content-Length 2454 # RFC 2616 14.13: Content-Length
2255 # 2455 #
2256 # Tell the client how much data we are providing. 2456 # Tell the client how much data we are providing.
2257 self.setHeader("Content-Length", str(length)) 2457 self.setHeader("Content-Length", str(length))
2258 # Send the HTTP header.
2259 self.header()
2260 # If the client doesn't actually want the body, or if we are 2458 # If the client doesn't actually want the body, or if we are
2261 # indicating an invalid range. 2459 # indicating an invalid range.
2262 if (self.env['REQUEST_METHOD'] == 'HEAD' 2460 if (self.env['REQUEST_METHOD'] == 'HEAD'
2263 or self.response_code == http_.client.REQUESTED_RANGE_NOT_SATISFIABLE): 2461 or self.response_code == http_.client.REQUESTED_RANGE_NOT_SATISFIABLE):
2264 return 2462 return
2265 # Use the optimized "sendfile" operation, if possible. 2463 # Use the optimized "sendfile" operation, if possible.
2266 if hasattr(self.request, "sendfile"): 2464 if hasattr(self.request, "sendfile"):
2465 self.header()
2267 self._socket_op(self.request.sendfile, filename, offset, length) 2466 self._socket_op(self.request.sendfile, filename, offset, length)
2268 return 2467 return
2269 # Fallback to the "write" operation. 2468 # Fallback to the "write" operation.
2270 f = open(filename, 'rb') 2469 f = open(filename, 'rb')
2271 try: 2470 try:

Roundup Issue Tracker: http://roundup-tracker.org/