Mercurial > p > roundup > code
comparison roundup/cgi/client.py @ 4064:662cd78df973
Add support for resuming (file) downloads.
| author | Stefan Seefeld <stefan@seefeld.name> |
|---|---|
| date | Sun, 22 Feb 2009 01:41:19 +0000 |
| parents | e70643990e9c |
| children | 1e28d58c6d1c |
comparison
equal
deleted
inserted
replaced
| 4063:625915ce35b8 | 4064:662cd78df973 |
|---|---|
| 1 """WWW request handler (also used in the stand-alone server). | 1 """WWW request handler (also used in the stand-alone server). |
| 2 """ | 2 """ |
| 3 __docformat__ = 'restructuredtext' | 3 __docformat__ = 'restructuredtext' |
| 4 | 4 |
| 5 import base64, binascii, cgi, codecs, mimetypes, os | 5 import base64, binascii, cgi, codecs, httplib, mimetypes, os |
| 6 import quopri, random, re, rfc822, stat, sys, time, urllib, urlparse | 6 import quopri, random, re, rfc822, stat, sys, time, urllib, urlparse |
| 7 import Cookie, socket, errno | 7 import Cookie, socket, errno |
| 8 from Cookie import CookieError, BaseCookie, SimpleCookie | 8 from Cookie import CookieError, BaseCookie, SimpleCookie |
| 9 from cStringIO import StringIO | 9 from cStringIO import StringIO |
| 10 | 10 |
| 461 # send the 304 response | 461 # send the 304 response |
| 462 self.response_code = 304 | 462 self.response_code = 304 |
| 463 self.header() | 463 self.header() |
| 464 except Unauthorised, message: | 464 except Unauthorised, message: |
| 465 # users may always see the front page | 465 # users may always see the front page |
| 466 self.response_code = 403 | |
| 466 self.classname = self.nodeid = None | 467 self.classname = self.nodeid = None |
| 467 self.template = '' | 468 self.template = '' |
| 468 self.error_message.append(message) | 469 self.error_message.append(message) |
| 469 self.write_html(self.renderContext()) | 470 self.write_html(self.renderContext()) |
| 470 except NotFound, e: | 471 except NotFound, e: |
| 630 try: | 631 try: |
| 631 login = self.get_action_class('login')(self) | 632 login = self.get_action_class('login')(self) |
| 632 login.verifyLogin(username, password) | 633 login.verifyLogin(username, password) |
| 633 except LoginError, err: | 634 except LoginError, err: |
| 634 self.make_user_anonymous() | 635 self.make_user_anonymous() |
| 635 self.response_code = 403 | |
| 636 raise Unauthorised, err | 636 raise Unauthorised, err |
| 637 | |
| 638 user = username | 637 user = username |
| 639 | 638 |
| 640 # if user was not set by http authorization, try session lookup | 639 # if user was not set by http authorization, try session lookup |
| 641 if not user: | 640 if not user: |
| 642 user = self.session_api.get('user') | 641 user = self.session_api.get('user') |
| 877 | 876 |
| 878 def _serve_file(self, lmt, mime_type, content=None, filename=None): | 877 def _serve_file(self, lmt, mime_type, content=None, filename=None): |
| 879 ''' guts of serve_file() and serve_static_file() | 878 ''' guts of serve_file() and serve_static_file() |
| 880 ''' | 879 ''' |
| 881 | 880 |
| 882 if not content: | |
| 883 length = os.stat(filename)[stat.ST_SIZE] | |
| 884 else: | |
| 885 length = len(content) | |
| 886 | |
| 887 # spit out headers | 881 # spit out headers |
| 888 self.additional_headers['Content-Type'] = mime_type | 882 self.additional_headers['Content-Type'] = mime_type |
| 889 self.additional_headers['Content-Length'] = str(length) | |
| 890 self.additional_headers['Last-Modified'] = rfc822.formatdate(lmt) | 883 self.additional_headers['Last-Modified'] = rfc822.formatdate(lmt) |
| 891 | 884 |
| 892 ims = None | 885 ims = None |
| 893 # see if there's an if-modified-since... | 886 # see if there's an if-modified-since... |
| 894 # XXX see which interfaces set this | 887 # XXX see which interfaces set this |
| 901 ims = rfc822.parsedate(ims)[:6] | 894 ims = rfc822.parsedate(ims)[:6] |
| 902 lmtt = time.gmtime(lmt)[:6] | 895 lmtt = time.gmtime(lmt)[:6] |
| 903 if lmtt <= ims: | 896 if lmtt <= ims: |
| 904 raise NotModified | 897 raise NotModified |
| 905 | 898 |
| 906 if not self.headers_done: | |
| 907 self.header() | |
| 908 | |
| 909 if self.env['REQUEST_METHOD'] == 'HEAD': | |
| 910 return | |
| 911 | |
| 912 # If we have a file, and the 'sendfile' method is available, | |
| 913 # we can bypass reading and writing the content into application | |
| 914 # memory entirely. | |
| 915 if filename: | 899 if filename: |
| 916 if hasattr(self.request, 'sendfile'): | 900 self.write_file(filename) |
| 917 self._socket_op(self.request.sendfile, filename) | 901 else: |
| 918 return | 902 self.additional_headers['Content-Length'] = str(len(content)) |
| 919 f = open(filename, 'rb') | 903 self.write(content) |
| 920 try: | |
| 921 content = f.read() | |
| 922 finally: | |
| 923 f.close() | |
| 924 | |
| 925 self._socket_op(self.request.wfile.write, content) | |
| 926 | |
| 927 | 904 |
| 928 def renderContext(self): | 905 def renderContext(self): |
| 929 ''' Return a PageTemplate for the named page | 906 ''' Return a PageTemplate for the named page |
| 930 ''' | 907 ''' |
| 931 name = self.classname | 908 name = self.classname |
| 1069 err_errno = err[0] | 1046 err_errno = err[0] |
| 1070 except TypeError: | 1047 except TypeError: |
| 1071 pass | 1048 pass |
| 1072 if err_errno not in self.IGNORE_NET_ERRORS: | 1049 if err_errno not in self.IGNORE_NET_ERRORS: |
| 1073 raise | 1050 raise |
| 1051 except IOError: | |
| 1052 # Apache's mod_python will raise IOError -- without an | |
| 1053 # accompanying errno -- when a write to the client fails. | |
| 1054 # A common case is that the client has closed the | |
| 1055 # connection. There's no way to be certain that this is | |
| 1056 # the situation that has occurred here, but that is the | |
| 1057 # most likely case. | |
| 1058 pass | |
| 1074 | 1059 |
| 1075 def write(self, content): | 1060 def write(self, content): |
| 1076 if not self.headers_done: | 1061 if not self.headers_done: |
| 1077 self.header() | 1062 self.header() |
| 1078 if self.env['REQUEST_METHOD'] != 'HEAD': | 1063 if self.env['REQUEST_METHOD'] != 'HEAD': |
| 1096 content = content.encode(self.charset, 'xmlcharrefreplace') | 1081 content = content.encode(self.charset, 'xmlcharrefreplace') |
| 1097 | 1082 |
| 1098 # and write | 1083 # and write |
| 1099 self._socket_op(self.request.wfile.write, content) | 1084 self._socket_op(self.request.wfile.write, content) |
| 1100 | 1085 |
| 1086 def http_strip(self, content): | |
| 1087 """Remove HTTP Linear White Space from 'content'. | |
| 1088 | |
| 1089 'content' -- A string. | |
| 1090 | |
| 1091 returns -- 'content', with all leading and trailing LWS | |
| 1092 removed.""" | |
| 1093 | |
| 1094 # RFC 2616 2.2: Basic Rules | |
| 1095 # | |
| 1096 # LWS = [CRLF] 1*( SP | HT ) | |
| 1097 return content.strip(" \r\n\t") | |
| 1098 | |
| 1099 def http_split(self, content): | |
| 1100 """Split an HTTP list. | |
| 1101 | |
| 1102 'content' -- A string, giving a list of items. | |
| 1103 | |
| 1104 returns -- A sequence of strings, containing the elements of | |
| 1105 the list.""" | |
| 1106 | |
| 1107 # RFC 2616 2.1: Augmented BNF | |
| 1108 # | |
| 1109 # Grammar productions of the form "#rule" indicate a | |
| 1110 # comma-separated list of elements matching "rule". LWS | |
| 1111 # is then removed from each element, and empty elements | |
| 1112 # removed. | |
| 1113 | |
| 1114 # Split at commas. | |
| 1115 elements = content.split(",") | |
| 1116 # Remove linear whitespace at either end of the string. | |
| 1117 elements = [self.http_strip(e) for e in elements] | |
| 1118 # Remove any now-empty elements. | |
| 1119 return [e for e in elements if e] | |
| 1120 | |
| 1121 def handle_range_header(self, length, etag): | |
| 1122 """Handle the 'Range' and 'If-Range' headers. | |
| 1123 | |
| 1124 'length' -- the length of the content available for the | |
| 1125 resource. | |
| 1126 | |
| 1127 'etag' -- the entity tag for this resources. | |
| 1128 | |
| 1129 returns -- If the request headers (including 'Range' and | |
| 1130 'If-Range') indicate that only a portion of the entity should | |
| 1131 be returned, then the return value is a pair '(offfset, | |
| 1132 length)' indicating the first byte and number of bytes of the | |
| 1133 content that should be returned to the client. In addition, | |
| 1134 this method will set 'self.response_code' to indicate Partial | |
| 1135 Content. In all other cases, the return value is 'None'. If | |
| 1136 appropriate, 'self.response_code' will be | |
| 1137 set to indicate 'REQUESTED_RANGE_NOT_SATISFIABLE'. In that | |
| 1138 case, the caller should not send any data to the client.""" | |
| 1139 | |
| 1140 # RFC 2616 14.35: Range | |
| 1141 # | |
| 1142 # See if the Range header is present. | |
| 1143 ranges_specifier = self.env.get("HTTP_RANGE") | |
| 1144 if ranges_specifier is None: | |
| 1145 return None | |
| 1146 # RFC 2616 14.27: If-Range | |
| 1147 # | |
| 1148 # Check to see if there is an If-Range header. | |
| 1149 # Because the specification says: | |
| 1150 # | |
| 1151 # The If-Range header ... MUST be ignored if the request | |
| 1152 # does not include a Range header, we check for If-Range | |
| 1153 # after checking for Range. | |
| 1154 if_range = self.env.get("HTTP_IF_RANGE") | |
| 1155 if if_range: | |
| 1156 # The grammar for the If-Range header is: | |
| 1157 # | |
| 1158 # If-Range = "If-Range" ":" ( entity-tag | HTTP-date ) | |
| 1159 # entity-tag = [ weak ] opaque-tag | |
| 1160 # weak = "W/" | |
| 1161 # opaque-tag = quoted-string | |
| 1162 # | |
| 1163 # We only support strong entity tags. | |
| 1164 if_range = self.http_strip(if_range) | |
| 1165 if (not if_range.startswith('"') | |
| 1166 or not if_range.endswith('"')): | |
| 1167 return None | |
| 1168 # If the condition doesn't match the entity tag, then we | |
| 1169 # must send the client the entire file. | |
| 1170 if if_range != etag: | |
| 1171 return | |
| 1172 # The grammar for the Range header value is: | |
| 1173 # | |
| 1174 # ranges-specifier = byte-ranges-specifier | |
| 1175 # byte-ranges-specifier = bytes-unit "=" byte-range-set | |
| 1176 # byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec ) | |
| 1177 # byte-range-spec = first-byte-pos "-" [last-byte-pos] | |
| 1178 # first-byte-pos = 1*DIGIT | |
| 1179 # last-byte-pos = 1*DIGIT | |
| 1180 # suffix-byte-range-spec = "-" suffix-length | |
| 1181 # suffix-length = 1*DIGIT | |
| 1182 # | |
| 1183 # Look for the "=" separating the units from the range set. | |
| 1184 specs = ranges_specifier.split("=", 1) | |
| 1185 if len(specs) != 2: | |
| 1186 return None | |
| 1187 # Check that the bytes-unit is in fact "bytes". If it is not, | |
| 1188 # we do not know how to process this range. | |
| 1189 bytes_unit = self.http_strip(specs[0]) | |
| 1190 if bytes_unit != "bytes": | |
| 1191 return None | |
| 1192 # Seperate the range-set into range-specs. | |
| 1193 byte_range_set = self.http_strip(specs[1]) | |
| 1194 byte_range_specs = self.http_split(byte_range_set) | |
| 1195 # We only handle exactly one range at this time. | |
| 1196 if len(byte_range_specs) != 1: | |
| 1197 return None | |
| 1198 # Parse the spec. | |
| 1199 byte_range_spec = byte_range_specs[0] | |
| 1200 pos = byte_range_spec.split("-", 1) | |
| 1201 if len(pos) != 2: | |
| 1202 return None | |
| 1203 # Get the first and last bytes. | |
| 1204 first = self.http_strip(pos[0]) | |
| 1205 last = self.http_strip(pos[1]) | |
| 1206 # We do not handle suffix ranges. | |
| 1207 if not first: | |
| 1208 return None | |
| 1209 # Convert the first and last positions to integers. | |
| 1210 try: | |
| 1211 first = int(first) | |
| 1212 if last: | |
| 1213 last = int(last) | |
| 1214 else: | |
| 1215 last = length - 1 | |
| 1216 except: | |
| 1217 # The positions could not be parsed as integers. | |
| 1218 return None | |
| 1219 # Check that the range makes sense. | |
| 1220 if (first < 0 or last < 0 or last < first): | |
| 1221 return None | |
| 1222 if last >= length: | |
| 1223 # RFC 2616 10.4.17: 416 Requested Range Not Satisfiable | |
| 1224 # | |
| 1225 # If there is an If-Range header, RFC 2616 says that we | |
| 1226 # should just ignore the invalid Range header. | |
| 1227 if if_range: | |
| 1228 return None | |
| 1229 # Return code 416 with a Content-Range header giving the | |
| 1230 # allowable range. | |
| 1231 self.response_code = httplib.REQUESTED_RANGE_NOT_SATISFIABLE | |
| 1232 self.setHeader("Content-Range", "bytes */%d" % length) | |
| 1233 return None | |
| 1234 # RFC 2616 10.2.7: 206 Partial Content | |
| 1235 # | |
| 1236 # Tell the client that we are honoring the Range request by | |
| 1237 # indicating that we are providing partial content. | |
| 1238 self.response_code = httplib.PARTIAL_CONTENT | |
| 1239 # RFC 2616 14.16: Content-Range | |
| 1240 # | |
| 1241 # Tell the client what data we are providing. | |
| 1242 # | |
| 1243 # content-range-spec = byte-content-range-spec | |
| 1244 # byte-content-range-spec = bytes-unit SP | |
| 1245 # byte-range-resp-spec "/" | |
| 1246 # ( instance-length | "*" ) | |
| 1247 # byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | |
| 1248 # | "*" | |
| 1249 # instance-length = 1 * DIGIT | |
| 1250 self.setHeader("Content-Range", | |
| 1251 "bytes %d-%d/%d" % (first, last, length)) | |
| 1252 return (first, last - first + 1) | |
| 1253 | |
| 1254 def write_file(self, filename): | |
| 1255 '''Send the contents of 'filename' to the user.''' | |
| 1256 | |
| 1257 # Determine the length of the file. | |
| 1258 stat_info = os.stat(filename) | |
| 1259 length = stat_info[stat.ST_SIZE] | |
| 1260 # Assume we will return the entire file. | |
| 1261 offset = 0 | |
| 1262 # If the headers have not already been finalized, | |
| 1263 if not self.headers_done: | |
| 1264 # RFC 2616 14.19: ETag | |
| 1265 # | |
| 1266 # Compute the entity tag, in a format similar to that | |
| 1267 # used by Apache. | |
| 1268 etag = '"%x-%x-%x"' % (stat_info[stat.ST_INO], | |
| 1269 length, | |
| 1270 stat_info[stat.ST_MTIME]) | |
| 1271 self.setHeader("ETag", etag) | |
| 1272 # RFC 2616 14.5: Accept-Ranges | |
| 1273 # | |
| 1274 # Let the client know that we will accept range requests. | |
| 1275 self.setHeader("Accept-Ranges", "bytes") | |
| 1276 # RFC 2616 14.35: Range | |
| 1277 # | |
| 1278 # If there is a Range header, we may be able to avoid | |
| 1279 # sending the entire file. | |
| 1280 content_range = self.handle_range_header(length, etag) | |
| 1281 if content_range: | |
| 1282 offset, length = content_range | |
| 1283 # RFC 2616 14.13: Content-Length | |
| 1284 # | |
| 1285 # Tell the client how much data we are providing. | |
| 1286 self.setHeader("Content-Length", length) | |
| 1287 # Send the HTTP header. | |
| 1288 self.header() | |
| 1289 # If the client doesn't actually want the body, or if we are | |
| 1290 # indicating an invalid range. | |
| 1291 if (self.env['REQUEST_METHOD'] == 'HEAD' | |
| 1292 or self.response_code == httplib.REQUESTED_RANGE_NOT_SATISFIABLE): | |
| 1293 return | |
| 1294 # Use the optimized "sendfile" operation, if possible. | |
| 1295 if hasattr(self.request, "sendfile"): | |
| 1296 self._socket_op(self.request.sendfile, filename, offset, length) | |
| 1297 return | |
| 1298 # Fallback to the "write" operation. | |
| 1299 f = open(filename, 'rb') | |
| 1300 try: | |
| 1301 if offset: | |
| 1302 f.seek(offset) | |
| 1303 content = f.read(length) | |
| 1304 finally: | |
| 1305 f.close() | |
| 1306 self.write(content) | |
| 1101 | 1307 |
| 1102 def setHeader(self, header, value): | 1308 def setHeader(self, header, value): |
| 1103 '''Override a header to be returned to the user's browser. | 1309 '''Override a header to be returned to the user's browser. |
| 1104 ''' | 1310 ''' |
| 1105 self.additional_headers[header] = value | 1311 self.additional_headers[header] = value |
