comparison roundup/cgi/client.py @ 4064:662cd78df973

Add support for resuming (file) downloads.
author Stefan Seefeld <stefan@seefeld.name>
date Sun, 22 Feb 2009 01:41:19 +0000
parents e70643990e9c
children 1e28d58c6d1c
comparison
equal deleted inserted replaced
4063:625915ce35b8 4064:662cd78df973
1 """WWW request handler (also used in the stand-alone server). 1 """WWW request handler (also used in the stand-alone server).
2 """ 2 """
3 __docformat__ = 'restructuredtext' 3 __docformat__ = 'restructuredtext'
4 4
5 import base64, binascii, cgi, codecs, mimetypes, os 5 import base64, binascii, cgi, codecs, httplib, mimetypes, os
6 import quopri, random, re, rfc822, stat, sys, time, urllib, urlparse 6 import quopri, random, re, rfc822, stat, sys, time, urllib, urlparse
7 import Cookie, socket, errno 7 import Cookie, socket, errno
8 from Cookie import CookieError, BaseCookie, SimpleCookie 8 from Cookie import CookieError, BaseCookie, SimpleCookie
9 from cStringIO import StringIO 9 from cStringIO import StringIO
10 10
461 # send the 304 response 461 # send the 304 response
462 self.response_code = 304 462 self.response_code = 304
463 self.header() 463 self.header()
464 except Unauthorised, message: 464 except Unauthorised, message:
465 # users may always see the front page 465 # users may always see the front page
466 self.response_code = 403
466 self.classname = self.nodeid = None 467 self.classname = self.nodeid = None
467 self.template = '' 468 self.template = ''
468 self.error_message.append(message) 469 self.error_message.append(message)
469 self.write_html(self.renderContext()) 470 self.write_html(self.renderContext())
470 except NotFound, e: 471 except NotFound, e:
630 try: 631 try:
631 login = self.get_action_class('login')(self) 632 login = self.get_action_class('login')(self)
632 login.verifyLogin(username, password) 633 login.verifyLogin(username, password)
633 except LoginError, err: 634 except LoginError, err:
634 self.make_user_anonymous() 635 self.make_user_anonymous()
635 self.response_code = 403
636 raise Unauthorised, err 636 raise Unauthorised, err
637
638 user = username 637 user = username
639 638
640 # if user was not set by http authorization, try session lookup 639 # if user was not set by http authorization, try session lookup
641 if not user: 640 if not user:
642 user = self.session_api.get('user') 641 user = self.session_api.get('user')
877 876
878 def _serve_file(self, lmt, mime_type, content=None, filename=None): 877 def _serve_file(self, lmt, mime_type, content=None, filename=None):
879 ''' guts of serve_file() and serve_static_file() 878 ''' guts of serve_file() and serve_static_file()
880 ''' 879 '''
881 880
882 if not content:
883 length = os.stat(filename)[stat.ST_SIZE]
884 else:
885 length = len(content)
886
887 # spit out headers 881 # spit out headers
888 self.additional_headers['Content-Type'] = mime_type 882 self.additional_headers['Content-Type'] = mime_type
889 self.additional_headers['Content-Length'] = str(length)
890 self.additional_headers['Last-Modified'] = rfc822.formatdate(lmt) 883 self.additional_headers['Last-Modified'] = rfc822.formatdate(lmt)
891 884
892 ims = None 885 ims = None
893 # see if there's an if-modified-since... 886 # see if there's an if-modified-since...
894 # XXX see which interfaces set this 887 # XXX see which interfaces set this
901 ims = rfc822.parsedate(ims)[:6] 894 ims = rfc822.parsedate(ims)[:6]
902 lmtt = time.gmtime(lmt)[:6] 895 lmtt = time.gmtime(lmt)[:6]
903 if lmtt <= ims: 896 if lmtt <= ims:
904 raise NotModified 897 raise NotModified
905 898
906 if not self.headers_done:
907 self.header()
908
909 if self.env['REQUEST_METHOD'] == 'HEAD':
910 return
911
912 # If we have a file, and the 'sendfile' method is available,
913 # we can bypass reading and writing the content into application
914 # memory entirely.
915 if filename: 899 if filename:
916 if hasattr(self.request, 'sendfile'): 900 self.write_file(filename)
917 self._socket_op(self.request.sendfile, filename) 901 else:
918 return 902 self.additional_headers['Content-Length'] = str(len(content))
919 f = open(filename, 'rb') 903 self.write(content)
920 try:
921 content = f.read()
922 finally:
923 f.close()
924
925 self._socket_op(self.request.wfile.write, content)
926
927 904
928 def renderContext(self): 905 def renderContext(self):
929 ''' Return a PageTemplate for the named page 906 ''' Return a PageTemplate for the named page
930 ''' 907 '''
931 name = self.classname 908 name = self.classname
1069 err_errno = err[0] 1046 err_errno = err[0]
1070 except TypeError: 1047 except TypeError:
1071 pass 1048 pass
1072 if err_errno not in self.IGNORE_NET_ERRORS: 1049 if err_errno not in self.IGNORE_NET_ERRORS:
1073 raise 1050 raise
1051 except IOError:
1052 # Apache's mod_python will raise IOError -- without an
1053 # accompanying errno -- when a write to the client fails.
1054 # A common case is that the client has closed the
1055 # connection. There's no way to be certain that this is
1056 # the situation that has occurred here, but that is the
1057 # most likely case.
1058 pass
1074 1059
1075 def write(self, content): 1060 def write(self, content):
1076 if not self.headers_done: 1061 if not self.headers_done:
1077 self.header() 1062 self.header()
1078 if self.env['REQUEST_METHOD'] != 'HEAD': 1063 if self.env['REQUEST_METHOD'] != 'HEAD':
1096 content = content.encode(self.charset, 'xmlcharrefreplace') 1081 content = content.encode(self.charset, 'xmlcharrefreplace')
1097 1082
1098 # and write 1083 # and write
1099 self._socket_op(self.request.wfile.write, content) 1084 self._socket_op(self.request.wfile.write, content)
1100 1085
def http_strip(self, content):
    """Strip HTTP Linear White Space (LWS) from both ends of 'content'.

    'content' -- A string.

    returns -- A copy of 'content' from which every leading and
    trailing space, carriage return, line feed and horizontal tab
    has been removed.  Interior white space is left untouched."""

    # RFC 2616 2.2: Basic Rules
    #
    #   LWS = [CRLF] 1*( SP | HT )
    #
    # so the characters to discard are exactly SP, CR, LF and HT.
    lws_characters = " \r\n\t"
    stripped = content.strip(lws_characters)
    return stripped
1098
def http_split(self, content):
    """Break an HTTP comma-separated list into its elements.

    'content' -- A string, giving a list of items.

    returns -- A list of strings holding the non-empty elements of
    the list, in their original order, each with surrounding
    linear white space removed."""

    # RFC 2616 2.1: Augmented BNF
    #
    #   Grammar productions of the form "#rule" indicate a
    #   comma-separated list of elements matching "rule".  LWS
    #   is then removed from each element, and empty elements
    #   removed.
    items = []
    for raw_item in content.split(","):
        # Trim linear white space from both ends of the element ...
        item = self.http_strip(raw_item)
        # ... and keep it only if something is left.
        if item:
            items.append(item)
    return items
1120
def handle_range_header(self, length, etag):
    """Handle the 'Range' and 'If-Range' headers.

    'length' -- the length, in bytes, of the content available for
    the resource.

    'etag' -- the entity tag for this resource.

    returns -- If the request headers (including 'Range' and
    'If-Range') indicate that only a portion of the entity should
    be returned, then the return value is a pair '(offset,
    length)' indicating the first byte and number of bytes of the
    content that should be returned to the client.  In addition,
    this method will set 'self.response_code' to indicate Partial
    Content and set the 'Content-Range' header.  In all other
    cases, the return value is 'None' and the caller should send
    the entire entity, with one exception: if the requested range
    starts at or beyond the end of the content (and there is no
    'If-Range' header), 'self.response_code' is set to
    'REQUESTED_RANGE_NOT_SATISFIABLE' and 'Content-Range' is set
    to 'bytes */<length>'.  In that case, the caller should not
    send any data to the client.

    Only a single range of the form 'first-last' or 'first-' with
    the unit 'bytes' is honoured.  Multiple ranges, suffix ranges
    ('-N'), other units and malformed values are ignored (the
    return value is 'None')."""

    # RFC 2616 14.35: Range
    #
    # See if the Range header is present.
    ranges_specifier = self.env.get("HTTP_RANGE")
    if ranges_specifier is None:
        return None
    # RFC 2616 14.27: If-Range
    #
    # Check to see if there is an If-Range header.
    # Because the specification says:
    #
    #   The If-Range header ... MUST be ignored if the request
    #   does not include a Range header, we check for If-Range
    #   after checking for Range.
    if_range = self.env.get("HTTP_IF_RANGE")
    if if_range:
        # The grammar for the If-Range header is:
        #
        #   If-Range = "If-Range" ":" ( entity-tag | HTTP-date )
        #   entity-tag = [ weak ] opaque-tag
        #   weak = "W/"
        #   opaque-tag = quoted-string
        #
        # We only support strong entity tags.
        if_range = self.http_strip(if_range)
        if (not if_range.startswith('"')
            or not if_range.endswith('"')):
            return None
        # If the condition doesn't match the entity tag, then we
        # must send the client the entire file.
        if if_range != etag:
            return None
    # The grammar for the Range header value is:
    #
    #   ranges-specifier = byte-ranges-specifier
    #   byte-ranges-specifier = bytes-unit "=" byte-range-set
    #   byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec )
    #   byte-range-spec = first-byte-pos "-" [last-byte-pos]
    #   first-byte-pos = 1*DIGIT
    #   last-byte-pos = 1*DIGIT
    #   suffix-byte-range-spec = "-" suffix-length
    #   suffix-length = 1*DIGIT
    #
    # Look for the "=" separating the units from the range set.
    specs = ranges_specifier.split("=", 1)
    if len(specs) != 2:
        return None
    # Check that the bytes-unit is in fact "bytes".  If it is not,
    # we do not know how to process this range.
    bytes_unit = self.http_strip(specs[0])
    if bytes_unit != "bytes":
        return None
    # Separate the range-set into range-specs.
    byte_range_set = self.http_strip(specs[1])
    byte_range_specs = self.http_split(byte_range_set)
    # We only handle exactly one range at this time.
    if len(byte_range_specs) != 1:
        return None
    # Parse the spec.
    byte_range_spec = byte_range_specs[0]
    pos = byte_range_spec.split("-", 1)
    if len(pos) != 2:
        return None
    # Get the first and last bytes.
    first = self.http_strip(pos[0])
    last = self.http_strip(pos[1])
    # We do not handle suffix ranges.
    if not first:
        return None
    # Convert the first and last positions to integers.
    try:
        first = int(first)
        if last:
            last = int(last)
        else:
            # An absent last-byte-pos means "through the end".
            last = length - 1
    except ValueError:
        # The positions could not be parsed as integers.
        return None
    # Check that the range makes sense.
    if first < 0 or last < 0 or last < first:
        return None
    if first >= length:
        # RFC 2616 10.4.17: 416 Requested Range Not Satisfiable
        #
        # The range starts beyond the end of the entity, so no
        # byte of it can be served.
        #
        # If there is an If-Range header, RFC 2616 says that we
        # should just ignore the invalid Range header.
        if if_range:
            return None
        # Return code 416 with a Content-Range header giving the
        # allowable range.
        self.response_code = httplib.REQUESTED_RANGE_NOT_SATISFIABLE
        self.setHeader("Content-Range", "bytes */%d" % length)
        return None
    # RFC 2616 14.35.1: Byte Ranges
    #
    #   If the last-byte-pos value is absent, or if the value is
    #   greater than or equal to the current length of the
    #   entity-body, last-byte-pos is taken to be equal to one
    #   less than the current length of the entity-body in bytes.
    if last >= length:
        last = length - 1
    # RFC 2616 10.2.7: 206 Partial Content
    #
    # Tell the client that we are honoring the Range request by
    # indicating that we are providing partial content.
    self.response_code = httplib.PARTIAL_CONTENT
    # RFC 2616 14.16: Content-Range
    #
    # Tell the client what data we are providing.
    #
    #   content-range-spec = byte-content-range-spec
    #   byte-content-range-spec = bytes-unit SP
    #                             byte-range-resp-spec "/"
    #                             ( instance-length | "*" )
    #   byte-range-resp-spec = (first-byte-pos "-" last-byte-pos)
    #                          | "*"
    #   instance-length = 1 * DIGIT
    self.setHeader("Content-Range",
                   "bytes %d-%d/%d" % (first, last, length))
    return (first, last - first + 1)
1253
def write_file(self, filename):
    '''Send the contents of 'filename' to the user.

    'filename' -- the path of the file to send.

    If the response headers have not been sent yet, this method
    sets the 'ETag', 'Accept-Ranges' and 'Content-Length' headers,
    lets 'handle_range_header' decide whether only part of the
    file was requested, and then sends the headers.

    No body is sent for 'HEAD' requests or when the response code
    indicates an unsatisfiable range.  Otherwise the requested
    part of the file (the whole file by default) is sent, using
    the request's 'sendfile' method when it has one and
    'self.write' when it does not.

    raises -- whatever 'os.stat' or 'open' raise when 'filename'
    cannot be accessed.
    '''

    # Determine the length of the file.
    stat_info = os.stat(filename)
    length = stat_info[stat.ST_SIZE]
    # Assume we will return the entire file.
    offset = 0
    # If the headers have not already been finalized,
    if not self.headers_done:
        # RFC 2616 14.19: ETag
        #
        # Compute the entity tag, in a format similar to that
        # used by Apache.
        etag = '"%x-%x-%x"' % (stat_info[stat.ST_INO],
                               length,
                               stat_info[stat.ST_MTIME])
        self.setHeader("ETag", etag)
        # RFC 2616 14.5: Accept-Ranges
        #
        # Let the client know that we will accept range requests.
        self.setHeader("Accept-Ranges", "bytes")
        # RFC 2616 14.35: Range
        #
        # If there is a Range header, we may be able to avoid
        # sending the entire file.
        content_range = self.handle_range_header(length, etag)
        if content_range:
            offset, length = content_range
        # RFC 2616 14.13: Content-Length
        #
        # Tell the client how much data we are providing.  Header
        # values are strings everywhere else in this class, so
        # convert the number explicitly.
        self.setHeader("Content-Length", str(length))
        # Send the HTTP header.
        self.header()
    # If the client doesn't actually want the body, or if we are
    # indicating an invalid range.
    if (self.env['REQUEST_METHOD'] == 'HEAD'
        or self.response_code == httplib.REQUESTED_RANGE_NOT_SATISFIABLE):
        return
    # Use the optimized "sendfile" operation, if possible.
    if hasattr(self.request, "sendfile"):
        self._socket_op(self.request.sendfile, filename, offset, length)
        return
    # Fallback to the "write" operation.
    f = open(filename, 'rb')
    try:
        if offset:
            f.seek(offset)
        # Read only the bytes that were asked for.
        content = f.read(length)
    finally:
        f.close()
    self.write(content)
1101 1307
1102 def setHeader(self, header, value): 1308 def setHeader(self, header, value):
1103 '''Override a header to be returned to the user's browser. 1309 '''Override a header to be returned to the user's browser.
1104 ''' 1310 '''
1105 self.additional_headers[header] = value 1311 self.additional_headers[header] = value

Roundup Issue Tracker: http://roundup-tracker.org/