Skip to content

Commit 9be6c50

Browse files
committed
Upgrade http-parser
1 parent 935f843 commit 9be6c50

6 files changed

Lines changed: 242 additions & 140 deletions

File tree

deps/http_parser/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ test: test_g
88
test_g: http_parser_g.o test_g.o
99
gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
1010

11-
test_g.o: test.c Makefile
11+
test_g.o: test.c http_parser.h Makefile
1212
gcc $(OPT_DEBUG) -c test.c -o $@
1313

14-
test.o: test.c Makefile
14+
test.o: test.c http_parser.h Makefile
1515
gcc $(OPT_FAST) -c test.c -o $@
1616

1717
http_parser_g.o: http_parser.c http_parser.h Makefile
@@ -23,7 +23,7 @@ test-valgrind: test_g
2323
http_parser.o: http_parser.c http_parser.h Makefile
2424
gcc $(OPT_FAST) -c http_parser.c
2525

26-
test_fast: http_parser.o test.c
26+
test_fast: http_parser.o test.c http_parser.h
2727
gcc $(OPT_FAST) http_parser.o test.c -o $@
2828

2929
test-run-timed: test_fast

deps/http_parser/README.md

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,32 @@
11
HTTP Parser
22
===========
33

4-
This is a parser for HTTP messages written in C. It parses both requests
5-
and responses. The parser is designed to be used in performance HTTP
6-
applications. It does not make any allocations, it does not buffer data, and
7-
it can be interrupted at anytime. Depending on your architecture, it only
8-
requires between 100 and 200 bytes of data per message stream (in a web
9-
server that is per connection).
4+
This is a parser for HTTP messages written in C. It parses both requests and
5+
responses. The parser is designed to be used in high-performance HTTP
6+
applications. It does not make any syscalls nor allocations, it does not
7+
buffer data, and it can be interrupted at any time. Depending on your
8+
architecture, it only requires between 100 and 200 bytes of data per message
9+
stream (in a web server that is per connection).
1010

1111
Features:
1212

1313
* No dependencies
14-
* Parses both requests and responses.
15-
* Handles persistent streams.
14+
* Handles persistent streams (keep-alive).
1615
* Decodes chunked encoding.
17-
* Extracts the following data from a message
18-
* header fields and values
19-
* content-length
20-
* request method
21-
* response status code
22-
* transfer-encoding
23-
* http version
24-
* request path, query string, fragment
25-
* message body
26-
* Defends against buffer overflow attacks.
2716
* Upgrade support
17+
* Defends against buffer overflow attacks.
18+
19+
The parser extracts the following information from HTTP messages:
20+
21+
* Header fields and values
22+
* Content-Length
23+
* Request method
24+
* Response status code
25+
* Transfer-Encoding
26+
* HTTP version
27+
* Request path, query string, fragment
28+
* Message body
29+
2830

2931
Usage
3032
-----
@@ -55,10 +57,9 @@ When data is received on the socket execute the parser and check for errors.
5557
}
5658

5759
/* Start up / continue the parser.
58-
* Note we pass the recved==0 to http_parse_requests to signal
59-
* that EOF has been recieved.
60+
* Note we pass recved==0 to signal that EOF has been received.
6061
*/
61-
nparsed = http_parser_execute(parser, settings, buf, recved);
62+
nparsed = http_parser_execute(parser, &settings, buf, recved);
6263

6364
if (parser->upgrade) {
6465
/* handle new protocol */
@@ -83,10 +84,6 @@ The parser decodes the transfer-encoding for both requests and responses
8384
transparently. That is, a chunked encoding is decoded before being sent to
8485
the on_body callback.
8586

86-
It does not decode the content-encoding (gzip). Not all HTTP applications
87-
need to inspect the body. Decoding gzip is non-neglagable amount of
88-
processing (and requires making allocations). HTTP proxies using this
89-
parser, for example, would not want such a feature.
9087

9188
The Special Problem of Upgrade
9289
------------------------------
@@ -109,11 +106,11 @@ information the Web Socket protocol.)
109106

110107
To support this, the parser will treat this as a normal HTTP message without a
111108
body, issuing both on_headers_complete and on_message_complete callbacks. However,
112-
http_parser_execute() may finish without parsing the entire supplied buffer.
109+
http_parser_execute() will stop parsing at the end of the headers and return.
113110

114-
The user needs to check if parser->upgrade has been set to 1 after
115-
http_parser_execute() returns to determine if a premature exit was due to an
116-
upgrade or an error.
111+
The user is expected to check if `parser->upgrade` has been set to 1 after
112+
`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer
113+
at the offset given by the return value of `http_parser_execute()`.
117114

118115

119116
Callbacks
@@ -166,6 +163,7 @@ and apply following logic:
166163
| | | and append callback data to it |
167164
------------------------ ------------ --------------------------------------------
168165

166+
169167
See examples of reading in headers:
170168

171169
* [partial example](http://gist.github.com/155877) in C

deps/http_parser/http_parser.c

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@
3232
#endif
3333

3434

35-
#define MAX_FIELD_SIZE (80*1024)
36-
37-
3835
#define CALLBACK2(FOR) \
3936
do { \
4037
if (settings->on_##FOR) { \
@@ -45,20 +42,16 @@ do { \
4542

4643
#define MARK(FOR) \
4744
do { \
48-
parser->FOR##_mark = p; \
49-
parser->FOR##_size = 0; \
45+
FOR##_mark = p; \
5046
} while (0)
5147

52-
5348
#define CALLBACK_NOCLEAR(FOR) \
5449
do { \
55-
if (parser->FOR##_mark) { \
56-
parser->FOR##_size += p - parser->FOR##_mark; \
57-
if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data); \
50+
if (FOR##_mark) { \
5851
if (settings->on_##FOR) { \
5952
if (0 != settings->on_##FOR(parser, \
60-
parser->FOR##_mark, \
61-
p - parser->FOR##_mark)) \
53+
FOR##_mark, \
54+
p - FOR##_mark)) \
6255
{ \
6356
return (p - data); \
6457
} \
@@ -70,7 +63,7 @@ do { \
7063
#define CALLBACK(FOR) \
7164
do { \
7265
CALLBACK_NOCLEAR(FOR); \
73-
parser->FOR##_mark = NULL; \
66+
FOR##_mark = NULL; \
7467
} while (0)
7568

7669

@@ -132,6 +125,8 @@ static const uint32_t usual[] = {
132125
enum state
133126
{ s_dead = 1 /* important that this is > 0 */
134127

128+
, s_start_res_or_resp
129+
, s_res_or_resp_H
135130
, s_start_res
136131
, s_res_H
137132
, s_res_HT
@@ -303,12 +298,31 @@ size_t http_parser_execute (http_parser *parser,
303298
return 0;
304299
}
305300

306-
if (parser->header_field_mark) parser->header_field_mark = data;
307-
if (parser->header_value_mark) parser->header_value_mark = data;
308-
if (parser->fragment_mark) parser->fragment_mark = data;
309-
if (parser->query_string_mark) parser->query_string_mark = data;
310-
if (parser->path_mark) parser->path_mark = data;
311-
if (parser->url_mark) parser->url_mark = data;
301+
/* technically we could combine all of these (except for url_mark) into one
302+
variable, saving stack space, but it seems more clear to have them
303+
separated. */
304+
const char *header_field_mark = 0;
305+
const char *header_value_mark = 0;
306+
const char *fragment_mark = 0;
307+
const char *query_string_mark = 0;
308+
const char *path_mark = 0;
309+
const char *url_mark = 0;
310+
311+
if (state == s_header_field)
312+
header_field_mark = data;
313+
if (state == s_header_value)
314+
header_value_mark = data;
315+
if (state == s_req_fragment)
316+
fragment_mark = data;
317+
if (state == s_req_query_string)
318+
query_string_mark = data;
319+
if (state == s_req_path)
320+
path_mark = data;
321+
if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
322+
|| state == s_req_schema_slash_slash || state == s_req_port
323+
|| state == s_req_query_string_start || state == s_req_query_string
324+
|| state == s_req_fragment_start || state == s_req_fragment)
325+
url_mark = data;
312326

313327
for (p=data, pe=data+len; p != pe; p++) {
314328
ch = *p;
@@ -326,6 +340,42 @@ size_t http_parser_execute (http_parser *parser,
326340
*/
327341
goto error;
328342

343+
case s_start_res_or_resp:
344+
{
345+
if (ch == CR || ch == LF)
346+
break;
347+
parser->flags = 0;
348+
parser->content_length = -1;
349+
350+
CALLBACK2(message_begin);
351+
352+
if (ch == 'H')
353+
state = s_res_or_resp_H;
354+
else {
355+
parser->type = HTTP_REQUEST;
356+
if (ch < 'A' || 'Z' < ch) goto error;
357+
parser->buffer[0] = ch;
358+
index = 0;
359+
state = s_req_method;
360+
}
361+
break;
362+
}
363+
364+
case s_res_or_resp_H:
365+
if (ch == 'T') {
366+
parser->type = HTTP_RESPONSE;
367+
state = s_res_HT;
368+
} else {
369+
if (ch < 'A' || 'Z' < ch) goto error;
370+
parser->type = HTTP_REQUEST;
371+
parser->method = (enum http_method) 0;
372+
parser->buffer[0] = 'H';
373+
parser->buffer[1] = ch;
374+
index = 1;
375+
state = s_req_method;
376+
}
377+
break;
378+
329379
case s_start_res:
330380
{
331381
parser->flags = 0;
@@ -584,7 +634,7 @@ size_t http_parser_execute (http_parser *parser,
584634
break;
585635

586636
case 9:
587-
if (ngx_str9cmp(parser->buffer,
637+
if (ngx_str9cmp(parser->buffer,
588638
'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H')) {
589639
parser->method = HTTP_PROPPATCH;
590640
break;
@@ -637,6 +687,9 @@ size_t http_parser_execute (http_parser *parser,
637687
if (ch == ':') {
638688
state = s_req_schema_slash;
639689
break;
690+
} else if (ch == '.') {
691+
state = s_req_host;
692+
break;
640693
}
641694

642695
goto error;
@@ -1156,12 +1209,14 @@ size_t http_parser_execute (http_parser *parser,
11561209

11571210
if (!c) {
11581211
if (ch == CR) {
1212+
CALLBACK(header_value);
11591213
header_state = h_general;
11601214
state = s_header_almost_done;
11611215
break;
11621216
}
11631217

11641218
if (ch == LF) {
1219+
CALLBACK(header_value);
11651220
state = s_header_field_start;
11661221
break;
11671222
}
@@ -1547,15 +1602,8 @@ void
15471602
http_parser_init (http_parser *parser, enum http_parser_type t)
15481603
{
15491604
parser->type = t;
1550-
parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res);
1605+
parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_res_or_resp));
15511606
parser->nread = 0;
15521607
parser->upgrade = 0;
1553-
1554-
parser->header_field_mark = NULL;
1555-
parser->header_value_mark = NULL;
1556-
parser->query_string_mark = NULL;
1557-
parser->path_mark = NULL;
1558-
parser->url_mark = NULL;
1559-
parser->fragment_mark = NULL;
15601608
}
15611609

deps/http_parser/http_parser.h

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -89,49 +89,36 @@ enum http_method
8989
};
9090

9191

92-
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
92+
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
9393

9494

9595
struct http_parser {
9696
/** PRIVATE **/
97-
enum http_parser_type type;
98-
unsigned short state;
99-
unsigned short header_state;
100-
size_t index;
101-
102-
/* 1 = Upgrade header was present and the parser has exited because of that.
103-
* 0 = No upgrade header present.
104-
* Should be checked when http_parser_execute() returns in addition to
105-
* error checking.
106-
*/
107-
unsigned short upgrade;
97+
unsigned char type;
98+
unsigned char state;
99+
unsigned char header_state;
100+
unsigned char index;
108101

109102
char flags;
110103

111104
size_t nread;
112105
ssize_t body_read;
113106
ssize_t content_length;
114107

115-
const char *header_field_mark;
116-
size_t header_field_size;
117-
const char *header_value_mark;
118-
size_t header_value_size;
119-
const char *query_string_mark;
120-
size_t query_string_size;
121-
const char *path_mark;
122-
size_t path_size;
123-
const char *url_mark;
124-
size_t url_size;
125-
const char *fragment_mark;
126-
size_t fragment_size;
127-
128108
/** READ-ONLY **/
129109
unsigned short status_code; /* responses only */
130-
enum http_method method; /* requests only */
110+
unsigned short method; /* requests only */
131111
unsigned short http_major;
132112
unsigned short http_minor;
133113
char buffer[HTTP_PARSER_MAX_METHOD_LEN];
134114

115+
/* 1 = Upgrade header was present and the parser has exited because of that.
116+
* 0 = No upgrade header present.
117+
* Should be checked when http_parser_execute() returns in addition to
118+
* error checking.
119+
*/
120+
char upgrade;
121+
135122
/** PUBLIC **/
136123
void *data; /* A pointer to get hook to the "connection" or "socket" object */
137124
};

0 commit comments

Comments
 (0)