Skip to content

Commit 24ea607

Browse files
committed
implementing http_parser_parse_url
1 parent c67a99d commit 24ea607

File tree

2 files changed

+160
-1
lines changed

2 files changed

+160
-1
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package http_parser;
2+
3+
/**
 * Result holder filled in by the URL parser.
 *
 * <p>For every URL component that was found, the matching bit in {@link #field_set}
 * is set and the matching slot of {@link #field_data} describes where that component
 * sits in the parsed buffer.
 */
public class HTTPParserUrl {
  /** Bitmask of the components that were found; bit {@code i} corresponds to {@code field_data[i]}. */
  public int field_set;

  /** Numeric value of the port component; reset to 0 at the start of every parse. */
  public int port;

  /**
   * Location of one URL component inside the parsed buffer.
   *
   * <p>Declared {@code static} because it never touches the enclosing instance.
   */
  public static class FieldData {
    /** Offset of the component's first byte, relative to where parsing started. */
    public int off;
    /** Length of the component in bytes. */
    public int len;
  }

  /**
   * One slot per URL component. Every slot is pre-populated so the parser can update
   * {@code off}/{@code len} in place without hitting a null element.
   */
  public FieldData[] field_data = newFieldData(6); //UF_MAX

  /** Allocates an array of {@code count} zero-initialised {@link FieldData} entries. */
  private static FieldData[] newFieldData(int count) {
    FieldData[] slots = new FieldData[count];
    for (int i = 0; i < count; i++) {
      slots[i] = new FieldData();
    }
    return slots;
  }

}

src/impl/http_parser/lolevel/HTTPParser.java

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.nio.ByteBuffer;
44
import http_parser.HTTPException;
55
import http_parser.HTTPMethod;
6+
import http_parser.HTTPParserUrl;
67
import http_parser.ParserType;
78
import static http_parser.lolevel.HTTPParser.C.*;
89
import static http_parser.lolevel.HTTPParser.State.*;
@@ -1537,9 +1538,131 @@ public boolean http_should_keep_alive() {
15371538
return !http_message_needs_eof();
15381539
}
15391540

1541+
public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) {
1542+
1543+
UrlFields uf = UrlFields.UF_SCHEMA;
1544+
UrlFields old_uf = UrlFields.UF_SCHEMA;
1545+
u.port = 0;
1546+
u.field_set = 0;
1547+
state = (is_connect ? State.req_host_start : State.req_spaces_before_url);
1548+
int p_init = data.position();
1549+
int p = 0;
1550+
int pe = 0;
1551+
byte ch = 0;
1552+
while (data.position() != data.limit()) {
1553+
p = data.position();
1554+
pe = data.limit();
1555+
ch = data.get();
1556+
state = parse_url_char(ch);
1557+
switch(state) {
1558+
case dead:
1559+
return 1;
1560+
1561+
/* Skip delimeters */
1562+
case req_schema_slash:
1563+
case req_schema_slash_slash:
1564+
case req_host_start:
1565+
case req_host_v6_start:
1566+
case req_host_v6_end:
1567+
case req_port_start:
1568+
case req_query_string_start:
1569+
case req_fragment_start:
1570+
continue;
1571+
1572+
case req_schema:
1573+
uf = UrlFields.UF_SCHEMA;
1574+
break;
1575+
1576+
case req_host:
1577+
case req_host_v6:
1578+
uf = UrlFields.UF_HOST;
1579+
break;
1580+
1581+
case req_port:
1582+
uf = UrlFields.UF_PORT;
1583+
break;
1584+
1585+
case req_path:
1586+
uf = UrlFields.UF_PATH;
1587+
break;
1588+
1589+
case req_query_string:
1590+
uf = UrlFields.UF_QUERY;
1591+
break;
1592+
1593+
case req_fragment:
1594+
uf = UrlFields.UF_FRAGMENT;
1595+
break;
1596+
1597+
default:
1598+
return 1;
1599+
}
1600+
/* Nothing's changed; soldier on */
1601+
if (uf == old_uf) {
1602+
u.field_data[uf.getIndex()].len++;//Not sure if using ordinal() is a good idea ...
1603+
continue;
1604+
}
15401605

1541-
//TODO Skip http_parser_parse_url & http_parser_pause for now
1606+
u.field_data[uf.getIndex()].off = p - p_init;
1607+
u.field_data[uf.getIndex()].len = 1;
15421608

1609+
u.field_set |= (1 << uf.getIndex());
1610+
old_uf = uf;
1611+
1612+
}
1613+
1614+
/* CONNECT requests can only contain "hostname:port" */
1615+
if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex())|(1 << UrlFields.UF_PORT.getIndex()))) {
1616+
return 1;
1617+
}
1618+
1619+
/* Make sure we don't end somewhere unexpected */
1620+
switch (state) {
1621+
case req_host_v6_start:
1622+
case req_host_v6:
1623+
case req_host_v6_end:
1624+
case req_host:
1625+
case req_port_start:
1626+
return 1;
1627+
default:
1628+
break;
1629+
}
1630+
1631+
if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) {
1632+
/* Don't bother with endp; we've already validated the string */
1633+
int v = strtoi(data, p + u.field_data[UrlFields.UF_PORT.getIndex()].off);
1634+
1635+
/* Ports have a max value of 2^16 */
1636+
if (v > 0xffff) {
1637+
return 1;
1638+
}
1639+
1640+
u.port = v;
1641+
}
1642+
1643+
return 0;
1644+
}
1645+
1646+
//hacky reimplementation of srttoul, tailored for our simple needs
1647+
//we only need to parse port val, so no negative values etc
1648+
int strtoi(ByteBuffer data, int start_pos) {
1649+
data.position(start_pos);
1650+
byte ch;
1651+
String str = "";
1652+
while(true) {
1653+
ch = data.get();
1654+
if(Character.isWhitespace((char)ch)){
1655+
continue;
1656+
}
1657+
if(isDigit(ch)){
1658+
str = str + (char)ch; //TODO replace with something less hacky
1659+
}else{
1660+
break;
1661+
}
1662+
}
1663+
return Integer.parseInt(str);
1664+
}
1665+
15431666
boolean isDigit(byte b) {
15441667
if (b >= 0x30 && b <=0x39) {
15451668
return true;
@@ -1989,4 +2112,24 @@ enum HState {
19892112
, connection_keep_alive
19902113
, connection_close
19912114
}
2115+
/**
 * The components a URL is split into. Each constant carries the explicit array index
 * (and bit position) used for it, so callers do not have to rely on {@code ordinal()}.
 *
 * <p>{@code UF_MAX} is not a component: its index, 6, is the number of real components.
 */
enum UrlFields {
  UF_SCHEMA(0),
  UF_HOST(1),
  UF_PORT(2),
  UF_PATH(3),
  UF_QUERY(4),
  UF_FRAGMENT(5),
  UF_MAX(6);

  /** Fixed numeric index assigned to this constant. */
  private final int index;

  UrlFields(int fieldIndex) {
    index = fieldIndex;
  }

  /** Returns the numeric index assigned to this constant. */
  public int getIndex() {
    return this.index;
  }
}
19922135
}

0 commit comments

Comments
 (0)