"Fossies" - the Fresh Open Source Software Archive 
Member "wrk-4.2.0/src/http_parser.c" (7 Feb 2021, 69700 Bytes) of package /linux/www/wrk-4.2.0.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "http_parser.c" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
4.0.2_vs_4.1.0.
1 /* Copyright Joyent, Inc. and other Node contributors.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21 #include "http_parser.h"
22 #include <assert.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <limits.h>
28
/* Fallback for environments whose <limits.h> does not provide ULLONG_MAX. */
#if !defined(ULLONG_MAX)
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values. Each argument may be evaluated twice. */
#if !defined(MIN)
# define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif

/* Number of elements in a true array (not meaningful for pointers). */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* 1 when bit number `bit` is set in the byte array `map`, 0 otherwise.
 * Bit n is stored in byte (n >> 3) at position (n & 7).
 */
#if !defined(BIT_AT)
# define BIT_AT(map, bit)                                            \
  (!!((unsigned int) (map)[(unsigned int) (bit) >> 3] &              \
      (1 << ((unsigned int) (bit) & 7))))
#endif

/* arr[idx] when idx lies inside the array, otherwise `fallback`. */
#if !defined(ELEM_AT)
# define ELEM_AT(arr, idx, fallback)                                 \
  ((unsigned int) (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : (fallback))
#endif
50
/* Helper macros used inside the parser functions. They rely on the local
 * names `parser` (the http_parser being driven) and `p_state` (the cached
 * copy of the current state) being in scope at the point of use.
 *
 * None of the statement-like macros ends in a semicolon: the caller supplies
 * it, so each macro is exactly one statement and is safe in an unbraced
 * `if (...) MACRO(); else ...`.
 */

/* Store error code `e` in the parser. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while (0)

/* Read the cached state. */
#define CURRENT_STATE() p_state

/* Replace the cached state with V. */
#define UPDATE_STATE(V) (p_state = (enum state) (V))

/* Write the cached state back to the parser and return V from the
 * enclosing function.
 */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0)

/* Jump to the `reexecute` label of the enclosing function, so the current
 * byte is processed again under the state that was just selected.
 */
#define REEXECUTE() goto reexecute


/* Branch-prediction hints; plain pass-through on non-GNU compilers. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
74
75
/* Invoke the notification callback settings->on_<CB> when one is installed,
 * and leave the enclosing function with RET if the parser ends up in an
 * error state afterwards. The cached state is written to the parser before
 * the call and read back after it.
 */
#define CALLBACK_NOTIFY_(CB, RET)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##CB)) {                                   \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(0 != settings->on_##CB(parser))) {                  \
      SET_ERRNO(HPE_CB_##CB);                                        \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* the callback failed or the parser got paused: stop here */    \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (RET);                                                  \
    }                                                                \
  }                                                                  \
} while (0)

/* Notification callback; on failure the current byte counts as consumed */
#define CALLBACK_NOTIFY(CB) CALLBACK_NOTIFY_(CB, p - data + 1)

/* Notification callback; on failure the current byte is not consumed */
#define CALLBACK_NOTIFY_NOADVANCE(CB) CALLBACK_NOTIFY_(CB, p - data)

/* When <CB>_mark is set, hand LEN bytes starting at the mark to the data
 * callback settings->on_<CB> (if installed), leave the enclosing function
 * with RET if the parser ends up in an error state, and otherwise clear
 * the mark.
 */
#define CALLBACK_DATA_(CB, LEN, RET)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (CB##_mark) {                                                   \
    if (LIKELY(settings->on_##CB)) {                                 \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
                   settings->on_##CB(parser, CB##_mark, (LEN)))) {   \
        SET_ERRNO(HPE_CB_##CB);                                      \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* the callback failed or the parser got paused: stop here */  \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (RET);                                                \
      }                                                              \
    }                                                                \
    CB##_mark = NULL;                                                \
  }                                                                  \
} while (0)

/* Data callback over [mark, p); on failure the current byte counts as
 * consumed
 */
#define CALLBACK_DATA(CB)                                            \
  CALLBACK_DATA_(CB, p - CB##_mark, p - data + 1)

/* Data callback over [mark, p); on failure the current byte is not consumed */
#define CALLBACK_DATA_NOADVANCE(CB)                                  \
  CALLBACK_DATA_(CB, p - CB##_mark, p - data)

/* Point <CB>_mark at the current byte unless the mark is already set */
#define MARK(CB)                                                     \
do {                                                                 \
  if (!CB##_mark) {                                                  \
    CB##_mark = p;                                                   \
  }                                                                  \
} while (0)
139
/* Header size guard.
 *
 * Adds NBYTES to parser->nread and fails with HPE_HEADER_OVERFLOW (jumping
 * to the enclosing function's `error` label) once the running total exceeds
 * HTTP_MAX_HEADER_SIZE. The total covers the HTTP headers including the
 * status line.
 *
 * The check protects embedders against denial-of-service attacks in which
 * an attacker feeds a never-ending header that the embedder keeps
 * buffering. It is arguably the embedder's responsibility, but most won't
 * bother, so it is done here on their behalf. HTTP_MAX_HEADER_SIZE is still
 * far bigger than any reasonable request or response, so this should never
 * affect day-to-day operation.
 */
#define COUNT_HEADER_SIZE(NBYTES)                                    \
do {                                                                 \
  parser->nread += (NBYTES);                                         \
  if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
159
160
/* Lower-case header names and header values that the parser matches
 * against byte by byte.
 */
#define PROXY_CONNECTION   "proxy-connection"
#define CONNECTION         "connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
169
170
171 static const char *method_strings[] =
172 {
173 #define XX(num, name, string) #string,
174 HTTP_METHOD_MAP(XX)
175 #undef XX
176 };
177
178
/* Token lookup table, following the rfc 2616 definition:
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by byte value. An entry is 0 when the byte is not a token
 * character; otherwise it holds the character itself, with 'A'-'Z' folded
 * to lower case. Entries 0x80-0xff are not listed and are therefore 0.
 */
static const char tokens[256] = {
/* 0x00 - 0x0f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x10 - 0x1f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x20 - 0x2f:  sp ! " # $ % & ' ( ) * + , - . / */
  0,  '!',  0,  '#', '$', '%', '&', '\'', 0,   0,  '*', '+',  0,  '-', '.',  0,
/* 0x30 - 0x3f:  0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0,   0,   0,   0,   0,   0,
/* 0x40 - 0x4f:  @ A B C D E F G H I J K L M N O */
  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50 - 0x5f:  P Q R S T U V W X Y Z [ \ ] ^ _ */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0,   0,  '^', '_',
/* 0x60 - 0x6f:  ` a b c d e f g h i j k l m n o */
 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 - 0x7f:  p q r s t u v w x y z { | } ~ del */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,  '|',  0,  '~',  0
};
219
220
/* Hex digit decoder, indexed by byte value: '0'-'9' give 0-9, 'A'-'F' and
 * 'a'-'f' give 10-15, and every other byte gives -1.
 *
 * All 256 entries are listed explicitly. With only the first 128 listed,
 * C zero-initialises the rest, so bytes 0x80-0xff would decode as the
 * valid digit 0 instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 */
  };
231
232
/* T(v) marks bits that are only set when strict mode is off. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of the bytes 0x00-0x7f that count as ordinary URL characters; read
 * it with BIT_AT(normal_url_char, byte). Entry i covers the eight bytes
 * 8*i .. 8*i+7, least significant bit first. Only the first 16 entries are
 * listed, so the bits for bytes 0x80-0xff are 0.
 */
static const uint8_t normal_url_char[32] = {
  0x00,     /*   0 -   7: nul soh stx etx eot enq ack bel   (none set)     */
  T(0x12),  /*   8 -  15: ht (9) and np (12), non-strict mode only         */
  0x00,     /*  16 -  23: dle dc1 dc2 dc3 dc4 nak syn etb   (none set)     */
  0x00,     /*  24 -  31: can em sub esc fs gs rs us        (none set)     */
  0xF6,     /*  32 -  39: ! " $ % & '     (sp and # are clear)             */
  0xFF,     /*  40 -  47: ( ) * + , - . /                                  */
  0xFF,     /*  48 -  55: 0 1 2 3 4 5 6 7                                  */
  0x7F,     /*  56 -  63: 8 9 : ; < = >   (? is clear)                     */
  0xFF,     /*  64 -  71: @ A B C D E F G                                  */
  0xFF,     /*  72 -  79: H I J K L M N O                                  */
  0xFF,     /*  80 -  87: P Q R S T U V W                                  */
  0xFF,     /*  88 -  95: X Y Z [ \ ] ^ _                                  */
  0xFF,     /*  96 - 103: ` a b c d e f g                                  */
  0xFF,     /* 104 - 111: h i j k l m n o                                  */
  0xFF,     /* 112 - 119: p q r s t u v w                                  */
  0x7F      /* 120 - 127: x y z { | } ~   (del is clear)                   */
};

#undef T
275
/* Parser states. Values are consecutive and start at 1; the relative order
 * matters because PARSING_HEADER() compares against s_headers_done.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_http_major,
  s_res_http_dot,
  s_res_http_minor,
  s_res_http_end,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_http_major,
  s_req_http_dot,
  s_req_http_minor,
  s_req_http_end,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};
355
356
/* True while `state` is at or before s_headers_done, i.e. one of the states
 * whose bytes are counted against the header size limit. The argument is
 * parenthesised so that any expression can be passed safely.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
358
359
/* Header matching states, stored in parser->header_state. Values are
 * consecutive and start at 0.
 */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  h_matching_transfer_encoding_chunked,
  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
389
/* Host parsing states. Values are consecutive and start at 1. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
405
/* Macros for character classes; some depend on strict-mode.
 *
 * Every macro parenthesises its argument and its whole expansion, so an
 * arbitrary expression can be passed. Most of them evaluate the argument
 * more than once, so it must not have side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never accepts a space, whatever the strictness */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* non-strict: a space counts as a token character, bytes with the high bit
 * set count as URL characters, and '_' is allowed in host names
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is acceptable inside a header: CR, LF, HT (9), or any
 * byte above 31 other than DEL (127). That covers the visible (printable)
 * US-ASCII characters and %x80-FF.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* State in which a new message begins, chosen by the parser type */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


#if HTTP_PARSER_STRICT
/* Strict mode: fail with HPE_STRICT when `cond` holds */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
/* Non-strict mode: the check expands to nothing */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
458
459
460 /* Map errno values to strings for human-readable output */
461 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
462 static struct {
463 const char *name;
464 const char *description;
465 } http_strerror_tab[] = {
466 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
467 };
468 #undef HTTP_STRERROR_GEN
469
470 int http_message_needs_eof(const http_parser *parser);
471
472 /* Our URL parser.
473 *
474 * This is designed to be shared by http_parser_execute() for URL validation,
475 * hence it has a state transition + byte-for-byte interface. In addition, it
476 * is meant to be embedded in http_parser_parse_url(), which does the dirty
477 * work of turning state transitions URL components for its API.
478 *
479 * This function should only be invoked with non-space characters. It is
480 * assumed that the caller cares about (and can detect) the transition between
481 * URL and non-URL states by looking for these.
482 */
483 static enum state
484 parse_url_char(enum state s, const char ch)
485 {
486 if (ch == ' ' || ch == '\r' || ch == '\n') {
487 return s_dead;
488 }
489
490 #if HTTP_PARSER_STRICT
491 if (ch == '\t' || ch == '\f') {
492 return s_dead;
493 }
494 #endif
495
496 switch (s) {
497 case s_req_spaces_before_url:
498 /* Proxied requests are followed by scheme of an absolute URI (alpha).
499 * All methods except CONNECT are followed by '/' or '*'.
500 */
501
502 if (ch == '/' || ch == '*') {
503 return s_req_path;
504 }
505
506 if (IS_ALPHA(ch)) {
507 return s_req_schema;
508 }
509
510 break;
511
512 case s_req_schema:
513 if (IS_ALPHA(ch)) {
514 return s;
515 }
516
517 if (ch == ':') {
518 return s_req_schema_slash;
519 }
520
521 break;
522
523 case s_req_schema_slash:
524 if (ch == '/') {
525 return s_req_schema_slash_slash;
526 }
527
528 break;
529
530 case s_req_schema_slash_slash:
531 if (ch == '/') {
532 return s_req_server_start;
533 }
534
535 break;
536
537 case s_req_server_with_at:
538 if (ch == '@') {
539 return s_dead;
540 }
541
542 /* FALLTHROUGH */
543 case s_req_server_start:
544 case s_req_server:
545 if (ch == '/') {
546 return s_req_path;
547 }
548
549 if (ch == '?') {
550 return s_req_query_string_start;
551 }
552
553 if (ch == '@') {
554 return s_req_server_with_at;
555 }
556
557 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
558 return s_req_server;
559 }
560
561 break;
562
563 case s_req_path:
564 if (IS_URL_CHAR(ch)) {
565 return s;
566 }
567
568 switch (ch) {
569 case '?':
570 return s_req_query_string_start;
571
572 case '#':
573 return s_req_fragment_start;
574 }
575
576 break;
577
578 case s_req_query_string_start:
579 case s_req_query_string:
580 if (IS_URL_CHAR(ch)) {
581 return s_req_query_string;
582 }
583
584 switch (ch) {
585 case '?':
586 /* allow extra '?' in query string */
587 return s_req_query_string;
588
589 case '#':
590 return s_req_fragment_start;
591 }
592
593 break;
594
595 case s_req_fragment_start:
596 if (IS_URL_CHAR(ch)) {
597 return s_req_fragment;
598 }
599
600 switch (ch) {
601 case '?':
602 return s_req_fragment;
603
604 case '#':
605 return s;
606 }
607
608 break;
609
610 case s_req_fragment:
611 if (IS_URL_CHAR(ch)) {
612 return s;
613 }
614
615 switch (ch) {
616 case '?':
617 case '#':
618 return s;
619 }
620
621 break;
622
623 default:
624 break;
625 }
626
627 /* We should never fall out of the switch above unless there's an error */
628 return s_dead;
629 }
630
631 size_t http_parser_execute (http_parser *parser,
632 const http_parser_settings *settings,
633 const char *data,
634 size_t len)
635 {
636 char c, ch;
637 int8_t unhex_val;
638 const char *p = data;
639 const char *header_field_mark = 0;
640 const char *header_value_mark = 0;
641 const char *url_mark = 0;
642 const char *body_mark = 0;
643 const char *status_mark = 0;
644 enum state p_state = (enum state) parser->state;
645 const unsigned int lenient = parser->lenient_http_headers;
646
647 /* We're in an error state. Don't bother doing anything. */
648 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
649 return 0;
650 }
651
652 if (len == 0) {
653 switch (CURRENT_STATE()) {
654 case s_body_identity_eof:
655 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
656 * we got paused.
657 */
658 CALLBACK_NOTIFY_NOADVANCE(message_complete);
659 return 0;
660
661 case s_dead:
662 case s_start_req_or_res:
663 case s_start_res:
664 case s_start_req:
665 return 0;
666
667 default:
668 SET_ERRNO(HPE_INVALID_EOF_STATE);
669 return 1;
670 }
671 }
672
673
674 if (CURRENT_STATE() == s_header_field)
675 header_field_mark = data;
676 if (CURRENT_STATE() == s_header_value)
677 header_value_mark = data;
678 switch (CURRENT_STATE()) {
679 case s_req_path:
680 case s_req_schema:
681 case s_req_schema_slash:
682 case s_req_schema_slash_slash:
683 case s_req_server_start:
684 case s_req_server:
685 case s_req_server_with_at:
686 case s_req_query_string_start:
687 case s_req_query_string:
688 case s_req_fragment_start:
689 case s_req_fragment:
690 url_mark = data;
691 break;
692 case s_res_status:
693 status_mark = data;
694 break;
695 default:
696 break;
697 }
698
699 for (p=data; p != data + len; p++) {
700 ch = *p;
701
702 if (PARSING_HEADER(CURRENT_STATE()))
703 COUNT_HEADER_SIZE(1);
704
705 reexecute:
706 switch (CURRENT_STATE()) {
707
708 case s_dead:
709 /* this state is used after a 'Connection: close' message
710 * the parser will error out if it reads another message
711 */
712 if (LIKELY(ch == CR || ch == LF))
713 break;
714
715 SET_ERRNO(HPE_CLOSED_CONNECTION);
716 goto error;
717
718 case s_start_req_or_res:
719 {
720 if (ch == CR || ch == LF)
721 break;
722 parser->flags = 0;
723 parser->content_length = ULLONG_MAX;
724
725 if (ch == 'H') {
726 UPDATE_STATE(s_res_or_resp_H);
727
728 CALLBACK_NOTIFY(message_begin);
729 } else {
730 parser->type = HTTP_REQUEST;
731 UPDATE_STATE(s_start_req);
732 REEXECUTE();
733 }
734
735 break;
736 }
737
738 case s_res_or_resp_H:
739 if (ch == 'T') {
740 parser->type = HTTP_RESPONSE;
741 UPDATE_STATE(s_res_HT);
742 } else {
743 if (UNLIKELY(ch != 'E')) {
744 SET_ERRNO(HPE_INVALID_CONSTANT);
745 goto error;
746 }
747
748 parser->type = HTTP_REQUEST;
749 parser->method = HTTP_HEAD;
750 parser->index = 2;
751 UPDATE_STATE(s_req_method);
752 }
753 break;
754
755 case s_start_res:
756 {
757 parser->flags = 0;
758 parser->content_length = ULLONG_MAX;
759
760 switch (ch) {
761 case 'H':
762 UPDATE_STATE(s_res_H);
763 break;
764
765 case CR:
766 case LF:
767 break;
768
769 default:
770 SET_ERRNO(HPE_INVALID_CONSTANT);
771 goto error;
772 }
773
774 CALLBACK_NOTIFY(message_begin);
775 break;
776 }
777
778 case s_res_H:
779 STRICT_CHECK(ch != 'T');
780 UPDATE_STATE(s_res_HT);
781 break;
782
783 case s_res_HT:
784 STRICT_CHECK(ch != 'T');
785 UPDATE_STATE(s_res_HTT);
786 break;
787
788 case s_res_HTT:
789 STRICT_CHECK(ch != 'P');
790 UPDATE_STATE(s_res_HTTP);
791 break;
792
793 case s_res_HTTP:
794 STRICT_CHECK(ch != '/');
795 UPDATE_STATE(s_res_http_major);
796 break;
797
798 case s_res_http_major:
799 if (UNLIKELY(!IS_NUM(ch))) {
800 SET_ERRNO(HPE_INVALID_VERSION);
801 goto error;
802 }
803
804 parser->http_major = ch - '0';
805 UPDATE_STATE(s_res_http_dot);
806 break;
807
808 case s_res_http_dot:
809 {
810 if (UNLIKELY(ch != '.')) {
811 SET_ERRNO(HPE_INVALID_VERSION);
812 goto error;
813 }
814
815 UPDATE_STATE(s_res_http_minor);
816 break;
817 }
818
819 case s_res_http_minor:
820 if (UNLIKELY(!IS_NUM(ch))) {
821 SET_ERRNO(HPE_INVALID_VERSION);
822 goto error;
823 }
824
825 parser->http_minor = ch - '0';
826 UPDATE_STATE(s_res_http_end);
827 break;
828
829 case s_res_http_end:
830 {
831 if (UNLIKELY(ch != ' ')) {
832 SET_ERRNO(HPE_INVALID_VERSION);
833 goto error;
834 }
835
836 UPDATE_STATE(s_res_first_status_code);
837 break;
838 }
839
840 case s_res_first_status_code:
841 {
842 if (!IS_NUM(ch)) {
843 if (ch == ' ') {
844 break;
845 }
846
847 SET_ERRNO(HPE_INVALID_STATUS);
848 goto error;
849 }
850 parser->status_code = ch - '0';
851 UPDATE_STATE(s_res_status_code);
852 break;
853 }
854
855 case s_res_status_code:
856 {
857 if (!IS_NUM(ch)) {
858 switch (ch) {
859 case ' ':
860 UPDATE_STATE(s_res_status_start);
861 break;
862 case CR:
863 case LF:
864 UPDATE_STATE(s_res_status_start);
865 REEXECUTE();
866 break;
867 default:
868 SET_ERRNO(HPE_INVALID_STATUS);
869 goto error;
870 }
871 break;
872 }
873
874 parser->status_code *= 10;
875 parser->status_code += ch - '0';
876
877 if (UNLIKELY(parser->status_code > 999)) {
878 SET_ERRNO(HPE_INVALID_STATUS);
879 goto error;
880 }
881
882 break;
883 }
884
885 case s_res_status_start:
886 {
887 MARK(status);
888 UPDATE_STATE(s_res_status);
889 parser->index = 0;
890
891 if (ch == CR || ch == LF)
892 REEXECUTE();
893
894 break;
895 }
896
897 case s_res_status:
898 if (ch == CR) {
899 UPDATE_STATE(s_res_line_almost_done);
900 CALLBACK_DATA(status);
901 break;
902 }
903
904 if (ch == LF) {
905 UPDATE_STATE(s_header_field_start);
906 CALLBACK_DATA(status);
907 break;
908 }
909
910 break;
911
912 case s_res_line_almost_done:
913 STRICT_CHECK(ch != LF);
914 UPDATE_STATE(s_header_field_start);
915 break;
916
917 case s_start_req:
918 {
919 if (ch == CR || ch == LF)
920 break;
921 parser->flags = 0;
922 parser->content_length = ULLONG_MAX;
923
924 if (UNLIKELY(!IS_ALPHA(ch))) {
925 SET_ERRNO(HPE_INVALID_METHOD);
926 goto error;
927 }
928
929 parser->method = (enum http_method) 0;
930 parser->index = 1;
931 switch (ch) {
932 case 'A': parser->method = HTTP_ACL; break;
933 case 'B': parser->method = HTTP_BIND; break;
934 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
935 case 'D': parser->method = HTTP_DELETE; break;
936 case 'G': parser->method = HTTP_GET; break;
937 case 'H': parser->method = HTTP_HEAD; break;
938 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
939 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
940 case 'N': parser->method = HTTP_NOTIFY; break;
941 case 'O': parser->method = HTTP_OPTIONS; break;
942 case 'P': parser->method = HTTP_POST;
943 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
944 break;
945 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
946 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
947 case 'T': parser->method = HTTP_TRACE; break;
948 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
949 default:
950 SET_ERRNO(HPE_INVALID_METHOD);
951 goto error;
952 }
953 UPDATE_STATE(s_req_method);
954
955 CALLBACK_NOTIFY(message_begin);
956
957 break;
958 }
959
960 case s_req_method:
961 {
962 const char *matcher;
963 if (UNLIKELY(ch == '\0')) {
964 SET_ERRNO(HPE_INVALID_METHOD);
965 goto error;
966 }
967
968 matcher = method_strings[parser->method];
969 if (ch == ' ' && matcher[parser->index] == '\0') {
970 UPDATE_STATE(s_req_spaces_before_url);
971 } else if (ch == matcher[parser->index]) {
972 ; /* nada */
973 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
974
975 switch (parser->method << 16 | parser->index << 8 | ch) {
976 #define XX(meth, pos, ch, new_meth) \
977 case (HTTP_##meth << 16 | pos << 8 | ch): \
978 parser->method = HTTP_##new_meth; break;
979
980 XX(POST, 1, 'U', PUT)
981 XX(POST, 1, 'A', PATCH)
982 XX(POST, 1, 'R', PROPFIND)
983 XX(PUT, 2, 'R', PURGE)
984 XX(CONNECT, 1, 'H', CHECKOUT)
985 XX(CONNECT, 2, 'P', COPY)
986 XX(MKCOL, 1, 'O', MOVE)
987 XX(MKCOL, 1, 'E', MERGE)
988 XX(MKCOL, 1, '-', MSEARCH)
989 XX(MKCOL, 2, 'A', MKACTIVITY)
990 XX(MKCOL, 3, 'A', MKCALENDAR)
991 XX(SUBSCRIBE, 1, 'E', SEARCH)
992 XX(REPORT, 2, 'B', REBIND)
993 XX(PROPFIND, 4, 'P', PROPPATCH)
994 XX(LOCK, 1, 'I', LINK)
995 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
996 XX(UNLOCK, 2, 'B', UNBIND)
997 XX(UNLOCK, 3, 'I', UNLINK)
998 #undef XX
999 default:
1000 SET_ERRNO(HPE_INVALID_METHOD);
1001 goto error;
1002 }
1003 } else {
1004 SET_ERRNO(HPE_INVALID_METHOD);
1005 goto error;
1006 }
1007
1008 ++parser->index;
1009 break;
1010 }
1011
1012 case s_req_spaces_before_url:
1013 {
1014 if (ch == ' ') break;
1015
1016 MARK(url);
1017 if (parser->method == HTTP_CONNECT) {
1018 UPDATE_STATE(s_req_server_start);
1019 }
1020
1021 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1022 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1023 SET_ERRNO(HPE_INVALID_URL);
1024 goto error;
1025 }
1026
1027 break;
1028 }
1029
1030 case s_req_schema:
1031 case s_req_schema_slash:
1032 case s_req_schema_slash_slash:
1033 case s_req_server_start:
1034 {
1035 switch (ch) {
1036 /* No whitespace allowed here */
1037 case ' ':
1038 case CR:
1039 case LF:
1040 SET_ERRNO(HPE_INVALID_URL);
1041 goto error;
1042 default:
1043 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1044 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1045 SET_ERRNO(HPE_INVALID_URL);
1046 goto error;
1047 }
1048 }
1049
1050 break;
1051 }
1052
1053 case s_req_server:
1054 case s_req_server_with_at:
1055 case s_req_path:
1056 case s_req_query_string_start:
1057 case s_req_query_string:
1058 case s_req_fragment_start:
1059 case s_req_fragment:
1060 {
1061 switch (ch) {
1062 case ' ':
1063 UPDATE_STATE(s_req_http_start);
1064 CALLBACK_DATA(url);
1065 break;
1066 case CR:
1067 case LF:
1068 parser->http_major = 0;
1069 parser->http_minor = 9;
1070 UPDATE_STATE((ch == CR) ?
1071 s_req_line_almost_done :
1072 s_header_field_start);
1073 CALLBACK_DATA(url);
1074 break;
1075 default:
1076 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1077 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1078 SET_ERRNO(HPE_INVALID_URL);
1079 goto error;
1080 }
1081 }
1082 break;
1083 }
1084
1085 case s_req_http_start:
1086 switch (ch) {
1087 case 'H':
1088 UPDATE_STATE(s_req_http_H);
1089 break;
1090 case ' ':
1091 break;
1092 default:
1093 SET_ERRNO(HPE_INVALID_CONSTANT);
1094 goto error;
1095 }
1096 break;
1097
1098 case s_req_http_H:
1099 STRICT_CHECK(ch != 'T');
1100 UPDATE_STATE(s_req_http_HT);
1101 break;
1102
1103 case s_req_http_HT:
1104 STRICT_CHECK(ch != 'T');
1105 UPDATE_STATE(s_req_http_HTT);
1106 break;
1107
1108 case s_req_http_HTT:
1109 STRICT_CHECK(ch != 'P');
1110 UPDATE_STATE(s_req_http_HTTP);
1111 break;
1112
1113 case s_req_http_HTTP:
1114 STRICT_CHECK(ch != '/');
1115 UPDATE_STATE(s_req_http_major);
1116 break;
1117
1118 case s_req_http_major:
1119 if (UNLIKELY(!IS_NUM(ch))) {
1120 SET_ERRNO(HPE_INVALID_VERSION);
1121 goto error;
1122 }
1123
1124 parser->http_major = ch - '0';
1125 UPDATE_STATE(s_req_http_dot);
1126 break;
1127
1128 case s_req_http_dot:
1129 {
1130 if (UNLIKELY(ch != '.')) {
1131 SET_ERRNO(HPE_INVALID_VERSION);
1132 goto error;
1133 }
1134
1135 UPDATE_STATE(s_req_http_minor);
1136 break;
1137 }
1138
1139 case s_req_http_minor:
1140 if (UNLIKELY(!IS_NUM(ch))) {
1141 SET_ERRNO(HPE_INVALID_VERSION);
1142 goto error;
1143 }
1144
1145 parser->http_minor = ch - '0';
1146 UPDATE_STATE(s_req_http_end);
1147 break;
1148
1149 case s_req_http_end:
1150 {
1151 if (ch == CR) {
1152 UPDATE_STATE(s_req_line_almost_done);
1153 break;
1154 }
1155
1156 if (ch == LF) {
1157 UPDATE_STATE(s_header_field_start);
1158 break;
1159 }
1160
1161 SET_ERRNO(HPE_INVALID_VERSION);
1162 goto error;
1163 break;
1164 }
1165
1166 /* end of request line */
1167 case s_req_line_almost_done:
1168 {
1169 if (UNLIKELY(ch != LF)) {
1170 SET_ERRNO(HPE_LF_EXPECTED);
1171 goto error;
1172 }
1173
1174 UPDATE_STATE(s_header_field_start);
1175 break;
1176 }
1177
1178 case s_header_field_start:
1179 {
1180 if (ch == CR) {
1181 UPDATE_STATE(s_headers_almost_done);
1182 break;
1183 }
1184
1185 if (ch == LF) {
1186 /* they might be just sending \n instead of \r\n so this would be
1187 * the second \n to denote the end of headers*/
1188 UPDATE_STATE(s_headers_almost_done);
1189 REEXECUTE();
1190 }
1191
1192 c = TOKEN(ch);
1193
1194 if (UNLIKELY(!c)) {
1195 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1196 goto error;
1197 }
1198
1199 MARK(header_field);
1200
1201 parser->index = 0;
1202 UPDATE_STATE(s_header_field);
1203
1204 switch (c) {
1205 case 'c':
1206 parser->header_state = h_C;
1207 break;
1208
1209 case 'p':
1210 parser->header_state = h_matching_proxy_connection;
1211 break;
1212
1213 case 't':
1214 parser->header_state = h_matching_transfer_encoding;
1215 break;
1216
1217 case 'u':
1218 parser->header_state = h_matching_upgrade;
1219 break;
1220
1221 default:
1222 parser->header_state = h_general;
1223 break;
1224 }
1225 break;
1226 }
1227
1228 case s_header_field:
1229 {
1230 const char* start = p;
1231 for (; p != data + len; p++) {
1232 ch = *p;
1233 c = TOKEN(ch);
1234
1235 if (!c)
1236 break;
1237
1238 switch (parser->header_state) {
1239 case h_general:
1240 break;
1241
1242 case h_C:
1243 parser->index++;
1244 parser->header_state = (c == 'o' ? h_CO : h_general);
1245 break;
1246
1247 case h_CO:
1248 parser->index++;
1249 parser->header_state = (c == 'n' ? h_CON : h_general);
1250 break;
1251
1252 case h_CON:
1253 parser->index++;
1254 switch (c) {
1255 case 'n':
1256 parser->header_state = h_matching_connection;
1257 break;
1258 case 't':
1259 parser->header_state = h_matching_content_length;
1260 break;
1261 default:
1262 parser->header_state = h_general;
1263 break;
1264 }
1265 break;
1266
1267 /* connection */
1268
1269 case h_matching_connection:
1270 parser->index++;
1271 if (parser->index > sizeof(CONNECTION)-1
1272 || c != CONNECTION[parser->index]) {
1273 parser->header_state = h_general;
1274 } else if (parser->index == sizeof(CONNECTION)-2) {
1275 parser->header_state = h_connection;
1276 }
1277 break;
1278
1279 /* proxy-connection */
1280
1281 case h_matching_proxy_connection:
1282 parser->index++;
1283 if (parser->index > sizeof(PROXY_CONNECTION)-1
1284 || c != PROXY_CONNECTION[parser->index]) {
1285 parser->header_state = h_general;
1286 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1287 parser->header_state = h_connection;
1288 }
1289 break;
1290
1291 /* content-length */
1292
1293 case h_matching_content_length:
1294 parser->index++;
1295 if (parser->index > sizeof(CONTENT_LENGTH)-1
1296 || c != CONTENT_LENGTH[parser->index]) {
1297 parser->header_state = h_general;
1298 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1299 parser->header_state = h_content_length;
1300 }
1301 break;
1302
1303 /* transfer-encoding */
1304
1305 case h_matching_transfer_encoding:
1306 parser->index++;
1307 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1308 || c != TRANSFER_ENCODING[parser->index]) {
1309 parser->header_state = h_general;
1310 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1311 parser->header_state = h_transfer_encoding;
1312 }
1313 break;
1314
1315 /* upgrade */
1316
1317 case h_matching_upgrade:
1318 parser->index++;
1319 if (parser->index > sizeof(UPGRADE)-1
1320 || c != UPGRADE[parser->index]) {
1321 parser->header_state = h_general;
1322 } else if (parser->index == sizeof(UPGRADE)-2) {
1323 parser->header_state = h_upgrade;
1324 }
1325 break;
1326
1327 case h_connection:
1328 case h_content_length:
1329 case h_transfer_encoding:
1330 case h_upgrade:
1331 if (ch != ' ') parser->header_state = h_general;
1332 break;
1333
1334 default:
1335 assert(0 && "Unknown header_state");
1336 break;
1337 }
1338 }
1339
1340 COUNT_HEADER_SIZE(p - start);
1341
1342 if (p == data + len) {
1343 --p;
1344 break;
1345 }
1346
1347 if (ch == ':') {
1348 UPDATE_STATE(s_header_value_discard_ws);
1349 CALLBACK_DATA(header_field);
1350 break;
1351 }
1352
1353 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1354 goto error;
1355 }
1356
1357 case s_header_value_discard_ws:
1358 if (ch == ' ' || ch == '\t') break;
1359
1360 if (ch == CR) {
1361 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1362 break;
1363 }
1364
1365 if (ch == LF) {
1366 UPDATE_STATE(s_header_value_discard_lws);
1367 break;
1368 }
1369
1370 /* FALLTHROUGH */
1371
1372 case s_header_value_start:
1373 {
1374 MARK(header_value);
1375
1376 UPDATE_STATE(s_header_value);
1377 parser->index = 0;
1378
1379 c = LOWER(ch);
1380
1381 switch (parser->header_state) {
1382 case h_upgrade:
1383 parser->flags |= F_UPGRADE;
1384 parser->header_state = h_general;
1385 break;
1386
1387 case h_transfer_encoding:
1388 /* looking for 'Transfer-Encoding: chunked' */
1389 if ('c' == c) {
1390 parser->header_state = h_matching_transfer_encoding_chunked;
1391 } else {
1392 parser->header_state = h_general;
1393 }
1394 break;
1395
1396 case h_content_length:
1397 if (UNLIKELY(!IS_NUM(ch))) {
1398 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1399 goto error;
1400 }
1401
1402 if (parser->flags & F_CONTENTLENGTH) {
1403 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1404 goto error;
1405 }
1406
1407 parser->flags |= F_CONTENTLENGTH;
1408 parser->content_length = ch - '0';
1409 break;
1410
1411 case h_connection:
1412 /* looking for 'Connection: keep-alive' */
1413 if (c == 'k') {
1414 parser->header_state = h_matching_connection_keep_alive;
1415 /* looking for 'Connection: close' */
1416 } else if (c == 'c') {
1417 parser->header_state = h_matching_connection_close;
1418 } else if (c == 'u') {
1419 parser->header_state = h_matching_connection_upgrade;
1420 } else {
1421 parser->header_state = h_matching_connection_token;
1422 }
1423 break;
1424
1425 /* Multi-value `Connection` header */
1426 case h_matching_connection_token_start:
1427 break;
1428
1429 default:
1430 parser->header_state = h_general;
1431 break;
1432 }
1433 break;
1434 }
1435
1436 case s_header_value:
1437 {
1438 const char* start = p;
1439 enum header_states h_state = (enum header_states) parser->header_state;
1440 for (; p != data + len; p++) {
1441 ch = *p;
1442 if (ch == CR) {
1443 UPDATE_STATE(s_header_almost_done);
1444 parser->header_state = h_state;
1445 CALLBACK_DATA(header_value);
1446 break;
1447 }
1448
1449 if (ch == LF) {
1450 UPDATE_STATE(s_header_almost_done);
1451 COUNT_HEADER_SIZE(p - start);
1452 parser->header_state = h_state;
1453 CALLBACK_DATA_NOADVANCE(header_value);
1454 REEXECUTE();
1455 }
1456
1457 if (!lenient && !IS_HEADER_CHAR(ch)) {
1458 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1459 goto error;
1460 }
1461
1462 c = LOWER(ch);
1463
1464 switch (h_state) {
1465 case h_general:
1466 {
1467 const char* p_cr;
1468 const char* p_lf;
1469 size_t limit = data + len - p;
1470
1471 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1472
1473 p_cr = (const char*) memchr(p, CR, limit);
1474 p_lf = (const char*) memchr(p, LF, limit);
1475 if (p_cr != NULL) {
1476 if (p_lf != NULL && p_cr >= p_lf)
1477 p = p_lf;
1478 else
1479 p = p_cr;
1480 } else if (UNLIKELY(p_lf != NULL)) {
1481 p = p_lf;
1482 } else {
1483 p = data + len;
1484 }
1485 --p;
1486
1487 break;
1488 }
1489
1490 case h_connection:
1491 case h_transfer_encoding:
1492 assert(0 && "Shouldn't get here.");
1493 break;
1494
1495 case h_content_length:
1496 {
1497 uint64_t t;
1498
1499 if (ch == ' ') break;
1500
1501 if (UNLIKELY(!IS_NUM(ch))) {
1502 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1503 parser->header_state = h_state;
1504 goto error;
1505 }
1506
1507 t = parser->content_length;
1508 t *= 10;
1509 t += ch - '0';
1510
1511 /* Overflow? Test against a conservative limit for simplicity. */
1512 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1513 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1514 parser->header_state = h_state;
1515 goto error;
1516 }
1517
1518 parser->content_length = t;
1519 break;
1520 }
1521
1522 /* Transfer-Encoding: chunked */
1523 case h_matching_transfer_encoding_chunked:
1524 parser->index++;
1525 if (parser->index > sizeof(CHUNKED)-1
1526 || c != CHUNKED[parser->index]) {
1527 h_state = h_general;
1528 } else if (parser->index == sizeof(CHUNKED)-2) {
1529 h_state = h_transfer_encoding_chunked;
1530 }
1531 break;
1532
1533 case h_matching_connection_token_start:
1534 /* looking for 'Connection: keep-alive' */
1535 if (c == 'k') {
1536 h_state = h_matching_connection_keep_alive;
1537 /* looking for 'Connection: close' */
1538 } else if (c == 'c') {
1539 h_state = h_matching_connection_close;
1540 } else if (c == 'u') {
1541 h_state = h_matching_connection_upgrade;
1542 } else if (STRICT_TOKEN(c)) {
1543 h_state = h_matching_connection_token;
1544 } else if (c == ' ' || c == '\t') {
1545 /* Skip lws */
1546 } else {
1547 h_state = h_general;
1548 }
1549 break;
1550
1551 /* looking for 'Connection: keep-alive' */
1552 case h_matching_connection_keep_alive:
1553 parser->index++;
1554 if (parser->index > sizeof(KEEP_ALIVE)-1
1555 || c != KEEP_ALIVE[parser->index]) {
1556 h_state = h_matching_connection_token;
1557 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1558 h_state = h_connection_keep_alive;
1559 }
1560 break;
1561
1562 /* looking for 'Connection: close' */
1563 case h_matching_connection_close:
1564 parser->index++;
1565 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1566 h_state = h_matching_connection_token;
1567 } else if (parser->index == sizeof(CLOSE)-2) {
1568 h_state = h_connection_close;
1569 }
1570 break;
1571
1572 /* looking for 'Connection: upgrade' */
1573 case h_matching_connection_upgrade:
1574 parser->index++;
1575 if (parser->index > sizeof(UPGRADE) - 1 ||
1576 c != UPGRADE[parser->index]) {
1577 h_state = h_matching_connection_token;
1578 } else if (parser->index == sizeof(UPGRADE)-2) {
1579 h_state = h_connection_upgrade;
1580 }
1581 break;
1582
1583 case h_matching_connection_token:
1584 if (ch == ',') {
1585 h_state = h_matching_connection_token_start;
1586 parser->index = 0;
1587 }
1588 break;
1589
1590 case h_transfer_encoding_chunked:
1591 if (ch != ' ') h_state = h_general;
1592 break;
1593
1594 case h_connection_keep_alive:
1595 case h_connection_close:
1596 case h_connection_upgrade:
1597 if (ch == ',') {
1598 if (h_state == h_connection_keep_alive) {
1599 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1600 } else if (h_state == h_connection_close) {
1601 parser->flags |= F_CONNECTION_CLOSE;
1602 } else if (h_state == h_connection_upgrade) {
1603 parser->flags |= F_CONNECTION_UPGRADE;
1604 }
1605 h_state = h_matching_connection_token_start;
1606 parser->index = 0;
1607 } else if (ch != ' ') {
1608 h_state = h_matching_connection_token;
1609 }
1610 break;
1611
1612 default:
1613 UPDATE_STATE(s_header_value);
1614 h_state = h_general;
1615 break;
1616 }
1617 }
1618 parser->header_state = h_state;
1619
1620 COUNT_HEADER_SIZE(p - start);
1621
1622 if (p == data + len)
1623 --p;
1624 break;
1625 }
1626
1627 case s_header_almost_done:
1628 {
1629 if (UNLIKELY(ch != LF)) {
1630 SET_ERRNO(HPE_LF_EXPECTED);
1631 goto error;
1632 }
1633
1634 UPDATE_STATE(s_header_value_lws);
1635 break;
1636 }
1637
1638 case s_header_value_lws:
1639 {
1640 if (ch == ' ' || ch == '\t') {
1641 UPDATE_STATE(s_header_value_start);
1642 REEXECUTE();
1643 }
1644
1645 /* finished the header */
1646 switch (parser->header_state) {
1647 case h_connection_keep_alive:
1648 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1649 break;
1650 case h_connection_close:
1651 parser->flags |= F_CONNECTION_CLOSE;
1652 break;
1653 case h_transfer_encoding_chunked:
1654 parser->flags |= F_CHUNKED;
1655 break;
1656 case h_connection_upgrade:
1657 parser->flags |= F_CONNECTION_UPGRADE;
1658 break;
1659 default:
1660 break;
1661 }
1662
1663 UPDATE_STATE(s_header_field_start);
1664 REEXECUTE();
1665 }
1666
1667 case s_header_value_discard_ws_almost_done:
1668 {
1669 STRICT_CHECK(ch != LF);
1670 UPDATE_STATE(s_header_value_discard_lws);
1671 break;
1672 }
1673
1674 case s_header_value_discard_lws:
1675 {
1676 if (ch == ' ' || ch == '\t') {
1677 UPDATE_STATE(s_header_value_discard_ws);
1678 break;
1679 } else {
1680 switch (parser->header_state) {
1681 case h_connection_keep_alive:
1682 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1683 break;
1684 case h_connection_close:
1685 parser->flags |= F_CONNECTION_CLOSE;
1686 break;
1687 case h_connection_upgrade:
1688 parser->flags |= F_CONNECTION_UPGRADE;
1689 break;
1690 case h_transfer_encoding_chunked:
1691 parser->flags |= F_CHUNKED;
1692 break;
1693 default:
1694 break;
1695 }
1696
1697 /* header value was empty */
1698 MARK(header_value);
1699 UPDATE_STATE(s_header_field_start);
1700 CALLBACK_DATA_NOADVANCE(header_value);
1701 REEXECUTE();
1702 }
1703 }
1704
1705 case s_headers_almost_done:
1706 {
1707 STRICT_CHECK(ch != LF);
1708
1709 if (parser->flags & F_TRAILING) {
1710 /* End of a chunked request */
1711 UPDATE_STATE(s_message_done);
1712 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1713 REEXECUTE();
1714 }
1715
1716 /* Cannot use chunked encoding and a content-length header together
1717 per the HTTP specification. */
1718 if ((parser->flags & F_CHUNKED) &&
1719 (parser->flags & F_CONTENTLENGTH)) {
1720 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1721 goto error;
1722 }
1723
1724 UPDATE_STATE(s_headers_done);
1725
1726 /* Set this here so that on_headers_complete() callbacks can see it */
1727 if ((parser->flags & F_UPGRADE) &&
1728 (parser->flags & F_CONNECTION_UPGRADE)) {
1729 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1730 * mandatory only when it is a 101 Switching Protocols response,
1731 * otherwise it is purely informational, to announce support.
1732 */
1733 parser->upgrade =
1734 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1735 } else {
1736 parser->upgrade = (parser->method == HTTP_CONNECT);
1737 }
1738
1739 /* Here we call the headers_complete callback. This is somewhat
1740 * different than other callbacks because if the user returns 1, we
1741 * will interpret that as saying that this message has no body. This
1742 * is needed for the annoying case of receiving a response to a HEAD
1743 * request.
1744 *
1745 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1746 * we have to simulate it by handling a change in errno below.
1747 */
1748 if (settings->on_headers_complete) {
1749 switch (settings->on_headers_complete(parser)) {
1750 case 0:
1751 break;
1752
1753 case 2:
1754 parser->upgrade = 1;
1755
1756 /* FALLTHROUGH */
1757 case 1:
1758 parser->flags |= F_SKIPBODY;
1759 break;
1760
1761 default:
1762 SET_ERRNO(HPE_CB_headers_complete);
1763 RETURN(p - data); /* Error */
1764 }
1765 }
1766
1767 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1768 RETURN(p - data);
1769 }
1770
1771 REEXECUTE();
1772 }
1773
1774 case s_headers_done:
1775 {
1776 int hasBody;
1777 STRICT_CHECK(ch != LF);
1778
1779 parser->nread = 0;
1780
1781 hasBody = parser->flags & F_CHUNKED ||
1782 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1783 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1784 (parser->flags & F_SKIPBODY) || !hasBody)) {
1785 /* Exit, the rest of the message is in a different protocol. */
1786 UPDATE_STATE(NEW_MESSAGE());
1787 CALLBACK_NOTIFY(message_complete);
1788 RETURN((p - data) + 1);
1789 }
1790
1791 if (parser->flags & F_SKIPBODY) {
1792 UPDATE_STATE(NEW_MESSAGE());
1793 CALLBACK_NOTIFY(message_complete);
1794 } else if (parser->flags & F_CHUNKED) {
1795 /* chunked encoding - ignore Content-Length header */
1796 UPDATE_STATE(s_chunk_size_start);
1797 } else {
1798 if (parser->content_length == 0) {
1799 /* Content-Length header given but zero: Content-Length: 0\r\n */
1800 UPDATE_STATE(NEW_MESSAGE());
1801 CALLBACK_NOTIFY(message_complete);
1802 } else if (parser->content_length != ULLONG_MAX) {
1803 /* Content-Length header given and non-zero */
1804 UPDATE_STATE(s_body_identity);
1805 } else {
1806 if (!http_message_needs_eof(parser)) {
1807 /* Assume content-length 0 - read the next */
1808 UPDATE_STATE(NEW_MESSAGE());
1809 CALLBACK_NOTIFY(message_complete);
1810 } else {
1811 /* Read body until EOF */
1812 UPDATE_STATE(s_body_identity_eof);
1813 }
1814 }
1815 }
1816
1817 break;
1818 }
1819
1820 case s_body_identity:
1821 {
1822 uint64_t to_read = MIN(parser->content_length,
1823 (uint64_t) ((data + len) - p));
1824
1825 assert(parser->content_length != 0
1826 && parser->content_length != ULLONG_MAX);
1827
1828 /* The difference between advancing content_length and p is because
1829 * the latter will automatically advance on the next loop iteration.
1830 * Further, if content_length ends up at 0, we want to see the last
1831 * byte again for our message complete callback.
1832 */
1833 MARK(body);
1834 parser->content_length -= to_read;
1835 p += to_read - 1;
1836
1837 if (parser->content_length == 0) {
1838 UPDATE_STATE(s_message_done);
1839
1840 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1841 *
1842 * The alternative to doing this is to wait for the next byte to
1843 * trigger the data callback, just as in every other case. The
1844 * problem with this is that this makes it difficult for the test
1845 * harness to distinguish between complete-on-EOF and
1846 * complete-on-length. It's not clear that this distinction is
1847 * important for applications, but let's keep it for now.
1848 */
1849 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1850 REEXECUTE();
1851 }
1852
1853 break;
1854 }
1855
1856 /* read until EOF */
1857 case s_body_identity_eof:
1858 MARK(body);
1859 p = data + len - 1;
1860
1861 break;
1862
1863 case s_message_done:
1864 UPDATE_STATE(NEW_MESSAGE());
1865 CALLBACK_NOTIFY(message_complete);
1866 if (parser->upgrade) {
1867 /* Exit, the rest of the message is in a different protocol. */
1868 RETURN((p - data) + 1);
1869 }
1870 break;
1871
1872 case s_chunk_size_start:
1873 {
1874 assert(parser->nread == 1);
1875 assert(parser->flags & F_CHUNKED);
1876
1877 unhex_val = unhex[(unsigned char)ch];
1878 if (UNLIKELY(unhex_val == -1)) {
1879 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1880 goto error;
1881 }
1882
1883 parser->content_length = unhex_val;
1884 UPDATE_STATE(s_chunk_size);
1885 break;
1886 }
1887
1888 case s_chunk_size:
1889 {
1890 uint64_t t;
1891
1892 assert(parser->flags & F_CHUNKED);
1893
1894 if (ch == CR) {
1895 UPDATE_STATE(s_chunk_size_almost_done);
1896 break;
1897 }
1898
1899 unhex_val = unhex[(unsigned char)ch];
1900
1901 if (unhex_val == -1) {
1902 if (ch == ';' || ch == ' ') {
1903 UPDATE_STATE(s_chunk_parameters);
1904 break;
1905 }
1906
1907 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1908 goto error;
1909 }
1910
1911 t = parser->content_length;
1912 t *= 16;
1913 t += unhex_val;
1914
1915 /* Overflow? Test against a conservative limit for simplicity. */
1916 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1917 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1918 goto error;
1919 }
1920
1921 parser->content_length = t;
1922 break;
1923 }
1924
1925 case s_chunk_parameters:
1926 {
1927 assert(parser->flags & F_CHUNKED);
1928 /* just ignore this shit. TODO check for overflow */
1929 if (ch == CR) {
1930 UPDATE_STATE(s_chunk_size_almost_done);
1931 break;
1932 }
1933 break;
1934 }
1935
1936 case s_chunk_size_almost_done:
1937 {
1938 assert(parser->flags & F_CHUNKED);
1939 STRICT_CHECK(ch != LF);
1940
1941 parser->nread = 0;
1942
1943 if (parser->content_length == 0) {
1944 parser->flags |= F_TRAILING;
1945 UPDATE_STATE(s_header_field_start);
1946 } else {
1947 UPDATE_STATE(s_chunk_data);
1948 }
1949 CALLBACK_NOTIFY(chunk_header);
1950 break;
1951 }
1952
1953 case s_chunk_data:
1954 {
1955 uint64_t to_read = MIN(parser->content_length,
1956 (uint64_t) ((data + len) - p));
1957
1958 assert(parser->flags & F_CHUNKED);
1959 assert(parser->content_length != 0
1960 && parser->content_length != ULLONG_MAX);
1961
1962 /* See the explanation in s_body_identity for why the content
1963 * length and data pointers are managed this way.
1964 */
1965 MARK(body);
1966 parser->content_length -= to_read;
1967 p += to_read - 1;
1968
1969 if (parser->content_length == 0) {
1970 UPDATE_STATE(s_chunk_data_almost_done);
1971 }
1972
1973 break;
1974 }
1975
1976 case s_chunk_data_almost_done:
1977 assert(parser->flags & F_CHUNKED);
1978 assert(parser->content_length == 0);
1979 STRICT_CHECK(ch != CR);
1980 UPDATE_STATE(s_chunk_data_done);
1981 CALLBACK_DATA(body);
1982 break;
1983
1984 case s_chunk_data_done:
1985 assert(parser->flags & F_CHUNKED);
1986 STRICT_CHECK(ch != LF);
1987 parser->nread = 0;
1988 UPDATE_STATE(s_chunk_size_start);
1989 CALLBACK_NOTIFY(chunk_complete);
1990 break;
1991
1992 default:
1993 assert(0 && "unhandled state");
1994 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1995 goto error;
1996 }
1997 }
1998
1999 /* Run callbacks for any marks that we have leftover after we ran out of
2000 * bytes. There should be at most one of these set, so it's OK to invoke
2001 * them in series (unset marks will not result in callbacks).
2002 *
2003 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2004 * overflowed 'data' and this allows us to correct for the off-by-one that
2005 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2006 * value that's in-bounds).
2007 */
2008
2009 assert(((header_field_mark ? 1 : 0) +
2010 (header_value_mark ? 1 : 0) +
2011 (url_mark ? 1 : 0) +
2012 (body_mark ? 1 : 0) +
2013 (status_mark ? 1 : 0)) <= 1);
2014
2015 CALLBACK_DATA_NOADVANCE(header_field);
2016 CALLBACK_DATA_NOADVANCE(header_value);
2017 CALLBACK_DATA_NOADVANCE(url);
2018 CALLBACK_DATA_NOADVANCE(body);
2019 CALLBACK_DATA_NOADVANCE(status);
2020
2021 RETURN(len);
2022
2023 error:
2024 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2025 SET_ERRNO(HPE_UNKNOWN);
2026 }
2027
2028 RETURN(p - data);
2029 }
2030
2031
2032 /* Does the parser need to see an EOF to find the end of the message? */
2033 int
2034 http_message_needs_eof (const http_parser *parser)
2035 {
2036 if (parser->type == HTTP_REQUEST) {
2037 return 0;
2038 }
2039
2040 /* See RFC 2616 section 4.4 */
2041 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2042 parser->status_code == 204 || /* No Content */
2043 parser->status_code == 304 || /* Not Modified */
2044 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2045 return 0;
2046 }
2047
2048 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2049 return 0;
2050 }
2051
2052 return 1;
2053 }
2054
2055
2056 int
2057 http_should_keep_alive (const http_parser *parser)
2058 {
2059 if (parser->http_major > 0 && parser->http_minor > 0) {
2060 /* HTTP/1.1 */
2061 if (parser->flags & F_CONNECTION_CLOSE) {
2062 return 0;
2063 }
2064 } else {
2065 /* HTTP/1.0 or earlier */
2066 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2067 return 0;
2068 }
2069 }
2070
2071 return !http_message_needs_eof(parser);
2072 }
2073
2074
2075 const char *
2076 http_method_str (enum http_method m)
2077 {
2078 return ELEM_AT(method_strings, m, "<unknown>");
2079 }
2080
2081
2082 void
2083 http_parser_init (http_parser *parser, enum http_parser_type t)
2084 {
2085 void *data = parser->data; /* preserve application data */
2086 memset(parser, 0, sizeof(*parser));
2087 parser->data = data;
2088 parser->type = t;
2089 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2090 parser->http_errno = HPE_OK;
2091 }
2092
2093 void
2094 http_parser_settings_init(http_parser_settings *settings)
2095 {
2096 memset(settings, 0, sizeof(*settings));
2097 }
2098
2099 const char *
2100 http_errno_name(enum http_errno err) {
2101 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2102 return http_strerror_tab[err].name;
2103 }
2104
2105 const char *
2106 http_errno_description(enum http_errno err) {
2107 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2108 return http_strerror_tab[err].description;
2109 }
2110
2111 static enum http_host_state
2112 http_parse_host_char(enum http_host_state s, const char ch) {
2113 switch(s) {
2114 case s_http_userinfo:
2115 case s_http_userinfo_start:
2116 if (ch == '@') {
2117 return s_http_host_start;
2118 }
2119
2120 if (IS_USERINFO_CHAR(ch)) {
2121 return s_http_userinfo;
2122 }
2123 break;
2124
2125 case s_http_host_start:
2126 if (ch == '[') {
2127 return s_http_host_v6_start;
2128 }
2129
2130 if (IS_HOST_CHAR(ch)) {
2131 return s_http_host;
2132 }
2133
2134 break;
2135
2136 case s_http_host:
2137 if (IS_HOST_CHAR(ch)) {
2138 return s_http_host;
2139 }
2140
2141 /* FALLTHROUGH */
2142 case s_http_host_v6_end:
2143 if (ch == ':') {
2144 return s_http_host_port_start;
2145 }
2146
2147 break;
2148
2149 case s_http_host_v6:
2150 if (ch == ']') {
2151 return s_http_host_v6_end;
2152 }
2153
2154 /* FALLTHROUGH */
2155 case s_http_host_v6_start:
2156 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2157 return s_http_host_v6;
2158 }
2159
2160 if (s == s_http_host_v6 && ch == '%') {
2161 return s_http_host_v6_zone_start;
2162 }
2163 break;
2164
2165 case s_http_host_v6_zone:
2166 if (ch == ']') {
2167 return s_http_host_v6_end;
2168 }
2169
2170 /* FALLTHROUGH */
2171 case s_http_host_v6_zone_start:
2172 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2173 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2174 ch == '~') {
2175 return s_http_host_v6_zone;
2176 }
2177 break;
2178
2179 case s_http_host_port:
2180 case s_http_host_port_start:
2181 if (IS_NUM(ch)) {
2182 return s_http_host_port;
2183 }
2184
2185 break;
2186
2187 default:
2188 break;
2189 }
2190 return s_http_host_dead;
2191 }
2192
2193 static int
2194 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2195 enum http_host_state s;
2196
2197 const char *p;
2198 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2199
2200 assert(u->field_set & (1 << UF_HOST));
2201
2202 u->field_data[UF_HOST].len = 0;
2203
2204 s = found_at ? s_http_userinfo_start : s_http_host_start;
2205
2206 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2207 enum http_host_state new_s = http_parse_host_char(s, *p);
2208
2209 if (new_s == s_http_host_dead) {
2210 return 1;
2211 }
2212
2213 switch(new_s) {
2214 case s_http_host:
2215 if (s != s_http_host) {
2216 u->field_data[UF_HOST].off = p - buf;
2217 }
2218 u->field_data[UF_HOST].len++;
2219 break;
2220
2221 case s_http_host_v6:
2222 if (s != s_http_host_v6) {
2223 u->field_data[UF_HOST].off = p - buf;
2224 }
2225 u->field_data[UF_HOST].len++;
2226 break;
2227
2228 case s_http_host_v6_zone_start:
2229 case s_http_host_v6_zone:
2230 u->field_data[UF_HOST].len++;
2231 break;
2232
2233 case s_http_host_port:
2234 if (s != s_http_host_port) {
2235 u->field_data[UF_PORT].off = p - buf;
2236 u->field_data[UF_PORT].len = 0;
2237 u->field_set |= (1 << UF_PORT);
2238 }
2239 u->field_data[UF_PORT].len++;
2240 break;
2241
2242 case s_http_userinfo:
2243 if (s != s_http_userinfo) {
2244 u->field_data[UF_USERINFO].off = p - buf ;
2245 u->field_data[UF_USERINFO].len = 0;
2246 u->field_set |= (1 << UF_USERINFO);
2247 }
2248 u->field_data[UF_USERINFO].len++;
2249 break;
2250
2251 default:
2252 break;
2253 }
2254 s = new_s;
2255 }
2256
2257 /* Make sure we don't end somewhere unexpected */
2258 switch (s) {
2259 case s_http_host_start:
2260 case s_http_host_v6_start:
2261 case s_http_host_v6:
2262 case s_http_host_v6_zone_start:
2263 case s_http_host_v6_zone:
2264 case s_http_host_port_start:
2265 case s_http_userinfo:
2266 case s_http_userinfo_start:
2267 return 1;
2268 default:
2269 break;
2270 }
2271
2272 return 0;
2273 }
2274
2275 void
2276 http_parser_url_init(struct http_parser_url *u) {
2277 memset(u, 0, sizeof(*u));
2278 }
2279
2280 int
2281 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2282 struct http_parser_url *u)
2283 {
2284 enum state s;
2285 const char *p;
2286 enum http_parser_url_fields uf, old_uf;
2287 int found_at = 0;
2288
2289 u->port = u->field_set = 0;
2290 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2291 old_uf = UF_MAX;
2292
2293 for (p = buf; p < buf + buflen; p++) {
2294 s = parse_url_char(s, *p);
2295
2296 /* Figure out the next field that we're operating on */
2297 switch (s) {
2298 case s_dead:
2299 return 1;
2300
2301 /* Skip delimeters */
2302 case s_req_schema_slash:
2303 case s_req_schema_slash_slash:
2304 case s_req_server_start:
2305 case s_req_query_string_start:
2306 case s_req_fragment_start:
2307 continue;
2308
2309 case s_req_schema:
2310 uf = UF_SCHEMA;
2311 break;
2312
2313 case s_req_server_with_at:
2314 found_at = 1;
2315
2316 /* FALLTHROUGH */
2317 case s_req_server:
2318 uf = UF_HOST;
2319 break;
2320
2321 case s_req_path:
2322 uf = UF_PATH;
2323 break;
2324
2325 case s_req_query_string:
2326 uf = UF_QUERY;
2327 break;
2328
2329 case s_req_fragment:
2330 uf = UF_FRAGMENT;
2331 break;
2332
2333 default:
2334 assert(!"Unexpected state");
2335 return 1;
2336 }
2337
2338 /* Nothing's changed; soldier on */
2339 if (uf == old_uf) {
2340 u->field_data[uf].len++;
2341 continue;
2342 }
2343
2344 u->field_data[uf].off = p - buf;
2345 u->field_data[uf].len = 1;
2346
2347 u->field_set |= (1 << uf);
2348 old_uf = uf;
2349 }
2350
2351 /* host must be present if there is a schema */
2352 /* parsing http:///toto will fail */
2353 if ((u->field_set & (1 << UF_SCHEMA)) &&
2354 (u->field_set & (1 << UF_HOST)) == 0) {
2355 return 1;
2356 }
2357
2358 if (u->field_set & (1 << UF_HOST)) {
2359 if (http_parse_host(buf, u, found_at) != 0) {
2360 return 1;
2361 }
2362 }
2363
2364 /* CONNECT requests can only contain "hostname:port" */
2365 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2366 return 1;
2367 }
2368
2369 if (u->field_set & (1 << UF_PORT)) {
2370 /* Don't bother with endp; we've already validated the string */
2371 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2372
2373 /* Ports have a max value of 2^16 */
2374 if (v > 0xffff) {
2375 return 1;
2376 }
2377
2378 u->port = (uint16_t) v;
2379 }
2380
2381 return 0;
2382 }
2383
2384 void
2385 http_parser_pause(http_parser *parser, int paused) {
2386 /* Users should only be pausing/unpausing a parser that is not in an error
2387 * state. In non-debug builds, there's not much that we can do about this
2388 * other than ignore it.
2389 */
2390 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2391 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2392 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2393 } else {
2394 assert(0 && "Attempting to pause parser in error state");
2395 }
2396 }
2397
2398 int
2399 http_body_is_final(const struct http_parser *parser) {
2400 return parser->state == s_message_done;
2401 }
2402
2403 unsigned long
2404 http_parser_version(void) {
2405 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2406 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2407 HTTP_PARSER_VERSION_PATCH * 0x00001;
2408 }