"Fossies" - the Fresh Open Source Software Archive

Member "wrk-4.2.0/src/http_parser.c" (7 Feb 2021, 69700 Bytes) of package /linux/www/wrk-4.2.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML, using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "http_parser.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.0.2_vs_4.1.0.

    1 /* Copyright Joyent, Inc. and other Node contributors.
    2  *
    3  * Permission is hereby granted, free of charge, to any person obtaining a copy
    4  * of this software and associated documentation files (the "Software"), to
    5  * deal in the Software without restriction, including without limitation the
    6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    7  * sell copies of the Software, and to permit persons to whom the Software is
    8  * furnished to do so, subject to the following conditions:
    9  *
   10  * The above copyright notice and this permission notice shall be included in
   11  * all copies or substantial portions of the Software.
   12  *
   13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   19  * IN THE SOFTWARE.
   20  */
   21 #include "http_parser.h"
   22 #include <assert.h>
   23 #include <stddef.h>
   24 #include <ctype.h>
   25 #include <stdlib.h>
   26 #include <string.h>
   27 #include <limits.h>
   28 
   29 #ifndef ULLONG_MAX
   30 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
   31 #endif
   32 /* Smaller of a and b; the selected argument is evaluated twice. */
   33 #ifndef MIN
   34 # define MIN(a,b) ((a) < (b) ? (a) : (b))
   35 #endif
   36 /* Number of elements in array a (sizeof-based, so a must be an array, not a pointer). */
   37 #ifndef ARRAY_SIZE
   38 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
   39 #endif
   40 /* 1 if bit (i & 7) of byte a[i >> 3] is set, else 0. */
   41 #ifndef BIT_AT
   42 # define BIT_AT(a, i)                                                \
   43   (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   44    (1 << ((unsigned int) (i) & 7))))
   45 #endif
   46 /* a[i] when i is below ARRAY_SIZE(a), otherwise the fallback value v. */
   47 #ifndef ELEM_AT
   48 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
   49 #endif
   50 /* Store error code e in parser->http_errno; expects a variable named `parser` in scope. */
   51 #define SET_ERRNO(e)                                                 \
   52 do {                                                                 \
   53   parser->http_errno = (e);                                          \
   54 } while(0)
   55 /* The working state is the local `p_state`; UPDATE_STATE, RETURN and REEXECUTE expand with their own trailing ';'. */
   56 #define CURRENT_STATE() p_state
   57 #define UPDATE_STATE(V) p_state = (enum state) (V);
   58 #define RETURN(V)                                                    \
   59 do {                                                                 \
   60   parser->state = CURRENT_STATE();                                   \
   61   return (V);                                                        \
   62 } while (0);
   63 #define REEXECUTE()                                                  \
   64   goto reexecute;                                                    \
   65 
   66 /* Branch hints: __builtin_expect(!!(X), 1 or 0) under __GNUC__, plain (X) otherwise. */
   67 #ifdef __GNUC__
   68 # define LIKELY(X) __builtin_expect(!!(X), 1)
   69 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
   70 #else
   71 # define LIKELY(X) (X)
   72 # define UNLIKELY(X) (X)
   73 #endif
   74 
   75 
   76 /* Run settings->on_FOR(parser) if set (a non-zero result sets HPE_CB_FOR); return ER if the errno is then not HPE_OK */
   77 #define CALLBACK_NOTIFY_(FOR, ER)                                    \
   78 do {                                                                 \
   79   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
   80                                                                      \
   81   if (LIKELY(settings->on_##FOR)) {                                  \
   82     parser->state = CURRENT_STATE();                                 \
   83     if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
   84       SET_ERRNO(HPE_CB_##FOR);                                       \
   85     }                                                                \
   86     UPDATE_STATE(parser->state);                                     \
   87                                                                      \
   88     /* We either errored above or got paused; get out */             \
   89     if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
   90       return (ER);                                                   \
   91     }                                                                \
   92   }                                                                  \
   93 } while (0)
   94 
   95 /* Run the notify callback FOR and consume the current byte */
   96 #define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
   97 
   98 /* Run the notify callback FOR and don't consume the current byte */
   99 #define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
  100 
  101 /* If FOR_mark is set: run settings->on_FOR(parser, FOR_mark, LEN) when present, return ER on error, otherwise clear FOR_mark */
  102 #define CALLBACK_DATA_(FOR, LEN, ER)                                 \
  103 do {                                                                 \
  104   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
  105                                                                      \
  106   if (FOR##_mark) {                                                  \
  107     if (LIKELY(settings->on_##FOR)) {                                \
  108       parser->state = CURRENT_STATE();                               \
  109       if (UNLIKELY(0 !=                                              \
  110                    settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
  111         SET_ERRNO(HPE_CB_##FOR);                                     \
  112       }                                                              \
  113       UPDATE_STATE(parser->state);                                   \
  114                                                                      \
  115       /* We either errored above or got paused; get out */           \
  116       if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
  117         return (ER);                                                 \
  118       }                                                              \
  119     }                                                                \
  120     FOR##_mark = NULL;                                               \
  121   }                                                                  \
  122 } while (0)
  123 
  124 /* Run the data callback FOR and consume the current byte */
  125 #define CALLBACK_DATA(FOR)                                           \
  126     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
  127 
  128 /* Run the data callback FOR and don't consume the current byte */
  129 #define CALLBACK_DATA_NOADVANCE(FOR)                                 \
  130     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
  131 
  132 /* Set the mark FOR to the current position p; non-destructive if mark is already set */
  133 #define MARK(FOR)                                                    \
  134 do {                                                                 \
  135   if (!FOR##_mark) {                                                 \
  136     FOR##_mark = p;                                                  \
  137   }                                                                  \
  138 } while (0)
  139 
  140 /* Don't allow the total size of the HTTP headers (including the status
  141  * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
  142  * embedders against denial-of-service attacks where the attacker feeds
  143  * us a never-ending header that the embedder keeps buffering.
  144  *
  145  * This check is arguably the responsibility of embedders but we're doing
  146  * it on the embedder's behalf because most won't bother and this way we
  147  * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
  148  * than any reasonable request or response so this should never affect
  149  * day-to-day operation.
  150  */
  151 #define COUNT_HEADER_SIZE(V)                                         \
  152 do {                                                                 \
  153   parser->nread += (V);                                              \
  154   if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
  155     SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
  156     goto error;                                                      \
  157   }                                                                  \
  158 } while (0)
  159 
  160 /* Lowercase header name/value literals. NOTE(review): presumably compared against lowercased input; the users are outside this part of the file. */
  161 #define PROXY_CONNECTION "proxy-connection"
  162 #define CONNECTION "connection"
  163 #define CONTENT_LENGTH "content-length"
  164 #define TRANSFER_ENCODING "transfer-encoding"
  165 #define UPGRADE "upgrade"
  166 #define CHUNKED "chunked"
  167 #define KEEP_ALIVE "keep-alive"
  168 #define CLOSE "close"
  169 
  170 /* Method names, generated from HTTP_METHOD_MAP: one stringified `string` per XX(num, name, string) entry. */
  171 static const char *method_strings[] =
  172   {
  173 #define XX(num, name, string) #string,
  174   HTTP_METHOD_MAP(XX)
  175 #undef XX
  176   };
  177 
  178 /* tokens[c]: the lowercased token character for byte c, or 0 if c is not a token character (entries 128-255 are implicitly 0). */
  179 /* Tokens as defined by rfc 2616. Also lowercases them.
  180  *        token       = 1*<any CHAR except CTLs or separators>
  181  *     separators     = "(" | ")" | "<" | ">" | "@"
  182  *                    | "," | ";" | ":" | "\" | <">
  183  *                    | "/" | "[" | "]" | "?" | "="
  184  *                    | "{" | "}" | SP | HT
  185  */
  186 static const char tokens[256] = {
  187 /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
  188         0,       0,       0,       0,       0,       0,       0,       0,
  189 /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
  190         0,       0,       0,       0,       0,       0,       0,       0,
  191 /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
  192         0,       0,       0,       0,       0,       0,       0,       0,
  193 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
  194         0,       0,       0,       0,       0,       0,       0,       0,
  195 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
  196         0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
  197 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
  198         0,       0,      '*',     '+',      0,      '-',     '.',      0,
  199 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
  200        '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
  201 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
  202        '8',     '9',      0,       0,       0,       0,       0,       0,
  203 /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
  204         0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
  205 /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
  206        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
  207 /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
  208        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
  209 /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
  210        'x',     'y',     'z',      0,       0,       0,      '^',     '_',
  211 /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
  212        '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
  213 /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
  214        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
  215 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
  216        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
  217 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
  218        'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
  219 
  220 
  221 static const int8_t unhex[256] =
  222   {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  223   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  224   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  225   , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  226   ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  227   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  228   ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  229   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  230   };
  231 
  232 /* T(v) wraps bits for characters accepted only when HTTP_PARSER_STRICT is off; it expands to 0 in strict mode. */
  233 #if HTTP_PARSER_STRICT
  234 # define T(v) 0
  235 #else
  236 # define T(v) v
  237 #endif
  238 
  239 /* Bitmap over character codes 0-127: code c is bit (c & 7) of byte c >> 3 (the layout BIT_AT reads); a set bit marks a normal URL character. */
  240 static const uint8_t normal_url_char[32] = {
  241 /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
  242         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
  243 /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
  244         0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
  245 /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
  246         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
  247 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
  248         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
  249 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
  250         0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
  251 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
  252         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  253 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
  254         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  255 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
  256         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
  257 /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
  258         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  259 /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
  260         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  261 /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
  262         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  263 /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
  264         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  265 /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
  266         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  267 /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
  268         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  269 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
  270         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
  271 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
  272         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
  273 
  274 #undef T
  275 
  276 enum state
  277   { s_dead = 1 /* important that this is > 0 */
  278 
  279   , s_start_req_or_res
  280   , s_res_or_resp_H
  281   , s_start_res
  282   , s_res_H
  283   , s_res_HT
  284   , s_res_HTT
  285   , s_res_HTTP
  286   , s_res_http_major
  287   , s_res_http_dot
  288   , s_res_http_minor
  289   , s_res_http_end
  290   , s_res_first_status_code
  291   , s_res_status_code
  292   , s_res_status_start
  293   , s_res_status
  294   , s_res_line_almost_done
  295 
  296   , s_start_req
  297 
  298   , s_req_method
  299   , s_req_spaces_before_url
  300   , s_req_schema
  301   , s_req_schema_slash
  302   , s_req_schema_slash_slash
  303   , s_req_server_start
  304   , s_req_server
  305   , s_req_server_with_at
  306   , s_req_path
  307   , s_req_query_string_start
  308   , s_req_query_string
  309   , s_req_fragment_start
  310   , s_req_fragment
  311   , s_req_http_start
  312   , s_req_http_H
  313   , s_req_http_HT
  314   , s_req_http_HTT
  315   , s_req_http_HTTP
  316   , s_req_http_major
  317   , s_req_http_dot
  318   , s_req_http_minor
  319   , s_req_http_end
  320   , s_req_line_almost_done
  321 
  322   , s_header_field_start
  323   , s_header_field
  324   , s_header_value_discard_ws
  325   , s_header_value_discard_ws_almost_done
  326   , s_header_value_discard_lws
  327   , s_header_value_start
  328   , s_header_value
  329   , s_header_value_lws
  330 
  331   , s_header_almost_done
  332 
  333   , s_chunk_size_start
  334   , s_chunk_size
  335   , s_chunk_parameters
  336   , s_chunk_size_almost_done
  337 
  338   , s_headers_almost_done
  339   , s_headers_done
  340 
  341   /* Important: 's_headers_done' must be the last 'header' state. All
  342    * states beyond this must be 'body' states. It is used for overflow
  343    * checking. See the PARSING_HEADER() macro.
  344    */
  345 
  346   , s_chunk_data
  347   , s_chunk_data_almost_done
  348   , s_chunk_data_done
  349 
  350   , s_body_identity
  351   , s_body_identity_eof
  352 
  353   , s_message_done
  354   };
  355 
  356 
  357 #define PARSING_HEADER(state) (state <= s_headers_done)
  358 
  359 
  360 enum header_states
  361   { h_general = 0
  362   , h_C
  363   , h_CO
  364   , h_CON
  365 
  366   , h_matching_connection
  367   , h_matching_proxy_connection
  368   , h_matching_content_length
  369   , h_matching_transfer_encoding
  370   , h_matching_upgrade
  371 
  372   , h_connection
  373   , h_content_length
  374   , h_transfer_encoding
  375   , h_upgrade
  376 
  377   , h_matching_transfer_encoding_chunked
  378   , h_matching_connection_token_start
  379   , h_matching_connection_keep_alive
  380   , h_matching_connection_close
  381   , h_matching_connection_upgrade
  382   , h_matching_connection_token
  383 
  384   , h_transfer_encoding_chunked
  385   , h_connection_keep_alive
  386   , h_connection_close
  387   , h_connection_upgrade
  388   };
  389 
  390 enum http_host_state
  391   {
  392     s_http_host_dead = 1
  393   , s_http_userinfo_start
  394   , s_http_userinfo
  395   , s_http_host_start
  396   , s_http_host_v6_start
  397   , s_http_host
  398   , s_http_host_v6
  399   , s_http_host_v6_end
  400   , s_http_host_v6_zone_start
  401   , s_http_host_v6_zone
  402   , s_http_host_port_start
  403   , s_http_host_port
  404 };
  405 
  406 /* Macros for character classes; depends on strict-mode  */
  407 #define CR                  '\r'
  408 #define LF                  '\n'
  409 #define LOWER(c)            (unsigned char)(c | 0x20)
  410 #define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
  411 #define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
  412 #define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
  413 #define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
  414 #define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  415   (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  416   (c) == ')')
  417 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  418   (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  419   (c) == '$' || (c) == ',')
  420 
  421 #define STRICT_TOKEN(c)     (tokens[(unsigned char)c])
  422 
  423 #if HTTP_PARSER_STRICT
  424 #define TOKEN(c)            (tokens[(unsigned char)c])
  425 #define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)c))
  426 #define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
  427 #else
  428 #define TOKEN(c)            ((c == ' ') ? ' ' : tokens[(unsigned char)c])
  429 #define IS_URL_CHAR(c)                                                         \
  430   (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
  431 #define IS_HOST_CHAR(c)                                                        \
  432   (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
  433 #endif
  434 
  435 /**
  436  * Verify that a char is a valid visible (printable) US-ASCII
  437  * character or %x80-FF
  438  **/
  439 #define IS_HEADER_CHAR(ch)                                                     \
  440   (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
  441 
  442 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
  443 
  444 /* Strict mode: STRICT_CHECK(cond) sets HPE_STRICT and jumps to `error` when cond holds, and NEW_MESSAGE() is s_dead unless http_should_keep_alive(parser). Otherwise STRICT_CHECK expands to nothing and NEW_MESSAGE() is start_state. */
  445 #if HTTP_PARSER_STRICT
  446 # define STRICT_CHECK(cond)                                          \
  447 do {                                                                 \
  448   if (cond) {                                                        \
  449     SET_ERRNO(HPE_STRICT);                                           \
  450     goto error;                                                      \
  451   }                                                                  \
  452 } while (0)
  453 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
  454 #else
  455 # define STRICT_CHECK(cond)
  456 # define NEW_MESSAGE() start_state
  457 #endif
  458 
  459 
  460 /* Map errno values to strings for human-readable output: one { "HPE_<name>", description } pair per HTTP_ERRNO_MAP entry */
  461 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
  462 static struct {
  463   const char *name;
  464   const char *description;
  465 } http_strerror_tab[] = {
  466   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
  467 };
  468 #undef HTTP_STRERROR_GEN
  469 
  470 int http_message_needs_eof(const http_parser *parser);
  471 
  472 /* Our URL parser.
  473  *
  474  * This is designed to be shared by http_parser_execute() for URL validation,
  475  * hence it has a state transition + byte-for-byte interface. In addition, it
  476  * is meant to be embedded in http_parser_parse_url(), which does the dirty
  477  * work of turning state transitions URL components for its API.
  478  *
  479  * This function should only be invoked with non-space characters. It is
  480  * assumed that the caller cares about (and can detect) the transition between
  481  * URL and non-URL states by looking for these.
  482  */
  483 static enum state
  484 parse_url_char(enum state s, const char ch)
  485 {
  486   if (ch == ' ' || ch == '\r' || ch == '\n') {
  487     return s_dead;
  488   }
  489 
  490 #if HTTP_PARSER_STRICT
  491   if (ch == '\t' || ch == '\f') {
  492     return s_dead;
  493   }
  494 #endif
  495 
  496   switch (s) {
  497     case s_req_spaces_before_url:
  498       /* Proxied requests are followed by scheme of an absolute URI (alpha).
  499        * All methods except CONNECT are followed by '/' or '*'.
  500        */
  501 
  502       if (ch == '/' || ch == '*') {
  503         return s_req_path;
  504       }
  505 
  506       if (IS_ALPHA(ch)) {
  507         return s_req_schema;
  508       }
  509 
  510       break;
  511 
  512     case s_req_schema:
  513       if (IS_ALPHA(ch)) {
  514         return s;
  515       }
  516 
  517       if (ch == ':') {
  518         return s_req_schema_slash;
  519       }
  520 
  521       break;
  522 
  523     case s_req_schema_slash:
  524       if (ch == '/') {
  525         return s_req_schema_slash_slash;
  526       }
  527 
  528       break;
  529 
  530     case s_req_schema_slash_slash:
  531       if (ch == '/') {
  532         return s_req_server_start;
  533       }
  534 
  535       break;
  536 
  537     case s_req_server_with_at:
  538       if (ch == '@') {
  539         return s_dead;
  540       }
  541 
  542     /* FALLTHROUGH */
  543     case s_req_server_start:
  544     case s_req_server:
  545       if (ch == '/') {
  546         return s_req_path;
  547       }
  548 
  549       if (ch == '?') {
  550         return s_req_query_string_start;
  551       }
  552 
  553       if (ch == '@') {
  554         return s_req_server_with_at;
  555       }
  556 
  557       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
  558         return s_req_server;
  559       }
  560 
  561       break;
  562 
  563     case s_req_path:
  564       if (IS_URL_CHAR(ch)) {
  565         return s;
  566       }
  567 
  568       switch (ch) {
  569         case '?':
  570           return s_req_query_string_start;
  571 
  572         case '#':
  573           return s_req_fragment_start;
  574       }
  575 
  576       break;
  577 
  578     case s_req_query_string_start:
  579     case s_req_query_string:
  580       if (IS_URL_CHAR(ch)) {
  581         return s_req_query_string;
  582       }
  583 
  584       switch (ch) {
  585         case '?':
  586           /* allow extra '?' in query string */
  587           return s_req_query_string;
  588 
  589         case '#':
  590           return s_req_fragment_start;
  591       }
  592 
  593       break;
  594 
  595     case s_req_fragment_start:
  596       if (IS_URL_CHAR(ch)) {
  597         return s_req_fragment;
  598       }
  599 
  600       switch (ch) {
  601         case '?':
  602           return s_req_fragment;
  603 
  604         case '#':
  605           return s;
  606       }
  607 
  608       break;
  609 
  610     case s_req_fragment:
  611       if (IS_URL_CHAR(ch)) {
  612         return s;
  613       }
  614 
  615       switch (ch) {
  616         case '?':
  617         case '#':
  618           return s;
  619       }
  620 
  621       break;
  622 
  623     default:
  624       break;
  625   }
  626 
  627   /* We should never fall out of the switch above unless there's an error */
  628   return s_dead;
  629 }
  630 
  631 size_t http_parser_execute (http_parser *parser,
  632                             const http_parser_settings *settings,
  633                             const char *data,
  634                             size_t len)
  635 {
  636   char c, ch;
  637   int8_t unhex_val;
  638   const char *p = data;
  639   const char *header_field_mark = 0;
  640   const char *header_value_mark = 0;
  641   const char *url_mark = 0;
  642   const char *body_mark = 0;
  643   const char *status_mark = 0;
  644   enum state p_state = (enum state) parser->state;
  645   const unsigned int lenient = parser->lenient_http_headers;
  646 
  647   /* We're in an error state. Don't bother doing anything. */
  648   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
  649     return 0;
  650   }
  651 
  652   if (len == 0) {
  653     switch (CURRENT_STATE()) {
  654       case s_body_identity_eof:
  655         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
  656          * we got paused.
  657          */
  658         CALLBACK_NOTIFY_NOADVANCE(message_complete);
  659         return 0;
  660 
  661       case s_dead:
  662       case s_start_req_or_res:
  663       case s_start_res:
  664       case s_start_req:
  665         return 0;
  666 
  667       default:
  668         SET_ERRNO(HPE_INVALID_EOF_STATE);
  669         return 1;
  670     }
  671   }
  672 
  673 
  674   if (CURRENT_STATE() == s_header_field)
  675     header_field_mark = data;
  676   if (CURRENT_STATE() == s_header_value)
  677     header_value_mark = data;
  678   switch (CURRENT_STATE()) {
  679   case s_req_path:
  680   case s_req_schema:
  681   case s_req_schema_slash:
  682   case s_req_schema_slash_slash:
  683   case s_req_server_start:
  684   case s_req_server:
  685   case s_req_server_with_at:
  686   case s_req_query_string_start:
  687   case s_req_query_string:
  688   case s_req_fragment_start:
  689   case s_req_fragment:
  690     url_mark = data;
  691     break;
  692   case s_res_status:
  693     status_mark = data;
  694     break;
  695   default:
  696     break;
  697   }
  698 
  699   for (p=data; p != data + len; p++) {
  700     ch = *p;
  701 
  702     if (PARSING_HEADER(CURRENT_STATE()))
  703       COUNT_HEADER_SIZE(1);
  704 
  705 reexecute:
  706     switch (CURRENT_STATE()) {
  707 
  708       case s_dead:
  709         /* this state is used after a 'Connection: close' message
  710          * the parser will error out if it reads another message
  711          */
  712         if (LIKELY(ch == CR || ch == LF))
  713           break;
  714 
  715         SET_ERRNO(HPE_CLOSED_CONNECTION);
  716         goto error;
  717 
  718       case s_start_req_or_res:
  719       {
  720         if (ch == CR || ch == LF)
  721           break;
  722         parser->flags = 0;
  723         parser->content_length = ULLONG_MAX;
  724 
  725         if (ch == 'H') {
  726           UPDATE_STATE(s_res_or_resp_H);
  727 
  728           CALLBACK_NOTIFY(message_begin);
  729         } else {
  730           parser->type = HTTP_REQUEST;
  731           UPDATE_STATE(s_start_req);
  732           REEXECUTE();
  733         }
  734 
  735         break;
  736       }
  737 
  738       case s_res_or_resp_H:
  739         if (ch == 'T') {
  740           parser->type = HTTP_RESPONSE;
  741           UPDATE_STATE(s_res_HT);
  742         } else {
  743           if (UNLIKELY(ch != 'E')) {
  744             SET_ERRNO(HPE_INVALID_CONSTANT);
  745             goto error;
  746           }
  747 
  748           parser->type = HTTP_REQUEST;
  749           parser->method = HTTP_HEAD;
  750           parser->index = 2;
  751           UPDATE_STATE(s_req_method);
  752         }
  753         break;
  754 
  755       case s_start_res:
  756       {
  757         parser->flags = 0;
  758         parser->content_length = ULLONG_MAX;
  759 
  760         switch (ch) {
  761           case 'H':
  762             UPDATE_STATE(s_res_H);
  763             break;
  764 
  765           case CR:
  766           case LF:
  767             break;
  768 
  769           default:
  770             SET_ERRNO(HPE_INVALID_CONSTANT);
  771             goto error;
  772         }
  773 
  774         CALLBACK_NOTIFY(message_begin);
  775         break;
  776       }
  777 
  778       case s_res_H:
  779         STRICT_CHECK(ch != 'T');
  780         UPDATE_STATE(s_res_HT);
  781         break;
  782 
  783       case s_res_HT:
  784         STRICT_CHECK(ch != 'T');
  785         UPDATE_STATE(s_res_HTT);
  786         break;
  787 
  788       case s_res_HTT:
  789         STRICT_CHECK(ch != 'P');
  790         UPDATE_STATE(s_res_HTTP);
  791         break;
  792 
  793       case s_res_HTTP:
  794         STRICT_CHECK(ch != '/');
  795         UPDATE_STATE(s_res_http_major);
  796         break;
  797 
  798       case s_res_http_major:
  799         if (UNLIKELY(!IS_NUM(ch))) {
  800           SET_ERRNO(HPE_INVALID_VERSION);
  801           goto error;
  802         }
  803 
  804         parser->http_major = ch - '0';
  805         UPDATE_STATE(s_res_http_dot);
  806         break;
  807 
  808       case s_res_http_dot:
  809       {
  810         if (UNLIKELY(ch != '.')) {
  811           SET_ERRNO(HPE_INVALID_VERSION);
  812           goto error;
  813         }
  814 
  815         UPDATE_STATE(s_res_http_minor);
  816         break;
  817       }
  818 
  819       case s_res_http_minor:
  820         if (UNLIKELY(!IS_NUM(ch))) {
  821           SET_ERRNO(HPE_INVALID_VERSION);
  822           goto error;
  823         }
  824 
  825         parser->http_minor = ch - '0';
  826         UPDATE_STATE(s_res_http_end);
  827         break;
  828 
  829       case s_res_http_end:
  830       {
  831         if (UNLIKELY(ch != ' ')) {
  832           SET_ERRNO(HPE_INVALID_VERSION);
  833           goto error;
  834         }
  835 
  836         UPDATE_STATE(s_res_first_status_code);
  837         break;
  838       }
  839 
  840       case s_res_first_status_code:
  841       {
  842         if (!IS_NUM(ch)) {
  843           if (ch == ' ') {
  844             break;
  845           }
  846 
  847           SET_ERRNO(HPE_INVALID_STATUS);
  848           goto error;
  849         }
  850         parser->status_code = ch - '0';
  851         UPDATE_STATE(s_res_status_code);
  852         break;
  853       }
  854 
  855       case s_res_status_code:
  856       {
  857         if (!IS_NUM(ch)) {
  858           switch (ch) {
  859             case ' ':
  860               UPDATE_STATE(s_res_status_start);
  861               break;
  862             case CR:
  863             case LF:
  864               UPDATE_STATE(s_res_status_start);
  865               REEXECUTE();
  866               break;
  867             default:
  868               SET_ERRNO(HPE_INVALID_STATUS);
  869               goto error;
  870           }
  871           break;
  872         }
  873 
  874         parser->status_code *= 10;
  875         parser->status_code += ch - '0';
  876 
  877         if (UNLIKELY(parser->status_code > 999)) {
  878           SET_ERRNO(HPE_INVALID_STATUS);
  879           goto error;
  880         }
  881 
  882         break;
  883       }
  884 
  885       case s_res_status_start:
  886       {
  887         MARK(status);
  888         UPDATE_STATE(s_res_status);
  889         parser->index = 0;
  890 
  891         if (ch == CR || ch == LF)
  892           REEXECUTE();
  893 
  894         break;
  895       }
  896 
  897       case s_res_status:
  898         if (ch == CR) {
  899           UPDATE_STATE(s_res_line_almost_done);
  900           CALLBACK_DATA(status);
  901           break;
  902         }
  903 
  904         if (ch == LF) {
  905           UPDATE_STATE(s_header_field_start);
  906           CALLBACK_DATA(status);
  907           break;
  908         }
  909 
  910         break;
  911 
  912       case s_res_line_almost_done:
  913         STRICT_CHECK(ch != LF);
  914         UPDATE_STATE(s_header_field_start);
  915         break;
  916 
  917       case s_start_req:
  918       {
  919         if (ch == CR || ch == LF)
  920           break;
  921         parser->flags = 0;
  922         parser->content_length = ULLONG_MAX;
  923 
  924         if (UNLIKELY(!IS_ALPHA(ch))) {
  925           SET_ERRNO(HPE_INVALID_METHOD);
  926           goto error;
  927         }
  928 
  929         parser->method = (enum http_method) 0;
  930         parser->index = 1;
  931         switch (ch) {
  932           case 'A': parser->method = HTTP_ACL; break;
  933           case 'B': parser->method = HTTP_BIND; break;
  934           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
  935           case 'D': parser->method = HTTP_DELETE; break;
  936           case 'G': parser->method = HTTP_GET; break;
  937           case 'H': parser->method = HTTP_HEAD; break;
  938           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
  939           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
  940           case 'N': parser->method = HTTP_NOTIFY; break;
  941           case 'O': parser->method = HTTP_OPTIONS; break;
  942           case 'P': parser->method = HTTP_POST;
  943             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
  944             break;
  945           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
  946           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
  947           case 'T': parser->method = HTTP_TRACE; break;
  948           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
  949           default:
  950             SET_ERRNO(HPE_INVALID_METHOD);
  951             goto error;
  952         }
  953         UPDATE_STATE(s_req_method);
  954 
  955         CALLBACK_NOTIFY(message_begin);
  956 
  957         break;
  958       }
  959 
  960       case s_req_method:
  961       {
  962         const char *matcher;
  963         if (UNLIKELY(ch == '\0')) {
  964           SET_ERRNO(HPE_INVALID_METHOD);
  965           goto error;
  966         }
  967 
  968         matcher = method_strings[parser->method];
  969         if (ch == ' ' && matcher[parser->index] == '\0') {
  970           UPDATE_STATE(s_req_spaces_before_url);
  971         } else if (ch == matcher[parser->index]) {
  972           ; /* nada */
  973         } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
  974 
  975           switch (parser->method << 16 | parser->index << 8 | ch) {
  976 #define XX(meth, pos, ch, new_meth) \
  977             case (HTTP_##meth << 16 | pos << 8 | ch): \
  978               parser->method = HTTP_##new_meth; break;
  979 
  980             XX(POST,      1, 'U', PUT)
  981             XX(POST,      1, 'A', PATCH)
  982             XX(POST,      1, 'R', PROPFIND)
  983             XX(PUT,       2, 'R', PURGE)
  984             XX(CONNECT,   1, 'H', CHECKOUT)
  985             XX(CONNECT,   2, 'P', COPY)
  986             XX(MKCOL,     1, 'O', MOVE)
  987             XX(MKCOL,     1, 'E', MERGE)
  988             XX(MKCOL,     1, '-', MSEARCH)
  989             XX(MKCOL,     2, 'A', MKACTIVITY)
  990             XX(MKCOL,     3, 'A', MKCALENDAR)
  991             XX(SUBSCRIBE, 1, 'E', SEARCH)
  992             XX(REPORT,    2, 'B', REBIND)
  993             XX(PROPFIND,  4, 'P', PROPPATCH)
  994             XX(LOCK,      1, 'I', LINK)
  995             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
  996             XX(UNLOCK,    2, 'B', UNBIND)
  997             XX(UNLOCK,    3, 'I', UNLINK)
  998 #undef XX
  999             default:
 1000               SET_ERRNO(HPE_INVALID_METHOD);
 1001               goto error;
 1002           }
 1003         } else {
 1004           SET_ERRNO(HPE_INVALID_METHOD);
 1005           goto error;
 1006         }
 1007 
 1008         ++parser->index;
 1009         break;
 1010       }
 1011 
 1012       case s_req_spaces_before_url:
 1013       {
 1014         if (ch == ' ') break;
 1015 
 1016         MARK(url);
 1017         if (parser->method == HTTP_CONNECT) {
 1018           UPDATE_STATE(s_req_server_start);
 1019         }
 1020 
 1021         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
 1022         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
 1023           SET_ERRNO(HPE_INVALID_URL);
 1024           goto error;
 1025         }
 1026 
 1027         break;
 1028       }
 1029 
 1030       case s_req_schema:
 1031       case s_req_schema_slash:
 1032       case s_req_schema_slash_slash:
 1033       case s_req_server_start:
 1034       {
 1035         switch (ch) {
 1036           /* No whitespace allowed here */
 1037           case ' ':
 1038           case CR:
 1039           case LF:
 1040             SET_ERRNO(HPE_INVALID_URL);
 1041             goto error;
 1042           default:
 1043             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
 1044             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
 1045               SET_ERRNO(HPE_INVALID_URL);
 1046               goto error;
 1047             }
 1048         }
 1049 
 1050         break;
 1051       }
 1052 
 1053       case s_req_server:
 1054       case s_req_server_with_at:
 1055       case s_req_path:
 1056       case s_req_query_string_start:
 1057       case s_req_query_string:
 1058       case s_req_fragment_start:
 1059       case s_req_fragment:
 1060       {
 1061         switch (ch) {
 1062           case ' ':
 1063             UPDATE_STATE(s_req_http_start);
 1064             CALLBACK_DATA(url);
 1065             break;
 1066           case CR:
 1067           case LF:
 1068             parser->http_major = 0;
 1069             parser->http_minor = 9;
 1070             UPDATE_STATE((ch == CR) ?
 1071               s_req_line_almost_done :
 1072               s_header_field_start);
 1073             CALLBACK_DATA(url);
 1074             break;
 1075           default:
 1076             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
 1077             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
 1078               SET_ERRNO(HPE_INVALID_URL);
 1079               goto error;
 1080             }
 1081         }
 1082         break;
 1083       }
 1084 
 1085       case s_req_http_start:
 1086         switch (ch) {
 1087           case 'H':
 1088             UPDATE_STATE(s_req_http_H);
 1089             break;
 1090           case ' ':
 1091             break;
 1092           default:
 1093             SET_ERRNO(HPE_INVALID_CONSTANT);
 1094             goto error;
 1095         }
 1096         break;
 1097 
 1098       case s_req_http_H:
 1099         STRICT_CHECK(ch != 'T');
 1100         UPDATE_STATE(s_req_http_HT);
 1101         break;
 1102 
 1103       case s_req_http_HT:
 1104         STRICT_CHECK(ch != 'T');
 1105         UPDATE_STATE(s_req_http_HTT);
 1106         break;
 1107 
 1108       case s_req_http_HTT:
 1109         STRICT_CHECK(ch != 'P');
 1110         UPDATE_STATE(s_req_http_HTTP);
 1111         break;
 1112 
 1113       case s_req_http_HTTP:
 1114         STRICT_CHECK(ch != '/');
 1115         UPDATE_STATE(s_req_http_major);
 1116         break;
 1117 
 1118       case s_req_http_major:
 1119         if (UNLIKELY(!IS_NUM(ch))) {
 1120           SET_ERRNO(HPE_INVALID_VERSION);
 1121           goto error;
 1122         }
 1123 
 1124         parser->http_major = ch - '0';
 1125         UPDATE_STATE(s_req_http_dot);
 1126         break;
 1127 
 1128       case s_req_http_dot:
 1129       {
 1130         if (UNLIKELY(ch != '.')) {
 1131           SET_ERRNO(HPE_INVALID_VERSION);
 1132           goto error;
 1133         }
 1134 
 1135         UPDATE_STATE(s_req_http_minor);
 1136         break;
 1137       }
 1138 
 1139       case s_req_http_minor:
 1140         if (UNLIKELY(!IS_NUM(ch))) {
 1141           SET_ERRNO(HPE_INVALID_VERSION);
 1142           goto error;
 1143         }
 1144 
 1145         parser->http_minor = ch - '0';
 1146         UPDATE_STATE(s_req_http_end);
 1147         break;
 1148 
 1149       case s_req_http_end:
 1150       {
 1151         if (ch == CR) {
 1152           UPDATE_STATE(s_req_line_almost_done);
 1153           break;
 1154         }
 1155 
 1156         if (ch == LF) {
 1157           UPDATE_STATE(s_header_field_start);
 1158           break;
 1159         }
 1160 
 1161         SET_ERRNO(HPE_INVALID_VERSION);
 1162         goto error;
 1163         break;
 1164       }
 1165 
 1166       /* end of request line */
 1167       case s_req_line_almost_done:
 1168       {
 1169         if (UNLIKELY(ch != LF)) {
 1170           SET_ERRNO(HPE_LF_EXPECTED);
 1171           goto error;
 1172         }
 1173 
 1174         UPDATE_STATE(s_header_field_start);
 1175         break;
 1176       }
 1177 
 1178       case s_header_field_start:
 1179       {
 1180         if (ch == CR) {
 1181           UPDATE_STATE(s_headers_almost_done);
 1182           break;
 1183         }
 1184 
 1185         if (ch == LF) {
 1186           /* they might be just sending \n instead of \r\n so this would be
 1187            * the second \n to denote the end of headers*/
 1188           UPDATE_STATE(s_headers_almost_done);
 1189           REEXECUTE();
 1190         }
 1191 
 1192         c = TOKEN(ch);
 1193 
 1194         if (UNLIKELY(!c)) {
 1195           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
 1196           goto error;
 1197         }
 1198 
 1199         MARK(header_field);
 1200 
 1201         parser->index = 0;
 1202         UPDATE_STATE(s_header_field);
 1203 
 1204         switch (c) {
 1205           case 'c':
 1206             parser->header_state = h_C;
 1207             break;
 1208 
 1209           case 'p':
 1210             parser->header_state = h_matching_proxy_connection;
 1211             break;
 1212 
 1213           case 't':
 1214             parser->header_state = h_matching_transfer_encoding;
 1215             break;
 1216 
 1217           case 'u':
 1218             parser->header_state = h_matching_upgrade;
 1219             break;
 1220 
 1221           default:
 1222             parser->header_state = h_general;
 1223             break;
 1224         }
 1225         break;
 1226       }
 1227 
 1228       case s_header_field:
 1229       {
 1230         const char* start = p;
 1231         for (; p != data + len; p++) {
 1232           ch = *p;
 1233           c = TOKEN(ch);
 1234 
 1235           if (!c)
 1236             break;
 1237 
 1238           switch (parser->header_state) {
 1239             case h_general:
 1240               break;
 1241 
 1242             case h_C:
 1243               parser->index++;
 1244               parser->header_state = (c == 'o' ? h_CO : h_general);
 1245               break;
 1246 
 1247             case h_CO:
 1248               parser->index++;
 1249               parser->header_state = (c == 'n' ? h_CON : h_general);
 1250               break;
 1251 
 1252             case h_CON:
 1253               parser->index++;
 1254               switch (c) {
 1255                 case 'n':
 1256                   parser->header_state = h_matching_connection;
 1257                   break;
 1258                 case 't':
 1259                   parser->header_state = h_matching_content_length;
 1260                   break;
 1261                 default:
 1262                   parser->header_state = h_general;
 1263                   break;
 1264               }
 1265               break;
 1266 
 1267             /* connection */
 1268 
 1269             case h_matching_connection:
 1270               parser->index++;
 1271               if (parser->index > sizeof(CONNECTION)-1
 1272                   || c != CONNECTION[parser->index]) {
 1273                 parser->header_state = h_general;
 1274               } else if (parser->index == sizeof(CONNECTION)-2) {
 1275                 parser->header_state = h_connection;
 1276               }
 1277               break;
 1278 
 1279             /* proxy-connection */
 1280 
 1281             case h_matching_proxy_connection:
 1282               parser->index++;
 1283               if (parser->index > sizeof(PROXY_CONNECTION)-1
 1284                   || c != PROXY_CONNECTION[parser->index]) {
 1285                 parser->header_state = h_general;
 1286               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
 1287                 parser->header_state = h_connection;
 1288               }
 1289               break;
 1290 
 1291             /* content-length */
 1292 
 1293             case h_matching_content_length:
 1294               parser->index++;
 1295               if (parser->index > sizeof(CONTENT_LENGTH)-1
 1296                   || c != CONTENT_LENGTH[parser->index]) {
 1297                 parser->header_state = h_general;
 1298               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
 1299                 parser->header_state = h_content_length;
 1300               }
 1301               break;
 1302 
 1303             /* transfer-encoding */
 1304 
 1305             case h_matching_transfer_encoding:
 1306               parser->index++;
 1307               if (parser->index > sizeof(TRANSFER_ENCODING)-1
 1308                   || c != TRANSFER_ENCODING[parser->index]) {
 1309                 parser->header_state = h_general;
 1310               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
 1311                 parser->header_state = h_transfer_encoding;
 1312               }
 1313               break;
 1314 
 1315             /* upgrade */
 1316 
 1317             case h_matching_upgrade:
 1318               parser->index++;
 1319               if (parser->index > sizeof(UPGRADE)-1
 1320                   || c != UPGRADE[parser->index]) {
 1321                 parser->header_state = h_general;
 1322               } else if (parser->index == sizeof(UPGRADE)-2) {
 1323                 parser->header_state = h_upgrade;
 1324               }
 1325               break;
 1326 
 1327             case h_connection:
 1328             case h_content_length:
 1329             case h_transfer_encoding:
 1330             case h_upgrade:
 1331               if (ch != ' ') parser->header_state = h_general;
 1332               break;
 1333 
 1334             default:
 1335               assert(0 && "Unknown header_state");
 1336               break;
 1337           }
 1338         }
 1339 
 1340         COUNT_HEADER_SIZE(p - start);
 1341 
 1342         if (p == data + len) {
 1343           --p;
 1344           break;
 1345         }
 1346 
 1347         if (ch == ':') {
 1348           UPDATE_STATE(s_header_value_discard_ws);
 1349           CALLBACK_DATA(header_field);
 1350           break;
 1351         }
 1352 
 1353         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
 1354         goto error;
 1355       }
 1356 
 1357       case s_header_value_discard_ws:
 1358         if (ch == ' ' || ch == '\t') break;
 1359 
 1360         if (ch == CR) {
 1361           UPDATE_STATE(s_header_value_discard_ws_almost_done);
 1362           break;
 1363         }
 1364 
 1365         if (ch == LF) {
 1366           UPDATE_STATE(s_header_value_discard_lws);
 1367           break;
 1368         }
 1369 
 1370         /* FALLTHROUGH */
 1371 
 1372       case s_header_value_start:
 1373       {
 1374         MARK(header_value);
 1375 
 1376         UPDATE_STATE(s_header_value);
 1377         parser->index = 0;
 1378 
 1379         c = LOWER(ch);
 1380 
 1381         switch (parser->header_state) {
 1382           case h_upgrade:
 1383             parser->flags |= F_UPGRADE;
 1384             parser->header_state = h_general;
 1385             break;
 1386 
 1387           case h_transfer_encoding:
 1388             /* looking for 'Transfer-Encoding: chunked' */
 1389             if ('c' == c) {
 1390               parser->header_state = h_matching_transfer_encoding_chunked;
 1391             } else {
 1392               parser->header_state = h_general;
 1393             }
 1394             break;
 1395 
 1396           case h_content_length:
 1397             if (UNLIKELY(!IS_NUM(ch))) {
 1398               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
 1399               goto error;
 1400             }
 1401 
 1402             if (parser->flags & F_CONTENTLENGTH) {
 1403               SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
 1404               goto error;
 1405             }
 1406 
 1407             parser->flags |= F_CONTENTLENGTH;
 1408             parser->content_length = ch - '0';
 1409             break;
 1410 
 1411           case h_connection:
 1412             /* looking for 'Connection: keep-alive' */
 1413             if (c == 'k') {
 1414               parser->header_state = h_matching_connection_keep_alive;
 1415             /* looking for 'Connection: close' */
 1416             } else if (c == 'c') {
 1417               parser->header_state = h_matching_connection_close;
 1418             } else if (c == 'u') {
 1419               parser->header_state = h_matching_connection_upgrade;
 1420             } else {
 1421               parser->header_state = h_matching_connection_token;
 1422             }
 1423             break;
 1424 
 1425           /* Multi-value `Connection` header */
 1426           case h_matching_connection_token_start:
 1427             break;
 1428 
 1429           default:
 1430             parser->header_state = h_general;
 1431             break;
 1432         }
 1433         break;
 1434       }
 1435 
 1436       case s_header_value:
 1437       {
 1438         const char* start = p;
 1439         enum header_states h_state = (enum header_states) parser->header_state;
 1440         for (; p != data + len; p++) {
 1441           ch = *p;
 1442           if (ch == CR) {
 1443             UPDATE_STATE(s_header_almost_done);
 1444             parser->header_state = h_state;
 1445             CALLBACK_DATA(header_value);
 1446             break;
 1447           }
 1448 
 1449           if (ch == LF) {
 1450             UPDATE_STATE(s_header_almost_done);
 1451             COUNT_HEADER_SIZE(p - start);
 1452             parser->header_state = h_state;
 1453             CALLBACK_DATA_NOADVANCE(header_value);
 1454             REEXECUTE();
 1455           }
 1456 
 1457           if (!lenient && !IS_HEADER_CHAR(ch)) {
 1458             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
 1459             goto error;
 1460           }
 1461 
 1462           c = LOWER(ch);
 1463 
 1464           switch (h_state) {
 1465             case h_general:
 1466             {
 1467               const char* p_cr;
 1468               const char* p_lf;
 1469               size_t limit = data + len - p;
 1470 
 1471               limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
 1472 
 1473               p_cr = (const char*) memchr(p, CR, limit);
 1474               p_lf = (const char*) memchr(p, LF, limit);
 1475               if (p_cr != NULL) {
 1476                 if (p_lf != NULL && p_cr >= p_lf)
 1477                   p = p_lf;
 1478                 else
 1479                   p = p_cr;
 1480               } else if (UNLIKELY(p_lf != NULL)) {
 1481                 p = p_lf;
 1482               } else {
 1483                 p = data + len;
 1484               }
 1485               --p;
 1486 
 1487               break;
 1488             }
 1489 
 1490             case h_connection:
 1491             case h_transfer_encoding:
 1492               assert(0 && "Shouldn't get here.");
 1493               break;
 1494 
 1495             case h_content_length:
 1496             {
 1497               uint64_t t;
 1498 
 1499               if (ch == ' ') break;
 1500 
 1501               if (UNLIKELY(!IS_NUM(ch))) {
 1502                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
 1503                 parser->header_state = h_state;
 1504                 goto error;
 1505               }
 1506 
 1507               t = parser->content_length;
 1508               t *= 10;
 1509               t += ch - '0';
 1510 
 1511               /* Overflow? Test against a conservative limit for simplicity. */
 1512               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
 1513                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
 1514                 parser->header_state = h_state;
 1515                 goto error;
 1516               }
 1517 
 1518               parser->content_length = t;
 1519               break;
 1520             }
 1521 
 1522             /* Transfer-Encoding: chunked */
 1523             case h_matching_transfer_encoding_chunked:
 1524               parser->index++;
 1525               if (parser->index > sizeof(CHUNKED)-1
 1526                   || c != CHUNKED[parser->index]) {
 1527                 h_state = h_general;
 1528               } else if (parser->index == sizeof(CHUNKED)-2) {
 1529                 h_state = h_transfer_encoding_chunked;
 1530               }
 1531               break;
 1532 
 1533             case h_matching_connection_token_start:
 1534               /* looking for 'Connection: keep-alive' */
 1535               if (c == 'k') {
 1536                 h_state = h_matching_connection_keep_alive;
 1537               /* looking for 'Connection: close' */
 1538               } else if (c == 'c') {
 1539                 h_state = h_matching_connection_close;
 1540               } else if (c == 'u') {
 1541                 h_state = h_matching_connection_upgrade;
 1542               } else if (STRICT_TOKEN(c)) {
 1543                 h_state = h_matching_connection_token;
 1544               } else if (c == ' ' || c == '\t') {
 1545                 /* Skip lws */
 1546               } else {
 1547                 h_state = h_general;
 1548               }
 1549               break;
 1550 
 1551             /* looking for 'Connection: keep-alive' */
 1552             case h_matching_connection_keep_alive:
 1553               parser->index++;
 1554               if (parser->index > sizeof(KEEP_ALIVE)-1
 1555                   || c != KEEP_ALIVE[parser->index]) {
 1556                 h_state = h_matching_connection_token;
 1557               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
 1558                 h_state = h_connection_keep_alive;
 1559               }
 1560               break;
 1561 
 1562             /* looking for 'Connection: close' */
 1563             case h_matching_connection_close:
 1564               parser->index++;
 1565               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
 1566                 h_state = h_matching_connection_token;
 1567               } else if (parser->index == sizeof(CLOSE)-2) {
 1568                 h_state = h_connection_close;
 1569               }
 1570               break;
 1571 
 1572             /* looking for 'Connection: upgrade' */
 1573             case h_matching_connection_upgrade:
 1574               parser->index++;
 1575               if (parser->index > sizeof(UPGRADE) - 1 ||
 1576                   c != UPGRADE[parser->index]) {
 1577                 h_state = h_matching_connection_token;
 1578               } else if (parser->index == sizeof(UPGRADE)-2) {
 1579                 h_state = h_connection_upgrade;
 1580               }
 1581               break;
 1582 
 1583             case h_matching_connection_token:
 1584               if (ch == ',') {
 1585                 h_state = h_matching_connection_token_start;
 1586                 parser->index = 0;
 1587               }
 1588               break;
 1589 
 1590             case h_transfer_encoding_chunked:
 1591               if (ch != ' ') h_state = h_general;
 1592               break;
 1593 
 1594             case h_connection_keep_alive:
 1595             case h_connection_close:
 1596             case h_connection_upgrade:
 1597               if (ch == ',') {
 1598                 if (h_state == h_connection_keep_alive) {
 1599                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
 1600                 } else if (h_state == h_connection_close) {
 1601                   parser->flags |= F_CONNECTION_CLOSE;
 1602                 } else if (h_state == h_connection_upgrade) {
 1603                   parser->flags |= F_CONNECTION_UPGRADE;
 1604                 }
 1605                 h_state = h_matching_connection_token_start;
 1606                 parser->index = 0;
 1607               } else if (ch != ' ') {
 1608                 h_state = h_matching_connection_token;
 1609               }
 1610               break;
 1611 
 1612             default:
 1613               UPDATE_STATE(s_header_value);
 1614               h_state = h_general;
 1615               break;
 1616           }
 1617         }
 1618         parser->header_state = h_state;
 1619 
 1620         COUNT_HEADER_SIZE(p - start);
 1621 
 1622         if (p == data + len)
 1623           --p;
 1624         break;
 1625       }
 1626 
 1627       case s_header_almost_done:
 1628       {
 1629         if (UNLIKELY(ch != LF)) {
 1630           SET_ERRNO(HPE_LF_EXPECTED);
 1631           goto error;
 1632         }
 1633 
 1634         UPDATE_STATE(s_header_value_lws);
 1635         break;
 1636       }
 1637 
 1638       case s_header_value_lws:
 1639       {
 1640         if (ch == ' ' || ch == '\t') {
 1641           UPDATE_STATE(s_header_value_start);
 1642           REEXECUTE();
 1643         }
 1644 
 1645         /* finished the header */
 1646         switch (parser->header_state) {
 1647           case h_connection_keep_alive:
 1648             parser->flags |= F_CONNECTION_KEEP_ALIVE;
 1649             break;
 1650           case h_connection_close:
 1651             parser->flags |= F_CONNECTION_CLOSE;
 1652             break;
 1653           case h_transfer_encoding_chunked:
 1654             parser->flags |= F_CHUNKED;
 1655             break;
 1656           case h_connection_upgrade:
 1657             parser->flags |= F_CONNECTION_UPGRADE;
 1658             break;
 1659           default:
 1660             break;
 1661         }
 1662 
 1663         UPDATE_STATE(s_header_field_start);
 1664         REEXECUTE();
 1665       }
 1666 
 1667       case s_header_value_discard_ws_almost_done:
 1668       {
 1669         STRICT_CHECK(ch != LF);
 1670         UPDATE_STATE(s_header_value_discard_lws);
 1671         break;
 1672       }
 1673 
 1674       case s_header_value_discard_lws:
 1675       {
 1676         if (ch == ' ' || ch == '\t') {
 1677           UPDATE_STATE(s_header_value_discard_ws);
 1678           break;
 1679         } else {
 1680           switch (parser->header_state) {
 1681             case h_connection_keep_alive:
 1682               parser->flags |= F_CONNECTION_KEEP_ALIVE;
 1683               break;
 1684             case h_connection_close:
 1685               parser->flags |= F_CONNECTION_CLOSE;
 1686               break;
 1687             case h_connection_upgrade:
 1688               parser->flags |= F_CONNECTION_UPGRADE;
 1689               break;
 1690             case h_transfer_encoding_chunked:
 1691               parser->flags |= F_CHUNKED;
 1692               break;
 1693             default:
 1694               break;
 1695           }
 1696 
 1697           /* header value was empty */
 1698           MARK(header_value);
 1699           UPDATE_STATE(s_header_field_start);
 1700           CALLBACK_DATA_NOADVANCE(header_value);
 1701           REEXECUTE();
 1702         }
 1703       }
 1704 
 1705       case s_headers_almost_done:
 1706       {
 1707         STRICT_CHECK(ch != LF);
 1708 
 1709         if (parser->flags & F_TRAILING) {
 1710           /* End of a chunked request */
 1711           UPDATE_STATE(s_message_done);
 1712           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
 1713           REEXECUTE();
 1714         }
 1715 
 1716         /* Cannot use chunked encoding and a content-length header together
 1717            per the HTTP specification. */
 1718         if ((parser->flags & F_CHUNKED) &&
 1719             (parser->flags & F_CONTENTLENGTH)) {
 1720           SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
 1721           goto error;
 1722         }
 1723 
 1724         UPDATE_STATE(s_headers_done);
 1725 
 1726         /* Set this here so that on_headers_complete() callbacks can see it */
 1727         if ((parser->flags & F_UPGRADE) &&
 1728             (parser->flags & F_CONNECTION_UPGRADE)) {
 1729           /* For responses, "Upgrade: foo" and "Connection: upgrade" are
 1730            * mandatory only when it is a 101 Switching Protocols response,
 1731            * otherwise it is purely informational, to announce support.
 1732            */
 1733           parser->upgrade =
 1734               (parser->type == HTTP_REQUEST || parser->status_code == 101);
 1735         } else {
 1736           parser->upgrade = (parser->method == HTTP_CONNECT);
 1737         }
 1738 
 1739         /* Here we call the headers_complete callback. This is somewhat
 1740          * different than other callbacks because if the user returns 1, we
 1741          * will interpret that as saying that this message has no body. This
  1742          * is needed for the annoying case of receiving a response to a HEAD
 1743          * request.
 1744          *
 1745          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
 1746          * we have to simulate it by handling a change in errno below.
 1747          */
 1748         if (settings->on_headers_complete) {
 1749           switch (settings->on_headers_complete(parser)) {
 1750             case 0:
 1751               break;
 1752 
 1753             case 2:
 1754               parser->upgrade = 1;
 1755 
 1756             /* FALLTHROUGH */
 1757             case 1:
 1758               parser->flags |= F_SKIPBODY;
 1759               break;
 1760 
 1761             default:
 1762               SET_ERRNO(HPE_CB_headers_complete);
 1763               RETURN(p - data); /* Error */
 1764           }
 1765         }
 1766 
 1767         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
 1768           RETURN(p - data);
 1769         }
 1770 
 1771         REEXECUTE();
 1772       }
 1773 
 1774       case s_headers_done:
 1775       {
 1776         int hasBody;
 1777         STRICT_CHECK(ch != LF);
 1778 
 1779         parser->nread = 0;
 1780 
 1781         hasBody = parser->flags & F_CHUNKED ||
 1782           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
 1783         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
 1784                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
 1785           /* Exit, the rest of the message is in a different protocol. */
 1786           UPDATE_STATE(NEW_MESSAGE());
 1787           CALLBACK_NOTIFY(message_complete);
 1788           RETURN((p - data) + 1);
 1789         }
 1790 
 1791         if (parser->flags & F_SKIPBODY) {
 1792           UPDATE_STATE(NEW_MESSAGE());
 1793           CALLBACK_NOTIFY(message_complete);
 1794         } else if (parser->flags & F_CHUNKED) {
 1795           /* chunked encoding - ignore Content-Length header */
 1796           UPDATE_STATE(s_chunk_size_start);
 1797         } else {
 1798           if (parser->content_length == 0) {
 1799             /* Content-Length header given but zero: Content-Length: 0\r\n */
 1800             UPDATE_STATE(NEW_MESSAGE());
 1801             CALLBACK_NOTIFY(message_complete);
 1802           } else if (parser->content_length != ULLONG_MAX) {
 1803             /* Content-Length header given and non-zero */
 1804             UPDATE_STATE(s_body_identity);
 1805           } else {
 1806             if (!http_message_needs_eof(parser)) {
 1807               /* Assume content-length 0 - read the next */
 1808               UPDATE_STATE(NEW_MESSAGE());
 1809               CALLBACK_NOTIFY(message_complete);
 1810             } else {
 1811               /* Read body until EOF */
 1812               UPDATE_STATE(s_body_identity_eof);
 1813             }
 1814           }
 1815         }
 1816 
 1817         break;
 1818       }
 1819 
 1820       case s_body_identity:
 1821       {
 1822         uint64_t to_read = MIN(parser->content_length,
 1823                                (uint64_t) ((data + len) - p));
 1824 
 1825         assert(parser->content_length != 0
 1826             && parser->content_length != ULLONG_MAX);
 1827 
 1828         /* The difference between advancing content_length and p is because
 1829          * the latter will automaticaly advance on the next loop iteration.
 1830          * Further, if content_length ends up at 0, we want to see the last
 1831          * byte again for our message complete callback.
 1832          */
 1833         MARK(body);
 1834         parser->content_length -= to_read;
 1835         p += to_read - 1;
 1836 
 1837         if (parser->content_length == 0) {
 1838           UPDATE_STATE(s_message_done);
 1839 
 1840           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
 1841            *
 1842            * The alternative to doing this is to wait for the next byte to
 1843            * trigger the data callback, just as in every other case. The
 1844            * problem with this is that this makes it difficult for the test
 1845            * harness to distinguish between complete-on-EOF and
 1846            * complete-on-length. It's not clear that this distinction is
 1847            * important for applications, but let's keep it for now.
 1848            */
 1849           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
 1850           REEXECUTE();
 1851         }
 1852 
 1853         break;
 1854       }
 1855 
 1856       /* read until EOF */
 1857       case s_body_identity_eof:
 1858         MARK(body);
 1859         p = data + len - 1;
 1860 
 1861         break;
 1862 
 1863       case s_message_done:
 1864         UPDATE_STATE(NEW_MESSAGE());
 1865         CALLBACK_NOTIFY(message_complete);
 1866         if (parser->upgrade) {
 1867           /* Exit, the rest of the message is in a different protocol. */
 1868           RETURN((p - data) + 1);
 1869         }
 1870         break;
 1871 
 1872       case s_chunk_size_start:
 1873       {
 1874         assert(parser->nread == 1);
 1875         assert(parser->flags & F_CHUNKED);
 1876 
 1877         unhex_val = unhex[(unsigned char)ch];
 1878         if (UNLIKELY(unhex_val == -1)) {
 1879           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
 1880           goto error;
 1881         }
 1882 
 1883         parser->content_length = unhex_val;
 1884         UPDATE_STATE(s_chunk_size);
 1885         break;
 1886       }
 1887 
 1888       case s_chunk_size:
 1889       {
 1890         uint64_t t;
 1891 
 1892         assert(parser->flags & F_CHUNKED);
 1893 
 1894         if (ch == CR) {
 1895           UPDATE_STATE(s_chunk_size_almost_done);
 1896           break;
 1897         }
 1898 
 1899         unhex_val = unhex[(unsigned char)ch];
 1900 
 1901         if (unhex_val == -1) {
 1902           if (ch == ';' || ch == ' ') {
 1903             UPDATE_STATE(s_chunk_parameters);
 1904             break;
 1905           }
 1906 
 1907           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
 1908           goto error;
 1909         }
 1910 
 1911         t = parser->content_length;
 1912         t *= 16;
 1913         t += unhex_val;
 1914 
 1915         /* Overflow? Test against a conservative limit for simplicity. */
 1916         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
 1917           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
 1918           goto error;
 1919         }
 1920 
 1921         parser->content_length = t;
 1922         break;
 1923       }
 1924 
 1925       case s_chunk_parameters:
 1926       {
 1927         assert(parser->flags & F_CHUNKED);
 1928         /* just ignore this shit. TODO check for overflow */
 1929         if (ch == CR) {
 1930           UPDATE_STATE(s_chunk_size_almost_done);
 1931           break;
 1932         }
 1933         break;
 1934       }
 1935 
 1936       case s_chunk_size_almost_done:
 1937       {
 1938         assert(parser->flags & F_CHUNKED);
 1939         STRICT_CHECK(ch != LF);
 1940 
 1941         parser->nread = 0;
 1942 
 1943         if (parser->content_length == 0) {
 1944           parser->flags |= F_TRAILING;
 1945           UPDATE_STATE(s_header_field_start);
 1946         } else {
 1947           UPDATE_STATE(s_chunk_data);
 1948         }
 1949         CALLBACK_NOTIFY(chunk_header);
 1950         break;
 1951       }
 1952 
 1953       case s_chunk_data:
 1954       {
 1955         uint64_t to_read = MIN(parser->content_length,
 1956                                (uint64_t) ((data + len) - p));
 1957 
 1958         assert(parser->flags & F_CHUNKED);
 1959         assert(parser->content_length != 0
 1960             && parser->content_length != ULLONG_MAX);
 1961 
 1962         /* See the explanation in s_body_identity for why the content
 1963          * length and data pointers are managed this way.
 1964          */
 1965         MARK(body);
 1966         parser->content_length -= to_read;
 1967         p += to_read - 1;
 1968 
 1969         if (parser->content_length == 0) {
 1970           UPDATE_STATE(s_chunk_data_almost_done);
 1971         }
 1972 
 1973         break;
 1974       }
 1975 
 1976       case s_chunk_data_almost_done:
 1977         assert(parser->flags & F_CHUNKED);
 1978         assert(parser->content_length == 0);
 1979         STRICT_CHECK(ch != CR);
 1980         UPDATE_STATE(s_chunk_data_done);
 1981         CALLBACK_DATA(body);
 1982         break;
 1983 
 1984       case s_chunk_data_done:
 1985         assert(parser->flags & F_CHUNKED);
 1986         STRICT_CHECK(ch != LF);
 1987         parser->nread = 0;
 1988         UPDATE_STATE(s_chunk_size_start);
 1989         CALLBACK_NOTIFY(chunk_complete);
 1990         break;
 1991 
 1992       default:
 1993         assert(0 && "unhandled state");
 1994         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
 1995         goto error;
 1996     }
 1997   }
 1998 
 1999   /* Run callbacks for any marks that we have leftover after we ran our of
 2000    * bytes. There should be at most one of these set, so it's OK to invoke
 2001    * them in series (unset marks will not result in callbacks).
 2002    *
 2003    * We use the NOADVANCE() variety of callbacks here because 'p' has already
 2004    * overflowed 'data' and this allows us to correct for the off-by-one that
 2005    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
 2006    * value that's in-bounds).
 2007    */
 2008 
 2009   assert(((header_field_mark ? 1 : 0) +
 2010           (header_value_mark ? 1 : 0) +
 2011           (url_mark ? 1 : 0)  +
 2012           (body_mark ? 1 : 0) +
 2013           (status_mark ? 1 : 0)) <= 1);
 2014 
 2015   CALLBACK_DATA_NOADVANCE(header_field);
 2016   CALLBACK_DATA_NOADVANCE(header_value);
 2017   CALLBACK_DATA_NOADVANCE(url);
 2018   CALLBACK_DATA_NOADVANCE(body);
 2019   CALLBACK_DATA_NOADVANCE(status);
 2020 
 2021   RETURN(len);
 2022 
 2023 error:
 2024   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
 2025     SET_ERRNO(HPE_UNKNOWN);
 2026   }
 2027 
 2028   RETURN(p - data);
 2029 }
 2030 
 2031 
 2032 /* Does the parser need to see an EOF to find the end of the message? */
 2033 int
 2034 http_message_needs_eof (const http_parser *parser)
 2035 {
 2036   if (parser->type == HTTP_REQUEST) {
 2037     return 0;
 2038   }
 2039 
 2040   /* See RFC 2616 section 4.4 */
 2041   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
 2042       parser->status_code == 204 ||     /* No Content */
 2043       parser->status_code == 304 ||     /* Not Modified */
 2044       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
 2045     return 0;
 2046   }
 2047 
 2048   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
 2049     return 0;
 2050   }
 2051 
 2052   return 1;
 2053 }
 2054 
 2055 
 2056 int
 2057 http_should_keep_alive (const http_parser *parser)
 2058 {
 2059   if (parser->http_major > 0 && parser->http_minor > 0) {
 2060     /* HTTP/1.1 */
 2061     if (parser->flags & F_CONNECTION_CLOSE) {
 2062       return 0;
 2063     }
 2064   } else {
 2065     /* HTTP/1.0 or earlier */
 2066     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
 2067       return 0;
 2068     }
 2069   }
 2070 
 2071   return !http_message_needs_eof(parser);
 2072 }
 2073 
 2074 
 2075 const char *
 2076 http_method_str (enum http_method m)
 2077 {
 2078   return ELEM_AT(method_strings, m, "<unknown>");
 2079 }
 2080 
 2081 
 2082 void
 2083 http_parser_init (http_parser *parser, enum http_parser_type t)
 2084 {
 2085   void *data = parser->data; /* preserve application data */
 2086   memset(parser, 0, sizeof(*parser));
 2087   parser->data = data;
 2088   parser->type = t;
 2089   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
 2090   parser->http_errno = HPE_OK;
 2091 }
 2092 
 2093 void
 2094 http_parser_settings_init(http_parser_settings *settings)
 2095 {
 2096   memset(settings, 0, sizeof(*settings));
 2097 }
 2098 
 2099 const char *
 2100 http_errno_name(enum http_errno err) {
 2101   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
 2102   return http_strerror_tab[err].name;
 2103 }
 2104 
 2105 const char *
 2106 http_errno_description(enum http_errno err) {
 2107   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
 2108   return http_strerror_tab[err].description;
 2109 }
 2110 
 2111 static enum http_host_state
 2112 http_parse_host_char(enum http_host_state s, const char ch) {
 2113   switch(s) {
 2114     case s_http_userinfo:
 2115     case s_http_userinfo_start:
 2116       if (ch == '@') {
 2117         return s_http_host_start;
 2118       }
 2119 
 2120       if (IS_USERINFO_CHAR(ch)) {
 2121         return s_http_userinfo;
 2122       }
 2123       break;
 2124 
 2125     case s_http_host_start:
 2126       if (ch == '[') {
 2127         return s_http_host_v6_start;
 2128       }
 2129 
 2130       if (IS_HOST_CHAR(ch)) {
 2131         return s_http_host;
 2132       }
 2133 
 2134       break;
 2135 
 2136     case s_http_host:
 2137       if (IS_HOST_CHAR(ch)) {
 2138         return s_http_host;
 2139       }
 2140 
 2141     /* FALLTHROUGH */
 2142     case s_http_host_v6_end:
 2143       if (ch == ':') {
 2144         return s_http_host_port_start;
 2145       }
 2146 
 2147       break;
 2148 
 2149     case s_http_host_v6:
 2150       if (ch == ']') {
 2151         return s_http_host_v6_end;
 2152       }
 2153 
 2154     /* FALLTHROUGH */
 2155     case s_http_host_v6_start:
 2156       if (IS_HEX(ch) || ch == ':' || ch == '.') {
 2157         return s_http_host_v6;
 2158       }
 2159 
 2160       if (s == s_http_host_v6 && ch == '%') {
 2161         return s_http_host_v6_zone_start;
 2162       }
 2163       break;
 2164 
 2165     case s_http_host_v6_zone:
 2166       if (ch == ']') {
 2167         return s_http_host_v6_end;
 2168       }
 2169 
 2170     /* FALLTHROUGH */
 2171     case s_http_host_v6_zone_start:
 2172       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
 2173       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
 2174           ch == '~') {
 2175         return s_http_host_v6_zone;
 2176       }
 2177       break;
 2178 
 2179     case s_http_host_port:
 2180     case s_http_host_port_start:
 2181       if (IS_NUM(ch)) {
 2182         return s_http_host_port;
 2183       }
 2184 
 2185       break;
 2186 
 2187     default:
 2188       break;
 2189   }
 2190   return s_http_host_dead;
 2191 }
 2192 
 2193 static int
 2194 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
 2195   enum http_host_state s;
 2196 
 2197   const char *p;
 2198   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
 2199 
 2200   assert(u->field_set & (1 << UF_HOST));
 2201 
 2202   u->field_data[UF_HOST].len = 0;
 2203 
 2204   s = found_at ? s_http_userinfo_start : s_http_host_start;
 2205 
 2206   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
 2207     enum http_host_state new_s = http_parse_host_char(s, *p);
 2208 
 2209     if (new_s == s_http_host_dead) {
 2210       return 1;
 2211     }
 2212 
 2213     switch(new_s) {
 2214       case s_http_host:
 2215         if (s != s_http_host) {
 2216           u->field_data[UF_HOST].off = p - buf;
 2217         }
 2218         u->field_data[UF_HOST].len++;
 2219         break;
 2220 
 2221       case s_http_host_v6:
 2222         if (s != s_http_host_v6) {
 2223           u->field_data[UF_HOST].off = p - buf;
 2224         }
 2225         u->field_data[UF_HOST].len++;
 2226         break;
 2227 
 2228       case s_http_host_v6_zone_start:
 2229       case s_http_host_v6_zone:
 2230         u->field_data[UF_HOST].len++;
 2231         break;
 2232 
 2233       case s_http_host_port:
 2234         if (s != s_http_host_port) {
 2235           u->field_data[UF_PORT].off = p - buf;
 2236           u->field_data[UF_PORT].len = 0;
 2237           u->field_set |= (1 << UF_PORT);
 2238         }
 2239         u->field_data[UF_PORT].len++;
 2240         break;
 2241 
 2242       case s_http_userinfo:
 2243         if (s != s_http_userinfo) {
 2244           u->field_data[UF_USERINFO].off = p - buf ;
 2245           u->field_data[UF_USERINFO].len = 0;
 2246           u->field_set |= (1 << UF_USERINFO);
 2247         }
 2248         u->field_data[UF_USERINFO].len++;
 2249         break;
 2250 
 2251       default:
 2252         break;
 2253     }
 2254     s = new_s;
 2255   }
 2256 
 2257   /* Make sure we don't end somewhere unexpected */
 2258   switch (s) {
 2259     case s_http_host_start:
 2260     case s_http_host_v6_start:
 2261     case s_http_host_v6:
 2262     case s_http_host_v6_zone_start:
 2263     case s_http_host_v6_zone:
 2264     case s_http_host_port_start:
 2265     case s_http_userinfo:
 2266     case s_http_userinfo_start:
 2267       return 1;
 2268     default:
 2269       break;
 2270   }
 2271 
 2272   return 0;
 2273 }
 2274 
 2275 void
 2276 http_parser_url_init(struct http_parser_url *u) {
 2277   memset(u, 0, sizeof(*u));
 2278 }
 2279 
 2280 int
 2281 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
 2282                       struct http_parser_url *u)
 2283 {
 2284   enum state s;
 2285   const char *p;
 2286   enum http_parser_url_fields uf, old_uf;
 2287   int found_at = 0;
 2288 
 2289   u->port = u->field_set = 0;
 2290   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
 2291   old_uf = UF_MAX;
 2292 
 2293   for (p = buf; p < buf + buflen; p++) {
 2294     s = parse_url_char(s, *p);
 2295 
 2296     /* Figure out the next field that we're operating on */
 2297     switch (s) {
 2298       case s_dead:
 2299         return 1;
 2300 
 2301       /* Skip delimeters */
 2302       case s_req_schema_slash:
 2303       case s_req_schema_slash_slash:
 2304       case s_req_server_start:
 2305       case s_req_query_string_start:
 2306       case s_req_fragment_start:
 2307         continue;
 2308 
 2309       case s_req_schema:
 2310         uf = UF_SCHEMA;
 2311         break;
 2312 
 2313       case s_req_server_with_at:
 2314         found_at = 1;
 2315 
 2316       /* FALLTHROUGH */
 2317       case s_req_server:
 2318         uf = UF_HOST;
 2319         break;
 2320 
 2321       case s_req_path:
 2322         uf = UF_PATH;
 2323         break;
 2324 
 2325       case s_req_query_string:
 2326         uf = UF_QUERY;
 2327         break;
 2328 
 2329       case s_req_fragment:
 2330         uf = UF_FRAGMENT;
 2331         break;
 2332 
 2333       default:
 2334         assert(!"Unexpected state");
 2335         return 1;
 2336     }
 2337 
 2338     /* Nothing's changed; soldier on */
 2339     if (uf == old_uf) {
 2340       u->field_data[uf].len++;
 2341       continue;
 2342     }
 2343 
 2344     u->field_data[uf].off = p - buf;
 2345     u->field_data[uf].len = 1;
 2346 
 2347     u->field_set |= (1 << uf);
 2348     old_uf = uf;
 2349   }
 2350 
 2351   /* host must be present if there is a schema */
 2352   /* parsing http:///toto will fail */
 2353   if ((u->field_set & (1 << UF_SCHEMA)) &&
 2354       (u->field_set & (1 << UF_HOST)) == 0) {
 2355     return 1;
 2356   }
 2357 
 2358   if (u->field_set & (1 << UF_HOST)) {
 2359     if (http_parse_host(buf, u, found_at) != 0) {
 2360       return 1;
 2361     }
 2362   }
 2363 
 2364   /* CONNECT requests can only contain "hostname:port" */
 2365   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
 2366     return 1;
 2367   }
 2368 
 2369   if (u->field_set & (1 << UF_PORT)) {
 2370     /* Don't bother with endp; we've already validated the string */
 2371     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
 2372 
 2373     /* Ports have a max value of 2^16 */
 2374     if (v > 0xffff) {
 2375       return 1;
 2376     }
 2377 
 2378     u->port = (uint16_t) v;
 2379   }
 2380 
 2381   return 0;
 2382 }
 2383 
 2384 void
 2385 http_parser_pause(http_parser *parser, int paused) {
 2386   /* Users should only be pausing/unpausing a parser that is not in an error
 2387    * state. In non-debug builds, there's not much that we can do about this
 2388    * other than ignore it.
 2389    */
 2390   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
 2391       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
 2392     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
 2393   } else {
 2394     assert(0 && "Attempting to pause parser in error state");
 2395   }
 2396 }
 2397 
 2398 int
 2399 http_body_is_final(const struct http_parser *parser) {
 2400     return parser->state == s_message_done;
 2401 }
 2402 
 2403 unsigned long
 2404 http_parser_version(void) {
 2405   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
 2406          HTTP_PARSER_VERSION_MINOR * 0x00100 |
 2407          HTTP_PARSER_VERSION_PATCH * 0x00001;
 2408 }