diff options
Diffstat (limited to 'uri.c')
-rw-r--r-- | uri.c | 50 |
1 files changed, 48 insertions, 2 deletions
@@ -93,6 +93,8 @@ struct parser { const char *err; }; +#define CONT_BYTE(b) ((b & 0xC0) == 0x80) + /* XXX: these macros will expand multiple times their argument */ #define UNRESERVED(p) \ @@ -115,6 +117,48 @@ struct parser { || p == ';' \ || p == '=') +/* NOTE: the increment are one less what it should be, because the + * caller will add one byte after we return. */ +static int +valid_multibyte_utf8(struct parser *p) +{ + uint32_t c; + uint8_t s; + + c = 0; + s = *p->uri; + + if ((s & 0xE0) == 0xC0) { + if (!CONT_BYTE(*(p->uri+1))) + return 0; + c = ((s & 0x1F) << 6) | (*(p->uri+1) & 0x3F); + p->uri += 1; + } else if ((s & 0xF0) == 0xE0) { + if (!CONT_BYTE(*(p->uri+1)) || + !CONT_BYTE(*(p->uri+2))) + return 0; + c = (s & 0x0F) << 12 + | ((*(p->uri+1) & 0x3F) << 6) + | ((*(p->uri+2) & 0x3F)); + p->uri += 2; + } else if ((s & 0xF8) == 0xF0) { + if (!CONT_BYTE(*(p->uri+1)) || + !CONT_BYTE(*(p->uri+2)) || + !CONT_BYTE(*(p->uri+3))) + return 0; + c = (s & 0x07) << 18 + | ((*(p->uri+1) & 0x3F) << 12) + | ((*(p->uri+2) & 0x3F) << 6) + | ((*(p->uri+3) & 0x3F)); + p->uri += 3; + } else + return 0; + + return (((0x080 <= c) && (c <= 0x7FF)) + || (((0x800 <= c) && (c <= 0xFFFF))) + || (((0x10000 <= c) && (c <= 0x10FFFF)))); +} + static int parse_pct_encoded(struct parser *p) { @@ -308,7 +352,8 @@ parse_query(struct parser *p) || SUB_DELIMITERS(*p->uri) || *p->uri == '/' || *p->uri == '?' - || parse_pct_encoded(p)) + || parse_pct_encoded(p) + || valid_multibyte_utf8(p)) p->uri++; if (*p->uri != '\0' && *p->uri != '#') { @@ -348,7 +393,8 @@ parse_path(struct parser *p) while (UNRESERVED(*p->uri) || SUB_DELIMITERS(*p->uri) || *p->uri == '/' - || parse_pct_encoded(p)) + || parse_pct_encoded(p) + || valid_multibyte_utf8(p)) p->uri++; if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') { |