aboutsummaryrefslogtreecommitdiff
path: root/uri.c
diff options
context:
space:
mode:
Diffstat (limited to 'uri.c')
-rw-r--r--uri.c50
1 files changed, 48 insertions, 2 deletions
diff --git a/uri.c b/uri.c
index 245928a..3f81b76 100644
--- a/uri.c
+++ b/uri.c
@@ -93,6 +93,8 @@ struct parser {
const char *err;
};
+#define CONT_BYTE(b) ((b & 0xC0) == 0x80)
+
/* XXX: these macros will expand multiple times their argument */
#define UNRESERVED(p) \
@@ -115,6 +117,48 @@ struct parser {
|| p == ';' \
|| p == '=')
+/* NOTE: the increment are one less what it should be, because the
+ * caller will add one byte after we return. */
+static int
+valid_multibyte_utf8(struct parser *p)
+{
+ uint32_t c;
+ uint8_t s;
+
+ c = 0;
+ s = *p->uri;
+
+ if ((s & 0xE0) == 0xC0) {
+ if (!CONT_BYTE(*(p->uri+1)))
+ return 0;
+ c = ((s & 0x1F) << 6) | (*(p->uri+1) & 0x3F);
+ p->uri += 1;
+ } else if ((s & 0xF0) == 0xE0) {
+ if (!CONT_BYTE(*(p->uri+1)) ||
+ !CONT_BYTE(*(p->uri+2)))
+ return 0;
+ c = (s & 0x0F) << 12
+ | ((*(p->uri+1) & 0x3F) << 6)
+ | ((*(p->uri+2) & 0x3F));
+ p->uri += 2;
+ } else if ((s & 0xF8) == 0xF0) {
+ if (!CONT_BYTE(*(p->uri+1)) ||
+ !CONT_BYTE(*(p->uri+2)) ||
+ !CONT_BYTE(*(p->uri+3)))
+ return 0;
+ c = (s & 0x07) << 18
+ | ((*(p->uri+1) & 0x3F) << 12)
+ | ((*(p->uri+2) & 0x3F) << 6)
+ | ((*(p->uri+3) & 0x3F));
+ p->uri += 3;
+ } else
+ return 0;
+
+ return (((0x080 <= c) && (c <= 0x7FF))
+ || (((0x800 <= c) && (c <= 0xFFFF)))
+ || (((0x10000 <= c) && (c <= 0x10FFFF))));
+}
+
static int
parse_pct_encoded(struct parser *p)
{
@@ -308,7 +352,8 @@ parse_query(struct parser *p)
|| SUB_DELIMITERS(*p->uri)
|| *p->uri == '/'
|| *p->uri == '?'
- || parse_pct_encoded(p))
+ || parse_pct_encoded(p)
+ || valid_multibyte_utf8(p))
p->uri++;
if (*p->uri != '\0' && *p->uri != '#') {
@@ -348,7 +393,8 @@ parse_path(struct parser *p)
while (UNRESERVED(*p->uri)
|| SUB_DELIMITERS(*p->uri)
|| *p->uri == '/'
- || parse_pct_encoded(p))
+ || parse_pct_encoded(p)
+ || valid_multibyte_utf8(p))
p->uri++;
if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') {