diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | gmid.c | 12 | ||||
-rw-r--r-- | gmid.h | 12 | ||||
-rw-r--r-- | iri.c (renamed from uri.c) | 196 | ||||
-rw-r--r-- | iri_test.c (renamed from uri_test.c) | 78 | ||||
-rw-r--r-- | utf8.c | 4 |
7 files changed, 127 insertions, 195 deletions
@@ -2,6 +2,6 @@ cert.pem key.pem TAGS gmid -uri_test +iri_test *.o docs @@ -6,17 +6,17 @@ LDFLAGS = -ltls all: gmid TAGS README.md -gmid: gmid.o uri.o utf8.o - ${CC} gmid.o uri.o utf8.o -o gmid ${LDFLAGS} +gmid: gmid.o iri.o utf8.o + ${CC} gmid.o iri.o utf8.o -o gmid ${LDFLAGS} -TAGS: gmid.c uri.c utf8.c - -etags gmid.c uri.c utf8.c || true +TAGS: gmid.c iri.c utf8.c + -etags gmid.c iri.c utf8.c || true clean: - rm -f *.o gmid + rm -f *.o gmid iri_test -uri_test: uri_test.o uri.o utf8.o - ${CC} uri_test.o uri.o utf8.o -o uri_test ${LDFLAGS} +iri_test: iri_test.o iri.o utf8.o + ${CC} iri_test.o iri.o utf8.o -o iri_test ${LDFLAGS} -test: uri_test - ./uri_test +test: iri_test + ./iri_test @@ -572,7 +572,7 @@ handle(struct pollfd *fds, struct client *client) { char buf[GEMINI_URL_LEN]; const char *parse_err; - struct uri uri; + struct iri iri; switch (client->state) { case S_OPEN: @@ -593,7 +593,7 @@ handle(struct pollfd *fds, struct client *client) } parse_err = "invalid request"; - if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) { + if (!trim_req_iri(buf) || !parse_iri(buf, &iri, &parse_err)) { if (!start_reply(fds, client, BAD_REQUEST, parse_err)) return; goodbye(fds, client); @@ -601,11 +601,11 @@ handle(struct pollfd *fds, struct client *client) } LOGI(client, "GET %s%s%s", - *uri.path ? uri.path : "/", - *uri.query ? "?" : "", - *uri.query ? uri.query : ""); + *iri.path ? iri.path : "/", + *iri.query ? "?" : "", + *iri.query ? iri.query : ""); - send_file(uri.path, uri.query, fds, client); + send_file(iri.path, iri.query, fds, client); break; case S_INITIALIZING: @@ -70,7 +70,7 @@ struct client { struct sockaddr_storage addr; }; -struct uri { +struct iri { char *schema; char *host; char *port; @@ -81,8 +81,8 @@ struct uri { }; struct parser { - char *uri; - struct uri *parsed; + char *iri; + struct iri *parsed; const char *err; }; @@ -123,8 +123,8 @@ void usage(const char*); /* utf8.c */ int valid_multibyte_utf8(struct parser*); -/* uri.c */ -int parse_uri(char*, struct uri*, const char**); -int trim_req_uri(char*); +/* iri.c */ +int parse_iri(char*, struct iri*, const char**); +int trim_req_iri(char*); #endif @@ -19,74 +19,6 @@ #include "gmid.h" -/* - * Notes from RFC3986 - * - * => gemini://tanso.net/rfc/rfc3986.txt - * - * - * ABNF - * ==== - * - * pct-encoded "%" HEXDIG HEXDIG - * - * reserved = gen-delims / sub-delimis - * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - * / "*" / "+" / "," / ";" / "=" - * - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * - * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - * - * hier-part = "//" authority path-abempty - * / path-absolute - * / path-rootless - * / path-empty - * - * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - * - * authority = [ userinfo "@" ] host [ ":" port ] - * - * (note that userinfo isn't used for Gemini URL) - * - * host = IP-literal / IPv4address / reg-name - * reg-name = *( unreserved / pct-encoded / sub-delims ) - * - * port = *DIGIT - * - * path = path-abemty ; begins with "/" or is empty - * / path-absolute ; begins with "/" but not "//" - * / path-noscheme ; begins with a non-colon segment - * / path-rootless ; begins with a segment - * / path-empty ; zero characters - * - * path-abemty = *( "/" segment ) - * path-absolute = "/" [ segment-nz *( "/" segment ) ] - * path-noscheme = ; not used - * path-rootless = ; not used - * path-empty = ; not used - * - * segment = *pchar - * segment-nz = 1*pchar - * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) - * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - * - * query = *( pchar / "/" / "?" ) - * - * fragment = *( pchar / "/" / "?" ) - * - * - * EXAMPLE - * ======= - * - * foo://example.com:8042/over/there?name=ferret#nose - * \_/ \______________/\_________/ \_________/ \__/ - * | | | | | - * scheme authority path query fragment - * - */ - static inline int unreserved(int p) { @@ -116,17 +48,17 @@ sub_delimiters(int p) static int parse_pct_encoded(struct parser *p) { - if (*p->uri != '%') + if (*p->iri != '%') return 0; - if (!isxdigit(*(p->uri+1)) || !isxdigit(*(p->uri+2))) { + if (!isxdigit(*(p->iri+1)) || !isxdigit(*(p->iri+2))) { p->err = "illegal percent-encoding"; return 0; } - sscanf(p->uri+1, "%2hhx", p->uri); - memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1); - if (*p->uri == '\0') { + sscanf(p->iri+1, "%2hhx", p->iri); + memmove(p->iri+1, p->iri+3, strlen(p->iri+3)+1); + if (*p->iri == '\0') { p->err = "illegal percent-encoding"; return 0; } @@ -138,32 +70,32 @@ parse_pct_encoded(struct parser *p) static int parse_scheme(struct parser *p) { - p->parsed->schema = p->uri; + p->parsed->schema = p->iri; - if (!isalpha(*p->uri)) { + if (!isalpha(*p->iri)) { p->err = "illegal character in scheme"; return 0; } - p->uri++; - while (isalnum(*p->uri) - || *p->uri == '+' - || *p->uri == '-' - || *p->uri == '.') - p->uri++; + p->iri++; + while (isalnum(*p->iri) + || *p->iri == '+' + || *p->iri == '-' + || *p->iri == '.') + p->iri++; - if (*p->uri != ':') { + if (*p->iri != ':') { p->err = "illegal character in scheme"; return 0; } - *p->uri = '\0'; - if (*(++p->uri) != '/' || *(++p->uri) != '/') { + *p->iri = '\0'; + if (*(++p->iri) != '/' || *(++p->iri) != '/') { p->err = "invalid marker after scheme"; return 0; } - p->uri++; + p->iri++; return 1; } @@ -173,26 +105,26 @@ parse_port(struct parser *p) { uint32_t i = 0; - p->parsed->port = p->uri; + p->parsed->port = p->iri; - for (; isdigit(*p->uri); p->uri++) { - i = i * 10 + *p->uri - '0'; + for (; isdigit(*p->iri); p->iri++) { + i = i * 10 + *p->iri - '0'; if (i > UINT16_MAX) { p->err = "port number too large"; return 0; } } - if (*p->uri != '/' && *p->uri != '\0') { + if (*p->iri != '/' && *p->iri != '\0') { p->err = "illegal character in port number"; return 0; } p->parsed->port_no = i; - if (*p->uri != '\0') { - *p->uri = '\0'; - p->uri++; + if (*p->iri != '\0') { + *p->iri = '\0'; + p->iri++; } return 1; @@ -203,29 +135,29 @@ parse_port(struct parser *p) static int parse_authority(struct parser *p) { - p->parsed->host = p->uri; + p->parsed->host = p->iri; - while (unreserved(*p->uri) - || sub_delimiters(*p->uri) + while (unreserved(*p->iri) + || sub_delimiters(*p->iri) || parse_pct_encoded(p)) - p->uri++; + p->iri++; if (p->err != NULL) return 0; - if (*p->uri == ':') { - *p->uri = '\0'; - p->uri++; + if (*p->iri == ':') { + *p->iri = '\0'; + p->iri++; return parse_port(p); } - if (*p->uri == '/') { - *p->uri = '\0'; - p->uri++; + if (*p->iri == '/') { + *p->iri = '\0'; + p->iri++; return 1; } - if (*p->uri == '\0') + if (*p->iri == '\0') return 1; p->err = "illegal character in authority section"; @@ -305,29 +237,29 @@ path_clean(char *path) static int parse_query(struct parser *p) { - p->parsed->query = p->uri; - if (*p->uri == '\0') + p->parsed->query = p->iri; + if (*p->iri == '\0') return 1; - while (unreserved(*p->uri) - || sub_delimiters(*p->uri) - || *p->uri == '/' - || *p->uri == '?' + while (unreserved(*p->iri) + || sub_delimiters(*p->iri) + || *p->iri == '/' + || *p->iri == '?' || parse_pct_encoded(p) || valid_multibyte_utf8(p)) - p->uri++; + p->iri++; if (p->err != NULL) return 0; - if (*p->uri != '\0' && *p->uri != '#') { + if (*p->iri != '\0' && *p->iri != '#') { p->err = "illegal character in query"; return 0; } - if (*p->uri != '\0') { - *p->uri = '\0'; - p->uri++; + if (*p->iri != '\0') { + *p->iri = '\0'; + p->iri++; } return 1; @@ -337,7 +269,7 @@ parse_query(struct parser *p) static int parse_fragment(struct parser *p) { - p->parsed->fragment = p->uri; + p->parsed->fragment = p->iri; return 1; } @@ -348,31 +280,31 @@ parse_path(struct parser *p) { char c; - p->parsed->path = p->uri; - if (*p->uri == '\0') { - p->parsed->query = p->parsed->fragment = p->uri; + p->parsed->path = p->iri; + if (*p->iri == '\0') { + p->parsed->query = p->parsed->fragment = p->iri; return 1; } - while (unreserved(*p->uri) - || sub_delimiters(*p->uri) - || *p->uri == '/' + while (unreserved(*p->iri) + || sub_delimiters(*p->iri) + || *p->iri == '/' || parse_pct_encoded(p) || valid_multibyte_utf8(p)) - p->uri++; + p->iri++; if (p->err != NULL) return 0; - if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') { + if (*p->iri != '\0' && *p->iri != '?' && *p->iri != '#') { p->err = "illegal character in path"; return 0; } - if (*p->uri != '\0') { - c = *p->uri; - *p->uri = '\0'; - p->uri++; + if (*p->iri != '\0') { + c = *p->iri; + *p->iri = '\0'; + p->iri++; if (c == '#') { if (!parse_fragment(p)) @@ -391,15 +323,15 @@ parse_path(struct parser *p) } int -parse_uri(char *uri, struct uri *ret, const char **err_ret) +parse_iri(char *iri, struct iri *ret, const char **err_ret) { char *end; - struct parser p = {uri, ret, NULL}; + struct parser p = {iri, ret, NULL}; bzero(ret, sizeof(*ret)); /* initialize optional stuff to the empty string */ - end = uri + strlen(uri); + end = iri + strlen(iri); p.parsed->port = end; p.parsed->path = end; p.parsed->query = end; @@ -415,11 +347,11 @@ parse_uri(char *uri, struct uri *ret, const char **err_ret) } int -trim_req_uri(char *uri) +trim_req_iri(char *iri) { char *i; - if ((i = strstr(uri, "\r\n")) == NULL) + if ((i = strstr(iri, "\r\n")) == NULL) return 0; *i = '\0'; return 1; @@ -20,15 +20,15 @@ #include "gmid.h" -#define TEST(uri, fail, exp, descr) \ - if (!run_test(uri, fail, exp)) { \ +#define TEST(iri, fail, exp, descr) \ + if (!run_test(iri, fail, exp)) { \ fprintf(stderr, "%s:%d: error: %s\n", \ __FILE__, __LINE__, descr); \ exit(1); \ } -#define URI(schema, host, port, path, query, frag) \ - ((struct uri){schema, host, port, 0, path, query, frag}) +#define IRI(schema, host, port, path, query, frag) \ + ((struct iri){schema, host, port, 0, path, query, frag}) #define DIFF(wanted, got, field) \ if (wanted->field == NULL || got->field == NULL || \ @@ -42,7 +42,7 @@ #define FAIL 1 int -diff_uri(struct uri *p, struct uri *exp) +diff_iri(struct iri *p, struct iri *exp) { DIFF(p, exp, schema); DIFF(p, exp, host); @@ -54,18 +54,18 @@ diff_uri(struct uri *p, struct uri *exp) } int -run_test(const char *uri, int should_fail, struct uri expected) +run_test(const char *iri, int should_fail, struct iri expected) { int failed, ok = 1; - char *uri_copy; - struct uri parsed; + char *iri_copy; + struct iri parsed; const char *error; - if ((uri_copy = strdup(uri)) == NULL) + if ((iri_copy = strdup(iri)) == NULL) err(1, "strdup"); - fprintf(stderr, "=> %s\n", uri); - failed = !parse_uri(uri_copy, &parsed, &error); + fprintf(stderr, "=> %s\n", iri); + failed = !parse_iri(iri_copy, &parsed, &error); if (failed && should_fail) goto done; @@ -75,22 +75,22 @@ run_test(const char *uri, int should_fail, struct uri expected) ok = !failed && !should_fail; if (ok) - ok = diff_uri(&expected, &parsed); + ok = diff_iri(&expected, &parsed); done: - free(uri_copy); + free(iri_copy); return ok; } int main(void) { - struct uri empty = {"", "", "", PASS, "", "", ""}; + struct iri empty = {"", "", "", PASS, "", "", ""}; TEST("http://omarpolo.com", PASS, - URI("http", "omarpolo.com", "", "", "", ""), - "can parse uri with empty path"); + IRI("http", "omarpolo.com", "", "", "", ""), + "can parse iri with empty path"); /* schema */ TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing"); @@ -101,19 +101,19 @@ main(void) /* authority */ TEST("gemini://omarpolo.com", PASS, - URI("gemini", "omarpolo.com", "", "", "", ""), + IRI("gemini", "omarpolo.com", "", "", "", ""), "can parse authority with empty path"); TEST("gemini://omarpolo.com/", PASS, - URI("gemini", "omarpolo.com", "", "", "", ""), + IRI("gemini", "omarpolo.com", "", "", "", ""), "can parse authority with empty path (alt)") TEST("gemini://omarpolo.com:1965", PASS, - URI("gemini", "omarpolo.com", "1965", "", "", ""), + IRI("gemini", "omarpolo.com", "1965", "", "", ""), "can parse with port and empty path"); TEST("gemini://omarpolo.com:1965/", PASS, - URI("gemini", "omarpolo.com", "1965", "", "", ""), + IRI("gemini", "omarpolo.com", "1965", "", "", ""), "can parse with port and empty path") TEST("gemini://omarpolo.com:196s", FAIL, @@ -123,31 +123,31 @@ main(void) /* path */ TEST("gemini://omarpolo.com/foo/bar/baz", PASS, - URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), "parse simple paths"); TEST("gemini://omarpolo.com/foo//bar///baz", PASS, - URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), "parse paths with multiple slashes"); TEST("gemini://omarpolo.com/foo/./bar/./././baz", PASS, - URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), "parse paths with . elements"); TEST("gemini://omarpolo.com/foo/bar/../bar/baz", PASS, - URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), "parse paths with .. elements"); TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz", PASS, - URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), "parse paths with multiple .. elements"); TEST("gemini://omarpolo.com/foo/..", PASS, - URI("gemini", "omarpolo.com", "", "", "", ""), + IRI("gemini", "omarpolo.com", "", "", "", ""), "parse paths with a trailing .."); TEST("gemini://omarpolo.com/foo/../", PASS, - URI("gemini", "omarpolo.com", "", "", "", ""), + IRI("gemini", "omarpolo.com", "", "", "", ""), "parse paths with a trailing .."); TEST("gemini://omarpolo.com/foo/../..", FAIL, @@ -155,41 +155,41 @@ main(void) "reject paths that would escape the root"); TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/", PASS, - URI("gemini", "omarpolo.com", "", "", "", ""), + IRI("gemini", "omarpolo.com", "", "", "", ""), "parse path with lots of cleaning available"); /* query */ TEST("foo://example.com/foo/?gne", PASS, - URI("foo", "example.com", "", "foo/", "gne", ""), + IRI("foo", "example.com", "", "foo/", "gne", ""), "parse query strings"); TEST("foo://example.com/foo/?gne&foo", PASS, - URI("foo", "example.com", "", "foo/", "gne&foo", ""), + IRI("foo", "example.com", "", "foo/", "gne&foo", ""), "parse query strings"); TEST("foo://example.com/foo/?gne%2F", PASS, - URI("foo", "example.com", "", "foo/", "gne/", ""), + IRI("foo", "example.com", "", "foo/", "gne/", ""), "parse query strings"); /* fragment */ TEST("foo://bar.co/#foo", PASS, - URI("foo", "bar.co", "", "", "", "foo"), + IRI("foo", "bar.co", "", "", "", "foo"), "can recognize fragments"); /* percent encoding */ TEST("foo://bar.com/caf%C3%A8.gmi", PASS, - URI("foo", "bar.com", "", "cafè.gmi", "", ""), + IRI("foo", "bar.com", "", "cafè.gmi", "", ""), "can decode"); TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi", PASS, - URI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""), + IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""), "can decode"); TEST("foo://bar.com/caff%C3%A8+macchiato.gmi", PASS, - URI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""), + IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""), "can decode"); TEST("foo://bar.com/foo%2F..%2F..", FAIL, @@ -203,19 +203,19 @@ main(void) /* IRI */ TEST("foo://bar.com/cafè.gmi", PASS, - URI("foo", "bar.com", "", "cafè.gmi", "" , ""), + IRI("foo", "bar.com", "", "cafè.gmi", "" , ""), "decode IRI (with a 2-byte utf8 seq)"); TEST("foo://bar.com/世界.gmi", PASS, - URI("foo", "bar.com", "", "世界.gmi", "" , ""), + IRI("foo", "bar.com", "", "世界.gmi", "" , ""), "decode IRI"); TEST("foo://bar.com/😼.gmi", PASS, - URI("foo", "bar.com", "", "😼.gmi", "" , ""), + IRI("foo", "bar.com", "", "😼.gmi", "" , ""), "decode IRI (with a 3-byte utf8 seq)"); TEST("foo://bar.com/😼/𤭢.gmi", PASS, - URI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""), + IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""), "decode IRI (with a 3-byte and a 4-byte utf8 seq)"); TEST("foo://bar.com/世界/\xC0\x80", FAIL, @@ -64,8 +64,8 @@ valid_multibyte_utf8(struct parser *p) { uint32_t cp = 0, state = 0; - for (; *p->uri; p->uri++) - if (!utf8_decode(&state, &cp, *p->uri)) + for (; *p->iri; p->iri++) + if (!utf8_decode(&state, &cp, *p->iri)) break; /* reject the ASCII range */ |