aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOmar Polo <op@omarpolo.com>2020-12-25 13:13:12 +0100
committerOmar Polo <op@omarpolo.com>2020-12-25 13:13:12 +0100
commit33d32d1fd66a577f22f3f33f238e8dac44ec9995 (patch)
treef9010d36f92d9239d0b80c87d9b57ee10fd4776d
parentd5aba4c791266e35cf79cec02dcd15267fb62f62 (diff)
implement a valid RFC3986 (URI) parser
Up until now I used a "poor man's" approach: the URI parser was barely a parser; it tried to extract the path from the request, with some minor checking, and that was all. This obviously is not RFC3986-compliant. The new RFC3986 (URI) parser should be fully compliant. It may accept some invalid URIs, but shouldn't reject or mis-parse valid URIs (in particular, the rule for the path is way more relaxed in this parser than it is in the RFC text). A difference with RFC3986 is that we don't even try to parse the (optional) userinfo part of a URI: following the Gemini spec, we treat it as an error. A further caveat is that %2F in the path part of the URI is indistinguishable from a literal '/': this is NOT conforming, but due to the scope and use of gmid, I don't see how to treat a %2F sequence in the path (reject the URI?).
-rw-r--r--.gitignore1
-rw-r--r--Makefile18
-rw-r--r--gmid.c155
-rw-r--r--gmid.h42
-rw-r--r--uri.c413
-rw-r--r--uri_test.c184
6 files changed, 659 insertions, 154 deletions
diff --git a/.gitignore b/.gitignore
index 9b1c514..a7794ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@ cert.pem
key.pem
TAGS
gmid
+uri_test
*.o
docs
diff --git a/Makefile b/Makefile
index 6c49573..1d355e3 100644
--- a/Makefile
+++ b/Makefile
@@ -2,18 +2,24 @@ CC = cc
CFLAGS = -Wall -Wextra -g
LDFLAGS = -ltls
-.PHONY: all clean
+.PHONY: all clean test
all: gmid TAGS README.md
-gmid: gmid.o
- ${CC} gmid.o -o gmid ${LDFLAGS}
+gmid: gmid.o uri.o
+ ${CC} gmid.o uri.o -o gmid ${LDFLAGS}
-TAGS: gmid.c
- -etags gmid.c || true
+TAGS: gmid.c uri.c
+ -etags gmid.c uri.c || true
README.md: gmid.1
mandoc -Tmarkdown gmid.1 | sed -e '1d' -e '$$d' > README.md
clean:
- rm -f gmid.o gmid
+	rm -f *.o gmid uri_test
+
+# test driver for the URI parser; built on demand, not part of `all'
+uri_test: uri_test.o uri.o
+	${CC} uri_test.o uri.o -o uri_test ${LDFLAGS}
+
+test: uri_test
+	./uri_test
diff --git a/gmid.c b/gmid.c
index 7376a34..fb98cfc 100644
--- a/gmid.c
+++ b/gmid.c
@@ -34,6 +34,28 @@ int port;
int foreground;
int connected_clients;
+struct etm { /* file extension to mime */
+ const char *mime; /* MIME type string */
+ const char *ext; /* file extension, without the leading dot */
+} filetypes[] = {
+ {"application/pdf", "pdf"},
+
+ {"image/gif", "gif"},
+ {"image/jpeg", "jpg"},
+ {"image/jpeg", "jpeg"},
+ {"image/png", "png"},
+ {"image/svg+xml", "svg"},
+
+ {"text/gemini", "gemini"},
+ {"text/gemini", "gmi"},
+ {"text/markdown", "markdown"},
+ {"text/markdown", "md"},
+ {"text/plain", "txt"},
+ {"text/xml", "xml"},
+
+ {NULL, NULL} /* NULL pair closes the table (presumably the lookup's stop marker) */
+};
+
void
siginfo_handler(int sig)
{
@@ -51,102 +73,6 @@ starts_with(const char *str, const char *prefix)
return 1;
}
-char *
-url_after_proto(char *url)
-{
- char *s;
- const char *proto = "gemini:";
- const char *marker = "//";
-
- /* a relative URL */
- if ((s = strstr(url, marker)) == NULL)
- return url;
-
- /*
- * if a protocol is not specified, gemini should be implied:
- * this handles the case of //example.com
- */
- if (s == url)
- return s + strlen(marker);
-
- if (s - strlen(proto) != url)
- return NULL;
-
- if (!starts_with(url, proto))
- return NULL;
-
- return s + strlen(marker);
-}
-
-char *
-url_start_of_request(char *url)
-{
- char *s, *t;
-
- if ((s = url_after_proto(url)) == NULL)
- return NULL;
-
- /* non-absolute URL */
- if (s == url)
- return s;
-
- if ((t = strstr(s, "/")) == NULL)
- return s + strlen(s);
- return t;
-}
-
-int
-url_trim(struct client *c, char *url)
-{
- const char *e = "\r\n";
- char *s;
-
- if ((s = strstr(url, e)) == NULL)
- return 0;
- s[0] = '\0';
- s[1] = '\0';
-
- if (s[2] != '\0') {
- LOGE(c, "%s", "request longer than 1024 bytes");
- return 0;
- }
-
- return 1;
-}
-
-char *
-adjust_path(char *path)
-{
- char *s, *query;
- size_t len;
-
- if ((query = strchr(path, '?')) != NULL) {
- *query = '\0';
- query++;
- }
-
- /* /.. -> / */
- len = strlen(path);
- if (len >= 3) {
- if (!strcmp(&path[len-3], "/..")) {
- path[len-2] = '\0';
- }
- }
-
- /* if the path is only `..` trim out and exit */
- if (!strcmp(path, "..")) {
- path[0] = '\0';
- return query;
- }
-
- /* remove every ../ in the path */
- while (1) {
- if ((s = strstr(path, "../")) == NULL)
- return query;
- memmove(s, s+3, strlen(s)+1); /* copy also the \0 */
- }
-}
-
int
start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason)
{
@@ -224,7 +150,7 @@ check_path(struct client *c, const char *path, int *fd)
struct stat sb;
assert(path != NULL);
- if ((*fd = openat(dirfd, path,
+ if ((*fd = openat(dirfd, *path ? path : ".",
O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
return FILE_MISSING;
}
@@ -288,16 +214,8 @@ err:
int
-open_file(char *path, char *query, struct pollfd *fds, struct client *c)
+open_file(char *fpath, char *query, struct pollfd *fds, struct client *c)
{
- char fpath[PATHBUF];
-
- bzero(fpath, sizeof(fpath));
-
- if (*path != '.')
- fpath[0] = '.';
- strlcat(fpath, path, PATHBUF);
-
switch (check_path(c, fpath, &c->fd)) {
case FILE_EXECUTABLE:
/* +2 to skip the ./ */
@@ -578,8 +496,8 @@ void
handle(struct pollfd *fds, struct client *client)
{
char buf[GEMINI_URL_LEN];
- char *path;
- char *query;
+ const char *parse_err;
+ struct uri uri;
switch (client->state) {
case S_OPEN:
@@ -599,26 +517,19 @@ handle(struct pollfd *fds, struct client *client)
return;
}
- if (!url_trim(client, buf)) {
- if (!start_reply(fds, client, BAD_REQUEST, "bad request"))
- return;
- goodbye(fds, client);
- return;
- }
-
- if ((path = url_start_of_request(buf)) == NULL) {
- if (!start_reply(fds, client, BAD_REQUEST, "bad request"))
+ if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) {
+ if (!start_reply(fds, client, BAD_REQUEST, parse_err))
return;
goodbye(fds, client);
return;
}
- query = adjust_path(path);
- LOGI(client, "GET %s%s%s", path,
- query ? "?" : "",
- query ? query : "");
+ LOGI(client, "GET %s%s%s",
+ *uri.path ? uri.path : "/",
+ *uri.query ? "?" : "",
+ *uri.query ? uri.query : "");
- send_file(path, query, fds, client);
+ send_file(uri.path, uri.query, fds, client);
break;
case S_INITIALIZING:
diff --git a/gmid.h b/gmid.h
index 62288a8..d8a050b 100644
--- a/gmid.h
+++ b/gmid.h
@@ -107,6 +107,17 @@ struct client {
struct in_addr addr;
};
+
+struct uri { /* result of parse_uri(); every string points into the parsed buffer */
+ char *schema; /* scheme, i.e. the text before the ':' */
+ char *host; /* host part of the authority, percent-decoded */
+ char *port; /* port as a string; "" if the URI has none */
+ uint16_t port_no; /* port as a number; left at 0 if the URI has none */
+ char *path; /* text after the authority's '/', normalized by path_clean() */
+ char *query; /* text after '?', percent-decoded; "" if absent */
+ char *fragment; /* text after '#', taken verbatim; "" if absent */
+};
+
enum {
FILE_EXISTS,
FILE_EXECUTABLE,
@@ -114,35 +125,10 @@ enum {
FILE_MISSING,
};
-struct etm { /* file extension to mime */
- const char *mime;
- const char *ext;
-} filetypes[] = {
- {"application/pdf", "pdf"},
-
- {"image/gif", "gif"},
- {"image/jpeg", "jpg"},
- {"image/jpeg", "jpeg"},
- {"image/png", "png"},
- {"image/svg+xml", "svg"},
-
- {"text/gemini", "gemini"},
- {"text/gemini", "gmi"},
- {"text/markdown", "markdown"},
- {"text/markdown", "md"},
- {"text/plain", "txt"},
- {"text/xml", "xml"},
-
- {NULL, NULL}
-};
-
+/* gmid.c */
void siginfo_handler(int);
int starts_with(const char*, const char*);
-char *url_after_proto(char*);
-char *url_start_of_request(char*);
-int url_trim(struct client*, char*);
-char *adjust_path(char*);
ssize_t filesize(int);
int start_reply(struct pollfd*, struct client*, int, const char*);
@@ -167,4 +153,8 @@ void loop(struct tls*, int);
void usage(const char*);
+/* uri.c */
+int parse_uri(char*, struct uri*, const char**);
+int trim_req_uri(char*);
+
#endif
diff --git a/uri.c b/uri.c
new file mode 100644
index 0000000..245928a
--- /dev/null
+++ b/uri.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "gmid.h"
+
+/*
+ * Notes from RFC3986
+ *
+ * => gemini://tanso.net/rfc/rfc3986.txt
+ *
+ *
+ * ABNF
+ * ====
+ *
+ * pct-encoded = "%" HEXDIG HEXDIG
+ *
+ * reserved = gen-delims / sub-delims
+ * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ * / "*" / "+" / "," / ";" / "="
+ *
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ *
+ * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * hier-part = "//" authority path-abempty
+ * / path-absolute
+ * / path-rootless
+ * / path-empty
+ *
+ * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * (note that userinfo isn't used for Gemini URL)
+ *
+ * host = IP-literal / IPv4address / reg-name
+ * reg-name = *( unreserved / pct-encoded / sub-delims )
+ *
+ * port = *DIGIT
+ *
+ * path = path-abempty ; begins with "/" or is empty
+ * / path-absolute ; begins with "/" but not "//"
+ * / path-noscheme ; begins with a non-colon segment
+ * / path-rootless ; begins with a segment
+ * / path-empty ; zero characters
+ *
+ * path-abempty = *( "/" segment )
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ * path-noscheme = ; not used
+ * path-rootless = ; not used
+ * path-empty = ; not used
+ *
+ * segment = *pchar
+ * segment-nz = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ *
+ * query = *( pchar / "/" / "?" )
+ *
+ * fragment = *( pchar / "/" / "?" )
+ *
+ *
+ * EXAMPLE
+ * =======
+ *
+ * foo://example.com:8042/over/there?name=ferret#nose
+ * \_/ \______________/\_________/ \_________/ \__/
+ * | | | | |
+ * scheme authority path query fragment
+ *
+ */
+
+struct parser { /* state threaded through the parse_* routines */
+ char *uri; /* current position in the URI buffer, which is edited in place */
+ struct uri *parsed; /* where the components found so far are stored */
+ const char *err; /* reason of the last failure; NULL until one occurs */
+};
+
+/*
+ * Character classes from RFC3986.
+ *
+ * XXX: these macros expand their argument multiple times, so never
+ * pass an expression with side effects.  The argument is fully
+ * parenthesized, and converted to unsigned char before it reaches
+ * isalnum(3), whose behaviour is undefined for negative values (as
+ * plain char is for raw UTF-8 bytes on most platforms).
+ */
+
+/* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */
+#define UNRESERVED(p)				\
+	(isalnum((unsigned char)(p))		\
+	    || (p) == '-'			\
+	    || (p) == '.'			\
+	    || (p) == '_'			\
+	    || (p) == '~')
+
+/* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" */
+#define SUB_DELIMITERS(p)			\
+	((p) == '!'				\
+	    || (p) == '$'			\
+	    || (p) == '&'			\
+	    || (p) == '\''			\
+	    || (p) == '('			\
+	    || (p) == ')'			\
+	    || (p) == '*'			\
+	    || (p) == '+'			\
+	    || (p) == ','			\
+	    || (p) == ';'			\
+	    || (p) == '=')
+
+/* Value of a hex digit; c must already have passed isxdigit(3). */
+static int
+pct_hexval(unsigned char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	return c - 'A' + 10;
+}
+
+/*
+ * pct-encoded = "%" HEXDIG HEXDIG
+ *
+ * Decode in place the percent-encoded octet p->uri points at.  On
+ * success the '%' is overwritten with the decoded byte, the two hex
+ * digits are removed by shifting the rest of the string left, and 1
+ * is returned; p->uri is NOT advanced, the callers step over the
+ * decoded byte themselves.
+ *
+ * Returns 0 without touching p->err if p->uri is not at a '%', and 0
+ * with p->err set if the escape is malformed or decodes to NUL: an
+ * embedded NUL would silently truncate the component for every
+ * consumer of the resulting C string.
+ */
+static int
+parse_pct_encoded(struct parser *p)
+{
+	int octet;
+
+	if (*p->uri != '%')
+		return 0;
+
+	/*
+	 * The || short-circuit keeps us from reading past the NUL when
+	 * the string ends right after the '%'.
+	 */
+	if (!isxdigit((unsigned char)p->uri[1]) ||
+	    !isxdigit((unsigned char)p->uri[2])) {
+		p->err = "illegal percent-encoding";
+		return 0;
+	}
+
+	octet = pct_hexval((unsigned char)p->uri[1]) * 16
+	    + pct_hexval((unsigned char)p->uri[2]);
+	if (octet == 0) {
+		p->err = "illegal percent-encoding";
+		return 0;
+	}
+
+	*p->uri = (char)octet;
+	/* drop the two hex digits; +1 to move the \0 too */
+	memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1);
+
+	return 1;
+}
+
+/*
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://"
+ *
+ * Parse the scheme and the "//" marker that must follow it.  The ':'
+ * is replaced with a NUL so that parsed->schema is a proper string,
+ * and p->uri is left on the first byte of the authority.  Returns 1
+ * on success, 0 with p->err set otherwise.
+ */
+static int
+parse_scheme(struct parser *p)
+{
+	p->parsed->schema = p->uri;
+
+	/*
+	 * The ctype functions are only defined for values representable
+	 * as unsigned char (or EOF), hence the casts.
+	 */
+	if (!isalpha((unsigned char)*p->uri)) {
+		p->err = "illegal character in scheme";
+		return 0;
+	}
+
+	p->uri++;
+	while (isalnum((unsigned char)*p->uri)
+	    || *p->uri == '+'
+	    || *p->uri == '-'
+	    || *p->uri == '.')
+		p->uri++;
+
+	if (*p->uri != ':') {
+		p->err = "illegal character in scheme";
+		return 0;
+	}
+
+	/* terminate the scheme where the ':' was */
+	*p->uri = '\0';
+	/* the second test runs only if the first '/' was found */
+	if (*(++p->uri) != '/' || *(++p->uri) != '/') {
+		p->err = "invalid marker after scheme";
+		return 0;
+	}
+
+	p->uri++;
+	return 1;
+}
+
+/*
+ * port = *DIGIT
+ *
+ * Parse the port that follows the ':' of the authority.  The digits
+ * are kept as a string in parsed->port and as a number in
+ * parsed->port_no.  The port must be followed by a '/', which is
+ * consumed and replaced with a NUL, or by the end of the URI.
+ * Returns 1 on success, 0 with p->err set otherwise.
+ */
+static int
+parse_port(struct parser *p)
+{
+	uint32_t i = 0;
+
+	p->parsed->port = p->uri;
+
+	/*
+	 * isdigit(3) is only defined for values representable as
+	 * unsigned char (or EOF), hence the cast.
+	 */
+	for (; isdigit((unsigned char)*p->uri); p->uri++) {
+		i = i * 10 + *p->uri - '0';
+		/* checked at every digit, so i can never wrap around */
+		if (i > UINT16_MAX) {
+			p->err = "port number too large";
+			return 0;
+		}
+	}
+
+	if (*p->uri != '/' && *p->uri != '\0') {
+		p->err = "illegal character in port number";
+		return 0;
+	}
+
+	p->parsed->port_no = i;
+
+	if (*p->uri != '\0') {
+		*p->uri = '\0';
+		p->uri++;
+	}
+
+	return 1;
+}
+
+/*
+ * reg-name = *( unreserved / sub-delims / pct-encoded )
+ *
+ * Parse the host and, if a ':' follows it, the port.  The delimiter
+ * that ends the host (':' or '/') is replaced with a NUL and skipped.
+ * Returns 1 on success, 0 with p->err set otherwise.
+ *
+ * TODO: add support for ip-literal and ipv4addr ?
+ */
+static int
+parse_authority(struct parser *p)
+{
+	p->parsed->host = p->uri;
+
+	for (;;) {
+		if (!UNRESERVED(*p->uri)
+		    && !SUB_DELIMITERS(*p->uri)
+		    && !parse_pct_encoded(p))
+			break;
+		p->uri++;
+	}
+
+	switch (*p->uri) {
+	case ':':
+		*p->uri++ = '\0';
+		return parse_port(p);
+	case '/':
+		*p->uri++ = '\0';
+		return 1;
+	case '\0':
+		return 1;
+	default:
+		p->err = "illegal character in authority section";
+		return 0;
+	}
+}
+
+/*
+ * Normalize path in place, with an algorithm similar to the one
+ * implemented in Go's path.Clean:
+ *
+ *  1. Strip leading slashes, so the result is never absolute
+ *  2. Replace multiple slashes with a single slash
+ *  3. Eliminate each . path name element
+ *  4. Eliminate each .. along with the non-.. element that precedes
+ *     it, or fail if there is none (Go would only discard it)
+ *
+ * Only whole elements are considered: "foo..bar" and "a./b" are left
+ * alone.  A trailing slash is kept ("foo/" stays "foo/"), and eliding
+ * the last element leaves one behind ("foo/bar/.." becomes "foo/").
+ *
+ * Unlike path.Clean, this function returns the empty string if the
+ * original path is equivalent to "/".
+ *
+ * Returns 1 on success and 0 if the path would escape the root; in
+ * the latter case the content of path is unspecified.
+ */
+static int
+path_clean(char *path)
+{
+	char	*src = path;	/* next byte to read */
+	char	*dst = path;	/* end of the cleaned output, dst <= src */
+	char	*seg;
+	size_t	 len;
+	int	 slash;
+
+	while (*src == '/')
+		src++;
+
+	while (*src != '\0') {
+		/* isolate the next element... */
+		seg = src;
+		while (*src != '\0' && *src != '/')
+			src++;
+		len = (size_t)(src - seg);
+
+		/* ...and the run of slashes after it, if any */
+		slash = *src == '/';
+		while (*src == '/')
+			src++;
+
+		if (len == 1 && seg[0] == '.')
+			continue;
+
+		if (len == 2 && seg[0] == '.' && seg[1] == '.') {
+			if (dst == path)
+				return 0;
+			/*
+			 * The output is not empty and more input followed
+			 * it, so dst[-1] is the '/' closing the previous
+			 * element: step over it, then rewind to the start
+			 * of that element.
+			 */
+			for (dst--; dst != path && dst[-1] != '/'; dst--)
+				/* noop */ ;
+			continue;
+		}
+
+		memmove(dst, seg, len);
+		dst += len;
+		if (slash)
+			*dst++ = '/';
+	}
+
+	*dst = '\0';
+	return 1;
+}
+
+/*
+ * query = *( pchar / "/" / "?" )
+ *
+ * Parse the query, which ends at a '#' or at the end of the URI.  A
+ * terminating '#' is replaced with a NUL and skipped, leaving p->uri
+ * at the start of the fragment.  Returns 1 on success, 0 with p->err
+ * set otherwise.
+ */
+static int
+parse_query(struct parser *p)
+{
+	p->parsed->query = p->uri;
+
+	for (;;) {
+		if (!UNRESERVED(*p->uri)
+		    && !SUB_DELIMITERS(*p->uri)
+		    && *p->uri != '/'
+		    && *p->uri != '?'
+		    && !parse_pct_encoded(p))
+			break;
+		p->uri++;
+	}
+
+	switch (*p->uri) {
+	case '\0':
+		return 1;
+	case '#':
+		*p->uri++ = '\0';
+		return 1;
+	default:
+		p->err = "illegal character in query";
+		return 0;
+	}
+}
+
+/*
+ * fragment = *( pchar / "/" / "?" )
+ *
+ * The fragment is not validated at all: whatever is left in the
+ * buffer is taken verbatim.  Always returns 1.
+ */
+static int
+parse_fragment(struct parser *p)
+{
+	struct uri *out = p->parsed;
+
+	out->fragment = p->uri;
+	return 1;
+}
+
+/*
+ * *( pchar / "/" )
+ *
+ * Parse the path and then, depending on the delimiter that ends it,
+ * the query and/or the fragment.  Once everything is split the path
+ * is normalized with path_clean().  Returns 1 on success, 0 with
+ * p->err set otherwise.
+ *
+ * XXX: is it too broad?
+ */
+static int
+parse_path(struct parser *p)
+{
+	char	*start = p->uri;
+	char	 delim;
+
+	p->parsed->path = start;
+	if (*start == '\0') {
+		/* nothing after the authority: everything is empty */
+		p->parsed->query = start;
+		p->parsed->fragment = start;
+		return 1;
+	}
+
+	for (;;) {
+		if (!UNRESERVED(*p->uri)
+		    && !SUB_DELIMITERS(*p->uri)
+		    && *p->uri != '/'
+		    && !parse_pct_encoded(p))
+			break;
+		p->uri++;
+	}
+
+	delim = *p->uri;
+	if (delim != '\0' && delim != '?' && delim != '#') {
+		p->err = "illegal character in path";
+		return 0;
+	}
+
+	if (delim != '\0') {
+		/* terminate the path and move past the delimiter */
+		*p->uri++ = '\0';
+
+		/* a '?' introduces a query, a '#' goes straight to the fragment */
+		if (delim == '?' && !parse_query(p))
+			return 0;
+		if (!parse_fragment(p))
+			return 0;
+	}
+
+	if (!path_clean(start)) {
+		p->err = "illegal path";
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Parse uri and store its components in ret.  The buffer is modified
+ * in place (delimiters are overwritten with NULs and percent-encoded
+ * octets are decoded) and the strings in ret point into it, so it
+ * must outlive ret.
+ *
+ * Returns 1 and sets *err_ret to NULL on success; returns 0 and sets
+ * *err_ret to whatever reason the failing routine recorded otherwise.
+ */
+int
+parse_uri(char *uri, struct uri *ret, const char **err_ret)
+{
+	struct parser	 p;
+	char		*end = uri + strlen(uri);
+
+	memset(ret, 0, sizeof(*ret));
+
+	/* initialize optional stuff to the empty string */
+	ret->port = ret->path = ret->query = ret->fragment = end;
+
+	p.uri = uri;
+	p.parsed = ret;
+	p.err = NULL;
+
+	if (parse_scheme(&p) && parse_authority(&p) && parse_path(&p)) {
+		*err_ret = NULL;
+		return 1;
+	}
+
+	*err_ret = p.err;
+	return 0;
+}
+
+/*
+ * Cut the request at the first CRLF.  Returns 1 if the terminator was
+ * found (and replaced with a NUL), 0 if uri doesn't contain one, in
+ * which case uri is left untouched.
+ */
+int
+trim_req_uri(char *uri)
+{
+	char *crlf = strstr(uri, "\r\n");
+
+	if (crlf != NULL)
+		*crlf = '\0';
+	return crlf != NULL;
+}
diff --git a/uri_test.c b/uri_test.c
new file mode 100644
index 0000000..c6521f6
--- /dev/null
+++ b/uri_test.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmid.h"
+
+/*
+ * Run one test case and exit(1) with a file:line diagnostic if it
+ * fails.  This expands to a bare `if' block rather than the usual
+ * do { } while (0) because it is also invoked without a trailing
+ * semicolon; never use it as the body of an unbraced if/else.
+ */
+#define TEST(uri, fail, exp, descr)					\
+	if (!run_test(uri, fail, exp)) {				\
+		fprintf(stderr, "%s:%d: error: %s\n",			\
+		    __FILE__, __LINE__, descr);				\
+		exit(1);						\
+	}
+
+/* Build the expected struct uri; port_no is always 0, it isn't compared. */
+#define URI(schema, host, port, path, query, frag)			\
+	((struct uri){schema, host, port, 0, path, query, frag})
+
+/*
+ * Make the enclosing function return 0 if the string member `field'
+ * differs between the two struct uri pointers, or is NULL in either.
+ * NULL members are printed as "(null)" instead of being handed to %s,
+ * which is undefined behaviour.
+ */
+#define DIFF(wanted, got, field)					\
+	if ((wanted)->field == NULL || (got)->field == NULL ||		\
+	    strcmp((wanted)->field, (got)->field)) {			\
+		fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n",	\
+		    (got)->field != NULL ? (got)->field : "(null)",	\
+		    (wanted)->field != NULL ? (wanted)->field : "(null)"); \
+		return 0;						\
+	}
+
+/* values for the `fail' argument of TEST: is the parse expected to fail? */
+#define PASS 0
+#define FAIL 1
+
+/*
+ * Compare two struct uri one string field at a time (port_no is not
+ * compared).  As called by run_test, p is the expected value and exp
+ * the parser output.  Returns 1 if every field matches; otherwise the
+ * first mismatch is printed on stderr and 0 is returned.
+ */
+int
+diff_uri(struct uri *p, struct uri *exp)
+{
+ DIFF(p, exp, schema);
+ DIFF(p, exp, host);
+ DIFF(p, exp, port);
+ DIFF(p, exp, path);
+ DIFF(p, exp, query);
+ DIFF(p, exp, fragment);
+ return 1;
+}
+
+/*
+ * Parse a private copy of uri and check the outcome: when should_fail
+ * is set the parser must reject it, otherwise it must accept it and
+ * yield the same components as expected.  Returns 1 if the outcome is
+ * the wanted one, 0 otherwise.
+ */
+int
+run_test(const char *uri, int should_fail, struct uri expected)
+{
+	struct uri	 parsed;
+	const char	*error;
+	char		*copy;
+	int		 parsed_ok, ok;
+
+	/* parse_uri edits its input, so work on a copy */
+	if ((copy = strdup(uri)) == NULL)
+		err(1, "strdup");
+
+	fprintf(stderr, "=> %s\n", uri);
+	parsed_ok = parse_uri(copy, &parsed, &error);
+
+	if (!parsed_ok && should_fail) {
+		/* rejected as expected, nothing to compare */
+		ok = 1;
+	} else {
+		if (error != NULL)
+			fprintf(stderr, "> %s\n", error);
+		ok = parsed_ok && !should_fail
+		    && diff_uri(&expected, &parsed);
+	}
+
+	free(copy);
+	return ok;
+}
+
+/*
+ * Run every test case in order; TEST exits with status 1 at the first
+ * failure, so reaching the end means that everything passed.
+ */
+int
+main(void)
+{
+	/* placeholder for the FAIL cases, where the result isn't inspected */
+	struct uri empty = {"", "", "", 0, "", "", ""};
+
+	TEST("http://omarpolo.com",
+	    PASS,
+	    URI("http", "omarpolo.com", "", "", "", ""),
+	    "can parse uri with empty path");
+
+	/* schema */
+	TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
+	TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
+
+	/* authority */
+	TEST("gemini://omarpolo.com",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path");
+	TEST("gemini://omarpolo.com/",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path (alt)");
+	TEST("gemini://omarpolo.com:1965",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:1965/",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:196s",
+	    FAIL,
+	    empty,
+	    "FAIL with invalid port number");
+
+	/* path */
+	TEST("gemini://omarpolo.com/foo/bar/baz",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse simple paths");
+	TEST("gemini://omarpolo.com/foo//bar///baz",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple slashes");
+	TEST("gemini://omarpolo.com/foo/./bar/./././baz",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with . elements");
+	TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with .. elements");
+	TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple .. elements");
+	TEST("gemini://omarpolo.com/foo/..",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../",
+	    PASS,
+	    URI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../..",
+	    FAIL,
+	    empty,
+	    "reject paths that would escape the root");
+
+	/* query */
+	TEST("foo://example.com/foo/?gne",
+	    PASS,
+	    URI("foo", "example.com", "", "foo/", "gne", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne&foo",
+	    PASS,
+	    URI("foo", "example.com", "", "foo/", "gne&foo", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne%2F",
+	    PASS,
+	    URI("foo", "example.com", "", "foo/", "gne/", ""),
+	    "parse query strings");
+
+	/* fragment */
+	TEST("foo://bar.co/#foo",
+	    PASS,
+	    URI("foo", "bar.co", "", "", "", "foo"),
+	    "can recognize fragments");
+
+	/* percent encoding */
+	TEST("foo://bar.com/caf%C3%A8.gmi",
+	    PASS,
+	    URI("foo", "bar.com", "", "cafè.gmi", "", ""),
+	    "can decode");
+
+	return 0;
+}