aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOmar Polo <op@omarpolo.com>2021-01-11 13:08:00 +0000
committerOmar Polo <op@omarpolo.com>2021-01-11 13:08:00 +0000
commit3c1cf9d07cb679ba444566159538b510902f2de9 (patch)
tree3f3e8abbbddd868cd3553bb6d7e15d5667f266f6
parent28778244d67be7024868a5095e5eedda22a3ed98 (diff)
s/uri/iri since we accept IRIs
-rw-r--r--.gitignore2
-rw-r--r--Makefile18
-rw-r--r--gmid.c12
-rw-r--r--gmid.h12
-rw-r--r--iri.c (renamed from uri.c)196
-rw-r--r--iri_test.c (renamed from uri_test.c)78
-rw-r--r--utf8.c4
7 files changed, 127 insertions, 195 deletions
diff --git a/.gitignore b/.gitignore
index a7794ec..5660e44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,6 @@ cert.pem
key.pem
TAGS
gmid
-uri_test
+iri_test
*.o
docs
diff --git a/Makefile b/Makefile
index 76f05bd..3e0b72e 100644
--- a/Makefile
+++ b/Makefile
@@ -6,17 +6,17 @@ LDFLAGS = -ltls
all: gmid TAGS README.md
-gmid: gmid.o uri.o utf8.o
- ${CC} gmid.o uri.o utf8.o -o gmid ${LDFLAGS}
+gmid: gmid.o iri.o utf8.o
+ ${CC} gmid.o iri.o utf8.o -o gmid ${LDFLAGS}
-TAGS: gmid.c uri.c utf8.c
- -etags gmid.c uri.c utf8.c || true
+TAGS: gmid.c iri.c utf8.c
+ -etags gmid.c iri.c utf8.c || true
clean:
- rm -f *.o gmid
+ rm -f *.o gmid iri_test
-uri_test: uri_test.o uri.o utf8.o
- ${CC} uri_test.o uri.o utf8.o -o uri_test ${LDFLAGS}
+iri_test: iri_test.o iri.o utf8.o
+ ${CC} iri_test.o iri.o utf8.o -o iri_test ${LDFLAGS}
-test: uri_test
- ./uri_test
+test: iri_test
+ ./iri_test
diff --git a/gmid.c b/gmid.c
index ef12066..0c1bed2 100644
--- a/gmid.c
+++ b/gmid.c
@@ -572,7 +572,7 @@ handle(struct pollfd *fds, struct client *client)
{
char buf[GEMINI_URL_LEN];
const char *parse_err;
- struct uri uri;
+ struct iri iri;
switch (client->state) {
case S_OPEN:
@@ -593,7 +593,7 @@ handle(struct pollfd *fds, struct client *client)
}
parse_err = "invalid request";
- if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) {
+ if (!trim_req_iri(buf) || !parse_iri(buf, &iri, &parse_err)) {
if (!start_reply(fds, client, BAD_REQUEST, parse_err))
return;
goodbye(fds, client);
@@ -601,11 +601,11 @@ handle(struct pollfd *fds, struct client *client)
}
LOGI(client, "GET %s%s%s",
- *uri.path ? uri.path : "/",
- *uri.query ? "?" : "",
- *uri.query ? uri.query : "");
+ *iri.path ? iri.path : "/",
+ *iri.query ? "?" : "",
+ *iri.query ? iri.query : "");
- send_file(uri.path, uri.query, fds, client);
+ send_file(iri.path, iri.query, fds, client);
break;
case S_INITIALIZING:
diff --git a/gmid.h b/gmid.h
index 64effde..ecca57f 100644
--- a/gmid.h
+++ b/gmid.h
@@ -70,7 +70,7 @@ struct client {
struct sockaddr_storage addr;
};
-struct uri {
+struct iri {
char *schema;
char *host;
char *port;
@@ -81,8 +81,8 @@ struct uri {
};
struct parser {
- char *uri;
- struct uri *parsed;
+ char *iri;
+ struct iri *parsed;
const char *err;
};
@@ -123,8 +123,8 @@ void usage(const char*);
/* utf8.c */
int valid_multibyte_utf8(struct parser*);
-/* uri.c */
-int parse_uri(char*, struct uri*, const char**);
-int trim_req_uri(char*);
+/* iri.c */
+int parse_iri(char*, struct iri*, const char**);
+int trim_req_iri(char*);
#endif
diff --git a/uri.c b/iri.c
index 1258abb..8c02039 100644
--- a/uri.c
+++ b/iri.c
@@ -19,74 +19,6 @@
#include "gmid.h"
-/*
- * Notes from RFC3986
- *
- * => gemini://tanso.net/rfc/rfc3986.txt
- *
- *
- * ABNF
- * ====
- *
- * pct-encoded "%" HEXDIG HEXDIG
- *
- * reserved = gen-delims / sub-delimis
- * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
- * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- * / "*" / "+" / "," / ";" / "="
- *
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- *
- * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- *
- * hier-part = "//" authority path-abempty
- * / path-absolute
- * / path-rootless
- * / path-empty
- *
- * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- *
- * authority = [ userinfo "@" ] host [ ":" port ]
- *
- * (note that userinfo isn't used for Gemini URL)
- *
- * host = IP-literal / IPv4address / reg-name
- * reg-name = *( unreserved / pct-encoded / sub-delims )
- *
- * port = *DIGIT
- *
- * path = path-abemty ; begins with "/" or is empty
- * / path-absolute ; begins with "/" but not "//"
- * / path-noscheme ; begins with a non-colon segment
- * / path-rootless ; begins with a segment
- * / path-empty ; zero characters
- *
- * path-abemty = *( "/" segment )
- * path-absolute = "/" [ segment-nz *( "/" segment ) ]
- * path-noscheme = ; not used
- * path-rootless = ; not used
- * path-empty = ; not used
- *
- * segment = *pchar
- * segment-nz = 1*pchar
- * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
- * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
- *
- * query = *( pchar / "/" / "?" )
- *
- * fragment = *( pchar / "/" / "?" )
- *
- *
- * EXAMPLE
- * =======
- *
- * foo://example.com:8042/over/there?name=ferret#nose
- * \_/ \______________/\_________/ \_________/ \__/
- * | | | | |
- * scheme authority path query fragment
- *
- */
-
static inline int
unreserved(int p)
{
@@ -116,17 +48,17 @@ sub_delimiters(int p)
static int
parse_pct_encoded(struct parser *p)
{
- if (*p->uri != '%')
+ if (*p->iri != '%')
return 0;
- if (!isxdigit(*(p->uri+1)) || !isxdigit(*(p->uri+2))) {
+ if (!isxdigit(*(p->iri+1)) || !isxdigit(*(p->iri+2))) {
p->err = "illegal percent-encoding";
return 0;
}
- sscanf(p->uri+1, "%2hhx", p->uri);
- memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1);
- if (*p->uri == '\0') {
+ sscanf(p->iri+1, "%2hhx", p->iri);
+ memmove(p->iri+1, p->iri+3, strlen(p->iri+3)+1);
+ if (*p->iri == '\0') {
p->err = "illegal percent-encoding";
return 0;
}
@@ -138,32 +70,32 @@ parse_pct_encoded(struct parser *p)
static int
parse_scheme(struct parser *p)
{
- p->parsed->schema = p->uri;
+ p->parsed->schema = p->iri;
- if (!isalpha(*p->uri)) {
+ if (!isalpha(*p->iri)) {
p->err = "illegal character in scheme";
return 0;
}
- p->uri++;
- while (isalnum(*p->uri)
- || *p->uri == '+'
- || *p->uri == '-'
- || *p->uri == '.')
- p->uri++;
+ p->iri++;
+ while (isalnum(*p->iri)
+ || *p->iri == '+'
+ || *p->iri == '-'
+ || *p->iri == '.')
+ p->iri++;
- if (*p->uri != ':') {
+ if (*p->iri != ':') {
p->err = "illegal character in scheme";
return 0;
}
- *p->uri = '\0';
- if (*(++p->uri) != '/' || *(++p->uri) != '/') {
+ *p->iri = '\0';
+ if (*(++p->iri) != '/' || *(++p->iri) != '/') {
p->err = "invalid marker after scheme";
return 0;
}
- p->uri++;
+ p->iri++;
return 1;
}
@@ -173,26 +105,26 @@ parse_port(struct parser *p)
{
uint32_t i = 0;
- p->parsed->port = p->uri;
+ p->parsed->port = p->iri;
- for (; isdigit(*p->uri); p->uri++) {
- i = i * 10 + *p->uri - '0';
+ for (; isdigit(*p->iri); p->iri++) {
+ i = i * 10 + *p->iri - '0';
if (i > UINT16_MAX) {
p->err = "port number too large";
return 0;
}
}
- if (*p->uri != '/' && *p->uri != '\0') {
+ if (*p->iri != '/' && *p->iri != '\0') {
p->err = "illegal character in port number";
return 0;
}
p->parsed->port_no = i;
- if (*p->uri != '\0') {
- *p->uri = '\0';
- p->uri++;
+ if (*p->iri != '\0') {
+ *p->iri = '\0';
+ p->iri++;
}
return 1;
@@ -203,29 +135,29 @@ parse_port(struct parser *p)
static int
parse_authority(struct parser *p)
{
- p->parsed->host = p->uri;
+ p->parsed->host = p->iri;
- while (unreserved(*p->uri)
- || sub_delimiters(*p->uri)
+ while (unreserved(*p->iri)
+ || sub_delimiters(*p->iri)
|| parse_pct_encoded(p))
- p->uri++;
+ p->iri++;
if (p->err != NULL)
return 0;
- if (*p->uri == ':') {
- *p->uri = '\0';
- p->uri++;
+ if (*p->iri == ':') {
+ *p->iri = '\0';
+ p->iri++;
return parse_port(p);
}
- if (*p->uri == '/') {
- *p->uri = '\0';
- p->uri++;
+ if (*p->iri == '/') {
+ *p->iri = '\0';
+ p->iri++;
return 1;
}
- if (*p->uri == '\0')
+ if (*p->iri == '\0')
return 1;
p->err = "illegal character in authority section";
@@ -305,29 +237,29 @@ path_clean(char *path)
static int
parse_query(struct parser *p)
{
- p->parsed->query = p->uri;
- if (*p->uri == '\0')
+ p->parsed->query = p->iri;
+ if (*p->iri == '\0')
return 1;
- while (unreserved(*p->uri)
- || sub_delimiters(*p->uri)
- || *p->uri == '/'
- || *p->uri == '?'
+ while (unreserved(*p->iri)
+ || sub_delimiters(*p->iri)
+ || *p->iri == '/'
+ || *p->iri == '?'
|| parse_pct_encoded(p)
|| valid_multibyte_utf8(p))
- p->uri++;
+ p->iri++;
if (p->err != NULL)
return 0;
- if (*p->uri != '\0' && *p->uri != '#') {
+ if (*p->iri != '\0' && *p->iri != '#') {
p->err = "illegal character in query";
return 0;
}
- if (*p->uri != '\0') {
- *p->uri = '\0';
- p->uri++;
+ if (*p->iri != '\0') {
+ *p->iri = '\0';
+ p->iri++;
}
return 1;
@@ -337,7 +269,7 @@ parse_query(struct parser *p)
static int
parse_fragment(struct parser *p)
{
- p->parsed->fragment = p->uri;
+ p->parsed->fragment = p->iri;
return 1;
}
@@ -348,31 +280,31 @@ parse_path(struct parser *p)
{
char c;
- p->parsed->path = p->uri;
- if (*p->uri == '\0') {
- p->parsed->query = p->parsed->fragment = p->uri;
+ p->parsed->path = p->iri;
+ if (*p->iri == '\0') {
+ p->parsed->query = p->parsed->fragment = p->iri;
return 1;
}
- while (unreserved(*p->uri)
- || sub_delimiters(*p->uri)
- || *p->uri == '/'
+ while (unreserved(*p->iri)
+ || sub_delimiters(*p->iri)
+ || *p->iri == '/'
|| parse_pct_encoded(p)
|| valid_multibyte_utf8(p))
- p->uri++;
+ p->iri++;
if (p->err != NULL)
return 0;
- if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') {
+ if (*p->iri != '\0' && *p->iri != '?' && *p->iri != '#') {
p->err = "illegal character in path";
return 0;
}
- if (*p->uri != '\0') {
- c = *p->uri;
- *p->uri = '\0';
- p->uri++;
+ if (*p->iri != '\0') {
+ c = *p->iri;
+ *p->iri = '\0';
+ p->iri++;
if (c == '#') {
if (!parse_fragment(p))
@@ -391,15 +323,15 @@ parse_path(struct parser *p)
}
int
-parse_uri(char *uri, struct uri *ret, const char **err_ret)
+parse_iri(char *iri, struct iri *ret, const char **err_ret)
{
char *end;
- struct parser p = {uri, ret, NULL};
+ struct parser p = {iri, ret, NULL};
bzero(ret, sizeof(*ret));
/* initialize optional stuff to the empty string */
- end = uri + strlen(uri);
+ end = iri + strlen(iri);
p.parsed->port = end;
p.parsed->path = end;
p.parsed->query = end;
@@ -415,11 +347,11 @@ parse_uri(char *uri, struct uri *ret, const char **err_ret)
}
int
-trim_req_uri(char *uri)
+trim_req_iri(char *iri)
{
char *i;
- if ((i = strstr(uri, "\r\n")) == NULL)
+ if ((i = strstr(iri, "\r\n")) == NULL)
return 0;
*i = '\0';
return 1;
diff --git a/uri_test.c b/iri_test.c
index 3e4d823..6200cb7 100644
--- a/uri_test.c
+++ b/iri_test.c
@@ -20,15 +20,15 @@
#include "gmid.h"
-#define TEST(uri, fail, exp, descr) \
- if (!run_test(uri, fail, exp)) { \
+#define TEST(iri, fail, exp, descr) \
+ if (!run_test(iri, fail, exp)) { \
fprintf(stderr, "%s:%d: error: %s\n", \
__FILE__, __LINE__, descr); \
exit(1); \
}
-#define URI(schema, host, port, path, query, frag) \
- ((struct uri){schema, host, port, 0, path, query, frag})
+#define IRI(schema, host, port, path, query, frag) \
+ ((struct iri){schema, host, port, 0, path, query, frag})
#define DIFF(wanted, got, field) \
if (wanted->field == NULL || got->field == NULL || \
@@ -42,7 +42,7 @@
#define FAIL 1
int
-diff_uri(struct uri *p, struct uri *exp)
+diff_iri(struct iri *p, struct iri *exp)
{
DIFF(p, exp, schema);
DIFF(p, exp, host);
@@ -54,18 +54,18 @@ diff_uri(struct uri *p, struct uri *exp)
}
int
-run_test(const char *uri, int should_fail, struct uri expected)
+run_test(const char *iri, int should_fail, struct iri expected)
{
int failed, ok = 1;
- char *uri_copy;
- struct uri parsed;
+ char *iri_copy;
+ struct iri parsed;
const char *error;
- if ((uri_copy = strdup(uri)) == NULL)
+ if ((iri_copy = strdup(iri)) == NULL)
err(1, "strdup");
- fprintf(stderr, "=> %s\n", uri);
- failed = !parse_uri(uri_copy, &parsed, &error);
+ fprintf(stderr, "=> %s\n", iri);
+ failed = !parse_iri(iri_copy, &parsed, &error);
if (failed && should_fail)
goto done;
@@ -75,22 +75,22 @@ run_test(const char *uri, int should_fail, struct uri expected)
ok = !failed && !should_fail;
if (ok)
- ok = diff_uri(&expected, &parsed);
+ ok = diff_iri(&expected, &parsed);
done:
- free(uri_copy);
+ free(iri_copy);
return ok;
}
int
main(void)
{
- struct uri empty = {"", "", "", PASS, "", "", ""};
+ struct iri empty = {"", "", "", PASS, "", "", ""};
TEST("http://omarpolo.com",
PASS,
- URI("http", "omarpolo.com", "", "", "", ""),
- "can parse uri with empty path");
+ IRI("http", "omarpolo.com", "", "", "", ""),
+ "can parse iri with empty path");
/* schema */
TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
@@ -101,19 +101,19 @@ main(void)
/* authority */
TEST("gemini://omarpolo.com",
PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
+ IRI("gemini", "omarpolo.com", "", "", "", ""),
"can parse authority with empty path");
TEST("gemini://omarpolo.com/",
PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
+ IRI("gemini", "omarpolo.com", "", "", "", ""),
"can parse authority with empty path (alt)")
TEST("gemini://omarpolo.com:1965",
PASS,
- URI("gemini", "omarpolo.com", "1965", "", "", ""),
+ IRI("gemini", "omarpolo.com", "1965", "", "", ""),
"can parse with port and empty path");
TEST("gemini://omarpolo.com:1965/",
PASS,
- URI("gemini", "omarpolo.com", "1965", "", "", ""),
+ IRI("gemini", "omarpolo.com", "1965", "", "", ""),
"can parse with port and empty path")
TEST("gemini://omarpolo.com:196s",
FAIL,
@@ -123,31 +123,31 @@ main(void)
/* path */
TEST("gemini://omarpolo.com/foo/bar/baz",
PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+ IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
"parse simple paths");
TEST("gemini://omarpolo.com/foo//bar///baz",
PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+ IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
"parse paths with multiple slashes");
TEST("gemini://omarpolo.com/foo/./bar/./././baz",
PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+ IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
"parse paths with . elements");
TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+ IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
"parse paths with .. elements");
TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+ IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
"parse paths with multiple .. elements");
TEST("gemini://omarpolo.com/foo/..",
PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
+ IRI("gemini", "omarpolo.com", "", "", "", ""),
"parse paths with a trailing ..");
TEST("gemini://omarpolo.com/foo/../",
PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
+ IRI("gemini", "omarpolo.com", "", "", "", ""),
"parse paths with a trailing ..");
TEST("gemini://omarpolo.com/foo/../..",
FAIL,
@@ -155,41 +155,41 @@ main(void)
"reject paths that would escape the root");
TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
+ IRI("gemini", "omarpolo.com", "", "", "", ""),
"parse path with lots of cleaning available");
/* query */
TEST("foo://example.com/foo/?gne",
PASS,
- URI("foo", "example.com", "", "foo/", "gne", ""),
+ IRI("foo", "example.com", "", "foo/", "gne", ""),
"parse query strings");
TEST("foo://example.com/foo/?gne&foo",
PASS,
- URI("foo", "example.com", "", "foo/", "gne&foo", ""),
+ IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
"parse query strings");
TEST("foo://example.com/foo/?gne%2F",
PASS,
- URI("foo", "example.com", "", "foo/", "gne/", ""),
+ IRI("foo", "example.com", "", "foo/", "gne/", ""),
"parse query strings");
/* fragment */
TEST("foo://bar.co/#foo",
PASS,
- URI("foo", "bar.co", "", "", "", "foo"),
+ IRI("foo", "bar.co", "", "", "", "foo"),
"can recognize fragments");
/* percent encoding */
TEST("foo://bar.com/caf%C3%A8.gmi",
PASS,
- URI("foo", "bar.com", "", "cafè.gmi", "", ""),
+ IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
"can decode");
TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
PASS,
- URI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
+ IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
"can decode");
TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
PASS,
- URI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
+ IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
"can decode");
TEST("foo://bar.com/foo%2F..%2F..",
FAIL,
@@ -203,19 +203,19 @@ main(void)
/* IRI */
TEST("foo://bar.com/cafè.gmi",
PASS,
- URI("foo", "bar.com", "", "cafè.gmi", "" , ""),
+ IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
"decode IRI (with a 2-byte utf8 seq)");
TEST("foo://bar.com/世界.gmi",
PASS,
- URI("foo", "bar.com", "", "世界.gmi", "" , ""),
+ IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
"decode IRI");
TEST("foo://bar.com/😼.gmi",
PASS,
- URI("foo", "bar.com", "", "😼.gmi", "" , ""),
+ IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
"decode IRI (with a 3-byte utf8 seq)");
TEST("foo://bar.com/😼/𤭢.gmi",
PASS,
- URI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
+ IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
"decode IRI (with a 3-byte and a 4-byte utf8 seq)");
TEST("foo://bar.com/世界/\xC0\x80",
FAIL,
diff --git a/utf8.c b/utf8.c
index 09aad1c..8f530b0 100644
--- a/utf8.c
+++ b/utf8.c
@@ -64,8 +64,8 @@ valid_multibyte_utf8(struct parser *p)
{
uint32_t cp = 0, state = 0;
- for (; *p->uri; p->uri++)
- if (!utf8_decode(&state, &cp, *p->uri))
+ for (; *p->iri; p->iri++)
+ if (!utf8_decode(&state, &cp, *p->iri))
break;
/* reject the ASCII range */