aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Omar Polo <op@omarpolo.com> 2021-01-27 10:47:49 +0000
committer Omar Polo <op@omarpolo.com> 2021-01-27 10:47:49 +0000
commit 3300cbe06a9567c66ee63f3866bcbcf3430e0205 (patch)
tree a7686f8e774573b55ebbe18373a27eb60f44baa4
parent 390a61189309451462c0a1dc56c68f71e334ad4b (diff)
initial punycode support
-rw-r--r-- .gitignore 1
-rw-r--r-- ChangeLog 4
-rw-r--r-- Makefile 2
-rw-r--r-- gmid.c 9
-rw-r--r-- gmid.h 8
-rw-r--r-- regress/Makefile 11
-rw-r--r-- regress/puny-test.c 78
-rw-r--r-- server.c 14
-rw-r--r-- utf8.c 17
9 files changed, 133 insertions, 11 deletions
diff --git a/.gitignore b/.gitignore
index c7d3334..ba20330 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,5 @@ regress/*.pem
regress/reg.conf
regress/fill-file
regress/iri_test
+regress/puny-test
regress/*.o
diff --git a/ChangeLog b/ChangeLog
index 4082685..b0116b3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2021-01-26 Omar Polo <op@omarpolo.com>
+
+ * puny.c (puny_decode): initial punycode support!
+
2021-01-25 Omar Polo <op@omarpolo.com>
* gmid.1: manpage improvements (example and usage)
diff --git a/Makefile b/Makefile
index c600dd3..576b49c 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ lex.yy.c: lex.l y.tab.c
y.tab.c: parse.y
${YACC} -b y -d parse.y
-SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c
+SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c
OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT}
gmid: ${OBJS}
diff --git a/gmid.c b/gmid.c
index 7a973f5..09a1d6f 100644
--- a/gmid.c
+++ b/gmid.c
@@ -131,7 +131,14 @@ log_request(struct client *c, char *meta, size_t l)
/* serialize the IRI */
strlcpy(b, c->iri.schema, sizeof(b));
strlcat(b, "://", sizeof(b));
- strlcat(b, c->iri.host, sizeof(b));
+
+ /* log the decoded host name, but if it was invalid
+ * use the raw one. */
+ if (*c->domain != '\0')
+ strlcat(b, c->domain, sizeof(b));
+ else
+ strlcat(b, c->iri.host, sizeof(b));
+
strlcat(b, "/", sizeof(b));
strlcat(b, c->iri.path, sizeof(b)); /* TODO: sanitize UTF8 */
if (*c->iri.query != '\0') { /* TODO: sanitize UTF8 */
diff --git a/gmid.h b/gmid.h
index 9774257..b4e6b84 100644
--- a/gmid.h
+++ b/gmid.h
@@ -54,6 +54,9 @@
#define HOSTSLEN 64
#define LOCLEN 32
+/* RFC1034 caps each label at 63 octets (full names at 255). 63+1 for the NUL-terminator */
+#define DOMAIN_NAME_LEN (63+1)
+
#define LOGE(c, fmt, ...) logs(LOG_ERR, c, fmt, __VA_ARGS__)
#define LOGW(c, fmt, ...) logs(LOG_WARNING, c, fmt, __VA_ARGS__)
#define LOGN(c, fmt, ...) logs(LOG_NOTICE, c, fmt, __VA_ARGS__)
@@ -134,6 +137,7 @@ struct client {
struct tls *ctx;
char req[GEMINI_URL_LEN];
struct iri iri;
+ char domain[DOMAIN_NAME_LEN];
int state, next;
int code;
const char *meta;
@@ -236,9 +240,13 @@ void sandbox(void);
/* utf8.c */
int valid_multibyte_utf8(struct parser*);
+char *utf8_nth(char*, size_t);
/* iri.c */
int parse_iri(char*, struct iri*, const char**);
int trim_req_iri(char*);
+/* puny.c */
+int puny_decode(char*, char*, size_t);
+
#endif
diff --git a/regress/Makefile b/regress/Makefile
index 3c9c572..5000165 100644
--- a/regress/Makefile
+++ b/regress/Makefile
@@ -2,11 +2,16 @@ include ../Makefile.local
.PHONY: all clean runtime
-all: iri_test runtime
+all: puny-test testdata iri_test cert.pem
+ ./puny-test
+ ./runtime
./iri_test
+puny-test: puny-test.o ../puny.o ../utf8.o
+ ${CC} puny-test.o ../puny.o ../utf8.o -o puny-test
+
iri_test: iri_test.o ../iri.o ../utf8.o
- ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS}
+ ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test
fill-file: fill-file.o
${CC} fill-file.o -o fill-file
@@ -38,5 +43,5 @@ testdata: fill-file
cp hello testdata/dir
cp testdata/index.gmi testdata/dir/foo.gmi
-runtime: testdata cert.pem
+runtime: testdata
./runtime
diff --git a/regress/puny-test.c b/regress/puny-test.c
new file mode 100644
index 0000000..b25f82a
--- /dev/null
+++ b/regress/puny-test.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "../gmid.h"
+
+/* Test vectors: src is the input to decode, res the expected UTF-8 output. */
+struct suite {
+	const char *src;
+	const char *res;
+};
+struct suite t[] = {
+	{ .src = "foo", .res = "foo" },
+	{ .src = "xn-invalid", .res = "xn-invalid" },
+	{ .src = "naïve", .res = "naïve" },
+	{ .src = "xn--8ca", .res = "è" },
+	{ .src = "xn--caff-8oa", .res = "caffè" },
+	{ .src = "xn--nave-6pa", .res = "naïve" },
+	{ .src = "xn--e-0mbbc", .res = "τeστ" },
+	{ .src = "xn--8ca67lbac", .res = "τèστ" },
+	{ .src = "xn--28j2a3ar1p", .res = "こんにちは" },
+	{ .src = "xn--hello--ur7iy09x", .res = "hello-世界" },
+	{ .src = "xn--hi--hi-rr7iy09x", .res = "hi-世界-hi" },
+	{ .src = "xn--caf-8la.foo.org", .res = "cafè.foo.org" },
+	{ .src = "xn--j6h", .res = "♨" },	/* 3-byte UTF-8 character */
+	{ .src = "xn--x73l", .res = "𩸽" },	/* 4-byte UTF-8 character */
+	{ .src = "xn--x73laaa", .res = "𩸽𩸽𩸽𩸽" },	/* 4-byte, repeated */
+	{ .src = NULL, .res = NULL }	/* sentinel: main stops at src == NULL */
+};
+
+/*
+ * Decode every entry of the table t with puny_decode and compare the
+ * result with the expected string.  Returns non-zero on any failure.
+ */
+int
+main(int argc, char **argv)
+{
+	struct suite *i;
+	int failed = 0;
+	char *hostname;
+	char buf[64]; /* name len */
+
+	for (i = t; i->src != NULL; ++i) {
+		/* puny_decode takes a non-const char*: hand it a copy */
+		if ((hostname = strdup(i->src)) == NULL)
+			return 1; /* can't run the suite: not a success */
+
+		memset(buf, 0, sizeof(buf));
+		if (!puny_decode(hostname, buf, sizeof(buf))) {
+			printf("decode: failure with %s\n", i->src);
+			failed = 1;
+		} else if (strcmp(buf, i->res)) {
+			printf("ERR: expected \"%s\", got \"%s\"\n",
+			    i->res, buf);
+			failed = 1;
+		} else
+			printf("OK: %s => %s\n", i->src, buf);
+
+		free(hostname); /* reached on every path, no leak */
+	}
+
+	return failed;
+}
diff --git a/server.c b/server.c
index a305da3..1383974 100644
--- a/server.c
+++ b/server.c
@@ -262,12 +262,10 @@ handle_handshake(struct pollfd *fds, struct client *c)
}
servname = tls_conn_servername(c->ctx);
+ puny_decode(servname, c->domain, sizeof(c->domain));
for (h = hosts; h->domain != NULL; ++h) {
- if (!strcmp(h->domain, "*"))
- break;
-
- if (servname != NULL && !fnmatch(h->domain, servname, 0))
+ if (!fnmatch(h->domain, c->domain, 0))
break;
}
@@ -290,6 +288,7 @@ void
handle_open_conn(struct pollfd *fds, struct client *c)
{
const char *parse_err = "invalid request";
+ char decoded[DOMAIN_NAME_LEN];
bzero(c->req, sizeof(c->req));
bzero(&c->iri, sizeof(c->iri));
@@ -314,8 +313,11 @@ handle_open_conn(struct pollfd *fds, struct client *c)
return;
}
- /* XXX: we should check that the SNI matches the requested host */
- if (strcmp(c->iri.schema, "gemini") || c->iri.port_no != conf.port) {
+ puny_decode(c->iri.host, decoded, sizeof(decoded));
+
+ if (c->iri.port_no != conf.port
+ || strcmp(c->iri.schema, "gemini")
+ || strcmp(decoded, c->domain)) {
start_reply(fds, c, PROXY_REFUSED, "won't proxy request");
return;
}
diff --git a/utf8.c b/utf8.c
index 8f530b0..20985b4 100644
--- a/utf8.c
+++ b/utf8.c
@@ -77,3 +77,20 @@ valid_multibyte_utf8(struct parser *p)
}
return 1;
}
+
+/* Advance s byte by byte, counting one character each time utf8_decode
+ * returns 0.  Return the position reached after n characters, or NULL if
+ * s ends first or the decoder state is not UTF8_ACCEPT. */
+char *
+utf8_nth(char *s, size_t n)
+{
+	uint32_t state = 0, cp = 0;
+	size_t seen = 0;
+
+	while (*s != '\0' && seen < n) {
+		if (utf8_decode(&state, &cp, *s) == 0)
+			seen++;
+		s++;
+	}
+	return (state == UTF8_ACCEPT && seen == n) ? s : NULL;
+}