diff options
| field | value | date |
|---|---|---|
| author | Omar Polo <op@omarpolo.com> | 2021-01-27 10:47:49 +0000 |
| committer | Omar Polo <op@omarpolo.com> | 2021-01-27 10:47:49 +0000 |
| commit | 3300cbe06a9567c66ee63f3866bcbcf3430e0205 (patch) | |
| tree | a7686f8e774573b55ebbe18373a27eb60f44baa4 | |
| parent | 390a61189309451462c0a1dc56c68f71e334ad4b (diff) | |
initial punycode support
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | .gitignore | 1 |
| -rw-r--r-- | ChangeLog | 4 |
| -rw-r--r-- | Makefile | 2 |
| -rw-r--r-- | gmid.c | 9 |
| -rw-r--r-- | gmid.h | 8 |
| -rw-r--r-- | regress/Makefile | 11 |
| -rw-r--r-- | regress/puny-test.c | 78 |
| -rw-r--r-- | server.c | 14 |
| -rw-r--r-- | utf8.c | 17 |
9 files changed, 133 insertions, 11 deletions
@@ -17,4 +17,5 @@ regress/*.pem regress/reg.conf regress/fill-file regress/iri_test +regress/puny-test regress/*.o @@ -1,3 +1,7 @@ +2021-01-26 Omar Polo <op@omarpolo.com> + + * puny.c (puny_decode): initial punycode support! + 2021-01-25 Omar Polo <op@omarpolo.com> * gmid.1: manpage improvements (example and usage) @@ -13,7 +13,7 @@ lex.yy.c: lex.l y.tab.c y.tab.c: parse.y ${YACC} -b y -d parse.y -SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c +SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT} gmid: ${OBJS} @@ -131,7 +131,14 @@ log_request(struct client *c, char *meta, size_t l) /* serialize the IRI */ strlcpy(b, c->iri.schema, sizeof(b)); strlcat(b, "://", sizeof(b)); - strlcat(b, c->iri.host, sizeof(b)); + + /* log the decoded host name, but if it was invalid + * use the raw one. */ + if (*c->domain != '\0') + strlcat(b, c->domain, sizeof(b)); + else + strlcat(b, c->iri.host, sizeof(b)); + strlcat(b, "/", sizeof(b)); strlcat(b, c->iri.path, sizeof(b)); /* TODO: sanitize UTF8 */ if (*c->iri.query != '\0') { /* TODO: sanitize UTF8 */ @@ -54,6 +54,9 @@ #define HOSTSLEN 64 #define LOCLEN 32 +/* RFC1034 imposes this limit. 63+1 for the NUL-terminator */ +#define DOMAIN_NAME_LEN (63+1) + #define LOGE(c, fmt, ...) logs(LOG_ERR, c, fmt, __VA_ARGS__) #define LOGW(c, fmt, ...) logs(LOG_WARNING, c, fmt, __VA_ARGS__) #define LOGN(c, fmt, ...) 
logs(LOG_NOTICE, c, fmt, __VA_ARGS__) @@ -134,6 +137,7 @@ struct client { struct tls *ctx; char req[GEMINI_URL_LEN]; struct iri iri; + char domain[DOMAIN_NAME_LEN]; int state, next; int code; const char *meta; @@ -236,9 +240,13 @@ void sandbox(void); /* utf8.c */ int valid_multibyte_utf8(struct parser*); +char *utf8_nth(char*, size_t); /* iri.c */ int parse_iri(char*, struct iri*, const char**); int trim_req_iri(char*); +/* puny.c */ +int puny_decode(char*, char*, size_t); + #endif diff --git a/regress/Makefile b/regress/Makefile index 3c9c572..5000165 100644 --- a/regress/Makefile +++ b/regress/Makefile @@ -2,11 +2,16 @@ include ../Makefile.local .PHONY: all clean runtime -all: iri_test runtime +all: puny-test testdata iri_test cert.pem + ./puny-test + ./runtime ./iri_test +puny-test: puny-test.o ../puny.o ../utf8.o + ${CC} puny-test.o ../puny.o ../utf8.o -o puny-test + iri_test: iri_test.o ../iri.o ../utf8.o - ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS} + ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test fill-file: fill-file.o ${CC} fill-file.o -o fill-file @@ -38,5 +43,5 @@ testdata: fill-file cp hello testdata/dir cp testdata/index.gmi testdata/dir/foo.gmi -runtime: testdata cert.pem +runtime: testdata ./runtime diff --git a/regress/puny-test.c b/regress/puny-test.c new file mode 100644 index 0000000..b25f82a --- /dev/null +++ b/regress/puny-test.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Omar Polo <op@omarpolo.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <string.h> + +#include "../gmid.h" + +struct suite { + const char *src; + const char *res; +} t[] = { + {"foo", "foo"}, + {"xn-invalid", "xn-invalid"}, + {"naïve", "naïve"}, + {"xn--8ca", "è"}, + {"xn--caff-8oa", "caffè"}, + {"xn--nave-6pa", "naïve"}, + {"xn--e-0mbbc", "τeστ"}, + {"xn--8ca67lbac", "τèστ"}, + {"xn--28j2a3ar1p", "こんにちは"}, + {"xn--hello--ur7iy09x", "hello-世界"}, + {"xn--hi--hi-rr7iy09x", "hi-世界-hi"}, + {"xn--caf-8la.foo.org", "cafè.foo.org"}, + /* 3 bytes */ + {"xn--j6h", "♨"}, + /* 4 bytes */ + {"xn--x73l", "𩸽"}, + {"xn--x73laaa", "𩸽𩸽𩸽𩸽"}, + {NULL, NULL} +}; + +int +main(int argc, char **argv) +{ + struct suite *i; + int failed; + char *hostname; + char buf[64]; /* name len */ + + failed = 0; + for (i = t; i->src != NULL; ++i) { + if ((hostname = strdup(i->src)) == NULL) + return 0; + + memset(buf, 0, sizeof(buf)); + if (!puny_decode(hostname, buf, sizeof(buf))) { + printf("decode: failure with %s\n", i->src); + failed = 1; + continue; + } + + if (strcmp(buf, i->res)) { + printf("ERR: expected \"%s\", got \"%s\"\n", + i->res, buf); + failed = 1; + continue; + } else + printf("OK: %s => %s\n", i->src, buf); + + free(hostname); + } + + return failed; +} @@ -262,12 +262,10 @@ handle_handshake(struct pollfd *fds, struct client *c) } servname = tls_conn_servername(c->ctx); + puny_decode(servname, c->domain, sizeof(c->domain)); for (h = hosts; h->domain != NULL; ++h) { - if (!strcmp(h->domain, "*")) - break; - - if (servname != NULL && !fnmatch(h->domain, servname, 0)) + if (!fnmatch(h->domain, c->domain, 0)) break; } @@ -290,6 +288,7 @@ void handle_open_conn(struct pollfd *fds, struct client 
*c) { const char *parse_err = "invalid request"; + char decoded[DOMAIN_NAME_LEN]; bzero(c->req, sizeof(c->req)); bzero(&c->iri, sizeof(c->iri)); @@ -314,8 +313,11 @@ handle_open_conn(struct pollfd *fds, struct client *c) return; } - /* XXX: we should check that the SNI matches the requested host */ - if (strcmp(c->iri.schema, "gemini") || c->iri.port_no != conf.port) { + puny_decode(c->iri.host, decoded, sizeof(decoded)); + + if (c->iri.port_no != conf.port + || strcmp(c->iri.schema, "gemini") + || strcmp(decoded, c->domain)) { start_reply(fds, c, PROXY_REFUSED, "won't proxy request"); return; } @@ -77,3 +77,20 @@ valid_multibyte_utf8(struct parser *p) } return 1; } + +char * +utf8_nth(char *s, size_t n) +{ + size_t i; + uint32_t cp = 0, state = 0; + + for (i = 0; *s && i < n; ++s) + if (!utf8_decode(&state, &cp, *s)) + ++i; + + if (state != UTF8_ACCEPT) + return NULL; + if (i == n) + return s; + return NULL; +} |