aboutsummaryrefslogtreecommitdiff
path: root/puny.c
diff options
context:
space:
mode:
authorOmar Polo <op@omarpolo.com>2021-01-27 11:21:23 +0000
committerOmar Polo <op@omarpolo.com>2021-01-27 11:21:23 +0000
commit7957cbd9aad6ace179287b2ddcbae0b14a25a90d (patch)
tree1c1ee41b84dc98afd5090e8bc362498298140801 /puny.c
parent42650adec078a7d3e885afbafa1fa4406d4823fb (diff)
const-ify puny_decode (and add puny.c)
Diffstat (limited to 'puny.c')
-rw-r--r--puny.c253
1 files changed, 253 insertions, 0 deletions
diff --git a/puny.c b/puny.c
new file mode 100644
index 0000000..f465198
--- /dev/null
+++ b/puny.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "gmid.h"
+
+#define BASE 36
+#define TMIN 1
+#define TMAX 26
+#define SKEW 38
+#define DAMP 700
+#define IBIAS 72
+#define IN 128
+
+static int
+adapt(int delta, int numpoints, int firsttime)
+{
+ int k;
+
+ if (firsttime)
+ delta = delta / DAMP;
+ else
+ delta = delta / 2;
+
+ delta += (delta / numpoints);
+
+ k = 0;
+ while (delta > ((BASE - TMIN) * TMAX) / 2) {
+ delta = delta / (BASE - TMIN);
+ k += BASE;
+ }
+ return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
+}
+
+static const char *
+copy_until_delimiter(const char *s, char *out, size_t len)
+{
+ char *end, *t;
+
+ end = strchr(s, '\0');
+ if (end - s > len)
+ return NULL;
+
+ for (t = end; t >= s; --t)
+ if (*t == '-')
+ break;
+
+ if (t < s)
+ t = end;
+
+ for (; s < t; ++s, ++out) {
+ if (*s > 'z')
+ return NULL;
+ *out = *s;
+ }
+
+ return s;
+}
+
+static unsigned int
+digit_value(char c)
+{
+ if ('A' <= c && c <= 'Z')
+ return c - 'A';
+
+ if ('a' <= c && c <= 'z')
+ return c - 'a';
+
+ if ('0' <= c && c <= '9')
+ return 26 + c - '0';
+
+ return c;
+}
+
+static int
+insert(char *out, size_t len, int codepoint, size_t i)
+{
+ int l;
+ size_t outlen;
+ char *t;
+
+ if (codepoint <= 0x7F)
+ return 0;
+ else if (codepoint <= 0x7FF)
+ l = 2;
+ else if (codepoint <= 0xFFFF)
+ l = 3;
+ else if (codepoint <= 0x10FFFF)
+ l = 4;
+ else
+ return 0;
+
+ if ((t = utf8_nth(out, i)) == NULL)
+ return 0;
+ if (t + l >= out + len)
+ return 0;
+
+ memmove(t + l, t, strlen(t));
+
+ switch (l) {
+ case 2:
+ t[1] = ( codepoint & 0x3F) + 0x80;
+ t[0] = ((codepoint >> 6) & 0x1F) + 0xC0;
+ break;
+ case 3:
+ t[2] = ( codepoint & 0x3F) + 0x80;
+ t[1] = ((codepoint >> 6) & 0x3F) + 0x80;
+ t[0] = ((codepoint >> 12) & 0x0F) + 0xE0;
+ break;
+ case 4:
+ t[3] = ( codepoint & 0x3F) + 0x80;
+ t[2] = ((codepoint >> 6) & 0x3F) + 0x80;
+ t[1] = ((codepoint >> 12) & 0x3F) + 0x80;
+ t[0] = ((codepoint >> 18) & 0x07) + 0xF0;
+ break;
+ }
+ return 1;
+}
+
+static int
+decode(const char *str, char *out, size_t len)
+{
+ size_t i;
+ uint32_t n;
+ unsigned int oldi, bias, w, k, digit, t;
+ unsigned int numpoints;
+ const char *s;
+
+ if (str == NULL || len <= 4)
+ return 0;
+
+ /* todo: starts_with */
+ if (strstr(str, "xn--") != str) {
+ strncpy(out, str, len);
+ return 1;
+ }
+
+ /* skip the xn-- */
+ str += 4;
+
+ if (strchr(str, '-') != NULL) {
+ if ((s = copy_until_delimiter(str, out, len)) == NULL)
+ return 0;
+ if (*s == '-')
+ s++;
+ } else
+ s = str;
+
+ numpoints = strlen(out);
+
+ n = IN;
+ i = 0;
+ bias = IBIAS;
+
+ while (*s != '\0') {
+ oldi = i;
+ w = 1;
+
+ for (k = BASE; ; k += BASE) {
+ if (*s == '\0')
+ return 0;
+ /* fail eventually? */
+ digit = digit_value(*s);
+ s++;
+
+ /* fail on overflow */
+ i += digit * w;
+
+ if (k <= bias)
+ t = TMIN;
+ else if (k >= bias + TMAX)
+ t = TMAX;
+ else
+ t = k - bias;
+
+ if (digit < t)
+ break;
+ w *= (BASE - t);
+ }
+
+ bias = adapt(i - oldi, numpoints+1, oldi == 0);
+ n += i / (numpoints+1); /* fail on overflow */
+ i = i % (numpoints+1);
+
+ if (!insert(out, len, n, i))
+ return 0;
+ numpoints++;
+ ++i;
+ }
+
+ return 1;
+}
+
+const char *
+end_of_component(const char *hostname)
+{
+ for (; *hostname != '\0' && *hostname != '.'; ++hostname)
+ ; /* nop */
+ return hostname;
+}
+
+int
+puny_decode(const char *hostname, char *out, size_t len)
+{
+ char comp[DOMAIN_NAME_LEN];
+ const char *s, *end;
+ size_t l;
+
+ memset(out, 0, len);
+
+ s = hostname;
+ for (;;) {
+ end = end_of_component(s);
+ if (end - s >= sizeof(comp))
+ return 0;
+
+ memcpy(comp, s, end - s);
+ comp[end - s] = '\0';
+
+ if (!decode(comp, out, len))
+ return 0;
+
+ if (*end == '\0')
+ return 1;
+
+ if (strlcat(out, ".", len) >= len)
+ return 0;
+
+ l = strlen(out);
+ if (l >= len)
+ return 0;
+ out += l;
+ len -= l;
+
+ s = end+1;
+ }
+}