aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Omar Polo <op@omarpolo.com> 2021-01-13 19:00:53 +0000
committer: Omar Polo <op@omarpolo.com> 2021-01-13 19:00:53 +0000
commit: de428fff65f1ef1a337a1caafb3d580433c73fc9 (patch)
tree: 4cedb7972ad0c2fe1f1255201000f0e0d66e5623
parent: 9862b637c2aa97e7e8d148ae9c3f92d0ca758fa7 (diff)
normalize scheme when parsing the IRI
RFC3986 in section 3.1 "Scheme" says that

> Although schemes are case-insensitive, the canonical form is
> lowercase and documents that specify schemes must do so with
> lowercase letters. An implementation should accept uppercase
> letters as equivalent to lowercase in scheme names (e.g., allow
> "HTTP" as well as "http") for the sake of robustness but should only
> produce lowercase scheme names for consistency.

so we cope with that.

The other possibility would have been to use strcasecmp instead of strcmp when checking on the protocol, but since the "case" version, although popular, is not part of any standard AFAIK, I prefer to downcase while parsing and be done with it.
-rw-r--r-- iri.c 15
-rw-r--r-- iri_test.c 4
2 files changed, 15 insertions, 4 deletions
diff --git a/iri.c b/iri.c
index 1901dac..1b997dd 100644
--- a/iri.c
+++ b/iri.c
@@ -77,12 +77,19 @@ parse_scheme(struct parser *p)
return 0;
}
- p->iri++;
- while (isalnum(*p->iri)
+ do {
+ /* normalize the scheme (i.e. lowercase it)
+ *
+ * XXX: since we cannot have good things, tolower
+ * depends on the LC_CTYPE locale. The good things is
+ * that we're sure p->iri points to something in the
+ * ASCII range, so it shouldn't do weird stuff. */
+ *p->iri = tolower(*p->iri);
+ p->iri++;
+ } while (isalnum(*p->iri)
|| *p->iri == '+'
|| *p->iri == '-'
- || *p->iri == '.')
- p->iri++;
+ || *p->iri == '.');
if (*p->iri != ':') {
p->err = "illegal character in scheme";
diff --git a/iri_test.c b/iri_test.c
index 18a1020..e322813 100644
--- a/iri_test.c
+++ b/iri_test.c
@@ -97,6 +97,10 @@ main(void)
TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
+ TEST("GEMINI://omarpolo.com",
+ PASS,
+ IRI("gemini", "omarpolo.com", "", "", "", ""),
+ "Schemas are case insensitive.");
/* authority */
TEST("gemini://omarpolo.com",