aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOmar Polo <op@omarpolo.com>2020-11-10 14:07:36 +0100
committerOmar Polo <op@omarpolo.com>2020-11-10 14:07:36 +0100
commita5d310bc0d07fea3d75b593ea6b2f86dad006211 (patch)
tree54922481b1240b9ed149c8289af8b6d35800b36b
parent92e66347ed2971e811170559eb8865e0db0a5d3e (diff)
[cgi] added support for path parameters
Enhance the CGI scripting support so that scripts can take path parameters. That is, a script at /cgi/foo is called when the request path is /cgi/foo/bar/... This commit also introduces some backward-incompatible changes, as the default env variables set for CGI scripts have changed.
-rw-r--r--ChangeLog5
-rw-r--r--README.md67
-rw-r--r--gmid.161
-rw-r--r--gmid.c210
4 files changed, 231 insertions, 112 deletions
diff --git a/ChangeLog b/ChangeLog
index b60fdc2..fa0596c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2020-11-10 Omar Polo <op@omarpolo.com>
+
+ * gmid.c (open_file): added support for path parameters for CGI
+ scripts
+
2020-11-06 Omar Polo <op@omarpolo.com>
* gmid.1: great improvements to the documentation
diff --git a/README.md b/README.md
index b8b11de..2d4ac46 100644
--- a/README.md
+++ b/README.md
@@ -24,10 +24,7 @@ will strip any sequence of
*../*
or trailing
*..*
-in the requests made by clients, so it's impossible to serve content
-outside the
-*docs*
-directory by mistake, and will also refuse to follow symlinks.
+in the requests made by clients and will refuse to follow symlinks.
Furthermore, on
OpenBSD,
pledge(2)
@@ -50,7 +47,6 @@ If a user request path is a directory,
will try to serve a
*index.gmi*
file inside that directory.
-If not found, it will return an error 51 (not found) to the user.
The options are as follows:
@@ -63,7 +59,8 @@ The options are as follows:
> The root directory to serve.
> **gmid**
-> won't serve any file that is outside that directory, by default
+> won't serve any file that is outside that directory.
+> By default is
> *docs*.
**-h**
@@ -97,31 +94,61 @@ with these additional variables set:
> "gmid"
-`SERVER_PROTOCOL`
-
-> "gemini"
-
`SERVER_PORT`
> "1965"
-`PATH_INFO`
+`SCRIPT_NAME`
+
+> The (public) path to the script.
+
+`SCRIPT_EXECUTABLE`
+
+> The full path to the executable.
+
+`REQUEST_URI`
-> the request path
+> The user request (without the query parameters).
-`PATH_TRANSLATED`
+`REQUEST_RELATIVE`
-> the full path: the concatenation of the document root and the request
-> path
+> The request relative to the script.
`QUERY_STRING`
-> the query string if present in the request URL, otherwise it
-> won't be set.
+> The query parameters.
-`REMOTE_ADDR`
+`REMOTE_HOST`
-> the IP address of the client in dot notation
+> The remote IP address.
+
+`DOCUMENT_ROOT`
+
+> The root directory being served, the one provided with the
+> **-d**
+> option to
+> **gmid**.
+
+Let's say you have a script in
+*/cgi-bin/script*
+and the user request is
+*/cgi-bin/script/foo/bar?quux*.
+Then
+`SCRIPT_NAME`
+will be
+*/cgi-bin/script*,
+`SCRIPT_EXECUTABLE`
+will be
+*$DOCUMENT\_ROOT/cgi-bin/script*,
+`REQUEST_URI`
+will be
+*/cgi-bin/script/foo/bar*,
+`REQUEST_RELATIVE`
+will be
+*foo/bar* and
+`QUERY_STRING`
+will be
+*quux*.
# EXAMPLES
@@ -157,7 +184,7 @@ option is
*cgi-bin*
and not
*docs/cgi-bin*,
-since it&#8217;s relative to the document root.
+since it's relative to the document root.
# CAVEATS
diff --git a/gmid.1 b/gmid.1
index b56ddf0..f25285f 100644
--- a/gmid.1
+++ b/gmid.1
@@ -37,10 +37,7 @@ will strip any sequence of
.Pa ../
or trailing
.Pa ..
-in the requests made by clients, so it's impossible to serve content
-outside the
-.Pa docs
-directory by mistake, and will also refuse to follow symlinks.
+in the requests made by clients and will refuse to follow symlinks.
Furthermore, on
.Ox ,
.Xr pledge 2
@@ -63,7 +60,6 @@ If a user request path is a directory,
will try to serve a
.Pa index.gmi
file inside that directory.
-If not found, it will return an error 51 (not found) to the user.
.Pp
The options are as follows:
.Bl -tag -width 12m
@@ -73,7 +69,8 @@ The certificate to use, by default is
.It Fl d Ar docs
The root directory to serve.
.Nm
-won't serve any file that is outside that directory, by default
+won't serve any file that is outside that directory.
+By default is
.Pa docs .
.It Fl h
Print the usage and exit.
@@ -93,24 +90,50 @@ executable file will execute it and fed its output to the client.
The CGI scripts will inherit the environment from
.Nm
with these additional variables set:
-.Bl -tag -width 15m
+.Bl -tag -width 18m
.It Ev SERVER_SOFTWARE
"gmid"
-.It Ev SERVER_PROTOCOL
-"gemini"
.It Ev SERVER_PORT
"1965"
-.It Ev PATH_INFO
-the request path
-.It Ev PATH_TRANSLATED
-the full path: the concatenation of the document root and the request
-path
+.It Ev SCRIPT_NAME
+The (public) path to the script.
+.It Ev SCRIPT_EXECUTABLE
+The full path to the executable.
+.It Ev REQUEST_URI
+The user request (without the query parameters).
+.It Ev REQUEST_RELATIVE
+The request relative to the script.
.It Ev QUERY_STRING
-the query string if present in the request URL, otherwise it
-won't be set.
-.It Ev REMOTE_ADDR
-the IP address of the client in dot notation
+The query parameters.
+.It Ev REMOTE_HOST
+The remote IP address.
+.It Ev DOCUMENT_ROOT
+The root directory being served, the one provided with the
+.Fl d
+option to
+.Nm .
.El
+.Pp
+Let's say you have a script in
+.Pa /cgi-bin/script
+and the user request is
+.Pa /cgi-bin/script/foo/bar?quux .
+Then
+.Ev SCRIPT_NAME
+will be
+.Pa /cgi-bin/script ,
+.Ev SCRIPT_EXECUTABLE
+will be
+.Pa $DOCUMENT_ROOT/cgi-bin/script ,
+.Ev REQUEST_URI
+will be
+.Pa /cgi-bin/script/foo/bar ,
+.Ev REQUEST_RELATIVE
+will be
+.Pa foo/bar No and
+.Ev QUERY_STRING
+will be
+.Ar quux .
.Sh EXAMPLES
To quickly getting started
.Bd -literal -offset indent
@@ -146,7 +169,7 @@ option is
.Pa cgi-bin
and not
.Pa docs/cgi-bin ,
-since it’s relative to the document root.
+since it's relative to the document root.
.Sh CAVEATS
.Bl -bullet
.It
diff --git a/gmid.c b/gmid.c
index 133f513..86a900a 100644
--- a/gmid.c
+++ b/gmid.c
@@ -77,6 +77,13 @@ struct client {
struct in_addr addr;
};
+enum {
+ FILE_EXISTS,
+ FILE_EXECUTABLE,
+ FILE_DIRECTORY,
+ FILE_MISSING,
+};
+
struct etm { /* file extension to mime */
const char *mime;
const char *ext;
@@ -118,14 +125,15 @@ char *url_after_proto(char*);
char *url_start_of_request(char*);
int url_trim(struct client*, char*);
char *adjust_path(char*);
-int path_isdir(char*);
ssize_t filesize(int);
int start_reply(struct pollfd*, struct client*, int, const char*);
const char *path_ext(const char*);
const char *mime(const char*);
+int check_path(const char*, int*);
+int check_for_cgi(char *, char*, struct pollfd*, struct client*);
int open_file(char*, char*, struct pollfd*, struct client*);
-void start_cgi(const char*, const char*, struct pollfd*, struct client*);
+int start_cgi(const char*, const char*, const char*, struct pollfd*, struct client*);
void cgi_setpoll_on_child(struct pollfd*, struct client*);
void cgi_setpoll_on_client(struct pollfd*, struct client*);
void handle_cgi(struct pollfd*, struct client*);
@@ -245,14 +253,6 @@ adjust_path(char *path)
}
int
-path_isdir(char *path)
-{
- if (*path == '\0')
- return 1;
- return path[strlen(path)-1] == '/';
-}
-
-int
start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason)
{
char buf[1030] = {0}; /* status + ' ' + max reply len + \r\n\0 */
@@ -324,12 +324,78 @@ mime(const char *path)
}
int
-open_file(char *path, char *query, struct pollfd *fds, struct client *c)
+check_path(const char *path, int *fd)
{
- char fpath[PATHBUF];
struct stat sb;
assert(path != NULL);
+ if ((*fd = openat(dirfd, path,
+ O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
+ return FILE_MISSING;
+ }
+
+ if (fstat(*fd, &sb) == -1) {
+ dprintf(logfd, "failed stat for %s\n", path);
+ return FILE_MISSING;
+ }
+
+ if (S_ISDIR(sb.st_mode))
+ return FILE_DIRECTORY;
+
+ if (sb.st_mode & S_IXUSR)
+ return FILE_EXECUTABLE;
+
+ return FILE_EXISTS;
+}
+
+/*
+ * the inverse of this algorithm, i.e. starting from the start of the
+ * path + strlen(cgi) and checking each component in turn, should be
+ * faster, but it's tedious to write. This does the opposite: it starts
+ * from the end and strips one component at a time, until either an
+ * executable is found or the path is empty.
+ */
+int
+check_for_cgi(char *path, char *query, struct pollfd *fds, struct client *c)
+{
+ char *end;
+ end = strchr(path, '\0');
+
+ /* NB: assume CGI is enabled and path matches cgi */
+
+ while (end > path) {
+ /* go up one level. UNIX paths are simple and POSIX
+ * dirname, with its ambiguities on if the given path
+ * is changed or not, gives me headaches. */
+ while (*end != '/')
+ end--;
+ *end = '\0';
+
+ switch (check_path(path, &c->fd)) {
+ case FILE_EXECUTABLE:
+ return start_cgi(path, end+1, query, fds,c);
+ case FILE_MISSING:
+ break;
+ default:
+ goto err;
+ }
+
+ *end = '/';
+ end--;
+ }
+
+err:
+ if (!start_reply(fds, c, NOT_FOUND, "not found"))
+ return 0;
+ goodbye(fds, c);
+ return 0;
+}
+
+
+int
+open_file(char *path, char *query, struct pollfd *fds, struct client *c)
+{
+ char fpath[PATHBUF];
bzero(fpath, sizeof(fpath));
@@ -337,60 +403,58 @@ open_file(char *path, char *query, struct pollfd *fds, struct client *c)
fpath[0] = '.';
strlcat(fpath, path, PATHBUF);
- if ((c->fd = openat(dirfd, fpath,
- O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
- LOG(c, "open failed: %s", fpath);
- if (!start_reply(fds, c, NOT_FOUND, "not found"))
+ switch (check_path(fpath, &c->fd)) {
+ case FILE_EXECUTABLE:
+ /* +2 to skip the ./ */
+ if (cgi != NULL && starts_with(fpath+2, cgi))
+ return start_cgi(fpath, "", query, fds, c);
+
+ /* fallthrough */
+
+ case FILE_EXISTS:
+ if ((c->len = filesize(c->fd)) == -1) {
+ LOG(c, "failed to get file size for %s", fpath);
+ goodbye(fds, c);
return 0;
- goodbye(fds, c);
- return 0;
- }
+ }
- if (fstat(c->fd, &sb) == -1) {
- LOG(c, "fstat failed for %s", fpath);
- if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
+ if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
+ c->fd, 0)) == MAP_FAILED) {
+ warn("mmap: %s", fpath);
+ goodbye(fds, c);
return 0;
- goodbye(fds, c);
- return 0;
- }
+ }
+ c->i = c->buf;
+ return start_reply(fds, c, SUCCESS, mime(fpath));
- if (S_ISDIR(sb.st_mode)) {
+ case FILE_DIRECTORY:
LOG(c, "%s is a directory, trying %s/index.gmi", fpath, fpath);
close(c->fd);
c->fd = -1;
send_dir(fpath, fds, c);
return 0;
- }
- /* +2 to skip the ./ */
- if ((sb.st_mode & S_IXUSR) && cgi != NULL && starts_with(fpath+2, cgi)) {
- start_cgi(fpath, query, fds, c);
- return 0;
- }
+ case FILE_MISSING:
+ if (cgi != NULL && starts_with(fpath+2, cgi))
+ return check_for_cgi(fpath, query, fds, c);
- if ((c->len = filesize(c->fd)) == -1) {
- LOG(c, "failed to get file size for %s", fpath);
+ if (!start_reply(fds, c, NOT_FOUND, "not found"))
+ return 0;
goodbye(fds, c);
return 0;
- }
- if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
- c->fd, 0)) == MAP_FAILED) {
- warn("mmap: %s", fpath);
- goodbye(fds, c);
- return 0;
+ default:
+ /* unreachable */
+ abort();
}
- c->i = c->buf;
-
- return start_reply(fds, c, SUCCESS, mime(fpath));
}
-void
-start_cgi(const char *path, const char *query,
+int
+start_cgi(const char *spath, const char *relpath, const char *query,
struct pollfd *fds, struct client *c)
{
pid_t pid;
- int p[2];
+ int p[2]; /* read end, write end */
if (pipe(p) == -1)
goto err;
@@ -399,65 +463,68 @@ start_cgi(const char *path, const char *query,
case -1:
goto err;
- case 0: { /* child */
- char *expath;
+ case 0: { /* child */
+ char *ex, *requri;
char addr[INET_ADDRSTRLEN];
char *argv[] = { NULL, NULL, NULL };
- /* skip the initial ./ */
- path += 2;
+ spath++;
- close(p[0]); /* close the read end */
+ close(p[0]);
if (dup2(p[1], 1) == -1)
goto childerr;
if (inet_ntop(c->af, &c->addr, addr, sizeof(addr)) == NULL)
- goto childerr;
+ goto childerr;
+
+ if (asprintf(&ex, "%s%s", dir, spath+1) == -1)
+ goto childerr;
- /* skip the ./ at the start of path*/
- if (asprintf(&expath, "%s%s", dir, path) == -1)
+ if (asprintf(&requri, "%s%s%s", spath,
+ *relpath == '\0' ? "" : "/",
+ relpath) == -1)
goto childerr;
- argv[0] = argv[1] = expath;
+
+ argv[0] = argv[1] = ex;
/* fix the env */
setenv("SERVER_SOFTWARE", "gmid", 1);
- /* setenv("SERVER_NAME", "", 1); */
- /* setenv("GATEWAY_INTERFACE", "CGI/version", 1); */
- setenv("SERVER_PROTOCOL", "gemini", 1);
setenv("SERVER_PORT", "1965", 1);
- setenv("PATH_INFO", path, 1);
- setenv("PATH_TRANSLATED", expath, 1);
+ /* setenv("SERVER_NAME", "", 1); */
+ setenv("SCRIPT_NAME", spath, 1);
+ setenv("SCRIPT_EXECUTABLE", ex, 1);
+ setenv("REQUEST_URI", requri, 1);
+ setenv("REQUEST_RELATIVE", relpath, 1);
if (query != NULL)
setenv("QUERY_STRING", query, 1);
- setenv("REMOTE_ADDR", addr, 1);
+ setenv("REMOTE_HOST", addr, 1);
+ setenv("DOCUMENT_ROOT", dir, 1);
- execvp(expath, argv);
+ execvp(ex, argv);
goto childerr;
}
default: /* parent */
- close(p[1]); /* close the write end */
+ close(p[1]);
close(c->fd);
c->fd = p[0];
c->child = pid;
mark_nonblock(c->fd);
c->state = S_SENDING;
handle_cgi(fds, c);
- return;
+ return 0;
}
err:
if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
- return;
+ return 0;
goodbye(fds, c);
- return;
+ return 0;
childerr:
dprintf(p[1], "%d internal server error\r\n", TEMP_FAILURE);
close(p[1]);
-
- /* don't call atexit stuff */
- _exit(1);
+ _exit(1);
}
void
@@ -645,10 +712,7 @@ handle(struct pollfd *fds, struct client *client)
query ? "?" : "",
query ? query : "");
- if (path_isdir(path))
- send_dir(path, fds, client);
- else
- send_file(path, query, fds, client);
+ send_file(path, query, fds, client);
break;
case S_INITIALIZING: