/*
  make_hash.c v2003-01-24
  make encodings.c from encodings.dat

  Copyright (C) 2000-2003 David Necas (Yeti)

  This program is free software; you can redistribute it and/or modify it
  under the terms of version 2 of the GNU General Public License as published
  by the Free Software Foundation.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif /* HAVE_CONFIG_H */

#include <stdio.h>
#include <stdlib.h>

#ifdef HAVE_STRING_H
# include <string.h>
#else /* HAVE_STRING_H */
# ifdef HAVE_STRINGS_H
#  include <strings.h>
# endif /* HAVE_STRINGS_H */
#endif /* HAVE_STRING_H */

#ifdef HAVE_MEMORY_H
# include <memory.h>
#endif /* HAVE_MEMORY_H */

#include <ctype.h>
#include <assert.h>

/* PARR {{{ */
/* Debugging helpers: print a variable (PVAR) or the first n elements of an
 * array (PARR) to stderr; f is the printf conversion without the `%'. */
#ifdef __GNUC__
# define PVAR(f, v) fprintf(stderr, "%s:%u %s(): " \
  #v " == %" #f "\n", __FILE__, __LINE__, __FUNCTION__, v)
# define PARR(f, v, n) ( { int _i; \
  fprintf(stderr, "%s:%u %s(): " #v " == { ", __FILE__, __LINE__, __FUNCTION__); \
  for (_i = 0; _i < n; _i++) fprintf(stderr, "%" #f ", ", (v)[_i]); \
  fputs("}\n", stderr); \
  } )
#else /* __GNUC__ */
/* FIXME */
#endif /* __GNUC__ */
/* }}} */

/* Size of the input line buffer; longer input lines are read in pieces. */
#define LEN 4096

/* One charset record exactly as read from the input: every field is a
 * heap-allocated string (or NULL while not seen yet / when empty). */
typedef struct {
  char *enca;
  char *rfc1345;
  char *cstocs;
  char *iconv;
  char *mime;
  int naliases;
  char **aliases;
  char *human;
  char *flags;
  char *nsurface;
} EncaCharsetRaw;

/* One charset record ready for printing: the names are replaced with
 * indices into the sorted alias list (-1 when the name is missing). */
typedef struct {
  int enca;
  int rfc1345;
  int cstocs;
  int iconv;
  int mime;
  char *human;
  char *flags;
  char *nsurface;
} EncaCharsetFine;

static EncaCharsetRaw RawNULL = {
  NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL
};

/* Report memory exhaustion and terminate with a failure status. */
static void
out_of_memory(void)
{
  fputs("Out of memory\n", stderr);
  exit(EXIT_FAILURE);
}

/* malloc() that never returns NULL (zero sizes are rounded up to one byte so
 * the result is always a valid pointer). */
static void*
xmalloc(size_t size)
{
  void *p = malloc(size ? size : 1);

  if (p == NULL)
    out_of_memory();
  return p;
}

/* realloc() that never returns NULL; on failure the program terminates. */
static void*
xrealloc(void *ptr, size_t size)
{
  void *p = realloc(ptr, size ? size : 1);

  if (p == NULL)
    out_of_memory();
  return p;
}

/* Return a freshly allocated copy of string s; never returns NULL. */
static char*
xstrdup(const char *s)
{
  const size_t size = strlen(s) + 1;

  return memcpy(xmalloc(size), s, size);
}

/* Normalize whitespace of line in place: leading and trailing whitespace is
 * removed and each inner run of whitespace becomes a single space.
 * Returns line. */
static char*
fixspaces(char *line)
{
  char *p, *q;
  int qs = 0;   /* a pending space is stored at *q but not committed yet */

  /* <ctype.h> functions require an unsigned char value, hence the casts. */
  for (p = line; isspace((unsigned char)*p); p++)
    ;
  for (q = line; *p != '\0'; p++) {
    if (isspace((unsigned char)*p)) {
      *q = ' ';
      qs = 1;
    }
    else {
      if (qs)
        q++;
      *q++ = *p;
      qs = 0;
    }
  }
  /* Overwrites an uncommitted trailing space, if any. */
  *q = '\0';

  return line;
}

/* If line starts with name and *item has not been set yet, store a
 * whitespace-normalized copy of the rest of the line into *item.
 * Returns 1 when the item was stored, 0 otherwise. */
static int
add_item(const char *line, const char *name, char **item)
{
  const size_t len = strlen(name);

  if (*item != NULL)
    return 0;
  if (strncmp(line, name, len) != 0)
    return 0;

  *item = fixspaces(xstrdup(line + len));
  return 1;
}

/* Make sure string is present in the alias array aliases of *n items,
 * appending a copy when it is missing.  NULL and empty strings are ignored.
 * Returns the (possibly moved) array. */
static char**
check_alias(char **aliases, int *n, char *string)
{
  int i;

  if (string == NULL || string[0] == '\0')
    return aliases;

  for (i = 0; i < *n; i++) {
    if (strcmp(aliases[i], string) == 0)
      return aliases;
  }

  (*n)++;
  aliases = xrealloc(aliases, (size_t)(*n)*sizeof(char*));
  aliases[*n - 1] = xstrdup(string);
  return aliases;
}

/* Return nonzero when all nine fields of record r have been read. */
static int
record_complete(const EncaCharsetRaw *r)
{
  return r->enca && r->rfc1345 && r->cstocs && r->human && r->iconv
         && r->mime && r->flags && r->nsurface && r->aliases;
}

/* Return nonzero when no field of record r has been read yet. */
static int
record_empty(const EncaCharsetRaw *r)
{
  return !r->enca && !r->rfc1345 && !r->cstocs && !r->human && !r->iconv
         && !r->mime && !r->flags && !r->nsurface && !r->aliases;
}

/* Validate and finalize a complete record: the enca and RFC-1345 names must
 * be nonempty, empty optional names become NULL, an empty nsurface becomes
 * "0" and all names are added to the record's aliases.  number is the
 * 1-based record number used in error messages. */
static void
finish_record(EncaCharsetRaw *r, int number)
{
  if (r->enca[0] == '\0') {
    fprintf(stderr, "Enca's charset name #%d empty\n", number);
    exit(EXIT_FAILURE);
  }
  if (r->rfc1345[0] == '\0') {
    fprintf(stderr, "RFC-1345 charset name #%d empty\n", number);
    exit(EXIT_FAILURE);
  }
  if (r->iconv[0] == '\0') {
    free(r->iconv);
    r->iconv = NULL;
  }
  if (r->cstocs[0] == '\0') {
    free(r->cstocs);
    r->cstocs = NULL;
  }
  if (r->mime[0] == '\0') {
    free(r->mime);
    r->mime = NULL;
  }
  if (r->nsurface[0] == '\0') {
    free(r->nsurface);
    r->nsurface = xstrdup("0");
  }
  r->aliases = check_alias(r->aliases, &r->naliases, r->enca);
  r->aliases = check_alias(r->aliases, &r->naliases, r->iconv);
  r->aliases = check_alias(r->aliases, &r->naliases, r->rfc1345);
  r->aliases = check_alias(r->aliases, &r->naliases, r->mime);
  r->aliases = check_alias(r->aliases, &r->naliases, r->cstocs);
}

/* Split the space-separated alias list in list (modified in place) and store
 * copies of the individual aliases into r.  An empty list yields a single
 * empty alias; r->aliases is therefore never NULL afterwards. */
static void
parse_aliases(EncaCharsetRaw *r, char *list)
{
  char *l, *next;
  int i;

  fixspaces(list);
  r->naliases = 1;
  for (l = list; (l = strchr(l, ' ')) != NULL; l++)
    r->naliases++;

  r->aliases = xmalloc((size_t)r->naliases*sizeof(char*));
  l = list;
  for (i = 0; i < r->naliases; i++) {
    next = strchr(l, ' ');
    if (next)
      *next++ = '\0';
    r->aliases[i] = xstrdup(l);
    /* NULL only after the last alias, when the loop is about to end. */
    l = next;
  }
}

/* Read charset records from stream.
 *
 * Each record consists of the lines `enca:', `rfc:', `iconv:', `mime:',
 * `cstocs:', `human:', `flags:', `nsurface:' and `aliases:' in any order;
 * a new record starts once all nine have been seen.  Blank lines and lines
 * starting with `#' are skipped.  Any other line, a read error, or a record
 * left incomplete at the end of input terminates the program with an error.
 *
 * Returns a newly allocated array of records and stores the number of
 * records into *rsize (zero for input containing no record). */
static EncaCharsetRaw*
read_raw_charset_data(FILE *stream, int *rsize)
{
  char *line = xmalloc(LEN);
  EncaCharsetRaw *raw = xmalloc(sizeof(EncaCharsetRaw));
  EncaCharsetRaw *r = raw;
  int rs = 1;

  *r = RawNULL;
  while (1) {
    const char *gl = fgets(line, LEN, stream);

    if (gl == NULL && ferror(stream)) {
      fputs("Error reading charset data\n", stderr);
      exit(EXIT_FAILURE);
    }

    if (record_complete(r)) {
      finish_record(r, (int)(r - raw + 1));
      if (gl == NULL)
        break;

      rs++;
      raw = xrealloc(raw, (size_t)rs*sizeof(EncaCharsetRaw));
      r = raw + rs - 1;
      *r = RawNULL;
    }
    else if (gl == NULL) {
      /* End of input outside a finished record.  The buffer holds no new
       * line, so it must not be processed again. */
      if (!record_empty(r)) {
        fprintf(stderr, "Charset #%d is incomplete at end of input\n",
                (int)(r - raw + 1));
        exit(EXIT_FAILURE);
      }
      /* Only blank lines or comments followed the last record (or the input
       * was empty): the slot prepared for the next record is unused. */
      rs--;
      break;
    }

    fixspaces(line);
    if (line[0] == '\0' || line[0] == '#')
      continue;

    if (add_item(line, "enca:", &r->enca)) continue;
    if (add_item(line, "rfc:", &r->rfc1345)) continue;
    if (add_item(line, "iconv:", &r->iconv)) continue;
    if (add_item(line, "mime:", &r->mime)) continue;
    if (add_item(line, "cstocs:", &r->cstocs)) continue;
    if (add_item(line, "human:", &r->human)) continue;
    if (add_item(line, "flags:", &r->flags)) continue;
    if (add_item(line, "nsurface:", &r->nsurface)) continue;
    if (strncmp(line, "aliases:", 8) == 0 && !r->aliases) {
      parse_aliases(r, line + 8);
      continue;
    }

    /* Unknown key, or a key repeated within one record. */
    fprintf(stderr, "Unexpected `%s'\n", line);
    exit(EXIT_FAILURE);
  }

  free(line);
  *rsize = rs;
  return raw;
}

/* Compare two names ignoring case and all non-alphanumeric characters.
 * Returns a negative, zero or positive value like strcmp(). */
static int
squeeze_compare(const char *x, const char *y)
{
  while (*x != '\0' || *y != '\0') {
    int cx, cy;

    while (*x != '\0' && !isalnum((unsigned char)*x))
      x++;
    while (*y != '\0' && !isalnum((unsigned char)*y))
      y++;

    cx = tolower((unsigned char)*x);
    cy = tolower((unsigned char)*y);
    if (cx != cy)
      return cx - cy;

    if (*x != '\0')
      x++;
    if (*y != '\0')
      y++;
  }
  return 0;
}

/* qsort() comparison of two pointers to strings: squeeze_compare() order,
 * with strcmp() deciding between equivalent names to stabilize the sort. */
static int
stable_compare(const void *p, const void *q)
{
  const char *x = *(const char *const *)p;
  const char *y = *(const char *const *)q;
  int i;

  i = squeeze_compare(x, y);
  /* to stabilize the sort */
  if (i == 0)
    return strcmp(x, y);

  return i;
}

/* Find string s in alist, an array of n strings sorted with
 * stable_compare(), and return its index.  A string that is not present is
 * a fatal error: a message is printed and the program exits with a failure
 * status so that the build does not continue with a broken table. */
static int
bin_search(char **alist, const int n, const char *s)
{
  int i1 = 0;
  int i2 = n-1;
  int i;

  if (n <= 0) {
    fprintf(stderr, "Not found: `%s'\n", s);
    exit(EXIT_FAILURE);
  }

  i = stable_compare(&s, &alist[i1]);
  if (i < 0) {
    fprintf(stderr, "Out of search range: `%s'\n", s);
    exit(EXIT_FAILURE);
  }
  if (i == 0)
    return i1;

  i = stable_compare(&s, &alist[i2]);
  if (i > 0) {
    fprintf(stderr, "Out of search range: `%s'\n", s);
    exit(EXIT_FAILURE);
  }
  if (i == 0)
    return i2;

  /* Invariant: alist[i1] < s < alist[i2], so neither end can match. */
  while (i1+1 < i2) {
    int im = i1 + (i2 - i1)/2;

    i = stable_compare(&s, &alist[im]);
    if (i == 0)
      return im;

    if (i > 0)
      i1 = im;
    else
      i2 = im;
  }

  fprintf(stderr, "Not found: `%s'\n", s);
  exit(EXIT_FAILURE);
}

/* Collect the aliases of all ncs charsets into one array sorted with
 * stable_compare() and remove exact duplicates (names differing only in
 * case or punctuation are kept, with a notice on stderr).  The strings are
 * shared with raw, not copied.  Stores the final number of aliases into
 * *total and returns the array. */
static char**
build_alias_list(EncaCharsetRaw *raw, const int ncs, int *total)
{
  char **alist;
  int nn, i, j, k;

  for (i = nn = 0; i < ncs; i++)
    nn += raw[i].naliases;

  alist = xmalloc((size_t)nn*sizeof(char*));
  for (i = j = 0; i < ncs; i++) {
    for (k = 0; k < raw[i].naliases; k++)
      alist[j++] = raw[i].aliases[k];
  }

  qsort(alist, (size_t)nn, sizeof(char*), &stable_compare);

  /* Exact duplicates are adjacent after sorting; compact them in one pass,
   * always comparing with the last alias kept so far. */
  if (nn > 0) {
    int kept = 1;

    for (i = 1; i < nn; i++) {
      char *prev = alist[kept-1];

      if (squeeze_compare(alist[i], prev) == 0) {
        if (strcmp(alist[i], prev) == 0) {
          fprintf(stderr, "Removing duplicate `%s'\n", alist[i]);
          continue;
        }
        fprintf(stderr, "Keeping equvialent `%s' and `%s'\n",
                alist[i], prev);
      }
      alist[kept++] = alist[i];
    }
    nn = kept;
  }

  *total = nn;
  return alist;
}

/* Convert the ncs raw records to printable ones: every charset name is
 * replaced with its index in the sorted alias list alist of nn items (-1 for
 * a missing optional name); the remaining strings are shared with raw. */
static EncaCharsetFine*
refine_data(EncaCharsetRaw *raw, const int ncs, char **alist, const int nn)
{
  int i;
  EncaCharsetFine *fine;

  fine = xmalloc((size_t)ncs*sizeof(EncaCharsetFine));
  for (i = 0; i < ncs; i++) {
    fine[i].enca = bin_search(alist, nn, raw[i].enca);
    fine[i].rfc1345 = bin_search(alist, nn, raw[i].rfc1345);
    fine[i].iconv = raw[i].iconv ? bin_search(alist, nn, raw[i].iconv) : -1;
    fine[i].cstocs = raw[i].cstocs ? bin_search(alist, nn, raw[i].cstocs) : -1;
    fine[i].mime = raw[i].mime ? bin_search(alist, nn, raw[i].mime) : -1;
    fine[i].human = raw[i].human;
    fine[i].flags = raw[i].flags;
    fine[i].nsurface = raw[i].nsurface;
  }

  return fine;
}

/* Build the array that maps each of the nn aliases in alist to the index of
 * the charset it belongs to.  When several charsets list the same alias, the
 * charset with the highest index wins. */
static int*
create_index_list(EncaCharsetRaw *raw, const int ncs,
                  char **alist, const int nn)
{
  int i, k;
  int *ilist;

  ilist = xmalloc((size_t)nn*sizeof(int));
  for (i = 0; i < ncs; i++) {
    for (k = 0; k < raw[i].naliases; k++)
      ilist[bin_search(alist, nn, raw[i].aliases[k])] = i;
  }

  return ilist;
}

/* Print the generated C tables to standard output: CHARSET_INFO (one entry
 * per charset), ALIAS_LIST (the sorted aliases) and INDEX_LIST (charset
 * index of each alias).  The human name is printed as a string literal
 * without any escaping; flags and nsurface are printed verbatim. */
static void
print_fine_data(EncaCharsetFine *fine, const int ncs,
                int *ilist, char **alist, const int nn)
{
  int i;

  puts("/****  THIS IS A GENERATED FILE.  DO NOT TOUCH!  *****/");

  puts("/* THIS IS A GENERATED TABLE, see tools/make_hash.c. */");
  puts("static const EncaCharsetInfo CHARSET_INFO[] = {");
  for (i = 0; i < ncs; i++) {
    printf(" {\n"
           " %d, %d, %d, %d, %d,\n"
           " \"%s\",\n"
           " %s,\n"
           " %s\n"
           " },\n",
           fine[i].enca, fine[i].rfc1345, fine[i].cstocs, fine[i].iconv,
           fine[i].mime, fine[i].human, fine[i].flags, fine[i].nsurface);
  }
  puts("};\n");

  puts("/* THIS IS A GENERATED TABLE, see tools/make_hash.c. */");
  puts("static const char *ALIAS_LIST[] = {");
  for (i = 0; i < nn; i++)
    printf(" \"%s\",\n", alist[i]);
  puts("};\n");

  puts("/* THIS IS A GENERATED TABLE, see tools/make_hash.c. */");
  puts("static const int INDEX_LIST[] = {");
  for (i = 0; i < nn; i++) {
    /* Sixteen indices per output line. */
    if (i%16 == 0)
      printf(" ");
    printf("%2d, ", ilist[i]);
    if (i%16 == 15 || i == nn-1)
      printf("\n");
  }
  puts("};\n");
}

/* Read charset definitions from standard input and write the generated
 * tables to standard output.  Returns a failure status when the input
 * contains no charset or the output cannot be written. */
int
main(void)
{
  EncaCharsetRaw *raw;
  EncaCharsetFine *fine;
  char **alist;
  int *ilist;
  int ncs, nn;

  raw = read_raw_charset_data(stdin, &ncs);
  if (ncs == 0) {
    fputs("No charset definitions found\n", stderr);
    return EXIT_FAILURE;
  }

  alist = build_alias_list(raw, ncs, &nn);
  fine = refine_data(raw, ncs, alist, nn);
  ilist = create_index_list(raw, ncs, alist, nn);
  print_fine_data(fine, ncs, ilist, alist, nn);

  /* A truncated generated file must not look like a successful run. */
  if (fflush(stdout) != 0 || ferror(stdout)) {
    fputs("Error writing generated tables\n", stderr);
    return EXIT_FAILURE;
  }

  return 0;
}