aboutsummaryrefslogtreecommitdiff
path: root/lib/enca/src/convert.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/enca/src/convert.c')
-rw-r--r--lib/enca/src/convert.c575
1 files changed, 0 insertions, 575 deletions
diff --git a/lib/enca/src/convert.c b/lib/enca/src/convert.c
deleted file mode 100644
index 51a2b177c4..0000000000
--- a/lib/enca/src/convert.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- @(#) $Id: convert.c,v 1.28 2005/12/01 10:08:53 yeti Exp $
- conversion to other encodings
-
- Copyright (C) 2000-2003 David Necas (Yeti) <yeti@physics.muni.cz>
-
- This program is free software; you can redistribute it and/or modify it
- under the terms of version 2 of the GNU General Public License as published
- by the Free Software Foundation.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
-*/
-#include "common.h"
-
-#ifdef HAVE_SYS_WAIT_H
-# include <sys/wait.h>
-#else
-pid_t waitpid(pid_t pid, int *status, int options);
-#endif
-
-/* We can't go on w/o this, defining struct stat manually is braindamaged. */
-#include <sys/types.h>
-#include <sys/stat.h>
-
-/* converter flags */
-#define CONV_EXTERN 0x0001
-
-/* converter-type (filename, input encoding, output encoding) */
-typedef int (* ConverterFunc)(File*, EncaEncoding);
-
-/* struct converter data */
-typedef struct _ConverterData ConverterData;
-
-struct _ConverterData {
- unsigned long int flags; /* flags */
- ConverterFunc convfunc; /* pointer to converter function */
-};
-
-/* struct converter list */
-typedef struct _Converter Converter;
-
-struct _Converter {
- const Abbreviation *conv; /* the converter (an abbreviation table entry) */
- Converter *next; /* next in the list */
-};
-
-/* converter list */
-static Converter *converters = NULL;
-
-/* data for xtable */
-static struct {
- size_t ncharsets; /* number of charsets */
- int *charsets; /* charset id's for active language [ncharsets] */
- byte *tables; /* tables from charsets to target_charset [ncharsets * 0x100] */
- int *have_table; /* whether particular table is already cached [ncharsets] */
- unsigned int *ucs2_map; /* temporary space for map computation [0x10000] */
- unsigned int target_map[0x100];
-}
-xdata = { 0, NULL, NULL, NULL, NULL, { 0 } };
-
-/* Local prototypes. */
-static int convert_builtin (File *file,
- EncaEncoding from_enc);
-static const byte* xtable (int from_charset);
-static void xdata_free (void);
-
-static const ConverterData cdata_builtin = { 0, &convert_builtin };
-#ifdef HAVE_LIBRECODE
-static const ConverterData cdata_librecode = { 0, &convert_recode };
-#endif /* HAVE_LIBRECODE */
-#ifdef HAVE_GOOD_ICONV
-static const ConverterData cdata_iconv = { 0, &convert_iconv };
-#endif /* HAVE_GOOD_ICONV */
-#ifdef ENABLE_EXTERNAL
-static const ConverterData cdata_extern = { CONV_EXTERN, &convert_external };
-#endif /* ENABLE_EXTERNAL */
-
-static const Abbreviation CONVERTERS[] = {
- { "built-in", &cdata_builtin },
-#ifdef HAVE_LIBRECODE
- { "librecode", &cdata_librecode },
-#endif /* HAVE_LIBRECODE */
-#ifdef HAVE_GOOD_ICONV
- { "iconv", &cdata_iconv },
-#endif /* HAVE_GOOD_ICONV */
-#ifdef ENABLE_EXTERNAL
- { "extern", &cdata_extern }
-#endif /* ENABLE_EXTERNAL */
-};
-
-/* decide which converter should be run and do common checks
- from_enc, to_enc are current and requested encoding
- returns error code
-
- it doesn't open the file (guess() did it) and doesn't close it (caller does
- it) */
-int
-convert(File *file,
- EncaEncoding from_enc)
-{
- Converter *conv;
- int extern_failed = 0;
- int err;
-
- if (options.verbosity_level) {
- fprintf(stderr, "%s: converting `%s': %s\n",
- program_name, ffname_r(file->name),
- format_request_string(from_enc, options.target_enc, 0));
- }
-
- /* do nothing when requested encoding is current encoding
- (`nothing' may include copying stdin to stdout) */
- if (from_enc.charset == options.target_enc.charset
- && from_enc.surface == options.target_enc.surface) {
- if (file->name != NULL)
- return ERR_OK;
- else
- return copy_and_convert(file, file, NULL);
- }
-
- /* try sequentially all allowed converters until we find some that can
- perform the conversion or exahust the list */
- conv = converters;
- while (conv != NULL) {
- if (options.verbosity_level > 1) {
- fprintf(stderr, " trying to convert `%s' using %s\n",
- ffname_r(file->name), conv->conv->name);
- }
- err = ((ConverterData *)conv->conv->data)->convfunc(file, from_enc);
- if (err == ERR_OK)
- return ERR_OK;
-
- if ((((ConverterData *)conv->conv->data)->flags & CONV_EXTERN) != 0) {
- fprintf(stderr, "%s: external converter failed on `%s', "
- "probably destroying it\n",
- program_name, ffname_w(file->name));
- extern_failed = 1;
- }
- /* don't tempt fate in case of i/o or other serious problems */
- if (err != ERR_CANNOT)
- return ERR_IOFAIL;
-
- conv = conv->next;
- }
-
- /* no converter able/allowed to perform given conversion, that's bad */
- fprintf(stderr, "%s: no converter is able/allowed to perform "
- "conversion %s on file `%s'\n",
- program_name,
- format_request_string(from_enc, options.target_enc, 0),
- ffname_r(file->name));
-
- /* nevertheless stdin should be copied to stdout anyway it cannot make
- more mess */
- if (file->name == NULL)
- copy_and_convert(file, file, NULL);
-
- return ERR_CANNOT;
-}
-
-/* built-in converter
- performs conversion by in place modification of file named fname
- or by calling copy_and_convert() for stdin -> stdout conversion
- returns zero on success, error code otherwise */
-static int
-convert_builtin(File *file,
- EncaEncoding from_enc)
-{
- static int ascii = ENCA_CS_UNKNOWN;
-
- Buffer *buf; /* file->buffer alias */
- const byte *xlat; /* conversion table */
-
- if (!enca_charset_is_known(ascii)) {
- ascii = enca_name_to_charset("ascii");
- assert(enca_charset_is_known(ascii));
- }
-
- /* surfaces can cause fail iff user specificaly requested some
- * or when they are other type than EOLs */
- {
- EncaSurface srf = options.target_enc.surface ^ from_enc.surface;
-
- if ((options.target_enc.surface
- && from_enc.surface != options.target_enc.surface)
- || srf != (srf & ENCA_SURFACE_MASK_EOL)) {
- if (options.verbosity_level > 2)
- fprintf(stderr, "%s: built-in: cannot convert between "
- "different surfaces\n",
- program_name);
- return ERR_CANNOT;
- }
- }
-
- /* catch trivial conversions */
- {
- int identity = 0;
-
- if (from_enc.charset == options.target_enc.charset)
- identity = 1;
-
- if (from_enc.charset == ascii
- && enca_charset_is_8bit(options.target_enc.charset)
- && !enca_charset_is_binary(options.target_enc.charset))
- identity = 1;
-
- if (identity) {
- if (file->name == NULL)
- return copy_and_convert(file, file, NULL);
- else
- return ERR_OK;
- }
- }
-
- xlat = xtable(from_enc.charset);
- if (xlat == NULL)
- return ERR_CANNOT;
-
- if (file->name == NULL)
- return copy_and_convert(file, file, xlat);
-
- /* read buffer_size bytes, convert, write back, etc. to death (or eof,
- whichever come first) */
- buf = file->buffer;
- buf->pos = 0;
- file_seek(file, 0, SEEK_SET);
-
- do {
- if (file_read(file) == -1)
- return ERR_IOFAIL;
-
- if (buf->pos == 0)
- break;
-
- {
- size_t len = buf->pos;
- byte *p = buf->data;
- do {
- *p = xlat[*p];
- p++;
- } while (--len);
- }
-
- if (file_seek(file, -(buf->pos), SEEK_CUR) == -1)
- return ERR_IOFAIL;
-
- if (file_write(file) == -1)
- return ERR_IOFAIL;
-
- /* XXX: apparent no-op
- but ISO C requires fseek() or ftell() between subsequent fwrite() and
- fread(), or else the latter _may_ read nonsense -- and it actually does
- read nonsense with glibc-2.2 (at least); see fopen(3) */
- if (file_seek(file, 0, SEEK_CUR) == -1)
- return ERR_IOFAIL;
-
- } while (1);
-
- return ERR_OK;
-}
-
-/* copy file file_from to file file_to, optionally performing xlat conversion
- (if not NULL)
- file_from has to be already opened for reading,
- file_to has to be already opened for writing
- they have to share common buffer
- returns 0 on success, nonzero on failure */
-int
-copy_and_convert(File *file_from, File *file_to, const byte *xlat)
-{
- Buffer *buf; /* file_from->buffer alias */
-
- if (xlat == NULL && options.verbosity_level > 3)
- fprintf(stderr, " copying `%s' to `%s'\n",
- ffname_r(file_from->name),
- ffname_w(file_to->name));
-
- assert(file_from->buffer == file_to->buffer);
- buf = file_from->buffer;
- /* If there's something in the buffer, process it first. */
- if (file_from->buffer->pos != 0) {
- if (xlat != NULL) {
- size_t len = buf->pos;
- byte *p = buf->data;
- do {
- *p = xlat[*p];
- p++;
- } while (--len);
- }
- if (file_write(file_to) == -1)
- return ERR_IOFAIL;
- }
- /* Then copy the rest. */
- do {
- if (file_read(file_from) == -1)
- return ERR_IOFAIL;
-
- if (buf->pos == 0)
- break;
-
- if (xlat != NULL) {
- size_t len = buf->pos;
- byte *p = buf->data;
- do {
- *p = xlat[*p];
- p++;
- } while (--len);
- }
-
- if (file_write(file_to) == -1)
- return ERR_IOFAIL;
- } while (1);
- fflush(file_to->stream);
-
- return ERR_OK;
-}
-
-/* add converter to list of converters
- (note `none' adds nothing and causes removing of all converters instead)
- returns zero if everything went ok, nonzero otherwise */
-int
-add_converter(const char *cname)
-{
- /* no converters symbolic name */
- static const char *CONVERTER_NAME_NONE = "none";
-
- const Abbreviation *data;
- Converter *conv = NULL, *conv1;
-
- /* remove everything when we got `none' */
- if (strcmp(CONVERTER_NAME_NONE, cname) == 0) {
- if (options.verbosity_level > 3)
- fprintf(stderr, "Removing all converters\n");
- while (converters != NULL) {
- conv = converters->next;
- enca_free(converters);
- converters = conv;
- }
- return 0;
- }
-
- /* find converter data */
- data = expand_abbreviation(cname, CONVERTERS, ELEMENTS(CONVERTERS),
- "converter");
- if (data == NULL)
- return 1;
-
- /* add it to the end of converter list */
- if (options.verbosity_level > 3)
- fprintf(stderr, "Adding converter `%s'\n", data->name);
- if (converters == NULL)
- converters = conv = NEW(Converter, 1);
- else {
- for (conv1 = converters; conv1 != NULL; conv1 = conv1->next) {
- /* reject duplicities */
- if (data == conv1->conv->data) {
- fprintf(stderr, "%s: converter %s specified more than once\n",
- program_name,
- conv1->conv->name);
- return 1;
- }
- conv = conv1;
- }
-
- conv->next = NEW(Converter, 1);
- conv = conv->next;
- }
- conv->next = NULL;
- conv->conv = data;
-
- return 0;
-}
-
-/* return nonzero if the list contains external converter */
-int
-external_converter_listed(void)
-{
- Converter *conv;
-
- for (conv = converters; conv; conv = conv->next) {
- if (((ConverterData*)conv->conv->data)->flags & CONV_EXTERN)
- return 1;
- }
-
- return 0;
-}
-
-/* print white separated list of all valid converter names */
-void
-print_converter_list(void)
-{
- size_t i;
-
- for (i = 0; i < sizeof(CONVERTERS)/sizeof(Abbreviation); i++)
- printf("%s\n", CONVERTERS[i].name);
-}
-
-/* create and return request string for conversion from e1 to e2
- filters out natrual surfaces || mask
- is NOT thread-safe
- returned string must NOT be freed and must be cosidered volatile */
-const char*
-format_request_string(EncaEncoding e1,
- EncaEncoding e2,
- EncaSurface mask)
-{
- static char *s = NULL;
- char *p, *q;
- const char *e2_name, *e1_name;
-
- enca_free(s);
- /* build s sequentially since value returned by surface_name() is lost
- by the second call */
- e1_name = enca_charset_name(e1.charset, ENCA_NAME_STYLE_ENCA);
- p = enca_get_surface_name(e1.surface
- & ~(enca_charset_natural_surface(e1.charset)
- | mask),
- ENCA_NAME_STYLE_ENCA);
- if (!enca_charset_is_known(e2.charset)) {
- q = enca_strdup("");
- e2_name = options.target_enc_str;
- }
- else {
- q = enca_get_surface_name(e2.surface
- & ~(enca_charset_natural_surface(e2.charset)
- | mask),
- ENCA_NAME_STYLE_ENCA);
- e2_name = enca_charset_name(e2.charset, ENCA_NAME_STYLE_ENCA);
- }
-
- s = enca_strconcat(e1_name, p, "..", e2_name, q, NULL);
-
- enca_free(p);
- enca_free(q);
-
- return s;
-}
-
-/**
- * xtable:
- * @from_charset: Charset id for which the conversion table should be returned.
- *
- * Returns translation table from charset @from to (global) target charset.
- *
- * The returned table must be considered constant and must NOT be freed.
- *
- * Only conversion between charsets of one language is supported. We assume
- * a language contains all known charsets usable for represenation of texts,
- * so other charsets are taken as incompatible.
- *
- * Globals used: options.target_enc.charset, options.language.
- *
- * Returns: The conversion table [0x100]; #NULL on failure.
- **/
-static const byte*
-xtable(int from_charset)
-{
- static int xtable_initialized = 0;
-
- unsigned int from_map[0x100];
- size_t i;
- ssize_t fidx;
-
- if (!enca_charset_has_ucs2_map(options.target_enc.charset)
- || !enca_charset_has_ucs2_map(from_charset))
- return NULL;
-
- /* Initialize when we are called the first time. */
- if (!xtable_initialized) {
- /* Allocate various tables. Never freed. */
- xdata.charsets = enca_get_language_charsets(options.language,
- &xdata.ncharsets);
- assert(xdata.ncharsets > 1);
- xdata.have_table = NEW(int, xdata.ncharsets);
- xdata.tables = NEW(byte, 0x100*xdata.ncharsets);
- xdata.ucs2_map = NEW(unsigned int, 0x10000);
-
- for (i = 0; i < xdata.ncharsets; i++)
- xdata.have_table[i] = 0;
-
- /* Initialize tables to identity */
- for (i = 0; i < 0x100; i++)
- xdata.tables[i] = (byte)i;
- for (i = 1; i < xdata.ncharsets; i++)
- memcpy(xdata.tables + 0x100*i, xdata.tables, 0x100);
-
- /* Check whether target_charset belongs to given language */
- fidx = -1;
- for (i = 0; i < xdata.ncharsets; i++) {
- if (xdata.charsets[i] == options.target_enc.charset) {
- fidx = i;
- break;
- }
- }
- if (fidx < 0)
- return NULL;
-
- {
- int map_created;
- map_created = enca_charset_ucs2_map(options.target_enc.charset,
- xdata.target_map);
- assert(map_created);
- }
- atexit(xdata_free);
- }
-
- /* Check whether from_charset belongs to given language */
- fidx = -1;
- for (i = 0; i < xdata.ncharsets; i++) {
- if (xdata.charsets[i] == from_charset) {
- fidx = i;
- break;
- }
- }
- if (fidx < 0)
- return NULL;
-
- /* Return table if cached. */
- if (xdata.have_table[fidx])
- return xdata.tables + 0x100*fidx;
-
- /* Otherwise it must be generated */
- {
- int map_created;
- map_created = enca_charset_ucs2_map(from_charset, from_map);
- assert(map_created);
- }
-
- for (i = 0; i < 0x10000; i++)
- xdata.ucs2_map[i] = ENCA_NOT_A_CHAR;
-
- for (i = 0; i < 0x100; i++) {
- size_t j = 0xff - i;
-
- if (xdata.target_map[j] != ENCA_NOT_A_CHAR)
- xdata.ucs2_map[xdata.target_map[j]] = (unsigned int)j;
- }
-
- /* XXX XXX XXX XXX XXX Warning: Extreme brain damage! XXX XXX XXX XXX XXX
- * When converting to ibm866 we have to replace Belarussian/Ukrainian i/I
- * with Latin versions. I've been told everybody expect this. */
- if (options.target_enc.charset == enca_name_to_charset("ibm866")) {
- xdata.ucs2_map[0x0406] = (byte)'I';
- xdata.ucs2_map[0x0456] = (byte)'i';
- }
-
- for (i = 0; i < 0x100; i++) {
- size_t j = 0xff - i;
-
- if (from_map[j] != ENCA_NOT_A_CHAR
- && xdata.ucs2_map[from_map[j]] != ENCA_NOT_A_CHAR)
- xdata.tables[0x100*fidx + j] = (byte)xdata.ucs2_map[from_map[j]];
- }
-
- return xdata.tables + 0x100*fidx;
-}
-
-static void
-xdata_free(void)
-{
- enca_free(xdata.charsets);
- enca_free(xdata.tables);
- enca_free(xdata.have_table);
- enca_free(xdata.ucs2_map);
-}
-
-/* vim: ts=2
- */