diff options
author | Thomas Roessler <roessler@does-not-exist.org> | 2000-05-09 15:19:26 +0000 |
---|---|---|
committer | Thomas Roessler <roessler@does-not-exist.org> | 2000-05-09 15:19:26 +0000 |
commit | 3de049372c8ca458e096b8c8ece2d4f95da0fe82 (patch) | |
tree | bedf3c7648d8815163ff60f0a557a554fdc32545 | |
parent | ee09e5f42399a6016f3e96dcbd03bab1effa07ff (diff) |
Edmund Grimley Evans' UTF-8 patch.
-rw-r--r-- | Makefile.am | 17 | ||||
-rw-r--r-- | VERSION | 2 | ||||
-rw-r--r-- | charset.c | 1120 | ||||
-rw-r--r-- | charset.h | 58 | ||||
-rw-r--r-- | commands.c | 12 | ||||
-rw-r--r-- | compose.c | 41 | ||||
-rw-r--r-- | configure.in | 42 | ||||
-rw-r--r-- | curs_lib.c | 149 | ||||
-rw-r--r-- | curs_main.c | 2 | ||||
-rw-r--r-- | enter.c | 7 | ||||
-rw-r--r-- | gettext.c | 167 | ||||
-rw-r--r-- | gnupgparse.c | 38 | ||||
-rw-r--r-- | hdrline.c | 92 | ||||
-rw-r--r-- | help.c | 80 | ||||
-rw-r--r-- | iconv/Makefile.am | 10 | ||||
-rw-r--r-- | iconv/iconv.c | 899 | ||||
-rw-r--r-- | iconv/iconv.h | 12 | ||||
-rw-r--r-- | init.c | 2 | ||||
-rw-r--r-- | lib.h | 3 | ||||
-rw-r--r-- | mbyte.c | 167 | ||||
-rw-r--r-- | mbyte.h | 30 | ||||
-rw-r--r-- | menu.c | 60 | ||||
-rw-r--r-- | muttlib.c | 2 | ||||
-rw-r--r-- | pager.c | 318 | ||||
-rw-r--r-- | pgppubring.c | 11 | ||||
-rw-r--r-- | protos.h | 11 | ||||
-rw-r--r-- | rfc2047.c | 20 | ||||
-rw-r--r-- | rfc2231.c | 11 | ||||
-rw-r--r-- | sendlib.c | 50 | ||||
-rw-r--r-- | utf8.c | 66 | ||||
-rw-r--r-- | wcwidth.c | 134 |
31 files changed, 2334 insertions, 1299 deletions
diff --git a/Makefile.am b/Makefile.am index e751ced4..a58f7a40 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,7 +13,12 @@ IMAP_SUBDIR = imap IMAP_INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/imap endif -SUBDIRS = m4 po intl doc contrib $(CHARMAP_SUBDIR) $(IMAP_SUBDIR) +if BUILD_ICONV +ICONV_SUBDIR = iconv +ICONV_INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/iconv +endif + +SUBDIRS = m4 po intl doc contrib $(CHARMAP_SUBDIR) $(IMAP_SUBDIR) $(ICONV_SUBDIR) if NEEDS_PGPEWRAP bin_SCRIPTS = pgpewrap muttbug @@ -34,12 +39,13 @@ mutt_SOURCES = $(BUILT_SOURCES) \ rfc822.c rfc1524.c rfc2047.c rfc2231.c \ score.c send.c sendlib.c signal.c sort.c \ status.c system.c thread.c charset.c history.c lib.c \ - muttlib.c editmsg.c + muttlib.c editmsg.c utf8.c mbyte.c wcwidth.c gettext.c mutt_LDADD = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAP) $(MUTTLIBS) \ - $(INTLLIBS) + $(INTLLIBS) $(LIBICONV) -mutt_DEPENDENCIES = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAPDEPS) $(INTLDEPS) +mutt_DEPENDENCIES = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAPDEPS) \ + $(INTLDEPS) $(LIBICONVDEPS) makedoc_SOURCES = makedoc.c @@ -49,7 +55,8 @@ DEFS=-DSHAREDIR=\"$(sharedir)\" -DSYSCONFDIR=\"$(sysconfdir)\" \ -DBINDIR=\"$(bindir)\" -DHAVE_CONFIG_H=1 # top_srcdir is for building outside of the source tree -INCLUDES=-I$(top_srcdir) -I. $(IMAP_INCLUDES) -Iintl -I$(includedir) +INCLUDES=-I$(top_srcdir) -I. $(IMAP_INCLUDES) $(ICONV_INCLUDES) \ + -Iintl -I$(includedir) non_us_sources = pgp.c pgpinvoke.c pgpkey.c pgplib.c sha1dgst.c \ gnupgparse.c sha.h sha_locl.h \ @@ -1 +1 @@ -1.1.14 +1.3.0 @@ -19,13 +19,8 @@ * Boston, MA 02111, USA. */ -/* - * This module deals with POSIX.2 character set definition files. - */ - #include <string.h> -#include <ctype.h> #include <stdio.h> #include <stdlib.h> @@ -34,49 +29,17 @@ #include <unistd.h> #include <errno.h> +#include <iconv.h> + #include "mutt.h" #include "charset.h" -/* Define this if you want any dprint () statements in this code */ - -#undef CHARSET_DEBUG - -#ifndef CHARSET_DEBUG -# undef dprint -# define dprint(a, b) (void) a +#ifndef EILSEQ +#define EILSEQ EINVAL #endif -/* Module-global variables */ - -static HASH *Translations = NULL; -static HASH *Charsets = NULL; -static HASH *CharsetAliases = NULL; - -/* Function Prototypes */ - -static CHARDESC *chardesc_new (void); -static CHARDESC *repr2descr (int repr, CHARSET * cs); - -static CHARMAP *charmap_new (void); -static CHARMAP *parse_charmap_header (FILE * fp); -static CHARSET *charset_new (size_t hash_size); - -static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to); - -static char translate_character (CHARSET * to, const char *symbol); - -static int load_charset (const char *filename, CHARSET ** csp, short multbyte); -static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp); -static int _cd_compar (const void *a, const void *b); - -static void canonical_charset (char *dest, size_t dlen, const char *name); -static void chardesc_free (CHARDESC ** cdp); -static void charmap_free (CHARMAP ** cp); -static void charset_free (CHARSET ** csp); -static void fix_symbol (char *symbol, CHARMAP * m); - -static void canonical_charset (char *dest, size_t dlen, const char *name) +void mutt_canonical_charset (char *dest, size_t dlen, const char *name) { size_t i; @@ -87,785 +50,113 @@ static void canonical_charset (char *dest, size_t dlen, const char *name) { if (strchr ("_/. ", name[i])) dest[i] = '-'; + else if ('A' <= name[i] && name[i] <= 'Z') + dest[i] = name[i] - 'A' + 'a'; else - dest[i] = tolower (name[i]); + dest[i] = name[i]; } dest[i] = '\0'; } -static CHARSET *charset_new (size_t hash_size) +int mutt_is_utf8 (const char *s) { - CHARSET *cp = safe_malloc (sizeof (CHARSET)); - size_t i; - - cp->n_symb = 256; - cp->u_symb = 0; - cp->multbyte = 1; - cp->symb_to_repr = hash_create (hash_size); - cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *)); - - for (i = 0; i < cp->n_symb; i++) - cp->description[i] = NULL; - - return cp; -} - -static void charset_free (CHARSET ** csp) -{ - CHARSET *cs = *csp; - size_t i; - - for (i = 0; i < cs->n_symb; i++) - chardesc_free (&cs->description[i]); - - safe_free ((void **) &cs->description); - - hash_destroy (&cs->symb_to_repr, NULL); - safe_free ((void **) csp); -} + char buffer[8]; -static CHARMAP *charmap_new (void) -{ - CHARMAP *m = safe_malloc (sizeof (CHARMAP)); - - m->charset = NULL; - m->escape_char = '\\'; - m->comment_char = '#'; - m->multbyte = 1; - m->aliases = NULL; - - return m; -} - -static void charmap_free (CHARMAP ** cp) -{ - if (!cp || !*cp) - return; - - mutt_free_list (&(*cp)->aliases); - safe_free ((void **) &(*cp)->charset); - safe_free ((void **) cp); - - return; -} - -static CHARDESC *chardesc_new (void) -{ - CHARDESC *p = safe_malloc (sizeof (CHARDESC)); - - p->symbol = NULL; - p->repr = -1; - - return p; -} - -static void chardesc_free (CHARDESC ** cdp) -{ - if (!cdp || !*cdp) - return; - - - safe_free ((void **) &(*cdp)->symbol); - safe_free ((void **) cdp); + if (!s) + return 0; - return; + mutt_canonical_charset (buffer, sizeof (buffer), s); + return !mutt_strcmp (buffer, "utf-8"); } -static CHARMAP *parse_charmap_header (FILE * fp) -{ - char buffer[1024]; - char *t, *u; - CHARMAP *m = charmap_new (); - - while (fgets (buffer, sizeof (buffer), fp)) - { - if ((t = strchr (buffer, '\n'))) - *t = '\0'; - else - { - charmap_free (&m); - return NULL; - } - - if (!strncmp (buffer, "CHARMAP", 7)) - break; - - if (*buffer == m->comment_char) - { - if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias")) - { - char _tmp[SHORT_STRING]; - while ((t = strtok(NULL, "\t, "))) - { - canonical_charset (_tmp, sizeof (_tmp), t); - m->aliases = mutt_add_list (m->aliases, _tmp); - } - } - continue; - } - - if (!(t = strtok (buffer, "\t "))) - continue; - - if (!(u = strtok (NULL, "\t "))) - { - charmap_free (&m); - return NULL; - } - if (!strcmp (t, "<code_set_name>")) - { - safe_free ((void **) &m->charset); - canonical_charset (u, strlen (u) + 1, u); - m->charset = safe_strdup (u); - } - else if (!strcmp (t, "<comment_char>")) - { - m->comment_char = *u; - } - else if (!strcmp (t, "<escape_char>")) - { - m->escape_char = *u; - } - else if (!strcmp (t, "<mb_cur_max>")) - { - m->multbyte = strtol (u, NULL, 0); - } - } - - return m; -} - -/* Properly handle escape characters within a symbol. */ +/* + * Like iconv_open, but canonicalises the charsets + */ -static void fix_symbol (char *symbol, CHARMAP * m) +iconv_t mutt_iconv_open (const char *tocode, const char *fromcode) { - char *s, *d; + char tocode1[SHORT_STRING]; + char fromcode1[SHORT_STRING]; - for (s = symbol, d = symbol; *s; *d++ = *s++) - { - if (*s == m->escape_char && !*++s) - break; - } - - *d = *s; + mutt_canonical_charset (tocode1, sizeof (tocode1), tocode); + mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode); + return iconv_open (tocode1, fromcode1); } -enum -{ - CL_DESCR, - CL_END, - CL_COMMENT, - CL_ERROR -}; - -static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp) -{ - char *t, *u; - short n; - CHARDESC *descr; - - if (*line == m->comment_char) - return CL_COMMENT; - - descr = *descrp = chardesc_new (); - - if (!strncmp (line, "END CHARMAP", 11)) - { - chardesc_free (descrp); - return CL_END; - } - - for (t = line; *t && isspace ((unsigned char) *t); t++) - ; - - if (*t++ != '<') - { - chardesc_free (descrp); - return CL_ERROR; - } - - for (u = t; *u && *u != '>'; u++) - { - if (*u == m->escape_char && u[1]) - u++; - } - - if (*u != '>') - { - chardesc_free (descrp); - return CL_ERROR; - } - - *u++ = '\0'; - descr->symbol = safe_strdup (t); - fix_symbol (descr->symbol, m); - - for (t = u; *t && isspace ((unsigned char) *t); t++) - ; - - for (u = t; *u && !isspace ((unsigned char) *u); u++) - ; - - *u++ = 0; - descr->repr = 0; - - for (n = 0; *t == m->escape_char && n < m->multbyte; n++) - { - switch (*++t) - { - case 'x': - descr->repr = descr->repr * 256 + strtol (++t, &t, 16); - break; - case 'd': - descr->repr = descr->repr * 256 + strtol (++t, &t, 10); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - descr->repr = descr->repr * 256 + strtol (t, &t, 8); - break; - default: - chardesc_free (descrp); - return CL_ERROR; - } - } - - if (!n) - { - chardesc_free (descrp); - return CL_ERROR; - } - - return CL_DESCR; -} - -static int _cd_compar (const void *a, const void *b) -{ - const CHARDESC *ap, *bp; - int i; - - ap = * (CHARDESC **) a; - bp = * (CHARDESC **) b; - - i = ap->repr - bp->repr; - - dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n", - ap->repr, ap->symbol, bp->repr, bp->symbol, i)); - - return i; -} /* - * Load a character set description into memory. - * - * The multibyte parameter tells us whether we are going - * to accept multibyte character sets. + * Like iconv, but keeps going even when the input is invalid + * If you're supplying inrepls, the source charset should be stateless; + * if you're supplying an outrepl, the target charset should be. */ -static int load_charset (const char *filename, CHARSET ** csp, short multbyte) +size_t mutt_iconv (iconv_t cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + const char **inrepls, const char *outrepl) { - CHARDESC *cd = NULL; - CHARSET *cs = NULL; - CHARMAP *m = NULL; - FILE *fp; - char buffer[1024]; - int i; - int rv = -1; - - cs = *csp = charset_new (multbyte ? 1031 : 257); + size_t ret = 0, ret1; + const char *ib = *inbuf; + size_t ibl = *inbytesleft; + char *ob = *outbuf; + size_t obl = *outbytesleft; - dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename)); - - if ((fp = fopen (filename, "r")) == NULL) + for (;;) { - char _filename[_POSIX_PATH_MAX]; - - snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename); - dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename)); - - if ((fp = fopen (_filename, "r")) == NULL) + ret1 = iconv (cd, &ib, &ibl, &ob, &obl); + if (ret1 != (size_t)-1) + ret += ret1; + if (ibl && obl && errno == EILSEQ) { - dprint (2, (debugfile, "load_charset: Failed.\n")); - goto bail; - } - } - - if ((m = parse_charmap_header (fp)) == NULL) - goto bail; - - /* Don't handle multibyte character sets unless explicitly requested - * to do so. - */ - - if (m->multbyte > 1 && !multbyte) - { - dprint (2, (debugfile, "load_charset: m->multbyte == %d\n", - (int) m->multbyte)); - goto bail; - } - - cs->multbyte = m->multbyte; - - while (fgets (buffer, sizeof (buffer), fp) != NULL) - { - i = parse_charmap_line (buffer, m, &cd); - - if (i == CL_END) - break; - else if (i == CL_DESCR) - { - dprint (5, (debugfile, "load_charset: Got character description: <%s> -> %x\n", - cd->symbol, cd->repr)); - - if (!multbyte) + if (inrepls) { - if (0 <= cd->repr && cd->repr < 256) + /* Try replacing the input */ + const char **t; + for (t = inrepls; *t; t++) { - hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); - hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); - - /* note: we intentionally leak some memory here. */ - if (!cs->description[cd->repr]) - cs->u_symb++; - - cs->description[cd->repr] = cd; - cd = NULL; - } - } - else - { - if (cs->u_symb == cs->n_symb) - { - size_t new_size = cs->n_symb + 256; - size_t i; - - safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *)); - for (i = cs->u_symb; i < new_size; i++) - cs->description[i] = NULL; - cs->n_symb = new_size; + const char *ib1 = *t; + size_t ibl1 = strlen (*t); + char *ob1 = ob; + size_t obl1 = obl; + iconv (cd, &ib1, &ibl1, &ob1, &obl1); + if (!ibl1) + { + ++ib, --ibl; + ob = ob1, obl = obl1; + ++ret; + break; + } } - - hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); - hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); - - cs->description[cs->u_symb++] = cd; - cd = NULL; + if (*t) + continue; } - } - - if (cd) - { - dprint (5, (debugfile, "load_charset: character description still present: <%s>->%x\n", - cd->symbol, cd->repr)); - } - chardesc_free (&cd); - } - - if (multbyte) - qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar); - - rv = 0; - -bail: - charmap_free (&m); - if (fp) - fclose (fp); - if (rv) - charset_free (csp); - - return rv; -} - -static CHARDESC *repr2descr (int repr, CHARSET * cs) -{ - CHARDESC *key; - CHARDESC **r; - - if (!cs || repr < 0) - return NULL; - - if (cs->multbyte == 1) - { - if (repr < 256) - return cs->description[repr]; - else - return NULL; - } - - key = safe_malloc (sizeof(CHARDESC)); - key->repr = repr; - key->symbol = "<unknown>"; /* otherwise, the - * debug code may - * segfault. ouch. - */ - - r = bsearch (&key, cs->description, cs->u_symb, - sizeof (CHARDESC *), _cd_compar); - - safe_free ((void **) &key); - - if (r) return *r; - - return NULL; -} - -/* Build a translation table. If a character cannot be - * translated correctly, we try to find an approximation - * from the portable charcter set. - * - * Note that this implies the assumption that the portable - * character set can be used without any conversion. - * - * Should be safe on POSIX systems. - */ - -static char translate_character (CHARSET * to, const char *symbol) -{ - CHARDESC *cdt; - - if ((cdt = hash_find (to->symb_to_repr, symbol))) - return (char) cdt->repr; - else - return *symbol; -} - -static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to) -{ - int i; - CHARSET_MAP *map; - CHARDESC *cd; - - /* This is for 8-bit character sets. */ - - if (!from || !to || from->multbyte > 1 || to->multbyte > 1) - return NULL; - - map = safe_malloc (sizeof (CHARSET_MAP)); - for (i = 0; i < 256; i++) - { - if (!(cd = repr2descr (i, from))) - (*map)[i] = '?'; - else - (*map)[i] = translate_character (to, cd->symbol); - } - - return map; -} - -/* Currently, just scan the various charset definition files. - * On the long run, we should cache this stuff in a file. - */ - -static HASH *load_charset_aliases (void) -{ - HASH *charset_aliases; - CHARMAP *m; - DIR *dp; - FILE *fp; - struct dirent *de; - - if ((dp = opendir (CHARMAPS_DIR)) == NULL) - return NULL; - - charset_aliases = hash_create(127); - - while ((de = readdir (dp))) - { - char fnbuff[_POSIX_PATH_MAX]; - - if (*de->d_name == '.') - continue; - - snprintf (fnbuff, sizeof (fnbuff), "%s/%s", CHARMAPS_DIR, de->d_name); - dprint (2, (debugfile, "load_charset_aliases: Opening %s\n", fnbuff)); - if ((fp = fopen (fnbuff, "r")) == NULL) - continue; - - if ((m = parse_charmap_header (fp)) != NULL) - { - LIST *lp; - char buffer[LONG_STRING]; - - canonical_charset (buffer, sizeof (buffer), de->d_name); - m->aliases = mutt_add_list (m->aliases, buffer); - - if (m->charset) - m->aliases = mutt_add_list (m->aliases, m->charset); - - for (lp = m->aliases; lp; lp = lp->next) + if (outrepl) { - if (lp->data) + /* Try replacing the output */ + int n = strlen (outrepl); + if (n <= obl) { - dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n", - lp->data, de->d_name)); - if (hash_find (charset_aliases, lp->data)) - { - dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n", - lp->data)); - } - else - hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup (de->d_name), 0); + memcpy (ob, outrepl, n); + ++ib, --ibl; + ob += n, obl -= n; + ++ret; + continue; } } - - charmap_free (&m); - } - - fclose (fp); - } - - closedir (dp); - return charset_aliases; -} - -static void init_charsets () -{ - if (Charsets) return; - - Charsets = hash_create (127); - Translations = hash_create (127); - CharsetAliases = load_charset_aliases (); -} - -CHARSET *mutt_get_charset (const char *name) -{ - CHARSET *charset; - char buffer[SHORT_STRING]; - char *real_charset; - char *hooked; - - if (!name || !*name) - return (NULL); - - init_charsets(); - canonical_charset (buffer, sizeof(buffer), name); - - /* needs to be documented */ - - if ((hooked = mutt_charset_hook (buffer))) - canonical_charset (buffer, sizeof (buffer), hooked); - - dprint (2, (debugfile, "mutt_get_charset: Looking for %s\n", buffer)); - - if(!CharsetAliases || !(real_charset = hash_find(CharsetAliases, buffer))) - real_charset = buffer; - - dprint (2, (debugfile, "mutt_get_charset: maps to: %s\n", real_charset)); - - if(!(charset = hash_find (Charsets, real_charset))) - { - dprint (2, (debugfile, "mutt_get_charset: Need to load.\n")); - if (load_charset(real_charset, &charset, 0) == 0) - hash_insert(Charsets, safe_strdup(real_charset), charset, 1); - else - charset = NULL; - } - return charset; -} - -CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to) -{ - char from_canon[SHORT_STRING]; - char to_canon[SHORT_STRING]; - char key[SHORT_STRING]; - char *from, *to; - CHARSET *from_cs, *to_cs; - CHARSET_MAP *map; - - if(!_from || !_to) - return NULL; - - canonical_charset(from_canon, sizeof(from_canon), _from); - canonical_charset(to_canon, sizeof(to_canon), _to); - - /* quick check for some trivial cases. Doing this before - * we actually call the initialization routine delays character - * set loading until it's _really_ needed. - */ - - if(!strcmp(from_canon, to_canon) - || (!strcmp (from_canon, "us-ascii") && !strncmp (to_canon, "iso-8859", 8))) - return NULL; - - init_charsets(); - - if(!CharsetAliases || !(from = hash_find(CharsetAliases, from_canon))) - from = from_canon; - if(!CharsetAliases || !(to = hash_find(CharsetAliases, to_canon))) - to = to_canon; - - /* quick check for the identity mapping */ - if((from == to) || !mutt_strcmp(from, to)) - return NULL; - - snprintf(key, sizeof(key), "%s %s", from, to); - if((map = hash_find(Translations, key)) == NULL) - { - from_cs = mutt_get_charset(from); - to_cs = mutt_get_charset(to); - - if((map = build_translation(from_cs, to_cs))) - hash_insert(Translations, safe_strdup(key), map, 1); - } - return map; -} - -unsigned char mutt_display_char(unsigned char ch, CHARSET_MAP *map) -{ - if (!map || !ch) - return ch; - - return (unsigned char) (*map)[ch]; -} - -int mutt_display_string(char *str, CHARSET_MAP *map) -{ - if(!map) - return -1; - - while ((*str = mutt_display_char((unsigned char)*str, map))) - str++; - - return 0; -} - -/*************************************************************/ -/* UTF-8 support */ - -int mutt_is_utf8(const char *s) -{ - char buffer[SHORT_STRING]; - - if(!s) - return 0; - - canonical_charset(buffer, sizeof(buffer), s); - return !mutt_strcmp(buffer, "utf-8"); -} - -/* macros for the various bit maps we need */ - -#define IOOOOOOO 0x80 -#define IIOOOOOO 0xc0 -#define IIIOOOOO 0xe0 -#define IIIIOOOO 0xf0 -#define IIIIIOOO 0xf8 -#define IIIIIIOO 0xfc -#define IIIIIIIO 0xfe -#define IIIIIIII 0xff - -static struct unicode_mask -{ - int mask; - int value; - short len; -} -unicode_masks[] = -{ - { IOOOOOOO, 0, 1 }, - { IIIOOOOO, IIOOOOOO, 2 }, - { IIIIOOOO, IIIOOOOO, 3 }, - { IIIIIOOO, IIIIOOOO, 4 }, - { IIIIIIOO, IIIIIOOO, 5 }, - { IIIIIIIO, IIIIIIOO, 6 }, - { 0, 0, 0 } -}; - - -static char *utf_to_unicode(int *out, char *in) -{ - struct unicode_mask *um = NULL; - short i; - - for(i = 0; unicode_masks[i].mask; i++) - { - if((*in & unicode_masks[i].mask) == unicode_masks[i].value) - { - um = &unicode_masks[i]; - break; - } - } - - if(!um) - { - *out = (int) '?'; - return in + 1; - } - - for(i = 1; i < um->len; i++) - { - if((in[i] & IIOOOOOO) != IOOOOOOO) - { - *out = (int) '?'; - return in + i; } + *inbuf = ib, *inbytesleft = ibl; + *outbuf = ob, *outbytesleft = obl; + return ret; } - - *out = ((int)in[0]) & ~um->mask & 0xff; - for(i = 1; i < um->len; i++) - *out = (*out << 6) | (((int)in[i]) & ~IIOOOOOO & 0xff); - - if(!*out) - *out = '?'; - - return in + um->len; -} - -static CHARSET *Unicode = NULL; - -static int unicode_init (void) -{ - if (!Unicode) - { - if (load_charset ("ISO_10646", &Unicode, 1) == -1) - Unicode = NULL; - } - - return (Unicode == NULL ? -1 : 0); -} - -void mutt_decode_utf8_string(char *str, CHARSET *chs) -{ - char *s, *t; - CHARDESC *cd; - int ch; - - (void) unicode_init (); - - for (s = t = str; *t; s++) - { - t = utf_to_unicode(&ch, t); - - /* handle us-ascii characters directly */ - if (0 <= ch && ch < 128) - *s = ch; - else if ((cd = repr2descr (ch, Unicode)) && (ch = translate_character (chs, cd->symbol)) != -1) - *s = ch; - else - *s = '?'; - - if(!*s) *s = '?'; - } - - *s = '\0'; } - - /************************************************************* * General decoder framework + * Used in handler.c for converting to mutt's Charset */ - - #define MIN(a,b) (((a) <= (b)) ? (a): (b)) DECODER *mutt_open_decoder (const char *src, const char *dest) @@ -875,34 +166,16 @@ DECODER *mutt_open_decoder (const char *src, const char *dest) d->in.size = DECODER_BUFFSIZE; d->out.size = DECODER_BUFFSIZE; - d->_in = &d->in; - - if (!src || !dest || mutt_is_utf8 (dest)) + if (dest && src && (d->cd = mutt_iconv_open (dest, src)) != (iconv_t)-1) { - d->just_take_id = 1; - d->_in = &d->out; - return d; - } - - if (mutt_is_utf8 (src)) - { - if (!(d->chs = mutt_get_charset (dest)) || unicode_init () == -1) - { - d->just_take_id = 1; - d->_in = &d->out; - return d; - } - - d->src_is_utf8 = 1; - return d; + d->_in = &d->in; + d->outrepl = mutt_is_utf8 (dest) ? "\357\277\275" : "?"; } - - if (!(d->chm = mutt_get_translation (src, dest))) + else { d->just_take_id = 1; d->_in = &d->out; } - return d; } @@ -911,7 +184,23 @@ void mutt_free_decoder (DECODER **dpp) safe_free ((void **) dpp); } -static void _process_data (DECODER *, short); +static void _process_data (DECODER *d, short force) +{ + if (force) d->forced = 1; + + if (!d->just_take_id) + { + const char *ib = d->in.buff; + size_t ibl = d->in.used; + char *ob = d->out.buff + d->out.used; + size_t obl = d->out.size - d->out.used; + + mutt_iconv (d->cd, &ib, &ibl, &ob, &obl, 0, d->outrepl); + memmove (d->in.buff, ib, ibl); + d->in.used = ibl; + d->out.used = d->out.size - obl; + } +} void mutt_decoder_push (DECODER *d, void *_buff, size_t blen, size_t *taken) { @@ -977,75 +266,6 @@ void mutt_decoder_pop_to_state (DECODER *d, STATE *s) } } -/* this is where things actually happen */ - -static void _process_data_8bit (DECODER *d) -{ |