summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Thomas Roessler <roessler@does-not-exist.org> 2000-05-09 15:19:26 +0000
committer Thomas Roessler <roessler@does-not-exist.org> 2000-05-09 15:19:26 +0000
commit 3de049372c8ca458e096b8c8ece2d4f95da0fe82 (patch)
tree bedf3c7648d8815163ff60f0a557a554fdc32545
parent ee09e5f42399a6016f3e96dcbd03bab1effa07ff (diff)
Edmund Grimley Evans' UTF-8 patch.
-rw-r--r-- Makefile.am 17
-rw-r--r-- VERSION 2
-rw-r--r-- charset.c 1120
-rw-r--r-- charset.h 58
-rw-r--r-- commands.c 12
-rw-r--r-- compose.c 41
-rw-r--r-- configure.in 42
-rw-r--r-- curs_lib.c 149
-rw-r--r-- curs_main.c 2
-rw-r--r-- enter.c 7
-rw-r--r-- gettext.c 167
-rw-r--r-- gnupgparse.c 38
-rw-r--r-- hdrline.c 92
-rw-r--r-- help.c 80
-rw-r--r-- iconv/Makefile.am 10
-rw-r--r-- iconv/iconv.c 899
-rw-r--r-- iconv/iconv.h 12
-rw-r--r-- init.c 2
-rw-r--r-- lib.h 3
-rw-r--r-- mbyte.c 167
-rw-r--r-- mbyte.h 30
-rw-r--r-- menu.c 60
-rw-r--r-- muttlib.c 2
-rw-r--r-- pager.c 318
-rw-r--r-- pgppubring.c 11
-rw-r--r-- protos.h 11
-rw-r--r-- rfc2047.c 20
-rw-r--r-- rfc2231.c 11
-rw-r--r-- sendlib.c 50
-rw-r--r-- utf8.c 66
-rw-r--r-- wcwidth.c 134
31 files changed, 2334 insertions, 1299 deletions
diff --git a/Makefile.am b/Makefile.am
index e751ced4..a58f7a40 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -13,7 +13,12 @@ IMAP_SUBDIR = imap
IMAP_INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/imap
endif
-SUBDIRS = m4 po intl doc contrib $(CHARMAP_SUBDIR) $(IMAP_SUBDIR)
+if BUILD_ICONV
+ICONV_SUBDIR = iconv
+ICONV_INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/iconv
+endif
+
+SUBDIRS = m4 po intl doc contrib $(CHARMAP_SUBDIR) $(IMAP_SUBDIR) $(ICONV_SUBDIR)
if NEEDS_PGPEWRAP
bin_SCRIPTS = pgpewrap muttbug
@@ -34,12 +39,13 @@ mutt_SOURCES = $(BUILT_SOURCES) \
rfc822.c rfc1524.c rfc2047.c rfc2231.c \
score.c send.c sendlib.c signal.c sort.c \
status.c system.c thread.c charset.c history.c lib.c \
- muttlib.c editmsg.c
+ muttlib.c editmsg.c utf8.c mbyte.c wcwidth.c gettext.c
mutt_LDADD = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAP) $(MUTTLIBS) \
- $(INTLLIBS)
+ $(INTLLIBS) $(LIBICONV)
-mutt_DEPENDENCIES = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAPDEPS) $(INTLDEPS)
+mutt_DEPENDENCIES = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAPDEPS) \
+ $(INTLDEPS) $(LIBICONVDEPS)
makedoc_SOURCES = makedoc.c
@@ -49,7 +55,8 @@ DEFS=-DSHAREDIR=\"$(sharedir)\" -DSYSCONFDIR=\"$(sysconfdir)\" \
-DBINDIR=\"$(bindir)\" -DHAVE_CONFIG_H=1
# top_srcdir is for building outside of the source tree
-INCLUDES=-I$(top_srcdir) -I. $(IMAP_INCLUDES) -Iintl -I$(includedir)
+INCLUDES=-I$(top_srcdir) -I. $(IMAP_INCLUDES) $(ICONV_INCLUDES) \
+ -Iintl -I$(includedir)
non_us_sources = pgp.c pgpinvoke.c pgpkey.c pgplib.c sha1dgst.c \
gnupgparse.c sha.h sha_locl.h \
diff --git a/VERSION b/VERSION
index e9bc1499..f0bb29e7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.1.14
+1.3.0
diff --git a/charset.c b/charset.c
index a531519c..30296275 100644
--- a/charset.c
+++ b/charset.c
@@ -19,13 +19,8 @@
* Boston, MA 02111, USA.
*/
-/*
- * This module deals with POSIX.2 character set definition files.
- */
-
#include <string.h>
-#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -34,49 +29,17 @@
#include <unistd.h>
#include <errno.h>
+#include <iconv.h>
+
#include "mutt.h"
#include "charset.h"
-/* Define this if you want any dprint () statements in this code */
-
-#undef CHARSET_DEBUG
-
-#ifndef CHARSET_DEBUG
-# undef dprint
-# define dprint(a, b) (void) a
+#ifndef EILSEQ
+#define EILSEQ EINVAL
#endif
-/* Module-global variables */
-
-static HASH *Translations = NULL;
-static HASH *Charsets = NULL;
-static HASH *CharsetAliases = NULL;
-
-/* Function Prototypes */
-
-static CHARDESC *chardesc_new (void);
-static CHARDESC *repr2descr (int repr, CHARSET * cs);
-
-static CHARMAP *charmap_new (void);
-static CHARMAP *parse_charmap_header (FILE * fp);
-static CHARSET *charset_new (size_t hash_size);
-
-static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to);
-
-static char translate_character (CHARSET * to, const char *symbol);
-
-static int load_charset (const char *filename, CHARSET ** csp, short multbyte);
-static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp);
-static int _cd_compar (const void *a, const void *b);
-
-static void canonical_charset (char *dest, size_t dlen, const char *name);
-static void chardesc_free (CHARDESC ** cdp);
-static void charmap_free (CHARMAP ** cp);
-static void charset_free (CHARSET ** csp);
-static void fix_symbol (char *symbol, CHARMAP * m);
-
-static void canonical_charset (char *dest, size_t dlen, const char *name)
+void mutt_canonical_charset (char *dest, size_t dlen, const char *name)
{
size_t i;
@@ -87,785 +50,113 @@ static void canonical_charset (char *dest, size_t dlen, const char *name)
{
if (strchr ("_/. ", name[i]))
dest[i] = '-';
+ else if ('A' <= name[i] && name[i] <= 'Z')
+ dest[i] = name[i] - 'A' + 'a';
else
- dest[i] = tolower (name[i]);
+ dest[i] = name[i];
}
dest[i] = '\0';
}
-static CHARSET *charset_new (size_t hash_size)
+int mutt_is_utf8 (const char *s)
{
- CHARSET *cp = safe_malloc (sizeof (CHARSET));
- size_t i;
-
- cp->n_symb = 256;
- cp->u_symb = 0;
- cp->multbyte = 1;
- cp->symb_to_repr = hash_create (hash_size);
- cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *));
-
- for (i = 0; i < cp->n_symb; i++)
- cp->description[i] = NULL;
-
- return cp;
-}
-
-static void charset_free (CHARSET ** csp)
-{
- CHARSET *cs = *csp;
- size_t i;
-
- for (i = 0; i < cs->n_symb; i++)
- chardesc_free (&cs->description[i]);
-
- safe_free ((void **) &cs->description);
-
- hash_destroy (&cs->symb_to_repr, NULL);
- safe_free ((void **) csp);
-}
+ char buffer[8];
-static CHARMAP *charmap_new (void)
-{
- CHARMAP *m = safe_malloc (sizeof (CHARMAP));
-
- m->charset = NULL;
- m->escape_char = '\\';
- m->comment_char = '#';
- m->multbyte = 1;
- m->aliases = NULL;
-
- return m;
-}
-
-static void charmap_free (CHARMAP ** cp)
-{
- if (!cp || !*cp)
- return;
-
- mutt_free_list (&(*cp)->aliases);
- safe_free ((void **) &(*cp)->charset);
- safe_free ((void **) cp);
-
- return;
-}
-
-static CHARDESC *chardesc_new (void)
-{
- CHARDESC *p = safe_malloc (sizeof (CHARDESC));
-
- p->symbol = NULL;
- p->repr = -1;
-
- return p;
-}
-
-static void chardesc_free (CHARDESC ** cdp)
-{
- if (!cdp || !*cdp)
- return;
-
-
- safe_free ((void **) &(*cdp)->symbol);
- safe_free ((void **) cdp);
+ if (!s)
+ return 0;
- return;
+ mutt_canonical_charset (buffer, sizeof (buffer), s);
+ return !mutt_strcmp (buffer, "utf-8");
}
-static CHARMAP *parse_charmap_header (FILE * fp)
-{
- char buffer[1024];
- char *t, *u;
- CHARMAP *m = charmap_new ();
-
- while (fgets (buffer, sizeof (buffer), fp))
- {
- if ((t = strchr (buffer, '\n')))
- *t = '\0';
- else
- {
- charmap_free (&m);
- return NULL;
- }
-
- if (!strncmp (buffer, "CHARMAP", 7))
- break;
-
- if (*buffer == m->comment_char)
- {
- if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias"))
- {
- char _tmp[SHORT_STRING];
- while ((t = strtok(NULL, "\t, ")))
- {
- canonical_charset (_tmp, sizeof (_tmp), t);
- m->aliases = mutt_add_list (m->aliases, _tmp);
- }
- }
- continue;
- }
-
- if (!(t = strtok (buffer, "\t ")))
- continue;
-
- if (!(u = strtok (NULL, "\t ")))
- {
- charmap_free (&m);
- return NULL;
- }
- if (!strcmp (t, "<code_set_name>"))
- {
- safe_free ((void **) &m->charset);
- canonical_charset (u, strlen (u) + 1, u);
- m->charset = safe_strdup (u);
- }
- else if (!strcmp (t, "<comment_char>"))
- {
- m->comment_char = *u;
- }
- else if (!strcmp (t, "<escape_char>"))
- {
- m->escape_char = *u;
- }
- else if (!strcmp (t, "<mb_cur_max>"))
- {
- m->multbyte = strtol (u, NULL, 0);
- }
- }
-
- return m;
-}
-
-/* Properly handle escape characters within a symbol. */
+/*
+ * Like iconv_open, but canonicalises the charsets
+ */
-static void fix_symbol (char *symbol, CHARMAP * m)
+iconv_t mutt_iconv_open (const char *tocode, const char *fromcode)
{
- char *s, *d;
+ char tocode1[SHORT_STRING];
+ char fromcode1[SHORT_STRING];
- for (s = symbol, d = symbol; *s; *d++ = *s++)
- {
- if (*s == m->escape_char && !*++s)
- break;
- }
-
- *d = *s;
+ mutt_canonical_charset (tocode1, sizeof (tocode1), tocode);
+ mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode);
+ return iconv_open (tocode1, fromcode1);
}
-enum
-{
- CL_DESCR,
- CL_END,
- CL_COMMENT,
- CL_ERROR
-};
-
-static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp)
-{
- char *t, *u;
- short n;
- CHARDESC *descr;
-
- if (*line == m->comment_char)
- return CL_COMMENT;
-
- descr = *descrp = chardesc_new ();
-
- if (!strncmp (line, "END CHARMAP", 11))
- {
- chardesc_free (descrp);
- return CL_END;
- }
-
- for (t = line; *t && isspace ((unsigned char) *t); t++)
- ;
-
- if (*t++ != '<')
- {
- chardesc_free (descrp);
- return CL_ERROR;
- }
-
- for (u = t; *u && *u != '>'; u++)
- {
- if (*u == m->escape_char && u[1])
- u++;
- }
-
- if (*u != '>')
- {
- chardesc_free (descrp);
- return CL_ERROR;
- }
-
- *u++ = '\0';
- descr->symbol = safe_strdup (t);
- fix_symbol (descr->symbol, m);
-
- for (t = u; *t && isspace ((unsigned char) *t); t++)
- ;
-
- for (u = t; *u && !isspace ((unsigned char) *u); u++)
- ;
-
- *u++ = 0;
- descr->repr = 0;
-
- for (n = 0; *t == m->escape_char && n < m->multbyte; n++)
- {
- switch (*++t)
- {
- case 'x':
- descr->repr = descr->repr * 256 + strtol (++t, &t, 16);
- break;
- case 'd':
- descr->repr = descr->repr * 256 + strtol (++t, &t, 10);
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- descr->repr = descr->repr * 256 + strtol (t, &t, 8);
- break;
- default:
- chardesc_free (descrp);
- return CL_ERROR;
- }
- }
-
- if (!n)
- {
- chardesc_free (descrp);
- return CL_ERROR;
- }
-
- return CL_DESCR;
-}
-
-static int _cd_compar (const void *a, const void *b)
-{
- const CHARDESC *ap, *bp;
- int i;
-
- ap = * (CHARDESC **) a;
- bp = * (CHARDESC **) b;
-
- i = ap->repr - bp->repr;
-
- dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n",
- ap->repr, ap->symbol, bp->repr, bp->symbol, i));
-
- return i;
-}
/*
- * Load a character set description into memory.
- *
- * The multibyte parameter tells us whether we are going
- * to accept multibyte character sets.
+ * Like iconv, but keeps going even when the input is invalid
+ * If you're supplying inrepls, the source charset should be stateless;
+ * if you're supplying an outrepl, the target charset should be.
*/
-static int load_charset (const char *filename, CHARSET ** csp, short multbyte)
+size_t mutt_iconv (iconv_t cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft,
+ const char **inrepls, const char *outrepl)
{
- CHARDESC *cd = NULL;
- CHARSET *cs = NULL;
- CHARMAP *m = NULL;
- FILE *fp;
- char buffer[1024];
- int i;
- int rv = -1;
-
- cs = *csp = charset_new (multbyte ? 1031 : 257);
+ size_t ret = 0, ret1;
+ const char *ib = *inbuf;
+ size_t ibl = *inbytesleft;
+ char *ob = *outbuf;
+ size_t obl = *outbytesleft;
- dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename));
-
- if ((fp = fopen (filename, "r")) == NULL)
+ for (;;)
{
- char _filename[_POSIX_PATH_MAX];
-
- snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename);
- dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename));
-
- if ((fp = fopen (_filename, "r")) == NULL)
+ ret1 = iconv (cd, &ib, &ibl, &ob, &obl);
+ if (ret1 != (size_t)-1)
+ ret += ret1;
+ if (ibl && obl && errno == EILSEQ)
{
- dprint (2, (debugfile, "load_charset: Failed.\n"));
- goto bail;
- }
- }
-
- if ((m = parse_charmap_header (fp)) == NULL)
- goto bail;
-
- /* Don't handle multibyte character sets unless explicitly requested
- * to do so.
- */
-
- if (m->multbyte > 1 && !multbyte)
- {
- dprint (2, (debugfile, "load_charset: m->multbyte == %d\n",
- (int) m->multbyte));
- goto bail;
- }
-
- cs->multbyte = m->multbyte;
-
- while (fgets (buffer, sizeof (buffer), fp) != NULL)
- {
- i = parse_charmap_line (buffer, m, &cd);
-
- if (i == CL_END)
- break;
- else if (i == CL_DESCR)
- {
- dprint (5, (debugfile, "load_charset: Got character description: <%s> -> %x\n",
- cd->symbol, cd->repr));
-
- if (!multbyte)
+ if (inrepls)
{
- if (0 <= cd->repr && cd->repr < 256)
+ /* Try replacing the input */
+ const char **t;
+ for (t = inrepls; *t; t++)
{
- hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL);
- hash_insert (cs->symb_to_repr, cd->symbol, cd, 0);
-
- /* note: we intentionally leak some memory here. */
- if (!cs->description[cd->repr])
- cs->u_symb++;
-
- cs->description[cd->repr] = cd;
- cd = NULL;
- }
- }
- else
- {
- if (cs->u_symb == cs->n_symb)
- {
- size_t new_size = cs->n_symb + 256;
- size_t i;
-
- safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *));
- for (i = cs->u_symb; i < new_size; i++)
- cs->description[i] = NULL;
- cs->n_symb = new_size;
+ const char *ib1 = *t;
+ size_t ibl1 = strlen (*t);
+ char *ob1 = ob;
+ size_t obl1 = obl;
+ iconv (cd, &ib1, &ibl1, &ob1, &obl1);
+ if (!ibl1)
+ {
+ ++ib, --ibl;
+ ob = ob1, obl = obl1;
+ ++ret;
+ break;
+ }
}
-
- hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL);
- hash_insert (cs->symb_to_repr, cd->symbol, cd, 0);
-
- cs->description[cs->u_symb++] = cd;
- cd = NULL;
+ if (*t)
+ continue;
}
- }
-
- if (cd)
- {
- dprint (5, (debugfile, "load_charset: character description still present: <%s>->%x\n",
- cd->symbol, cd->repr));
- }
- chardesc_free (&cd);
- }
-
- if (multbyte)
- qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar);
-
- rv = 0;
-
-bail:
- charmap_free (&m);
- if (fp)
- fclose (fp);
- if (rv)
- charset_free (csp);
-
- return rv;
-}
-
-static CHARDESC *repr2descr (int repr, CHARSET * cs)
-{
- CHARDESC *key;
- CHARDESC **r;
-
- if (!cs || repr < 0)
- return NULL;
-
- if (cs->multbyte == 1)
- {
- if (repr < 256)
- return cs->description[repr];
- else
- return NULL;
- }
-
- key = safe_malloc (sizeof(CHARDESC));
- key->repr = repr;
- key->symbol = "<unknown>"; /* otherwise, the
- * debug code may
- * segfault. ouch.
- */
-
- r = bsearch (&key, cs->description, cs->u_symb,
- sizeof (CHARDESC *), _cd_compar);
-
- safe_free ((void **) &key);
-
- if (r) return *r;
-
- return NULL;
-}
-
-/* Build a translation table. If a character cannot be
- * translated correctly, we try to find an approximation
- * from the portable charcter set.
- *
- * Note that this implies the assumption that the portable
- * character set can be used without any conversion.
- *
- * Should be safe on POSIX systems.
- */
-
-static char translate_character (CHARSET * to, const char *symbol)
-{
- CHARDESC *cdt;
-
- if ((cdt = hash_find (to->symb_to_repr, symbol)))
- return (char) cdt->repr;
- else
- return *symbol;
-}
-
-static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to)
-{
- int i;
- CHARSET_MAP *map;
- CHARDESC *cd;
-
- /* This is for 8-bit character sets. */
-
- if (!from || !to || from->multbyte > 1 || to->multbyte > 1)
- return NULL;
-
- map = safe_malloc (sizeof (CHARSET_MAP));
- for (i = 0; i < 256; i++)
- {
- if (!(cd = repr2descr (i, from)))
- (*map)[i] = '?';
- else
- (*map)[i] = translate_character (to, cd->symbol);
- }
-
- return map;
-}
-
-/* Currently, just scan the various charset definition files.
- * On the long run, we should cache this stuff in a file.
- */
-
-static HASH *load_charset_aliases (void)
-{
- HASH *charset_aliases;
- CHARMAP *m;
- DIR *dp;
- FILE *fp;
- struct dirent *de;
-
- if ((dp = opendir (CHARMAPS_DIR)) == NULL)
- return NULL;
-
- charset_aliases = hash_create(127);
-
- while ((de = readdir (dp)))
- {
- char fnbuff[_POSIX_PATH_MAX];
-
- if (*de->d_name == '.')
- continue;
-
- snprintf (fnbuff, sizeof (fnbuff), "%s/%s", CHARMAPS_DIR, de->d_name);
- dprint (2, (debugfile, "load_charset_aliases: Opening %s\n", fnbuff));
- if ((fp = fopen (fnbuff, "r")) == NULL)
- continue;
-
- if ((m = parse_charmap_header (fp)) != NULL)
- {
- LIST *lp;
- char buffer[LONG_STRING];
-
- canonical_charset (buffer, sizeof (buffer), de->d_name);
- m->aliases = mutt_add_list (m->aliases, buffer);
-
- if (m->charset)
- m->aliases = mutt_add_list (m->aliases, m->charset);
-
- for (lp = m->aliases; lp; lp = lp->next)
+ if (outrepl)
{
- if (lp->data)
+ /* Try replacing the output */
+ int n = strlen (outrepl);
+ if (n <= obl)
{
- dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n",
- lp->data, de->d_name));
- if (hash_find (charset_aliases, lp->data))
- {
- dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n",
- lp->data));
- }
- else
- hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup (de->d_name), 0);
+ memcpy (ob, outrepl, n);
+ ++ib, --ibl;
+ ob += n, obl -= n;
+ ++ret;
+ continue;
}
}
-
- charmap_free (&m);
- }
-
- fclose (fp);
- }
-
- closedir (dp);
- return charset_aliases;
-}
-
-static void init_charsets ()
-{
- if (Charsets) return;
-
- Charsets = hash_create (127);
- Translations = hash_create (127);
- CharsetAliases = load_charset_aliases ();
-}
-
-CHARSET *mutt_get_charset (const char *name)
-{
- CHARSET *charset;
- char buffer[SHORT_STRING];
- char *real_charset;
- char *hooked;
-
- if (!name || !*name)
- return (NULL);
-
- init_charsets();
- canonical_charset (buffer, sizeof(buffer), name);
-
- /* needs to be documented */
-
- if ((hooked = mutt_charset_hook (buffer)))
- canonical_charset (buffer, sizeof (buffer), hooked);
-
- dprint (2, (debugfile, "mutt_get_charset: Looking for %s\n", buffer));
-
- if(!CharsetAliases || !(real_charset = hash_find(CharsetAliases, buffer)))
- real_charset = buffer;
-
- dprint (2, (debugfile, "mutt_get_charset: maps to: %s\n", real_charset));
-
- if(!(charset = hash_find (Charsets, real_charset)))
- {
- dprint (2, (debugfile, "mutt_get_charset: Need to load.\n"));
- if (load_charset(real_charset, &charset, 0) == 0)
- hash_insert(Charsets, safe_strdup(real_charset), charset, 1);
- else
- charset = NULL;
- }
- return charset;
-}
-
-CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to)
-{
- char from_canon[SHORT_STRING];
- char to_canon[SHORT_STRING];
- char key[SHORT_STRING];
- char *from, *to;
- CHARSET *from_cs, *to_cs;
- CHARSET_MAP *map;
-
- if(!_from || !_to)
- return NULL;
-
- canonical_charset(from_canon, sizeof(from_canon), _from);
- canonical_charset(to_canon, sizeof(to_canon), _to);
-
- /* quick check for some trivial cases. Doing this before
- * we actually call the initialization routine delays character
- * set loading until it's _really_ needed.
- */
-
- if(!strcmp(from_canon, to_canon)
- || (!strcmp (from_canon, "us-ascii") && !strncmp (to_canon, "iso-8859", 8)))
- return NULL;
-
- init_charsets();
-
- if(!CharsetAliases || !(from = hash_find(CharsetAliases, from_canon)))
- from = from_canon;
- if(!CharsetAliases || !(to = hash_find(CharsetAliases, to_canon)))
- to = to_canon;
-
- /* quick check for the identity mapping */
- if((from == to) || !mutt_strcmp(from, to))
- return NULL;
-
- snprintf(key, sizeof(key), "%s %s", from, to);
- if((map = hash_find(Translations, key)) == NULL)
- {
- from_cs = mutt_get_charset(from);
- to_cs = mutt_get_charset(to);
-
- if((map = build_translation(from_cs, to_cs)))
- hash_insert(Translations, safe_strdup(key), map, 1);
- }
- return map;
-}
-
-unsigned char mutt_display_char(unsigned char ch, CHARSET_MAP *map)
-{
- if (!map || !ch)
- return ch;
-
- return (unsigned char) (*map)[ch];
-}
-
-int mutt_display_string(char *str, CHARSET_MAP *map)
-{
- if(!map)
- return -1;
-
- while ((*str = mutt_display_char((unsigned char)*str, map)))
- str++;
-
- return 0;
-}
-
-/*************************************************************/
-/* UTF-8 support */
-
-int mutt_is_utf8(const char *s)
-{
- char buffer[SHORT_STRING];
-
- if(!s)
- return 0;
-
- canonical_charset(buffer, sizeof(buffer), s);
- return !mutt_strcmp(buffer, "utf-8");
-}
-
-/* macros for the various bit maps we need */
-
-#define IOOOOOOO 0x80
-#define IIOOOOOO 0xc0
-#define IIIOOOOO 0xe0
-#define IIIIOOOO 0xf0
-#define IIIIIOOO 0xf8
-#define IIIIIIOO 0xfc
-#define IIIIIIIO 0xfe
-#define IIIIIIII 0xff
-
-static struct unicode_mask
-{
- int mask;
- int value;
- short len;
-}
-unicode_masks[] =
-{
- { IOOOOOOO, 0, 1 },
- { IIIOOOOO, IIOOOOOO, 2 },
- { IIIIOOOO, IIIOOOOO, 3 },
- { IIIIIOOO, IIIIOOOO, 4 },
- { IIIIIIOO, IIIIIOOO, 5 },
- { IIIIIIIO, IIIIIIOO, 6 },
- { 0, 0, 0 }
-};
-
-
-static char *utf_to_unicode(int *out, char *in)
-{
- struct unicode_mask *um = NULL;
- short i;
-
- for(i = 0; unicode_masks[i].mask; i++)
- {
- if((*in & unicode_masks[i].mask) == unicode_masks[i].value)
- {
- um = &unicode_masks[i];
- break;
- }
- }
-
- if(!um)
- {
- *out = (int) '?';
- return in + 1;
- }
-
- for(i = 1; i < um->len; i++)
- {
- if((in[i] & IIOOOOOO) != IOOOOOOO)
- {
- *out = (int) '?';
- return in + i;
}
+ *inbuf = ib, *inbytesleft = ibl;
+ *outbuf = ob, *outbytesleft = obl;
+ return ret;
}
-
- *out = ((int)in[0]) & ~um->mask & 0xff;
- for(i = 1; i < um->len; i++)
- *out = (*out << 6) | (((int)in[i]) & ~IIOOOOOO & 0xff);
-
- if(!*out)
- *out = '?';
-
- return in + um->len;
-}
-
-static CHARSET *Unicode = NULL;
-
-static int unicode_init (void)
-{
- if (!Unicode)
- {
- if (load_charset ("ISO_10646", &Unicode, 1) == -1)
- Unicode = NULL;
- }
-
- return (Unicode == NULL ? -1 : 0);
-}
-
-void mutt_decode_utf8_string(char *str, CHARSET *chs)
-{
- char *s, *t;
- CHARDESC *cd;
- int ch;
-
- (void) unicode_init ();
-
- for (s = t = str; *t; s++)
- {
- t = utf_to_unicode(&ch, t);
-
- /* handle us-ascii characters directly */
- if (0 <= ch && ch < 128)
- *s = ch;
- else if ((cd = repr2descr (ch, Unicode)) && (ch = translate_character (chs, cd->symbol)) != -1)
- *s = ch;
- else
- *s = '?';
-
- if(!*s) *s = '?';
- }
-
- *s = '\0';
}
-
-
/*************************************************************
* General decoder framework
+ * Used in handler.c for converting to mutt's Charset
*/
-
-
#define MIN(a,b) (((a) <= (b)) ? (a): (b))
DECODER *mutt_open_decoder (const char *src, const char *dest)
@@ -875,34 +166,16 @@ DECODER *mutt_open_decoder (const char *src, const char *dest)
d->in.size = DECODER_BUFFSIZE;
d->out.size = DECODER_BUFFSIZE;
- d->_in = &d->in;
-
- if (!src || !dest || mutt_is_utf8 (dest))
+ if (dest && src && (d->cd = mutt_iconv_open (dest, src)) != (iconv_t)-1)
{
- d->just_take_id = 1;
- d->_in = &d->out;
- return d;
- }
-
- if (mutt_is_utf8 (src))
- {
- if (!(d->chs = mutt_get_charset (dest)) || unicode_init () == -1)
- {
- d->just_take_id = 1;
- d->_in = &d->out;
- return d;
- }
-
- d->src_is_utf8 = 1;
- return d;
+ d->_in = &d->in;
+ d->outrepl = mutt_is_utf8 (dest) ? "\357\277\275" : "?";
}
-
- if (!(d->chm = mutt_get_translation (src, dest)))
+ else
{
d->just_take_id = 1;
d->_in = &d->out;
}
-
return d;
}
@@ -911,7 +184,23 @@ void mutt_free_decoder (DECODER **dpp)
safe_free ((void **) dpp);
}
-static void _process_data (DECODER *, short);
+static void _process_data (DECODER *d, short force)
+{
+ if (force) d->forced = 1;
+
+ if (!d->just_take_id)
+ {
+ const char *ib = d->in.buff;
+ size_t ibl = d->in.used;
+ char *ob = d->out.buff + d->out.used;
+ size_t obl = d->out.size - d->out.used;
+
+ mutt_iconv (d->cd, &ib, &ibl, &ob, &obl, 0, d->outrepl);
+ memmove (d->in.buff, ib, ibl);
+ d->in.used = ibl;
+ d->out.used = d->out.size - obl;
+ }
+}
void mutt_decoder_push (DECODER *d, void *_buff, size_t blen, size_t *taken)
{
@@ -977,75 +266,6 @@ void mutt_decoder_pop_to_state (DECODER *d, STATE *s)
}
}
-/* this is where things actually happen */
-
-static void _process_data_8bit (DECODER *d)
-{