gnupg/jnlib/utf8conv.c

/* utf8conv.c -  UTF8 character set conversion
 * Copyright (C) 1994, 1998, 1999, 2000, 2001,
 *               2003  Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * GnuPG is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GnuPG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */

#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif

#include "libjnlib-config.h"
#include "stringhelp.h"
#include "utf8conv.h"


/* Unicode code points for the upper half of the KOI8-R character set.
   The table is indexed with (byte & 0x7f), i.e. entry 0 belongs to the
   native byte 0x80 and entry 127 to the native byte 0xff.  It is
   selected by set_native_charset for the name "koi8-r".  */
static ushort koi8_unicode[128] = {
  0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
  0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
  0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
  0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
  0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
  0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
  0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
  0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
  0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
  0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
  0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
  0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
  0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
  0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
  0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
  0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a
};

/* Unicode code points for the upper half of the ISO-8859-2 (Latin-2)
   character set.  The table is indexed with (byte & 0x7f), i.e. entry
   0 belongs to the native byte 0x80 and entry 127 to the native byte
   0xff.  It is selected by set_native_charset for the name "8859-2"
   (with or without an "iso" prefix).  */
static ushort latin2_unicode[128] = {
  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
  0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
  0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
  0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
  0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
  0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
  0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
  0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
  0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
  0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
  0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
  0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
  0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
};


/* Canonical name of the currently selected native character set; this
   is the string returned by get_native_charset.  */
static const char *active_charset_name = "iso-8859-1";
/* Translation table (128 entries, indexed by byte & 0x7f) for the
   selected native character set, or NULL when no table is used, which
   is the case for the iso-8859-1 and the utf-8 selections.  */
static ushort *active_charset = NULL;
/* Set to 1 when utf-8 has been selected as native character set, so
   that the conversion functions pass multi-byte sequences through
   unchanged; 0 for all other selections.  */
static int no_translation = 0;

/* Select the native character set used by native_to_utf8 and
   utf8_to_native.

   NEWSET is the name of the character set.  If it is NULL the name is
   taken from nl_langinfo (CODESET) when that is available and
   defaults to "8859-1" otherwise.  A leading "iso", optionally
   followed by '-' or '_', is ignored, and names are compared without
   regard to ASCII case.  Recognized names are "8859-1" and "8859-15"
   (both handled as iso-8859-1, as is an empty name), "8859-2",
   "koi8-r" and "utf8"/"utf-8".

   Returns 0 on success and -1 if the name is not recognized; in the
   latter case the current selection is left untouched.  */
int
set_native_charset (const char *newset)
{
  const char *name = newset;

  if (!name)
    {
#ifdef HAVE_LANGINFO_CODESET
      name = nl_langinfo (CODESET);
#else
      name = "8859-1";
#endif
    }

  /* Skip an "iso" prefix together with one optional separator.  */
  if (strlen (name) > 3 && !ascii_memcasecmp (name, "iso", 3))
    {
      name += 3;
      if (*name == '-' || *name == '_')
        name++;
    }

  if (!*name
      || !ascii_strcasecmp (name, "8859-1")
      || !ascii_strcasecmp (name, "8859-15"))
    {
      /* Latin-1 needs no table: its bytes are the code points.  */
      active_charset_name = "iso-8859-1";
      no_translation = 0;
      active_charset = NULL;
      return 0;
    }

  if (!ascii_strcasecmp (name, "8859-2"))
    {
      active_charset_name = "iso-8859-2";
      no_translation = 0;
      active_charset = latin2_unicode;
      return 0;
    }

  if (!ascii_strcasecmp (name, "koi8-r"))
    {
      active_charset_name = "koi8-r";
      no_translation = 0;
      active_charset = koi8_unicode;
      return 0;
    }

  if (!ascii_strcasecmp (name, "utf8")
      || !ascii_strcasecmp (name, "utf-8"))
    {
      /* The native set is already UTF-8: disable all translation.  */
      active_charset_name = "utf-8";
      no_translation = 1;
      active_charset = NULL;
      return 0;
    }

  return -1;  /* Unknown character set name.  */
}

/* Return the canonical name of the native character set currently in
   use, as recorded by set_native_charset ("iso-8859-1" if that
   function has not changed it).  The returned string is a constant
   and must not be modified or freed.  */
const char *
get_native_charset (void)
{
  return active_charset_name;
}

/* Convert ORIG_STRING, a NUL terminated string in the active native
   character set, to UTF-8 and return the result in a newly allocated
   buffer (obtained from jnlib_xstrdup or jnlib_xmalloc).

   If utf-8 is the native character set the string is merely
   duplicated.  Otherwise every byte with the high bit set is replaced
   by the UTF-8 encoding of its code point, which is looked up in the
   active translation table or, without a table (iso-8859-1), is the
   byte value itself.  Plain ASCII bytes are copied unchanged.  */
char *
native_to_utf8 (const char *orig_string)
{
  const unsigned char *src = (const unsigned char *)orig_string;
  const unsigned char *in;
  unsigned char *out;
  char *result;
  size_t need = 0;
  size_t extra;

  if (no_translation)
    return jnlib_xstrdup (orig_string);

  /* A table entry may need up to 3 bytes of UTF-8, a Latin-1
     character always needs exactly 2.  */
  extra = active_charset ? 2 : 1;
  for (in = src; *in; in++)
    need += (*in & 0x80) ? 1 + extra : 1;

  result = jnlib_xmalloc (need + 1);
  out = (unsigned char *)result;
  for (in = src; *in; in++)
    {
      unsigned int cp;

      if (!(*in & 0x80))
        {
          *out++ = *in;  /* Plain ASCII.  */
          continue;
        }

      cp = active_charset ? active_charset[*in & 0x7f] : *in;
      if (cp < 0x0800)
        {
          /* Two byte sequence: 110xxxxx 10xxxxxx.  */
          *out++ = 0xc0 | ((cp >> 6) & 0x1f);
          *out++ = 0x80 | (cp & 0x3f);
        }
      else
        {
          /* Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.  */
          *out++ = 0xe0 | ((cp >> 12) & 0x0f);
          *out++ = 0x80 | ((cp >> 6) & 0x3f);
          *out++ = 0x80 | (cp & 0x3f);
        }
    }
  *out = 0;

  return result;
}


/* Helper for utf8_to_native: store the four character escape "\xNN"
   (two lowercase hex digits) for the byte C at P and return the
   advanced pointer.  During the counting pass P is NULL; nothing is
   stored then and NULL is returned.  */
static char *
utf8conv_put_hex_escape (char *p, unsigned int c)
{
  static const char hexdigits[] = "0123456789abcdef";

  if (p)
    {
      *p++ = '\\';
      *p++ = 'x';
      *p++ = hexdigits[(c >> 4) & 15];
      *p++ = hexdigits[c & 15];
    }
  return p;
}

/* Convert the first LENGTH bytes of STRING, which is in UTF-8, to the
   native encoding and return the result as a NUL terminated string in
   a buffer allocated with jnlib_xmalloc.

   - ASCII control characters (values below 0x20 and 0x7f) are quoted
     as "\n", "\r", "\f", "\v", "\b", "\0" or "\xNN".
   - A character with the value DELIM is always quoted as "\xNN"; DELIM
     must be a plain ASCII character.  If DELIM is not 0 the backslash
     itself is quoted as well.
   - A complete multi-byte sequence is copied unchanged if utf-8 is
     the native character set, mapped through the active translation
     table if there is one, or emitted as a single byte if its value
     is in the range 0x80..0xff and no table is active (iso-8859-1).
     Characters without a native representation are emitted as the
     "\xNN" escapes of their UTF-8 bytes.
   - Illegal encodings are emitted as "\xNN" escapes.  This includes a
     multi-byte sequence which is cut off by the end of the input.

   The function runs two identical passes over the input: the first
   one (P == NULL) only computes the required size N, the second one
   fills the buffer.  */
char *
utf8_to_native (const char *string, size_t length, int delim)
{
  int nleft;                /* Continuation bytes still expected.  */
  int i;
  unsigned char encbuf[8];  /* Bytes of the sequence being decoded.  */
  int encidx;               /* Number of bytes stored in ENCBUF.  */
  const unsigned char *s;
  size_t n;
  char *buffer = NULL;
  char *p = NULL;
  unsigned long val = 0;    /* Code point being assembled.  */
  size_t slen;
  int resync;               /* Skipping bytes after an illegal one.  */

  for (;;)
    {
      /* Both passes must start from exactly the same state.  */
      resync = 0;
      for (slen = length, nleft = encidx = 0, n = 0,
             s = (const unsigned char *)string; slen;
           s++, slen--)
        {
          if (resync)
            {
              if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
                {
                  /* Still invalid: neither ASCII nor a lead byte.  */
                  p = utf8conv_put_hex_escape (p, *s);
                  n += 4;
                  continue;
                }
              resync = 0;
            }

          if (!nleft)
            {
              if (!(*s & 0x80))
                {
                  /* Plain ASCII.  */
                  if (*s < 0x20 || *s == 0x7f || *s == delim
                      || (delim && *s == '\\'))
                    {
                      char esc = 0;

                      switch (*s)
                        {
                        case '\n': esc = 'n'; break;
                        case '\r': esc = 'r'; break;
                        case '\f': esc = 'f'; break;
                        case '\v': esc = 'v'; break;
                        case '\b': esc = 'b'; break;
                        case 0:    esc = '0'; break;
                        default:   break;
                        }
                      if (esc)
                        {
                          if (p)
                            {
                              *p++ = '\\';
                              *p++ = esc;
                            }
                          n += 2;
                        }
                      else
                        {
                          p = utf8conv_put_hex_escape (p, *s);
                          n += 4;
                        }
                    }
                  else
                    {
                      if (p)
                        *p++ = *s;
                      n++;
                    }
                }
              else if ((*s & 0xe0) == 0xc0)
                {
                  /* 110x xxxx */
                  val = *s & 0x1f;
                  nleft = 1;
                  encidx = 0;
                  encbuf[encidx++] = *s;
                }
              else if ((*s & 0xf0) == 0xe0)
                {
                  /* 1110 xxxx */
                  val = *s & 0x0f;
                  nleft = 2;
                  encidx = 0;
                  encbuf[encidx++] = *s;
                }
              else if ((*s & 0xf8) == 0xf0)
                {
                  /* 1111 0xxx */
                  val = *s & 0x07;
                  nleft = 3;
                  encidx = 0;
                  encbuf[encidx++] = *s;
                }
              else if ((*s & 0xfc) == 0xf8)
                {
                  /* 1111 10xx */
                  val = *s & 0x03;
                  nleft = 4;
                  encidx = 0;
                  encbuf[encidx++] = *s;
                }
              else if ((*s & 0xfe) == 0xfc)
                {
                  /* 1111 110x */
                  val = *s & 0x01;
                  nleft = 5;
                  encidx = 0;
                  encbuf[encidx++] = *s;
                }
              else
                {
                  /* Invalid lead byte: print as \xnn.  */
                  p = utf8conv_put_hex_escape (p, *s);
                  n += 4;
                  resync = 1;
                }
            }
          else if (*s < 0x80 || *s >= 0xc0)
            {
              /* Not a continuation byte: the sequence is invalid.
                 Print what has been collected so far and this byte.  */
              for (i = 0; i < encidx; i++)
                p = utf8conv_put_hex_escape (p, encbuf[i]);
              p = utf8conv_put_hex_escape (p, *s);
              n += 4 + 4 * encidx;
              nleft = 0;
              encidx = 0;
              resync = 1;
            }
          else
            {
              encbuf[encidx++] = *s;
              val <<= 6;
              val |= *s & 0x3f;
              if (!--nleft)
                {
                  /* The sequence is complete.  */
                  if (no_translation)
                    {
                      if (p)
                        {
                          for (i = 0; i < encidx; i++)
                            *p++ = encbuf[i];
                        }
                      n += encidx;
                      encidx = 0;
                    }
                  else if (active_charset)
                    {
                      /* Reverse table lookup.  */
                      for (i = 0; i < 128; i++)
                        {
                          if (active_charset[i] == val)
                            break;
                        }
                      if (i < 128)
                        {
                          /* We can print this one.  */
                          if (p)
                            *p++ = i + 128;
                          n++;
                        }
                      else
                        {
                          /* No translation: print the utf8 bytes.  */
                          for (i = 0; i < encidx; i++)
                            p = utf8conv_put_hex_escape (p, encbuf[i]);
                          n += encidx * 4;
                          encidx = 0;
                        }
                    }
                  else
                    {
                      /* Native set (iso-8859-1).  */
                      if (val >= 0x80 && val < 256)
                        {
                          /* We can simply print this character.  */
                          n++;
                          if (p)
                            *p++ = val;
                        }
                      else
                        {
                          /* No translation: print the utf8 bytes.  */
                          for (i = 0; i < encidx; i++)
                            p = utf8conv_put_hex_escape (p, encbuf[i]);
                          n += encidx * 4;
                          encidx = 0;
                        }
                    }
                }
            }
        }

      if (nleft)
        {
          /* The input ended in the middle of a multi-byte sequence.
             Do not drop the collected bytes silently but print them
             like any other illegal encoding.  */
          for (i = 0; i < encidx; i++)
            p = utf8conv_put_hex_escape (p, encbuf[i]);
          n += 4 * encidx;
        }

      if (!buffer)
        {
          /* Allocate the buffer after the first pass.  */
          buffer = p = jnlib_xmalloc (n + 1);
        }
      else
        {
          *p = 0;  /* Make a string.  */
          return buffer;
        }
    }
}
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`/* utf8conv.c - UTF8 character set conversion`
			`* Copyright (C) 1994, 1998, 1999, 2000, 2001,`
			`* 2003 Free Software Foundation, Inc.`
			`*`
			`* This file is part of GnuPG.`
			`*`
			`* GnuPG is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* GnuPG is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
			`* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA`
			`*/`

			`#include <config.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <stdarg.h>`
			`#include <ctype.h>`
			`#ifdef HAVE_LANGINFO_CODESET`
			`#include <langinfo.h>`
			`#endif`

			`#include "libjnlib-config.h"`
			`#include "stringhelp.h"`
			`#include "utf8conv.h"`


			`static ushort koi8_unicode[128] = {`
			`0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,`
			`0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,`
			`0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,`
			`0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,`
			`0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,`
			`0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,`
			`0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,`
			`0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,`
			`0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,`
			`0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,`
			`0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,`
			`0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,`
			`0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,`
			`0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,`
			`0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,`
			`0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a`
			`};`

			`static ushort latin2_unicode[128] = {`
			`0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,`
			`0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,`
			`0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,`
			`0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,`
			`0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,`
			`0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,`
			`0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,`
			`0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,`
			`0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,`
			`0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,`
			`0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,`
			`0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,`
			`0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,`
			`0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,`
			`0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,`
			`0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9`
			`};`


			`static const char *active_charset_name = "iso-8859-1";`
			`static ushort *active_charset = NULL;`
			`static int no_translation = 0;`

			`int`
			`set_native_charset (const char *newset)`
			`{`
			`if (!newset)`
			`#ifdef HAVE_LANGINFO_CODESET`
			`newset = nl_langinfo (CODESET);`
			`#else`
			`newset = "8859-1";`
			`#endif`

			`if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))`
			`{`
			`newset += 3;`
			`if (*newset == '-' \|\| *newset == '_')`
			`newset++;`
			`}`

			`if (!*newset`
			`\|\| !ascii_strcasecmp (newset, "8859-1")`
			`\|\| !ascii_strcasecmp (newset, "8859-15"))`
			`{`
			`active_charset_name = "iso-8859-1";`
			`no_translation = 0;`
			`active_charset = NULL;`
			`}`
			`else if (!ascii_strcasecmp (newset, "8859-2"))`
			`{`
			`active_charset_name = "iso-8859-2";`
			`no_translation = 0;`
			`active_charset = latin2_unicode;`
			`}`
			`else if (!ascii_strcasecmp (newset, "koi8-r"))`
			`{`
			`active_charset_name = "koi8-r";`
			`no_translation = 0;`
			`active_charset = koi8_unicode;`
			`}`
			`else if (!ascii_strcasecmp (newset, "utf8")`
			`\|\| !ascii_strcasecmp (newset, "utf-8"))`
			`{`
			`active_charset_name = "utf-8";`
			`no_translation = 1;`
			`active_charset = NULL;`
			`}`
			`else`
			`return -1;`
			`return 0;`
			`}`

			`const char *`
			`get_native_charset ()`
			`{`
			`return active_charset_name;`
			`}`

			`/****************`
			`* Convert string, which is in native encoding to UTF8 and return the`
			`* new allocated UTF8 string.`
			`*/`
			`char *`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`native_to_utf8 (const char *orig_string)`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`{`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`const unsigned char *string = (const unsigned char *)orig_string;`
			`const unsigned char *s;`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`char *buffer;`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`unsigned char *p;`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`size_t length = 0;`

			`if (no_translation)`
			`{`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`buffer = jnlib_xstrdup (orig_string);`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`}`
			`else if (active_charset)`
			`{`
			`for (s = string; *s; s++)`
			`{`
			`length++;`
			`if (*s & 0x80)`
			`length += 2; /* we may need 3 bytes */`
			`}`
			`buffer = jnlib_xmalloc (length + 1);`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`for (p = (unsigned char *)buffer, s = string; *s; s++)`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`{`
			`if ((*s & 0x80))`
			`{`
			`ushort val = active_charset[*s & 0x7f];`
			`if (val < 0x0800)`
			`{`
			`*p++ = 0xc0 \| ((val >> 6) & 0x1f);`
			`*p++ = 0x80 \| (val & 0x3f);`
			`}`
			`else`
			`{`
			`*p++ = 0xe0 \| ((val >> 12) & 0x0f);`
			`*p++ = 0x80 \| ((val >> 6) & 0x3f);`
			`*p++ = 0x80 \| (val & 0x3f);`
			`}`
			`}`
			`else`
			`*p++ = *s;`
			`}`
			`*p = 0;`
			`}`
			`else`
			`{`
			`for (s = string; *s; s++)`
			`{`
			`length++;`
			`if (*s & 0x80)`
			`length++;`
			`}`
			`buffer = jnlib_xmalloc (length + 1);`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`for (p = (unsigned char *)buffer, s = string; *s; s++)`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`{`
			`if (*s & 0x80)`
			`{`
			`*p++ = 0xc0 \| ((*s >> 6) & 3);`
			`*p++ = 0x80 \| (*s & 0x3f);`
			`}`
			`else`
			`*p++ = *s;`
			`}`
			`*p = 0;`
			`}`
			`return buffer;`
			`}`


			`/* Convert string, which is in UTF8 to native encoding. Replace`
			`* illegal encodings by some "\xnn" and quote all control`
			`* characters. A character with value DELIM will always be quoted, it`
			`* must be a vanilla ASCII character. */`
			`char *`
			`utf8_to_native (const char *string, size_t length, int delim)`
			`{`
			`int nleft;`
			`int i;`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`unsigned char encbuf[8];`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`int encidx;`
			`const byte *s;`
			`size_t n;`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`char *buffer = NULL;`
			`char *p = NULL;`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`unsigned long val = 0;`
			`size_t slen;`
			`int resync = 0;`

			`/* 1. pass (p==NULL): count the extended utf-8 characters */`
			`/* 2. pass (p!=NULL): create string */`
			`for (;;)`
			`{`
gcc-4 defaults forced me to edit many many files to get rid of the char * vs. unsigned char * warnings. The GNU coding standards used to say that these mismatches are okay and better than a bunch of casts. Obviously this has changed now. 2005-06-16 08:12:03 +00:00			`for (slen = length, nleft = encidx = 0, n = 0,`
			`s = (const unsigned char *)string; slen;`
Finished the bulk of changes for gnupg 1.9. This included switching to libgcrypt functions, using shared error codes from libgpg-error, replacing the old functions we used to have in ../util by those in ../jnlib and ../common, renaming the malloc functions and a couple of types. Note, that not all changes are listed below because they are too similar and done at far too many places. As of today the code builds using the current libgcrypt from CVS but it is very unlikely that it actually works. 2003-06-18 19:56:13 +00:00			`s++, slen--)`
			`{`
			`if (resync)`
			`{`
			`if (!(*s < 128 \|\| (*s >= 0xc0 && *s <= 0xfd)))`
			`{`
			`/* still invalid */`
			`if (p)`
			`{`
			`sprintf (p, "\\x%02x", *s);`
			`p += 4;`
			`}`
			`n += 4;`
			`continue;`
			`}`
			`resync = 0;`
			`}`
			`if (!nleft)`
			`{`
			`if (!(*s & 0x80))`
			`{ /* plain ascii */`
			`if (*s < 0x20 \|\| *s == 0x7f \|\| *s == delim \|\|`
			`(delim && *s == '\\'))`
			`{`
			`n++;`
			`if (p)`
			`*p++ = '\\';`
			`switch (*s)`
			`{`
			`case '\n':`
			`n++;`
			`if (p)`
			`*p++ = 'n';`
			`break;`
			`case '\r':`
			`n++;`
			`if (p)`
			`*p++ = 'r';`
			`break;`
			`case '\f':`
			`n++;`
			`if (p)`
			`*p++ = 'f';`
			`break;`
			`case '\v':`
			`n++;`
			`if (p)`
			`*p++ = 'v';`
			`break;`
			`case '\b':`
			`n++;`
			`if (p)`
			`*p++ = 'b';`
			`break;`
			`case 0:`
			`n++;`
			`if (p)`
			`*p++ = '0';`
			`break;`
			`default:`
			`n += 3;`
			`if (p)`
			`{`
			`sprintf (p, "x%02x", *s);`
			`p += 3;`
			`}`
			`break;`
			`}`
			`}`
			`else`
			`{`
			`if (p)`
			`*p++ = *s;`
			`n++;`
			`}`
			`}`
			`else if ((*s & 0xe0) == 0xc0)`
			`{ /* 110x xxxx */`
			`val = *s & 0x1f;`
			`nleft = 1;`
			`encidx = 0;`
			`encbuf[encidx++] = *s;`
			`}`
			`else if ((*s & 0xf0) == 0xe0)`
			`{ /* 1110 xxxx */`
			`val = *s & 0x0f;`
			`nleft = 2;`
			`encidx = 0;`
			`encbuf[encidx++] = *s;`
			`}`
			`else if ((*s & 0xf8) == 0xf0)`
			`{ /* 1111 0xxx */`
			`val = *s & 0x07;`
			`nleft = 3;`
			`encidx = 0;`
			`encbuf[encidx++] = *s;`
			`}`
			`else if ((*s & 0xfc) == 0xf8)`
			`{ /* 1111 10xx */`
			`val = *s & 0x03;`
			`nleft = 4;`
			`encidx = 0;`
			`encbuf[encidx++] = *s;`
			`}`
			`else if ((*s & 0xfe) == 0xfc)`
			`{ /* 1111 110x */`
			`val = *s & 0x01;`
			`nleft = 5;`
			`encidx = 0;`
			`encbuf[encidx++] = *s;`
			`}`
			`else`
			`{ /* invalid encoding: print as \xnn */`
			`if (p)`
			`{`
			`sprintf (p, "\\x%02x", *s);`
			`p += 4;`
			`}`
			`n += 4;`
			`resync = 1;`
			`}`
			`}`
			`else if (*s < 0x80 \|\| *s >= 0xc0)`
			`{ /* invalid */`
			`if (p)`
			`{`
			`for (i = 0; i < encidx; i++)`
			`{`
			`sprintf (p, "\\x%02x", encbuf[i]);`
			`p += 4;`
			`}`
			`sprintf (p, "\\x%02x", *s);`
			`p += 4;`
			`}`
			`n += 4 + 4 * encidx;`
			`nleft = 0;`
			`encidx = 0;`
			`resync = 1;`
			`}`
			`else`
			`{`
			`encbuf[encidx++] = *s;`
			`val <<= 6;`
			`val \|= *s & 0x3f;`
			`if (!--nleft)`
			`{ /* ready */`
			`if (no_translation)`
			`{`
			`if (p)`
			`{`
			`for (i = 0; i < encidx; i++)`
			`*p++ = encbuf[i];`
			`}`
			`n += encidx;`
			`encidx = 0;`
			`}`
			`else if (active_charset)`
			`{ /* table lookup */`
			`for (i = 0; i < 128; i++)`
			`{`
			`if (active_charset[i] == val)`
			`break;`
			`}`
			`if (i < 128)`
			`{ /* we can print this one */`
			`if (p)`
			`*p++ = i + 128;`
			`n++;`
			`}`
			`else`
			`{ /* we do not have a translation: print utf8 */`
			`if (p)`
			`{`
			`for (i = 0; i < encidx; i++)`
			`{`
			`sprintf (p, "\\x%02x", encbuf[i]);`
			`p += 4;`
			`}`
			`}`
			`n += encidx * 4;`
			`encidx = 0;`
			`}`
			`}`
			`else`
			`{ /* native set */`
			`if (val >= 0x80 && val < 256)`
			`{`
			`n++; /* we can simply print this character */`
			`if (p)`
			`*p++ = val;`
			`}`
			`else`
			`{ /* we do not have a translation: print utf8 */`
			`if (p)`
			`{`
			`for (i = 0; i < encidx; i++)`
			`{`
			`sprintf (p, "\\x%02x", encbuf[i]);`
			`p += 4;`
			`}`
			`}`
			`n += encidx * 4;`
			`encidx = 0;`
			`}`
			`}`
			`}`

			`}`
			`}`
			`if (!buffer)`
			`{ /* allocate the buffer after the first pass */`
			`buffer = p = jnlib_xmalloc (n + 1);`
			`}`
			`else`
			`{`
			`*p = 0; /* make a string */`
			`return buffer;`
			`}`
			`}`
			`}`