1
0
mirror of git://git.gnupg.org/gnupg.git synced 2024-11-10 21:38:50 +01:00

(mem2str): Translated comment to English.

(handle_iconv_error) [USE_GNUPG_ICONV]: New.
(set_native_charset) [USE_GNUPG_ICONV]: Added iconv based
conversion checks.
(native_to_utf8, utf8_to_native): Added iconv based conversion.
This commit is contained in:
Werner Koch 2004-10-26 12:43:49 +00:00
parent e939eb248d
commit a3d48b3bb2
2 changed files with 289 additions and 76 deletions

View File

@ -1,3 +1,11 @@
2004-10-26 Werner Koch <wk@g10code.com>
* strgutil.c (mem2str): Translated comment to English.
(handle_iconv_error) [USE_GNUPG_ICONV]: New.
(set_native_charset) [USE_GNUPG_ICONV]: Added iconv based
conversion checks.
(native_to_utf8, utf8_to_native): Added iconv based conversion.
2004-10-21 Werner Koch <wk@g10code.com>
* vasprintf.c: Removed. It was used only at one place and I don't

View File

@ -23,15 +23,26 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif
#ifndef HAVE_ICONV
#undef USE_GNUPG_ICONV
#endif
#ifdef USE_GNUPG_ICONV
#include <limits.h>
#include <iconv.h>
#endif
#include "types.h"
#include "util.h"
#include "memory.h"
#include "i18n.h"
#ifndef USE_GNUPG_ICONV
static ushort koi8_unicode[128] = {
0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
@ -69,11 +80,18 @@ static ushort latin2_unicode[128] = {
0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
};
#endif /*!USE_GNUPG_ICONV*/
#ifndef MB_LEN_MAX
#define MB_LEN_MAX 16
#endif
static const char *active_charset_name = "iso-8859-1";
static ushort *active_charset = NULL;
static int no_translation = 0;
static int use_iconv = 0;
void
free_strlist( STRLIST sl )
@ -101,7 +119,7 @@ add_to_strlist( STRLIST *list, const char *string )
}
/****************
* ame as add_to_strlist() but if is_utf8 is *not* set a conversion
* Same as add_to_strlist() but if is_utf8 is *not* set a conversion
* to UTF8 is done
*/
STRLIST
@ -192,7 +210,7 @@ pop_strlist( STRLIST *list )
}
/****************
* look for the substring SUB in buffer and return a pointer to that
* Look for the substring SUB in buffer and return a pointer to that
* substring in BUF or NULL if not found.
* Comparison is case-insensitive.
*/
@ -234,12 +252,11 @@ ascii_memistr( const char *buf, size_t buflen, const char *sub )
return NULL ;
}
/****************
* Wie strncpy(), aber es werden maximal n-1 zeichen kopiert und ein
* '\0' angehängt. Ist n = 0, so geschieht nichts, ist Destination
* gleich NULL, so wird via m_alloc Speicher besorgt, ist dann nicht
* genügend Speicher vorhanden, so bricht die funktion ab.
*/
/* Like strncpy() but copy at max N-1 bytes and append a '\0'. With
* N given as 0 nothing is copied at all. With DEST given as NULL
* sufficient memory is allocated using m_alloc (note that m_alloc is
* guaranteed to succeed or to abort the process). */
char *
mem2str( char *dest , const void *src , size_t n )
{
@ -260,8 +277,8 @@ mem2str( char *dest , const void *src , size_t n )
}
/****************
* remove leading and trailing white spaces
/*
* Remove leading and trailing white spaces
*/
char *
trim_spaces( char *str )
@ -269,10 +286,10 @@ trim_spaces( char *str )
char *string, *p, *mark;
string = str;
/* find first non space character */
/* Find first non space character. */
for( p=string; *p && isspace( *(byte*)p ) ; p++ )
;
/* move characters */
/* Move characters. */
for( (mark = NULL); (*string = *p); string++, p++ )
if( isspace( *(byte*)p ) ) {
if( !mark )
@ -281,7 +298,7 @@ trim_spaces( char *str )
else
mark = NULL ;
if( mark )
*mark = '\0' ; /* remove trailing spaces */
*mark = '\0' ; /* Remove trailing spaces. */
return str ;
}
@ -311,7 +328,7 @@ trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
}
/****************
* remove trailing white spaces and return the length of the buffer
* Remove trailing white spaces and return the length of the buffer
*/
unsigned
trim_trailing_ws( byte *line, unsigned len )
@ -319,6 +336,7 @@ trim_trailing_ws( byte *line, unsigned len )
return trim_trailing_chars( line, len, " \t\r\n" );
}
unsigned int
check_trailing_chars( const byte *line, unsigned int len,
const char *trimchars )
@ -341,8 +359,9 @@ check_trailing_chars( const byte *line, unsigned int len,
return len;
}
/****************
* remove trailing white spaces and return the length of the buffer
* Remove trailing white spaces and return the length of the buffer
*/
unsigned int
check_trailing_ws( const byte *line, unsigned int len )
@ -362,17 +381,46 @@ string_count_chr( const char *string, int c )
return count;
}
#ifdef USE_GNUPG_ICONV
static void
handle_iconv_error (const char *from, const char *to, int use_fallback)
{
if (errno == EINVAL)
log_error (_("conversion from `%s' to `%s' not available\n"),
from, "utf-8");
else
log_error (_("iconv_open failed: %s\n"), strerror (errno));
if (use_fallback)
{
/* To avoid further error messages we fallback to Latin-1 for the
native encoding. This is justified as one can expect that on a
utf-8 enabled system nl_langinfo() will work and thus we won't
never get to here. Thus Latin-1 seems to be a reasonable
default. */
active_charset_name = "iso-8859-1";
no_translation = 0;
active_charset = NULL;
use_iconv = 0;
}
}
#endif /*USE_GNUPG_ICONV*/
int
set_native_charset( const char *newset )
{
const char *full_newset;
if (!newset)
{
#ifdef HAVE_LANGINFO_CODESET
newset = nl_langinfo (CODESET);
#else
newset = "8859-1";
newset = "iso-8859-1";
#endif
}
full_newset = newset;
if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3)) {
newset += 3;
if (*newset == '-' || *newset == '_')
@ -385,25 +433,54 @@ set_native_charset( const char *newset )
active_charset_name = "iso-8859-1";
no_translation = 0;
active_charset = NULL;
}
else if( !ascii_strcasecmp( newset, "8859-2" ) ) {
active_charset_name = "iso-8859-2";
no_translation = 0;
active_charset = latin2_unicode;
}
else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
active_charset_name = "koi8-r";
no_translation = 0;
active_charset = koi8_unicode;
use_iconv = 0;
}
else if( !ascii_strcasecmp (newset, "utf8" )
|| !ascii_strcasecmp(newset, "utf-8") ) {
active_charset_name = "utf-8";
no_translation = 1;
active_charset = NULL;
use_iconv = 0;
}
#ifdef USE_GNUPG_ICONV
else {
iconv_t cd;
cd = iconv_open (full_newset, "utf-8");
if (cd == (iconv_t)-1)
{
handle_iconv_error (full_newset, "utf-8", 0);
return G10ERR_GENERAL;
}
iconv_close (cd);
cd = iconv_open ("utf-8", full_newset);
if (cd == (iconv_t)-1)
{
handle_iconv_error ("utf-8", full_newset, 0);
return G10ERR_GENERAL;
}
iconv_close (cd);
active_charset_name = full_newset;
no_translation = 0;
active_charset = NULL;
use_iconv = 1;
}
#else /*!USE_GNUPG_ICONV*/
else if( !ascii_strcasecmp( newset, "8859-2" ) ) {
active_charset_name = "iso-8859-2";
no_translation = 0;
active_charset = latin2_unicode;
use_iconv = 0;
}
else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
active_charset_name = "koi8-r";
no_translation = 0;
active_charset = koi8_unicode;
use_iconv = 0;
}
else
return G10ERR_GENERAL;
#endif /*!USE_GNUPG_ICONV*/
return 0;
}
@ -425,14 +502,82 @@ native_to_utf8( const char *string )
byte *p;
size_t length=0;
if (no_translation) {
if (no_translation)
{ /* Already utf-8 encoded. */
buffer = m_strdup (string);
}
else if( active_charset ) {
for(s=string; *s; s++ ) {
else if( !active_charset && !use_iconv) /* Shortcut implementation
for Latin-1. */
{
for(s=string; *s; s++ )
{
length++;
if( *s & 0x80 )
length += 2; /* we may need 3 bytes */
length++;
}
buffer = m_alloc( length + 1 );
for(p=buffer, s=string; *s; s++ )
{
if( *s & 0x80 )
{
*p++ = 0xc0 | ((*s >> 6) & 3);
*p++ = 0x80 | ( *s & 0x3f );
}
else
*p++ = *s;
}
*p = 0;
}
else /* Need to use a translation table. */
{
#ifdef USE_GNUPG_ICONV
iconv_t cd;
const char *inptr;
char *outptr;
size_t inbytes, outbytes;
cd = iconv_open (active_charset_name, "utf-8");
if (cd == (iconv_t)-1)
{
handle_iconv_error (active_charset_name, "utf-8", 1);
return native_to_utf8 (string);
}
for (s=string; *s; s++ )
{
length++;
if ((*s & 0x80))
length += 5; /* We may need up to 6 bytes for the utf8 output. */
}
buffer = m_alloc (length + 1);
inptr = string;
inbytes = strlen (string);
outptr = buffer;
outbytes = length;
if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
&outptr, &outbytes) == (size_t)-1)
{
log_error (_("conversion from `%s' to `%s' failed: %s\n"),
active_charset_name, "utf-8", strerror (errno));
/* We don't do any conversion at all but use the strings as is. */
strcpy (buffer, string);
}
else /* Success. */
{
*outptr = 0;
/* We could realloc the buffer now but I doubt that it makes
much sense given that it will get freed anyway soon
after. */
}
iconv_close (cd);
#else /*!USE_GNUPG_ICONV*/
for(s=string; *s; s++ )
{
length++;
if( *s & 0x80 )
length += 2; /* We may need up to 3 bytes. */
}
buffer = m_alloc( length + 1 );
for(p=buffer, s=string; *s; s++ ) {
@ -452,23 +597,8 @@ native_to_utf8( const char *string )
*p++ = *s;
}
*p = 0;
}
else {
for(s=string; *s; s++ ) {
length++;
if( *s & 0x80 )
length++;
}
buffer = m_alloc( length + 1 );
for(p=buffer, s=string; *s; s++ ) {
if( *s & 0x80 ) {
*p++ = 0xc0 | ((*s >> 6) & 3);
*p++ = 0x80 | ( *s & 0x3f );
}
else
*p++ = *s;
}
*p = 0;
#endif /*!USE_GNUPG_ICONV*/
}
return buffer;
}
@ -604,6 +734,30 @@ utf8_to_native( const char *string, size_t length, int delim )
n += encidx;
encidx = 0;
}
#ifdef USE_GNUPG_ICONV
else if(use_iconv) {
/* Our strategy for using iconv is a bit
* strange but it better keeps compatibility
* with previous versions in regard to how
* invalid encodings are displayed. What we
* do is to keep the utf-8 as is and have the
* real translation step then at the end.
* Yes, I know that this is ugly. However we
* are short of the 1.4 release and for this
* branch we should not mee too much around
* with iconv things. One reason for this is
* that we don't know enough about non-GNU
* iconv implementation and want to minimize
* the risk of breaking the code on too many
* platforms. */
if( p ) {
for(i=0; i < encidx; i++ )
*p++ = encbuf[i];
}
n += encidx;
encidx = 0;
}
#endif /*USE_GNUPG_ICONV*/
else if( active_charset ) { /* table lookup */
for(i=0; i < 128; i++ ) {
if( active_charset[i] == val )
@ -647,6 +801,57 @@ utf8_to_native( const char *string, size_t length, int delim )
if( !buffer ) { /* allocate the buffer after the first pass */
buffer = p = m_alloc( n + 1 );
}
#ifdef USE_GNUPG_ICONV
else if(use_iconv) {
/* Note: See above for comments. */
iconv_t cd;
const char *inptr;
char *outbuf, *outptr;
size_t inbytes, outbytes;
*p = 0; /* Terminate the buffer. */
cd = iconv_open ("utf-8", active_charset_name);
if (cd == (iconv_t)-1)
{
handle_iconv_error ("utf-8", active_charset_name, 1);
m_free (buffer);
return utf8_to_native (string, length, delim);
}
/* Allocate a new buffer large enough to hold all possible
* encodings. */
n = p - buffer + 1;
inbytes = n - 1;;
inptr = buffer;
outbytes = n * MB_LEN_MAX;
if (outbytes / MB_LEN_MAX != n)
BUG (); /* Actually an overflow. */
outbuf = outptr = m_alloc (outbytes);
if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
&outptr, &outbytes) == (size_t)-1) {
log_error (_("conversion from `%s' to `%s' failed: %s\n"),
"utf-8", active_charset_name, strerror (errno));
/* Didn't worked out. Temporary disable the use of
* iconv and fall back to our old code. */
m_free (buffer);
buffer = NULL;
m_free (outbuf);
use_iconv = 0;
outbuf = utf8_to_native (string, length, delim);
use_iconv = 1;
}
else { /* Success. */
*outptr = 0;
/* We could realloc the buffer now but I doubt that it makes
much sense given that it will get freed anyway soon
after. */
m_free (buffer);
}
iconv_close (cd);
return outbuf;
}
#endif /*USE_GNUPG_ICONV*/
else {
*p = 0; /* make a string */
return buffer;