From 9dc1bcc4ea118b71be30f34d84bd04392a6031cf Mon Sep 17 00:00:00 2001 From: Werner Koch Date: Thu, 6 Jan 2005 11:51:49 +0000 Subject: [PATCH] (set_native_charset): Assume that ASCII, ANSI_X3.4-1968 and 646 are actually meant as Latin-1. If nl_langinfo is not available get the charset from environment variables. For W32 use GetACP as error fallback. Removed Latin-15 to Latin-1 aliasing. --- util/ChangeLog | 8 +++++++ util/strgutil.c | 55 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/util/ChangeLog b/util/ChangeLog index 84ed9f21d..012232ed7 100644 --- a/util/ChangeLog +++ b/util/ChangeLog @@ -1,3 +1,11 @@ +2005-01-06 Werner Koch + + * strgutil.c (set_native_charset): Assume that ASCII, + ANSI_X3.4-1968 and 646 are actually meant as Latin-1. If + nl_langinfo is not available get the charset from environment + variables. For W32 use GetACP as error fallback. Removed Latin-15 + to Latin-1 aliasing. + 2004-12-28 David Shaw * srv.h: Better implementation for the SRV check. We don't need diff --git a/util/strgutil.c b/util/strgutil.c index 2167cb9fd..6205910e2 100644 --- a/util/strgutil.c +++ b/util/strgutil.c @@ -144,6 +144,8 @@ load_libiconv (void) { log_info (_("error loading `%s': %s\n"), "iconv.dll", dlerror ()); + log_info(_("please see http://www.gnupg.org/download/iconv.html " + "for more information\n")); iconv_open = NULL; iconv = NULL; iconv_close = NULL; @@ -479,14 +481,19 @@ set_native_charset( const char *newset ) if (!newset) { #ifdef _WIN32 static char codepage[30]; + unsigned int cpno; /* We are a console program thus we need to use the - GetConsoleOutputCP fucntion and not the the GetACP which + GetConsoleOutputCP function and not the the GetACP which would give the codepage for a GUI program. Note this is not a bulletproof detection because GetConsoleCP might - retrun a different one for console input. Not sure how to - cope with that. */ - sprintf (codepage, "CP%u", (unsigned int)GetConsoleOutputCP ()); + return a different one for console input. Not sure how to + cope with that. If the console Code page is not known we + fall back to the system code page. */ + cpno = GetConsoleOutputCP (); + if (!cpno) + cpno = GetACP (); + sprintf (codepage, "CP%u", cpno ); /* If it is the Windows name for Latin-1 we use the standard name instead to avoid loading of iconv.dll. Unfortunately it is often CP850 and we don't have a custom translation @@ -498,9 +505,32 @@ set_native_charset( const char *newset ) #else #ifdef HAVE_LANGINFO_CODESET newset = nl_langinfo (CODESET); -#else - newset = "iso-8859-1"; -#endif +#else /* !HAVE_LANGINFO_CODESET */ + /* Try to get the used charset from environment variables. */ + static char codepage[30]; + const char *lc, *dot, *mod; + + strcpy (codepage, "iso-8859-1"); + lc = getenv ("LC_ALL"); + if (!lc || !*lc) { + lc = getenv ("LC_CTYPE"); + if (!lc || !*lc) + lc = getenv ("LANG"); + } + if (lc && *lc) { + dot = strchr (lc, '.'); + if (dot) { + mod = strchr (++dot, '@'); + if (!mod) + mod = dot + strlen (dot); + if (mod - dot < sizeof codepage && dot != mod) { + memcpy (codepage, dot, mod - dot); + codepage [mod - dot] = 0; + } + } + } + newset = codepage; +#endif /* !HAVE_LANGINFO_CODESET */ #endif } @@ -511,9 +541,18 @@ set_native_charset( const char *newset ) newset++; } + /* Note that we silently assume that plain ASCII is actually meant + as Latin-1. This makes sense because many Unix system don't + have their locale set up properly and thus would get annoying + error messages and we have to handle all the "bug" + reports. Latin-1 has always been the character set used for 8 + bit characters on Unix systems. */ if( !*newset || !ascii_strcasecmp (newset, "8859-1" ) - || !ascii_strcasecmp (newset, "8859-15" ) ) { + || !ascii_strcasecmp (newset, "646" ) + || !ascii_strcasecmp (newset, "ASCII" ) + || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" ) + ) { active_charset_name = "iso-8859-1"; no_translation = 0; active_charset = NULL;