mirror of
git://git.gnupg.org/gnupg.git
synced 2025-01-05 12:31:50 +01:00
Try and detect mis-coded Latin1 and convert it to UTF8. Whether the
heuristics succeed or not, the resulting string must be valid UTF8 as LDAP requires that. This is bug 1055.
This commit is contained in:
parent
b8805ca724
commit
00310b1aa8
@ -1,3 +1,10 @@
|
|||||||
|
2009-08-11 David Shaw <dshaw@jabberwocky.com>
|
||||||
|
|
||||||
|
* keyserver.c (keyserver_spawn): Try and detect mis-coded Latin1
|
||||||
|
and convert it to UTF8. Whether the heuristics succeed or not,
|
||||||
|
the resulting string must be valid UTF8 as LDAP requires that.
|
||||||
|
This is bug 1055.
|
||||||
|
|
||||||
2009-08-03 Werner Koch <wk@g10code.com>
|
2009-08-03 Werner Koch <wk@g10code.com>
|
||||||
|
|
||||||
* card-util.c (generate_card_keys): Ask for off-card keys only if
|
* card-util.c (generate_card_keys): Ask for off-card keys only if
|
||||||
|
@ -1270,24 +1270,49 @@ keyserver_spawn(enum ks_action action,STRLIST list,KEYDB_SEARCH_DESC *desc,
|
|||||||
{
|
{
|
||||||
PKT_user_id *uid=node->pkt->pkt.user_id;
|
PKT_user_id *uid=node->pkt->pkt.user_id;
|
||||||
int r;
|
int r;
|
||||||
|
char *uidstr1,*uidstr2,*uidstr3;
|
||||||
|
size_t uidstrlen;
|
||||||
|
|
||||||
if(uid->attrib_data)
|
if(uid->attrib_data)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
fprintf(spawn->tochild,"uid:");
|
fprintf(spawn->tochild,"uid:");
|
||||||
|
|
||||||
/* Quote ':', '%', and any 8-bit
|
/* Make sure it's real UTF8. What happens
|
||||||
characters */
|
here is that first we heuristically try
|
||||||
for(r=0;r<uid->len;r++)
|
and convert the string (which may be
|
||||||
|
mis-coded) into UTF8. We then bring it
|
||||||
|
to native and then back to UTF8. For
|
||||||
|
true UTF8, this whole process should be
|
||||||
|
lossless. For the common Latin-1
|
||||||
|
mis-encoding, it will become UTF8. For
|
||||||
|
other encodings, it will become UTF8 but
|
||||||
|
with unknown characters quoted. This
|
||||||
|
preserves the notion that anything in the
|
||||||
|
stream to the keyserver handler program
|
||||||
|
is UTF8. */
|
||||||
|
uidstr1=string_to_utf8(uid->name);
|
||||||
|
uidstr2=utf8_to_native(uidstr1,strlen(uidstr1),-1);
|
||||||
|
uidstr3=native_to_utf8(uidstr2);
|
||||||
|
|
||||||
|
uidstrlen=strlen(uidstr3);
|
||||||
|
|
||||||
|
/* Quote ':', '%', and anything not
|
||||||
|
printable ASCII */
|
||||||
|
for(r=0;r<uidstrlen;r++)
|
||||||
{
|
{
|
||||||
if(uid->name[r]==':' || uid->name[r]=='%'
|
if(uidstr3[r]==':' || uidstr3[r]=='%'
|
||||||
|| uid->name[r]&0x80)
|
|| uidstr3[r]<' ' || uidstr3[r]>'~')
|
||||||
fprintf(spawn->tochild,"%%%02X",
|
fprintf(spawn->tochild,"%%%02X",
|
||||||
(byte)uid->name[r]);
|
(byte)uidstr3[r]);
|
||||||
else
|
else
|
||||||
fprintf(spawn->tochild,"%c",uid->name[r]);
|
fprintf(spawn->tochild,"%c",uidstr3[r]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xfree(uidstr1);
|
||||||
|
xfree(uidstr2);
|
||||||
|
xfree(uidstr3);
|
||||||
|
|
||||||
fprintf(spawn->tochild,":%u:%u:",
|
fprintf(spawn->tochild,":%u:%u:",
|
||||||
uid->created,uid->expiredate);
|
uid->created,uid->expiredate);
|
||||||
|
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
2009-08-11 David Shaw <dshaw@jabberwocky.com>
|
||||||
|
|
||||||
|
* util.h: Add string_to_utf8() from GPA.
|
||||||
|
|
||||||
2009-07-21 Werner Koch <wk@g10code.com>
|
2009-07-21 Werner Koch <wk@g10code.com>
|
||||||
|
|
||||||
* estream-printf.h: New. Taken from libestream.x
|
* estream-printf.h: New. Taken from libestream.x
|
||||||
|
@ -190,7 +190,7 @@ int set_native_charset( const char *newset );
|
|||||||
const char* get_native_charset(void);
|
const char* get_native_charset(void);
|
||||||
char *native_to_utf8( const char *string );
|
char *native_to_utf8( const char *string );
|
||||||
char *utf8_to_native( const char *string, size_t length, int delim);
|
char *utf8_to_native( const char *string, size_t length, int delim);
|
||||||
int check_utf8_string( const char *string );
|
char *string_to_utf8 (const char *string);
|
||||||
|
|
||||||
int ascii_isupper (int c);
|
int ascii_isupper (int c);
|
||||||
int ascii_islower (int c);
|
int ascii_islower (int c);
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
2009-08-11 David Shaw <dshaw@jabberwocky.com>
|
||||||
|
|
||||||
|
* strgutil.c (string_to_utf8): New function to convert a Latin-1
|
||||||
|
string to UTF8. From GPA.
|
||||||
|
|
||||||
2009-07-23 David Shaw <dshaw@jabberwocky.com>
|
2009-07-23 David Shaw <dshaw@jabberwocky.com>
|
||||||
|
|
||||||
* srv.c (getsrv): Fix type-punning warning.
|
* srv.c (getsrv): Fix type-punning warning.
|
||||||
|
@ -1048,6 +1048,77 @@ utf8_to_native( const char *string, size_t length, int delim )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This is similar to native_to_utf8, except it can take any input
|
||||||
|
(which may or may not be UTF8 encoded) and return something that is
|
||||||
|
(almost) definitely UTF8. This code is mostly borrowed from
|
||||||
|
GPA. */
|
||||||
|
|
||||||
|
char *
string_to_utf8 (const char *string)
{
  const char *scan;
  const char *src;
  char *result;
  char *dst;
  size_t needed = 0;
  unsigned char lead;

  /* A NULL input yields a NULL result.  Every other path returns a
     freshly allocated string obtained from xstrdup or xmalloc. */
  if (!string)
    return NULL;

  /* Old (and not so old) PGP versions copied user IDs verbatim into
     the key, so names with umlauts and the like are frequently stored
     as Latin-1 rather than UTF-8.  The heuristic below inspects only
     the first byte that has its high bit set and, when the string
     looks like Latin-1, re-encodes every such byte as a two-byte
     UTF-8 sequence. */
  scan = string;
  while (*scan && !(*scan & 0x80))
    scan++;

  /* No byte with the high bit set: plain ASCII, copy unchanged. */
  if (!*scan)
    return xstrdup (string);

  /* A lead byte in the range 0xC0..0xFD matches one of the bit
     patterns 110xxxxx, 1110xxxx, 11110xxx, 111110xx or 1111110x.
     If it is followed by a continuation byte (10xxxxxx) this may
     still be Latin-1, but we prefer to assume it is UTF-8. */
  lead = (unsigned char) *scan;
  if ((scan[1] & 0xc0) == 0x80 && lead >= 0xc0 && lead <= 0xfd)
    return xstrdup (string);

  /* A 0xC3 byte somewhere in the string: treat the string as UTF-8
     even though the first non-ASCII byte did not look valid.  Only
     that first byte was tested, so valid sequences may follow. */
  if (strchr (string, 0xc3))
    return xstrdup (string);

  /* No 0xC3 byte at all: assume Latin-1.  First compute the output
     size; each high-bit byte expands to two bytes. */
  for (src = string; *src; src++)
    needed += (*src & 0x80) ? 2 : 1;

  result = xmalloc (needed + 1);
  dst = result;
  for (src = string; *src; src++)
    {
      if (*src & 0x80)
        {
          /* Top two bits go into the lead byte, low six bits into
             the continuation byte. */
          *dst++ = 0xc0 | ((*src >> 6) & 3);
          *dst++ = 0x80 | (*src & 0x3f);
        }
      else
        *dst++ = *src;
    }
  *dst = 0;

  return result;
}
|
||||||
|
|
||||||
/* Same as asprintf but return an allocated buffer suitable to be
|
/* Same as asprintf but return an allocated buffer suitable to be
|
||||||
freed using xfree. This function simply dies on memory failure,
|
freed using xfree. This function simply dies on memory failure,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user