From 1f5bed18f499eae66b02b8940f13496dba399c8a Mon Sep 17 00:00:00 2001
From: David Shaw <dshaw@jabberwocky.com>
Date: Mon, 26 Aug 2002 19:22:48 +0000
Subject: [PATCH] * gpgkeys_hkp.c (parse_hkp_index, dehtmlize): Move HTML
 functionality into new "dehtmlize" function.  Remove HTML before trying to
 parse each line from the keyserver.  If the keyserver provides key type
 information in the listing, use it.  (Copy over from g10/hkp.c).

---
 keyserver/ChangeLog     |   8 ++
 keyserver/gpgkeys_hkp.c | 169 +++++++++++++++++++++++-----------------
 2 files changed, 104 insertions(+), 73 deletions(-)
diff --git a/keyserver/ChangeLog b/keyserver/ChangeLog
index 5362dc66d..facf9b247 100644
--- a/keyserver/ChangeLog
+++ b/keyserver/ChangeLog
@@ -1,3 +1,11 @@
+2002-08-26  David Shaw  <dshaw@jabberwocky.com>
+
+	* gpgkeys_hkp.c (parse_hkp_index, dehtmlize): Move HTML
+	functionality into new "dehtmlize" function.  Remove HTML before
+	trying to parse each line from the keyserver.  If the keyserver
+	provides key type information in the listing, use it.  (Copy over
+	from g10/hkp.c).
+
 2002-08-19  David Shaw  <dshaw@jabberwocky.com>
 
 	* gpgkeys_hkp.c (get_key, parse_hkp_index): Bring over latest code
diff --git a/keyserver/gpgkeys_hkp.c b/keyserver/gpgkeys_hkp.c
index 455dd0050..2cb27be48 100644
--- a/keyserver/gpgkeys_hkp.c
+++ b/keyserver/gpgkeys_hkp.c
@@ -391,6 +391,77 @@ unsigned int scan_isodatestr( const char *string )
   return stamp;
 }
 
+/* Remove anything <between brackets> and de-urlencode in place.  Note
+   that this requires all brackets to be closed on the same line.  It
+   also means that the result is never larger than the input. */
+static void
+dehtmlize(char *line)
+{
+  int parsedindex=0;
+  char *parsed=line;
+
+  while(*line!='\0')
+    {
+      switch(*line)
+	{
+	case '<':
+	  while(*line!='>' && *line!='\0')
+	    line++;
+
+	  if(*line!='\0')
+	    line++;
+	  break;
+
+	case '&':
+	  if((*(line+1)!='\0' && tolower(*(line+1))=='l') &&
+	     (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
+	     (*(line+3)!='\0' && *(line+3)==';'))
+	    {
+	      parsed[parsedindex++]='<';
+	      line+=4;
+	      break;
+	    }
+	  else if((*(line+1)!='\0' && tolower(*(line+1))=='g') &&
+		  (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
+		  (*(line+3)!='\0' && *(line+3)==';'))
+	    {
+	      parsed[parsedindex++]='>';
+	      line+=4;
+	      break;
+	    }
+	  else if((*(line+1)!='\0' && tolower(*(line+1))=='a') &&
+		  (*(line+2)!='\0' && tolower(*(line+2))=='m') &&
+		  (*(line+3)!='\0' && tolower(*(line+3))=='p') &&
+		  (*(line+4)!='\0' && *(line+4)==';'))
+	    {
+	      parsed[parsedindex++]='&';
+	      line+=5;
+	      break;
+	    }
+
+	default:
+	  parsed[parsedindex++]=*line;
+	  line++;
+	  break;
+	}
+    }
+
+  parsed[parsedindex]='\0';
+
+  /* Chop off any trailing whitespace.  Note that the HKP servers have
+     \r\n as line endings, and the NAI HKP servers have just \n. */
+
+  if(parsedindex>0)
+    {
+      parsedindex--;
+      while(isspace(parsed[parsedindex]))
+	{
+	  parsed[parsedindex]='\0';
+	  parsedindex--;
+	}
+    }
+}
+
 /* pub  2048/<a href="/pks/lookup?op=get&search=0x3CB3B415">3CB3B415</a> 1998/04/03 David M. Shaw &lt;<a href="/pks/lookup?op=get&search=0x3CB3B415">dshaw@jabberwocky.com</a>&gt; */
 
 /* Luckily enough, both the HKP server and NAI HKP interface to their
@@ -400,17 +471,21 @@ unsigned int scan_isodatestr( const char *string )
 int parse_hkp_index(char *line,char **buffer)
 {
   static int open=0,revoked=0;
-  static char *key=NULL,*uid=NULL;
+  static char *key=NULL,*uid=NULL,*type=NULL;
   static unsigned int bits,createtime;
   int ret=0;
 
-  /*  printf("Open %d, LINE: %s, uid: %s\n",open,line,uid); */
+  /* printf("Open %d, LINE: %s, uid: %s\n",open,line,uid); */
+
+  dehtmlize(line);
+
+  /* printf("Now open %d, LINE: \"%s\", uid: %s\n",open,line,uid); */
 
   /* Try and catch some bastardization of HKP.  If we don't have
      certain unchanging landmarks, we can't reliably parse the
-     response. */
-
-  if(open && strncasecmp(line,"</pre>",6)!=0 &&
+     response.  This only complains about problems within the key
+     section itself.  Headers and footers should not matter. */
+  if(open && line[0]!='\0' &&
      strncasecmp(line,"pub ",4)!=0 &&
      strncasecmp(line,"    ",4)!=0)
     {
@@ -420,8 +495,6 @@ int parse_hkp_index(char *line,char **buffer)
       return -1;
     }
 
-  /* printf("Open %d, LINE: %s\n",open,line); */
-
   /* For multiple UIDs */
   if(open && uid!=NULL)
     {
@@ -456,7 +529,10 @@ int parse_hkp_index(char *line,char **buffer)
 	  append_quoted(*buffer,revoked?"1:":":",0);
 	  sprintf(intstr,"%u",createtime);
 	  append_quoted(*buffer,intstr,':');
-	  append_quoted(*buffer,"::::",0);
+	  append_quoted(*buffer,":::",0);
+	  if(type)
+	    append_quoted(*buffer,type,':');
+	  append_quoted(*buffer,":",0);
 	  sprintf(intstr,"%u",bits);
 	  append_quoted(*buffer,intstr,':');
 	  append_quoted(*buffer,"\n",0);
@@ -486,13 +562,16 @@ int parse_hkp_index(char *line,char **buffer)
       if(tok==NULL)
 	return ret;
 
+      if(tok[strlen(tok)-1]=='R')
+ 	type="RSA";
+      else if(tok[strlen(tok)-1]=='D')
+ 	type="DSA";
+      else
+ 	type=NULL;
+
       bits=atoi(tok);
 
-      tok=strsep(&line,">");
-      if(tok==NULL)
-	return ret;
-
-      tok=strsep(&line,"<");
+      tok=strsep(&line," ");
       if(tok==NULL)
 	{
 	  key=strdup("00000000");
@@ -505,10 +584,6 @@ int parse_hkp_index(char *line,char **buffer)
       if(tok==NULL)
 	return ret;
 
-      tok=strsep(&line," ");
-      if(tok==NULL)
-	return ret;
-  
       /* The date parser wants '-' instead of '/', so... */
       temp=tok;
       while(*temp!='\0')
@@ -524,77 +599,25 @@ int parse_hkp_index(char *line,char **buffer)
 
   if(open)
     {
-      int uidindex=0;
-
       if(line==NULL)
 	{
 	  uid=strdup("Key index corrupted");
 	  return ret;
 	}
 
-      /* All that's left is the user name.  Strip off anything
-	 <between brackets> and de-urlencode it. */
-
       while(*line==' ' && *line!='\0')
 	line++;
 
+      if(*line=='\0')
+	return ret;
+
       if(strncmp(line,"*** KEY REVOKED ***",19)==0)
 	{
 	  revoked=1;
 	  return ret;
 	}
 
-      uid=malloc(strlen(line)+1);
-
-      while(*line!='\0')
-	{
-	  switch(*line)
-	    {
-	    case '<':
-	      while(*line!='>' && *line!='\0')
-		line++;
-
-	      if(*line!='\0')
-		line++;
-	      break;
-
-	    case '&':
-	      if((*(line+1)!='\0' && tolower(*(line+1))=='l') &&
-		 (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
-		 (*(line+3)!='\0' && *(line+3)==';'))
-		{
-		  uid[uidindex++]='<';
-		  line+=4;
-		  break;
-		}
-
-	      if((*(line+1)!='\0' && tolower(*(line+1))=='g') &&
-		 (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
-		 (*(line+3)!='\0' && *(line+3)==';'))
-		{
-		  uid[uidindex++]='>';
-		  line+=4;
-		  break;
-		}
-
-	    default:
-	      uid[uidindex++]=*line;
-	      line++;
-	      break;
-	    }
-	}
-
-      uid[uidindex]='\0';
-
-      /* Chop off the trailing \r, \n, or both. This is fussy as the
-         true HKP servers have \r\n, and the NAI HKP servers have just
-         \n. */
-
-      if(isspace(uid[uidindex-1]))
-	uid[uidindex-1]='\0';
-
-      if(isspace(uid[uidindex-2]))
-	uid[uidindex-2]='\0';
+      uid=strdup(line);
     }
 
   return ret;