* gpgkeys_hkp.c (parse_hkp_index, dehtmlize): Move HTML functionality into new "dehtmlize" function. Remove HTML before trying to parse each line from the keyserver. If the keyserver provides key type information in the listing, use it. (Copy over from g10/hkp.c).
2025-07-14 21:47:19 +02:00 · 2002-08-26 19:22:48 +00:00 · 2002-08-26 19:22:48 +00:00 · 1f5bed18f4
commit 1f5bed18f4
parent da488f03a7
2 changed files with 104 additions and 73 deletions
--- a/keyserver/ChangeLog
+++ b/keyserver/ChangeLog
@@ -1,3 +1,11 @@
+2002-08-26  David Shaw  <dshaw@jabberwocky.com>
+
+	* gpgkeys_hkp.c (parse_hkp_index, dehtmlize): Move HTML
+	functionality into new "dehtmlize" function.  Remove HTML before
+	trying to parse each line from the keyserver.  If the keyserver
+	provides key type information in the listing, use it.  (Copy over
+	from g10/hkp.c).
+
 2002-08-19  David Shaw  <dshaw@jabberwocky.com>

 	* gpgkeys_hkp.c (get_key, parse_hkp_index): Bring over latest code
--- a/keyserver/gpgkeys_hkp.c
+++ b/keyserver/gpgkeys_hkp.c
@@ -391,6 +391,77 @@ unsigned int scan_isodatestr( const char *string )
  return stamp;
 }

+/* Strip HTML from LINE in place: drop anything <between brackets>
+   and decode the entities &lt; &gt; and &amp; (matched without
+   regard to case).  Any other '&' is copied through unchanged.  A
+   bracket left open swallows the rest of the line, so this requires
+   all brackets to be closed on the same line.  Trailing whitespace
+   is chopped as well.  Every step only removes or shrinks text, so
+   the result is never larger than the input and nothing needs to be
+   allocated. */
+static void
+dehtmlize(char *line)
+{
+  size_t parsedindex=0;
+  char *parsed=line;
+
+  while(*line!='\0')
+    {
+      switch(*line)
+	{
+	case '<':
+	  /* Skip the tag, including the closing '>' if there is one. */
+	  while(*line!='>' && *line!='\0')
+	    line++;
+
+	  if(*line!='\0')
+	    line++;
+	  break;
+
+	case '&':
+	  /* strncasecmp stops at the terminating NUL, so a short tail
+	     cannot be read past its end. */
+	  if(strncasecmp(line,"&lt;",4)==0)
+	    {
+	      parsed[parsedindex++]='<';
+	      line+=4;
+	      break;
+	    }
+	  else if(strncasecmp(line,"&gt;",4)==0)
+	    {
+	      parsed[parsedindex++]='>';
+	      line+=4;
+	      break;
+	    }
+	  else if(strncasecmp(line,"&amp;",5)==0)
+	    {
+	      parsed[parsedindex++]='&';
+	      line+=5;
+	      break;
+	    }
+
+	  /* Not an entity we know: fall through and copy the '&'. */
+
+	default:
+	  parsed[parsedindex++]=*line;
+	  line++;
+	  break;
+	}
+    }
+
+  parsed[parsedindex]='\0';
+
+  /* Chop off any trailing whitespace.  Note that the HKP servers have
+     \r\n as line endings, and the NAI HKP servers have just \n.  Test
+     the index before stepping back so that a line made up of nothing
+     but whitespace cannot run off the front of the buffer, and go
+     through unsigned char since isspace is undefined for negative
+     values. */
+
+  while(parsedindex>0 && isspace((unsigned char)parsed[parsedindex-1]))
+    parsed[--parsedindex]='\0';
+}
+
 /* pub  2048/<a href="/pks/lookup?op=get&search=0x3CB3B415">3CB3B415</a> 1998/04/03 David M. Shaw &lt;<a href="/pks/lookup?op=get&search=0x3CB3B415">dshaw@jabberwocky.com</a>&gt; */

 /* Luckily enough, both the HKP server and NAI HKP interface to their
@@ -400,17 +471,21 @@ unsigned int scan_isodatestr( const char *string )
 int parse_hkp_index(char *line,char **buffer)
 {
  static int open=0,revoked=0;
-  static char *key=NULL,*uid=NULL;
+  static char *key=NULL,*uid=NULL,*type=NULL;
  static unsigned int bits,createtime;
  int ret=0;

-  /*  printf("Open %d, LINE: %s, uid: %s\n",open,line,uid); */
+  /* printf("Open %d, LINE: %s, uid: %s\n",open,line,uid); */
+
+  dehtmlize(line);
+
+  /* printf("Now open %d, LINE: \"%s\", uid: %s\n",open,line,uid); */

  /* Try and catch some bastardization of HKP.  If we don't have
     certain unchanging landmarks, we can't reliably parse the
-     response. */
-
-  if(open && strncasecmp(line,"</pre>",6)!=0 &&
+     response.  This only complains about problems within the key
+     section itself.  Headers and footers should not matter. */
+  if(open && line[0]!='\0' &&
     strncasecmp(line,"pub ",4)!=0 &&
     strncasecmp(line,"    ",4)!=0)
    {
@@ -420,8 +495,6 @@ int parse_hkp_index(char *line,char **buffer)
      return -1;
    }

-  /* printf("Open %d, LINE: %s\n",open,line); */
-
  /* For multiple UIDs */
  if(open && uid!=NULL)
    {
@@ -456,7 +529,10 @@ int parse_hkp_index(char *line,char **buffer)
 	  append_quoted(*buffer,revoked?"1:":":",0);
 	  sprintf(intstr,"%u",createtime);
 	  append_quoted(*buffer,intstr,':');
-	  append_quoted(*buffer,"::::",0);
+	  append_quoted(*buffer,":::",0);
+	  if(type)
+	    append_quoted(*buffer,type,':');
+	  append_quoted(*buffer,":",0);
 	  sprintf(intstr,"%u",bits);
 	  append_quoted(*buffer,intstr,':');
 	  append_quoted(*buffer,"\n",0);
@@ -486,13 +562,16 @@ int parse_hkp_index(char *line,char **buffer)
      if(tok==NULL)
 	return ret;

+      if(tok[strlen(tok)-1]=='R')
+ 	type="RSA";
+      else if(tok[strlen(tok)-1]=='D')
+ 	type="DSA";
+      else
+ 	type=NULL;
+
      bits=atoi(tok);

-      tok=strsep(&line,">");
-      if(tok==NULL)
-	return ret;
-
-      tok=strsep(&line,"<");
+      tok=strsep(&line," ");
      if(tok==NULL)
 	{
 	  key=strdup("00000000");
@@ -505,10 +584,6 @@ int parse_hkp_index(char *line,char **buffer)
      if(tok==NULL)
 	return ret;

-      tok=strsep(&line," ");
-      if(tok==NULL)
-	return ret;
-  
      /* The date parser wants '-' instead of '/', so... */
      temp=tok;
      while(*temp!='\0')
@@ -524,77 +599,25 @@ int parse_hkp_index(char *line,char **buffer)

  if(open)
    {
-      int uidindex=0;
-
      if(line==NULL)
 	{
 	  uid=strdup("Key index corrupted");
 	  return ret;
 	}

-      /* All that's left is the user name.  Strip off anything
-	 <between brackets> and de-urlencode it. */
-
      while(*line==' ' && *line!='\0')
 	line++;

+      if(*line=='\0')
+	return ret;
+
      if(strncmp(line,"*** KEY REVOKED ***",19)==0)
 	{
 	  revoked=1;
 	  return ret;
 	}

-      uid=malloc(strlen(line)+1);
-
-      while(*line!='\0')
-	{
-	  switch(*line)
-	    {
-	    case '<':
-	      while(*line!='>' && *line!='\0')
-		line++;
-
-	      if(*line!='\0')
-		line++;
-	      break;
-
-	    case '&':
-	      if((*(line+1)!='\0' && tolower(*(line+1))=='l') &&
-		 (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
-		 (*(line+3)!='\0' && *(line+3)==';'))
-		{
-		  uid[uidindex++]='<';
-		  line+=4;
-		  break;
-		}
-
-	      if((*(line+1)!='\0' && tolower(*(line+1))=='g') &&
-		 (*(line+2)!='\0' && tolower(*(line+2))=='t') &&
-		 (*(line+3)!='\0' && *(line+3)==';'))
-		{
-		  uid[uidindex++]='>';
-		  line+=4;
-		  break;
-		}
-
-	    default:
-	      uid[uidindex++]=*line;
-	      line++;
-	      break;
-	    }
-	}
-
-      uid[uidindex]='\0';
-
-      /* Chop off the trailing \r, \n, or both. This is fussy as the
-         true HKP servers have \r\n, and the NAI HKP servers have just
-         \n. */
-
-      if(isspace(uid[uidindex-1]))
-	uid[uidindex-1]='\0';
-
-      if(isspace(uid[uidindex-2]))
-	uid[uidindex-2]='\0';
+      uid=strdup(line);
    }

  return ret;