From 576e429d41a144ff4f0c00e8722da2f92ae17d9a Mon Sep 17 00:00:00 2001
From: Werner Koch <wk@gnupg.org>
Date: Fri, 20 Aug 2021 09:13:01 +0200
Subject: [PATCH] wkd: Fix client issue with leading or trailing spaces in
 user-ids.

* common/recsel.c (recsel_parse_expr): Add flag -t.
* common/stringhelp.c (strtokenize): Factor code out to
do_strtokenize.
(strtokenize_nt): New.
(do_strtokenize): Add arg trim to support the strtokenize_nt.
* common/t-stringhelp.c (test_strtokenize_nt): New test cases.

* tools/wks-util.c (wks_list_key): Use strtokenize_nt and the recsel
flag -t.
--

This fixes a bug with user ids with leading spaces because:

wks-client lists all mail addresses from the key and matches them to the
requested mail address.

If there are several user-ids all with the same mail address
wks-client picks one of them and then extracts exactly that user id.
However, here it does not match by the mail address but by the full
user-id so that we can be sure that there will be only one user-id in
the final key.

Unfortunately the parser of the built filter expression strips leading
blanks from the value while the match itself is done verbatim.  Thus it
won't find the user id again and errors out.

The new -t flag and a non-trimming strtokenize solve the problem.

Signed-off-by: Werner Koch <wk@gnupg.org>
---
 common/recsel.c       |  12 +++-
 common/stringhelp.c   |  55 ++++++++++++----
 common/stringhelp.h   |   3 +
 common/t-stringhelp.c | 144 +++++++++++++++++++++++++++++++++++++++++-
 doc/gpg.texi          |   3 +
 tools/wks-util.c      |   4 +-
 6 files changed, 201 insertions(+), 20 deletions(-)
diff --git a/common/recsel.c b/common/recsel.c
index b2b302b75..df77b5785 100644
--- a/common/recsel.c
+++ b/common/recsel.c
@@ -172,6 +172,8 @@ find_next_lc (char *string)
  *
  *   --  VALUE spans to the end of the expression.
  *   -c  The string match in this part is done case-sensitive.
+ *   -t  Do not trim leading and trailing spaces from VALUE.
+ *       Note that a single space after <op> is required here.
  *
  * For example four calls to recsel_parse_expr() with these values for
  * EXPR
@@ -203,6 +205,7 @@ recsel_parse_expr (recsel_expr_t *selector, const char *expression)
   char *s0, *s;
   int toend = 0;
   int xcase = 0;
+  int notrim = 0;
   int disjun = 0;
   char *next_lc = NULL;
 
@@ -232,6 +235,7 @@ recsel_parse_expr (recsel_expr_t *selector, const char *expression)
         {
         case '-': toend = 1; break;
         case 'c': xcase = 1; break;
+        case 't': notrim = 1; break;
         default:
           log_error ("invalid flag '-%c' in expression\n", *expr);
           recsel_release (se_head);
@@ -391,8 +395,11 @@ recsel_parse_expr (recsel_expr_t *selector, const char *expression)
       return my_error (GPG_ERR_INV_OP);
     }
 
-  while (*s == ' ' || *s == '\t')
+  if (*s == ' ' || *s == '\t')
     s++;
+  if (!notrim)
+    while (*s == ' ' || *s == '\t')
+      s++;
 
   if (se->op == SELECT_NONEMPTY || se->op == SELECT_ISTRUE)
     {
@@ -425,7 +432,8 @@ recsel_parse_expr (recsel_expr_t *selector, const char *expression)
       return my_error (GPG_ERR_NO_NAME);
     }
 
-  trim_spaces (se->name + (s - expr));
+  if (!notrim)
+    trim_spaces (se->name + (s - expr));
   se->value = se->name + (s - expr);
   if (!se->value[0] && !(se->op == SELECT_NONEMPTY || se->op == SELECT_ISTRUE))
     {
diff --git a/common/stringhelp.c b/common/stringhelp.c
index babdeb847..c5e64a06c 100644
--- a/common/stringhelp.c
+++ b/common/stringhelp.c
@@ -2,7 +2,7 @@
  * Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007,
  *               2008, 2009, 2010  Free Software Foundation, Inc.
  * Copyright (C) 2014 Werner Koch
- * Copyright (C) 2015  g10 Code GmbH
+ * Copyright (C) 2015, 2021  g10 Code GmbH
  *
  * This file is part of GnuPG.
  *
@@ -29,6 +29,7 @@
  * You should have received a copies of the GNU General Public License
  * and the GNU Lesser General Public License along with this program;
  * if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: (LGPL-3.0-or-later OR GPL-2.0-or-later)
  */
 
 #include <config.h>
@@ -48,7 +49,6 @@
 # endif
 # include <windows.h>
 #endif
-#include <assert.h>
 #include <limits.h>
 
 #include "util.h"
@@ -214,7 +214,7 @@ trim_spaces( char *str )
 }
 
 
-/* Same as trim_spaces but only condider, space, tab, cr and lf as space.  */
+/* Same as trim_spaces but only consider space, tab, cr and lf as space.  */
 char *
 ascii_trim_spaces (char *str)
 {
@@ -1291,8 +1291,8 @@ strsplit (char *string, char delim, char replacement, int *count)
  * Returns: A malloced and NULL delimited array with the tokens.  On
  *          memory error NULL is returned and ERRNO is set.
  */
-char **
-strtokenize (const char *string, const char *delim)
+static char **
+do_strtokenize (const char *string, const char *delim, int trim)
 {
   const char *s;
   size_t fields;
@@ -1331,24 +1331,51 @@ strtokenize (const char *string, const char *delim)
   for (n = 0, p = buffer; (pend = strpbrk (p, delim)); p = pend + 1)
     {
       *pend = 0;
-      while (spacep (p))
-        p++;
-      for (px = pend - 1; px >= p && spacep (px); px--)
-        *px = 0;
+      if (trim)
+        {
+          while (spacep (p))
+            p++;
+          for (px = pend - 1; px >= p && spacep (px); px--)
+            *px = 0;
+        }
       result[n++] = p;
     }
-  while (spacep (p))
-    p++;
-  for (px = p + strlen (p) - 1; px >= p && spacep (px); px--)
-    *px = 0;
+  if (trim)
+    {
+      while (spacep (p))
+        p++;
+      for (px = p + strlen (p) - 1; px >= p && spacep (px); px--)
+        *px = 0;
+    }
   result[n++] = p;
   result[n] = NULL;
 
-  assert ((char*)(result + n + 1) == buffer);
+  log_assert ((char*)(result + n + 1) == buffer);
 
   return result;
 }
 
+/* Tokenize STRING using the set of delimiters in DELIM.  Leading and
+ * trailing spaces and tabs are removed from all tokens.  The caller
+ * must xfree the result.
+ *
+ * Returns: A malloced and NULL delimited array with the tokens.  On
+ *          memory error NULL is returned and ERRNO is set.
+ */
+char **
+strtokenize (const char *string, const char *delim)
+{
+  return do_strtokenize (string, delim, 1);
+}
+
+/* Same as strtokenize but does not trim leading and trailing spaces
+ * from the fields.  */
+char **
+strtokenize_nt (const char *string, const char *delim)
+{
+  return do_strtokenize (string, delim, 0);
+}
+
 
 /* Split a string into space delimited fields and remove leading and
  * trailing spaces from each field.  A pointer to each field is stored
diff --git a/common/stringhelp.h b/common/stringhelp.h
index 42bb19aaf..70edb6e0d 100644
--- a/common/stringhelp.h
+++ b/common/stringhelp.h
@@ -148,6 +148,9 @@ char **strsplit (char *string, char delim, char replacement, int *count);
 
 /* Tokenize STRING using the set of delimiters in DELIM.  */
 char **strtokenize (const char *string, const char *delim);
+/* Tokenize STRING using the set of delimiters in DELIM but do not
+ * trim the tokens.  */
+char **strtokenize_nt (const char *string, const char *delim);
 
 /* Split STRING into space delimited fields and store them in the
  * provided ARRAY.  */
diff --git a/common/t-stringhelp.c b/common/t-stringhelp.c
index 7c6fb8022..332391689 100644
--- a/common/t-stringhelp.c
+++ b/common/t-stringhelp.c
@@ -1,6 +1,6 @@
 /* t-stringhelp.c - Regression tests for stringhelp.c
  * Copyright (C) 2007 Free Software Foundation, Inc.
- *               2015  g10 Code GmbH
+ *               2015, 2021  g10 Code GmbH
  *
  * This file is part of GnuPG.
  *
@@ -27,6 +27,7 @@
  * You should have received a copies of the GNU General Public License
  * and the GNU Lesser General Public License along with this program;
  * if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: (LGPL-3.0-or-later OR GPL-2.0-or-later)
  */
 
 #include <config.h>
@@ -34,13 +35,13 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
-#include <assert.h>
 #ifdef HAVE_PWD_H
 # include <pwd.h>
 #endif
 #include <unistd.h>
 #include <sys/types.h>
 #include <limits.h>
+#include <assert.h>
 
 #include "t-support.h"
 #include "stringhelp.h"
@@ -686,6 +687,144 @@ test_strtokenize (void)
 }
 
 
+static void
+test_strtokenize_nt (void)
+{
+  struct {
+    const char *s;
+    const char *delim;
+    const char *fields_expected[10];
+  } tv[] = {
+    {
+      "", ":",
+      { "", NULL }
+    },
+    {
+      "a", ":",
+      { "a", NULL }
+    },
+    {
+      ":", ":",
+      { "", "", NULL }
+    },
+    {
+      "::", ":",
+      { "", "", "", NULL }
+    },
+    {
+      "a:b:c", ":",
+      { "a", "b", "c", NULL }
+    },
+    {
+      "a:b:", ":",
+      { "a", "b", "", NULL }
+    },
+    {
+      "a:b", ":",
+      { "a", "b", NULL }
+    },
+    {
+      "aa:b:cd", ":",
+      { "aa", "b", "cd", NULL }
+    },
+    {
+      "aa::b:cd", ":",
+      { "aa", "", "b", "cd", NULL }
+    },
+    {
+      "::b:cd", ":",
+      { "", "", "b", "cd", NULL }
+    },
+    {
+      "aa:   : b:cd ", ":",
+      { "aa", "   ", " b", "cd ", NULL }
+    },
+    {
+      "  aa:   : b:  cd ", ":",
+      { "  aa", "   ", " b", "  cd ", NULL }
+    },
+    {
+      "  ", ":",
+      { "  ", NULL }
+    },
+    {
+      "  :", ":",
+      { "  ", "", NULL }
+    },
+    {
+      "  : ", ":",
+      { "  ", " ", NULL }
+    },
+    {
+      ": ", ":",
+      { "", " ", NULL }
+    },
+    {
+      ": x ", ":",
+      { "", " x ", NULL }
+    },
+    {
+      "a:bc:cde:fghi:jklmn::foo:", ":",
+      { "a", "bc", "cde", "fghi", "jklmn", "", "foo", "", NULL }
+    },
+    {
+      ",a,bc,,def,", ",",
+      { "", "a", "bc", "", "def", "", NULL }
+    },
+    {
+      " a ", " ",
+      { "", "a", "", NULL }
+    },
+    {
+      " ", " ",
+      { "", "", NULL }
+    },
+    {
+      "", " ",
+      { "", NULL }
+    }
+  };
+
+  int tidx;
+
+  for (tidx = 0; tidx < DIM(tv); tidx++)
+    {
+      char **fields;
+      int field_count;
+      int field_count_expected;
+      int i;
+
+      for (field_count_expected = 0;
+           tv[tidx].fields_expected[field_count_expected];
+           field_count_expected ++)
+        ;
+
+      fields = strtokenize_nt (tv[tidx].s, tv[tidx].delim);
+      if (!fields)
+        fail (tidx * 1000);
+      else
+        {
+          for (field_count = 0; fields[field_count]; field_count++)
+            ;
+          if (field_count != field_count_expected)
+            fail (tidx * 1000);
+          else
+            {
+              for (i = 0; i < field_count_expected; i++)
+                if (strcmp (tv[tidx].fields_expected[i], fields[i]))
+                  {
+                    printf ("For field %d, expected '%s', but got '%s'\n",
+                            i, tv[tidx].fields_expected[i], fields[i]);
+                    fail (tidx * 1000 + i + 1);
+                  }
+            }
+          }
+
+      xfree (fields);
+    }
+}
+
+
 static void
 test_split_fields (void)
 {
@@ -1070,6 +1209,7 @@ main (int argc, char **argv)
   test_make_absfilename_try ();
   test_strsplit ();
   test_strtokenize ();
+  test_strtokenize_nt ();
   test_split_fields ();
   test_split_fields_colon ();
   test_compare_version_strings ();
diff --git a/doc/gpg.texi b/doc/gpg.texi
index dd6e432ec..16b6ec7fc 100644
--- a/doc/gpg.texi
+++ b/doc/gpg.texi
@@ -3944,6 +3944,9 @@ are:
   @var{VALUE} spans to the end of the expression.
   @item -c
   The string match in this part is done case-sensitive.
+  @item -t
+  Leading and trailing spaces are not removed from @var{VALUE}.
+  The otherwise optional single space after @var{op} is required here.
 @end table
 
 The filter options concatenate several specifications for a filter of
diff --git a/tools/wks-util.c b/tools/wks-util.c
index 8090a7eaa..c7227e063 100644
--- a/tools/wks-util.c
+++ b/tools/wks-util.c
@@ -348,7 +348,7 @@ wks_list_key (estream_t key, char **r_fpr, uidinfo_list_t *r_mboxes)
       /* log_debug ("line '%s'\n", line); */
 
       xfree (fields);
-      fields = strtokenize (line, ":");
+      fields = strtokenize_nt (line, ":");
       if (!fields)
         {
           err = gpg_error_from_syserror ();
@@ -467,7 +467,7 @@ wks_filter_uid (estream_t *r_newkey, estream_t key, const char *uid,
     es_fputs ("Content-Type: application/pgp-keys\n"
               "\n", newkey);
 
-  filterexp = es_bsprintf ("keep-uid=uid= %s", uid);
+  filterexp = es_bsprintf ("keep-uid=-t uid= %s", uid);
   if (!filterexp)
     {
       err = gpg_error_from_syserror ();