common: First take on handling Unicode command line args.

* common/w32-misc.c: New.
* common/t-w32-cmdline.c: New.
* common/init.c: Include w32help.h.
(prepare_w32_commandline): New.
(_init_common_subsystems) [W32]: Call prepare_w32_commandline.

* common/Makefile.am (common_sources) [W32]: Add w32-misc.c
(module_tests): Add t-w32-cmdline
(t_w32_cmdline_LDADD): New.
--

The rules for the command line parser are not clearly specified - if at
all.  See the comment in t-w32-cmdline.c.

We can't use the mingw version because that would require changing
all argv handling to wchar_t, and that only for Windows.  That would
be too ugly.  Parsing the command line into argv ourselves is much
easier and we can do that only if needed - i.e. if globbing is required
(we are prepared for this) or a non-ASCII character has been encountered.
This way we keep things stable and only fix the currently not working
Unicode problem.

GnuPG-bug-id: 4398
This commit is contained in:
Werner Koch 2021-03-04 10:16:48 +01:00
parent be2da24456
commit deb6c94362
No known key found for this signature in database
GPG Key ID: E3FDFF218E45B72B
5 changed files with 459 additions and 6 deletions

View File

@ -102,7 +102,7 @@ common_sources = \
if HAVE_W32_SYSTEM
common_sources += w32-reg.c
common_sources += w32-reg.c w32-misc.c
endif
# To make the code easier to read we have split home some code into
@ -168,7 +168,7 @@ module_tests = t-stringhelp t-timestuff \
t-convert t-percent t-gettime t-sysutils t-sexputil \
t-session-env t-openpgp-oid t-ssh-utils \
t-mapstrings t-zb32 t-mbox-util t-iobuf t-strlist \
t-name-value t-ccparray t-recsel
t-name-value t-ccparray t-recsel t-w32-cmdline
if !HAVE_W32CE_SYSTEM
module_tests += t-exechelp t-exectool
endif
@ -222,6 +222,9 @@ t_name_value_LDADD = $(t_common_ldadd)
t_ccparray_LDADD = $(t_common_ldadd)
t_recsel_LDADD = $(t_common_ldadd)
t_w32_cmdline_SOURCES = t-w32-cmdline.c w32-misc.c $(t_extra_src)
t_w32_cmdline_LDADD = $(t_common_ldadd)
# System specific test
if HAVE_W32_SYSTEM
t_w32_reg_SOURCES = t-w32-reg.c $(t_extra_src)

View File

@ -42,6 +42,7 @@
#include <gcrypt.h>
#include "util.h"
#include "i18n.h"
#include "w32help.h"
/* This object is used to register memory cleanup functions.
Technically they are not needed but they can avoid frequent
@ -79,6 +80,11 @@ sleep_on_exit (void)
}
#endif /*HAVE_W32CE_SYSTEM*/
#if HAVE_W32_SYSTEM
static void prepare_w32_commandline (int *argcp, char ***argvp);
#endif /*HAVE_W32_SYSTEM*/
static void
run_mem_cleanup (void)
@ -190,13 +196,10 @@ _init_common_subsystems (gpg_err_source_t errsource, int *argcp, char ***argvp)
gpgrt_init ();
gpgrt_set_alloc_func (gcry_realloc);
#ifdef HAVE_W32CE_SYSTEM
/* Special hack for Windows CE: We extract some options from arg
to setup the standard handles. */
#ifdef HAVE_W32CE_SYSTEM
parse_std_file_handles (argcp, argvp);
#else
(void)argcp;
(void)argvp;
#endif
/* Access the standard estreams as early as possible. If we don't
@ -217,6 +220,16 @@ _init_common_subsystems (gpg_err_source_t errsource, int *argcp, char ***argvp)
/* Logging shall use the standard socket directory as fallback. */
log_set_socket_dir_cb (gnupg_socketdir);
#if HAVE_W32_SYSTEM
/* For Standard Windows we use our own parser for the command line
* so that we can return an array of utf-8 encoded strings. */
prepare_w32_commandline (argcp, argvp);
#else
(void)argcp;
(void)argvp;
#endif
}
@ -290,3 +303,60 @@ parse_std_file_handles (int *argcp, char ***argvp)
}
#endif /*HAVE_W32CE_SYSTEM*/
/* Windows only: Replace the caller's argument vector by one holding
 * UTF-8 encoded strings which are parsed from the wide character
 * command line of the process.  This is done only if globing has
 * been requested or if the command line has at least one non-ASCII
 * character.  In all other cases, and also on any error, the values
 * at R_ARGC and R_ARGV are left untouched.  */
#ifdef HAVE_W32_SYSTEM
static void
prepare_w32_commandline (int *r_argc, char ***r_argv)
{
  const wchar_t *wline;
  const wchar_t *wp;
  const char *flag;
  char *line;
  char **items;
  int nitems;
  int want_glob;

  /* Globing is requested if the usage string 95 starts with a '1'.  */
  flag = gpgrt_strusage (95);
  want_glob = (flag && *flag == '1');

  wline = GetCommandLineW ();
  if (!wline)
    {
      log_error ("GetCommandLineW failed\n");
      return;
    }

  if (!want_glob)
    {
      /* Without globing our own parser is needed only if the
       * command line has a character outside of the ASCII range.  */
      int any_nonascii = 0;

      for (wp = wline; *wp; wp++)
        {
          if (!iswascii (*wp))
            {
              any_nonascii = 1;
              break;
            }
        }
      if (!any_nonascii)
        return;  /* Plain ASCII - keep the existing argv.  */
    }

  line = wchar_to_utf8 (wline);
  if (!line)
    {
      log_error ("parsing command line failed: %s\n", strerror (errno));
      return;
    }
  /* The new argv points into LINE; thus it is never released.  */
  gpgrt_annotate_leaked_object (line);

  items = w32_parse_commandline (line, want_glob, &nitems);
  if (!items)
    {
      log_error ("parsing command line failed: %s\n", "internal error");
      return;
    }
  gpgrt_annotate_leaked_object (items);

  /* Success: hand the new vector over to the caller.  */
  *r_argv = items;
  *r_argc = nitems;
}
#endif /*HAVE_W32_SYSTEM*/

181
common/t-w32-cmdline.c Normal file
View File

@ -0,0 +1,181 @@
/* t-w32-cmdline.c - Test the parser for the Windows command line
* Copyright (C) 2021 g10 Code GmbH
*
* This file is part of GnuPG.
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of either
*
* - the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* or
*
* - the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* or both in parallel, as here.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include "t-support.h"
#include "w32help.h"
#define PGM "t-w32-cmdline"
static int verbose;
static int debug;
static int errcount;
/* Run w32_parse_commandline over a table of command lines and compare
 * the returned argument count and strings with the expected values.
 * Each failing table entry bumps ERRCOUNT; diagnostics are more
 * detailed if VERBOSE is set.  */
static void
test_all (void)
{
  static struct {
    const char *cmdline;
    int argc;              /* Expected number of args.  */
    const char *argv[10];  /* Expected results.  */
  } tests[] = {
    /* Examples from "Parsing C++ Command-Line Arguments" dated 11/18/2006.
     * https://docs.microsoft.com/en-us/previous-versions/17w5ykft(v=vs.85)
     */
    { "\"abc\" d e", 3, { "abc", "d", "e" }},
    { "a\\\\\\b d\"e f\"g h", 3, { "a\\\\\\b", "de fg", "h" }},
    { "a\\\\\\\"b c d", 3, { "a\\\"b", "c", "d" }},
    { "a\\\\\\\\\"b c\" d e", 3, { "a\\\\b c", "d", "e" }},
    /* Some arbitrary tests created using mingw.
     * But I am not sure whether their parser is fully correct.
     */
    { "e:a a b\"c\" ", 3, { "e:a", "a", "bc" }},
    /* { "e:a a b\"c\"\" d\"\"e \" ", */
    /* 5, { "e:a", "a", "bc\"", "de", " " }}, */
    /* { "e:a a b\"c\"\" d\"\"e\" f\\gh ", */
    /* 4, { "e:a", "a", "bc\"", "de f\\gh "}}, */
    /* { "e:a a b\"c\"\" d\"\"e\" f\\\"gh \" ", */
    /* 4, { "e:a", "a", "bc\"", "de f\"gh " }},*/
    { "\"foo bar\"", 1 , { "foo bar" }},
    { "", 1 , { "" }}
  };
  int tidx;
  int i, any, argc;
  char *cmdline;
  char **argv;

  for (tidx = 0; tidx < DIM(tests); tidx++)
    {
      /* The parser modifies its input, thus work on a copy.  */
      cmdline = xstrdup (tests[tidx].cmdline);
      if (verbose && tidx)
        putchar ('\n');
      if (verbose)
        printf ("test %d: line ->%s<-\n", tidx, cmdline);
      argv = w32_parse_commandline (cmdline, 0, &argc);
      if (!argv)
        {
          fail (tidx);
          xfree (cmdline);
          continue;
        }
      if (tests[tidx].argc != argc)
        {
          fprintf (stderr, PGM": test %d: argc wrong (want %d, got %d)\n",
                   tidx, tests[tidx].argc, argc);
          any = 1;
        }
      else
        any = 0;
      for (i=0; i < tests[tidx].argc; i++)
        {
          if (verbose)
            printf ("test %d: argv[%d] ->%s<-\n",
                    tidx, i, tests[tidx].argv[i]);
          /* Compare only those items which have actually been returned.  */
          if (i < argc && strcmp (tests[tidx].argv[i], argv[i]))
            {
              if (verbose)
                printf ("test %d: got[%d] ->%s<- ERROR\n",
                        tidx, i, argv[i]);
              any = 1;
            }
        }
      if (any)
        {
          fprintf (stderr, PGM": test %d: error%s\n",
                   tidx, verbose? "":" (use --verbose)");
          errcount++;
        }
      /* The strings in ARGV point into CMDLINE; release the array
       * first and then the buffer so that nothing is leaked.  */
      xfree (argv);
      xfree (cmdline);
    }
}
/* Entry point of the test driver.  Handles the options --verbose,
 * --debug and --help, rejects any other option or argument, runs
 * the tests and returns 1 if any test failed, else 0.  */
int
main (int argc, char **argv)
{
  no_exit_on_fail = 1;

  /* Skip the program name.  */
  if (argc)
    {
      argc--;
      argv++;
    }

  while (argc)
    {
      const char *opt = *argv;

      if (!strcmp (opt, "--"))
        {
          /* Explicit end of options.  */
          argc--;
          argv++;
          break;
        }
      if (!strcmp (opt, "--help"))
        {
          fputs ("usage: " PGM " [FILE]\n"
                 "Options:\n"
                 " --verbose Print timings etc.\n"
                 " --debug Flyswatter\n"
                 , stdout);
          exit (0);
        }

      if (!strcmp (opt, "--verbose"))
        verbose++;
      else if (!strcmp (opt, "--debug"))
        {
          verbose += 2;
          debug++;
        }
      else if (!strncmp (opt, "--", 2))
        {
          fprintf (stderr, PGM ": unknown option '%s'\n", opt);
          exit (1);
        }
      else
        break;  /* Not an option; stop the option processing.  */

      /* Consume the option which has just been handled.  */
      argc--;
      argv++;
    }

  /* Anything left on the command line is an error.  */
  if (argc)
    {
      fprintf (stderr, PGM ": no arguments allowed\n");
      exit (1);
    }

  test_all ();

  return !!errcount;
}

192
common/w32-misc.c Normal file
View File

@ -0,0 +1,192 @@
/* w32-misc.c - Helper functions needed in Windows
* Copyright (C) 2021 g10 Code GmbH
*
* This file is part of GnuPG.
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of either
*
* - the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* or
*
* - the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* or both in parallel, as here.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include <config.h>
#include "util.h"
#include "w32help.h"
/* Return the length of the run of backslashes found at the begin of
 * the string S.  The result is 0 if S does not start with a
 * backslash.  */
static unsigned int
count_backslashes (const char *s)
{
  const char *end = s;

  while (*end == '\\')
    end++;
  return (unsigned int)(end - s);
}
/* Remove the quoting from the single argument STRING.  This is done
 * in place; the result is never longer than the input:
 *
 *  - 2n backslashes followed by a quote yield n backslashes; the
 *    quote is dropped.
 *  - 2n+1 backslashes followed by a quote yield n backslashes and a
 *    literal quote.
 *  - Backslashes not followed by a quote are copied verbatim.
 *  - Any other quote which is not the last character is dropped and
 *    the character following it is copied verbatim.
 */
static void
strip_one_arg (char *string)
{
  const char *src = string;
  char *dst = string;

  while (*src)
    {
      if (*src == '\\')
        {
          unsigned int run = count_backslashes (src);
          unsigned int left;

          if (src[run] == '"')
            {
              for (left = run / 2; left; left--)
                *dst++ = '\\';
              if ((run % 2)) /* Odd count: the quote is escaped.  */
                *dst++ = '"';
              src += run + 1;  /* Skip the backslashes and the quote.  */
            }
          else
            {
              for (left = run; left; left--)
                *dst++ = '\\';
              src += run;
            }
        }
      else if (*src == '"' && src[1])
        {
          /* Drop the quote and take the next character as is.  */
          *dst++ = src[1];
          src += 2;
        }
      else
        *dst++ = *src++;
    }
  *dst = 0;
}
/* Helper for w32_parse_commandline.  Split STRING at unquoted spaces
 * and tabs into arguments and return their number.  If ARGV is NULL
 * the arguments are only counted and STRING is not changed.  If ARGV
 * is not NULL, STRING is modified in place: each argument gets
 * terminated by a Nul, its quoting is removed using strip_one_arg,
 * and a pointer to it is stored in ARGV.  The caller must provide an
 * ARGV with enough room; calling this function first with a NULL
 * ARGV yields the required number of items.
 *
 * The first argument always starts at the begin of STRING; thus an
 * empty STRING still yields one (empty) argument.  A missing closing
 * quote is accepted.
 *
 * NOTE(review): Within a quoted part only the sequence backslash +
 * quote is skipped; a run of backslashes in front of the closing
 * quote is not counted.  */
static int
parse_cmdstring (char *string, char **argv)
{
  int argc = 0;
  int inquote = 0;
  char *p0, *p;
  unsigned int n;

  p0 = string;
  for (p=string; *p; p++)
    {
      if (inquote)
        {
          if (*p == '\\' && p[1] == '"')
            p++;  /* Skip the escaped quote.  */
          else if (*p == '"')
            {
              /* A closing quote at the end of an argument is cut off
               * right here so that strip_one_arg won't see it.  */
              if (argv && (p[1] == ' ' || p[1] == '\t' || !p[1]))
                *p = 0;
              inquote = 0;
            }
        }
      else if (*p == '\\')
        {
          n = count_backslashes (p);  /* At least 1.  */
          if (!p0) /* First non-WS; set start.  */
            p0 = p;
          if (p[n] == '"')
            {
              if (!(n&1)) /* Even number: the quote opens a quoted part. */
                inquote = 1;
              /* Move to the quote; the increment of the for loop
               * then steps over it.  */
              p += n;
            }
          else
            {
              /* Move to the last backslash of the run so that the
               * increment of the for loop yields the character
               * following the run.  Advancing by N here would skip
               * that character unexamined - and step beyond the end
               * of the string if the run ends the string.  */
              p += n - 1;
            }
        }
      else if (*p == '"')
        {
          inquote = 1;
          if (!p0 || p == string) /* First non-WS or first char; set start.  */
            p0 = p + 1;
        }
      else if (*p == ' ' || *p == '\t')
        {
          if (p0) /* We are in an argument and reached WS.  */
            {
              if (argv)
                {
                  *p = 0;
                  strip_one_arg (p0);
                  argv[argc] = p0;
                }
              argc++;
              p0 = NULL;
            }
        }
      else if (!p0) /* First non-WS; set start.  */
        p0 = p;
    }

  if (inquote || p0)
    {
      /* Closing quote missing (we accept this as argument anyway) or
       * an open argument.  */
      if (argv)
        {
          *p = 0;
          strip_one_arg (p0);
          argv[argc] = p0;
        }
      argc++;
    }

  return argc;
}
/* Parser for a Windows command line.  CMDLINE is expected to be
 * utf-8 encoded; it is modified by this function and the strings of
 * the returned array point into it.  Thus CMDLINE must not be freed
 * as long as the array is in use; the array itself has been
 * allocated.  GLOBING is meant to request globing for all items but
 * it is currently ignored.  On success the array is returned and
 * the number of its items is stored at R_ARGC.  On error NULL is
 * returned after a diagnostic has been logged.  */
char **
w32_parse_commandline (char *cmdline, int globing, int *r_argc)
{
  char **result;
  int nwanted, nparsed;

  (void)globing;

  /* First pass: Count the items without changing CMDLINE.  */
  nwanted = parse_cmdstring (cmdline, NULL);
  if (!nwanted)
    {
      log_error ("%s failed: %s\n", __func__, "internal error");
      return NULL;
    }

  /* One extra slot is allocated in addition to the counted items.  */
  result = xtrycalloc (nwanted+1, sizeof *result);
  if (!result)
    {
      log_error ("%s failed: %s\n", __func__, strerror (errno));
      return NULL;
    }

  /* Second pass: Split CMDLINE and fill the array.  */
  nparsed = parse_cmdstring (cmdline, result);
  if (nparsed == nwanted)
    {
      *r_argc = nwanted;
      return result;
    }

  /* Both passes must agree on the number of items.  */
  log_error ("%s failed (argc=%d i=%d)\n", __func__, nwanted, nparsed);
  xfree (result);
  return NULL;
}

View File

@ -30,6 +30,13 @@
#ifndef GNUPG_COMMON_W32HELP_H
#define GNUPG_COMMON_W32HELP_H
/*-- w32-misc.c --*/
/* This module is also part of the Unix tests. */
/* Parse the utf-8 encoded CMDLINE into an allocated array of strings
 * and store the number of items at R_ARGC.  CMDLINE is modified and
 * the returned strings point into it.  GLOBING requests globing of
 * the items.  Returns NULL on error.  */
char **w32_parse_commandline (char *cmdline, int globing, int *r_argc);
#ifdef HAVE_W32_SYSTEM
/*-- w32-reg.c --*/