mirror of
git://git.gnupg.org/gnupg.git
synced 2025-01-21 14:47:03 +01:00
gpg: Add regular expression support.
* AUTHORS, COPYING.other: Update. * Makefile.am (SUBDIRS): Add regexp sub directory. * configure.ac (DISABLE_REGEX): Remove. * g10/Makefile.am (needed_libs): Add libregexp.a. * g10/trustdb.c: Remove DISABLE_REGEX support. * regexp/LICENSE, regexp/jimregexp.c, regexp/jimregexp.h, regexp/utf8.c, regexp/utf8.h: New from Jim Tcl. * regexp/UnicodeData.txt: New from Unicode. * regexp/Makefile.am, regexp/parse-unidata.awk: New. * tests/openpgp/Makefile.am: Remove DISABLE_REGEX support. * tools/Makefile.am: Remove DISABLE_REGEX support. -- Backport master commit of: ba247a114c75a84473c11c1484013b09fbb9bcd1 GnuPG-bug-id: 4843 Signed-off-by: NIIBE Yutaka <gniibe@fsij.org>
This commit is contained in:
parent
c5ab1dcd54
commit
199309190a
1
.gitignore
vendored
1
.gitignore
vendored
@ -100,6 +100,7 @@ dirmngr/dirmngr
|
||||
dirmngr/dirmngr-client
|
||||
dirmngr/dirmngr_ldap
|
||||
dirmngr/no-libgcrypt.c
|
||||
regexp/_unicode_mapping.c
|
||||
tests/asschk
|
||||
tests/gpg-agent.conf
|
||||
tests/gpg.conf
|
||||
|
2
AUTHORS
2
AUTHORS
@ -70,6 +70,8 @@ The DNS resolver code is libdns by William Ahern; see COPYING.other.
|
||||
The test driver is based on TinySCHEME by Dimitrios Souflis and
|
||||
available under a permissive license; see COPYING.other.
|
||||
|
||||
Regular expression implementation is taken from Jim Tcl, originally
|
||||
by Henry Spencer, and modified by others; see COPYING.other.
|
||||
|
||||
License
|
||||
========
|
||||
|
100
COPYING.other
100
COPYING.other
@ -58,3 +58,103 @@
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
* Regular expression support
|
||||
|
||||
regexp/jimregexp.[ch]:
|
||||
|
||||
Originally:
|
||||
|
||||
Copyright (c) 1986 by University of Toronto.
|
||||
Written by Henry Spencer. Not derived from licensed software.
|
||||
|
||||
Subsequently modified by:
|
||||
|
||||
1986 John Gilmore hoptoad!gnu
|
||||
1987 James A. Woods ames!jaw
|
||||
1993 Christopher Seiwald seiwald@vix.com
|
||||
2000, 2002 Christopher Seiwald <seiwald@perforce.com>
|
||||
2010 Steve Bennett <steveb@workware.net.au>
|
||||
2020 NIIBE Yutaka <gniibe@fsij.org>
|
||||
|
||||
Permission is granted to anyone to use this software for any
|
||||
purpose on any computer system, and to redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The author is not responsible for the consequences of use of
|
||||
this software, no matter how awful, even if they arise
|
||||
from defects in it.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either
|
||||
by explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not
|
||||
be misrepresented as being the original software.
|
||||
|
||||
|
||||
regexp/utf8.[ch] (see regexp/LICENSE):
|
||||
|
||||
(c) 2010-2016 Steve Bennett <steveb@workware.net.au>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
The views and conclusions contained in the software and documentation
|
||||
are those of the authors and should not be interpreted as representing
|
||||
official policies, either expressed or implied, of the Jim Tcl Project.
|
||||
|
||||
|
||||
UnicodeData.txt:
|
||||
|
||||
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
@ -101,7 +101,7 @@ else
|
||||
doc =
|
||||
endif
|
||||
|
||||
SUBDIRS = m4 common kbx \
|
||||
SUBDIRS = m4 common regexp kbx \
|
||||
${gpg} ${sm} ${agent} ${scd} ${g13} ${dirmngr} \
|
||||
tools po ${doc} tests
|
||||
|
||||
|
64
configure.ac
64
configure.ac
@ -1452,62 +1452,6 @@ AC_CHECK_FUNCS([getpeerucred])
|
||||
#
|
||||
GNUPG_FUNC_MKDIR_TAKES_ONE_ARG
|
||||
|
||||
#
|
||||
# Sanity check regex. Tests adapted from mutt.
|
||||
#
|
||||
AC_MSG_CHECKING([whether regular expression support is requested])
|
||||
AC_ARG_ENABLE(regex,
|
||||
AC_HELP_STRING([--disable-regex],
|
||||
[do not handle regular expressions in trust signatures]),
|
||||
use_regex=$enableval, use_regex=yes)
|
||||
AC_MSG_RESULT($use_regex)
|
||||
|
||||
if test "$use_regex" = yes ; then
|
||||
_cppflags="${CPPFLAGS}"
|
||||
_ldflags="${LDFLAGS}"
|
||||
AC_ARG_WITH(regex,
|
||||
AC_HELP_STRING([--with-regex=DIR],[look for regex in DIR]),
|
||||
[
|
||||
if test -d "$withval" ; then
|
||||
CPPFLAGS="${CPPFLAGS} -I$withval/include"
|
||||
LDFLAGS="${LDFLAGS} -L$withval/lib"
|
||||
fi
|
||||
],withval="")
|
||||
|
||||
# Does the system have regex functions at all?
|
||||
AC_SEARCH_LIBS([regcomp], [regex])
|
||||
AC_CHECK_FUNC(regcomp, gnupg_cv_have_regex=yes, gnupg_cv_have_regex=no)
|
||||
|
||||
if test $gnupg_cv_have_regex = no; then
|
||||
use_regex=no
|
||||
else
|
||||
if test x"$cross_compiling" = xyes; then
|
||||
AC_MSG_WARN([cross compiling; assuming regexp libray is not broken])
|
||||
else
|
||||
AC_CACHE_CHECK([whether your system's regexp library is broken],
|
||||
[gnupg_cv_regex_broken],
|
||||
AC_TRY_RUN([
|
||||
#include <unistd.h>
|
||||
#include <regex.h>
|
||||
main() { regex_t blah ; regmatch_t p; p.rm_eo = p.rm_eo; return regcomp(&blah, "foo.*bar", REG_NOSUB) || regexec (&blah, "foobar", 0, NULL, 0); }],
|
||||
gnupg_cv_regex_broken=no, gnupg_cv_regex_broken=yes, gnupg_cv_regex_broken=yes))
|
||||
|
||||
if test $gnupg_cv_regex_broken = yes; then
|
||||
AC_MSG_WARN([your regex is broken - disabling regex use])
|
||||
use_regex=no
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
CPPFLAGS="${_cppflags}"
|
||||
LDFLAGS="${_ldflags}"
|
||||
fi
|
||||
|
||||
if test "$use_regex" != yes ; then
|
||||
AC_DEFINE(DISABLE_REGEX,1, [Define to disable regular expression support])
|
||||
fi
|
||||
AM_CONDITIONAL(DISABLE_REGEX, test x"$use_regex" != xyes)
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Do we have zlib? Must do it here because Solaris failed
|
||||
@ -2024,6 +1968,7 @@ Makefile
|
||||
po/Makefile.in
|
||||
common/Makefile
|
||||
common/w32info-rc.h
|
||||
regexp/Makefile
|
||||
kbx/Makefile
|
||||
g10/Makefile
|
||||
sm/Makefile
|
||||
@ -2077,13 +2022,6 @@ echo "
|
||||
TOFU support: $use_tofu
|
||||
Tor support: $show_tor_support
|
||||
"
|
||||
if test x"$use_regex" != xyes ; then
|
||||
echo "
|
||||
Warning: No regular expression support available.
|
||||
OpenPGP trust signatures won't work.
|
||||
gpg-check-pattern will not be built.
|
||||
"
|
||||
fi
|
||||
if test "x${gpg_config_script_warn}" != x; then
|
||||
cat <<G10EOF
|
||||
Warning: Mismatches between the target platform and the
|
||||
|
@ -31,7 +31,7 @@ include $(top_srcdir)/am/cmacros.am
|
||||
AM_CFLAGS = $(SQLITE3_CFLAGS) $(LIBGCRYPT_CFLAGS) \
|
||||
$(LIBASSUAN_CFLAGS) $(GPG_ERROR_CFLAGS)
|
||||
|
||||
needed_libs = ../kbx/libkeybox.a $(libcommon)
|
||||
needed_libs = ../kbx/libkeybox.a $(libcommon) ../regexp/libregexp.a
|
||||
|
||||
# Because there are no program specific transform macros we need to
|
||||
# work around that to allow installing gpg as gpg2.
|
||||
|
@ -23,14 +23,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef DISABLE_REGEX
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
#endif /* !DISABLE_REGEX */
|
||||
|
||||
#include "gpg.h"
|
||||
#include "../common/status.h"
|
||||
#include "../common/iobuf.h"
|
||||
#include "../regexp/jimregexp.h"
|
||||
#include "keydb.h"
|
||||
#include "../common/util.h"
|
||||
#include "options.h"
|
||||
@ -1519,8 +1515,7 @@ store_validation_status (ctrl_t ctrl, int depth,
|
||||
|
||||
/* Returns a sanitized copy of the regexp (which might be "", but not
|
||||
NULL). */
|
||||
#ifndef DISABLE_REGEX
|
||||
/* Operator charactors except '.' and backslash.
|
||||
/* Operator characters except '.' and backslash.
|
||||
See regex(7) on BSD. */
|
||||
#define REGEXP_OPERATOR_CHARS "^[$()|*+?{"
|
||||
|
||||
@ -1584,7 +1579,6 @@ sanitize_regexp(const char *old)
|
||||
|
||||
return new;
|
||||
}
|
||||
#endif /*!DISABLE_REGEX*/
|
||||
|
||||
/* Used by validate_one_keyblock to confirm a regexp within a trust
|
||||
signature. Returns 1 for match, and 0 for no match or regex
|
||||
@ -1592,25 +1586,15 @@ sanitize_regexp(const char *old)
|
||||
static int
|
||||
check_regexp(const char *expr,const char *string)
|
||||
{
|
||||
#ifdef DISABLE_REGEX
|
||||
(void)expr;
|
||||
(void)string;
|
||||
/* When DISABLE_REGEX is defined, assume all regexps do not
|
||||
match. */
|
||||
return 0;
|
||||
#else
|
||||
int ret;
|
||||
char *regexp;
|
||||
|
||||
regexp=sanitize_regexp(expr);
|
||||
|
||||
#ifdef __riscos__
|
||||
ret=riscos_check_regexp(expr, string, DBG_TRUST);
|
||||
#else
|
||||
{
|
||||
regex_t pat;
|
||||
|
||||
ret=regcomp(&pat,regexp,REG_ICASE|REG_NOSUB|REG_EXTENDED);
|
||||
ret=regcomp(&pat,regexp,REG_ICASE|REG_EXTENDED);
|
||||
if(ret==0)
|
||||
{
|
||||
ret=regexec(&pat,string,0,NULL,0);
|
||||
@ -1618,7 +1602,6 @@ check_regexp(const char *expr,const char *string)
|
||||
}
|
||||
ret=(ret==0);
|
||||
}
|
||||
#endif
|
||||
|
||||
if(DBG_TRUST)
|
||||
log_debug("regexp '%s' ('%s') on '%s': %s\n",
|
||||
@ -1627,7 +1610,6 @@ check_regexp(const char *expr,const char *string)
|
||||
xfree(regexp);
|
||||
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
45
regexp/LICENSE
Normal file
45
regexp/LICENSE
Normal file
@ -0,0 +1,45 @@
|
||||
Unless explicitly stated, all files within Jim repository are released
|
||||
under following license:
|
||||
|
||||
/* Jim - A small embeddable Tcl interpreter
|
||||
*
|
||||
* Copyright 2005 Salvatore Sanfilippo <antirez@invece.org>
|
||||
* Copyright 2005 Clemens Hintze <c.hintze@gmx.net>
|
||||
* Copyright 2005 patthoyts - Pat Thoyts <patthoyts@users.sf.net>
|
||||
* Copyright 2008 oharboe - Øyvind Harboe - oyvind.harboe@zylin.com
|
||||
* Copyright 2008 Andrew Lunn <andrew@lunn.ch>
|
||||
* Copyright 2008 Duane Ellis <openocd@duaneellis.com>
|
||||
* Copyright 2008 Uwe Klein <uklein@klein-messgeraete.de>
|
||||
* Copyright 2008 Steve Bennett <steveb@workware.net.au>
|
||||
* Copyright 2009 Nico Coesel <ncoesel@dealogic.nl>
|
||||
* Copyright 2009 Zachary T Welch zw@superlucidity.net
|
||||
* Copyright 2009 David Brownell
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* The views and conclusions contained in the software and documentation
|
||||
* are those of the authors and should not be interpreted as representing
|
||||
* official policies, either expressed or implied, of the Jim Tcl Project.
|
||||
*/
|
38
regexp/Makefile.am
Normal file
38
regexp/Makefile.am
Normal file
@ -0,0 +1,38 @@
|
||||
# Makefile for the regexp library (libregexp.a)
|
||||
# Copyright (C) 2020 g10 Code GmbH
|
||||
#
|
||||
# This file is part of GnuPG.
|
||||
#
|
||||
# GnuPG is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GnuPG is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
## Process this file with automake to produce Makefile.in
|
||||
|
||||
# The only product of this directory is a static library; the noinst_
# prefix means it is built but not installed.
noinst_LIBRARIES = libregexp.a

# JIM_UTF8 selects the real UTF-8 prototypes in utf8.h (instead of the
# one-byte-per-char macros) and USE_UTF8 enables their implementations
# in utf8.c.
# NOTE(review): JIM_REGEXP is presumably consumed by jimregexp.c - confirm.
AM_CPPFLAGS = -DJIM_REGEXP -DJIM_UTF8 -DUSE_UTF8

AM_CFLAGS =

libregexp_a_SOURCES = jimregexp.h utf8.h jimregexp.c utf8.c
libregexp_a_CFLAGS = $(AM_CFLAGS)

# _unicode_mapping.c is pulled into utf8.c via #include, so it is shipped
# here rather than listed in SOURCES, together with the awk script and the
# data file it is generated from.
EXTRA_DIST = parse-unidata.awk UnicodeData.txt _unicode_mapping.c
|
||||
|
||||
# The case mapping table is only regenerated in maintainer mode; regular
# builds use the copy of _unicode_mapping.c shipped via EXTRA_DIST.
if MAINTAINER_MODE
BUILT_SOURCES = _unicode_mapping.c
MAINTAINERCLEANFILES = _unicode_mapping.c

# Generate into a temporary file and rename it afterwards.  Redirecting
# straight into $@ would leave a truncated but up-to-date looking table
# behind whenever the awk run fails.
_unicode_mapping.c: parse-unidata.awk UnicodeData.txt
	$(AWK) -f $(srcdir)/parse-unidata.awk $(srcdir)/UnicodeData.txt \
	  >$@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
endif
|
33797
regexp/UnicodeData.txt
Normal file
33797
regexp/UnicodeData.txt
Normal file
File diff suppressed because it is too large
Load Diff
1909
regexp/jimregexp.c
Normal file
1909
regexp/jimregexp.c
Normal file
File diff suppressed because it is too large
Load Diff
109
regexp/jimregexp.h
Normal file
109
regexp/jimregexp.h
Normal file
@ -0,0 +1,109 @@
|
||||
#ifndef JIMREGEXP_H
|
||||
#define JIMREGEXP_H
|
||||
|
||||
/** regexp(3)-compatible regular expression implementation for Jim.
|
||||
*
|
||||
* See jimregexp.c for details
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct {
|
||||
int rm_so;
|
||||
int rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/*
|
||||
* The "internal use only" fields in regexp.h are present to pass info from
|
||||
* compile to execute that permits the execute phase to run lots faster on
|
||||
* simple cases. They are:
|
||||
*
|
||||
* regstart char that must begin a match; '\0' if none obvious
|
||||
* reganch is the match anchored (at beginning-of-line only)?
|
||||
* regmust string (pointer into program) that match must include, or NULL
|
||||
* regmlen length of regmust string
|
||||
*
|
||||
* Regstart and reganch permit very fast decisions on suitable starting points
|
||||
* for a match, cutting down the work a lot. Regmust permits fast rejection
|
||||
* of lines that cannot possibly match. The regmust tests are costly enough
|
||||
* that regcomp() supplies a regmust only if the r.e. contains something
|
||||
* potentially expensive (at present, the only such thing detected is * or +
|
||||
* at the start of the r.e., which can involve a lot of backup). Regmlen is
|
||||
* supplied because the test in regexec() needs it and regcomp() is computing
|
||||
* it anyway.
|
||||
*/
|
||||
|
||||
struct regexp {
|
||||
/* -- public -- */
|
||||
int re_nsub; /* number of parenthesized subexpressions */
|
||||
|
||||
/* -- private -- */
|
||||
int cflags; /* Flags used when compiling */
|
||||
int err; /* Any error which occurred during compile */
|
||||
int regstart; /* Internal use only. */
|
||||
int reganch; /* Internal use only. */
|
||||
int regmust; /* Internal use only. */
|
||||
int regmlen; /* Internal use only. */
|
||||
int *program; /* Allocated */
|
||||
|
||||
/* working state - compile */
|
||||
const char *regparse; /* Input-scan pointer. */
|
||||
int p; /* Current output pos in program */
|
||||
int proglen; /* Allocated program size */
|
||||
|
||||
/* working state - exec */
|
||||
int eflags; /* Flags used when executing */
|
||||
const char *start; /* Initial string pointer. */
|
||||
const char *reginput; /* Current input pointer. */
|
||||
const char *regbol; /* Beginning of input, for ^ check. */
|
||||
|
||||
/* Input to regexec() */
|
||||
regmatch_t *pmatch; /* submatches will be stored here */
|
||||
int nmatch; /* size of pmatch[] */
|
||||
};
|
||||
|
||||
typedef struct regexp regex_t;
|
||||
|
||||
#define REG_EXTENDED 0
|
||||
#define REG_NEWLINE 1
|
||||
#define REG_ICASE 2
|
||||
|
||||
#define REG_NOTBOL 16
|
||||
|
||||
enum {
|
||||
REG_NOERROR, /* Success. */
|
||||
REG_NOMATCH, /* Didn't find a match (for regexec). */
|
||||
REG_BADPAT, /* >= REG_BADPAT is an error */
|
||||
REG_ERR_NULL_ARGUMENT,
|
||||
REG_ERR_UNKNOWN,
|
||||
REG_ERR_TOO_BIG,
|
||||
REG_ERR_NOMEM,
|
||||
REG_ERR_TOO_MANY_PAREN,
|
||||
REG_ERR_UNMATCHED_PAREN,
|
||||
REG_ERR_UNMATCHED_BRACES,
|
||||
REG_ERR_BAD_COUNT,
|
||||
REG_ERR_JUNK_ON_END,
|
||||
REG_ERR_OPERAND_COULD_BE_EMPTY,
|
||||
REG_ERR_NESTED_COUNT,
|
||||
REG_ERR_INTERNAL,
|
||||
REG_ERR_COUNT_FOLLOWS_NOTHING,
|
||||
REG_ERR_TRAILING_BACKSLASH,
|
||||
REG_ERR_CORRUPTED,
|
||||
REG_ERR_NULL_CHAR,
|
||||
REG_ERR_NUM
|
||||
};
|
||||
|
||||
int regcomp(regex_t *preg, const char *regex, int cflags);
|
||||
int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
|
||||
void regfree(regex_t *preg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
62
regexp/parse-unidata.awk
Normal file
62
regexp/parse-unidata.awk
Normal file
@ -0,0 +1,62 @@
|
||||
#
|
||||
# parse-unidata.awk - generate a table (unicode_case_mapping_upper)
|
||||
#
|
||||
# Copyright (C) 2020 g10 Code GmbH
|
||||
#
|
||||
# This file is part of GnuPG.
|
||||
#
|
||||
# GnuPG is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GnuPG is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
# Parse the unicode data from:
|
||||
# https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
# to generate case mapping table
|
||||
|
||||
# Convert the hexadecimal string S (without "0x" prefix) to a number.
# Parsing stops at the first character that is not a hex digit.  This is
# done by hand because strtonum() is a gawk extension and the script is
# run with whatever awk the build system selected.
function hex2num(s,    i, n, d)
{
    n = 0
    s = tolower(s)
    for (i = 1; i <= length(s); i++) {
        d = index("0123456789abcdef", substr(s, i, 1))
        if (d == 0) {
            break
        }
        n = n * 16 + (d - 1)
    }
    return n
}

# Emit the opening of the C table; UnicodeData.txt fields are separated
# by semicolons.
BEGIN {
    print("/* Generated from UnicodeData.txt */")
    print("")
    print("static const struct casemap unicode_case_mapping_upper[] = {")
    FS = ";"
    count = 0
}

# One record per code point: field 1 is the code point in hex, field 3
# the general category and field 13 the simple uppercase mapping.
{
    code = hex2num($1)
    class = $3
    upper = $13

    # Limits are written in decimal because hexadecimal literals in the
    # program text are a gawk extension as well.
    if (code <= 127) {          # 0x7f: ASCII is not part of the table
        next
    }
    if (code > 65535) {         # 0xffff: only 16 bit code points are emitted
        next
    }
    if (class !~ /^L/) {        # only letter categories (L*)
        next
    }
    if (upper != "") {
        printf("\t{ 0x%s, 0x%s },", tolower($1), tolower(upper))
        count++
        # Four entries per output line.
        if ((count % 4) == 0) {
            print("")
        }
    }
}

# Close the C table.
END {
    print("\n};")
}
|
150
regexp/utf8.c
Normal file
150
regexp/utf8.c
Normal file
@ -0,0 +1,150 @@
|
||||
/**
|
||||
* UTF-8 utility functions
|
||||
*
|
||||
* (c) 2010-2016 Steve Bennett <steveb@workware.net.au>
|
||||
*
|
||||
* See LICENSE for licence details.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "utf8.h"
|
||||
|
||||
/*
 * Encode the code point 'uc' as UTF-8 into the buffer at 'p' and return
 * the number of bytes stored (1 to 4).  No terminating NUL is written.
 *
 * This one is always implemented, i.e. it does not depend on USE_UTF8.
 */
int utf8_fromunicode(char *p, unsigned uc)
{
    char *out = p;

    if (uc <= 0x7f) {
        /* ASCII is stored unchanged in a single byte. */
        *out++ = uc;
    }
    else if (uc <= 0x7ff) {
        /* 110xxxxx 10xxxxxx */
        *out++ = 0xc0 | ((uc & 0x7c0) >> 6);
        *out++ = 0x80 | (uc & 0x3f);
    }
    else if (uc <= 0xffff) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        *out++ = 0xe0 | ((uc & 0xf000) >> 12);
        *out++ = 0x80 | ((uc & 0xfc0) >> 6);
        *out++ = 0x80 | (uc & 0x3f);
    }
    else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
         * Note: We silently truncate to 21 bits here: 0x1fffff */
        *out++ = 0xf0 | ((uc & 0x1c0000) >> 18);
        *out++ = 0x80 | ((uc & 0x3f000) >> 12);
        *out++ = 0x80 | ((uc & 0xfc0) >> 6);
        *out++ = 0x80 | (uc & 0x3f);
    }

    return out - p;
}
|
||||
|
||||
#if defined(USE_UTF8) && !defined(JIM_BOOTSTRAP)
|
||||
/*
 * Return the length in bytes (1 to 4) of the UTF-8 sequence introduced by
 * the lead byte 'c'.  Only the low eight bits of 'c' are inspected.
 */
int utf8_charlen(int c)
{
    /* ASCII bytes and bytes that cannot start a sequence both count as a
     * single byte, so that is the default. */
    int len = 1;

    if ((c & 0xe0) == 0xc0) {
        len = 2;                /* 110xxxxx */
    }
    else if ((c & 0xf0) == 0xe0) {
        len = 3;                /* 1110xxxx */
    }
    else if ((c & 0xf8) == 0xf0) {
        len = 4;                /* 11110xxx */
    }

    return len;
}
|
||||
|
||||
/*
 * Return the byte offset of character number 'index' within 'str', found
 * by stepping over 'index' characters using utf8_charlen().
 *
 * The loop does not look for the terminating NUL itself, so 'index' must
 * not exceed the number of characters in the string.
 */
int utf8_index(const char *str, int index)
{
    const char *cursor = str;
    int remaining = index;

    while (remaining-- != 0) {
        cursor += utf8_charlen(*cursor);
    }

    return cursor - str;
}
|
||||
|
||||
/*
 * Decode the UTF-8 sequence starting at 'str', store the code point in
 * '*uc' and return the number of bytes consumed (1 to 4).
 *
 * A sequence with a missing continuation byte, an overlong encoding, or a
 * lead byte outside 0xc0..0xf7 is not decoded: the first byte is stored
 * as-is and 1 is returned.  Continuation bytes are examined strictly left
 * to right, so a truncated sequence at the end of a NUL terminated string
 * is never read past its terminator.
 */
int utf8_tounicode(const char *str, int *uc)
{
    const unsigned char *b = (const unsigned char *)str;
    const int lead = b[0];
    int cp;

    if (lead >= 0xc0 && lead < 0xe0) {
        /* Two bytes: 110xxxxx 10xxxxxx */
        if ((b[1] & 0xc0) == 0x80) {
            cp = ((lead & 0x3f) << 6) | (b[1] & 0x7f);
            if (cp >= 0x80) {
                *uc = cp;
                return 2;
            }
            /* Otherwise this is an invalid (overlong) sequence */
        }
    }
    else if (lead >= 0xe0 && lead < 0xf0) {
        /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        if ((b[1] & 0xc0) == 0x80 && (b[2] & 0xc0) == 0x80) {
            cp = ((lead & 0x1f) << 12) | ((b[1] & 0x7f) << 6) | (b[2] & 0x7f);
            if (cp >= 0x800) {
                *uc = cp;
                return 3;
            }
            /* Otherwise this is an invalid (overlong) sequence */
        }
    }
    else if (lead >= 0xf0 && lead < 0xf8) {
        /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        if ((b[1] & 0xc0) == 0x80 && (b[2] & 0xc0) == 0x80 && (b[3] & 0xc0) == 0x80) {
            cp = ((lead & 0x0f) << 18) | ((b[1] & 0x7f) << 12)
                | ((b[2] & 0x7f) << 6) | (b[3] & 0x7f);
            if (cp >= 0x10000) {
                *uc = cp;
                return 4;
            }
            /* Otherwise this is an invalid (overlong) sequence */
        }
    }

    /* Plain single byte or invalid sequence, so just return the byte */
    *uc = lead;
    return 1;
}
|
||||
|
||||
struct casemap {
|
||||
unsigned short code; /* code point */
|
||||
unsigned short altcode; /* alternate case code point */
|
||||
};
|
||||
|
||||
|
||||
/* Generated mapping tables */
|
||||
#include "_unicode_mapping.c"
|
||||
|
||||
/* Number of elements in the array A.  The expansion is parenthesized so
 * that the macro can be used safely inside larger expressions. */
#define ARRAYSIZE(A) (sizeof(A) / sizeof(*(A)))
|
||||
|
||||
static int cmp_casemap(const void *key, const void *cm)
|
||||
{
|
||||
return *(int *)key - (int)((const struct casemap *)cm)->code;
|
||||
}
|
||||
|
||||
static int utf8_map_case(const struct casemap *mapping, int num, int ch)
|
||||
{
|
||||
/* We only support 16 bit case mapping */
|
||||
if (ch <= 0xffff) {
|
||||
const struct casemap *cm =
|
||||
bsearch(&ch, mapping, num, sizeof(*mapping), cmp_casemap);
|
||||
|
||||
if (cm) {
|
||||
return cm->altcode;
|
||||
}
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
int utf8_upper(int ch)
|
||||
{
|
||||
if (isascii(ch)) {
|
||||
return toupper(ch);
|
||||
}
|
||||
return utf8_map_case(unicode_case_mapping_upper, ARRAYSIZE(unicode_case_mapping_upper), ch);
|
||||
}
|
||||
#endif /* JIM_BOOTSTRAP */
|
90
regexp/utf8.h
Normal file
90
regexp/utf8.h
Normal file
@ -0,0 +1,90 @@
|
||||
#ifndef UTF8_UTIL_H
|
||||
#define UTF8_UTIL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* UTF-8 utility functions
|
||||
*
|
||||
* (c) 2010-2016 Steve Bennett <steveb@workware.net.au>
|
||||
*
|
||||
* See LICENSE for licence details.
|
||||
*/
|
||||
|
||||
/* Currently we support unicode code points up to 2^21-1 (0x1fffff) */
|
||||
#define MAX_UTF8_LEN 4
|
||||
|
||||
/**
|
||||
* Converts the given unicode codepoint (0 - 0x1fffff) to utf-8
|
||||
* and stores the result at 'p'.
|
||||
*
|
||||
* Returns the number of bytes stored at 'p' (up to MAX_UTF8_LEN).
|
||||
*/
|
||||
int utf8_fromunicode(char *p, unsigned uc);
|
||||
|
||||
#ifndef JIM_UTF8
|
||||
#include <ctype.h>
|
||||
|
||||
/* No utf-8 support. 1 byte = 1 char */
|
||||
#define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
|
||||
#define utf8_getchars(CP, C) (*(CP) = (C), 1)
|
||||
#define utf8_upper(C) toupper(C)
|
||||
#define utf8_index(C, I) (I)
|
||||
#define utf8_charlen(C) 1
|
||||
|
||||
#else
|
||||
#if !defined(JIM_BOOTSTRAP)
|
||||
|
||||
#define utf8_getchars utf8_fromunicode
|
||||
|
||||
/**
|
||||
* Returns the length of the utf-8 sequence starting with 'c'.
|
||||
*
|
||||
* Returns 1-4.
|
||||
* If 'c' is not a valid start byte, returns 1.
|
||||
*/
|
||||
int utf8_charlen(int c);
|
||||
|
||||
/**
|
||||
* Returns the byte index of the given character in the utf-8 string.
|
||||
*
|
||||
* The string *must* be null terminated.
|
||||
*
|
||||
* This will return the byte length of a utf-8 string
|
||||
* if given the char length.
|
||||
*/
|
||||
int utf8_index(const char *str, int charindex);
|
||||
|
||||
/**
|
||||
* Returns the unicode codepoint corresponding to the
|
||||
* utf-8 sequence 'str'.
|
||||
*
|
||||
* Stores the result in *uc and returns the number of bytes
|
||||
* consumed.
|
||||
*
|
||||
* If 'str' is null terminated, then an invalid utf-8 sequence
|
||||
* at the end of the string will be returned as individual bytes.
|
||||
*
|
||||
* If it is not null terminated, the length *must* be checked first.
|
||||
*
|
||||
* Does not support unicode code points > \u1fffff
|
||||
*/
|
||||
int utf8_tounicode(const char *str, int *uc);
|
||||
|
||||
/**
|
||||
* Returns the upper-case variant of the given unicode codepoint.
|
||||
*
|
||||
* Unicode code points > \uffff are returned unchanged.
|
||||
*/
|
||||
int utf8_upper(int uc);
|
||||
#endif /* JIM_BOOTSTRAP */
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -279,6 +279,8 @@ CLEANFILES = prepared.stamp x y yy z out err $(data_files) \
|
||||
gnupg-test.stop random_seed gpg-agent.log tofu.db \
|
||||
passphrases sshcontrol S.gpg-agent.ssh report.xml
|
||||
|
||||
XTESTS += trust-pgp-4.scm
|
||||
|
||||
clean-local:
|
||||
-rm -rf private-keys-v1.d openpgp-revocs.d tofu.d gpgtar.d
|
||||
|
||||
|
@ -58,9 +58,7 @@ if !HAVE_W32_SYSTEM
|
||||
bin_PROGRAMS += watchgnupg gpgparsemail ${gpg_wks_server}
|
||||
endif
|
||||
|
||||
if !DISABLE_REGEX
|
||||
libexec_PROGRAMS += gpg-check-pattern
|
||||
endif
|
||||
|
||||
if !HAVE_W32CE_SYSTEM
|
||||
noinst_PROGRAMS = clean-sat make-dns-cert gpgsplit
|
||||
@ -94,6 +92,9 @@ if HAVE_W32CE_SYSTEM
|
||||
opt_libassuan_libs = $(LIBASSUAN_LIBS)
|
||||
endif
|
||||
|
||||
regexp_libs = ../regexp/libregexp.a
|
||||
|
||||
|
||||
gpgsplit_LDADD = $(common_libs) \
|
||||
$(LIBGCRYPT_LIBS) $(GPG_ERROR_LIBS) \
|
||||
$(ZLIBS) $(LIBINTL) $(NETLIBS) $(LIBICONV)
|
||||
@ -126,13 +127,12 @@ gpg_connect_agent_LDADD = ../common/libgpgrl.a $(common_libs) \
|
||||
$(resource_objs)
|
||||
|
||||
|
||||
if !DISABLE_REGEX
|
||||
gpg_check_pattern_SOURCES = gpg-check-pattern.c
|
||||
gpg_check_pattern_CFLAGS = $(LIBGCRYPT_CFLAGS) $(GPG_ERROR_CFLAGS) $(INCICONV)
|
||||
gpg_check_pattern_LDADD = $(common_libs) $(LIBGCRYPT_LIBS) $(GPG_ERROR_LIBS) \
|
||||
gpg_check_pattern_LDADD = $(common_libs) $(regexp_libs) $(LIBGCRYPT_LIBS) \
|
||||
$(GPG_ERROR_LIBS) \
|
||||
$(LIBINTL) $(NETLIBS) $(LIBICONV) $(W32SOCKLIBS) \
|
||||
$(LIBICONV)
|
||||
endif
|
||||
|
||||
gpgtar_SOURCES = \
|
||||
gpgtar.c gpgtar.h \
|
||||
|
Loading…
x
Reference in New Issue
Block a user