1
0
mirror of git://git.gnupg.org/gnupg.git synced 2025-01-21 14:47:03 +01:00
gnupg/tools/mime-parser.c
Werner Koch 2130760904
tools: New support functions for the mail parser.
* tools/rfc822parse.h (RFC822PARSE_HEADER_SEEN): New.
* tools/rfc822parse.c (rfc822_cmp_header_name): New.
(insert_header): Run header seen callback.
(rfc822parse_last_header_line): New.
(rfc822_free): New.
* tools/wks-receive.c (t2body): Use it here.
* tools/mime-parser.c (parse_message_cb): and here.
---

Backported-from-master: 675b12ddd8ca742314d96a02bc95b837841070fb
2024-07-01 10:50:18 +02:00

834 lines
26 KiB
C

/* mime-parser.c - Parse MIME structures (high level rfc822 parser).
* Copyright (C) 2016 g10 Code GmbH
* Copyright (C) 2016 Bundesamt für Sicherheit in der Informationstechnik
*
* This file is part of GnuPG.
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../common/util.h"
#include "rfc822parse.h"
#include "mime-parser.h"
/* States of the PGP/MIME state machine kept in the parser context.
 * The WAIT_* states are entered once the enclosing container or the
 * preceding part has been recognized, the IN_* states while the
 * lines of the respective part are processed, and the GOT_* states
 * once the boundary terminating that part has been seen.  */
enum pgpmime_states
{
PGPMIME_NONE = 0,          /* No PGP/MIME processing in progress.  */
PGPMIME_WAIT_ENCVERSION,   /* Saw multipart/encrypted with the
                              application/pgp-encrypted protocol.  */
PGPMIME_IN_ENCVERSION,     /* In the application/pgp-encrypted part;
                              waiting for the "Version: 1" line.  */
PGPMIME_WAIT_ENCDATA,      /* Got the version line; the next part must
                              be application/octet-stream.  */
PGPMIME_IN_ENCDATA,        /* Lines are passed to collect_encrypted.  */
PGPMIME_GOT_ENCDATA,       /* Boundary after the encrypted data seen.  */
PGPMIME_WAIT_SIGNEDDATA,   /* Saw multipart/signed with the
                              application/pgp-signature protocol.  */
PGPMIME_IN_SIGNEDDATA,     /* Lines are passed to collect_signeddata.  */
PGPMIME_WAIT_SIGNATURE,    /* Signed data ended; the next part must be
                              application/pgp-signature.  */
PGPMIME_IN_SIGNATURE,      /* Lines are passed to collect_signature.  */
PGPMIME_GOT_SIGNATURE,     /* Boundary after the signature seen.  */
PGPMIME_INVALID            /* The PGP/MIME structure was violated.  */
};
/* Definition of the mime parser object.  It bundles the callbacks
 * registered by the user, the state of the PGP/MIME state machine,
 * flags controlling the diagnostic output and the buffer used by
 * mime_parser_parse to read one line of the mail.  */
struct mime_parser_context_s
{
void *cookie; /* Cookie passed to all callbacks. */
/* The callback to announce the transition from header to body. */
gpg_error_t (*t2body) (void *cookie, int level);
/* The callback to announce a new part. */
gpg_error_t (*new_part) (void *cookie,
const char *mediatype,
const char *mediasubtype);
/* The callback to return data of a part.  It is called with DATA
 * set to NULL when a boundary ends a part that was wanted. */
gpg_error_t (*part_data) (void *cookie,
const void *data,
size_t datalen);
/* The callback to collect encrypted data. */
gpg_error_t (*collect_encrypted) (void *cookie, const char *data);
/* The callback to collect signed data. */
gpg_error_t (*collect_signeddata) (void *cookie, const char *data);
/* The callback to collect a signature. */
gpg_error_t (*collect_signature) (void *cookie, const char *data);
/* The RFC822 parser context is stored here during callbacks; it is
 * reset to NULL when the parser callback returns. */
rfc822parse_t msg;
/* Helper to convey error codes from user callbacks. */
gpg_error_t err;
int nesting_level; /* The current nesting level. */
int hashing_at_level; /* The nesting level at which we are hashing. */
enum pgpmime_states pgpmime; /* Current PGP/MIME state. */
unsigned int delay_hashing:1;/* Helper for PGPMIME_IN_SIGNEDDATA: set after a line was hashed so that its CR,LF is hashed only together with the next line. */
unsigned int want_part:1; /* Return the current part. */
unsigned int decode_part:2; /* Decode the part. 1 = QP, 2 = Base64. */
unsigned int verbose:1; /* Enable verbose mode. */
unsigned int debug:1; /* Enable debug mode. */
/* Flags to help with debug output. */
struct {
unsigned int n_skip; /* Skip showing these number of lines. */
unsigned int header:1; /* Show the header lines. */
unsigned int data:1; /* Show the data lines. */
unsigned int as_note:1; /* Show the next data line as a note. */
unsigned int boundary : 1; /* Show the line just parsed as a boundary. */
} show;
struct b64state *b64state; /* NULL or malloced Base64 decoder state. */
/* A buffer for reading a mail line. */
char line[5000];
};
/* Debug helper: write the symbolic name of the parser EVENT to the
 * log.  Events without an entry in the table below are reported as
 * "[unknown event]".  */
static void
show_message_parser_event (rfc822parse_event_t event)
{
  static const struct
  {
    rfc822parse_event_t event;
    const char *name;
  } event_names[] = {
    { RFC822PARSE_OPEN,          "Open"          },
    { RFC822PARSE_CLOSE,         "Close"         },
    { RFC822PARSE_CANCEL,        "Cancel"        },
    { RFC822PARSE_T2BODY,        "T2Body"        },
    { RFC822PARSE_FINISH,        "Finish"        },
    { RFC822PARSE_RCVD_SEEN,     "Rcvd_Seen"     },
    { RFC822PARSE_LEVEL_DOWN,    "Level_Down"    },
    { RFC822PARSE_LEVEL_UP,      "Level_Up"      },
    { RFC822PARSE_BOUNDARY,      "Boundary"      },
    { RFC822PARSE_LAST_BOUNDARY, "Last_Boundary" },
    { RFC822PARSE_BEGIN_HEADER,  "Begin_Header"  },
    { RFC822PARSE_PREAMBLE,      "Preamble"      },
    { RFC822PARSE_EPILOGUE,      "Epilogue"      }
  };
  const char *name = "[unknown event]";
  size_t idx;

  for (idx = 0; idx < sizeof event_names / sizeof *event_names; idx++)
    {
      if (event_names[idx].event == event)
        {
          name = event_names[idx].name;
          break;
        }
    }

  log_debug ("*** RFC822 event %s\n", name);
}
/* Decode the quoted-printable data of LENGTH bytes in BUFFER in
 * place and return the length of the decoded data.  If R_SLBRK is
 * not NULL, true is stored there if the data ended with a soft line
 * break ("=" directly followed by the end of the data, by LF, or by
 * CR,LF) and false otherwise.  A "=" which starts neither a valid
 * hex escape nor a soft line break is copied verbatim.  The caller
 * is expected to pass a complete line.  */
static size_t
qp_decode (char *buffer, size_t length, int *r_slbrk)
{
  char *src = buffer;   /* Read position.  */
  char *dst = buffer;   /* Write position; never ahead of SRC.  */
  size_t left = length; /* Number of bytes not yet consumed.  */

  if (r_slbrk)
    *r_slbrk = 0;

  /* Fixme: We should remove trailing white space first.  */
  while (left)
    {
      size_t used = 1;  /* Bytes consumed by this iteration.  */

      if (*src != '=')
        *dst++ = *src++;
      else if (left > 2 && hexdigitp (src+1) && hexdigitp (src+2))
        {
          /* An "=XX" escape.  */
          src++;
          *(unsigned char*)dst++ = xtoi_2 (src);
          src += 2;
          used = 3;
        }
      else if (left > 2 && src[1] == '\r' && src[2] == '\n')
        {
          /* Soft line break; only reported if it ends the data.  */
          src += 3;
          used = 3;
          if (r_slbrk && left == 3)
            *r_slbrk = 1;
        }
      else if (left > 1 && src[1] == '\n')
        {
          /* Soft line break with only a Unix line terminator.  */
          src += 2;
          used = 2;
          if (r_slbrk && left == 2)
            *r_slbrk = 1;
        }
      else if (left == 1)
        {
          /* Soft line break at the end of the line.  */
          src++;
          if (r_slbrk)
            *r_slbrk = 1;
        }
      else
        *dst++ = *src++;

      left -= used;
    }

  return dst - buffer;
}
/* This function is called by parse_mail to communicate events. This
* callback communicates with the caller using a structure passed in
* OPAQUE. Should return 0 on success or set ERRNO and return -1. */
static int
parse_message_cb (void *opaque, rfc822parse_event_t event, rfc822parse_t msg)
{
mime_parser_t ctx = opaque;
const char *s;
int rc = 0;
/* Make the RFC822 parser context available for callbacks. */
ctx->msg = msg;
if (ctx->debug)
show_message_parser_event (event);
if (event == RFC822PARSE_BEGIN_HEADER || event == RFC822PARSE_T2BODY)
{
/* We need to check here whether to start collecting signed data
* because attachments might come without header lines and thus
* we won't see the BEGIN_HEADER event. */
if (ctx->pgpmime == PGPMIME_WAIT_SIGNEDDATA)
{
if (ctx->debug)
log_debug ("begin_hash\n");
ctx->hashing_at_level = ctx->nesting_level;
ctx->pgpmime = PGPMIME_IN_SIGNEDDATA;
ctx->delay_hashing = 0;
}
}
if (event == RFC822PARSE_OPEN)
{
/* Initialize for a new message. */
ctx->show.header = 1;
}
else if (event == RFC822PARSE_T2BODY)
{
rfc822parse_field_t field;
ctx->want_part = 0;
ctx->decode_part = 0;
if (ctx->t2body)
{
rc = ctx->t2body (ctx->cookie, ctx->nesting_level);
if (rc)
goto t2body_leave;
}
field = rfc822parse_parse_field (msg, "Content-Type", -1);
if (field)
{
const char *s1, *s2;
s1 = rfc822parse_query_media_type (field, &s2);
if (s1)
{
if (ctx->verbose)
log_debug ("h media: %*s%s %s\n",
ctx->nesting_level*2, "", s1, s2);
if (ctx->pgpmime == PGPMIME_WAIT_ENCVERSION)
{
if (!strcmp (s1, "application")
&& !strcmp (s2, "pgp-encrypted"))
{
if (ctx->debug)
log_debug ("c begin_encversion\n");
ctx->pgpmime = PGPMIME_IN_ENCVERSION;
}
else
{
log_error ("invalid PGP/MIME structure;"
" expected '%s', got '%s/%s'\n",
"application/pgp-encrypted", s1, s2);
ctx->pgpmime = PGPMIME_INVALID;
}
}
else if (ctx->pgpmime == PGPMIME_WAIT_ENCDATA)
{
if (!strcmp (s1, "application")
&& !strcmp (s2, "octet-stream"))
{
if (ctx->debug)
log_debug ("c begin_encdata\n");
ctx->pgpmime = PGPMIME_IN_ENCDATA;
}
else
{
log_error ("invalid PGP/MIME structure;"
" expected '%s', got '%s/%s'\n",
"application/octet-stream", s1, s2);
ctx->pgpmime = PGPMIME_INVALID;
}
}
else if (ctx->pgpmime == PGPMIME_WAIT_SIGNATURE)
{
if (!strcmp (s1, "application")
&& !strcmp (s2, "pgp-signature"))
{
if (ctx->debug)
log_debug ("c begin_signature\n");
ctx->pgpmime = PGPMIME_IN_SIGNATURE;
}
else
{
log_error ("invalid PGP/MIME structure;"
" expected '%s', got '%s/%s'\n",
"application/pgp-signature", s1, s2);
ctx->pgpmime = PGPMIME_INVALID;
}
}
else if (!strcmp (s1, "multipart")
&& !strcmp (s2, "encrypted"))
{
s = rfc822parse_query_parameter (field, "protocol", 0);
if (s)
{
if (ctx->debug)
log_debug ("h encrypted.protocol: %s\n", s);
if (!strcmp (s, "application/pgp-encrypted"))
{
if (ctx->pgpmime)
log_error ("note: "
"ignoring nested PGP/MIME signature\n");
else
ctx->pgpmime = PGPMIME_WAIT_ENCVERSION;
}
else if (ctx->verbose)
log_debug ("# this protocol is not supported\n");
}
}
else if (!strcmp (s1, "multipart")
&& !strcmp (s2, "signed"))
{
s = rfc822parse_query_parameter (field, "protocol", 1);
if (s)
{
if (ctx->debug)
log_debug ("h signed.protocol: %s\n", s);
if (!strcmp (s, "application/pgp-signature"))
{
if (ctx->pgpmime)
log_error ("note: "
"ignoring nested PGP/MIME signature\n");
else
ctx->pgpmime = PGPMIME_WAIT_SIGNEDDATA;
}
else if (ctx->verbose)
log_debug ("# this protocol is not supported\n");
}
}
else if (ctx->new_part)
{
ctx->err = ctx->new_part (ctx->cookie, s1, s2);
if (!ctx->err)
ctx->want_part = 1;
else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE)
ctx->err = 0;
else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE)
{
ctx->want_part = ctx->decode_part = 1;
ctx->err = 0;
}
}
}
else
{
if (ctx->debug)
log_debug ("h media: %*s none\n", ctx->nesting_level*2, "");
if (ctx->new_part)
{
ctx->err = ctx->new_part (ctx->cookie, "", "");
if (!ctx->err)
ctx->want_part = 1;
else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE)
ctx->err = 0;
else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE)
{
ctx->want_part = ctx->decode_part = 1;
ctx->err = 0;
}
}
}
rfc822parse_release_field (field);
}
else
{
if (ctx->verbose)
log_debug ("h media: %*stext plain [assumed]\n",
ctx->nesting_level*2, "");
if (ctx->new_part)
{
ctx->err = ctx->new_part (ctx->cookie, "text", "plain");
if (!ctx->err)
ctx->want_part = 1;
else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE)
ctx->err = 0;
else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE)
{
ctx->want_part = ctx->decode_part = 1;
ctx->err = 0;
}
}
}
/* Figure out the encoding if needed. */
if (ctx->decode_part)
{
char *value;
size_t valueoff;
ctx->decode_part = 0; /* Fallback for unknown encoding. */
value = rfc822parse_get_field (msg, "Content-Transfer-Encoding", -1,
&valueoff);
if (value)
{
if (!stricmp (value+valueoff, "quoted-printable"))
ctx->decode_part = 1;
else if (!stricmp (value+valueoff, "base64"))
{
ctx->decode_part = 2;
if (ctx->b64state)
b64dec_finish (ctx->b64state); /* Reuse state. */
else
{
ctx->b64state = xtrymalloc (sizeof *ctx->b64state);
if (!ctx->b64state)
rc = gpg_error_from_syserror ();
}
if (!rc)
rc = b64dec_start (ctx->b64state, NULL);
}
rfc822_free (value);
}
}
t2body_leave:
ctx->show.header = 0;
ctx->show.data = 1;
ctx->show.n_skip = 1;
}
else if (event == RFC822PARSE_PREAMBLE)
ctx->show.as_note = 1;
else if (event == RFC822PARSE_LEVEL_DOWN)
{
if (ctx->debug)
log_debug ("b down\n");
ctx->nesting_level++;
}
else if (event == RFC822PARSE_LEVEL_UP)
{
if (ctx->debug)
log_debug ("b up\n");
if (ctx->nesting_level)
ctx->nesting_level--;
else
log_error ("invalid structure (bad nesting level)\n");
}
else if (event == RFC822PARSE_BOUNDARY || event == RFC822PARSE_LAST_BOUNDARY)
{
ctx->show.data = 0;
ctx->show.boundary = 1;
if (event == RFC822PARSE_BOUNDARY)
{
ctx->show.header = 1;
ctx->show.n_skip = 1;
if (ctx->debug)
log_debug ("b part\n");
}
else if (ctx->debug)
log_debug ("b last\n");
if (ctx->pgpmime == PGPMIME_IN_ENCDATA)
{
if (ctx->debug)
log_debug ("c end_encdata\n");
ctx->pgpmime = PGPMIME_GOT_ENCDATA;
/* FIXME: We should assert (event == LAST_BOUNDARY). */
}
else if (ctx->pgpmime == PGPMIME_IN_SIGNEDDATA
&& ctx->nesting_level == ctx->hashing_at_level)
{
if (ctx->debug)
log_debug ("c end_hash\n");
ctx->pgpmime = PGPMIME_WAIT_SIGNATURE;
if (ctx->collect_signeddata)
ctx->err = ctx->collect_signeddata (ctx->cookie, NULL);
}
else if (ctx->pgpmime == PGPMIME_IN_SIGNATURE)
{
if (ctx->debug)
log_debug ("c end_signature\n");
ctx->pgpmime = PGPMIME_GOT_SIGNATURE;
/* FIXME: We should assert (event == LAST_BOUNDARY). */
}
else if (ctx->want_part)
{
if (ctx->part_data)
{
/* FIXME: We may need to flush things. */
ctx->err = ctx->part_data (ctx->cookie, NULL, 0);
}
ctx->want_part = 0;
}
}
ctx->msg = NULL;
return rc;
}
/* Allocate a new, zero-initialized mime parser object and store it
 * at R_PARSER.  COOKIE is remembered and used as the first argument
 * for all callbacks registered with this parser object.  Returns 0
 * on success; on an allocation failure NULL is stored at R_PARSER
 * and an error code derived from the system error is returned.  */
gpg_error_t
mime_parser_new (mime_parser_t *r_parser, void *cookie)
{
  mime_parser_t parser = xtrycalloc (1, sizeof *parser);

  *r_parser = parser;
  if (!parser)
    return gpg_error_from_syserror ();

  parser->cookie = cookie;
  return 0;
}
/* Free the mime parser object CTX including a Base64 decoder state
 * it may still hold.  Passing NULL for CTX is allowed and does
 * nothing.  */
void
mime_parser_release (mime_parser_t ctx)
{
  if (ctx)
    {
      if (ctx->b64state)
        {
          b64dec_finish (ctx->b64state);
          xfree (ctx->b64state);
        }
      xfree (ctx);
    }
}
/* Set verbose and debug mode.  A LEVEL of 0 disables both modes, any
 * other value enables verbose mode, and a value greater than 10
 * additionally enables debug mode.  Both flags are derived from
 * LEVEL on each call, so that lowering the level from a value above
 * 10 to a value of 10 or less also switches debug mode off again.  */
void
mime_parser_set_verbose (mime_parser_t ctx, int level)
{
  ctx->verbose = !!level;
  ctx->debug = (level > 10);
}
/* Register FNC as the callback for the transition from the header
 * to the body.  The callback receives the cookie of the parser and
 * the current nesting level, starting with 0.  It can be used to
 * evaluate the headers before any other action is done; in
 * particular it is called before the NEW_PART callback for the same
 * part.  */
void
mime_parser_set_t2body (mime_parser_t ctx,
                        gpg_error_t (*fnc) (void *cookie, int level))
{
  ctx->t2body = fnc;
}
/* Register FNC as the callback used to announce a new part.  It is
 * called with the media type and the media subtype of the part; if
 * the Content-Type header carries no usable media type both values
 * are the empty string.  The callback shall return 0 on success or
 * an error code.  Two error codes have a special meaning:
 *
 *   GPG_ERR_FALSE - The caller is not interested in the part; its
 *                   data is not passed to a registered part_data
 *                   callback.
 *   GPG_ERR_TRUE  - The part shall be returned in decoded format
 *                   (i.e. base64 or QP encoding is removed).
 */
void
mime_parser_set_new_part (mime_parser_t ctx,
                          gpg_error_t (*fnc) (void *cookie,
                                              const char *mediatype,
                                              const char *mediasubtype))
{
  ctx->new_part = fnc;
}
/* Register FNC as the callback used to return the data of a part to
 * the caller.  The end of a part is indicated by calling FNC with
 * NULL for DATA and 0 for DATALEN.  */
void
mime_parser_set_part_data (mime_parser_t ctx,
                           gpg_error_t (*fnc) (void *cookie,
                                               const void *data,
                                               size_t datalen))
{
  ctx->part_data = fnc;
}
/* Register FNC as the callback to collect encrypted data.  FNC is
 * called with NULL for DATA to indicate the end of the encrypted
 * data; the callback may then decrypt the collected data.  */
void
mime_parser_set_collect_encrypted (mime_parser_t ctx,
                                   gpg_error_t (*fnc) (void *cookie,
                                                       const char *data))
{
  ctx->collect_encrypted = fnc;
}
/* Register FNC as the callback to collect signed data.  FNC is
 * called with NULL for DATA to indicate the end of the signed
 * data.  */
void
mime_parser_set_collect_signeddata (mime_parser_t ctx,
                                    gpg_error_t (*fnc) (void *cookie,
                                                        const char *data))
{
  ctx->collect_signeddata = fnc;
}
/* Register FNC as the callback to collect the signature.  FNC is
 * called with NULL for DATA to indicate the end of the signature;
 * the callback may then verify the signature.  */
void
mime_parser_set_collect_signature (mime_parser_t ctx,
                                   gpg_error_t (*fnc) (void *cookie,
                                                       const char *data))
{
  ctx->collect_signature = fnc;
}
/* Return the RFC822 parser context of CTX.  The context is only
 * available while one of the callbacks is running; outside of a
 * callback NULL is returned.  */
rfc822parse_t
mime_parser_rfc822parser (mime_parser_t ctx)
{
  return ctx->msg;
}
/* Helper for mime_parser_parse: Pass LINE of *LENGTH bytes to the
 * part_data callback.  Nothing is done, and 0 is returned, if the
 * current part is not wanted or no part_data callback has been
 * registered.  If decoding was requested, LINE is decoded in place
 * first and *LENGTH is updated to the decoded length.  Returns 0 on
 * success or the error code of the Base64 decoder or of the
 * callback.  */
static gpg_error_t
process_part_data (mime_parser_t ctx, char *line, size_t *length)
{
  if (!ctx->want_part || !ctx->part_data)
    return 0;

  switch (ctx->decode_part)
    {
    case 1: /* Quoted-printable.  */
      *length = qp_decode (line, *length, NULL);
      break;

    case 2: /* Base64.  */
      {
        gpg_error_t err;
        size_t nbytes;

        log_assert (ctx->b64state);
        err = b64dec_proc (ctx->b64state, line, *length, &nbytes);
        if (err)
          return err;
        *length = nbytes;
      }
      break;

    default: /* No decoding requested.  */
      break;
    }

  return ctx->part_data (ctx->cookie, line, *length);
}
/* Read a message line by line from FP, feed it to the RFC822 parser
 * and call the callbacks registered with CTX as appropriate.
 *
 * A leading "From " line is ignored.  The line terminator (LF or
 * CR,LF) is removed before a line is processed; where the original
 * line ending matters (encrypted data, signed data and the
 * signature) a CR,LF is passed to the respective callback in
 * addition to the line.
 *
 * Returns 0 on success or an error code if the RFC822 parser could
 * not be set up or failed, if a callback invoked from the parser
 * callback reported an error via CTX->err, or if the
 * collect_encrypted callback (for data lines) or the part_data
 * processing failed.  The return values of the collect_signeddata
 * and collect_signature callbacks are ignored here.  */
gpg_error_t
mime_parser_parse (mime_parser_t ctx, estream_t fp)
{
  gpg_error_t err;
  rfc822parse_t msg = NULL;
  unsigned int lineno = 0;
  size_t length;
  char *line;

  line = ctx->line;

  msg = rfc822parse_open (parse_message_cb, ctx);
  if (!msg)
    {
      err = gpg_error_from_syserror ();
      log_error ("can't open mail parser: %s\n", gpg_strerror (err));
      goto leave;
    }

  /* Fixme: We should not use fgets because it can't cope with
     embedded nul characters.  */
  while (es_fgets (ctx->line, sizeof (ctx->line), fp))
    {
      lineno++;
      if (lineno == 1 && !strncmp (line, "From ", 5))
        continue;  /* We better ignore a leading From line.  */

      /* Strip the line terminator.  */
      length = strlen (line);
      if (length && line[length - 1] == '\n')
        line[--length] = 0;
      else
        log_error ("mail parser detected too long or"
                   " non terminated last line (lnr=%u)\n", lineno);
      if (length && line[length - 1] == '\r')
        line[--length] = 0;

      /* Feed the line to the RFC822 parser; this may invoke
       * parse_message_cb which in turn may set CTX->err.  */
      ctx->err = 0;
      if (rfc822parse_insert (msg, line, length))
        {
          err = gpg_error_from_syserror ();
          log_error ("mail parser failed: %s\n", gpg_strerror (err));
          goto leave;
        }
      if (ctx->err)
        {
          /* Error from a callback detected.  */
          err = ctx->err;
          goto leave;
        }

      /* Debug output.  Note that the boundary is shown before n_skip
       * is evaluated.  */
      if (ctx->show.boundary)
        {
          if (ctx->debug)
            log_debug ("# Boundary: %s\n", line);
          ctx->show.boundary = 0;
        }
      if (ctx->show.n_skip)
        ctx->show.n_skip--;
      else if (ctx->show.data)
        {
          if (ctx->show.as_note)
            {
              if (ctx->verbose)
                log_debug ("# Note: %s\n", line);
              ctx->show.as_note = 0;
            }
          else if (ctx->debug)
            log_debug ("# Data: %s\n", line);
        }
      else if (ctx->show.header && ctx->verbose)
        log_debug ("# Header: %s\n", line);

      /* Act on the line according to the PGP/MIME state.  */
      if (ctx->pgpmime == PGPMIME_IN_ENCVERSION)
        {
          trim_trailing_spaces (line);
          if (!*line)
            ;  /* Skip empty lines.  */
          else if (!strcmp (line, "Version: 1"))
            ctx->pgpmime = PGPMIME_WAIT_ENCDATA;
          else
            {
              log_error ("invalid PGP/MIME structure;"
                         " garbage in pgp-encrypted part ('%s')\n", line);
              ctx->pgpmime = PGPMIME_INVALID;
            }
        }
      else if (ctx->pgpmime == PGPMIME_IN_ENCDATA)
        {
          if (ctx->collect_encrypted)
            {
              err = ctx->collect_encrypted (ctx->cookie, line);
              if (!err)
                err = ctx->collect_encrypted (ctx->cookie, "\r\n");
              if (err)
                goto leave;
            }
        }
      else if (ctx->pgpmime == PGPMIME_GOT_ENCDATA)
        {
          /* Signal the end of the encrypted data.  */
          ctx->pgpmime = PGPMIME_NONE;
          if (ctx->collect_encrypted)
            ctx->collect_encrypted (ctx->cookie, NULL);
        }
      else if (ctx->pgpmime == PGPMIME_IN_SIGNEDDATA)
        {
          /* If we are processing signed data, store the signed data.
           * We need to delay the hashing of the CR/LF because the
           * last line ending belongs to the next boundary.  This is
           * the reason why we can't use the PGPMIME state as a
           * condition.  */
          if (ctx->debug)
            log_debug ("# hashing %s'%s'\n",
                       ctx->delay_hashing? "CR,LF+":"", line);
          if (ctx->collect_signeddata)
            {
              if (ctx->delay_hashing)
                ctx->collect_signeddata (ctx->cookie, "\r\n");
              ctx->collect_signeddata (ctx->cookie, line);
            }
          ctx->delay_hashing = 1;

          /* The signed data may also be a part wanted by the user.
           * Note that this may decode LINE in place, thus it needs
           * to be done after the hashing.  */
          err = process_part_data (ctx, line, &length);
          if (err)
            goto leave;
        }
      else if (ctx->pgpmime == PGPMIME_IN_SIGNATURE)
        {
          /* The guard needs to test the callback which is actually
           * called; the signed data callback is independent of it.  */
          if (ctx->collect_signature)
            {
              ctx->collect_signature (ctx->cookie, line);
              ctx->collect_signature (ctx->cookie, "\r\n");
            }
        }
      else if (ctx->pgpmime == PGPMIME_GOT_SIGNATURE)
        {
          /* Signal the end of the signature.  */
          ctx->pgpmime = PGPMIME_NONE;
          if (ctx->collect_signature)
            ctx->collect_signature (ctx->cookie, NULL);
        }
      else
        {
          err = process_part_data (ctx, line, &length);
          if (err)
            goto leave;
        }
    }

  rfc822parse_close (msg);
  msg = NULL;
  err = 0;

 leave:
  /* On success MSG is NULL here; on error the parser is canceled.  */
  rfc822parse_cancel (msg);
  return err;
}