mirror of
git://git.gnupg.org/gnupg.git
synced 2024-10-31 20:08:43 +01:00
54eb375ff1
Signed-off-by: Daniel Kahn Gillmor <dkg@fifthhorseman.net>
1332 lines
30 KiB
C
1332 lines
30 KiB
C
/* rfc822parse.c - Simple mail and MIME parser
|
|
* Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
|
|
* Copyright (C) 2003, 2004 g10 Code GmbH
|
|
*
|
|
* This file is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License as
|
|
* published by the Free Software Foundation; either version 2.1 of
|
|
* the License, or (at your option) any later version.
|
|
*
|
|
* This file is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
|
|
/* According to RFC822 binary zeroes are allowed at many places. We do
|
|
* not handle this correct especially in the field parsing code. It
|
|
* should be easy to fix and the API provides a interfaces which
|
|
* returns the length but in addition makes sure that returned strings
|
|
* are always ended by a \0.
|
|
*
|
|
* Furthermore, the case of field names is changed and thus it is not
|
|
* always a good idea to use these modified header
|
|
* lines (e.g. signatures may break).
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
|
|
#include "rfc822parse.h"
|
|
|
|
/* All valid characters in a header name. */
|
|
#define HEADER_NAME_CHARS ("abcdefghijklmnopqrstuvwxyz" \
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
|
|
"-01234567890")
|
|
|
|
|
|
enum token_type
|
|
{
|
|
tSPACE,
|
|
tATOM,
|
|
tQUOTED,
|
|
tDOMAINLIT,
|
|
tSPECIAL
|
|
};
|
|
|
|
/* For now we directly use our TOKEN as the parse context */
|
|
typedef struct rfc822parse_field_context *TOKEN;
|
|
struct rfc822parse_field_context
|
|
{
|
|
TOKEN next;
|
|
enum token_type type;
|
|
struct {
|
|
unsigned int cont:1;
|
|
unsigned int lowered:1;
|
|
} flags;
|
|
/*TOKEN owner_pantry; */
|
|
char data[1];
|
|
};
|
|
|
|
struct hdr_line
|
|
{
|
|
struct hdr_line *next;
|
|
int cont; /* This is a continuation of the previous line. */
|
|
unsigned char line[1];
|
|
};
|
|
|
|
typedef struct hdr_line *HDR_LINE;
|
|
|
|
|
|
struct part
|
|
{
|
|
struct part *right; /* The next part. */
|
|
struct part *down; /* A contained part. */
|
|
HDR_LINE hdr_lines; /* Header lines os that part. */
|
|
HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */
|
|
char *boundary; /* Only used in the first part. */
|
|
};
|
|
typedef struct part *part_t;
|
|
|
|
struct rfc822parse_context
|
|
{
|
|
rfc822parse_cb_t callback;
|
|
void *callback_value;
|
|
int callback_error;
|
|
int in_body;
|
|
int in_preamble; /* Whether we are before the first boundary. */
|
|
part_t parts; /* The tree of parts. */
|
|
part_t current_part; /* Whom we are processing (points into parts). */
|
|
const char *boundary; /* Current boundary. */
|
|
};
|
|
|
|
static HDR_LINE find_header (rfc822parse_t msg, const char *name,
|
|
int which, HDR_LINE * rprev);
|
|
|
|
|
|
static size_t
|
|
length_sans_trailing_ws (const unsigned char *line, size_t len)
|
|
{
|
|
const unsigned char *p, *mark;
|
|
size_t n;
|
|
|
|
for (mark=NULL, p=line, n=0; n < len; n++, p++)
|
|
{
|
|
if (strchr (" \t\r\n", *p ))
|
|
{
|
|
if( !mark )
|
|
mark = p;
|
|
}
|
|
else
|
|
mark = NULL;
|
|
}
|
|
|
|
if (mark)
|
|
return mark - line;
|
|
return len;
|
|
}
|
|
|
|
|
|
static void
|
|
lowercase_string (unsigned char *string)
|
|
{
|
|
for (; *string; string++)
|
|
if (*string >= 'A' && *string <= 'Z')
|
|
*string = *string - 'A' + 'a';
|
|
}
|
|
|
|
|
|
static int
|
|
my_toupper (int c)
|
|
{
|
|
if (c >= 'a' && c <= 'z')
|
|
c &= ~0x20;
|
|
return c;
|
|
}
|
|
|
|
/* This is the same as ascii_strcasecmp. */
|
|
static int
|
|
my_strcasecmp (const char *a, const char *b)
|
|
{
|
|
if (a == b)
|
|
return 0;
|
|
|
|
for (; *a && *b; a++, b++)
|
|
{
|
|
if (*a != *b && my_toupper(*a) != my_toupper(*b))
|
|
break;
|
|
}
|
|
return *a == *b? 0 : (my_toupper (*a) - my_toupper (*b));
|
|
}
|
|
|
|
|
|
#ifndef HAVE_STPCPY
|
|
static char *
|
|
my_stpcpy (char *a,const char *b)
|
|
{
|
|
while (*b)
|
|
*a++ = *b++;
|
|
*a = 0;
|
|
|
|
return (char*)a;
|
|
}
|
|
#define stpcpy my_stpcpy
|
|
#endif
|
|
|
|
|
|
/* If a callback has been registered, call it for the event of type
|
|
EVENT. */
|
|
static int
|
|
do_callback (rfc822parse_t msg, rfc822parse_event_t event)
|
|
{
|
|
int rc;
|
|
|
|
if (!msg->callback || msg->callback_error)
|
|
return 0;
|
|
rc = msg->callback (msg->callback_value, event, msg);
|
|
if (rc)
|
|
msg->callback_error = rc;
|
|
return rc;
|
|
}
|
|
|
|
static part_t
|
|
new_part (void)
|
|
{
|
|
part_t part;
|
|
|
|
part = calloc (1, sizeof *part);
|
|
if (part)
|
|
{
|
|
part->hdr_lines_tail = &part->hdr_lines;
|
|
}
|
|
return part;
|
|
}
|
|
|
|
|
|
static void
|
|
release_part (part_t part)
|
|
{
|
|
part_t tmp;
|
|
HDR_LINE hdr, hdr2;
|
|
|
|
for (; part; part = tmp)
|
|
{
|
|
tmp = part->right;
|
|
if (part->down)
|
|
release_part (part->down);
|
|
for (hdr = part->hdr_lines; hdr; hdr = hdr2)
|
|
{
|
|
hdr2 = hdr->next;
|
|
free (hdr);
|
|
}
|
|
free (part->boundary);
|
|
free (part);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
release_handle_data (rfc822parse_t msg)
|
|
{
|
|
release_part (msg->parts);
|
|
msg->parts = NULL;
|
|
msg->current_part = NULL;
|
|
msg->boundary = NULL;
|
|
}
|
|
|
|
|
|
/* Check that the header name is valid. We allow all lower and
|
|
* uppercase letters and, except for the first character, digits and
|
|
* the dash. The check stops at the first colon or at string end.
|
|
* Returns true if the name is valid. */
|
|
int
|
|
rfc822_valid_header_name_p (const char *name)
|
|
{
|
|
const char *s;
|
|
size_t namelen;
|
|
|
|
if ((s=strchr (name, ':')))
|
|
namelen = s - name;
|
|
else
|
|
namelen = strlen (name);
|
|
|
|
if (!namelen
|
|
|| strspn (name, HEADER_NAME_CHARS) != namelen
|
|
|| strchr ("-0123456789", *name))
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
|
|
/* Transform a header NAME into a standard capitalized format.
|
|
* Conversion stops at the colon. */
|
|
void
|
|
rfc822_capitalize_header_name (char *name)
|
|
{
|
|
unsigned char *p = name;
|
|
int first = 1;
|
|
|
|
/* Special cases first. */
|
|
if (!my_strcasecmp (name, "MIME-Version"))
|
|
{
|
|
strcpy (name, "MIME-Version");
|
|
return;
|
|
}
|
|
|
|
/* Regular cases. */
|
|
for (; *p && *p != ':'; p++)
|
|
{
|
|
if (*p == '-')
|
|
first = 1;
|
|
else if (first)
|
|
{
|
|
if (*p >= 'a' && *p <= 'z')
|
|
*p = *p - 'a' + 'A';
|
|
first = 0;
|
|
}
|
|
else if (*p >= 'A' && *p <= 'Z')
|
|
*p = *p - 'A' + 'a';
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/* Create a new parsing context for an entire rfc822 message and
|
|
return it. CB and CB_VALUE may be given to callback for certain
|
|
events. NULL is returned on error with errno set appropriately. */
|
|
rfc822parse_t
|
|
rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
|
|
{
|
|
rfc822parse_t msg = calloc (1, sizeof *msg);
|
|
if (msg)
|
|
{
|
|
msg->parts = msg->current_part = new_part ();
|
|
if (!msg->parts)
|
|
{
|
|
free (msg);
|
|
msg = NULL;
|
|
}
|
|
else
|
|
{
|
|
msg->callback = cb;
|
|
msg->callback_value = cb_value;
|
|
if (do_callback (msg, RFC822PARSE_OPEN))
|
|
{
|
|
release_handle_data (msg);
|
|
free (msg);
|
|
msg = NULL;
|
|
}
|
|
}
|
|
}
|
|
return msg;
|
|
}
|
|
|
|
|
|
void
|
|
rfc822parse_cancel (rfc822parse_t msg)
|
|
{
|
|
if (msg)
|
|
{
|
|
do_callback (msg, RFC822PARSE_CANCEL);
|
|
release_handle_data (msg);
|
|
free (msg);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
rfc822parse_close (rfc822parse_t msg)
|
|
{
|
|
if (msg)
|
|
{
|
|
do_callback (msg, RFC822PARSE_CLOSE);
|
|
release_handle_data (msg);
|
|
free (msg);
|
|
}
|
|
}
|
|
|
|
static part_t
|
|
find_parent (part_t tree, part_t target)
|
|
{
|
|
part_t part;
|
|
|
|
for (part = tree->down; part; part = part->right)
|
|
{
|
|
if (part == target)
|
|
return tree; /* Found. */
|
|
if (part->down)
|
|
{
|
|
part_t tmp = find_parent (part, target);
|
|
if (tmp)
|
|
return tmp;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void
|
|
set_current_part_to_parent (rfc822parse_t msg)
|
|
{
|
|
part_t parent;
|
|
|
|
assert (msg->current_part);
|
|
parent = find_parent (msg->parts, msg->current_part);
|
|
if (!parent)
|
|
return; /* Already at the top. */
|
|
|
|
#ifndef NDEBUG
|
|
{
|
|
part_t part;
|
|
for (part = parent->down; part; part = part->right)
|
|
if (part == msg->current_part)
|
|
break;
|
|
assert (part);
|
|
}
|
|
#endif
|
|
msg->current_part = parent;
|
|
|
|
parent = find_parent (msg->parts, parent);
|
|
msg->boundary = parent? parent->boundary: NULL;
|
|
}
|
|
|
|
|
|
|
|
/****************
|
|
* We have read in all header lines and are about to receive the body
|
|
* part. The delimiter line has already been processed.
|
|
*
|
|
* FIXME: we's better return an error in case of memory failures.
|
|
*/
|
|
static int
|
|
transition_to_body (rfc822parse_t msg)
|
|
{
|
|
rfc822parse_field_t ctx;
|
|
int rc;
|
|
|
|
rc = do_callback (msg, RFC822PARSE_T2BODY);
|
|
if (!rc)
|
|
{
|
|
/* Store the boundary if we have multipart type. */
|
|
ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
|
|
if (ctx)
|
|
{
|
|
const char *s;
|
|
|
|
s = rfc822parse_query_media_type (ctx, NULL);
|
|
if (s && !strcmp (s,"multipart"))
|
|
{
|
|
s = rfc822parse_query_parameter (ctx, "boundary", 0);
|
|
if (s)
|
|
{
|
|
assert (!msg->current_part->boundary);
|
|
msg->current_part->boundary = malloc (strlen (s) + 1);
|
|
if (msg->current_part->boundary)
|
|
{
|
|
part_t part;
|
|
|
|
strcpy (msg->current_part->boundary, s);
|
|
msg->boundary = msg->current_part->boundary;
|
|
part = new_part ();
|
|
if (!part)
|
|
{
|
|
int save_errno = errno;
|
|
rfc822parse_release_field (ctx);
|
|
errno = save_errno;
|
|
return -1;
|
|
}
|
|
rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
|
|
assert (!msg->current_part->down);
|
|
msg->current_part->down = part;
|
|
msg->current_part = part;
|
|
msg->in_preamble = 1;
|
|
}
|
|
}
|
|
}
|
|
rfc822parse_release_field (ctx);
|
|
}
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/* We have just passed a MIME boundary and need to prepare for new part.
|
|
headers. */
|
|
static int
|
|
transition_to_header (rfc822parse_t msg)
|
|
{
|
|
part_t part;
|
|
|
|
assert (msg->current_part);
|
|
assert (!msg->current_part->right);
|
|
|
|
part = new_part ();
|
|
if (!part)
|
|
return -1;
|
|
|
|
msg->current_part->right = part;
|
|
msg->current_part = part;
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
|
|
{
|
|
HDR_LINE hdr;
|
|
|
|
assert (msg->current_part);
|
|
if (!length)
|
|
{
|
|
msg->in_body = 1;
|
|
return transition_to_body (msg);
|
|
}
|
|
|
|
if (!msg->current_part->hdr_lines)
|
|
do_callback (msg, RFC822PARSE_BEGIN_HEADER);
|
|
|
|
length = length_sans_trailing_ws (line, length);
|
|
hdr = malloc (sizeof (*hdr) + length);
|
|
if (!hdr)
|
|
return -1;
|
|
hdr->next = NULL;
|
|
hdr->cont = (*line == ' ' || *line == '\t');
|
|
memcpy (hdr->line, line, length);
|
|
hdr->line[length] = 0; /* Make it a string. */
|
|
|
|
/* Transform a field name into canonical format. */
|
|
if (!hdr->cont && strchr (line, ':'))
|
|
rfc822_capitalize_header_name (hdr->line);
|
|
|
|
*msg->current_part->hdr_lines_tail = hdr;
|
|
msg->current_part->hdr_lines_tail = &hdr->next;
|
|
|
|
/* Lets help the caller to prevent mail loops and issue an event for
|
|
* every Received header. */
|
|
if (length >= 9 && !memcmp (line, "Received:", 9))
|
|
do_callback (msg, RFC822PARSE_RCVD_SEEN);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/****************
|
|
* Note: We handle the body transparent to allow binary zeroes in it.
|
|
*/
|
|
static int
|
|
insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
|
|
{
|
|
int rc = 0;
|
|
|
|
if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary)
|
|
{
|
|
size_t blen = strlen (msg->boundary);
|
|
|
|
if (length == blen + 2
|
|
&& !memcmp (line+2, msg->boundary, blen))
|
|
{
|
|
rc = do_callback (msg, RFC822PARSE_BOUNDARY);
|
|
msg->in_body = 0;
|
|
if (!rc && !msg->in_preamble)
|
|
rc = transition_to_header (msg);
|
|
msg->in_preamble = 0;
|
|
}
|
|
else if (length == blen + 4
|
|
&& line[length-2] =='-' && line[length-1] == '-'
|
|
&& !memcmp (line+2, msg->boundary, blen))
|
|
{
|
|
rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
|
|
msg->boundary = NULL; /* No current boundary anymore. */
|
|
set_current_part_to_parent (msg);
|
|
|
|
/* Fixme: The next should actually be send right before the
|
|
next boundary, so that we can mark the epilogue. */
|
|
if (!rc)
|
|
rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
|
|
}
|
|
}
|
|
if (msg->in_preamble && !rc)
|
|
rc = do_callback (msg, RFC822PARSE_PREAMBLE);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/* Insert the next line into the parser. Return 0 on success or true
|
|
on error with errno set appropriately. */
|
|
int
|
|
rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
|
|
{
|
|
return (msg->in_body
|
|
? insert_body (msg, line, length)
|
|
: insert_header (msg, line, length));
|
|
}
|
|
|
|
|
|
/* Tell the parser that we have finished the message. */
|
|
int
|
|
rfc822parse_finish (rfc822parse_t msg)
|
|
{
|
|
return do_callback (msg, RFC822PARSE_FINISH);
|
|
}
|
|
|
|
|
|
|
|
/****************
|
|
* Get a copy of a header line. The line is returned as one long
|
|
* string with LF to separate the continuation line. Caller must free
|
|
* the return buffer. WHICH may be used to enumerate over all lines.
|
|
* Wildcards are allowed. This function works on the current headers;
|
|
* i.e. the regular mail headers or the MIME headers of the current
|
|
* part.
|
|
*
|
|
* WHICH gives the mode:
|
|
* -1 := Take the last occurrence
|
|
* n := Take the n-th one.
|
|
*
|
|
* Returns a newly allocated buffer or NULL on error. errno is set in
|
|
* case of a memory failure or set to 0 if the requested field is not
|
|
* available.
|
|
*
|
|
* If VALUEOFF is not NULL it will receive the offset of the first non
|
|
* space character in the value part of the line (i.e. after the first
|
|
* colon).
|
|
*/
|
|
char *
|
|
rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
|
|
size_t *valueoff)
|
|
{
|
|
HDR_LINE h, h2;
|
|
char *buf, *p;
|
|
size_t n;
|
|
|
|
h = find_header (msg, name, which, NULL);
|
|
if (!h)
|
|
{
|
|
errno = 0;
|
|
return NULL; /* no such field */
|
|
}
|
|
|
|
n = strlen (h->line) + 1;
|
|
for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
|
|
n += strlen (h2->line) + 1;
|
|
|
|
buf = p = malloc (n);
|
|
if (buf)
|
|
{
|
|
p = stpcpy (p, h->line);
|
|
*p++ = '\n';
|
|
for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
|
|
{
|
|
p = stpcpy (p, h2->line);
|
|
*p++ = '\n';
|
|
}
|
|
p[-1] = 0;
|
|
}
|
|
|
|
if (valueoff)
|
|
{
|
|
p = strchr (buf, ':');
|
|
if (!p)
|
|
*valueoff = 0; /* Oops: should never happen. */
|
|
else
|
|
{
|
|
p++;
|
|
while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
|
|
p++;
|
|
*valueoff = p - buf;
|
|
}
|
|
}
|
|
|
|
return buf;
|
|
}
|
|
|
|
|
|
/****************
|
|
* Enumerate all header. Caller has to provide the address of a pointer
|
|
* which has to be initialized to NULL, the caller should then never change this
|
|
* pointer until he has closed the enumeration by passing again the address
|
|
* of the pointer but with msg set to NULL.
|
|
* The function returns pointers to all the header lines or NULL when
|
|
* all lines have been enumerated or no headers are available.
|
|
*/
|
|
const char *
|
|
rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
|
|
{
|
|
HDR_LINE l;
|
|
|
|
if (!msg) /* Close. */
|
|
return NULL;
|
|
|
|
if (*context == msg || !msg->current_part)
|
|
return NULL;
|
|
|
|
l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines;
|
|
|
|
if (l)
|
|
{
|
|
*context = l->next ? (void *) (l->next) : (void *) msg;
|
|
return l->line;
|
|
}
|
|
*context = msg; /* Mark end of list. */
|
|
return NULL;
|
|
}
|
|
|
|
|
|
|
|
/****************
|
|
* Find a header field. If the Name does end in an asterisk this is meant
|
|
* to be a wildcard.
|
|
*
|
|
* which -1 : Retrieve the last field
|
|
* >0 : Retrieve the n-th field
|
|
|
|
* RPREV may be used to return the predecessor of the returned field;
|
|
* which may be NULL for the very first one. It has to be initialized
|
|
* to either NULL in which case the search start at the first header line,
|
|
* or it may point to a headerline, where the search should start
|
|
*/
|
|
static HDR_LINE
|
|
find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
|
|
{
|
|
HDR_LINE hdr, prev = NULL, mark = NULL;
|
|
unsigned char *p;
|
|
size_t namelen, n;
|
|
int found = 0;
|
|
int glob = 0;
|
|
|
|
if (!msg->current_part)
|
|
return NULL;
|
|
|
|
namelen = strlen (name);
|
|
if (namelen && name[namelen - 1] == '*')
|
|
{
|
|
namelen--;
|
|
glob = 1;
|
|
}
|
|
|
|
hdr = msg->current_part->hdr_lines;
|
|
if (rprev && *rprev)
|
|
{
|
|
/* spool forward to the requested starting place.
|
|
* we cannot simply set this as we have to return
|
|
* the previous list element too */
|
|
for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
|
|
;
|
|
}
|
|
|
|
for (; hdr; prev = hdr, hdr = hdr->next)
|
|
{
|
|
if (hdr->cont)
|
|
continue;
|
|
if (!(p = strchr (hdr->line, ':')))
|
|
continue; /* invalid header, just skip it. */
|
|
n = p - hdr->line;
|
|
if (!n)
|
|
continue; /* invalid name */
|
|
if ((glob ? (namelen <= n) : (namelen == n))
|
|
&& !memcmp (hdr->line, name, namelen))
|
|
{
|
|
found++;
|
|
if (which == -1)
|
|
mark = hdr;
|
|
else if (found == which)
|
|
{
|
|
if (rprev)
|
|
*rprev = prev;
|
|
return hdr;
|
|
}
|
|
}
|
|
}
|
|
if (mark && rprev)
|
|
*rprev = prev;
|
|
return mark;
|
|
}
|
|
|
|
|
|
|
|
static const char *
|
|
skip_ws (const char *s)
|
|
{
|
|
while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
|
|
s++;
|
|
return s;
|
|
}
|
|
|
|
|
|
static void
|
|
release_token_list (TOKEN t)
|
|
{
|
|
while (t)
|
|
{
|
|
TOKEN t2 = t->next;
|
|
/* fixme: If we have owner_pantry, put the token back to
|
|
* this pantry so that it can be reused later */
|
|
free (t);
|
|
t = t2;
|
|
}
|
|
}
|
|
|
|
|
|
static TOKEN
|
|
new_token (enum token_type type, const char *buf, size_t length)
|
|
{
|
|
TOKEN t;
|
|
|
|
/* fixme: look through our pantries to find a suitable
|
|
* token for reuse */
|
|
t = malloc (sizeof *t + length);
|
|
if (t)
|
|
{
|
|
t->next = NULL;
|
|
t->type = type;
|
|
memset (&t->flags, 0, sizeof (t->flags));
|
|
t->data[0] = 0;
|
|
if (buf)
|
|
{
|
|
memcpy (t->data, buf, length);
|
|
t->data[length] = 0; /* Make sure it is a C string. */
|
|
}
|
|
else
|
|
t->data[0] = 0;
|
|
}
|
|
return t;
|
|
}
|
|
|
|
static TOKEN
|
|
append_to_token (TOKEN old, const char *buf, size_t length)
|
|
{
|
|
size_t n = strlen (old->data);
|
|
TOKEN t;
|
|
|
|
t = malloc (sizeof *t + n + length);
|
|
if (t)
|
|
{
|
|
t->next = old->next;
|
|
t->type = old->type;
|
|
t->flags = old->flags;
|
|
memcpy (t->data, old->data, n);
|
|
memcpy (t->data + n, buf, length);
|
|
t->data[n + length] = 0;
|
|
old->next = NULL;
|
|
release_token_list (old);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
Parse a field into tokens as defined by rfc822.
|
|
*/
|
|
static TOKEN
|
|
parse_field (HDR_LINE hdr)
|
|
{
|
|
static const char specials[] = "<>@.,;:\\[]\"()";
|
|
static const char specials2[] = "<>@.,;:";
|
|
static const char tspecials[] = "/?=<>@,;:\\[]\"()";
|
|
static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really
|
|
include '.'?*/
|
|
static struct
|
|
{
|
|
const unsigned char *name;
|
|
size_t namelen;
|
|
} tspecial_header[] = {
|
|
{ "Content-Type", 12},
|
|
{ "Content-Transfer-Encoding", 25},
|
|
{ "Content-Disposition", 19},
|
|
{ NULL, 0}
|
|
};
|
|
const char *delimiters;
|
|
const char *delimiters2;
|
|
const unsigned char *line, *s, *s2;
|
|
size_t n;
|
|
int i, invalid = 0;
|
|
TOKEN t, tok, *tok_tail;
|
|
|
|
errno = 0;
|
|
if (!hdr)
|
|
return NULL;
|
|
|
|
tok = NULL;
|
|
tok_tail = &tok;
|
|
|
|
line = hdr->line;
|
|
if (!(s = strchr (line, ':')))
|
|
return NULL; /* oops */
|
|
|
|
n = s - line;
|
|
if (!n)
|
|
return NULL; /* oops: invalid name */
|
|
|
|
delimiters = specials;
|
|
delimiters2 = specials2;
|
|
for (i = 0; tspecial_header[i].name; i++)
|
|
{
|
|
if (n == tspecial_header[i].namelen
|
|
&& !memcmp (line, tspecial_header[i].name, n))
|
|
{
|
|
delimiters = tspecials;
|
|
delimiters2 = tspecials2;
|
|
break;
|
|
}
|
|
}
|
|
|
|
s++; /* Move over the colon. */
|
|
for (;;)
|
|
{
|
|
while (!*s)
|
|
{
|
|
if (!hdr->next || !hdr->next->cont)
|
|
return tok; /* Ready. */
|
|
|
|
/* Next item is a header continuation line. */
|
|
hdr = hdr->next;
|
|
s = hdr->line;
|
|
}
|
|
|
|
if (*s == '(')
|
|
{
|
|
int level = 1;
|
|
int in_quote = 0;
|
|
|
|
invalid = 0;
|
|
for (s++;; s++)
|
|
{
|
|
while (!*s)
|
|
{
|
|
if (!hdr->next || !hdr->next->cont)
|
|
goto oparen_out;
|
|
/* Next item is a header continuation line. */
|
|
hdr = hdr->next;
|
|
s = hdr->line;
|
|
}
|
|
|
|
if (in_quote)
|
|
{
|
|
if (*s == '\"')
|
|
in_quote = 0;
|
|
else if (*s == '\\' && s[1]) /* what about continuation? */
|
|
s++;
|
|
}
|
|
else if (*s == ')')
|
|
{
|
|
if (!--level)
|
|
break;
|
|
}
|
|
else if (*s == '(')
|
|
level++;
|
|
else if (*s == '\"')
|
|
in_quote = 1;
|
|
}
|
|
oparen_out:
|
|
if (!*s)
|
|
; /* Actually this is an error, but we don't care about it. */
|
|
else
|
|
s++;
|
|
}
|
|
else if (*s == '\"' || *s == '[')
|
|
{
|
|
/* We do not check for non-allowed nesting of domainliterals */
|
|
int term = *s == '\"' ? '\"' : ']';
|
|
invalid = 0;
|
|
s++;
|
|
t = NULL;
|
|
|
|
for (;;)
|
|
{
|
|
for (s2 = s; *s2; s2++)
|
|
{
|
|
if (*s2 == term)
|
|
break;
|
|
else if (*s2 == '\\' && s2[1]) /* what about continuation? */
|
|
s2++;
|
|
}
|
|
|
|
t = (t
|
|
? append_to_token (t, s, s2 - s)
|
|
: new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
|
|
if (!t)
|
|
goto failure;
|
|
|
|
if (*s2 || !hdr->next || !hdr->next->cont)
|
|
break;
|
|
/* Next item is a header continuation line. */
|
|
hdr = hdr->next;
|
|
s = hdr->line;
|
|
}
|
|
*tok_tail = t;
|
|
tok_tail = &t->next;
|
|
s = s2;
|
|
if (*s)
|
|
s++; /* skip the delimiter */
|
|
}
|
|
else if ((s2 = strchr (delimiters2, *s)))
|
|
{ /* Special characters which are not handled above. */
|
|
invalid = 0;
|
|
t = new_token (tSPECIAL, s, 1);
|
|
if (!t)
|
|
goto failure;
|
|
*tok_tail = t;
|
|
tok_tail = &t->next;
|
|
s++;
|
|
}
|
|
else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
|
|
{
|
|
invalid = 0;
|
|
s = skip_ws (s + 1);
|
|
}
|
|
else if (*s > 0x20 && !(*s & 128))
|
|
{ /* Atom. */
|
|
invalid = 0;
|
|
for (s2 = s + 1; *s2 > 0x20
|
|
&& !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
|
|
;
|
|
t = new_token (tATOM, s, s2 - s);
|
|
if (!t)
|
|
goto failure;
|
|
*tok_tail = t;
|
|
tok_tail = &t->next;
|
|
s = s2;
|
|
}
|
|
else
|
|
{ /* Invalid character. */
|
|
if (!invalid)
|
|
{ /* For parsing we assume only one space. */
|
|
t = new_token (tSPACE, NULL, 0);
|
|
if (!t)
|
|
goto failure;
|
|
*tok_tail = t;
|
|
tok_tail = &t->next;
|
|
invalid = 1;
|
|
}
|
|
s++;
|
|
}
|
|
}
|
|
/*NOTREACHED*/
|
|
|
|
failure:
|
|
{
|
|
int save = errno;
|
|
release_token_list (tok);
|
|
errno = save;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
|
|
|
|
/****************
|
|
* Find and parse a header field.
|
|
* WHICH indicates what to do if there are multiple instance of the same
|
|
* field (like "Received"); the following value are defined:
|
|
* -1 := Take the last occurrence
|
|
* 0 := Reserved
|
|
* n := Take the n-th one.
|
|
* Returns a handle for further operations on the parse context of the field
|
|
* or NULL if the field was not found.
|
|
*/
|
|
rfc822parse_field_t
|
|
rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
|
|
{
|
|
HDR_LINE hdr;
|
|
|
|
if (!which)
|
|
return NULL;
|
|
|
|
hdr = find_header (msg, name, which, NULL);
|
|
if (!hdr)
|
|
return NULL;
|
|
return parse_field (hdr);
|
|
}
|
|
|
|
void
|
|
rfc822parse_release_field (rfc822parse_field_t ctx)
|
|
{
|
|
if (ctx)
|
|
release_token_list (ctx);
|
|
}
|
|
|
|
|
|
|
|
/****************
|
|
* Check whether T points to a parameter.
|
|
* A parameter starts with a semicolon and it is assumed that t
|
|
* points to exactly this one.
|
|
*/
|
|
static int
|
|
is_parameter (TOKEN t)
|
|
{
|
|
t = t->next;
|
|
if (!t || t->type != tATOM)
|
|
return 0;
|
|
t = t->next;
|
|
if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
|
|
return 0;
|
|
t = t->next;
|
|
if (!t)
|
|
return 1; /* We assume that an non existing value is an empty one. */
|
|
return t->type == tQUOTED || t->type == tATOM;
|
|
}
|
|
|
|
/*
|
|
Some header (Content-type) have a special syntax where attribute=value
|
|
pairs are used after a leading semicolon. The parse_field code
|
|
knows about these fields and changes the parsing to the one defined
|
|
in RFC2045.
|
|
Returns a pointer to the value which is valid as long as the
|
|
parse context is valid; NULL is returned in case that attr is not
|
|
defined in the header, a missing value is reppresented by an empty string.
|
|
|
|
With LOWER_VALUE set to true, a matching field value will be
|
|
lowercased.
|
|
|
|
Note, that ATTR should be lowercase.
|
|
*/
|
|
const char *
|
|
rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
|
|
int lower_value)
|
|
{
|
|
TOKEN t, a;
|
|
|
|
for (t = ctx; t; t = t->next)
|
|
{
|
|
/* skip to the next semicolon */
|
|
for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
|
|
;
|
|
if (!t)
|
|
return NULL;
|
|
if (is_parameter (t))
|
|
{ /* Look closer. */
|
|
a = t->next; /* We know that this is an atom */
|
|
if ( !a->flags.lowered )
|
|
{
|
|
lowercase_string (a->data);
|
|
a->flags.lowered = 1;
|
|
}
|
|
if (!strcmp (a->data, attr))
|
|
{ /* found */
|
|
t = a->next->next;
|
|
/* Either T is now an atom, a quoted string or NULL in
|
|
* which case we return an empty string. */
|
|
|
|
if ( lower_value && t && !t->flags.lowered )
|
|
{
|
|
lowercase_string (t->data);
|
|
t->flags.lowered = 1;
|
|
}
|
|
return t ? t->data : "";
|
|
}
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/****************
|
|
* This function may be used for the Content-Type header to figure out
|
|
* the media type and subtype. Note, that the returned strings are
|
|
* guaranteed to be lowercase as required by MIME.
|
|
*
|
|
* Returns: a pointer to the media type and if subtype is not NULL,
|
|
* a pointer to the subtype.
|
|
*/
|
|
const char *
|
|
rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
|
|
{
|
|
TOKEN t = ctx;
|
|
const char *type;
|
|
|
|
if (t->type != tATOM)
|
|
return NULL;
|
|
if (!t->flags.lowered)
|
|
{
|
|
lowercase_string (t->data);
|
|
t->flags.lowered = 1;
|
|
}
|
|
type = t->data;
|
|
t = t->next;
|
|
if (!t || t->type != tSPECIAL || t->data[0] != '/')
|
|
return NULL;
|
|
t = t->next;
|
|
if (!t || t->type != tATOM)
|
|
return NULL;
|
|
|
|
if (subtype)
|
|
{
|
|
if (!t->flags.lowered)
|
|
{
|
|
lowercase_string (t->data);
|
|
t->flags.lowered = 1;
|
|
}
|
|
*subtype = t->data;
|
|
}
|
|
return type;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef TESTING
|
|
|
|
/* Internal debug function to print the structure of the message. */
|
|
static void
|
|
dump_structure (rfc822parse_t msg, part_t part, int indent)
|
|
{
|
|
if (!part)
|
|
{
|
|
printf ("*** Structure of this message:\n");
|
|
part = msg->parts;
|
|
}
|
|
|
|
for (; part; part = part->right)
|
|
{
|
|
rfc822parse_field_t ctx;
|
|
part_t save_part; /* ugly hack - we should have a function to
|
|
get part information. */
|
|
const char *s;
|
|
|
|
save_part = msg->current_part;
|
|
msg->current_part = part;
|
|
ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
|
|
msg->current_part = save_part;
|
|
if (ctx)
|
|
{
|
|
const char *s1, *s2;
|
|
s1 = rfc822parse_query_media_type (ctx, &s2);
|
|
if (s1)
|
|
printf ("*** %*s %s/%s", indent*2, "", s1, s2);
|
|
else
|
|
printf ("*** %*s [not found]", indent*2, "");
|
|
|
|
s = rfc822parse_query_parameter (ctx, "boundary", 0);
|
|
if (s)
|
|
printf (" (boundary=\"%s\")", s);
|
|
rfc822parse_release_field (ctx);
|
|
}
|
|
else
|
|
printf ("*** %*s text/plain [assumed]", indent*2, "");
|
|
putchar('\n');
|
|
|
|
if (part->down)
|
|
dump_structure (msg, part->down, indent + 1);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
show_param (rfc822parse_field_t ctx, const char *name)
|
|
{
|
|
const char *s;
|
|
|
|
if (!ctx)
|
|
return;
|
|
s = rfc822parse_query_parameter (ctx, name, 0);
|
|
if (s)
|
|
printf ("*** %s: '%s'\n", name, s);
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
show_event (rfc822parse_event_t event)
|
|
{
|
|
const char *s;
|
|
|
|
switch (event)
|
|
{
|
|
case RFC822PARSE_OPEN: s= "Open"; break;
|
|
case RFC822PARSE_CLOSE: s= "Close"; break;
|
|
case RFC822PARSE_CANCEL: s= "Cancel"; break;
|
|
case RFC822PARSE_T2BODY: s= "T2Body"; break;
|
|
case RFC822PARSE_FINISH: s= "Finish"; break;
|
|
case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
|
|
case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break;
|
|
case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break;
|
|
case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
|
|
case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
|
|
case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break;
|
|
case RFC822PARSE_PREAMBLE: s= "Preamble"; break;
|
|
case RFC822PARSE_EPILOGUE: s= "Epilogue"; break;
|
|
default: s= "***invalid event***"; break;
|
|
}
|
|
printf ("*** got RFC822 event %s\n", s);
|
|
}
|
|
|
|
static int
|
|
msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
|
|
{
|
|
show_event (event);
|
|
if (event == RFC822PARSE_T2BODY)
|
|
{
|
|
rfc822parse_field_t ctx;
|
|
void *ectx;
|
|
const char *line;
|
|
|
|
for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
|
|
{
|
|
printf ("*** HDR: %s\n", line);
|
|
}
|
|
rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */
|
|
|
|
ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
|
|
if (ctx)
|
|
{
|
|
const char *s1, *s2;
|
|
s1 = rfc822parse_query_media_type (ctx, &s2);
|
|
if (s1)
|
|
printf ("*** media: '%s/%s'\n", s1, s2);
|
|
else
|
|
printf ("*** media: [not found]\n");
|
|
show_param (ctx, "boundary");
|
|
show_param (ctx, "protocol");
|
|
rfc822parse_release_field (ctx);
|
|
}
|
|
else
|
|
printf ("*** media: text/plain [assumed]\n");
|
|
|
|
}
|
|
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
int
|
|
main (int argc, char **argv)
|
|
{
|
|
char line[5000];
|
|
size_t length;
|
|
rfc822parse_t msg;
|
|
|
|
msg = rfc822parse_open (msg_cb, NULL);
|
|
if (!msg)
|
|
abort ();
|
|
|
|
while (fgets (line, sizeof (line), stdin))
|
|
{
|
|
length = strlen (line);
|
|
if (length && line[length - 1] == '\n')
|
|
line[--length] = 0;
|
|
if (length && line[length - 1] == '\r')
|
|
line[--length] = 0;
|
|
if (rfc822parse_insert (msg, line, length))
|
|
abort ();
|
|
}
|
|
|
|
dump_structure (msg, NULL, 0);
|
|
|
|
rfc822parse_close (msg);
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
Local Variables:
|
|
compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
|
|
End:
|
|
*/
|