Commit f8c4138ff for clamav.net
commit f8c4138ffafd2afea91b8f77366b5a3380c0cd4c
Author: Val S. <valsnyde@cisco.com>
Date: Mon Jun 22 10:53:27 2026 -0400
Improve detection of attachments in malformed email (#1720)
Reconstruct the email attachment edge cases reported and add
regression fixtures for MIME messages that use comments in header names,
ambiguous boundary parameters, RFC2231 disposition fields, trailing boundary
whitespace, folded headers, and message/partial reassembly.
Harden the mail parser so those messages keep their attachment and boundary
metadata during extraction. Normalize commented header names before lookup,
choose MIME boundary and disposition values consistently, count folded header
data against parser limits, and preserve malformed-but-supported boundary
handling.
Tighten allocation-failure handling and cleanup in the email text, message, and
mbox paths. Replace fragile assert and fall-through cases with explicit error
propagation, avoid silent text truncation, centralize mbox cleanup, and fix stale
partial-file cleanup so old temporary fragments can be removed safely.
Credit: Artem Danilov at Positive Technologies
CLAM-2947
diff --git a/libclamav/mbox.c b/libclamav/mbox.c
index c71864bd9..4d6de65ad 100644
--- a/libclamav/mbox.c
+++ b/libclamav/mbox.c
@@ -120,9 +120,15 @@ typedef enum {
OK_ATTACHMENTS_NOT_SAVED,
VIRUS,
MAXREC,
- MAXFILES
+ MAXFILES,
+ FORMAT_ERROR
} mbox_status;
+enum {
+ PARSE_HEADER_MALFORMED = -1,
+ PARSE_HEADER_ALLOC_FAIL = -2
+};
+
#ifndef isblank
#define isblank(c) (((c) == ' ') || ((c) == '\t'))
#endif
@@ -197,6 +203,7 @@ static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
static int getTextPart(message *const messages[], size_t size);
static size_t strip(char *buf, int len);
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound);
+static int tableFindRfc822Header(const table_t *rfc821Table, const char *cmd);
static int saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
static char *rfc2047(const char *in);
static char *rfc822comments(const char *in, char *out);
@@ -222,6 +229,7 @@ static bool hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx
static bool haveTooManyHeaderBytes(size_t totalLen, cli_ctx *ctx, bool *heuristicFound);
static bool haveTooManyEmailHeaders(size_t totalHeaderCnt, cli_ctx *ctx, bool *heuristicFound);
static bool haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristicFound);
+static bool parsePositiveUnsignedArgument(const char *value, unsigned int max, unsigned int *result);
/* Maximum line length according to RFC2821 */
#define RFC2821LENGTH 1000
@@ -276,6 +284,7 @@ static bool haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristi
#define HEURISTIC_EMAIL_MAX_HEADERS 1024
#define HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE 1024
#define HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER 256
+#define HEURISTIC_EMAIL_MAX_PARTIAL_MESSAGE_PARTS 1024
static const struct tableinit {
const char *key;
@@ -326,7 +335,16 @@ int cli_mbox(const char *dir, cli_ctx *ctx)
return cli_parse_mbox(dir, ctx);
}
-/*
+/**
+ * @brief Parse and scan an RFC822 or mbox email stream.
+ *
+ * @param dir Temporary directory used for extracted mail parts.
+ * @param ctx Scan context for the mapped email stream.
+ *
+ * @return CL_SUCCESS/CL_CLEAN when no detection is found, CL_VIRUS on
+ * detection, or another CL_E* status when parsing or scanning must
+ * stop early.
+ *
* TODO: when signal handling is added, need to remove temp files when a
* signal is received
* TODO: add option to scan in memory not via temp files, perhaps with a
@@ -344,38 +362,40 @@ int cli_mbox(const char *dir, cli_ctx *ctx)
static int
cli_parse_mbox(const char *dir, cli_ctx *ctx)
{
- int retcode;
- message *body;
+ cl_error_t status = CL_SUCCESS;
+ message *body = NULL;
+ message *m = NULL;
char buffer[RFC2821LENGTH + 1];
mbox_ctx mctx;
size_t at = 0;
fmap_t *map = ctx->fmap;
+#ifdef CL_THREAD_SAFE
+ bool tables_locked = false;
+#endif
cli_dbgmsg("in mbox()\n");
if (!fmap_gets(map, buffer, &at, sizeof(buffer) - 1)) {
/* empty message */
- return CL_CLEAN;
+ status = CL_CLEAN;
+ goto done;
}
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&tables_mutex);
+ tables_locked = true;
#endif
if (initialiseTables(&rfc821, &subtype) < 0) {
-#ifdef CL_THREAD_SAFE
- pthread_mutex_unlock(&tables_mutex);
-#endif
- return CL_EMEM;
+ status = CL_EMEM;
+ goto done;
}
#ifdef CL_THREAD_SAFE
pthread_mutex_unlock(&tables_mutex);
+ tables_locked = false;
#endif
- retcode = CL_SUCCESS;
- body = NULL;
-
mctx.dir = dir;
mctx.rfc821Table = rfc821;
mctx.subtypeTable = subtype;
@@ -414,10 +434,11 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
*/
bool lastLineWasEmpty;
int messagenumber;
- message *m = messageCreate(); /*Create an empty email */
+ m = messageCreate(); /*Create an empty email */
if (m == NULL) {
- return CL_EMEM;
+ status = CL_EMEM;
+ goto done;
}
lastLineWasEmpty = false;
@@ -438,26 +459,31 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
messageReset(m);
messageSetCTX(m, ctx);
if (heuristicFound) {
- retcode = CL_VIRUS;
- break;
+ status = CL_VIRUS;
+ goto done;
}
continue;
}
messageSetCTX(body, ctx);
+ if (body->isTruncated) {
+ status = CL_EMEM;
+ goto done;
+ }
messageDestroy(m);
+ m = NULL;
if (messageGetBody(body)) {
mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
if (rc == FAIL) {
m = body;
+ body = NULL;
messageReset(m);
messageSetCTX(m, ctx);
continue;
} else if (rc == VIRUS) {
cli_dbgmsg("Message number %d is infected\n",
messagenumber - 1);
- retcode = CL_VIRUS;
- m = NULL;
- break;
+ status = CL_VIRUS;
+ goto done;
}
}
/*
@@ -469,6 +495,7 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
* called
*/
m = body;
+ body = NULL;
messageReset(m);
messageSetCTX(m, ctx);
@@ -495,17 +522,17 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
}
} while (fmap_gets(map, buffer, &at, sizeof(buffer) - 1));
- if (retcode == CL_SUCCESS) {
+ if (status == CL_SUCCESS) {
cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
bool heuristicFound = false;
body = parseEmailHeaders(m, rfc821, &heuristicFound);
if (heuristicFound) {
- retcode = CL_VIRUS;
+ status = CL_VIRUS;
+ goto done;
}
}
- if (m) {
- messageDestroy(m);
- }
+ messageDestroy(m);
+ m = NULL;
} else {
/*
* It's a single message, parse the headers then the body
@@ -534,7 +561,8 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
bool heuristicFound = false;
body = parseEmailFile(map, &at, rfc821, buffer, dir, ctx, &heuristicFound);
if (heuristicFound) {
- retcode = CL_VIRUS;
+ status = CL_VIRUS;
+ goto done;
}
}
@@ -542,7 +570,7 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
/*
* Write out the last entry in the mailbox
*/
- if ((retcode == CL_SUCCESS) && messageGetBody(body)) {
+ if ((status == CL_SUCCESS) && messageGetBody(body)) {
messageSetCTX(body, ctx);
switch (parseEmailBody(body, NULL, &mctx, 0)) {
case OK:
@@ -558,38 +586,47 @@ cli_parse_mbox(const char *dir, cli_ctx *ctx)
* decoding errors on what *is* a valid
* mbox
*/
- retcode = CL_EFORMAT;
+ status = CL_EFORMAT;
break;
case MAXREC:
- retcode = CL_EMAXREC;
+ status = CL_EMAXREC;
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxRecursion"); // Doing this now because it's actually tracking email recursion,-
// not fmap recursion, but it still is aborting with stuff not scanned.
// Also, we didn't have access to the ctx when this happened earlier.
break;
case MAXFILES:
- retcode = CL_EMAXFILES;
+ status = CL_EMAXFILES;
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); // Doing this now because it's actually tracking email parts,-
// not actual files, but it still is aborting with stuff not scanned.
// Also, we didn't have access to the ctx when this happened earlier.
break;
+ case FORMAT_ERROR:
+ status = CL_EFORMAT;
+ break;
case VIRUS:
- retcode = CL_VIRUS;
+ status = CL_VIRUS;
break;
}
}
- if (body->isTruncated && retcode == CL_SUCCESS) {
- retcode = CL_EMEM;
+ if (body->isTruncated && status == CL_SUCCESS) {
+ status = CL_EMEM;
}
- /*
- * Tidy up and quit
- */
- messageDestroy(body);
}
- cli_dbgmsg("cli_mbox returning %d\n", retcode);
+done:
+#ifdef CL_THREAD_SAFE
+ if (tables_locked) {
+ pthread_mutex_unlock(&tables_mutex);
+ }
+#endif
- return retcode;
+ messageDestroy(body);
+ messageDestroy(m);
+
+ cli_dbgmsg("cli_mbox returning %d\n", status);
+
+ return status;
}
#define READ_STRUCT_BUFFER_LEN 1024
@@ -697,15 +734,17 @@ static bool
doContinueMultipleEmptyOptions(const char *const line, bool *lastWasOnlySemi)
{
if (line) {
- size_t i = 0;
- int doCont = 1;
- for (; i < strlen(line); i++) {
- if (isblank(line[i])) {
- } else if (';' == line[i]) {
+ const char *p = line;
+ int doCont = 1;
+
+ while (*p) {
+ if (isblank((unsigned char)*p)) {
+ } else if (';' == *p) {
} else {
doCont = 0;
break;
}
+ p++;
}
if (1 == doCont) {
@@ -721,24 +760,32 @@ doContinueMultipleEmptyOptions(const char *const line, bool *lastWasOnlySemi)
}
static bool
-hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx, bool *heuristicFound)
+hitLineFoldLimit(size_t *lineFoldCnt, cli_ctx *ctx, bool *heuristicFound)
{
- if (line) {
- if (isblank(line[0])) {
- (*lineFoldCnt)++;
- } else {
- (*lineFoldCnt) = 0;
+ (*lineFoldCnt)++;
+
+ if ((*lineFoldCnt) >= HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER) {
+ if (SCAN_HEURISTIC_EXCEEDS_MAX) {
+ cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailLineFoldCnt");
+ *heuristicFound = true;
}
- if ((*lineFoldCnt) >= HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER) {
- if (SCAN_HEURISTIC_EXCEEDS_MAX) {
- cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailLineFoldCnt");
- *heuristicFound = true;
- }
+ return true;
+ }
+ return false;
+}
- return true;
+static bool
+hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx, bool *heuristicFound)
+{
+
+ if (line) {
+ if (isblank((unsigned char)line[0])) {
+ return hitLineFoldLimit(lineFoldCnt, ctx, heuristicFound);
}
+
+ (*lineFoldCnt) = 0;
}
return false;
}
@@ -804,6 +851,58 @@ haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristicFound)
return false;
}
+static bool
+parsePositiveUnsignedArgument(const char *value, unsigned int max, unsigned int *result)
+{
+ char *end = NULL;
+ unsigned long parsed;
+
+ if ((value == NULL) || (result == NULL))
+ return false;
+
+ while (isspace((unsigned char)*value))
+ value++;
+
+ if (!isdigit((unsigned char)*value))
+ return false;
+
+ errno = 0;
+ parsed = strtoul(value, &end, 10);
+ if ((errno != 0) || (end == value) || (parsed == 0) ||
+ (parsed > max) || (parsed > UINT_MAX)) {
+ return false;
+ }
+
+ while (isspace((unsigned char)*end))
+ end++;
+ if (*end != '\0')
+ return false;
+
+ *result = (unsigned int)parsed;
+ return true;
+}
+
+static int
+tableFindRfc822Header(const table_t *rfc821Table, const char *cmd)
+{
+ char *stripped;
+ int commandNumber;
+
+ if ((rfc821Table == NULL) || (cmd == NULL))
+ return -1;
+
+ stripped = rfc822comments(cmd, NULL);
+ if (stripped) {
+ strstrip(stripped);
+ commandNumber = tableFind(rfc821Table, stripped);
+ free(stripped);
+ } else {
+ commandNumber = tableFind(rfc821Table, cmd);
+ }
+
+ return commandNumber;
+}
+
/*
* Read in an email message from fin, parse it, and return the message
*
@@ -878,7 +977,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
* Ensure wide characters are handled where
* sizeof(char) > 1
*/
- if (line && isspace(line[0] & 0xFF)) {
+ if (line && isspace((unsigned char)line[0])) {
char copy[sizeof(buffer)];
strcpy(copy, buffer);
@@ -900,6 +999,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
if (head->bufferLen) {
char *header = getMallocedBufferFromList(head);
int needContinue = 0;
+ int parseStatus;
CLI_VERIFY_POINTER_OR_GOTO_DONE(header);
totalHeaderCnt++;
@@ -907,7 +1007,12 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
CLI_FREE_AND_SET_NULL(header);
break;
}
- needContinue = (parseEmailHeader(ret, header, rfc821, ctx, heuristicFound) < 0);
+ parseStatus = parseEmailHeader(ret, header, rfc821, ctx, heuristicFound);
+ if (parseStatus == PARSE_HEADER_ALLOC_FAIL) {
+ CLI_FREE_AND_SET_NULL(header);
+ goto done;
+ }
+ needContinue = (parseStatus < 0);
if (*heuristicFound) {
CLI_FREE_AND_SET_NULL(header);
break;
@@ -922,7 +1027,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
}
if (boundary ||
- ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
+ ((boundary = (char *)messageFindArgumentLast(ret, "boundary")) != NULL)) {
lastWasBlank = true;
continue;
}
@@ -941,17 +1046,16 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
inHeader = false;
bodyIsEmpty = true;
} else {
- char *ptr;
const char *lookahead;
bool lineAdded = true;
if (0 == head->bufferLen) {
- char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
+ char cmd[RFC2821LENGTH + 1];
/*
* Continuation of line we're ignoring?
*/
- if (isblank(line[0]))
+ if (isblank((unsigned char)line[0]))
continue;
/*
@@ -964,8 +1068,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
continue;
}
- ptr = rfc822comments(cmd, out);
- commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
+ commandNumber = tableFindRfc822Header(rfc821, cmd);
switch (commandNumber) {
case CONTENT_TRANSFER_ENCODING:
@@ -1006,7 +1109,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
*
* Add all the arguments on the line
*/
- if (isblank(*lookahead))
+ if (isblank((unsigned char)*lookahead))
continue;
}
@@ -1017,6 +1120,7 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
{
char *header = getMallocedBufferFromList(head); /*This is the issue */
int needContinue = 0;
+ int parseStatus;
CLI_VERIFY_POINTER_OR_GOTO_DONE(header);
needContinue = (header[strlen(header) - 1] == ';');
@@ -1030,7 +1134,12 @@ parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *first
CLI_FREE_AND_SET_NULL(header);
break;
}
- needContinue = (parseEmailHeader(ret, header, rfc821, ctx, heuristicFound) < 0);
+ parseStatus = parseEmailHeader(ret, header, rfc821, ctx, heuristicFound);
+ if (parseStatus == PARSE_HEADER_ALLOC_FAIL) {
+ CLI_FREE_AND_SET_NULL(header);
+ goto done;
+ }
+ needContinue = (parseStatus < 0);
if (*heuristicFound) {
CLI_FREE_AND_SET_NULL(header);
break;
@@ -1149,6 +1258,8 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
return NULL;
ret = messageCreate();
+ if (ret == NULL)
+ return NULL;
for (t = messageGetBody(m); t; t = t->t_next) {
const char *line;
@@ -1184,6 +1295,7 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
} else {
char *ptr;
bool lineAdded = true;
+ int parseStatus;
if (fullline == NULL) {
char cmd[RFC2821LENGTH + 1];
@@ -1191,7 +1303,7 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
/*
* Continuation of line we're ignoring?
*/
- if (isblank(line[0]))
+ if (isblank((unsigned char)line[0]))
continue;
/*
@@ -1204,10 +1316,7 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
continue;
}
- ptr = rfc822comments(cmd, NULL);
- commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
- if (ptr)
- free(ptr);
+ commandNumber = tableFindRfc822Header(rfc821, cmd);
switch (commandNumber) {
case CONTENT_TRANSFER_ENCODING:
@@ -1221,12 +1330,18 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
continue;
}
fullline = cli_safer_strdup(line);
+ if (fullline == NULL) {
+ ret->isTruncated = true;
+ break;
+ }
fulllinelength = strlen(line) + 1;
} else if (line) {
fulllinelength += strlen(line) + 1;
ptr = cli_max_realloc(fullline, fulllinelength);
- if (ptr == NULL)
- continue;
+ if (ptr == NULL) {
+ ret->isTruncated = true;
+ break;
+ }
fullline = ptr;
cli_strlcat(fullline, line, fulllinelength);
} else {
@@ -1254,17 +1369,16 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
if (count_quotes(fullline) & 1)
continue;
- ptr = rfc822comments(fullline, NULL);
- if (ptr) {
- free(fullline);
- fullline = ptr;
- }
-
totalHeaderCnt++;
if (haveTooManyEmailHeaders(totalHeaderCnt, m->ctx, heuristicFound)) {
break;
}
- if (parseEmailHeader(ret, fullline, rfc821, m->ctx, heuristicFound) < 0) {
+ parseStatus = parseEmailHeader(ret, fullline, rfc821, m->ctx, heuristicFound);
+ if (parseStatus == PARSE_HEADER_ALLOC_FAIL) {
+ ret->isTruncated = true;
+ break;
+ }
+ if (parseStatus < 0) {
continue;
}
if (*heuristicFound) {
@@ -1292,7 +1406,10 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
/*if(t->t_line && isuuencodebegin(t->t_line))
puts("FIXME: add fast visa here");*/
cli_dbgmsg("parseEmailHeaders: finished with headers, moving body\n");
- messageMoveText(ret, t, m);
+ if (messageMoveText(ret, t, m) < 0) {
+ messageDestroy(ret);
+ return NULL;
+ }
break;
}
}
@@ -1332,7 +1449,7 @@ parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
static int
parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *ctx, bool *heuristicFound)
{
- int ret = -1;
+ int ret = PARSE_HEADER_MALFORMED;
#ifdef CL_THREAD_SAFE
char *strptr;
#endif
@@ -1352,13 +1469,14 @@ parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *c
break;
if (*separator == '\0')
- return -1;
+ return PARSE_HEADER_MALFORMED;
copy = rfc2047(line);
if (copy == NULL) {
/* an RFC checker would return -1 here */
copy = cli_safer_strdup(line);
if (NULL == copy) {
+ ret = PARSE_HEADER_ALLOC_FAIL;
goto done;
}
}
@@ -1693,7 +1811,15 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
switch (mimeType) {
case NOMIME:
cli_dbgmsg("Not a mime encoded message\n");
- aText = textAddMessage(aText, mainMessage);
+ {
+ int textStatus = 0;
+
+ aText = textAddMessageWithStatus(aText, mainMessage, &textStatus);
+ if (textStatus < 0) {
+ rc = FAIL;
+ break;
+ }
+ }
if (!doPhishingScan) {
break;
@@ -1722,7 +1848,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
break;
case MULTIPART:
cli_dbgmsg("Content-type 'multipart' handler\n");
- boundary = messageFindArgument(mainMessage, "boundary");
+ boundary = messageFindArgumentLast(mainMessage, "boundary");
if (mctx->wrkobj != NULL)
cli_jsonstr(mctx->wrkobj, "Boundary", boundary);
@@ -1837,33 +1963,46 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
int lines = 0;
message **m;
mbox_status old_rc;
+ bool partFailed = false;
+
+ if (haveTooManyMIMEPartsPerMessage((size_t)multiparts, mctx->ctx, &rc)) {
+ if (rc == VIRUS)
+ infected = true;
+ break;
+ }
m = cli_max_realloc(messages, ((multiparts + 1) * sizeof(message *)));
- if (m == NULL)
+ if (m == NULL) {
+ rc = FAIL;
break;
+ }
messages = m;
aMessage = messages[multiparts] = messageCreate();
if (aMessage == NULL) {
- multiparts--;
- /* if allocation failed the first time,
- * there's no point in retrying, just
- * break out */
+ rc = FAIL;
break;
}
messageSetCTX(aMessage, mctx->ctx);
+ size_t partHeaderBytes = 0;
+ size_t partHeaderCnt = 0;
+ size_t partLineFoldCnt = 0;
+
cli_dbgmsg("Now read in part %d\n", multiparts);
/*
* Ignore blank lines. There shouldn't be ANY
* but some viruses insert them
*/
- while ((t_line = t_line->t_next) != NULL)
- if (t_line->t_line &&
+ while ((t_line = t_line->t_next) != NULL) {
+ const char *data = t_line->t_line ? lineGetData(t_line->t_line) : NULL;
+
+ if (data &&
/*(cli_chomp(t_line->t_text) > 0))*/
- (strlen(lineGetData(t_line->t_line)) > 0))
+ (strlen(data) > 0))
break;
+ }
if (t_line == NULL) {
cli_dbgmsg("Empty part\n");
@@ -1891,6 +2030,33 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");
*/
+ if ((inhead || inMimeHead) && (line != NULL)) {
+ size_t lineLen;
+
+ if (hitLineFoldCnt(line, &partLineFoldCnt, mctx->ctx, &heuristicFound)) {
+ if (heuristicFound) {
+ rc = VIRUS;
+ infected = true;
+ }
+ break;
+ }
+
+ lineLen = strlen(line);
+ if ((partHeaderBytes > HEURISTIC_EMAIL_MAX_HEADER_BYTES) ||
+ (lineLen > HEURISTIC_EMAIL_MAX_HEADER_BYTES - partHeaderBytes)) {
+ partHeaderBytes = HEURISTIC_EMAIL_MAX_HEADER_BYTES + 1;
+ } else {
+ partHeaderBytes += lineLen;
+ }
+ if (haveTooManyHeaderBytes(partHeaderBytes, mctx->ctx, &heuristicFound)) {
+ if (heuristicFound) {
+ rc = VIRUS;
+ infected = true;
+ }
+ break;
+ }
+ }
+
if (inMimeHead) { /* continuation line */
if (line == NULL) {
/*inhead =*/inMimeHead = 0;
@@ -1911,13 +2077,27 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
* Content-Type: application/octet-stream;
* Content-Transfer-Encoding: base64
*/
- parseEmailHeader(aMessage, line, mctx->rfc821Table, mctx->ctx, &heuristicFound);
+ partHeaderCnt++;
+ if (haveTooManyEmailHeaders(partHeaderCnt, mctx->ctx, &heuristicFound)) {
+ if (heuristicFound) {
+ rc = VIRUS;
+ infected = true;
+ }
+ break;
+ }
+ if (parseEmailHeader(aMessage, line, mctx->rfc821Table,
+ mctx->ctx, &heuristicFound) == PARSE_HEADER_ALLOC_FAIL) {
+ rc = FAIL;
+ partFailed = true;
+ break;
+ }
if (heuristicFound) {
- rc = VIRUS;
+ rc = VIRUS;
+ infected = true;
break;
}
- while (isspace((int)*line))
+ while (isspace((unsigned char)*line))
line++;
if (*line == '\0') {
@@ -1929,6 +2109,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
} else if (inhead) { /* handling normal headers */
/*int quotes;*/
char *fullline, *ptr;
+ bool stopHeader = false;
if (line == NULL) {
/*
@@ -1981,7 +2162,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
inhead = 0;
continue;
}
- if (isspace((int)*line)) {
+ if (isspace((unsigned char)*line)) {
/*
* The first line is
* continuation line.
@@ -2019,9 +2200,12 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
continue;
}
- fullline = rfc822comments(line, NULL);
- if (fullline == NULL)
- fullline = cli_safer_strdup(line);
+ fullline = cli_safer_strdup(line);
+ if (fullline == NULL) {
+ rc = FAIL;
+ partFailed = true;
+ break;
+ }
/*quotes = count_quotes(fullline);*/
@@ -2034,12 +2218,37 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
*/
while (t_line && next_is_folded_header(t_line)) {
const char *data;
- size_t datasz;
+ size_t dataLen, datasz;
t_line = t_line->t_next;
data = lineGetData(t_line->t_line);
+ if (hitLineFoldLimit(&partLineFoldCnt, mctx->ctx, &heuristicFound)) {
+ if (heuristicFound) {
+ rc = VIRUS;
+ infected = true;
+ }
+ stopHeader = true;
+ break;
+ }
+
+ dataLen = strlen(data);
+ if ((partHeaderBytes > HEURISTIC_EMAIL_MAX_HEADER_BYTES) ||
+ (dataLen > HEURISTIC_EMAIL_MAX_HEADER_BYTES - partHeaderBytes)) {
+ partHeaderBytes = HEURISTIC_EMAIL_MAX_HEADER_BYTES + 1;
+ } else {
+ partHeaderBytes += dataLen;
+ }
+ if (haveTooManyHeaderBytes(partHeaderBytes, mctx->ctx, &heuristicFound)) {
+ if (heuristicFound) {
+ rc = VIRUS;
+ infected = true;
+ }
+ stopHeader = true;
+ break;
+ }
+
if (data[1] == '\0') {
/*
* Broken message: the
@@ -2053,11 +2262,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
break;
}
- datasz = strlen(fullline) + strlen(data) + 1;
+ datasz = strlen(fullline) + dataLen + 1;
ptr = cli_max_realloc(fullline, datasz);
- if (ptr == NULL)
+ if (ptr == NULL) {
+ rc = FAIL;
+ partFailed = true;
break;
+ }
fullline = ptr;
cli_strlcat(fullline, data, datasz);
@@ -2065,13 +2277,35 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
/*quotes = count_quotes(data);*/
}
+ if (partFailed || stopHeader) {
+ free(fullline);
+ break;
+ }
+
cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
multiparts, fullline);
- parseEmailHeader(aMessage, fullline, mctx->rfc821Table, mctx->ctx, &heuristicFound);
+ partHeaderCnt++;
+ if (haveTooManyEmailHeaders(partHeaderCnt, mctx->ctx, &heuristicFound)) {
+ free(fullline);
+ if (heuristicFound) {
+ rc = VIRUS;
+ infected = true;
+ }
+ break;
+ }
+ if (parseEmailHeader(aMessage, fullline, mctx->rfc821Table, mctx->ctx, &heuristicFound) == PARSE_HEADER_ALLOC_FAIL) {
+ rc = FAIL;
+ partFailed = true;
+ }
free(fullline);
+ if (partFailed) {
+ break;
+ }
if (heuristicFound) {
- rc = VIRUS;
+ rc = VIRUS;
+ infected = true;
+ break;
}
} else if (boundaryEnd(line, boundary)) {
/*
@@ -2088,8 +2322,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
inhead = 1;
break;
} else {
- if (messageAddLine(aMessage, t_line->t_line) < 0)
+ if (messageAddLine(aMessage, t_line->t_line) < 0) {
+ rc = FAIL;
+ partFailed = true;
break;
+ }
lines++;
}
} while ((t_line = t_line->t_next) != NULL);
@@ -2097,6 +2334,14 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
cli_dbgmsg("Part %d has %d lines, rc = %d\n",
multiparts, lines, (int)rc);
+ if (partFailed || rc == FAIL) {
+ if (messages[multiparts]) {
+ messageDestroy(messages[multiparts]);
+ messages[multiparts] = NULL;
+ }
+ break;
+ }
+
/*
* Only save in the array of messages if some
* decision will be taken on whether to scan.
@@ -2149,6 +2394,27 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
free((char *)boundary);
+ if (rc == FAIL) {
+ if (mainMessage && (mainMessage != messageIn)) {
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ }
+ if (aText && (textIn == NULL)) {
+ textDestroy(aText);
+ aText = NULL;
+ }
+ if (messages) {
+ for (i = 0; i < multiparts; i++) {
+ if (messages[i])
+ messageDestroy(messages[i]);
+ }
+ free(messages);
+ messages = NULL;
+ }
+ mctx->wrkobj = saveobj;
+ return rc;
+ }
+
if (haveTooManyMIMEPartsPerMessage(multiparts, mctx->ctx, &rc)) {
if (messages) {
for (i = 0; i < multiparts; i++) {
@@ -2241,8 +2507,13 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
if (htmltextPart >= 0 && messages) {
if (messageGetBody(messages[htmltextPart])) {
+ int textStatus = 0;
- aText = textAddMessage(aText, messages[htmltextPart]);
+ aText = textAddMessageWithStatus(aText, messages[htmltextPart], &textStatus);
+ if (textStatus < 0) {
+ rc = FAIL;
+ break;
+ }
}
} else {
/*
@@ -2370,7 +2641,7 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
break;
default:
cli_dbgmsg("Unexpected mime sub type\n");
- rc = CL_EFORMAT;
+ rc = FORMAT_ERROR;
break;
}
@@ -2425,6 +2696,11 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
cli_dbgmsg("Decode rfc822\n");
messageSetCTX(m, mctx->ctx);
+ if (m->isTruncated) {
+ rc = FAIL;
+ messageDestroy(m);
+ break;
+ }
if (mainMessage && (mainMessage != messageIn)) {
messageDestroy(mainMessage);
@@ -2524,6 +2800,19 @@ parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int re
}
}
+ if (rc == FAIL) {
+ if (aText && (textIn == NULL)) {
+ textDestroy(aText);
+ aText = NULL;
+ }
+ if (mainMessage && (mainMessage != messageIn)) {
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ }
+ mctx->wrkobj = saveobj;
+ return rc;
+ }
+
if (aText && (textIn == NULL)) {
/* Look for a bounce in the text (non mime encoded) portion */
const text *t;
@@ -2774,6 +3063,32 @@ boundaryStart(const char *line, const char *boundary)
if (boundary == NULL)
return 0;
+ if ((*line != '-') && (*line != '('))
+ return 0;
+
+ if ((*line == '-') && (line[1] != '-'))
+ return 0;
+
+ if (strchr(line, '-') == NULL)
+ return 0;
+
+ if ((line[0] == '-') && (line[1] == '-')) {
+ size_t boundary_len = strlen(boundary);
+ const char *tail = &line[2];
+
+ if (strncasecmp(tail, boundary, boundary_len) == 0) {
+ tail += boundary_len;
+ while (*tail == ' ')
+ tail++;
+ if ((*tail == '\0') || (*tail == '\r') || (*tail == '\n')) {
+ cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
+ return 1;
+ }
+ if ((tail[0] == '-') && (tail[1] == '-'))
+ return 0;
+ }
+ }
+
newline = strdup(line);
if (!(newline))
newline = (char *)line;
@@ -2891,77 +3206,51 @@ static int
boundaryEnd(const char *line, const char *boundary)
{
size_t len;
- char *newline, *p, *p2;
+ const char *p;
if (line == NULL || *line == '\0')
return 0;
- p = newline = strdup(line);
- if (!(newline)) {
- p = (char *)line;
- newline = (char *)line;
- }
+ if (boundary == NULL)
+ return 0;
- if (newline != line && strlen(line)) {
- /* Trim trailing spaces */
- p2 = newline + strlen(line) - 1;
- while (p2 >= newline && *p2 == ' ')
- *(p2--) = '\0';
- }
+ /* cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary); */
- /* cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", newline, boundary); */
+ p = line;
if (*p++ != '-') {
- if (newline != line)
- free(newline);
return 0;
}
if (*p++ != '-') {
- if (newline != line)
- free(newline);
-
return 0;
}
len = strlen(boundary);
if (strncasecmp(p, boundary, len) != 0) {
- if (newline != line)
- free(newline);
-
return 0;
}
- /*
- * Use < rather than == because some broken mails have white
- * space after the boundary
- */
- if (strlen(p) < (len + 2)) {
- if (newline != line)
- free(newline);
+ if (p[len] != '-') {
return 0;
}
- p = &p[len];
- if (*p++ != '-') {
- if (newline != line)
- free(newline);
+ p = &p[len + 1];
+ if (*p++ != '-') {
return 0;
}
- if (*p == '-') {
- /* cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, p); */
- if (newline != line)
- free(newline);
-
- return 1;
+ while (isblank((unsigned char)*p)) {
+ p++;
}
- if (newline != line)
- free(newline);
+ if ((*p != '\0') && (*p != '\r') && (*p != '\n')) {
+ return 0;
+ }
- return 0;
+ /* cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, p); */
+ return 1;
}
/*
@@ -3069,14 +3358,14 @@ strip(char *buf, int len)
do
if (*ptr)
*ptr = '\0';
- while ((--len >= 0) && (!isgraph(*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
+ while ((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
#else /* more characters can be displayed on DOS */
do
#ifndef REAL_MODE_DOS
if (*ptr) /* C8.0 puts into a text area */
#endif
*ptr = '\0';
- while ((--len >= 0) && ((*--ptr == '\0') || isspace((int)(*ptr & 0xFF))));
+ while ((--len >= 0) && ((*--ptr == '\0') || isspace((unsigned char)*ptr)));
#endif
return ((size_t)(len + 1));
}
@@ -3094,8 +3383,97 @@ strstrip(char *s)
return (strip(s, strlen(s) + 1));
}
+static bool
+isMimeParameter(const char *arg, const char *variable)
+{
+ size_t len;
+
+ if (arg == NULL || variable == NULL)
+ return false;
+
+ while (isspace((unsigned char)*arg))
+ arg++;
+
+ len = strlen(variable);
+ if (strncasecmp(arg, variable, len) != 0)
+ return false;
+
+ arg += len;
+ while (isspace((unsigned char)*arg))
+ arg++;
+
+ if (*arg == '*') {
+ arg++;
+ while (isdigit((unsigned char)*arg))
+ arg++;
+ if (*arg == '*')
+ arg++;
+ while (isspace((unsigned char)*arg))
+ arg++;
+ }
+
+ return (*arg == '=') || (*arg == ':');
+}
+
+static const char *
+nextMimeArgument(const char *ptr, char *buf, size_t buflen)
+{
+ const char *p;
+
+ if (ptr == NULL || buf == NULL || buflen == 0)
+ return NULL;
+
+ p = ptr;
+ for (;;) {
+ bool inquote = false, backslash = false;
+ char *out = buf;
+
+ while (*p && *p != ';')
+ p++;
+ if (*p == '\0')
+ return NULL;
+ p++;
+
+ while (isspace((unsigned char)*p))
+ p++;
+
+ while (*p) {
+ if (backslash) {
+ backslash = false;
+ } else {
+ switch (*p) {
+ case '\\':
+ backslash = true;
+ break;
+ case '"':
+ inquote = !inquote;
+ break;
+ case ';':
+ if (!inquote)
+ goto done;
+ break;
+ }
+ }
+
+ if ((size_t)(out - buf) < buflen - 1)
+ *out++ = *p;
+ p++;
+ }
+
+ done:
+ *out = '\0';
+ strstrip(buf);
+
+ if (buf[0] != '\0')
+ return p;
+
+ if (*p == '\0')
+ return NULL;
+ }
+}
+
/*
- * Returns 0 for OK, -1 for error
+ * Returns 0 for OK, PARSE_HEADER_ALLOC_FAIL for allocation failure.
*/
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound)
@@ -3104,18 +3482,13 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
const char *ptr;
int commandNumber;
size_t argCnt = 0;
+ size_t buflen = 0;
*heuristicFound = false;
cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
- copy = rfc822comments(cmd, NULL);
- if (copy) {
- commandNumber = tableFind(rfc821Table, copy);
- free(copy);
- } else {
- commandNumber = tableFind(rfc821Table, cmd);
- }
+ commandNumber = tableFindRfc822Header(rfc821Table, cmd);
copy = rfc822comments(arg, NULL);
@@ -3155,14 +3528,13 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
*/
cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
else {
- int i;
-
- buf = cli_max_malloc(strlen(ptr) + 1);
+ buflen = strlen(ptr) + 1;
+ buf = cli_max_malloc(buflen);
if (buf == NULL) {
- cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
+ cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)buflen);
if (copy)
free(copy);
- return -1;
+ return PARSE_HEADER_ALLOC_FAIL;
}
/*
* Some clients are broken and
@@ -3180,7 +3552,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
* the quotes, it doesn't handle
* them properly
*/
- while (isspace((const unsigned char)*ptr))
+ while (isspace((unsigned char)*ptr))
ptr++;
if (ptr[0] == '\"')
ptr++;
@@ -3205,7 +3577,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
if (copy)
free(copy);
free(buf);
- return -1;
+ return PARSE_HEADER_ALLOC_FAIL;
}
for (;;) {
#ifdef CL_THREAD_SAFE
@@ -3222,10 +3594,10 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
if (s == NULL)
break;
if (set) {
- size_t len = strstrip(s) - 1;
- if (s[len] == '\"') {
- s[len] = '\0';
- len = strstrip(s);
+ size_t len = strstrip(s);
+ if ((len > 0) && (s[len - 1] == '\"')) {
+ s[len - 1] = '\0';
+ len = strstrip(s);
}
if (len) {
if (strchr(s, ' '))
@@ -3254,8 +3626,8 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
* Content-Type:', arg='multipart/mixed; boundary=foo
* we find the boundary argument set it
*/
- i = 1;
- while (cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
+ ptr = nextMimeArgument(ptr, buf, buflen);
+ while (ptr != NULL) {
cli_dbgmsg("mimeArgs = '%s'\n", buf);
argCnt++;
@@ -3263,6 +3635,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
break;
}
messageAddArguments(m, buf);
+ ptr = nextMimeArgument(ptr, buf, buflen);
}
}
break;
@@ -3270,17 +3643,32 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
messageSetEncoding(m, ptr);
break;
case CONTENT_DISPOSITION:
- buf = cli_max_malloc(strlen(ptr) + 1);
+ buflen = strlen(ptr) + 1;
+ buf = cli_max_malloc(buflen);
if (buf == NULL) {
- cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
+ cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)buflen);
if (copy)
free(copy);
- return -1;
+ return PARSE_HEADER_ALLOC_FAIL;
}
p = cli_strtokbuf(ptr, 0, ";", buf);
if (p && *p) {
+ const char *disposition_arg;
+
messageSetDispositionType(m, p);
- messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
+ disposition_arg = nextMimeArgument(ptr, buf, buflen);
+ while (disposition_arg != NULL) {
+ argCnt++;
+ if (haveTooManyMIMEArguments(argCnt, ctx, heuristicFound)) {
+ break;
+ }
+ if (isMimeParameter(buf, "boundary")) {
+ cli_dbgmsg("Ignoring boundary parameter in Content-Disposition header\n");
+ } else {
+ messageAddArgument(m, buf);
+ }
+ disposition_arg = nextMimeArgument(disposition_arg, buf, buflen);
+ }
}
if (!messageHasFilename(m))
/*
@@ -3345,7 +3733,7 @@ rfc822comments(const char *in, char *out)
return NULL;
}
- while (isspace((const unsigned char)*in)) {
+ while (isspace((unsigned char)*in)) {
in++;
}
@@ -3447,7 +3835,7 @@ rfc2047(const char *in)
if (*in == '\0')
break;
encoding = *++in;
- encoding = (char)tolower(encoding);
+ encoding = (char)tolower((unsigned char)encoding);
if ((encoding != 'q') && (encoding != 'b')) {
cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
@@ -3527,12 +3915,240 @@ rfc2047(const char *in)
/*
* Handle partial messages
*/
+static void
+freePartialPartFiles(char **partfiles, unsigned int total_parts)
+{
+ unsigned int n;
+
+ if (partfiles == NULL)
+ return;
+
+ for (n = 1; n <= total_parts; n++)
+ free(partfiles[n]);
+ free(partfiles);
+}
+
+static unsigned int
+partialFilenamePartNumber(const char *filename, const char *md5_hex, unsigned int max_part)
+{
+ const char *idpart;
+ const char *partstr;
+ char *end = NULL;
+ unsigned long parsed;
+ size_t md5_len;
+
+ if ((filename == NULL) || (md5_hex == NULL))
+ return 0;
+
+ idpart = strchr(filename, '_');
+ if (idpart == NULL)
+ return 0;
+ idpart++;
+
+ md5_len = strlen(md5_hex);
+ if ((strncmp(idpart, md5_hex, md5_len) != 0) || (idpart[md5_len] != '-'))
+ return 0;
+
+ partstr = &idpart[md5_len + 1];
+ if (*partstr == '\0')
+ return 0;
+
+ errno = 0;
+ parsed = strtoul(partstr, &end, 10);
+ if ((errno != 0) || (end == partstr) || (*end != '\0') ||
+ (parsed == 0) || (parsed > max_part) || (parsed > UINT_MAX))
+ return 0;
+
+ return (unsigned int)parsed;
+}
+
+static int
+removeOldPartialFile(const char *fullname, time_t now)
+{
+ int test_fd;
+ STATBUF statb;
+
+ test_fd = open(fullname, O_RDONLY | O_BINARY);
+ if (test_fd < 0)
+ return 0;
+
+ if (FSTAT(test_fd, &statb) == 0) {
+ if ((now > statb.st_mtime) &&
+ (now - statb.st_mtime > (time_t)(7 * 24 * 3600))) {
+ close(test_fd);
+ test_fd = -1;
+
+ if (cli_unlink(fullname)) {
+ return -1;
+ }
+ }
+ }
+
+ if (test_fd >= 0)
+ close(test_fd);
+ return 0;
+}
+
+static int
+reassemblePartialMessage(mbox_ctx *mctx, message *m, const char *pdir, char *id, const char *md5_hex, unsigned int total_parts)
+{
+ DIR *dd;
+ struct dirent *dent;
+ char **partfiles = NULL;
+ char outname[PATH_MAX + 1];
+ FILE *fout = NULL;
+ time_t now;
+ unsigned int n;
+ int rc = 0;
+ bool keep_tmp;
+
+ dd = opendir(pdir);
+ if (dd == NULL)
+ return 0;
+
+ partfiles = cli_max_calloc((size_t)total_parts + 1, sizeof(*partfiles));
+ if (partfiles == NULL) {
+ closedir(dd);
+ return -1;
+ }
+
+ keep_tmp = (m->ctx && m->ctx->engine && m->ctx->engine->keeptmp);
+ time(&now);
+
+ while ((dent = readdir(dd)) != NULL) {
+ char fullname[PATH_MAX + 1];
+ unsigned int part;
+ int pathlen;
+
+ if (dent->d_ino == 0)
+ continue;
+
+ if (!strcmp(".", dent->d_name) || !strcmp("..", dent->d_name))
+ continue;
+
+ pathlen = snprintf(fullname, sizeof(fullname), "%s" PATHSEP "%s", pdir, dent->d_name);
+ if ((pathlen < 0) || ((size_t)pathlen >= sizeof(fullname))) {
+ cli_dbgmsg("reassemblePartialMessage: partial path is too long\n");
+ continue;
+ }
+
+ part = partialFilenamePartNumber(dent->d_name, md5_hex, total_parts);
+ if (part == 0) {
+ if (!keep_tmp && (removeOldPartialFile(fullname, now) < 0)) {
+ rc = -1;
+ break;
+ }
+ continue;
+ }
+
+ if (partfiles[part] != NULL)
+ continue;
+
+ partfiles[part] = cli_safer_strdup(fullname);
+ if (partfiles[part] == NULL) {
+ rc = -1;
+ break;
+ }
+ }
+ closedir(dd);
+
+ if (rc != 0)
+ goto done;
+
+ for (n = 1; n <= total_parts; n++) {
+ if (partfiles[n] == NULL) {
+ cli_dbgmsg("reassemblePartialMessage: missing partial message part %u of %u\n", n, total_parts);
+ goto done;
+ }
+ }
+
+ sanitiseName(id);
+
+ {
+ int pathlen = snprintf(outname, sizeof(outname), "%s" PATHSEP "%s", mctx->dir, id);
+
+ if ((pathlen < 0) || ((size_t)pathlen >= sizeof(outname))) {
+ cli_errmsg("reassemblePartialMessage: output filename is too long\n");
+ rc = -1;
+ goto done;
+ }
+ }
+
+ cli_dbgmsg("outname: %s\n", outname);
+
+ fout = fopen(outname, "wb");
+ if (fout == NULL) {
+ cli_errmsg("Can't open '%s' for writing", outname);
+ rc = -1;
+ goto done;
+ }
+
+ for (n = 1; n <= total_parts; n++) {
+ FILE *fin;
+ char buffer[BUFSIZ];
+ int nblanks = 0;
+
+ fin = fopen(partfiles[n], "rb");
+ if (fin == NULL) {
+ cli_errmsg("Can't open '%s' for reading", partfiles[n]);
+ rc = -1;
+ break;
+ }
+
+ while (fgets(buffer, sizeof(buffer) - 1, fin) != NULL) {
+ if (buffer[0] == '\n') {
+ nblanks++;
+ continue;
+ }
+
+ while (nblanks > 0) {
+ if (putc('\n', fout) == EOF) {
+ rc = -1;
+ break;
+ }
+ nblanks--;
+ }
+ if (rc != 0)
+ break;
+
+ if (fputs(buffer, fout) == EOF) {
+ rc = -1;
+ break;
+ }
+ }
+
+ if (ferror(fin))
+ rc = -1;
+ fclose(fin);
+
+ if (rc != 0)
+ break;
+
+ if (!keep_tmp && cli_unlink(partfiles[n])) {
+ rc = -1;
+ break;
+ }
+ }
+
+done:
+ if (fout != NULL) {
+ if ((fclose(fout) != 0) && (rc == 0))
+ rc = -1;
+ if (rc != 0)
+ cli_unlink(outname);
+ }
+ freePartialPartFiles(partfiles, total_parts);
+
+ return rc;
+}
+
static int
rfc1341(mbox_ctx *mctx, message *m)
{
char *arg, *id, *number, *total, *oldfilename;
const char *tmpdir = NULL;
- int n;
+ unsigned int part_number, total_parts = 0;
+ bool have_total = false;
char pdir[PATH_MAX + 1];
unsigned char md5_val[16];
char *md5_hex;
@@ -3554,13 +4170,27 @@ rfc1341(mbox_ctx *mctx, message *m)
tmpdir = cli_gettmpdir();
}
- snprintf(pdir, sizeof(pdir) - 1, "%s" PATHSEP "clamav-partial", tmpdir);
+ {
+ int pathlen = snprintf(pdir, sizeof(pdir), "%s" PATHSEP "clamav-partial", tmpdir);
- if ((mkdir(pdir, S_IRUSR | S_IWUSR) < 0) && (errno != EEXIST)) {
- cli_errmsg("Can't create the directory '%s'\n", pdir);
- free(id);
- return -1;
- } else if (errno == EEXIST) {
+ if ((pathlen < 0) || ((size_t)pathlen >= sizeof(pdir))) {
+ cli_errmsg("Partial directory path is too long\n");
+ free(id);
+ return -1;
+ }
+ }
+
+ if (mkdir(pdir, S_IRWXU) < 0) {
+ int mkdir_errno = errno;
+
+ if (mkdir_errno != EEXIST) {
+ cli_errmsg("Can't create the directory '%s'\n", pdir);
+ free(id);
+ return -1;
+ }
+ }
+
+ {
STATBUF statb;
if (CLAMSTAT(pdir, &statb) < 0) {
@@ -3570,6 +4200,16 @@ rfc1341(mbox_ctx *mctx, message *m)
free(id);
return -1;
}
+ if (!S_ISDIR(statb.st_mode)) {
+ cli_errmsg("Partial path %s is not a directory\n", pdir);
+ free(id);
+ return -1;
+ }
+#if defined(HAVE_UNISTD_H) && !defined(_WIN32) && !defined(_WIN64)
+ if (statb.st_uid != geteuid())
+ cli_warnmsg("Partial directory %s is owned by uid %lu, expected uid %lu\n",
+ pdir, (unsigned long)statb.st_uid, (unsigned long)geteuid());
+#endif
if (statb.st_mode & 077)
cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
pdir,
@@ -3587,13 +4227,47 @@ rfc1341(mbox_ctx *mctx, message *m)
return -1;
}
+ if (!parsePositiveUnsignedArgument(number, HEURISTIC_EMAIL_MAX_PARTIAL_MESSAGE_PARTS, &part_number)) {
+ cli_warnmsg("Invalid message/partial number '%s'\n", number);
+ free(id);
+ free(number);
+ return -1;
+ }
+
+ total = (char *)messageFindArgument(m, "total");
+ cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
+ if (total) {
+ have_total = true;
+ if (!parsePositiveUnsignedArgument(total, HEURISTIC_EMAIL_MAX_PARTIAL_MESSAGE_PARTS, &total_parts) ||
+ (part_number > total_parts)) {
+ cli_warnmsg("Invalid message/partial total '%s' for part '%s'\n", total, number);
+ free(total);
+ free(id);
+ free(number);
+ return -1;
+ }
+ free(total);
+ }
+
oldfilename = messageGetFilename(m);
- arg = cli_max_malloc(10 + strlen(id) + strlen(number));
- if (arg) {
- sprintf(arg, "filename=%s%s", id, number);
- messageAddArgument(m, arg);
- free(arg);
+ {
+ size_t id_len = strlen(id);
+ size_t number_len = strlen(number);
+ size_t arg_len;
+
+ if (id_len > (size_t)-1 - number_len - sizeof("filename=")) {
+ free(id);
+ free(number);
+ return -1;
+ }
+ arg_len = id_len + number_len + sizeof("filename=");
+ arg = cli_max_malloc(arg_len);
+ if (arg) {
+ snprintf(arg, arg_len, "filename=%s%s", id, number);
+ messageAddArgument(m, arg);
+ free(arg);
+ }
}
if (oldfilename) {
@@ -3601,165 +4275,36 @@ rfc1341(mbox_ctx *mctx, message *m)
free(oldfilename);
}
- n = atoi(number);
cl_hash_data("md5", id, strlen(id), md5_val, NULL);
md5_hex = cli_str2hex((const char *)md5_val, 16);
if (!md5_hex) {
free(id);
free(number);
- return CL_EMEM;
+ return -1;
}
- if (messageSavePartial(m, pdir, md5_hex, n) < 0) {
+ if (messageSavePartial(m, pdir, md5_hex, part_number) != CL_SUCCESS) {
free(md5_hex);
free(id);
free(number);
return -1;
}
- total = (char *)messageFindArgument(m, "total");
- cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
- if (total) {
- int t = atoi(total);
- DIR *dd = NULL;
-
- free(total);
+ if (have_total) {
/*
* If it's the last one - reassemble it
* FIXME: this assumes that we receive the parts in order
*/
- if ((n == t) && ((dd = opendir(pdir)) != NULL)) {
- FILE *fout;
- char outname[PATH_MAX + 1];
- time_t now;
-
- sanitiseName(id);
-
- snprintf(outname, sizeof(outname) - 1, "%s" PATHSEP "%s", mctx->dir, id);
+ if (part_number == total_parts) {
+ int reassemble_rc = reassemblePartialMessage(mctx, m, pdir, id, md5_hex, total_parts);
- cli_dbgmsg("outname: %s\n", outname);
-
- fout = fopen(outname, "wb");
- if (fout == NULL) {
- cli_errmsg("Can't open '%s' for writing", outname);
- free(id);
+ if (reassemble_rc != 0) {
free(number);
+ free(id);
free(md5_hex);
- closedir(dd);
- return -1;
- }
-
- time(&now);
- for (n = 1; n <= t; n++) {
- char filename[NAME_MAX + 1];
- struct dirent *dent;
-
- snprintf(filename, sizeof(filename), "_%s-%u", md5_hex, n);
-
- while ((dent = readdir(dd))) {
- FILE *fin;
- char buffer[BUFSIZ], fullname[PATH_MAX + 1 + 256 + 1];
- int nblanks;
- STATBUF statb;
- const char *dentry_idpart;
- int test_fd;
-
- if (dent->d_ino == 0)
- continue;
-
- if (!strcmp(".", dent->d_name) ||
- !strcmp("..", dent->d_name))
- continue;
- snprintf(fullname, sizeof(fullname) - 1,
- "%s" PATHSEP "%s", pdir, dent->d_name);
- dentry_idpart = strchr(dent->d_name, '_');
-
- if (!dentry_idpart ||
- strcmp(filename, dentry_idpart) != 0) {
- if (!m->ctx->engine->keeptmp)
- continue;
-
- if ((test_fd = open(fullname, O_RDONLY | O_BINARY)) < 0)
- continue;
-
- if (FSTAT(test_fd, &statb) < 0) {
- close(test_fd);
- continue;
- }
-
- if (now - statb.st_mtime > (time_t)(7 * 24 * 3600)) {
- if (cli_unlink(fullname)) {
- cli_unlink(outname);
- fclose(fout);
- free(md5_hex);
- free(id);
- free(number);
- closedir(dd);
- close(test_fd);
- return -1;
- }
- }
-
- close(test_fd);
- continue;
- }
-
- fin = fopen(fullname, "rb");
- if (fin == NULL) {
- cli_errmsg("Can't open '%s' for reading", fullname);
- fclose(fout);
- cli_unlink(outname);
- free(md5_hex);
- free(id);
- free(number);
- closedir(dd);
- return -1;
- }
- nblanks = 0;
- while (fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
- /*
- * Ensure that trailing newlines
- * aren't copied
- */
- if (buffer[0] == '\n')
- nblanks++;
- else {
- if (nblanks)
- do {
- if (putc('\n', fout) == EOF) break;
- } while (--nblanks > 0);
- if (nblanks || fputs(buffer, fout) == EOF) {
- fclose(fin);
- fclose(fout);
- cli_unlink(outname);
- free(md5_hex);
- free(id);
- free(number);
- closedir(dd);
- return -1;
- }
- }
- fclose(fin);
-
- /* don't unlink if leave temps */
- if (!m->ctx->engine->keeptmp) {
- if (cli_unlink(fullname)) {
- fclose(fout);
- cli_unlink(outname);
- free(md5_hex);
- free(id);
- free(number);
- closedir(dd);
- return -1;
- }
- }
- break;
- }
- rewinddir(dd);
+ return reassemble_rc;
}
- closedir(dd);
- fclose(fout);
}
}
free(number);
@@ -4060,7 +4605,7 @@ isBounceStart(mbox_ctx *mctx, const char *line)
do
if (*line == ' ')
numSpaces++;
- else if (isdigit((*line) & 0xFF))
+ else if (isdigit((unsigned char)*line))
numDigits++;
while (*++line != '\0');
@@ -4139,7 +4684,7 @@ exportBounceMessage(mbox_ctx *mctx, text *start)
if (cli_strtokbuf(txt, 0, ":", cmd) == NULL)
continue;
- switch (tableFind(mctx->rfc821Table, cmd)) {
+ switch (tableFindRfc822Header(mctx->rfc821Table, cmd)) {
case CONTENT_TRANSFER_ENCODING:
if ((strstr(txt, "7bit") == NULL) &&
(strstr(txt, "8bit") == NULL))
@@ -4376,7 +4921,14 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
* Scan in memory, faster but is open to DoS attacks
* when many nested levels are involved.
*/
- body = parseEmailHeaders(aMessage, mctx->rfc821Table);
+ {
+ bool heuristicFound = false;
+
+ body = parseEmailHeaders(aMessage, mctx->rfc821Table, &heuristicFound);
+ if (heuristicFound) {
+ *rc = VIRUS;
+ }
+ }
/*
* We've finished with the
@@ -4392,9 +4944,13 @@ do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, m
if (body) {
messageSetCTX(body, mctx->ctx);
- *rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
- if ((*rc == OK) && messageContainsVirus(body))
- *rc = VIRUS;
+ if (body->isTruncated) {
+ *rc = FAIL;
+ } else {
+ *rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
+ if ((*rc == OK) && messageContainsVirus(body))
+ *rc = VIRUS;
+ }
messageDestroy(body);
}
@@ -4525,12 +5081,14 @@ next_is_folded_header(const text *t)
return false;
data = lineGetData(next->t_line);
+ if (data == NULL)
+ return false;
/*
* Section B.2 of RFC822 says TAB or SPACE means a continuation of the
* previous entry.
*/
- if (isblank(data[0]))
+ if (isblank((unsigned char)data[0]))
return true;
if (strchr(data, '=') == NULL)
@@ -4557,6 +5115,8 @@ next_is_folded_header(const text *t)
* verifier we need to handle these
*/
data = lineGetData(t->t_line);
+ if (data == NULL)
+ return false;
ptr = strchr(data, '\0');
diff --git a/libclamav/message.c b/libclamav/message.c
index 91505ec44..33c12f78e 100644
--- a/libclamav/message.c
+++ b/libclamav/message.c
@@ -226,7 +226,7 @@ int messageSetMimeType(message *mess, const char *type)
cli_dbgmsg("messageSetMimeType: '%s'\n", type);
/* Ignore leading spaces */
- while (!isalpha(*type))
+ while (!isalpha((unsigned char)*type))
if (*type++ == '\0')
return 0;
@@ -367,7 +367,7 @@ void messageSetDispositionType(message *m, const char *disptype)
* that something is wrong if we get that - maybe we should force a
* scan of this part
*/
- while (*disptype && isspace((int)*disptype))
+ while (*disptype && isspace((unsigned char)*disptype))
disptype++;
if (*disptype) {
m->mimeDispositionType = cli_safer_strdup(disptype);
@@ -403,7 +403,7 @@ void messageAddArgument(message *m, const char *arg)
if (arg == NULL)
return; /* Note: this is not an error condition */
- while (isspace(*arg))
+ while (isspace((unsigned char)*arg))
arg++;
if (*arg == '\0')
@@ -493,10 +493,10 @@ void messageAddArguments(message *m, const char *s)
while (*string) {
const char *key, *cptr;
- char *data, *field;
+ char *data, *field = NULL;
size_t datasz = 0;
- if (isspace(*string & 0xff) || (*string == ';')) {
+ if (isspace((unsigned char)*string) || (*string == ';')) {
string++;
continue;
}
@@ -538,7 +538,7 @@ void messageAddArguments(message *m, const char *s)
* or tspecials>
* But too many MUAs ignore this
*/
- while (isspace(*string) && (*string != '\0'))
+ while (isspace((unsigned char)*string) && (*string != '\0'))
string++;
cptr = string;
@@ -628,7 +628,7 @@ void messageAddArguments(message *m, const char *s)
* The field is not in quotes, so look for the closing
* white space
*/
- while ((*string != '\0') && !isspace(*string))
+ while ((*string != '\0') && !isspace((unsigned char)*string))
string++;
len = (size_t)string - (size_t)key + 1;
@@ -661,6 +661,53 @@ messageGetArgument(const message *m, size_t arg)
return (m->mimeArguments[arg]) ? m->mimeArguments[arg] : "";
}
+static char *
+messageArgumentValue(const char *ptr, const char *variable)
+{
+ size_t len;
+
+ if ((ptr == NULL) || (*ptr == '\0') || variable == NULL)
+ return NULL;
+
+ len = strlen(variable);
+ if (strncasecmp(ptr, variable, len) == 0) {
+ ptr = &ptr[len];
+ while (isspace((unsigned char)*ptr))
+ ptr++;
+ if (*ptr != '=') {
+ cli_dbgmsg("messageArgumentValue: no '=' sign found in MIME header '%s' (%s)\n", variable, ptr);
+ return NULL;
+ }
+ ptr++;
+ if ((strlen(ptr) > 1) && (*ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
+ /* Remove any quote characters */
+ char *ret = cli_safer_strdup(++ptr);
+ char *p;
+
+ if (ret == NULL)
+ return NULL;
+
+ /*
+ * fix un-quoting of boundary strings from
+ * header, occurs if boundary was given as
+ * 'boundary="_Test_";'
+ *
+ * At least two quotes in string, assume
+ * quoted argument
+ * end string at next quote
+ */
+ if ((p = strchr(ret, '"')) != NULL) {
+ ret[strlen(ret) - 1] = '\0';
+ *p = '\0';
+ }
+ return ret;
+ }
+ return cli_safer_strdup(ptr);
+ }
+
+ return NULL;
+}
+
/*
* Find a MIME variable from the header and return a COPY to the value of that
* variable. The caller must free the copy
@@ -669,61 +716,60 @@ char *
messageFindArgument(const message *m, const char *variable)
{
size_t i;
- size_t len;
if (m == NULL || variable == NULL) {
cli_errmsg("Internal email parser error: invalid arguments when finding message arguments\n");
return NULL;
}
- len = strlen(variable);
-
for (i = 0; i < m->numberOfArguments; i++) {
const char *ptr;
+ char *ret;
ptr = messageGetArgument(m, i);
if ((ptr == NULL) || (*ptr == '\0'))
continue;
#ifdef CL_DEBUG
- cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n",
- (unsigned long)len, variable, ptr);
+ cli_dbgmsg("messageFindArgument: compare %s with %s\n", variable, ptr);
#endif
- if (strncasecmp(ptr, variable, len) == 0) {
- ptr = &ptr[len];
- while (isspace(*ptr))
- ptr++;
- if (*ptr != '=') {
- cli_dbgmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
- return NULL;
- }
- ptr++;
- if ((strlen(ptr) > 1) && (*ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
- /* Remove any quote characters */
- char *ret = cli_safer_strdup(++ptr);
- char *p;
+ ret = messageArgumentValue(ptr, variable);
+ if (ret)
+ return ret;
+ }
+ return NULL;
+}
- if (ret == NULL)
- return NULL;
+/*
+ * Find the last MIME variable from the header and return a COPY to the value
+ * of that variable. The caller must free the copy.
+ */
+char *
+messageFindArgumentLast(const message *m, const char *variable)
+{
+ size_t i;
+ char *match = NULL;
- /*
- * fix un-quoting of boundary strings from
- * header, occurs if boundary was given as
- * 'boundary="_Test_";'
- *
- * At least two quotes in string, assume
- * quoted argument
- * end string at next quote
- */
- if ((p = strchr(ret, '"')) != NULL) {
- ret[strlen(ret) - 1] = '\0';
- *p = '\0';
- }
- return ret;
- }
- return cli_safer_strdup(ptr);
+ if (m == NULL || variable == NULL) {
+ cli_errmsg("Internal email parser error: invalid arguments when finding message arguments\n");
+ return NULL;
+ }
+
+ for (i = 0; i < m->numberOfArguments; i++) {
+ const char *ptr;
+ char *ret;
+
+ ptr = messageGetArgument(m, i);
+#ifdef CL_DEBUG
+ cli_dbgmsg("messageFindArgumentLast: compare %s with %s\n", variable, ptr);
+#endif
+ ret = messageArgumentValue(ptr, variable);
+ if (ret) {
+ free(match);
+ match = ret;
}
}
- return NULL;
+
+ return match;
}
char *
@@ -763,11 +809,11 @@ messageHasArgument(const message *m, const char *variable)
#endif
if (strncasecmp(ptr, variable, len) == 0) {
ptr = &ptr[len];
- while (isspace(*ptr))
+ while (isspace((unsigned char)*ptr))
ptr++;
if (*ptr != '=') {
cli_dbgmsg("messageHasArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
- return 0;
+ continue;
}
return 1;
}
@@ -793,7 +839,7 @@ void messageSetEncoding(message *m, const char *enctype)
/*m->encodingType = EEXTENSION;*/
- while (isblank(*enctype))
+ while (isblank((unsigned char)*enctype))
enctype++;
cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);
@@ -815,9 +861,9 @@ void messageSetEncoding(message *m, const char *enctype)
for (e = encoding_map; e->string; e++) {
int sim;
- const char lowertype = tolower(type[0]);
+ const char lowertype = (char)tolower((unsigned char)type[0]);
- if ((lowertype != tolower(e->string[0])) && (lowertype != 'x'))
+ if ((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
/*
* simil is expensive, I'm yet to encounter only
* one example of a missent encoding when the
@@ -909,31 +955,49 @@ messageGetEncoding(const message *m)
int messageAddLine(message *m, line_t *line)
{
+ line_t *linked = NULL;
+ text *new_node = NULL;
+
if (m == NULL) {
cli_errmsg("Internal email parser error: invalid arguments when adding line to message.\n");
return -1;
}
- if (m->body_first == NULL)
- m->body_last = m->body_first = (text *)malloc(sizeof(text));
- else {
- m->body_last->t_next = (text *)malloc(sizeof(text));
- m->body_last = m->body_last->t_next;
+ if (line && lineGetData(line)) {
+ linked = lineLink(line);
+ if (linked == NULL) {
+ cli_errmsg("messageAddLine: out of memory for linked line\n");
+ return -1;
+ }
}
- if (m->body_last == NULL) {
- cli_errmsg("messageAddLine: out of memory for m->body_last\n");
+ new_node = (text *)malloc(sizeof(text));
+ if (new_node == NULL) {
+ if (linked)
+ lineUnlink(linked);
+ cli_errmsg("messageAddLine: out of memory for new_node\n");
return -1;
}
- m->body_last->t_next = NULL;
+ new_node->t_line = linked;
+ new_node->t_next = NULL;
- if (line && lineGetData(line)) {
- m->body_last->t_line = lineLink(line);
+ if (m->body_first == NULL)
+ m->body_last = m->body_first = new_node;
+ else {
+ if (m->body_last == NULL) {
+ if (linked)
+ lineUnlink(linked);
+ free(new_node);
+ cli_errmsg("Internal email parser error: message 'body_last' pointer should not be NULL if 'body_first' is set.\n");
+ return -1;
+ }
+ m->body_last->t_next = new_node;
+ m->body_last = new_node;
+ }
+ if (linked)
messageIsEncoding(m);
- } else
- m->body_last->t_line = NULL;
return 1;
}
@@ -965,7 +1029,7 @@ int messageAddStr(message *m, const char *data)
const char *p;
for (p = data; *p; p++)
- if (((*p) & 0x80) || !isspace(*p)) {
+ if (((*p) & 0x80) || !isspace((unsigned char)*p)) {
iswhite = 0;
break;
}
@@ -1093,19 +1157,23 @@ int messageMoveText(message *m, text *t, message *old_message)
m->body_last = m->body_first;
rc = 0;
} else {
- m->body_last = m->body_first = textMove(NULL, t);
- if (m->body_first == NULL)
+ int moveStatus = 0;
+
+ m->body_last = m->body_first = textMoveWithStatus(NULL, t, &moveStatus);
+ if ((moveStatus < 0) || (m->body_first == NULL))
return -1;
else
rc = 0;
}
} else {
- m->body_last = textMove(m->body_last, t);
- if (m->body_last == NULL) {
- rc = -1;
- m->body_last = m->body_first;
- } else
- rc = 0;
+ int moveStatus = 0;
+ text *newLast = textMoveWithStatus(m->body_last, t, &moveStatus);
+
+ if (moveStatus < 0)
+ return -1;
+
+ m->body_last = newLast;
+ rc = 0;
}
while (m->body_last->t_next) {
@@ -1644,7 +1712,7 @@ messageToText(message *m)
if (line == NULL)
continue;
- if ((line != NULL) && (strlen(line) > sizeof(data))) {
+ if ((line != NULL) && (strlen(line) >= sizeof(data))) {
cli_errmsg("Internal email parser error: line size greater than size of receiving data buffer\n");
break;
}
@@ -1815,12 +1883,14 @@ decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, s
bool softbreak;
char *p2, *copy;
char base64buf[RFC2045LENGTH + 1];
+ size_t outleft;
/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
- if (NULL == m || NULL == buf) {
+ if (NULL == m || NULL == buf || buflen == 0) {
cli_dbgmsg("decodeLine: invalid parameters\n");
return NULL;
}
+ outleft = buflen - 1; /* reserve room for the NUL terminator */
switch (et) {
case BINARY:
@@ -1831,19 +1901,35 @@ decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, s
case NOENCODING:
case EIGHTBIT:
default: /* unknown encoding type - try our best */
- if (line) /* empty line? */
- buf = (unsigned char *)cli_strrcpy((char *)buf, line);
+ if (line) { /* empty line? */
+ while (outleft && *line) {
+ *buf++ = *line++;
+ outleft--;
+ }
+ if (*line)
+ cli_dbgmsg("decodeLine: output truncated while copying undecoded line\n");
+ }
/* Put the new line back in */
- return (unsigned char *)cli_strrcpy((char *)buf, "\n");
+ if (outleft) {
+ *buf++ = '\n';
+ } else {
+ cli_dbgmsg("decodeLine: no room to append newline\n");
+ }
+ break;
case QUOTEDPRINTABLE:
if (line == NULL) { /* empty line */
- *buf++ = '\n';
+ if (outleft) {
+ *buf++ = '\n';
+ outleft--;
+ } else {
+ cli_dbgmsg("decodeLine: no room for empty quoted-printable line\n");
+ }
break;
}
softbreak = false;
- while (buflen && *line) {
+ while (outleft && *line) {
if (*line == '=') {
unsigned char byte;
@@ -1861,6 +1947,7 @@ decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, s
* adhering to RFC2045
*/
*buf++ = byte;
+ --outleft;
break;
}
@@ -1879,11 +1966,15 @@ decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, s
*buf++ = *line;
}
++line;
- --buflen;
+ --outleft;
}
if (!softbreak) {
/* Put the new line back in */
- *buf++ = '\n';
+ if (outleft) {
+ *buf++ = '\n';
+ } else {
+ cli_dbgmsg("decodeLine: no room to append quoted-printable newline\n");
+ }
}
break;
@@ -1943,7 +2034,7 @@ decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, s
break;
len = strlen(line);
- if ((len > buflen) || (reallen > len))
+ if ((reallen > outleft) || (reallen > len))
/*
* In practice this should never occur since
* the maximum length of a uuencoded line is
@@ -1962,13 +2053,17 @@ decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, s
if (strncmp(line, "=yend ", 6) == 0)
break;
- while (*line)
+ while (outleft && *line) {
if (*line == '=') {
if (*++line == '\0')
break;
*buf++ = ((*line++ - 64) & 255);
} else
*buf++ = ((*line++ - 42) & 255);
+ outleft--;
+ }
+ if (*line)
+ cli_dbgmsg("decodeLine: output truncated while decoding yEnc line\n");
break;
}
@@ -2059,9 +2154,9 @@ decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(
return out;
cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
- isalnum(cb1) ? cb1 : '@',
- isalnum(cb2) ? cb2 : '@',
- isalnum(cb3) ? cb3 : '@');
+ isalnum((unsigned char)cb1) ? cb1 : '@',
+ isalnum((unsigned char)cb2) ? cb2 : '@',
+ isalnum((unsigned char)cb3) ? cb3 : '@');
m->base64chars--;
b1 = cb1;
@@ -2170,7 +2265,7 @@ decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(
static unsigned char
hex(char c)
{
- if (isdigit(c))
+ if (isdigit((unsigned char)c))
return c - '0';
if ((c >= 'A') && (c <= 'F'))
return c - 'A' + 10;
@@ -2255,7 +2350,7 @@ messageDedup(message *m)
t1 = m->dedupedThisFar ? m->dedupedThisFar : m->body_first;
- for (t1 = m->body_first; t1; t1 = t1->t_next) {
+ for (; t1; t1 = t1->t_next) {
const char *d1;
text *t2;
line_t *l1;
@@ -2575,7 +2670,7 @@ compare(char *ls1, char **rs1, char *ls2, char **rs2)
if (s1 < end1) {
while (s1 < end1 && s2 < end2) {
- if (tolower(*s1) == tolower(*s2)) {
+ if (tolower((unsigned char)*s1) == tolower((unsigned char)*s2)) {
some_similarity = true;
cs1 = s1;
cs2 = s2;
@@ -2588,7 +2683,7 @@ compare(char *ls1, char **rs1, char *ls2, char **rs2)
s2++;
common++;
}
- while (tolower(*s1) == tolower(*s2));
+ while (tolower((unsigned char)*s1) == tolower((unsigned char)*s2));
if (common > maxchars) {
unsigned int diff = common - maxchars;
@@ -2661,6 +2756,6 @@ int isuuencodebegin(const char *line)
return 0;
return (strncasecmp(line, "begin ", 6) == 0) &&
- isdigit(line[6]) && isdigit(line[7]) &&
- isdigit(line[8]) && (line[9] == ' ');
+ isdigit((unsigned char)line[6]) && isdigit((unsigned char)line[7]) &&
+ isdigit((unsigned char)line[8]) && (line[9] == ' ');
}
diff --git a/libclamav/message.h b/libclamav/message.h
index 1d2f45277..d24fd566d 100644
--- a/libclamav/message.h
+++ b/libclamav/message.h
@@ -64,6 +64,7 @@ const char *messageGetDispositionType(const message *m);
void messageAddArgument(message *m, const char *arg);
void messageAddArguments(message *m, const char *arg);
char *messageFindArgument(const message *m, const char *variable);
+char *messageFindArgumentLast(const message *m, const char *variable);
char *messageGetFilename(const message *m);
int messageHasFilename(const message *m);
void messageSetEncoding(message *m, const char *enctype);
diff --git a/libclamav/text.c b/libclamav/text.c
index 9cad1e279..7fcebd1f5 100644
--- a/libclamav/text.c
+++ b/libclamav/text.c
@@ -104,7 +104,6 @@
#endif
#include <string.h>
#include <ctype.h>
-#include <assert.h>
#include <stdio.h>
#include "clamav.h"
@@ -112,8 +111,8 @@
#include "mbox.h"
-static text *textCopy(const text *t_head);
-static text *textAdd(text *t_head, const text *t);
+static text *textCopyWithStatus(const text *t_head, int *status);
+static text *textAddWithStatus(text *t_head, const text *t, int *status);
static void addToFileblob(const line_t *line, void *arg);
static void getLength(const line_t *line, void *arg);
static void addToBlob(const line_t *line, void *arg);
@@ -134,54 +133,73 @@ void textDestroy(text *t_head)
/* Clone the current object */
static text *
-textCopy(const text *t_head)
+textCopyWithStatus(const text *t_head, int *status)
{
text *first = NULL, *last = NULL;
+ if (status)
+ *status = 0;
+
while (t_head) {
- if (first == NULL)
- last = first = (text *)malloc(sizeof(text));
- else {
- last->t_next = (text *)malloc(sizeof(text));
- last = last->t_next;
- }
+ text *next = (text *)malloc(sizeof(text));
- if (last == NULL) {
+ if (next == NULL) {
cli_errmsg("textCopy: Unable to allocate memory to clone object\n");
if (first)
textDestroy(first);
+ if (status)
+ *status = -1;
return NULL;
}
- last->t_next = NULL;
+ next->t_next = NULL;
- if (t_head->t_line)
- last->t_line = lineLink(t_head->t_line);
- else
- last->t_line = NULL;
+ if (t_head->t_line) {
+ next->t_line = lineLink(t_head->t_line);
+ if (next->t_line == NULL) {
+ cli_errmsg("textCopy: Unable to link line while cloning object\n");
+ free(next);
+ if (first)
+ textDestroy(first);
+ if (status)
+ *status = -1;
+ return NULL;
+ }
+ } else {
+ next->t_line = NULL;
+ }
+
+ if (first == NULL) {
+ first = next;
+ } else {
+ last->t_next = next;
+ }
+ last = next;
t_head = t_head->t_next;
}
- if (first)
- last->t_next = NULL;
-
return first;
}
/* Add a copy of a text to the end of the current object */
static text *
-textAdd(text *t_head, const text *t)
+textAddWithStatus(text *t_head, const text *t, int *status)
{
text *ret;
+ int localStatus = 0;
+ int *copyStatus = status ? status : &localStatus;
int count;
+ *copyStatus = 0;
+
if (t_head == NULL) {
if (t == NULL) {
cli_errmsg("textAdd fails sanity check\n");
+ *copyStatus = -1;
return NULL;
}
- return textCopy(t);
+ return textCopyWithStatus(t, copyStatus);
}
if (t == NULL)
@@ -197,22 +215,12 @@ textAdd(text *t_head, const text *t)
cli_dbgmsg("textAdd: count = %d\n", count);
- while (t) {
- t_head->t_next = (text *)malloc(sizeof(text));
- t_head = t_head->t_next;
-
- assert(t_head != NULL);
-
- if (t->t_line)
- t_head->t_line = lineLink(t->t_line);
- else
- t_head->t_line = NULL;
-
- t = t->t_next;
+ t_head->t_next = textCopyWithStatus(t, copyStatus);
+ if (*copyStatus < 0) {
+ t_head->t_next = NULL;
+ return status ? ret : NULL;
}
- t_head->t_next = NULL;
-
return ret;
}
@@ -220,42 +228,76 @@ textAdd(text *t_head, const text *t)
* Add a message's content to the end of the current object
*/
text *
-textAddMessage(text *aText, message *aMessage)
+textAddMessageWithStatus(text *aText, message *aMessage, int *status)
{
- assert(aMessage != NULL);
+ if (status)
+ *status = 0;
+
+ if (aMessage == NULL) {
+ cli_errmsg("textAddMessage: message is NULL\n");
+ if (status)
+ *status = -1;
+ return aText;
+ }
if (messageGetEncoding(aMessage) == NOENCODING)
- return textAdd(aText, messageGetBody(aMessage));
+ return textAddWithStatus(aText, messageGetBody(aMessage), status);
else {
text *anotherText = messageToText(aMessage);
+ if (anotherText == NULL) {
+ if (status)
+ *status = -1;
+ return aText;
+ }
+
if (aText) {
- text *newHead = textMove(aText, anotherText);
- free(anotherText);
+ int moveStatus = 0;
+ text *newHead = textMoveWithStatus(aText, anotherText, &moveStatus);
+
+ textDestroy(anotherText);
+ if (moveStatus < 0) {
+ if (status)
+ *status = -1;
+ return aText;
+ }
return newHead;
}
return anotherText;
}
}
+text *
+textAddMessage(text *aText, message *aMessage)
+{
+ return textAddMessageWithStatus(aText, aMessage, NULL);
+}
+
/*
* Put the contents of the given text at the end of the current object.
* The given text emptied; it can be used again if needed, though be warned that
* it will have an empty line at the start.
*/
text *
-textMove(text *t_head, text *t)
+textMoveWithStatus(text *t_head, text *t, int *status)
{
text *ret;
+ if (status)
+ *status = 0;
+
if (t_head == NULL) {
if (t == NULL) {
cli_errmsg("textMove fails sanity check\n");
+ if (status)
+ *status = -1;
return NULL;
}
t_head = (text *)malloc(sizeof(text));
if (t_head == NULL) {
cli_errmsg("textMove: Unable to allocate memory for head\n");
+ if (status)
+ *status = -1;
return NULL;
}
t_head->t_line = t->t_line;
@@ -280,12 +322,12 @@ textMove(text *t_head, text *t)
t_head->t_next = (text *)malloc(sizeof(text));
if (t_head->t_next == NULL) {
cli_errmsg("textMove: Unable to allocate memory for head->next\n");
- return NULL;
+ if (status)
+ *status = -1;
+ return ret;
}
t_head = t_head->t_next;
- assert(t_head != NULL);
-
if (t->t_line) {
t_head->t_line = t->t_line;
t->t_line = NULL;
@@ -298,6 +340,12 @@ textMove(text *t_head, text *t)
return ret;
}
+text *
+textMove(text *t_head, text *t)
+{
+ return textMoveWithStatus(t_head, t, NULL);
+}
+
/*
* Transfer the contents of the text into a blob
* The caller must free the returned blob if b is NULL
@@ -364,8 +412,8 @@ textToBlob(text *t, blob *b, int destroy)
fileblob *
textToFileblob(text *t, fileblob *fb, int destroy)
{
- assert(fb != NULL);
- assert(t != NULL);
+ if (t == NULL)
+ return NULL;
if (fb == NULL) {
cli_dbgmsg("textToFileBlob, destroy = %d\n", destroy);
diff --git a/libclamav/text.h b/libclamav/text.h
index efd9e19e2..f6ec2ba56 100644
--- a/libclamav/text.h
+++ b/libclamav/text.h
@@ -54,7 +54,9 @@ typedef struct text {
#include "message.h"
void textDestroy(text *t_head);
+text *textAddMessageWithStatus(text *aText, message *aMessage, int *status);
text *textAddMessage(text *aText, message *aMessage);
+text *textMoveWithStatus(text *t_head, text *t, int *status);
text *textMove(text *t_head, text *t);
blob *textToBlob(text *t, blob *b, int destroy);
fileblob *textToFileblob(text *t, fileblob *fb, int destroy);
diff --git a/unit_tests/check_clamav.c b/unit_tests/check_clamav.c
index d21f511d3..8ca075713 100644
--- a/unit_tests/check_clamav.c
+++ b/unit_tests/check_clamav.c
@@ -536,7 +536,7 @@ END_TEST
static char **testfiles = NULL;
static unsigned testfiles_n = 0;
-static const int expected_testfiles = 53;
+static const int expected_testfiles = 60;
static unsigned skip_files(void)
{
diff --git a/unit_tests/input/CMakeLists.txt b/unit_tests/input/CMakeLists.txt
index cd0ef1296..08c24f507 100644
--- a/unit_tests/input/CMakeLists.txt
+++ b/unit_tests/input/CMakeLists.txt
@@ -29,6 +29,13 @@ set(ENCRYPTED_TESTFILES
clamav_hdb_scanfiles/clam.mail
clamav_hdb_scanfiles/clam.ppt
clamav_hdb_scanfiles/clam.tnef
+ clamav_hdb_scanfiles/clam.mail-multiple-content-type.eml
+ clamav_hdb_scanfiles/clam.mail-comment-header-name.eml
+ clamav_hdb_scanfiles/clam.mail-multiple-boundaries.eml
+ clamav_hdb_scanfiles/clam.mail-boundary-content-disposition.eml
+ clamav_hdb_scanfiles/clam.mail-semicolon-boundary.eml
+ clamav_hdb_scanfiles/clam.mail-nested-comment-header-name.eml
+ clamav_hdb_scanfiles/clam.mail-disposition-rfc2231-boundary.eml
clamav_hdb_scanfiles/clam.ea05.exe
clamav_hdb_scanfiles/clam.ea06.exe
clamav_hdb_scanfiles/clam.d64.zip
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-boundary-content-disposition.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-boundary-content-disposition.eml.xor
new file mode 100644
index 000000000..7d878225c
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-boundary-content-disposition.eml.xor differ
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-comment-header-name.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-comment-header-name.eml.xor
new file mode 100644
index 000000000..cfd60ff51
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-comment-header-name.eml.xor differ
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-disposition-rfc2231-boundary.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-disposition-rfc2231-boundary.eml.xor
new file mode 100644
index 000000000..1a0509dd4
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-disposition-rfc2231-boundary.eml.xor differ
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-multiple-boundaries.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-multiple-boundaries.eml.xor
new file mode 100644
index 000000000..5046faa95
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-multiple-boundaries.eml.xor differ
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-multiple-content-type.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-multiple-content-type.eml.xor
new file mode 100644
index 000000000..61161854b
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-multiple-content-type.eml.xor differ
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-nested-comment-header-name.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-nested-comment-header-name.eml.xor
new file mode 100644
index 000000000..4ebce7f49
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-nested-comment-header-name.eml.xor differ
diff --git a/unit_tests/input/clamav_hdb_scanfiles/clam.mail-semicolon-boundary.eml.xor b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-semicolon-boundary.eml.xor
new file mode 100644
index 000000000..9aab4f848
Binary files /dev/null and b/unit_tests/input/clamav_hdb_scanfiles/clam.mail-semicolon-boundary.eml.xor differ