Commit fa83ece93 for clamav.net
commit fa83ece937d8ba9c0e10d0b43870c1884dceb4e9
Author: John Humlick <15677335+jhumlick@users.noreply.github.com>
Date: Wed Jan 14 05:55:41 2026 -0800
Add support for real-domain-only phishing allow list signatures. (#1607)
Add support for Y-type signatures in wdb files.
In some cases it is desirable to allow a single real domain to be paired
with any displayed address without counting that as phishing. Examples
include the domains used by the Outlook URL checker or Google Safe
Browsing. When a wdb file contains a Y-type entry, only the real domain
is matched, rather than both the real and the displayed domain.
CLAM-2426
diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt
index 8e47a3ab5..42d0c9d66 100644
--- a/libclamav/CMakeLists.txt
+++ b/libclamav/CMakeLists.txt
@@ -352,14 +352,14 @@ set(LIBCLAMAV_SOURCES
# mail & phishing
iana_cctld.h
iana_tld.h
- line.c line.h
- mbox.c mbox.h
- message.c message.h
- phish_domaincheck_db.c phish_domaincheck_db.h
- phish_allow_list.c phish_allow_list.h
- phishcheck.c phishcheck.h
- regex_list.c regex_list.h
- regex_suffix.c regex_suffix.h
+ line.c line.h
+ mbox.c mbox.h
+ message.c message.h
+ phish_domaincheck_db.c phish_domaincheck_db.h
+ phish_allow_list.c phish_allow_list.h
+ phishcheck.c phishcheck.h
+ regex_list.c regex_list.h
+ regex_suffix.c regex_suffix.h
# sis
sis.c sis.h
# tnef
diff --git a/libclamav/libclamav.map b/libclamav/libclamav.map
index a2762b605..ffd7ec30a 100644
--- a/libclamav/libclamav.map
+++ b/libclamav/libclamav.map
@@ -308,8 +308,8 @@ CLAMAV_PRIVATE {
cli_infomsg_simple;
cli_set_debug_flag;
fmap_dump_to_file;
- init_allow_list;
- init_domain_list;
+ phish_allow_list_init;
+ phish_protected_domain_init;
lsig_increment_subsig_match;
readdb_parse_ldb_subsignature;
fuzzy_hash_calculate_image;
diff --git a/libclamav/others.h b/libclamav/others.h
index 756990007..a58b8c660 100644
--- a/libclamav/others.h
+++ b/libclamav/others.h
@@ -340,8 +340,8 @@ struct cl_engine {
struct cli_cdb *cdb;
/* Phishing .pdb and .wdb databases*/
- struct regex_matcher *allow_list_matcher;
- struct regex_matcher *domain_list_matcher;
+ struct regex_matcher *phish_allow_list_matcher;
+ struct regex_matcher *phish_protected_domain_matcher;
struct phishcheck *phishcheck;
/* Dynamic configuration */
@@ -575,16 +575,16 @@ extern LIBCLAMAV_EXPORT int have_rar;
/* based on macros from A. Melnikoff */
#define cbswap16(v) (((v & 0xff) << 8) | (((v) >> 8) & 0xff))
-#define cbswap32(v) ((((v) & 0x000000ff) << 24) | (((v) & 0x0000ff00) << 8) | \
- (((v) & 0x00ff0000) >> 8) | (((v) & 0xff000000) >> 24))
-#define cbswap64(v) ((((v) & 0x00000000000000ffULL) << 56) | \
- (((v) & 0x000000000000ff00ULL) << 40) | \
- (((v) & 0x0000000000ff0000ULL) << 24) | \
- (((v) & 0x00000000ff000000ULL) << 8) | \
- (((v) & 0x000000ff00000000ULL) >> 8) | \
- (((v) & 0x0000ff0000000000ULL) >> 24) | \
- (((v) & 0x00ff000000000000ULL) >> 40) | \
- (((v) & 0xff00000000000000ULL) >> 56))
+#define cbswap32(v) ((((v)&0x000000ff) << 24) | (((v)&0x0000ff00) << 8) | \
+ (((v)&0x00ff0000) >> 8) | (((v)&0xff000000) >> 24))
+#define cbswap64(v) ((((v)&0x00000000000000ffULL) << 56) | \
+ (((v)&0x000000000000ff00ULL) << 40) | \
+ (((v)&0x0000000000ff0000ULL) << 24) | \
+ (((v)&0x00000000ff000000ULL) << 8) | \
+ (((v)&0x000000ff00000000ULL) >> 8) | \
+ (((v)&0x0000ff0000000000ULL) >> 24) | \
+ (((v)&0x00ff000000000000ULL) >> 40) | \
+ (((v)&0xff00000000000000ULL) >> 56))
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
@@ -833,8 +833,8 @@ cl_error_t cli_dispatch_scan_callback(cli_ctx *ctx, cl_scan_callback_t location)
/* used by: spin, yc (C) aCaB */
#define __SHIFTBITS(a) (sizeof(a) << 3)
#define __SHIFTMASK(a) (__SHIFTBITS(a) - 1)
-#define CLI_ROL(a, b) a = (a << ((b) & __SHIFTMASK(a))) | (a >> ((__SHIFTBITS(a) - (b)) & __SHIFTMASK(a)))
-#define CLI_ROR(a, b) a = (a >> ((b) & __SHIFTMASK(a))) | (a << ((__SHIFTBITS(a) - (b)) & __SHIFTMASK(a)))
+#define CLI_ROL(a, b) a = (a << ((b)&__SHIFTMASK(a))) | (a >> ((__SHIFTBITS(a) - (b)) & __SHIFTMASK(a)))
+#define CLI_ROR(a, b) a = (a >> ((b)&__SHIFTMASK(a))) | (a << ((__SHIFTBITS(a) - (b)) & __SHIFTMASK(a)))
/* Implementation independent sign-extended signed right shift */
#ifdef HAVE_SAR
diff --git a/libclamav/others_common.c b/libclamav/others_common.c
index 1458b7d53..0451cb986 100644
--- a/libclamav/others_common.c
+++ b/libclamav/others_common.c
@@ -218,7 +218,7 @@ int cli_matchregex(const char *str, const char *regex)
}
void *cli_max_malloc(size_t size)
{
- void *alloc;
+ void *alloc = NULL;
if (0 == size || size > CLI_MAX_ALLOCATION) {
cli_warnmsg("cli_max_malloc(): File or section is too large to scan (%zu bytes). For your safety, ClamAV limits how much memory an operation can allocate to %d bytes\n",
@@ -239,7 +239,7 @@ void *cli_max_malloc(size_t size)
void *cli_max_calloc(size_t nmemb, size_t size)
{
- void *alloc;
+ void *alloc = NULL;
if (!nmemb || 0 == size || size > CLI_MAX_ALLOCATION || nmemb > CLI_MAX_ALLOCATION || (nmemb * size > CLI_MAX_ALLOCATION)) {
cli_warnmsg("cli_max_calloc(): File or section is too large to scan (%zu bytes). For your safety, ClamAV limits how much memory an operation can allocate to %d bytes\n",
@@ -260,7 +260,7 @@ void *cli_max_calloc(size_t nmemb, size_t size)
void *cli_safer_realloc(void *ptr, size_t size)
{
- void *alloc;
+ void *alloc = NULL;
if (0 == size) {
cli_errmsg("cli_max_realloc(): Attempt to allocate 0 bytes. Please report to https://github.com/Cisco-Talos/clamav/issues\n");
@@ -280,7 +280,7 @@ void *cli_safer_realloc(void *ptr, size_t size)
void *cli_safer_realloc_or_free(void *ptr, size_t size)
{
- void *alloc;
+ void *alloc = NULL;
if (0 == size) {
cli_errmsg("cli_max_realloc_or_free(): Attempt to allocate 0 bytes. Please report to https://github.com/Cisco-Talos/clamav/issues\n");
@@ -306,7 +306,7 @@ void *cli_safer_realloc_or_free(void *ptr, size_t size)
void *cli_max_realloc(void *ptr, size_t size)
{
- void *alloc;
+ void *alloc = NULL;
if (0 == size || size > CLI_MAX_ALLOCATION) {
cli_warnmsg("cli_max_realloc(): File or section is too large to scan (%zu bytes). For your safety, ClamAV limits how much memory an operation can allocate to %d bytes\n",
@@ -327,7 +327,7 @@ void *cli_max_realloc(void *ptr, size_t size)
void *cli_max_realloc_or_free(void *ptr, size_t size)
{
- void *alloc;
+ void *alloc = NULL;
if (0 == size || size > CLI_MAX_ALLOCATION) {
cli_warnmsg("cli_max_realloc_or_free(): File or section is too large to scan (%zu bytes). For your safety, ClamAV limits how much memory an operation can allocate to %d bytes\n",
@@ -354,7 +354,7 @@ void *cli_max_realloc_or_free(void *ptr, size_t size)
char *cli_safer_strdup(const char *s)
{
- char *alloc;
+ char *alloc = NULL;
if (s == NULL) {
cli_errmsg("cli_safer_strdup(): passed reference is NULL, nothing to duplicate\n");
@@ -375,7 +375,7 @@ char *cli_safer_strdup(const char *s)
/* returns converted timestamp, in case of error the returned string contains at least one character */
const char *cli_ctime(const time_t *timep, char *buf, const size_t bufsize)
{
- const char *ret;
+ const char *ret = NULL;
if (bufsize < 26) {
/* standard says we must have at least 26 bytes buffer */
cli_warnmsg("buffer too small for ctime\n");
@@ -431,7 +431,7 @@ size_t cli_readn(int fd, void *buff, size_t count)
{
ssize_t retval;
size_t todo;
- unsigned char *current;
+ unsigned char *current = NULL;
todo = count;
current = (unsigned char *)buff;
@@ -475,7 +475,7 @@ size_t cli_writen(int fd, const void *buff, size_t count)
{
ssize_t retval;
size_t todo;
- const unsigned char *current;
+ const unsigned char *current = NULL;
if (!buff) {
cli_errmsg("cli_writen: invalid NULL buff argument\n");
@@ -825,7 +825,7 @@ done:
static cl_error_t cli_ftw_dir(const char *dirname, int flags, int maxdepth, cli_ftw_cb callback, struct cli_ftw_cbdata *data, cli_ftw_pathchk pathchk)
{
- DIR *dd;
+ DIR *dd = NULL;
struct dirent_data *entries = NULL;
size_t i, entries_cnt = 0;
cl_error_t ret;
@@ -980,7 +980,7 @@ static cl_error_t cli_ftw_dir(const char *dirname, int flags, int maxdepth, cli_
* used */
const char *cli_strerror(int errnum, char *buf, size_t len)
{
- const char *err;
+ const char *err = NULL;
#ifdef CL_THREAD_SAFE
pthread_mutex_lock(&cli_strerror_mutex);
#endif
@@ -996,7 +996,8 @@ const char *cli_strerror(int errnum, char *buf, size_t len)
static char *cli_md5buff(const unsigned char *buffer, unsigned int len, unsigned char *dig)
{
unsigned char digest[16] = {0};
- char *md5str, *pt;
+ char *md5str = NULL;
+ char *pt = NULL;
int i;
cl_hash_data("md5", buffer, len, digest, NULL);
@@ -1183,7 +1184,7 @@ char *cli_genfname(const char *prefix)
char *sanitized_prefix_base = NULL;
char *fname = NULL;
unsigned char salt[16 + 32];
- char *tmp;
+ char *tmp = NULL;
int i;
size_t len;
@@ -1245,8 +1246,8 @@ char *cli_genfname(const char *prefix)
char *cli_newfilepath(const char *dir, const char *fname)
{
- char *fullpath;
- const char *mdir;
+ char *fullpath = NULL;
+ const char *mdir = NULL;
size_t len;
mdir = dir ? dir : cli_gettmpdir();
@@ -1298,9 +1299,9 @@ cl_error_t cli_newfilepathfd(const char *dir, char *fname, char **name, int *fd)
char *cli_gentemp_with_prefix(const char *dir, const char *prefix)
{
- char *fname;
- char *fullpath;
- const char *mdir;
+ char *fname = NULL;
+ char *fullpath = NULL;
+ const char *mdir = NULL;
size_t len;
mdir = dir ? dir : cli_gettmpdir();
diff --git a/libclamav/phish_allow_list.c b/libclamav/phish_allow_list.c
index 9efe7534c..2293ec80c 100644
--- a/libclamav/phish_allow_list.c
+++ b/libclamav/phish_allow_list.c
@@ -42,39 +42,39 @@
#include "mpool.h"
-cl_error_t allow_list_match(const struct cl_engine* engine, char* real_url, const char* display_url, int hostOnly)
+cl_error_t phish_allow_list_match(const struct cl_engine* engine, char* real_url, const char* display_url, int hostOnly, int is_allow_list_lookup)
{
const char* info; /*unused*/
cli_dbgmsg("Phishing: looking up in allow list: %s:%s; host-only:%d\n", real_url, display_url, hostOnly);
- return engine->allow_list_matcher ? regex_list_match(engine->allow_list_matcher, real_url, display_url, NULL, hostOnly, &info, 1) : 0;
+ return engine->phish_allow_list_matcher ? regex_list_match(engine->phish_allow_list_matcher, real_url, display_url, NULL, hostOnly, &info, is_allow_list_lookup) : 0;
}
-cl_error_t init_allow_list(struct cl_engine* engine)
+cl_error_t phish_allow_list_init(struct cl_engine* engine)
{
if (engine) {
- engine->allow_list_matcher = (struct regex_matcher*)MPOOL_MALLOC(engine->mempool, sizeof(struct regex_matcher));
- if (!engine->allow_list_matcher) {
+ engine->phish_allow_list_matcher = (struct regex_matcher*)MPOOL_MALLOC(engine->mempool, sizeof(struct regex_matcher));
+ if (!engine->phish_allow_list_matcher) {
cli_errmsg("Phish_allow_list: Unable to allocate memory for allow_list_match\n");
return CL_EMEM;
}
#ifdef USE_MPOOL
- ((struct regex_matcher*)(engine->allow_list_matcher))->mempool = engine->mempool;
+ ((struct regex_matcher*)(engine->phish_allow_list_matcher))->mempool = engine->mempool;
#endif
- return init_regex_list(engine->allow_list_matcher, engine->dconf->other & OTHER_CONF_PREFILTERING);
+ return init_regex_list(engine->phish_allow_list_matcher, engine->dconf->other & OTHER_CONF_PREFILTERING);
} else
return CL_ENULLARG;
}
-int is_allow_list_ok(const struct cl_engine* engine)
+int phish_is_allow_list_ok(const struct cl_engine* engine)
{
- return (engine && engine->allow_list_matcher) ? is_regex_ok(engine->allow_list_matcher) : 1;
+ return (engine && engine->phish_allow_list_matcher) ? is_regex_ok(engine->phish_allow_list_matcher) : 1;
}
-void allow_list_done(struct cl_engine* engine)
+void phish_allow_list_done(struct cl_engine* engine)
{
- if (engine && engine->allow_list_matcher) {
- regex_list_done(engine->allow_list_matcher);
- MPOOL_FREE(engine->mempool, engine->allow_list_matcher);
- engine->allow_list_matcher = NULL;
+ if (engine && engine->phish_allow_list_matcher) {
+ regex_list_done(engine->phish_allow_list_matcher);
+ MPOOL_FREE(engine->mempool, engine->phish_allow_list_matcher);
+ engine->phish_allow_list_matcher = NULL;
}
}
diff --git a/libclamav/phish_allow_list.h b/libclamav/phish_allow_list.h
index 0064c1b04..86045775c 100644
--- a/libclamav/phish_allow_list.h
+++ b/libclamav/phish_allow_list.h
@@ -26,10 +26,10 @@
#include "clamav.h"
-cl_error_t init_allow_list(struct cl_engine* engine);
-void allow_list_done(struct cl_engine* engine);
+cl_error_t phish_allow_list_init(struct cl_engine* engine);
+void phish_allow_list_done(struct cl_engine* engine);
void allow_list_cleanup(const struct cl_engine* engine);
-int is_allow_list_ok(const struct cl_engine* engine);
-cl_error_t allow_list_match(const struct cl_engine* engine, char* real_url, const char* display_url, int hostOnly);
+int phish_is_allow_list_ok(const struct cl_engine* engine);
+cl_error_t phish_allow_list_match(const struct cl_engine* engine, char* real_url, const char* display_url, int hostOnly, int is_allow_list_lookup);
#endif
diff --git a/libclamav/phish_domaincheck_db.c b/libclamav/phish_domaincheck_db.c
index 5377ac9fa..34c67cef6 100644
--- a/libclamav/phish_domaincheck_db.c
+++ b/libclamav/phish_domaincheck_db.c
@@ -41,38 +41,38 @@
#include "phish_domaincheck_db.h"
#include "regex_list.h"
-int domain_list_match(const struct cl_engine* engine, char* real_url, const char* display_url, const struct pre_fixup_info* pre_fixup, int hostOnly)
+int phish_protected_domain_match(const struct cl_engine* engine, char* real_url, const char* display_url, const struct pre_fixup_info* pre_fixup, int hostOnly)
{
const char* info;
- int rc = engine->domain_list_matcher ? regex_list_match(engine->domain_list_matcher, real_url, display_url, hostOnly ? pre_fixup : NULL, hostOnly, &info, 0) : 0;
+ int rc = engine->phish_protected_domain_matcher ? regex_list_match(engine->phish_protected_domain_matcher, real_url, display_url, hostOnly ? pre_fixup : NULL, hostOnly, &info, 0) : 0;
return rc;
}
-int init_domain_list(struct cl_engine* engine)
+int phish_protected_domain_init(struct cl_engine* engine)
{
if (engine) {
- engine->domain_list_matcher = (struct regex_matcher*)malloc(sizeof(struct regex_matcher));
- if (!engine->domain_list_matcher) {
+ engine->phish_protected_domain_matcher = (struct regex_matcher*)malloc(sizeof(struct regex_matcher));
+ if (!engine->phish_protected_domain_matcher) {
cli_errmsg("Phishcheck: Unable to allocate memory for init_domain_list\n");
return CL_EMEM;
}
#ifdef USE_MPOOL
- ((struct regex_matcher*)engine->domain_list_matcher)->mempool = engine->mempool;
+ ((struct regex_matcher*)engine->phish_protected_domain_matcher)->mempool = engine->mempool;
#endif
- return init_regex_list(engine->domain_list_matcher, engine->dconf->other & OTHER_CONF_PREFILTERING);
+ return init_regex_list(engine->phish_protected_domain_matcher, engine->dconf->other & OTHER_CONF_PREFILTERING);
} else
return CL_ENULLARG;
}
-int is_domain_list_ok(const struct cl_engine* engine)
+int phish_is_protected_domain_ok(const struct cl_engine* engine)
{
- return (engine && engine->domain_list_matcher) ? is_regex_ok(engine->domain_list_matcher) : 1;
+ return (engine && engine->phish_protected_domain_matcher) ? is_regex_ok(engine->phish_protected_domain_matcher) : 1;
}
-void domain_list_done(struct cl_engine* engine)
+void phish_protected_domain_done(struct cl_engine* engine)
{
- if (engine && engine->domain_list_matcher) {
- regex_list_done(engine->domain_list_matcher);
- free(engine->domain_list_matcher);
+ if (engine && engine->phish_protected_domain_matcher) {
+ regex_list_done(engine->phish_protected_domain_matcher);
+ free(engine->phish_protected_domain_matcher);
}
}
diff --git a/libclamav/phish_domaincheck_db.h b/libclamav/phish_domaincheck_db.h
index f5617a6c9..fe84ac70b 100644
--- a/libclamav/phish_domaincheck_db.h
+++ b/libclamav/phish_domaincheck_db.h
@@ -25,10 +25,9 @@
#define _PHISH_DOMAINCHECK_DB_H
#include "clamav.h"
-int init_domain_list(struct cl_engine* engine);
-void domain_list_done(struct cl_engine* engine);
-void domain_list_cleanup(const struct cl_engine* engine);
-int is_domain_list_ok(const struct cl_engine* engine);
-int domain_list_match(const struct cl_engine* engine, char* real_url, const char* display_url, const struct pre_fixup_info* pre_fixup, int hostOnly);
+int phish_protected_domain_init(struct cl_engine* engine);
+void phish_protected_domain_done(struct cl_engine* engine);
+int phish_is_protected_domain_ok(const struct cl_engine* engine);
+int phish_protected_domain_match(const struct cl_engine* engine, char* real_url, const char* display_url, const struct pre_fixup_info* pre_fixup, int hostOnly);
#endif
diff --git a/libclamav/phishcheck.c b/libclamav/phishcheck.c
index a5134ab1a..a1ce4f55d 100644
--- a/libclamav/phishcheck.c
+++ b/libclamav/phishcheck.c
@@ -861,8 +861,8 @@ void phishing_done(struct cl_engine* engine)
if (pchk && !pchk->is_disabled) {
free_regex(&pchk->preg_numeric);
}
- allow_list_done(engine);
- domain_list_done(engine);
+ phish_allow_list_done(engine);
+ phish_protected_domain_done(engine);
if (pchk) {
cli_dbgmsg("Freeing phishcheck struct\n");
MPOOL_FREE(engine->mempool, pchk);
@@ -1139,9 +1139,9 @@ static enum phish_status phishy_map(int phishy, enum phish_status fallback)
return fallback;
}
-static cl_error_t allow_list_check(const struct cl_engine* engine, struct url_check* urls, int hostOnly)
+static cl_error_t allow_list_check(const struct cl_engine* engine, struct url_check* urls, int hostOnly, int is_allow_list_lookup)
{
- return allow_list_match(engine, urls->realLink.data, urls->displayLink.data, hostOnly);
+ return phish_allow_list_match(engine, urls->realLink.data, urls->displayLink.data, hostOnly, is_allow_list_lookup);
}
static cl_error_t hash_match(const struct regex_matcher* rlist,
@@ -1477,7 +1477,7 @@ static enum phish_status phishingCheck(cli_ctx* ctx, struct url_check* urls)
goto done;
}
- if (CL_SUCCESS != (status = url_hash_match(ctx->engine->domain_list_matcher,
+ if (CL_SUCCESS != (status = url_hash_match(ctx->engine->phish_protected_domain_matcher,
urls->realLink.data,
strlen(urls->realLink.data),
&phishing_verdict))) {
@@ -1525,7 +1525,17 @@ static enum phish_status phishingCheck(cli_ctx* ctx, struct url_check* urls)
* Eg:
* X:.+\.benign\.com([/?].*)?:.+\.benign\.de
*/
- if (allow_list_check(ctx->engine, urls, 0)) { /* if url is allowed don't perform further checks */
+ if (allow_list_check(ctx->engine, urls, 0, 1)) { /* if url is allowed don't perform further checks */
+ phishing_verdict = CL_PHISH_CLEAN;
+ goto done;
+ }
+
+ /*
+ * Allow List Y-type WDB signatures: Y:RealHostname
+ * Eg:
+ * Y:^(.+\.)?email\.isbenign\.com$
+ */
+ if (allow_list_check(ctx->engine, urls, 1, 2)) {
phishing_verdict = CL_PHISH_CLEAN;
goto done;
}
@@ -1549,7 +1559,7 @@ static enum phish_status phishingCheck(cli_ctx* ctx, struct url_check* urls)
phishing_verdict = CL_PHISH_CLEAN;
goto done;
}
- if (domain_list_match(ctx->engine, realData, displayData, &urls->pre_fixup, 0)) {
+ if (phish_protected_domain_match(ctx->engine, realData, displayData, &urls->pre_fixup, 0)) {
phishy |= DOMAIN_LISTED;
}
@@ -1578,7 +1588,7 @@ static enum phish_status phishingCheck(cli_ctx* ctx, struct url_check* urls)
* Eg:
* M:email.isbenign.com:benign.com
*/
- if (allow_list_check(ctx->engine, &host_url, 1)) {
+ if (allow_list_check(ctx->engine, &host_url, 1, 1)) {
phishing_verdict = CL_PHISH_CLEAN;
goto done;
}
@@ -1588,7 +1598,7 @@ static enum phish_status phishingCheck(cli_ctx* ctx, struct url_check* urls)
* Eg:
* H:malicious.com
*/
- if (domain_list_match(ctx->engine, host_url.displayLink.data, host_url.realLink.data, &urls->pre_fixup, 1)) {
+ if (phish_protected_domain_match(ctx->engine, host_url.displayLink.data, host_url.realLink.data, &urls->pre_fixup, 1)) {
phishy |= DOMAIN_LISTED;
} else {
urls->flags &= urls->always_check_flags;
diff --git a/libclamav/readdb.c b/libclamav/readdb.c
index 45e00cfe1..910067261 100644
--- a/libclamav/readdb.c
+++ b/libclamav/readdb.c
@@ -1597,13 +1597,13 @@ static int cli_loadwdb(FILE *fs, struct cl_engine *engine, unsigned int options,
if (!(engine->dconf->phishing & PHISHING_CONF_ENGINE))
return CL_SUCCESS;
- if (!engine->allow_list_matcher) {
- if (CL_SUCCESS != (ret = init_allow_list(engine))) {
+ if (!engine->phish_allow_list_matcher) {
+ if (CL_SUCCESS != (ret = phish_allow_list_init(engine))) {
return ret;
}
}
- if (CL_SUCCESS != (ret = load_regex_matcher(engine, engine->allow_list_matcher, fs, NULL, options, 1, dbio, engine->dconf->other & OTHER_CONF_PREFILTERING))) {
+ if (CL_SUCCESS != (ret = load_regex_matcher(engine, engine->phish_allow_list_matcher, fs, NULL, options, 1, dbio, engine->dconf->other & OTHER_CONF_PREFILTERING))) {
return ret;
}
@@ -1617,13 +1617,13 @@ static int cli_loadpdb(FILE *fs, struct cl_engine *engine, unsigned int *signo,
if (!(engine->dconf->phishing & PHISHING_CONF_ENGINE))
return CL_SUCCESS;
- if (!engine->domain_list_matcher) {
- if (CL_SUCCESS != (ret = init_domain_list(engine))) {
+ if (!engine->phish_protected_domain_matcher) {
+ if (CL_SUCCESS != (ret = phish_protected_domain_init(engine))) {
return ret;
}
}
- if (CL_SUCCESS != (ret = load_regex_matcher(engine, engine->domain_list_matcher, fs, signo, options, 0, dbio, engine->dconf->other & OTHER_CONF_PREFILTERING))) {
+ if (CL_SUCCESS != (ret = load_regex_matcher(engine, engine->phish_protected_domain_matcher, fs, signo, options, 0, dbio, engine->dconf->other & OTHER_CONF_PREFILTERING))) {
return ret;
}
@@ -6045,12 +6045,12 @@ cl_error_t cl_engine_compile(struct cl_engine *engine)
hm_flush(engine->hm_fp);
TASK_COMPLETE();
- if ((ret = cli_build_regex_list(engine->allow_list_matcher))) {
+ if ((ret = cli_build_regex_list(engine->phish_allow_list_matcher))) {
return ret;
}
TASK_COMPLETE();
- if ((ret = cli_build_regex_list(engine->domain_list_matcher))) {
+ if ((ret = cli_build_regex_list(engine->phish_protected_domain_matcher))) {
return ret;
}
TASK_COMPLETE();
diff --git a/libclamav/regex/regcomp.c b/libclamav/regex/regcomp.c
index 6fa1dbbaa..27b420c5d 100644
--- a/libclamav/regex/regcomp.c
+++ b/libclamav/regex/regcomp.c
@@ -54,17 +54,17 @@
* other clumsinesses
*/
struct parse {
- const char *next; /* next character in RE */
- const char *end; /* end of string (-> NUL normally) */
- int error; /* has an error been seen? */
- sop *strip; /* malloced strip */
- sopno ssize; /* malloced strip size (allocated) */
- sopno slen; /* malloced strip length (used) */
- int ncsalloc; /* number of csets allocated */
- struct re_guts *g;
-# define NPAREN 10 /* we need to remember () 1-9 for back refs */
- sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
- sopno pend[NPAREN]; /* -> ) ([0] unused) */
+ const char *next; /* next character in RE */
+ const char *end; /* end of string (-> NUL normally) */
+ int error; /* has an error been seen? */
+ sop *strip; /* malloced strip */
+ sopno ssize; /* malloced strip size (allocated) */
+ sopno slen; /* malloced strip length (used) */
+ int ncsalloc; /* number of csets allocated */
+ struct re_guts *g;
+#define NPAREN 10 /* we need to remember () 1-9 for back refs */
+ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
+ sopno pend[NPAREN]; /* -> ) ([0] unused) */
};
static void p_ere(struct parse *, int);
@@ -100,199 +100,202 @@ static void stripsnug(struct parse *, struct re_guts *);
static void findmust(struct parse *, struct re_guts *);
static sopno pluscount(struct parse *, struct re_guts *);
-static char nuls[10]; /* place to point scanner in event of error */
+static char nuls[10]; /* place to point scanner in event of error */
/*
* macros for use with parse structure
* BEWARE: these know that the parse structure is named `p' !!!
*/
-#define PEEK() (*p->next)
-#define PEEK2() (*(p->next+1))
-#define MORE() (p->end - p->next > 0)
-#define MORE2() (p->end - p->next > 1)
-#define SEE(c) (MORE() && PEEK() == (c))
-#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b))
-#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
-#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
-#define NEXT() (p->next++)
-#define NEXT2() (p->next += 2)
-#define NEXTn(n) (p->next += (n))
-#define GETNEXT() (*p->next++)
-#define SETERROR(e) seterr(p, (e))
-#define REQUIRE(co, e) do { if (!(co)) SETERROR(e); } while (0)
-#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
-#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
-#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
-#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
-#define HERE() (p->slen)
-#define THERE() (p->slen - 1)
-#define THERETHERE() (p->slen - 2)
-#define DROP(n) (p->slen -= (n))
+#define PEEK() (*p->next)
+#define PEEK2() (*(p->next + 1))
+#define MORE() (p->end - p->next > 0)
+#define MORE2() (p->end - p->next > 1)
+#define SEE(c) (MORE() && PEEK() == (c))
+#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT() (p->next++)
+#define NEXT2() (p->next += 2)
+#define NEXTn(n) (p->next += (n))
+#define GETNEXT() (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e) \
+ do { \
+ if (!(co)) SETERROR(e); \
+ } while (0)
+#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos) doinsert(p, (sop)(op), HERE() - (pos) + 1, pos)
+#define AHEAD(pos) dofwd(p, pos, HERE() - (pos))
+#define ASTERN(sop, pos) EMIT(sop, HERE() - pos)
+#define HERE() (p->slen)
+#define THERE() (p->slen - 1)
+#define THERETHERE() (p->slen - 2)
+#define DROP(n) (p->slen -= (n))
#ifndef NDEBUG
-static int never = 0; /* for use in asserts; shuts lint up */
+static int never = 0; /* for use in asserts; shuts lint up */
#else
-#define never 0 /* some <assert.h>s have bugs too */
+#define never 0 /* some <assert.h>s have bugs too */
#endif
/*
- regcomp - interface for parser and compilation
*/
-int /* 0 success, otherwise REG_something */
+int /* 0 success, otherwise REG_something */
cli_regcomp_real(regex_t *preg, const char *pattern, int cflags)
{
- struct parse pa;
- struct re_guts *g;
- struct parse *p = &pa;
- int i;
- size_t len;
- size_t maxlen;
+ struct parse pa;
+ struct re_guts *g;
+ struct parse *p = &pa;
+ int i;
+ size_t len;
+ size_t maxlen;
#ifdef REDEBUG
-# define GOODFLAGS(f) (f)
+#define GOODFLAGS(f) (f)
#else
-# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#define GOODFLAGS(f) ((f) & ~REG_DUMP)
#endif
- cflags = GOODFLAGS(cflags);
- if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
- return(REG_INVARG);
-
- if (cflags&REG_PEND) {
- if (preg->re_endp < pattern)
- return(REG_INVARG);
- len = preg->re_endp - pattern;
- } else
- len = strlen((char *)pattern);
-
- /* do the mallocs early so failure handling is easy */
- g = (struct re_guts *)cli_max_malloc(sizeof(struct re_guts) +
- (NC-1)*sizeof(unsigned char));
- if (g == NULL)
- return(REG_ESPACE);
- /* Patch for bb11264 submitted by the Debian team: */
- /*
- * Limit the pattern space to avoid a 32-bit overflow on buffer
- * extension. Also avoid any signed overflow in case of conversion
- * so make the real limit based on a 31-bit overflow.
- *
- * Likely not applicable on 64-bit systems but handle the case
- * generically (who are we to stop people from using ~715MB+
- * patterns?).
- */
- maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
- if (len >= maxlen) {
- free((char *)g);
- return(REG_ESPACE);
- }
- p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
- if (p->ssize < len) {
- free((char *)g);
- return(REG_ESPACE);
- }
-
- p->strip = (sop *)cli_max_calloc(p->ssize, sizeof(sop));
- p->slen = 0;
- if (p->strip == NULL) {
- free(g);
- return(REG_ESPACE);
- }
-
- /* set things up */
- p->g = g;
- p->next = pattern;
- p->end = p->next + len;
- p->error = 0;
- p->ncsalloc = 0;
- for (i = 0; i < NPAREN; i++) {
- p->pbegin[i] = 0;
- p->pend[i] = 0;
- }
- g->csetsize = NC;
- g->sets = NULL;
- g->setbits = NULL;
- g->ncsets = 0;
- g->cflags = cflags;
- g->iflags = 0;
- g->nbol = 0;
- g->neol = 0;
- g->must = NULL;
- g->mlen = 0;
- g->nsub = 0;
- g->backrefs = 0;
-
- /* do it */
- EMIT(OEND, 0);
- g->firststate = THERE();
- if (cflags&REG_EXTENDED)
- p_ere(p, OUT);
- else if (cflags&REG_NOSPEC)
- p_str(p);
- else
- p_bre(p, OUT, OUT);
- EMIT(OEND, 0);
- g->laststate = THERE();
-
- /* tidy up loose ends and fill things in */
- stripsnug(p, g);
- findmust(p, g);
- g->nplus = pluscount(p, g);
- g->magic = MAGIC2;
- preg->re_nsub = g->nsub;
- preg->re_g = g;
- preg->re_magic = MAGIC1;
+ cflags = GOODFLAGS(cflags);
+ if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC))
+ return (REG_INVARG);
+
+ if (cflags & REG_PEND) {
+ if (preg->re_endp < pattern)
+ return (REG_INVARG);
+ len = preg->re_endp - pattern;
+ } else
+ len = strlen((char *)pattern);
+
+ /* do the mallocs early so failure handling is easy */
+ g = (struct re_guts *)cli_max_malloc(sizeof(struct re_guts) +
+ (NC - 1) * sizeof(unsigned char));
+ if (g == NULL)
+ return (REG_ESPACE);
+ /* Patch for bb11264 submitted by the Debian team: */
+ /*
+ * Limit the pattern space to avoid a 32-bit overflow on buffer
+ * extension. Also avoid any signed overflow in case of conversion
+ * so make the real limit based on a 31-bit overflow.
+ *
+ * Likely not applicable on 64-bit systems but handle the case
+ * generically (who are we to stop people from using ~715MB+
+ * patterns?).
+ */
+ maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
+ if (len >= maxlen) {
+ free((char *)g);
+ return (REG_ESPACE);
+ }
+ p->ssize = len / (size_t)2 * (size_t)3 + (size_t)1; /* ugh */
+ if (p->ssize < len) {
+ free((char *)g);
+ return (REG_ESPACE);
+ }
+
+ p->strip = (sop *)cli_max_calloc(p->ssize, sizeof(sop));
+ p->slen = 0;
+ if (p->strip == NULL) {
+ free(g);
+ return (REG_ESPACE);
+ }
+
+ /* set things up */
+ p->g = g;
+ p->next = pattern;
+ p->end = p->next + len;
+ p->error = 0;
+ p->ncsalloc = 0;
+ for (i = 0; i < NPAREN; i++) {
+ p->pbegin[i] = 0;
+ p->pend[i] = 0;
+ }
+ g->csetsize = NC;
+ g->sets = NULL;
+ g->setbits = NULL;
+ g->ncsets = 0;
+ g->cflags = cflags;
+ g->iflags = 0;
+ g->nbol = 0;
+ g->neol = 0;
+ g->must = NULL;
+ g->mlen = 0;
+ g->nsub = 0;
+ g->backrefs = 0;
+
+ /* do it */
+ EMIT(OEND, 0);
+ g->firststate = THERE();
+ if (cflags & REG_EXTENDED)
+ p_ere(p, OUT);
+ else if (cflags & REG_NOSPEC)
+ p_str(p);
+ else
+ p_bre(p, OUT, OUT);
+ EMIT(OEND, 0);
+ g->laststate = THERE();
+
+ /* tidy up loose ends and fill things in */
+ stripsnug(p, g);
+ findmust(p, g);
+ g->nplus = pluscount(p, g);
+ g->magic = MAGIC2;
+ preg->re_nsub = g->nsub;
+ preg->re_g = g;
+ preg->re_magic = MAGIC1;
#ifndef REDEBUG
- /* not debugging, so can't rely on the assert() in regexec() */
- if (g->iflags&REGEX_BAD)
- SETERROR(REG_ASSERT);
+ /* not debugging, so can't rely on the assert() in regexec() */
+ if (g->iflags & REGEX_BAD)
+ SETERROR(REG_ASSERT);
#endif
- /* win or lose, we're done */
- if (p->error != 0) /* lose */
- cli_regfree(preg);
- return(p->error);
+ /* win or lose, we're done */
+ if (p->error != 0) /* lose */
+ cli_regfree(preg);
+ return (p->error);
}
/*
- p_ere - ERE parser top level, concatenation and alternation
*/
static void
-p_ere(struct parse *p, int stop) /* character this ERE should end at */
+p_ere(struct parse *p, int stop) /* character this ERE should end at */
{
- char c;
- sopno prevback = 0;
- sopno prevfwd = 0;
- sopno conc;
- int first = 1; /* is this the first alternative? */
-
- for (;;) {
- /* do a bunch of concatenated expressions */
- conc = HERE();
- while (MORE() && (c = PEEK()) != '|' && c != stop)
- p_ere_exp(p);
- REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
-
- if (!EAT('|'))
- break; /* NOTE BREAK OUT */
-
- if (first) {
- INSERT(OCH_, conc); /* offset is wrong */
- prevfwd = conc;
- prevback = conc;
- first = 0;
- }
- ASTERN(OOR1, prevback);
- prevback = THERE();
- AHEAD(prevfwd); /* fix previous offset */
- prevfwd = HERE();
- EMIT(OOR2, 0); /* offset is very wrong */
- }
-
- if (!first) { /* tail-end fixups */
- AHEAD(prevfwd);
- ASTERN(O_CH, prevback);
- }
-
- assert(!MORE() || SEE(stop));
+ char c;
+ sopno prevback = 0;
+ sopno prevfwd = 0;
+ sopno conc;
+ int first = 1; /* is this the first alternative? */
+
+ for (;;) {
+ /* do a bunch of concatenated expressions */
+ conc = HERE();
+ while (MORE() && (c = PEEK()) != '|' && c != stop)
+ p_ere_exp(p);
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
+
+ if (!EAT('|'))
+ break; /* NOTE BREAK OUT */
+
+ if (first) {
+ INSERT(OCH_, conc); /* offset is wrong */
+ prevfwd = conc;
+ prevback = conc;
+ first = 0;
+ }
+ ASTERN(OOR1, prevback);
+ prevback = THERE();
+ AHEAD(prevfwd); /* fix previous offset */
+ prevfwd = HERE();
+ EMIT(OOR2, 0); /* offset is very wrong */
+ }
+
+ if (!first) { /* tail-end fixups */
+ AHEAD(prevfwd);
+ ASTERN(O_CH, prevback);
+ }
+
+ assert(!MORE() || SEE(stop));
}
/*
@@ -301,135 +304,135 @@ p_ere(struct parse *p, int stop) /* character this ERE should end at */
static void
p_ere_exp(struct parse *p)
{
- char c;
- sopno pos;
- int count;
- int count2;
- sopno subno;
- int wascaret = 0;
-
- assert(MORE()); /* caller should have ensured this */
- c = GETNEXT();
-
- pos = HERE();
- switch (c) {
- case '(':
- REQUIRE(MORE(), REG_EPAREN);
- p->g->nsub++;
- subno = p->g->nsub;
- if (subno < NPAREN)
- p->pbegin[subno] = HERE();
- EMIT(OLPAREN, subno);
- if (!SEE(')'))
- p_ere(p, ')');
- if (subno < NPAREN) {
- p->pend[subno] = HERE();
- assert(p->pend[subno] != 0);
- }
- EMIT(ORPAREN, subno);
- REQUIRE(MORE() && GETNEXT() == ')', REG_EPAREN);
- break;
- case '^':
- EMIT(OBOL, 0);
- p->g->iflags |= USEBOL;
- p->g->nbol++;
- wascaret = 1;
- break;
- case '$':
- EMIT(OEOL, 0);
- p->g->iflags |= USEEOL;
- p->g->neol++;
- break;
- case '|':
- SETERROR(REG_EMPTY);
- break;
- case '*':
- case '+':
- case '?':
- SETERROR(REG_BADRPT);
- break;
- case '.':
- if (p->g->cflags&REG_NEWLINE)
- nonnewline(p);
- else
- EMIT(OANY, 0);
- break;
- case '[':
- p_bracket(p);
- break;
- case '\\':
- REQUIRE(MORE(), REG_EESCAPE);
- c = GETNEXT();
- backslash(p, c);
- break;
- case '{': /* okay as ordinary except if digit follows */
- REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
- /* FALLTHROUGH */
- default:
- if (p->error != 0)
- return;
- ordinary(p, c);
- break;
- }
-
- if (!MORE())
- return;
- c = PEEK();
- /* we call { a repetition if followed by a digit */
- if (!( c == '*' || c == '+' || c == '?' ||
- (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
- return; /* no repetition, we're done */
- NEXT();
-
- REQUIRE(!wascaret, REG_BADRPT);
- switch (c) {
- case '*': /* implemented as +? */
- /* this case does not require the (y|) trick, noKLUDGE */
- INSERT(OPLUS_, pos);
- ASTERN(O_PLUS, pos);
- INSERT(OQUEST_, pos);
- ASTERN(O_QUEST, pos);
- break;
- case '+':
- INSERT(OPLUS_, pos);
- ASTERN(O_PLUS, pos);
- break;
- case '?':
- /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
- INSERT(OCH_, pos); /* offset slightly wrong */
- ASTERN(OOR1, pos); /* this one's right */
- AHEAD(pos); /* fix the OCH_ */
- EMIT(OOR2, 0); /* offset very wrong... */
- AHEAD(THERE()); /* ...so fix it */
- ASTERN(O_CH, THERETHERE());
- break;
- case '{':
- count = p_count(p);
- if (EAT(',')) {
- if (isdigit((uch)PEEK())) {
- count2 = p_count(p);
- REQUIRE(count <= count2, REG_BADBR);
- } else /* single number with comma */
- count2 = REGEX_INFINITY;
- } else /* just a single number */
- count2 = count;
- repeat(p, pos, count, count2);
- if (!EAT('}')) { /* error heuristics */
- while (MORE() && PEEK() != '}')
- NEXT();
- REQUIRE(MORE(), REG_EBRACE);
- SETERROR(REG_BADBR);
- }
- break;
- }
-
- if (!MORE())
- return;
- c = PEEK();
- if (!( c == '*' || c == '+' || c == '?' ||
- (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
- return;
- SETERROR(REG_BADRPT);
+ char c;
+ sopno pos;
+ int count;
+ int count2;
+ sopno subno;
+ int wascaret = 0;
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+
+ pos = HERE();
+ switch (c) {
+ case '(':
+ REQUIRE(MORE(), REG_EPAREN);
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ if (!SEE(')'))
+ p_ere(p, ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(MORE() && GETNEXT() == ')', REG_EPAREN);
+ break;
+ case '^':
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ wascaret = 1;
+ break;
+ case '$':
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ break;
+ case '|':
+ SETERROR(REG_EMPTY);
+ break;
+ case '*':
+ case '+':
+ case '?':
+ SETERROR(REG_BADRPT);
+ break;
+ case '.':
+ if (p->g->cflags & REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case '\\':
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = GETNEXT();
+ backslash(p, c);
+ break;
+ case '{': /* okay as ordinary except if digit follows */
+ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ if (p->error != 0)
+ return;
+ ordinary(p, c);
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ /* we call { a repetition if followed by a digit */
+ if (!(c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
+ return; /* no repetition, we're done */
+ NEXT();
+
+ REQUIRE(!wascaret, REG_BADRPT);
+ switch (c) {
+ case '*': /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ break;
+ case '+':
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ break;
+ case '?':
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, pos); /* offset slightly wrong */
+ ASTERN(OOR1, pos); /* this one's right */
+ AHEAD(pos); /* fix the OCH_ */
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case '{':
+ count = p_count(p);
+ if (EAT(',')) {
+ if (isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = REGEX_INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EAT('}')) { /* error heuristics */
+ while (MORE() && PEEK() != '}')
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ if (!(c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
+ return;
+ SETERROR(REG_BADRPT);
}
/*
@@ -438,9 +441,9 @@ p_ere_exp(struct parse *p)
static void
p_str(struct parse *p)
{
- REQUIRE(MORE(), REG_EMPTY);
- while (MORE())
- ordinary(p, GETNEXT());
+ REQUIRE(MORE(), REG_EMPTY);
+ while (MORE())
+ ordinary(p, GETNEXT());
}
/*
@@ -455,172 +458,172 @@ p_str(struct parse *p)
*/
static void
p_bre(struct parse *p,
- int end1, /* first terminating character */
- int end2) /* second terminating character */
+ int end1, /* first terminating character */
+ int end2) /* second terminating character */
{
- sopno start = HERE();
- int first = 1; /* first subexpression? */
- int wasdollar = 0;
-
- if (EAT('^')) {
- EMIT(OBOL, 0);
- p->g->iflags |= USEBOL;
- p->g->nbol++;
- }
- while (MORE() && !SEETWO(end1, end2)) {
- wasdollar = p_simp_re(p, first);
- first = 0;
- }
- if (wasdollar) { /* oops, that was a trailing anchor */
- DROP(1);
- EMIT(OEOL, 0);
- p->g->iflags |= USEEOL;
- p->g->neol++;
- }
-
- REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
+ sopno start = HERE();
+ int first = 1; /* first subexpression? */
+ int wasdollar = 0;
+
+ if (EAT('^')) {
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ }
+ while (MORE() && !SEETWO(end1, end2)) {
+ wasdollar = p_simp_re(p, first);
+ first = 0;
+ }
+ if (wasdollar) { /* oops, that was a trailing anchor */
+ DROP(1);
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ }
+
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
}
/*
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
*/
-static int /* was the simple RE an unbackslashed $? */
+static int /* was the simple RE an unbackslashed $? */
p_simp_re(struct parse *p,
- int starordinary) /* is a leading * an ordinary character? */
+ int starordinary) /* is a leading * an ordinary character? */
{
- int c;
- int count;
- int count2;
- sopno pos;
- int i;
- sopno subno;
-# define BACKSL (1<<CHAR_BIT)
-
- pos = HERE(); /* repetition op, if any, covers from here */
-
- assert(MORE()); /* caller should have ensured this */
- c = GETNEXT();
- if (c == '\\') {
- REQUIRE(MORE(), REG_EESCAPE);
- c = BACKSL | GETNEXT();
- }
- switch (c) {
- case '.':
- if (p->g->cflags&REG_NEWLINE)
- nonnewline(p);
- else
- EMIT(OANY, 0);
- break;
- case '[':
- p_bracket(p);
- break;
- case BACKSL|'<':
- EMIT(OBOW, 0);
- break;
- case BACKSL|'>':
- EMIT(OEOW, 0);
- break;
- case BACKSL|'{':
- SETERROR(REG_BADRPT);
- break;
- case BACKSL|'(':
- p->g->nsub++;
- subno = p->g->nsub;
- if (subno < NPAREN)
- p->pbegin[subno] = HERE();
- EMIT(OLPAREN, subno);
- /* the MORE here is an error heuristic */
- if (MORE() && !SEETWO('\\', ')'))
- p_bre(p, '\\', ')');
- if (subno < NPAREN) {
- p->pend[subno] = HERE();
- assert(p->pend[subno] != 0);
- }
- EMIT(ORPAREN, subno);
- REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
- break;
- case BACKSL|')': /* should not get here -- must be user */
- case BACKSL|'}':
- SETERROR(REG_EPAREN);
- break;
- case BACKSL|'1':
- case BACKSL|'2':
- case BACKSL|'3':
- case BACKSL|'4':
- case BACKSL|'5':
- case BACKSL|'6':
- case BACKSL|'7':
- case BACKSL|'8':
- case BACKSL|'9':
- i = (c&~BACKSL) - '0';
- assert(i < NPAREN);
- if (p->pend[i] != 0) {
- assert(i <= p->g->nsub);
- EMIT(OBACK_, i);
- assert(p->pbegin[i] != 0);
- assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
- assert(OP(p->strip[p->pend[i]]) == ORPAREN);
- (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
- EMIT(O_BACK, i);
- } else
- SETERROR(REG_ESUBREG);
- p->g->backrefs = 1;
- break;
- case '*':
- REQUIRE(starordinary, REG_BADRPT);
- /* FALLTHROUGH */
- default:
- if (p->error != 0)
- return(0); /* Definitely not $... */
- ordinary(p, (char)c);
- break;
- }
-
- if (EAT('*')) { /* implemented as +? */
- /* this case does not require the (y|) trick, noKLUDGE */
- INSERT(OPLUS_, pos);
- ASTERN(O_PLUS, pos);
- INSERT(OQUEST_, pos);
- ASTERN(O_QUEST, pos);
- } else if (EATTWO('\\', '{')) {
- count = p_count(p);
- if (EAT(',')) {
- if (MORE() && isdigit((uch)PEEK())) {
- count2 = p_count(p);
- REQUIRE(count <= count2, REG_BADBR);
- } else /* single number with comma */
- count2 = REGEX_INFINITY;
- } else /* just a single number */
- count2 = count;
- repeat(p, pos, count, count2);
- if (!EATTWO('\\', '}')) { /* error heuristics */
- while (MORE() && !SEETWO('\\', '}'))
- NEXT();
- REQUIRE(MORE(), REG_EBRACE);
- SETERROR(REG_BADBR);
- }
- } else if (c == '$') /* $ (but not \$) ends it */
- return(1);
-
- return(0);
+ int c;
+ int count;
+ int count2;
+ sopno pos;
+ int i;
+ sopno subno;
+#define BACKSL (1 << CHAR_BIT)
+
+ pos = HERE(); /* repetition op, if any, covers from here */
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+ if (c == '\\') {
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = BACKSL | GETNEXT();
+ }
+ switch (c) {
+ case '.':
+ if (p->g->cflags & REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case BACKSL | '<':
+ EMIT(OBOW, 0);
+ break;
+ case BACKSL | '>':
+ EMIT(OEOW, 0);
+ break;
+ case BACKSL | '{':
+ SETERROR(REG_BADRPT);
+ break;
+ case BACKSL | '(':
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ /* the MORE here is an error heuristic */
+ if (MORE() && !SEETWO('\\', ')'))
+ p_bre(p, '\\', ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+ break;
+ case BACKSL | ')': /* should not get here -- must be user */
+ case BACKSL | '}':
+ SETERROR(REG_EPAREN);
+ break;
+ case BACKSL | '1':
+ case BACKSL | '2':
+ case BACKSL | '3':
+ case BACKSL | '4':
+ case BACKSL | '5':
+ case BACKSL | '6':
+ case BACKSL | '7':
+ case BACKSL | '8':
+ case BACKSL | '9':
+ i = (c & ~BACKSL) - '0';
+ assert(i < NPAREN);
+ if (p->pend[i] != 0) {
+ assert(i <= p->g->nsub);
+ EMIT(OBACK_, i);
+ assert(p->pbegin[i] != 0);
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+ (void)dupl(p, p->pbegin[i] + 1, p->pend[i]);
+ EMIT(O_BACK, i);
+ } else
+ SETERROR(REG_ESUBREG);
+ p->g->backrefs = 1;
+ break;
+ case '*':
+ REQUIRE(starordinary, REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ if (p->error != 0)
+ return (0); /* Definitely not $... */
+ ordinary(p, (char)c);
+ break;
+ }
+
+ if (EAT('*')) { /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ } else if (EATTWO('\\', '{')) {
+ count = p_count(p);
+ if (EAT(',')) {
+ if (MORE() && isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = REGEX_INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EATTWO('\\', '}')) { /* error heuristics */
+ while (MORE() && !SEETWO('\\', '}'))
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ } else if (c == '$') /* $ (but not \$) ends it */
+ return (1);
+
+ return (0);
}
/*
- p_count - parse a repetition count
*/
-static int /* the value */
+static int /* the value */
p_count(struct parse *p)
{
- int count = 0;
- int ndigits = 0;
+ int count = 0;
+ int ndigits = 0;
- while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
- count = count*10 + (GETNEXT() - '0');
- ndigits++;
- }
+ while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+ count = count * 10 + (GETNEXT() - '0');
+ ndigits++;
+ }
- REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
- return(count);
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+ return (count);
}
/*
@@ -632,73 +635,73 @@ p_count(struct parse *p)
static void
p_bracket(struct parse *p)
{
- cset *cs;
- int invert = 0;
-
- /* Dept of Truly Sickening Special-Case Kludges */
- if (p->end - p->next > 5) {
- if (strncmp(p->next, "[:<:]]", 6) == 0) {
- EMIT(OBOW, 0);
- NEXTn(6);
- return;
- }
- if (strncmp(p->next, "[:>:]]", 6) == 0) {
- EMIT(OEOW, 0);
- NEXTn(6);
- return;
- }
- }
-
- if ((cs = allocset(p)) == NULL) {
- /* allocset did set error status in p */
- return;
- }
-
- if (EAT('^'))
- invert++; /* make note to invert set at end */
- if (EAT(']'))
- CHadd(cs, ']');
- else if (EAT('-'))
- CHadd(cs, '-');
- while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
- p_b_term(p, cs);
- if (EAT('-'))
- CHadd(cs, '-');
- REQUIRE(MORE() && GETNEXT() == ']', REG_EBRACK);
-
- if (p->error != 0) { /* don't mess things up further */
- freeset(p, cs);
- return;
- }
-
- if (p->g->cflags&REG_ICASE) {
- int i;
- int ci;
-
- for (i = p->g->csetsize - 1; i >= 0; i--)
- if (CHIN(cs, i) && isalpha(i)) {
- ci = othercase(i);
- if (ci != i)
- CHadd(cs, ci);
- }
- }
- if (invert) {
- int i;
-
- for (i = p->g->csetsize - 1; i >= 0; i--)
- if (CHIN(cs, i))
- CHsub(cs, i);
- else
- CHadd(cs, i);
- if (p->g->cflags&REG_NEWLINE)
- CHsub(cs, '\n');
- }
-
- if (nch(p, cs) == 1) { /* optimize singleton sets */
- ordinary(p, firstch(p, cs));
- freeset(p, cs);
- } else
- EMIT(OANYOF, freezeset(p, cs));
+ cset *cs;
+ int invert = 0;
+
+ /* Dept of Truly Sickening Special-Case Kludges */
+ if (p->end - p->next > 5) {
+ if (strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
+ }
+
+ if ((cs = allocset(p)) == NULL) {
+ /* allocset did set error status in p */
+ return;
+ }
+
+ if (EAT('^'))
+ invert++; /* make note to invert set at end */
+ if (EAT(']'))
+ CHadd(cs, ']');
+ else if (EAT('-'))
+ CHadd(cs, '-');
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+ p_b_term(p, cs);
+ if (EAT('-'))
+ CHadd(cs, '-');
+ REQUIRE(MORE() && GETNEXT() == ']', REG_EBRACK);
+
+ if (p->error != 0) { /* don't mess things up further */
+ freeset(p, cs);
+ return;
+ }
+
+ if (p->g->cflags & REG_ICASE) {
+ int i;
+ int ci;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i) && isalpha(i)) {
+ ci = othercase(i);
+ if (ci != i)
+ CHadd(cs, ci);
+ }
+ }
+ if (invert) {
+ int i;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i))
+ CHsub(cs, i);
+ else
+ CHadd(cs, i);
+ if (p->g->cflags & REG_NEWLINE)
+ CHsub(cs, '\n');
+ }
+
+ if (nch(p, cs) == 1) { /* optimize singleton sets */
+ ordinary(p, firstch(p, cs));
+ freeset(p, cs);
+ } else
+ EMIT(OANYOF, freezeset(p, cs));
}
/*
@@ -707,61 +710,61 @@ p_bracket(struct parse *p)
static void
p_b_term(struct parse *p, cset *cs)
{
- char c;
- char start, finish;
- int i;
-
- /* classify what we've got */
- switch ((MORE()) ? PEEK() : '\0') {
- case '[':
- c = (MORE2()) ? PEEK2() : '\0';
- break;
- case '-':
- SETERROR(REG_ERANGE);
- return; /* NOTE RETURN */
- break;
- default:
- c = '\0';
- break;
- }
-
- switch (c) {
- case ':': /* character class */
- NEXT2();
- REQUIRE(MORE(), REG_EBRACK);
- c = PEEK();
- REQUIRE(c != '-' && c != ']', REG_ECTYPE);
- p_b_cclass(p, cs);
- REQUIRE(MORE(), REG_EBRACK);
- REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
- break;
- case '=': /* equivalence class */
- NEXT2();
- REQUIRE(MORE(), REG_EBRACK);
- c = PEEK();
- REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
- p_b_eclass(p, cs);
- REQUIRE(MORE(), REG_EBRACK);
- REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
- break;
- default: /* symbol, ordinary character, or range */
-/* xxx revision needed for multichar stuff */
- start = p_b_symbol(p);
- if (SEE('-') && MORE2() && PEEK2() != ']') {
- /* range */
- NEXT();
- if (EAT('-'))
- finish = '-';
- else
- finish = p_b_symbol(p);
- } else
- finish = start;
-/* xxx what about signed chars here... */
- REQUIRE(start <= finish, REG_ERANGE);
- for (i = start; i <= finish; i++)
- CHadd(cs, i);
- break;
- }
+ char c;
+ char start, finish;
+ int i;
+
+ /* classify what we've got */
+ switch ((MORE()) ? PEEK() : '\0') {
+ case '[':
+ c = (MORE2()) ? PEEK2() : '\0';
+ break;
+ case '-':
+ SETERROR(REG_ERANGE);
+ return; /* NOTE RETURN */
+ break;
+ default:
+ c = '\0';
+ break;
+ }
+
+ switch (c) {
+ case ':': /* character class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+ p_b_cclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+ break;
+ case '=': /* equivalence class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+ p_b_eclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+ break;
+ default: /* symbol, ordinary character, or range */
+ /* xxx revision needed for multichar stuff */
+ start = p_b_symbol(p);
+ if (SEE('-') && MORE2() && PEEK2() != ']') {
+ /* range */
+ NEXT();
+ if (EAT('-'))
+ finish = '-';
+ else
+ finish = p_b_symbol(p);
+ } else
+ finish = start;
+ /* xxx what about signed chars here... */
+ REQUIRE(start <= finish, REG_ERANGE);
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ break;
+ }
}
/*
@@ -770,27 +773,27 @@ p_b_term(struct parse *p, cset *cs)
static void
p_b_cclass(struct parse *p, cset *cs)
{
- const char *sp = p->next;
- const struct cclass *cp;
- size_t len;
- const char *u;
- char c;
-
- while (MORE() && isalpha((uch)PEEK()))
- NEXT();
- len = p->next - sp;
- for (cp = cclasses; cp->name != NULL; cp++)
- if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
- break;
- if (cp->name == NULL) {
- /* oops, didn't find it */
- SETERROR(REG_ECTYPE);
- return;
- }
-
- u = cp->chars;
- while ((c = *u++) != '\0')
- CHadd(cs, c);
+ const char *sp = p->next;
+ const struct cclass *cp;
+ size_t len;
+ const char *u;
+ char c;
+
+ while (MORE() && isalpha((uch)PEEK()))
+ NEXT();
+ len = p->next - sp;
+ for (cp = cclasses; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ break;
+ if (cp->name == NULL) {
+ /* oops, didn't find it */
+ SETERROR(REG_ECTYPE);
+ return;
+ }
+
+ u = cp->chars;
+ while ((c = *u++) != '\0')
+ CHadd(cs, c);
}
/*
@@ -801,71 +804,71 @@ p_b_cclass(struct parse *p, cset *cs)
static void
p_b_eclass(struct parse *p, cset *cs)
{
- char c;
+ char c;
- c = p_b_coll_elem(p, '=');
- CHadd(cs, c);
+ c = p_b_coll_elem(p, '=');
+ CHadd(cs, c);
}
/*
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
*/
-static char /* value of symbol */
+static char /* value of symbol */
p_b_symbol(struct parse *p)
{
- char value;
+ char value;
- REQUIRE(MORE(), REG_EBRACK);
- if (!EATTWO('[', '.'))
- return(GETNEXT());
+ REQUIRE(MORE(), REG_EBRACK);
+ if (!EATTWO('[', '.'))
+ return (GETNEXT());
- /* collating symbol */
- value = p_b_coll_elem(p, '.');
- REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
- return(value);
+ /* collating symbol */
+ value = p_b_coll_elem(p, '.');
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+ return (value);
}
/*
- p_b_coll_elem - parse a collating-element name and look it up
*/
-static char /* value of collating element */
+static char /* value of collating element */
p_b_coll_elem(struct parse *p,
- int endc) /* name ended by endc,']' */
+ int endc) /* name ended by endc,']' */
{
- const char *sp = p->next;
- const struct cname *cp;
- size_t len;
-
- while (MORE() && !SEETWO(endc, ']'))
- NEXT();
- if (!MORE()) {
- SETERROR(REG_EBRACK);
- return(0);
- }
- len = p->next - sp;
- for (cp = cnames; cp->name != NULL; cp++)
- if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
- return(cp->code); /* known name */
- if (len == 1)
- return(*sp); /* single character */
- SETERROR(REG_ECOLLATE); /* neither */
- return(0);
+ const char *sp = p->next;
+ const struct cname *cp;
+ size_t len;
+
+ while (MORE() && !SEETWO(endc, ']'))
+ NEXT();
+ if (!MORE()) {
+ SETERROR(REG_EBRACK);
+ return (0);
+ }
+ len = p->next - sp;
+ for (cp = cnames; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
+ return (cp->code); /* known name */
+ if (len == 1)
+ return (*sp); /* single character */
+ SETERROR(REG_ECOLLATE); /* neither */
+ return (0);
}
/*
- othercase - return the case counterpart of an alphabetic
*/
-static char /* if no counterpart, return ch */
+static char /* if no counterpart, return ch */
othercase(int ch)
{
- ch = (uch)ch;
- assert(isalpha(ch));
- if (isupper(ch))
- return ((uch)tolower(ch));
- else if (islower(ch))
- return ((uch)toupper(ch));
- else /* peculiar, but could happen */
- return(ch);
+ ch = (uch)ch;
+ assert(isalpha(ch));
+ if (isupper(ch))
+ return ((uch)tolower(ch));
+ else if (islower(ch))
+ return ((uch)toupper(ch));
+ else /* peculiar, but could happen */
+ return (ch);
}
/*
@@ -876,21 +879,21 @@ othercase(int ch)
static void
bothcases(struct parse *p, int ch)
{
- const char *oldnext = p->next;
- const char *oldend = p->end;
- char bracket[3];
-
- ch = (uch)ch;
- assert(othercase(ch) != ch); /* p_bracket() would recurse */
- p->next = bracket;
- p->end = bracket+2;
- bracket[0] = ch;
- bracket[1] = ']';
- bracket[2] = '\0';
- p_bracket(p);
- assert(p->next == bracket+2);
- p->next = oldnext;
- p->end = oldend;
+ const char *oldnext = p->next;
+ const char *oldend = p->end;
+ char bracket[3];
+
+ ch = (uch)ch;
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */
+ p->next = bracket;
+ p->end = bracket + 2;
+ bracket[0] = ch;
+ bracket[1] = ']';
+ bracket[2] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket + 2);
+ p->next = oldnext;
+ p->end = oldend;
}
/*
@@ -899,10 +902,10 @@ bothcases(struct parse *p, int ch)
static void
ordinary(struct parse *p, int ch)
{
- if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
- bothcases(p, ch);
- else
- EMIT(OCHAR, (uch)ch);
+ if ((p->g->cflags & REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+ bothcases(p, ch);
+ else
+ EMIT(OCHAR, (uch)ch);
}
/*
@@ -911,17 +914,17 @@ ordinary(struct parse *p, int ch)
static void
backslash(struct parse *p, int ch)
{
- switch (ch) {
- case '<':
- EMIT(OBOW, 0);
- break;
- case '>':
- EMIT(OEOW, 0);
- break;
- default:
- ordinary(p, ch);
- break;
- }
+ switch (ch) {
+ case '<':
+ EMIT(OBOW, 0);
+ break;
+ case '>':
+ EMIT(OEOW, 0);
+ break;
+ default:
+ ordinary(p, ch);
+ break;
+ }
}
/*
@@ -932,16 +935,16 @@ backslash(struct parse *p, int ch)
static void
nonnewline(struct parse *p)
{
- const char *oldnext = p->next;
- const char *oldend = p->end;
- static const char bracket[4] = { '^', '\n', ']', '\0' };
-
- p->next = bracket;
- p->end = bracket+3;
- p_bracket(p);
- assert(p->next == bracket+3);
- p->next = oldnext;
- p->end = oldend;
+ const char *oldnext = p->next;
+ const char *oldend = p->end;
+ static const char bracket[4] = {'^', '\n', ']', '\0'};
+
+ p->next = bracket;
+ p->end = bracket + 3;
+ p_bracket(p);
+ assert(p->next == bracket + 3);
+ p->next = oldnext;
+ p->end = oldend;
}
/*
@@ -949,69 +952,70 @@ nonnewline(struct parse *p)
*/
static void
repeat(struct parse *p,
- sopno start, /* operand from here to end of strip */
- int from, /* repeated from this number */
- int to) /* to this number of times (maybe REGEX_INFINITY) */
+ sopno start, /* operand from here to end of strip */
+ int from, /* repeated from this number */
+ int to) /* to this number of times (maybe REGEX_INFINITY) */
{
- sopno finish = HERE();
-# define N 2
-# define INF 3
-# define REP(f, t) ((f)*8 + (t))
-# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGEX_INFINITY) ? INF : N)
- sopno copy;
-
- if (p->error != 0) /* head off possible runaway recursion */
- return;
-
- assert(from <= to);
-
- switch (REP(MAP(from), MAP(to))) {
- case REP(0, 0): /* must be user doing this */
- DROP(finish-start); /* drop the operand */
- break;
- case REP(0, 1): /* as x{1,1}? */
- case REP(0, N): /* as x{1,n}? */
- case REP(0, INF): /* as x{1,}? */
- /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
- INSERT(OCH_, start); /* offset is wrong... */
- repeat(p, start+1, 1, to);
- ASTERN(OOR1, start);
- AHEAD(start); /* ... fix it */
- EMIT(OOR2, 0);
- AHEAD(THERE());
- ASTERN(O_CH, THERETHERE());
- break;
- case REP(1, 1): /* trivial case */
- /* done */
- break;
- case REP(1, N): /* as x?x{1,n-1} */
- /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
- INSERT(OCH_, start);
- ASTERN(OOR1, start);
- AHEAD(start);
- EMIT(OOR2, 0); /* offset very wrong... */
- AHEAD(THERE()); /* ...so fix it */
- ASTERN(O_CH, THERETHERE());
- copy = dupl(p, start+1, finish+1);
- assert(copy == finish+4);
- repeat(p, copy, 1, to-1);
- break;
- case REP(1, INF): /* as x+ */
- INSERT(OPLUS_, start);
- ASTERN(O_PLUS, start);
- break;
- case REP(N, N): /* as xx{m-1,n-1} */
- copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to-1);
- break;
- case REP(N, INF): /* as xx{n-1,INF} */
- copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to);
- break;
- default: /* "can't happen" */
- SETERROR(REG_ASSERT); /* just in case */
- break;
- }
+ sopno finish = HERE();
+#define N 2
+#define INF 3
+#define REP(f, t) ((f)*8 + (t))
+#define MAP(n) (((n) <= 1) ? (n) : ((n) == REGEX_INFINITY) ? INF \
+ : N)
+ sopno copy;
+
+ if (p->error != 0) /* head off possible runaway recursion */
+ return;
+
+ assert(from <= to);
+
+ switch (REP(MAP(from), MAP(to))) {
+ case REP(0, 0): /* must be user doing this */
+ DROP(finish - start); /* drop the operand */
+ break;
+ case REP(0, 1): /* as x{1,1}? */
+ case REP(0, N): /* as x{1,n}? */
+ case REP(0, INF): /* as x{1,}? */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start); /* offset is wrong... */
+ repeat(p, start + 1, 1, to);
+ ASTERN(OOR1, start);
+ AHEAD(start); /* ... fix it */
+ EMIT(OOR2, 0);
+ AHEAD(THERE());
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case REP(1, 1): /* trivial case */
+ /* done */
+ break;
+ case REP(1, N): /* as x?x{1,n-1} */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start);
+ ASTERN(OOR1, start);
+ AHEAD(start);
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ copy = dupl(p, start + 1, finish + 1);
+ assert(copy == finish + 4);
+ repeat(p, copy, 1, to - 1);
+ break;
+ case REP(1, INF): /* as x+ */
+ INSERT(OPLUS_, start);
+ ASTERN(O_PLUS, start);
+ break;
+ case REP(N, N): /* as xx{m-1,n-1} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from - 1, to - 1);
+ break;
+ case REP(N, INF): /* as xx{n-1,INF} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from - 1, to);
+ break;
+ default: /* "can't happen" */
+ SETERROR(REG_ASSERT); /* just in case */
+ break;
+ }
}
/*
@@ -1020,10 +1024,10 @@ repeat(struct parse *p,
static void
seterr(struct parse *p, int e)
{
- if (p->error == 0) /* keep earliest error condition */
- p->error = e;
- p->next = nuls; /* try to bring things to a halt */
- p->end = nuls;
+ if (p->error == 0) /* keep earliest error condition */
+ p->error = e;
+ p->next = nuls; /* try to bring things to a halt */
+ p->end = nuls;
}
/*
@@ -1032,56 +1036,56 @@ seterr(struct parse *p, int e)
static cset *
allocset(struct parse *p)
{
- int no = p->g->ncsets++;
- size_t nc;
- size_t nbytes;
- cset *cs;
- size_t css = (size_t)p->g->csetsize;
- int i;
-
- if (no >= p->ncsalloc) { /* need another column of space */
- void *ptr;
-
- p->ncsalloc += CHAR_BIT;
- nc = p->ncsalloc;
- assert(nc % CHAR_BIT == 0);
- nbytes = nc / CHAR_BIT *css;
-
- ptr = (cset *)cli_max_realloc((char*)p->g->sets, nc * sizeof(cset));
- if (ptr == NULL)
- goto nomem;
- p->g->sets = ptr;
-
- ptr = (uch *)cli_max_realloc((char*)p->g->setbits, nbytes);
- if (ptr == NULL)
- goto nomem;
- nbytes = (nc / CHAR_BIT) * css;
- p->g->setbits = ptr;
-
- for (i = 0; i < no; i++)
- p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
-
- (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
- }
- /* XXX should not happen */
- if (p->g->sets == NULL || p->g->setbits == NULL)
- goto nomem;
-
- cs = &p->g->sets[no];
- cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
- cs->mask = 1 << ((no) % CHAR_BIT);
- cs->hash = 0;
-
- return(cs);
+ int no = p->g->ncsets++;
+ size_t nc;
+ size_t nbytes;
+ cset *cs;
+ size_t css = (size_t)p->g->csetsize;
+ int i;
+
+ if (no >= p->ncsalloc) { /* need another column of space */
+ void *ptr;
+
+ p->ncsalloc += CHAR_BIT;
+ nc = p->ncsalloc;
+ assert(nc % CHAR_BIT == 0);
+ nbytes = nc / CHAR_BIT * css;
+
+ ptr = (cset *)cli_max_realloc((char *)p->g->sets, nc * sizeof(cset));
+ if (ptr == NULL)
+ goto nomem;
+ p->g->sets = ptr;
+
+ ptr = (uch *)cli_max_realloc((char *)p->g->setbits, nbytes);
+ if (ptr == NULL)
+ goto nomem;
+ nbytes = (nc / CHAR_BIT) * css;
+ p->g->setbits = ptr;
+
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css * (i / CHAR_BIT);
+
+ (void)memset((char *)p->g->setbits + (nbytes - css), 0, css);
+ }
+ /* XXX should not happen */
+ if (p->g->sets == NULL || p->g->setbits == NULL)
+ goto nomem;
+
+ cs = &p->g->sets[no];
+ cs->ptr = p->g->setbits + css * ((no) / CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->hash = 0;
+
+ return (cs);
nomem:
- free(p->g->sets);
- p->g->sets = NULL;
- free(p->g->setbits);
- p->g->setbits = NULL;
-
- SETERROR(REG_ESPACE);
- /* caller's responsibility not to do set ops */
- return(NULL);
+ free(p->g->sets);
+ p->g->sets = NULL;
+ free(p->g->setbits);
+ p->g->setbits = NULL;
+
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ return (NULL);
}
/*
@@ -1090,14 +1094,14 @@ nomem:
static void
freeset(struct parse *p, cset *cs)
{
- int i;
- cset *top = &p->g->sets[p->g->ncsets];
- size_t css = (size_t)p->g->csetsize;
-
- for (i = 0; i < css; i++)
- CHsub(cs, i);
- if (cs == top-1) /* recover only the easy case */
- p->g->ncsets--;
+ int i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, i);
+ if (cs == top - 1) /* recover only the easy case */
+ p->g->ncsets--;
}
/*
@@ -1109,48 +1113,48 @@ freeset(struct parse *p, cset *cs)
* is done using addition rather than xor -- all ASCII [aA] sets xor to
* the same value!
*/
-static int /* set number */
+static int /* set number */
freezeset(struct parse *p, cset *cs)
{
- uch h = cs->hash;
- int i;
- cset *top = &p->g->sets[p->g->ncsets];
- cset *cs2;
- size_t css = (size_t)p->g->csetsize;
-
- /* look for an earlier one which is the same */
- for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
- if (cs2->hash == h && cs2 != cs) {
- /* maybe */
- for (i = 0; i < css; i++)
- if (CHIN(cs2, i) != CHIN(cs, i))
- break; /* no */
- if (i == css)
- break; /* yes */
- }
-
- if (cs2 < top) { /* found one */
- freeset(p, cs);
- cs = cs2;
- }
-
- return((int)(cs - p->g->sets));
+ uch h = cs->hash;
+ int i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ cset *cs2;
+ size_t css = (size_t)p->g->csetsize;
+
+ /* look for an earlier one which is the same */
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
+ if (cs2->hash == h && cs2 != cs) {
+ /* maybe */
+ for (i = 0; i < css; i++)
+ if (CHIN(cs2, i) != CHIN(cs, i))
+ break; /* no */
+ if (i == css)
+ break; /* yes */
+ }
+
+ if (cs2 < top) { /* found one */
+ freeset(p, cs);
+ cs = cs2;
+ }
+
+ return ((int)(cs - p->g->sets));
}
/*
- firstch - return first character in a set (which must have at least one)
*/
-static int /* character; there is no "none" value */
+static int /* character; there is no "none" value */
firstch(struct parse *p, cset *cs)
{
- int i;
- size_t css = (size_t)p->g->csetsize;
-
- for (i = 0; i < css; i++)
- if (CHIN(cs, i))
- return((char)i);
- assert(never);
- return(0); /* arbitrary */
+ int i;
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ return ((char)i);
+ assert(never);
+ return (0); /* arbitrary */
}
/*
@@ -1159,35 +1163,35 @@ firstch(struct parse *p, cset *cs)
static int
nch(struct parse *p, cset *cs)
{
- int i;
- size_t css = (size_t)p->g->csetsize;
- int n = 0;
-
- for (i = 0; i < css; i++)
- if (CHIN(cs, i))
- n++;
- return(n);
+ int i;
+ size_t css = (size_t)p->g->csetsize;
+ int n = 0;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ n++;
+ return (n);
}
/*
- dupl - emit a duplicate of a bunch of sops
*/
-static sopno /* start of duplicate */
+static sopno /* start of duplicate */
dupl(struct parse *p,
- sopno start, /* from here */
- sopno finish) /* to this less one */
+ sopno start, /* from here */
+ sopno finish) /* to this less one */
{
- sopno ret = HERE();
- sopno len = finish - start;
-
- assert(finish >= start);
- if (len == 0)
- return(ret);
- if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
- return(ret);
- (void) memcpy(p->strip + p->slen, p->strip + start, len * sizeof(sop));
- p->slen += len;
- return(ret);
+ sopno ret = HERE();
+ sopno len = finish - start;
+
+ assert(finish >= start);
+ if (len == 0)
+ return (ret);
+ if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
+ return (ret);
+ (void)memcpy(p->strip + p->slen, p->strip + start, len * sizeof(sop));
+ p->slen += len;
+ return (ret);
}
/*
@@ -1200,20 +1204,20 @@ dupl(struct parse *p,
static void
doemit(struct parse *p, sop op, size_t opnd)
{
- /* avoid making error situations worse */
- if (p->error != 0)
- return;
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
- /* deal with oversize operands ("can't happen", more or less) */
- assert(opnd < 1<<OPSHIFT);
+ /* deal with oversize operands ("can't happen", more or less) */
+ assert(opnd < 1 << OPSHIFT);
- /* deal with undersized strip */
- if (p->slen >= p->ssize)
- if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
- return;
+ /* deal with undersized strip */
+ if (p->slen >= p->ssize)
+ if (!enlarge(p, (p->ssize + 1) / 2 * 3)) /* +50% */
+ return;
- /* finally, it's all reduced to the easy case */
- p->strip[p->slen++] = SOP(op, opnd);
+ /* finally, it's all reduced to the easy case */
+ p->strip[p->slen++] = SOP(op, opnd);
}
/*
@@ -1222,36 +1226,36 @@ doemit(struct parse *p, sop op, size_t opnd)
static void
doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
{
- sopno sn;
- sop s;
- int i;
-
- /* avoid making error situations worse */
- if (p->error != 0)
- return;
-
- sn = HERE();
- EMIT(op, opnd); /* do checks, ensure space */
- if (HERE() != sn+1) {
- SETERROR(REG_ASSERT);
- return;
- }
- s = p->strip[sn];
-
- /* adjust paren pointers */
- assert(pos > 0);
- for (i = 1; i < NPAREN; i++) {
- if (p->pbegin[i] >= pos) {
- p->pbegin[i]++;
- }
- if (p->pend[i] >= pos) {
- p->pend[i]++;
- }
- }
-
- memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
- (HERE()-pos-1)*sizeof(sop));
- p->strip[pos] = s;
+ sopno sn;
+ sop s;
+ int i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ sn = HERE();
+ EMIT(op, opnd); /* do checks, ensure space */
+ if (HERE() != sn + 1) {
+ SETERROR(REG_ASSERT);
+ return;
+ }
+ s = p->strip[sn];
+
+ /* adjust paren pointers */
+ assert(pos > 0);
+ for (i = 1; i < NPAREN; i++) {
+ if (p->pbegin[i] >= pos) {
+ p->pbegin[i]++;
+ }
+ if (p->pend[i] >= pos) {
+ p->pend[i]++;
+ }
+ }
+
+ memmove((char *)&p->strip[pos + 1], (char *)&p->strip[pos],
+ (HERE() - pos - 1) * sizeof(sop));
+ p->strip[pos] = s;
}
/*
@@ -1260,12 +1264,12 @@ doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
static void
dofwd(struct parse *p, sopno pos, sop value)
{
- /* avoid making error situations worse */
- if (p->error != 0)
- return;
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
- assert(value < 1<<OPSHIFT);
- p->strip[pos] = OP(p->strip[pos]) | value;
+ assert(value < 1 << OPSHIFT);
+ p->strip[pos] = OP(p->strip[pos]) | value;
}
/*
@@ -1274,19 +1278,19 @@ dofwd(struct parse *p, sopno pos, sop value)
static int
enlarge(struct parse *p, sopno size)
{
- sop *sp;
-
- if (p->ssize >= size)
- return 1;
-
- sp = (sop *)cli_max_realloc(p->strip, size * sizeof(sop));
- if (sp == NULL) {
- SETERROR(REG_ESPACE);
- return 0;
- }
- p->strip = sp;
- p->ssize = size;
- return 1;
+ sop *sp;
+
+ if (p->ssize >= size)
+ return 1;
+
+ sp = (sop *)cli_max_realloc(p->strip, size * sizeof(sop));
+ if (sp == NULL) {
+ SETERROR(REG_ESPACE);
+ return 0;
+ }
+ p->strip = sp;
+ p->ssize = size;
+ return 1;
}
/*
@@ -1295,12 +1299,12 @@ enlarge(struct parse *p, sopno size)
static void
stripsnug(struct parse *p, struct re_guts *g)
{
- g->nstates = p->slen;
- g->strip = (sop *)cli_max_realloc((char *)p->strip, p->slen * sizeof(sop));
- if (g->strip == NULL) {
- SETERROR(REG_ESPACE);
- g->strip = p->strip;
- }
+ g->nstates = p->slen;
+ g->strip = (sop *)cli_max_realloc((char *)p->strip, p->slen * sizeof(sop));
+ if (g->strip == NULL) {
+ SETERROR(REG_ESPACE);
+ g->strip = p->strip;
+ }
}
/*
@@ -1315,111 +1319,111 @@ stripsnug(struct parse *p, struct re_guts *g)
static void
findmust(struct parse *p, struct re_guts *g)
{
- sop *scan;
- sop *start = NULL; /* start initialized in the default case, after that */
- sop *newstart = NULL; /* newstart was initialized in the OCHAR case */
- sopno newlen;
- sop s;
- char *cp;
- sopno i;
-
- /* avoid making error situations worse */
- if (p->error != 0)
- return;
-
- /* find the longest OCHAR sequence in strip */
- newlen = 0;
- scan = g->strip + 1;
- do {
- s = *scan++;
- switch (OP(s)) {
- case OCHAR: /* sequence member */
- if (newlen == 0) /* new sequence */
- newstart = scan - 1;
- newlen++;
- break;
- case OPLUS_: /* things that don't break one */
- case OLPAREN:
- case ORPAREN:
- break;
- case OQUEST_: /* things that must be skipped */
- case OCH_:
- scan--;
- do {
- scan += OPND(s);
- s = *scan;
- /* assert() interferes w debug printouts */
- if (OP(s) != O_QUEST && OP(s) != O_CH &&
- OP(s) != OOR2) {
- g->iflags |= REGEX_BAD;
- return;
- }
- } while (OP(s) != O_QUEST && OP(s) != O_CH);
- /* fallthrough */
- default: /* things that break a sequence */
- if (newlen > g->mlen) { /* ends one */
- start = newstart;
- g->mlen = newlen;
- }
- newlen = 0;
- break;
- }
- } while (OP(s) != OEND);
-
- if (g->mlen == 0) /* there isn't one */
- return;
- if (start == NULL) { /* something went wrong */
- g->mlen = 0;
- return;
- }
-
- /* turn it into a character string */
- g->must = cli_max_malloc((size_t)g->mlen + 1);
- if (g->must == NULL) { /* argh; just forget it */
- g->mlen = 0;
- return;
- }
- cp = g->must;
- scan = start;
- for (i = g->mlen; i > 0; i--) {
- while (OP(s = *scan++) != OCHAR)
- continue;
- assert(cp < g->must + g->mlen);
- *cp++ = (char)OPND(s);
- }
- assert(cp == g->must + g->mlen);
- *cp = '\0'; /* just on general principles */
+ sop *scan;
+ sop *start = NULL; /* start initialized in the default case, after that */
+ sop *newstart = NULL; /* newstart was initialized in the OCHAR case */
+ sopno newlen;
+ sop s;
+ char *cp;
+ sopno i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* find the longest OCHAR sequence in strip */
+ newlen = 0;
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OCHAR: /* sequence member */
+ if (newlen == 0) /* new sequence */
+ newstart = scan - 1;
+ newlen++;
+ break;
+ case OPLUS_: /* things that don't break one */
+ case OLPAREN:
+ case ORPAREN:
+ break;
+ case OQUEST_: /* things that must be skipped */
+ case OCH_:
+ scan--;
+ do {
+ scan += OPND(s);
+ s = *scan;
+ /* assert() interferes w debug printouts */
+ if (OP(s) != O_QUEST && OP(s) != O_CH &&
+ OP(s) != OOR2) {
+ g->iflags |= REGEX_BAD;
+ return;
+ }
+ } while (OP(s) != O_QUEST && OP(s) != O_CH);
+ /* fallthrough */
+ default: /* things that break a sequence */
+ if (newlen > g->mlen) { /* ends one */
+ start = newstart;
+ g->mlen = newlen;
+ }
+ newlen = 0;
+ break;
+ }
+ } while (OP(s) != OEND);
+
+ if (g->mlen == 0) /* there isn't one */
+ return;
+ if (start == NULL) { /* something went wrong */
+ g->mlen = 0;
+ return;
+ }
+
+ /* turn it into a character string */
+ g->must = cli_max_malloc((size_t)g->mlen + 1);
+ if (g->must == NULL) { /* argh; just forget it */
+ g->mlen = 0;
+ return;
+ }
+ cp = g->must;
+ scan = start;
+ for (i = g->mlen; i > 0; i--) {
+ while (OP(s = *scan++) != OCHAR)
+ continue;
+ assert(cp < g->must + g->mlen);
+ *cp++ = (char)OPND(s);
+ }
+ assert(cp == g->must + g->mlen);
+ *cp = '\0'; /* just on general principles */
}
/*
- pluscount - count + nesting
*/
-static sopno /* nesting depth */
+static sopno /* nesting depth */
pluscount(struct parse *p, struct re_guts *g)
{
- sop *scan;
- sop s;
- sopno plusnest = 0;
- sopno maxnest = 0;
-
- if (p->error != 0)
- return(0); /* there may not be an OEND */
-
- scan = g->strip + 1;
- do {
- s = *scan++;
- switch (OP(s)) {
- case OPLUS_:
- plusnest++;
- break;
- case O_PLUS:
- if (plusnest > maxnest)
- maxnest = plusnest;
- plusnest--;
- break;
- }
- } while (OP(s) != OEND);
- if (plusnest != 0)
- g->iflags |= REGEX_BAD;
- return(maxnest);
+ sop *scan;
+ sop s;
+ sopno plusnest = 0;
+ sopno maxnest = 0;
+
+ if (p->error != 0)
+ return (0); /* there may not be an OEND */
+
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OPLUS_:
+ plusnest++;
+ break;
+ case O_PLUS:
+ if (plusnest > maxnest)
+ maxnest = plusnest;
+ plusnest--;
+ break;
+ }
+ } while (OP(s) != OEND);
+ if (plusnest != 0)
+ g->iflags |= REGEX_BAD;
+ return (maxnest);
}
diff --git a/libclamav/regex_list.c b/libclamav/regex_list.c
index 8d6bcf6af..2e1de60f5 100644
--- a/libclamav/regex_list.c
+++ b/libclamav/regex_list.c
@@ -195,30 +195,46 @@ cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const
if (display_url[0] == '.') display_url++;
real_len = strlen(real_url);
display_len = strlen(display_url);
- buffer_len = (hostOnly && !is_allow_list_lookup) ? real_len + 1 : real_len + display_len + 1 + 1;
+ if (hostOnly && (is_allow_list_lookup != 1)) {
+ // Buffer is only for real part
+ buffer_len = real_len + 1;
+ } else {
+ // Buffer to hold both parts
+ buffer_len = real_len + display_len + 1 + 1;
+ }
if (buffer_len < 3) {
/* too short, no match possible */
return CL_SUCCESS;
}
- buffer = cli_max_malloc(buffer_len + 1);
+ buffer = cli_max_calloc(buffer_len + 1, sizeof(char));
if (!buffer) {
cli_errmsg("regex_list_match: Unable to allocate memory for buffer\n");
return CL_EMEM;
}
- strncpy(buffer, real_url, buffer_len);
- buffer[real_len] = (!is_allow_list_lookup && hostOnly) ? '/' : ':';
+ if (is_allow_list_lookup == 2) {
+ /* Y-Type signatures only contain the real part */
+ strncpy(buffer, real_url, real_len);
+ buffer[real_len] = '/';
+ /* This is the number of characters not including null termination */
+ buffer_len--;
+ } else {
+ strncpy(buffer, real_url, buffer_len);
+ buffer[real_len] = (!is_allow_list_lookup && hostOnly) ? '/' : ':';
+ }
/*
* For H-type PDB signatures, real_url is actually the DisplayedHostname.
* RealHostname is not used.
*/
- if (!hostOnly || is_allow_list_lookup) {
+ if (is_allow_list_lookup != 2 && (!hostOnly || is_allow_list_lookup == 1)) {
/* For all other PDB and WDB signatures concatenate Real:Displayed. */
strncpy(buffer + real_len + 1, display_url, buffer_len - real_len);
}
- buffer[buffer_len - 1] = '/';
- buffer[buffer_len] = 0;
+
+ if (is_allow_list_lookup != 2) {
+ buffer[buffer_len - 1] = '/';
+ }
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
if (CL_SUCCESS != (rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
@@ -231,6 +247,7 @@ cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const
reverse_string(bufrev);
filter_search_rc = filter_search(&matcher->filter, (const unsigned char *)bufrev, buffer_len);
+
if (filter_search_rc == -1) {
free(buffer);
free(bufrev);
@@ -884,6 +901,7 @@ cl_error_t regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
cl_error_t rc;
regex_t *preg;
size_t len;
+
/* we only match the host, so remove useless stuff */
const char remove_end[] = "([/?].*)?/";
const char remove_end2[] = "([/?].*)/";
diff --git a/libclamav/regex_suffix.c b/libclamav/regex_suffix.c
index 1952eb6c8..076872098 100644
--- a/libclamav/regex_suffix.c
+++ b/libclamav/regex_suffix.c
@@ -480,11 +480,11 @@ cl_error_t cli_regex2suffix(const char *pattern, regex_t *preg, suffix_callback
struct node *n = NULL;
size_t last = 0;
int rc;
+ cl_error_t ret = CL_SUCCESS;
if (NULL == pattern) {
cli_errmsg("cli_regex2suffix: pattern can't be NULL\n");
- rc = REG_INVARG;
- goto done;
+ return CL_ENULLARG;
}
regex.preg = preg;
@@ -499,27 +499,49 @@ cl_error_t cli_regex2suffix(const char *pattern, regex_t *preg, suffix_callback
} else {
cli_errmsg(MODULE "Error compiling regular expression: %s\n", pattern);
}
- return rc;
+ switch (rc) {
+ case REG_BADBR:
+ case REG_BADPAT:
+ case REG_BADRPT:
+ case REG_EBRACE:
+ case REG_EBRACK:
+ case REG_ECOLLATE:
+ case REG_ECTYPE:
+ case REG_EPAREN:
+ case REG_ERANGE:
+ case REG_ESUBREG:
+ case REG_INVARG:
+ ret = CL_EPARSE;
+ break;
+ case REG_ESPACE:
+ ret = CL_EMEM;
+ break;
+ default:
+ ret = CL_ERROR;
+ break;
+ }
+ return ret;
}
+
regex.nxt = NULL;
CLI_SAFER_STRDUP_OR_GOTO_DONE(pattern, regex.pattern,
cli_errmsg("cli_regex2suffix: unable to strdup regex.pattern\n");
- rc = REG_ESPACE);
+ ret = CL_EMEM);
n = parse_regex((const uint8_t *)pattern, strlen(pattern), &last);
if (!n) {
- rc = REG_ESPACE;
+ ret = CL_EMEM;
goto done;
}
memset(&buf, 0, sizeof(buf));
memset(&root_node, 0, sizeof(root_node));
n->parent = &root_node;
- rc = build_suffixtree_descend(n, &buf, cb, cbdata, &regex);
+ ret = build_suffixtree_descend(n, &buf, cb, cbdata, &regex);
done:
CLI_FREE_AND_SET_NULL(regex.pattern);
CLI_FREE_AND_SET_NULL(buf.data);
destroy_tree(n);
- return rc;
+ return ret;
}
diff --git a/libfreshclam/libfreshclam.c b/libfreshclam/libfreshclam.c
index 165be70f0..462ad869d 100644
--- a/libfreshclam/libfreshclam.c
+++ b/libfreshclam/libfreshclam.c
@@ -515,8 +515,8 @@ fc_error_t fc_test_database(const char *dbFilename, int bBytecodeEnabled)
done:
if (NULL != engine) {
- if (engine->domain_list_matcher && engine->domain_list_matcher->sha2_256_pfx_set.keys)
- cli_hashset_destroy(&engine->domain_list_matcher->sha2_256_pfx_set);
+ if (engine->phish_protected_domain_matcher && engine->phish_protected_domain_matcher->sha2_256_pfx_set.keys)
+ cli_hashset_destroy(&engine->phish_protected_domain_matcher->sha2_256_pfx_set);
cl_engine_free(engine);
}
diff --git a/unit_tests/check_regex.c b/unit_tests/check_regex.c
index 2d354ce76..04ff1154d 100644
--- a/unit_tests/check_regex.c
+++ b/unit_tests/check_regex.c
@@ -85,7 +85,7 @@ START_TEST(empty)
ck_assert_msg(!!preg, "malloc");
rc = cli_regex2suffix(pattern, preg, cb_fail, NULL);
free(preg);
- ck_assert_msg(rc == REG_EMPTY, "empty pattern");
+ ck_assert_msg(rc == CL_ERROR, "empty pattern");
ck_assert_msg(cb_called == 0, "callback shouldn't be called");
}
END_TEST
@@ -294,7 +294,7 @@ START_TEST(regex_list_match_test)
realurl = cli_safer_strdup(rtest->realurl);
rc = regex_list_match(&matcher, realurl, rtest->displayurl, NULL, 1, &info, 1);
- ck_assert_msg(rc == rtest->result, "regex_list_match");
+ ck_assert_msg(rc == (cl_error_t)rtest->result, "regex_list_match");
/* regex_list_match is not supposed to modify realurl in this case */
ck_assert_msg(!strcmp(realurl, rtest->realurl), "realurl altered");
free(realurl);
@@ -316,13 +316,13 @@ static void psetup_impl(int load2)
phishing_init(engine);
ck_assert_msg(!!engine->phishcheck, "phishing_init");
- rc = init_domain_list(engine);
+ rc = phish_protected_domain_init(engine);
ck_assert_msg(rc == CL_SUCCESS, "init_domain_list");
f = fdopen(open_testfile("input" PATHSEP "other_sigs" PATHSEP "daily.pdb", O_RDONLY | O_BINARY), "r");
ck_assert_msg(!!f, "fopen daily.pdb");
- rc = load_regex_matcher(engine, engine->domain_list_matcher, f, &signo, 0, 0, NULL, 1);
+ rc = load_regex_matcher(engine, engine->phish_protected_domain_matcher, f, &signo, 0, 0, NULL, 1);
ck_assert_msg(rc == CL_SUCCESS, "load_regex_matcher");
fclose(f);
@@ -333,7 +333,7 @@ static void psetup_impl(int load2)
ck_assert_msg(!!f, "fopen daily.gdb");
signo = 0;
- rc = load_regex_matcher(engine, engine->domain_list_matcher, f, &signo, 0, 0, NULL, 1);
+ rc = load_regex_matcher(engine, engine->phish_protected_domain_matcher, f, &signo, 0, 0, NULL, 1);
ck_assert_msg(rc == CL_SUCCESS, "load_regex_matcher");
fclose(f);
@@ -341,25 +341,25 @@ static void psetup_impl(int load2)
}
loaded_2 = load2;
- rc = init_allow_list(engine);
- ck_assert_msg(rc == CL_SUCCESS, "init_allow_list");
+ rc = phish_allow_list_init(engine);
+ ck_assert_msg(rc == CL_SUCCESS, "phish_allow_list_init");
f = fdopen(open_testfile("input" PATHSEP "other_sigs" PATHSEP "daily.wdb", O_RDONLY | O_BINARY), "r");
signo = 0;
- rc = load_regex_matcher(engine, engine->allow_list_matcher, f, &signo, 0, 1, NULL, 1);
- ck_assert_msg(rc == CL_SUCCESS, "load_regex_matcher");
+ rc = load_regex_matcher(engine, engine->phish_allow_list_matcher, f, &signo, 0, 1, NULL, 1);
+ ck_assert_msg(rc == CL_SUCCESS, "load_regex_matcher phish_allow_list");
fclose(f);
ck_assert_msg(signo == 31, "Incorrect number of signatures: %u, expected %u", signo, 31);
- rc = cli_build_regex_list(engine->allow_list_matcher);
- ck_assert_msg(rc == CL_SUCCESS, "cli_build_regex_list");
+ rc = cli_build_regex_list(engine->phish_allow_list_matcher);
+ ck_assert_msg(rc == CL_SUCCESS, "cli_build_regex_list phish_allow_list");
- rc = cli_build_regex_list(engine->domain_list_matcher);
- ck_assert_msg(rc == CL_SUCCESS, "cli_build_regex_list");
+ rc = cli_build_regex_list(engine->phish_protected_domain_matcher);
+ ck_assert_msg(rc == CL_SUCCESS, "cli_build_regex_list phish_protected_domain");
- ck_assert_msg(is_regex_ok(engine->allow_list_matcher), "is_regex_ok");
- ck_assert_msg(is_regex_ok(engine->domain_list_matcher), "is_regex_ok");
+ ck_assert_msg(is_regex_ok(engine->phish_allow_list_matcher), "is_regex_ok");
+ ck_assert_msg(is_regex_ok(engine->phish_protected_domain_matcher), "is_regex_ok");
}
static void psetup(void)