Commit f90fcf0fb for clamav.net
commit f90fcf0fb779b337a6e9f427fbc12cc58b1df0d5
Author: Burak Emir <burak.emir@gmail.com>
Date: Fri Jun 5 19:33:58 2026 +0200
feat: Add support for scanning Zstandard (zstd) compressed files (#1700)
ClamAV did not previously detect or decompress zstd-compressed files.
This adds full support in Rust using the ruzstd crate.
A few special cases:
- Concatenated frames: the decoder is recreated per frame until the input
is exhausted (ruzstd's StreamingDecoder decodes a single frame).
- Skippable frames: handled via the SkipFrame header error.
- Partial output is always scanned: on decode error, scan-limit hit, or
even a decoder panic (caught so it cannot unwind across the FFI boundary),
whatever was decompressed so far is still scanned, avoiding evasion gaps.
- Scan limits are enforced between and within frames.
- Per the zstd spec, a file may start with a skippable frame, so a second
file-type signature (5?2a4d18) also identifies such files as CL_TYPE_ZSTD.
diff --git a/Cargo.lock b/Cargo.lock
index 37226169b..878b982a0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -401,6 +401,7 @@ dependencies = [
"onenote_parser",
"openssl",
"rustdct",
+ "ruzstd",
"sha1",
"sha2",
"tar",
@@ -1836,6 +1837,15 @@ version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+[[package]]
+name = "ruzstd"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7c1c839d570d835527c9a5e4db7cb2198683a988cb9d7293fc8674e6bd58fc8"
+dependencies = [
+ "twox-hash",
+]
+
[[package]]
name = "ryu"
version = "1.0.20"
@@ -2193,6 +2203,12 @@ dependencies = [
"strength_reduce",
]
+[[package]]
+name = "twox-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
+
[[package]]
name = "typenum"
version = "1.19.0"
diff --git a/libclamav/dconf.c b/libclamav/dconf.c
index e68c99f97..c5e448454 100644
--- a/libclamav/dconf.c
+++ b/libclamav/dconf.c
@@ -109,6 +109,7 @@ static struct dconf_module modules[] = {
{"ARCHIVE", "UDF", ARCH_CONF_UDF, 1},
{"ARCHIVE", "LHA", ARCH_CONF_LHA_LZH, 1},
{"ARCHIVE", "ALZ", ARCH_CONF_ALZ, 1},
+ {"ARCHIVE", "ZSTD", ARCH_CONF_ZSTD, 1},
{"DOCUMENT", "HTML", DOC_CONF_HTML, 1},
{"DOCUMENT", "RTF", DOC_CONF_RTF, 1},
diff --git a/libclamav/dconf.h b/libclamav/dconf.h
index 5bc2d5a7e..0b6bb7562 100644
--- a/libclamav/dconf.h
+++ b/libclamav/dconf.h
@@ -99,6 +99,7 @@ struct cli_dconf {
#define ARCH_CONF_UDF 0x8000000
#define ARCH_CONF_LHA_LZH 0x10000000
#define ARCH_CONF_ALZ 0x20000000
+#define ARCH_CONF_ZSTD 0x40000000
/* Document flags */
#define DOC_CONF_HTML 0x1
diff --git a/libclamav/filetypes.c b/libclamav/filetypes.c
index 07c225334..8ec51d812 100644
--- a/libclamav/filetypes.c
+++ b/libclamav/filetypes.c
@@ -143,6 +143,7 @@ static const struct ftmap_s {
{ "CL_TYPE_PYTHON_COMPILED", CL_TYPE_PYTHON_COMPILED },
{ "CL_TYPE_LHA_LZH", CL_TYPE_LHA_LZH },
{ "CL_TYPE_AI_MODEL", CL_TYPE_AI_MODEL },
+ { "CL_TYPE_ZSTD", CL_TYPE_ZSTD },
{ NULL, CL_TYPE_IGNORED }
};
// clang-format on
diff --git a/libclamav/filetypes.h b/libclamav/filetypes.h
index 68cfa097e..d963715f2 100644
--- a/libclamav/filetypes.h
+++ b/libclamav/filetypes.h
@@ -96,6 +96,7 @@ typedef enum cli_file {
CL_TYPE_PYTHON_COMPILED,
CL_TYPE_LHA_LZH,
CL_TYPE_AI_MODEL,
+ CL_TYPE_ZSTD,
/* Section for partition types */
CL_TYPE_PART_ANY, /* unknown partition type */
diff --git a/libclamav/filetypes_int.h b/libclamav/filetypes_int.h
index ac5011378..414d9033b 100644
--- a/libclamav/filetypes_int.h
+++ b/libclamav/filetypes_int.h
@@ -163,6 +163,8 @@ static const char *ftypes_int[] = {
"0:0:78617221:XAR container file:CL_TYPE_ANY:CL_TYPE_XAR:75",
"1:EOF-512:6b6f6c79:DMG container file:CL_TYPE_ANY:CL_TYPE_DMG:75",
"0:0:fd377a585a00:XZ container file:CL_TYPE_ANY:CL_TYPE_XZ:76",
+ "0:0:28b52ffd:Zstandard compressed file:CL_TYPE_ANY:CL_TYPE_ZSTD:76",
+ "1:0:5?2a4d18:Zstandard skippable frame:CL_TYPE_ANY:CL_TYPE_ZSTD:76",
"4:1024:482B0004:HFS+ partition:CL_TYPE_PART_ANY:CL_TYPE_PART_HFSPLUS:75",
"4:1024:48580005:HFSX partition:CL_TYPE_PART_ANY:CL_TYPE_PART_HFSPLUS:75",
"1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
diff --git a/libclamav/scanners.c b/libclamav/scanners.c
index d9722a98f..35201dc15 100644
--- a/libclamav/scanners.c
+++ b/libclamav/scanners.c
@@ -4857,6 +4857,11 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
ret = cli_scanxz(ctx);
break;
+ case CL_TYPE_ZSTD:
+ if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZSTD))
+ ret = cli_scanzstd(ctx);
+ break;
+
case CL_TYPE_GPT:
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GPT))
ret = cli_scangpt(ctx, 0);
diff --git a/libclamav_rust/Cargo.toml b/libclamav_rust/Cargo.toml
index cdb1ba5f2..e79dadce6 100644
--- a/libclamav_rust/Cargo.toml
+++ b/libclamav_rust/Cargo.toml
@@ -32,6 +32,7 @@ md5 = "0.7.0"
openssl = "0.10.70"
glob = "0.3.1"
indexmap = "2.10.0"
+ruzstd = "0.8.3"
[features]
not_ready = []
diff --git a/libclamav_rust/src/scanners.rs b/libclamav_rust/src/scanners.rs
index d47681584..76cfceee0 100644
--- a/libclamav_rust/src/scanners.rs
+++ b/libclamav_rust/src/scanners.rs
@@ -22,7 +22,7 @@
use std::{
ffi::{c_char, CString},
- io::Read,
+ io::{Cursor, Read},
panic,
path::Path,
ptr::null_mut,
@@ -31,6 +31,10 @@ use std::{
use delharc::LhaDecodeReader;
use libc::c_void;
use log::{debug, error, warn};
+use ruzstd::decoding::{
+ errors::{FrameDecoderError, ReadFrameHeaderError},
+ StreamingDecoder,
+};
use crate::{
alz::Alz,
@@ -352,3 +356,115 @@ pub unsafe extern "C" fn cli_scanalz(ctx: *mut cli_ctx) -> cl_error_t {
cl_error_t_CL_SUCCESS
}
+
+/// Decompress the zstd data in the current fmap and scan the decompressed bytes.
+///
+/// Uses the pure-Rust `ruzstd` decoder, so no libzstd C dependency is required.
+/// Concatenated frames and skippable frames are both handled. Returns `cl_error_t_CL_ERROR`
+/// if the fmap or its bytes cannot be obtained, otherwise the result of `magic_scan`.
+///
+/// # Safety
+///
+/// `ctx` must be a valid `cli_ctx` pointer; this function does not null-check it.
+#[no_mangle]
+pub unsafe extern "C" fn cli_scanzstd(ctx: *mut cli_ctx) -> cl_error_t {
+    let fmap = match ctx::current_fmap(ctx) {
+        Ok(fmap) => fmap,
+        Err(e) => {
+            warn!("Error getting FMap from ctx: {e}");
+            return cl_error_t_CL_ERROR;
+        }
+    };
+
+    let file_bytes = match fmap.need_off(0, fmap.len()) {
+        Ok(bytes) => bytes,
+        Err(err) => {
+            error!(
+                "Failed to get file bytes for fmap of size {}: {err}",
+                fmap.len()
+            );
+            return cl_error_t_CL_ERROR;
+        }
+    };
+
+    debug!("in cli_scanzstd()");
+
+    // Decompress every zstd frame into a single buffer.
+    //
+    // `output` is owned outside the closure so that even if the decoder panics
+    // on malformed input we still scan whatever was decompressed so far, rather
+    // than discarding it (an evasion gap). The decode loop is wrapped in
+    // catch_unwind so that a panic cannot unwind across the C FFI boundary.
+    let mut output: Vec<u8> = Vec::new();
+
+    let decompress = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+        let mut cursor = Cursor::new(file_bytes);
+        let total_len = file_bytes.len() as u64;
+        let mut chunk = [0u8; 65536]; // 64 KiB scratch buffer reused for every read
+
+        'frames: while cursor.position() < total_len {
+            // SAFETY: `ctx` is valid per this function's contract. Bail out if limits are hit.
+            if unsafe { check_scan_limits("zstd", ctx, output.len() as u64, 0, 0) }
+                != cl_error_t_CL_SUCCESS
+            {
+                debug!("cli_scanzstd: exceeded scan limits. Bailing out.");
+                break;
+            }
+
+            // ruzstd's StreamingDecoder decodes a single frame, so we recreate it
+            // for each concatenated frame in the stream.
+            let mut decoder = match StreamingDecoder::new(&mut cursor) {
+                Ok(decoder) => decoder,
+                Err(FrameDecoderError::ReadFrameHeaderError(ReadFrameHeaderError::SkipFrame {
+                    length,
+                    ..
+                })) => {
+                    // Skippable frame: its 8-byte header was already consumed; skip the body.
+                    let next = cursor
+                        .position()
+                        .saturating_add(length as u64)
+                        .min(total_len); // clamp: a declared length past EOF just ends the loop
+                    cursor.set_position(next);
+                    continue;
+                }
+                Err(err) => {
+                    // Not a decodable frame header (e.g. trailing data). Scan what we have.
+                    debug!("cli_scanzstd: stopping frame parsing: {err}");
+                    break;
+                }
+            };
+
+            loop {
+                match decoder.read(&mut chunk) {
+                    Ok(0) => break, // current frame fully decoded; try the next frame
+                    Ok(n) => {
+                        output.extend_from_slice(&chunk[..n]);
+
+                        if unsafe { check_scan_limits("zstd", ctx, output.len() as u64, 0, 0) }
+                            != cl_error_t_CL_SUCCESS
+                        {
+                            debug!(
+                                "cli_scanzstd: decompressed size exceeds limits - \
+                                 only scanning {} bytes",
+                                output.len()
+                            );
+                            break 'frames;
+                        }
+                    }
+                    Err(err) => {
+                        // Stop decoding entirely, but still scan the bytes produced so far.
+                        debug!("cli_scanzstd: decompress error: {err}");
+                        break 'frames;
+                    }
+                }
+            }
+        }
+    }));
+
+    if decompress.is_err() {
+        // The decoder panicked; scan whatever was decompressed before the panic.
+        debug!("cli_scanzstd: panic while decompressing zstd data");
+    }
+
+    magic_scan(ctx, &output, None) // `output` may be partial, or empty if nothing decoded
+}
diff --git a/libclamav_rust/src/sys.rs b/libclamav_rust/src/sys.rs
index ba42e3235..b635bb85c 100644
--- a/libclamav_rust/src/sys.rs
+++ b/libclamav_rust/src/sys.rs
@@ -369,36 +369,37 @@ pub const cli_file_CL_TYPE_ONENOTE: cli_file = 554;
pub const cli_file_CL_TYPE_PYTHON_COMPILED: cli_file = 555;
pub const cli_file_CL_TYPE_LHA_LZH: cli_file = 556;
pub const cli_file_CL_TYPE_AI_MODEL: cli_file = 557;
-pub const cli_file_CL_TYPE_PART_ANY: cli_file = 558;
-pub const cli_file_CL_TYPE_PART_HFSPLUS: cli_file = 559;
-pub const cli_file_CL_TYPE_MBR: cli_file = 560;
-pub const cli_file_CL_TYPE_HTML: cli_file = 561;
-pub const cli_file_CL_TYPE_MAIL: cli_file = 562;
-pub const cli_file_CL_TYPE_SFX: cli_file = 563;
-pub const cli_file_CL_TYPE_ZIPSFX: cli_file = 564;
-pub const cli_file_CL_TYPE_RARSFX: cli_file = 565;
-pub const cli_file_CL_TYPE_7ZSFX: cli_file = 566;
-pub const cli_file_CL_TYPE_CABSFX: cli_file = 567;
-pub const cli_file_CL_TYPE_ARJSFX: cli_file = 568;
-pub const cli_file_CL_TYPE_EGGSFX: cli_file = 569;
-pub const cli_file_CL_TYPE_NULSFT: cli_file = 570;
-pub const cli_file_CL_TYPE_AUTOIT: cli_file = 571;
-pub const cli_file_CL_TYPE_ISHIELD_MSI: cli_file = 572;
-pub const cli_file_CL_TYPE_ISO9660: cli_file = 573;
-pub const cli_file_CL_TYPE_DMG: cli_file = 574;
-pub const cli_file_CL_TYPE_GPT: cli_file = 575;
-pub const cli_file_CL_TYPE_APM: cli_file = 576;
-pub const cli_file_CL_TYPE_XDP: cli_file = 577;
-pub const cli_file_CL_TYPE_XML_WORD: cli_file = 578;
-pub const cli_file_CL_TYPE_XML_XL: cli_file = 579;
-pub const cli_file_CL_TYPE_XML_HWP: cli_file = 580;
-pub const cli_file_CL_TYPE_HWPOLE2: cli_file = 581;
-pub const cli_file_CL_TYPE_MHTML: cli_file = 582;
-pub const cli_file_CL_TYPE_LNK: cli_file = 583;
-pub const cli_file_CL_TYPE_UDF: cli_file = 584;
-pub const cli_file_CL_TYPE_ALZ: cli_file = 585;
-pub const cli_file_CL_TYPE_OTHER: cli_file = 586;
-pub const cli_file_CL_TYPE_IGNORED: cli_file = 587;
+pub const cli_file_CL_TYPE_ZSTD: cli_file = 558;
+pub const cli_file_CL_TYPE_PART_ANY: cli_file = 559;
+pub const cli_file_CL_TYPE_PART_HFSPLUS: cli_file = 560;
+pub const cli_file_CL_TYPE_MBR: cli_file = 561;
+pub const cli_file_CL_TYPE_HTML: cli_file = 562;
+pub const cli_file_CL_TYPE_MAIL: cli_file = 563;
+pub const cli_file_CL_TYPE_SFX: cli_file = 564;
+pub const cli_file_CL_TYPE_ZIPSFX: cli_file = 565;
+pub const cli_file_CL_TYPE_RARSFX: cli_file = 566;
+pub const cli_file_CL_TYPE_7ZSFX: cli_file = 567;
+pub const cli_file_CL_TYPE_CABSFX: cli_file = 568;
+pub const cli_file_CL_TYPE_ARJSFX: cli_file = 569;
+pub const cli_file_CL_TYPE_EGGSFX: cli_file = 570;
+pub const cli_file_CL_TYPE_NULSFT: cli_file = 571;
+pub const cli_file_CL_TYPE_AUTOIT: cli_file = 572;
+pub const cli_file_CL_TYPE_ISHIELD_MSI: cli_file = 573;
+pub const cli_file_CL_TYPE_ISO9660: cli_file = 574;
+pub const cli_file_CL_TYPE_DMG: cli_file = 575;
+pub const cli_file_CL_TYPE_GPT: cli_file = 576;
+pub const cli_file_CL_TYPE_APM: cli_file = 577;
+pub const cli_file_CL_TYPE_XDP: cli_file = 578;
+pub const cli_file_CL_TYPE_XML_WORD: cli_file = 579;
+pub const cli_file_CL_TYPE_XML_XL: cli_file = 580;
+pub const cli_file_CL_TYPE_XML_HWP: cli_file = 581;
+pub const cli_file_CL_TYPE_HWPOLE2: cli_file = 582;
+pub const cli_file_CL_TYPE_MHTML: cli_file = 583;
+pub const cli_file_CL_TYPE_LNK: cli_file = 584;
+pub const cli_file_CL_TYPE_UDF: cli_file = 585;
+pub const cli_file_CL_TYPE_ALZ: cli_file = 586;
+pub const cli_file_CL_TYPE_OTHER: cli_file = 587;
+pub const cli_file_CL_TYPE_IGNORED: cli_file = 588;
pub type cli_file = ::std::os::raw::c_uint;
pub use self::cli_file as cli_file_t;
#[repr(C)]
diff --git a/unit_tests/clamscan/zstd_test.py b/unit_tests/clamscan/zstd_test.py
new file mode 100644
index 000000000..eb1ab48d8
--- /dev/null
+++ b/unit_tests/clamscan/zstd_test.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2020-2026 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+
+"""
+Run clamscan tests for Zstandard (zstd) compressed files.
+"""
+
+import sys
+
+sys.path.append('../unit_tests')
+import testcase
+
+
+class TC(testcase.TestCase):  # clamscan tests for zstd: plain, concatenated, skippable prefix
+    @classmethod
+    def setUpClass(cls):  # no extra class-level setup; defers to the base class
+        super(TC, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):  # no extra class-level teardown; defers to the base class
+        super(TC, cls).tearDownClass()
+
+    def setUp(self):
+        super(TC, self).setUp()
+
+    def tearDown(self):
+        super(TC, self).tearDown()
+        self.verify_valgrind_log()
+
+    def test_zstd(self):
+        self.step_name('Test scanning a zstd compressed file')
+
+        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'zstd' / 'testfile.txt.zst'
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfile}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
+            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'zstd.hdb',
+            testfile=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # exit code 1: clamscan reported a detection
+
+        expected_results = [
+            'ZSTD_TEST_FILE.UNOFFICIAL FOUND',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+    def test_zstd_concatenated(self):
+        self.step_name('Test scanning a zstd file with concatenated frames')
+
+        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'zstd' / 'concat.txt.zst'
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfile}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
+            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'zstd.hdb',
+            testfile=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # exit code 1: clamscan reported a detection
+
+        expected_results = [
+            'ZSTD_TEST_FILE.UNOFFICIAL FOUND',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+    def test_zstd_skippable_prefix(self):
+        self.step_name('Test scanning a zstd file with a skippable frame prefix')
+
+        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'zstd' / 'skip_prefix.txt.zst'
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfile}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
+            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'zstd.hdb',
+            testfile=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # exit code 1: clamscan reported a detection
+
+        expected_results = [
+            'ZSTD_TEST_FILE.UNOFFICIAL FOUND',
+        ]
+        self.verify_output(output.out, expected=expected_results)
diff --git a/unit_tests/input/other_scanfiles/zstd/concat.txt.zst b/unit_tests/input/other_scanfiles/zstd/concat.txt.zst
new file mode 100644
index 000000000..f8b6871f2
Binary files /dev/null and b/unit_tests/input/other_scanfiles/zstd/concat.txt.zst differ
diff --git a/unit_tests/input/other_scanfiles/zstd/skip_prefix.txt.zst b/unit_tests/input/other_scanfiles/zstd/skip_prefix.txt.zst
new file mode 100644
index 000000000..aa2016146
Binary files /dev/null and b/unit_tests/input/other_scanfiles/zstd/skip_prefix.txt.zst differ
diff --git a/unit_tests/input/other_scanfiles/zstd/testfile.txt.zst b/unit_tests/input/other_scanfiles/zstd/testfile.txt.zst
new file mode 100644
index 000000000..0cbf31e51
Binary files /dev/null and b/unit_tests/input/other_scanfiles/zstd/testfile.txt.zst differ
diff --git a/unit_tests/input/other_sigs/zstd.hdb b/unit_tests/input/other_sigs/zstd.hdb
new file mode 100644
index 000000000..e8e90c435
--- /dev/null
+++ b/unit_tests/input/other_sigs/zstd.hdb
@@ -0,0 +1,2 @@
+1735a5732d057a5b57421ccd0a88e7049c342579:28:ZSTD_TEST_FILE
+cf327fba15169e4594083e3a03283ec6c52e835c:54:ZSTD_TEST_FILE