Commit 8fc8e060a for clamav.net
commit 8fc8e060af3ed18a0657e84be263452aed34ce2f
Author: Val S. <valsnyde@cisco.com>
Date: Wed Mar 4 16:31:13 2026 -0500
Fix possible panic when scanning some HTML files (#1682)
Invalid UTF-8 characters may trigger a Rust panic (crash) when parsing
CSS style blocks to extract images.
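The cause is the use of `split_at()`, which panics when the index is not
on a UTF-8 character boundary, instead of `split_at_checked()`, which
returns `None` in that case.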
I also found a few places where I could use string trim methods rather
than doing that logic manually.
Thank you to Krishnap7p for reporting this issue.
CLAM-2819
CLAM-2828
diff --git a/libclamav_rust/src/css_image_extract.rs b/libclamav_rust/src/css_image_extract.rs
index e49614214..d480e0e16 100644
--- a/libclamav_rust/src/css_image_extract.rs
+++ b/libclamav_rust/src/css_image_extract.rs
@@ -66,10 +66,15 @@ impl<'a> CssImageExtractor<'a> {
'outer: loop {
// Find occurrence of "url" with
if let Some(pos) = self.remaining.find("url") {
- (_, self.remaining) = self.remaining.split_at(pos + "url".len());
- // Found 'url'.
+ // Found 'url'. Get the stuff after it.
+ if let Some((_, rest)) = self.remaining.split_at_checked(pos + "url".len()) {
+ self.remaining = rest;
+ } else {
+ // Split failed.
+ self.remaining = "";
+ return None;
+ }
} else {
- // No occurrence of "url"
// No more 'url's.
self.remaining = "";
return None;
@@ -78,8 +83,14 @@ impl<'a> CssImageExtractor<'a> {
// Skip whitespace until we find '('
for (pos, c) in self.remaining.grapheme_indices(true) {
if c == "(" {
- // Found left-paren.
- (_, self.remaining) = self.remaining.split_at(pos + 1);
+ // Found left-paren. Get the stuff after it.
+ self.remaining =
+ if let Some((_, rest)) = self.remaining.split_at_checked(pos + 1) {
+ rest
+ } else {
+ // Split failed. Was not whitespace or '(', and maybe not a legal char.
+ continue 'outer;
+ };
break;
} else if c.contains(char::is_whitespace) {
// Skipping whitespace.
@@ -103,7 +114,13 @@ impl<'a> CssImageExtractor<'a> {
depth -= 1;
} else {
// Found right-paren.
- let (contents, remaining) = self.remaining.split_at(pos);
+ let (contents, remaining) =
+ if let Some((c, r)) = self.remaining.split_at_checked(pos) {
+ (c, r)
+ } else {
+ // Split failed.
+ continue;
+ };
url_parameter = Some(contents);
// Set the remaining bytes so we can look for more later.
@@ -123,65 +140,14 @@ impl<'a> CssImageExtractor<'a> {
// Strip optional whitespace and quotes from front and back.
- // Trim off whitespace at beginning
- for (pos, c) in url_parameter.grapheme_indices(true) {
- if c.contains(char::is_whitespace) {
- // Skipping whitespace before url contents.
- continue;
- } else {
- (_, url_parameter) = url_parameter.split_at(pos);
- break;
- }
- }
+ // Trim off whitespace at beginning and end.
+ url_parameter = url_parameter.trim();
- // Trim off whitespace at end
- for (pos, c) in url_parameter.graphemes(true).rev().enumerate() {
- if c.contains(char::is_whitespace) {
- // Skipping whitespace after url contents.
- continue;
- } else {
- (url_parameter, _) = url_parameter.split_at(url_parameter.len() - pos);
- break;
- }
- }
-
- // Trim off " at beginning.
- let c = url_parameter.graphemes(true).next();
- if let Some(c) = c {
- if c == "\"" {
- (_, url_parameter) = url_parameter.split_at(1);
- }
- };
-
- // Trim off " at end.
- let c = url_parameter.graphemes(true).next_back();
- if let Some(c) = c {
- if c == "\"" {
- (url_parameter, _) = url_parameter.split_at(url_parameter.len() - 1);
- }
- };
-
- // Trim off whitespace at beginning.
- for (pos, c) in url_parameter.grapheme_indices(true) {
- if c.contains(char::is_whitespace) {
- // Skipping whitespace before url contents.
- continue;
- } else {
- (_, url_parameter) = url_parameter.split_at(pos);
- break;
- }
- }
+ // Trim off " at beginning and end.
+ url_parameter = url_parameter.trim_matches('"');
- // Trim off whitespace at end.
- for (pos, c) in url_parameter.graphemes(true).rev().enumerate() {
- if c.contains(char::is_whitespace) {
- // Skipping whitespace after url contents.
- continue;
- } else {
- (url_parameter, _) = url_parameter.split_at(url_parameter.len() - pos);
- break;
- }
- }
+ // Trim off more whitespace at beginning and end which had been inside the quotes.
+ url_parameter = url_parameter.trim();
// Check for embedded image data for the "url"
if !url_parameter.starts_with("data:") {
@@ -190,7 +156,12 @@ impl<'a> CssImageExtractor<'a> {
}
// Found "data:"
- (_, url_parameter) = url_parameter.split_at("data:".len());
+ if let Some((_, rest)) = url_parameter.split_at_checked("data:".len()) {
+ url_parameter = rest;
+ } else {
+ // Split failed. Let's move along.
+ continue;
+ }
// The exact image type doesn't matter at all to a browser.
// They really don't care if it's "image/gif" or "blah blah blah".
@@ -198,8 +169,13 @@ impl<'a> CssImageExtractor<'a> {
// Find contents after ";"
if let Some(pos) = url_parameter.find(';') {
- (_, url_parameter) = url_parameter.split_at(pos + ";".len());
- // Found ";"
+ // Found ";". Get the stuff after it.
+ if let Some((_, rest)) = url_parameter.split_at_checked(pos + ";".len()) {
+ url_parameter = rest;
+ } else {
+ // Split failed. Something in this data appears to be malformed. Let's move along.
+ continue 'outer;
+ }
} else {
// No occurrence of ";" in the url() parameter.
continue 'outer;
@@ -209,7 +185,12 @@ impl<'a> CssImageExtractor<'a> {
for (pos, c) in url_parameter.grapheme_indices(true) {
if c == "b" {
// Found 'b'.
- (_, url_parameter) = url_parameter.split_at(pos + 1);
+ if let Some((_, rest)) = url_parameter.split_at_checked(pos + 1) {
+ url_parameter = rest;
+ } else {
+ // Split failed. Was not whitespace or 'b', and maybe not a legal char.
+ continue 'outer;
+ }
break;
} else if c.contains(char::is_whitespace) {
// Skipping whitespace.
@@ -227,13 +208,23 @@ impl<'a> CssImageExtractor<'a> {
}
// Found "base64"
- (_, url_parameter) = url_parameter.split_at("ase64".len());
+ if let Some((_, rest)) = url_parameter.split_at_checked("ase64".len()) {
+ url_parameter = rest;
+ } else {
+ // Split failed. Something in this data appears to be malformed. Let's move along.
+ continue 'outer;
+ }
// Skip whitespace until we find ','
for (pos, c) in url_parameter.grapheme_indices(true) {
if c == "," {
- // Found ','.
- (_, url_parameter) = url_parameter.split_at(pos + 1);
+ // Found ','. Get the stuff after it.
+ if let Some((_, rest)) = url_parameter.split_at_checked(pos + 1) {
+ url_parameter = rest;
+ } else {
+ // Split failed. Was not whitespace or ',', and maybe not a legal char.
+ continue 'outer;
+ }
break;
} else if c.contains(char::is_whitespace) {
// Skipping whitespace.
@@ -245,15 +236,7 @@ impl<'a> CssImageExtractor<'a> {
}
// Trim off whitespace at beginning.
- for (pos, c) in url_parameter.grapheme_indices(true) {
- if c.contains(char::is_whitespace) {
- // Skipping whitespace before url contents.
- continue;
- } else {
- (_, url_parameter) = url_parameter.split_at(pos);
- break;
- }
- }
+ url_parameter = url_parameter.trim_start();
debug!("Found base64'd image data CSS url() function args.");
return Some(url_parameter);