Commit 98563c2d8e4 for php.net
commit 98563c2d8e40dc71848fb2d090702bfc3d25aeb1
Author: Ilia Alshanetsky <ilia@ilia.ws>
Date: Tue Jun 16 15:33:43 2026 -0400
Fix MIME charset sniffing advancing by name length not value length
php_libxml_sniff_charset_from_string() advanced the parse cursor by the
parameter name length after collecting an unquoted parameter value
(WHATWG mime-sniff step 11.9.1), instead of the value length. When a
Content-Type parameter before charset had a name and value of different
lengths, the cursor became misaligned and the charset parameter was missed, so
document loading fell back to the wrong encoding.
Closes GH-22343
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt
index 5c602b87f23..5164ac68041 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt
@@ -46,6 +46,7 @@
"text/html; ;; ; ;; Charset=\"ISO-8859-1\"",
"text/html;Charset=\"ISO-8859-1",
"tex.t/h#\$%!&'*%2B-.^_`|~tml;Charset=\"ISO-8859-1\"", // Note: have to encode + as 2B because of implementation details of http_server()
+ "text/html; abcd=ef;charset=ISO-8859-1",
],
"Valid input, but invalid encoding name" => [
"text/html;Charset=\"ISO-8859-1\\",
@@ -100,6 +101,7 @@
äöü
äöü
äöü
+äöü
--- Valid input, but invalid encoding name ---
���
���
diff --git a/ext/libxml/mime_sniff.c b/ext/libxml/mime_sniff.c
index 0ca032f9b79..2840c69701f 100644
--- a/ext/libxml/mime_sniff.c
+++ b/ext/libxml/mime_sniff.c
@@ -273,7 +273,7 @@ PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_string(const char *sta
/* 11.9.1. Set parameterValue to the result of collecting a sequence of code points that are not ';' */
size_t parameter_value_length = collect_a_sequence_of_code_points(start, end, is_not_semicolon);
parameter_value = zend_string_init(start, parameter_value_length, false);
- start += parameter_name_length;
+ start += parameter_value_length;
/* 11.9.2. Remove trailing HTTP whitespace from parameterValue */
while (ZSTR_LEN(parameter_value) > 0 && is_http_whitespace(ZSTR_VAL(parameter_value)[ZSTR_LEN(parameter_value) - 1])) {