Commit 37c5a13d677 for php.net
commit 37c5a13d67798909e18e03c81b3341950f04a409
Author: Jordi Kroon <jkroon@onyourmarks.agency>
Date: Sun Feb 15 23:55:43 2026 +0100
replace alloca with do_alloca in mb_guess_encoding_for_strings
This avoids a crash in cases where the list of candidate encodings is so large
that the alloca call overflows the stack. Such crashes have been observed when
the list of encodings was larger than around 208,000 entries.
diff --git a/NEWS b/NEWS
index 4f965513037..ee8e49f7320 100644
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,10 @@ PHP NEWS
. Fixed bug GH-21097 (Accessing Dom\Node properties can can throw TypeError).
(ndossche)
+- MBString:
+ . Fixed bug GH-21223; mb_guess_encoding no longer crashes when passed a
+ huge list of candidate encodings (with 200,000+ entries). (Jordi Kroon)
+
- Opcache:
. Fixed bug GH-20718 ("Insufficient shared memory" when using JIT on Solaris).
(Petr Sumbera)
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index b320a6a5f0e..9833a37cdb7 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -3376,8 +3376,9 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c
return *elist;
}
- /* Allocate on stack; when we return, this array is automatically freed */
- struct candidate *array = alloca(elist_size * sizeof(struct candidate));
+ /* Allocate on stack or heap */
+ ALLOCA_FLAG(use_heap)
+ struct candidate *array = do_alloca(elist_size * sizeof(struct candidate), use_heap);
elist_size = init_candidate_array(array, elist_size, elist, strings, str_lengths, n, strict, order_significant);
while (n--) {
@@ -3385,6 +3386,7 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c
elist_size = count_demerits(array, elist_size, strict);
if (elist_size == 0) {
/* All candidates were eliminated */
+ free_alloca(array, use_heap);
return NULL;
}
}
@@ -3396,7 +3398,10 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c
best = i;
}
}
- return array[best].enc;
+
+ const mbfl_encoding *result = array[best].enc;
+ free_alloca(array, use_heap);
+ return result;
}
/* When doing 'strict' detection, any string which is invalid in the candidate encoding
diff --git a/ext/mbstring/tests/gh21223.phpt b/ext/mbstring/tests/gh21223.phpt
new file mode 100644
index 00000000000..7138868af16
--- /dev/null
+++ b/ext/mbstring/tests/gh21223.phpt
@@ -0,0 +1,19 @@
+--TEST--
+GH-21223 (Stack overflow in mb_guess_encoding called via mb_detect_encoding)
+--EXTENSIONS--
+mbstring
+--FILE--
+<?php
+$str = "hello";
+
+$list = [];
+for ($i = 0; $i < 500000; $i++) {
+ $list[] = "UTF-8";
+}
+
+var_dump(mb_detect_encoding($str, $list, false));
+echo "Done";
+?>
+--EXPECT--
+string(5) "UTF-8"
+Done