Commit 3cb7d1bd8ac for php.net

commit 3cb7d1bd8ac352d7c97237e509da3b4c1db1aed8
Author: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date:   Thu May 29 21:53:36 2025 +0200

    Remove custom UTF-8 check function from ext/libxml

    This was originally introduced as a workaround for a libxml2 bug [1].
    That bug has been fixed upstream for more than a decade [2], so we can
    use the libxml2 API again: PHP 7.4 raised the minimum required libxml2
    version past the release containing the fix [3].

    [1] 7e53511ec810c2ab257b9cb68c1fc315e057a37f
    [2] https://github.com/GNOME/libxml2/commit/3ffe90ea1cbd4fd8eb2eb162d7940a445f1455d6
    [3] https://github.com/php/php-src/commit/74235ca5f3fae3325175cf0e68cd7e28c86e1378

    Closes GH-18706.

diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 2af2d4ce1c5..82bd53db518 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -62,6 +62,7 @@ PHP 8.5 INTERNALS UPGRADE NOTES

 - ext/libxml
   . The refcount APIs now return an `unsigned int` instead of an `int`.
+  . Removed php_libxml_xmlCheckUTF8(). Use xmlCheckUTF8() from libxml instead.

 - ext/pdo
   . Added `php_pdo_stmt_valid_db_obj_handle()` to check if the database object
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 5d5f3f383cd..03a89c7aad5 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -1236,32 +1236,6 @@ PHP_FUNCTION(libxml_get_external_entity_loader)
 /* }}} */

 /* {{{ Common functions shared by extensions */
-bool php_libxml_xmlCheckUTF8(const unsigned char *s)
-{
-	size_t i;
-	unsigned char c;
-
-	for (i = 0; (c = s[i++]);) {
-		if ((c & 0x80) == 0) {
-		} else if ((c & 0xe0) == 0xc0) {
-			if ((s[i++] & 0xc0) != 0x80) {
-				return false;
-			}
-		} else if ((c & 0xf0) == 0xe0) {
-			if ((s[i++] & 0xc0) != 0x80 || (s[i++] & 0xc0) != 0x80) {
-				return false;
-			}
-		} else if ((c & 0xf8) == 0xf0) {
-			if ((s[i++] & 0xc0) != 0x80 || (s[i++] & 0xc0) != 0x80 || (s[i++] & 0xc0) != 0x80) {
-				return false;
-			}
-		} else {
-			return false;
-		}
-	}
-	return true;
-}
-
 zval *php_libxml_register_export(const zend_class_entry *ce, php_libxml_export_node export_function)
 {
 	/* Initialize in case this module hasn't been loaded yet */
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index ea7961dc2f1..eb10e7e4cb9 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -207,7 +207,6 @@ PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...);
 PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...);
 PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...);
 PHP_LIBXML_API void php_libxml_error_handler_va(php_libxml_error_level error_type, void *ctx, const char *msg, va_list args);
-PHP_LIBXML_API bool php_libxml_xmlCheckUTF8(const unsigned char *s);
 PHP_LIBXML_API void php_libxml_switch_context(const zval *context, zval *oldcontext);
 PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg);
 PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable);
diff --git a/ext/soap/php_encoding.c b/ext/soap/php_encoding.c
index e3bd3029388..8423f50fd4b 100644
--- a/ext/soap/php_encoding.c
+++ b/ext/soap/php_encoding.c
@@ -878,7 +878,7 @@ static xmlNodePtr to_xml_string(encodeTypePtr type, zval *data, int style, xmlNo
 		xmlBufferFree(in);
 	}

-	if (!php_libxml_xmlCheckUTF8(BAD_CAST(str))) {
+	if (!xmlCheckUTF8(BAD_CAST str)) {
 		char *err = emalloc(new_len + 8);
 		char c;
 		int i;