Commit 6c0578d31c3 for php.net

commit 6c0578d31c341b505983fc5b63cab04c540fa86a
Author: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date:   Mon Apr 21 13:32:38 2025 +0200

    Improve performance of urldecode() and rawurldecode()

    There are two hot spots on my machines:
    1. We copy the string because the internal PHP API works in-place.
    2. The conversion of hex characters is slow due to going through the C
       locale handling.

    This patch resolves the first hot spot by introducing 2 new internal
    APIs that avoid the redundant copy and allocate an empty string upfront.
    The second hot spot is resolved by having a specialised htoi handler.

    For the following benchmark:
    ```php
    $encoded = "Hello%20World%21+This%20is%20a%20test%3A%20%40%23%24%25%5E%26*%28%29";
    for ($i=0;$i<2000000;$i++) {
      rawurldecode($encoded);
      urldecode($encoded);
    }
    ```

    On an i7-4790:
    ```
    Benchmark 1: ./sapi/cli/php x.php
      Time (mean ± σ):     364.8 ms ±   3.7 ms    [User: 359.9 ms, System: 3.3 ms]
      Range (min … max):   359.9 ms … 372.0 ms    10 runs

    Benchmark 2: ./sapi/cli/php_old x.php
      Time (mean ± σ):     565.5 ms ±   4.9 ms    [User: 561.8 ms, System: 2.5 ms]
      Range (min … max):   560.7 ms … 578.2 ms    10 runs

    Summary
      ./sapi/cli/php x.php ran
        1.55 ± 0.02 times faster than ./sapi/cli/php_old x.php
    ```

    On an i7-1185G7:
    ```
    Benchmark 1: ./sapi/cli/php x.php
      Time (mean ± σ):     708.8 ms ±   6.1 ms    [User: 701.4 ms, System: 6.3 ms]
      Range (min … max):   701.9 ms … 722.3 ms    10 runs

    Benchmark 2: ./sapi/cli/php_old x.php
      Time (mean ± σ):      1.311 s ±  0.019 s    [User: 1.300 s, System: 0.008 s]
      Range (min … max):    1.281 s …  1.348 s    10 runs

    Summary
      ./sapi/cli/php x.php ran
        1.85 ± 0.03 times faster than ./sapi/cli/php_old x.php
    ```

    Closes GH-18378.

diff --git a/UPGRADING b/UPGRADING
index 40645a28ac6..68f7d20a7f3 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -476,6 +476,7 @@ PHP 8.5 UPGRADE NOTES
 - Standard:
   . Improved performance of array functions with callbacks
     (array_find, array_filter, array_map, usort, ...).
+  . Improved performance of urldecode() and rawurldecode().

 - XMLReader:
   . Improved property access performance.
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 56c75351584..7c7f093e50c 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -61,6 +61,10 @@ PHP 8.5 INTERNALS UPGRADE NOTES
     is still valid. This is useful when a GC cycle is collected and the
     database object can be destroyed prior to destroying the statement.

+- ext/standard
+  . Added php_url_decode_ex() and php_raw_url_decode_ex() that, unlike their
+    non-ex counterparts, do not work in-place.
+
 ========================
 4. OpCode changes
 ========================
diff --git a/ext/standard/url.c b/ext/standard/url.c
index 3d704b0140c..da2ddea0673 100644
--- a/ext/standard/url.c
+++ b/ext/standard/url.c
@@ -411,21 +411,24 @@ PHP_FUNCTION(parse_url)
 }
 /* }}} */

+/* https://stackoverflow.com/questions/34365746/whats-the-fastest-way-to-convert-hex-to-integer-in-c */
+static unsigned int php_htoi_single(unsigned char x)
+{
+	ZEND_ASSERT((x >= 'a' && x <= 'f') || (x >= 'A' && x <= 'F') || (x >= '0' && x <= '9'));
+	return 9 * (x >> 6) + (x & 0xf);
+}
+
 /* {{{ php_htoi */
-static int php_htoi(char *s)
+static int php_htoi(const char *s)
 {
 	int value;
-	int c;
+	unsigned char c;

 	c = ((unsigned char *)s)[0];
-	if (isupper(c))
-		c = tolower(c);
-	value = (c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10) * 16;
+	value = php_htoi_single(c) * 16;

 	c = ((unsigned char *)s)[1];
-	if (isupper(c))
-		c = tolower(c);
-	value += c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10;
+	value += php_htoi_single(c);

 	return (value);
 }
@@ -572,28 +575,27 @@ PHP_FUNCTION(urldecode)
 		Z_PARAM_STR(in_str)
 	ZEND_PARSE_PARAMETERS_END();

-	out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
-	ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
+	out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
+	ZSTR_LEN(out_str) = php_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));

 	RETURN_NEW_STR(out_str);
 }
 /* }}} */

-/* {{{ php_url_decode */
-PHPAPI size_t php_url_decode(char *str, size_t len)
+PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len)
 {
-	char *dest = str;
-	char *data = str;
+	char *dest_start = dest;
+	const char *data = src;

-	while (len--) {
+	while (src_len--) {
 		if (*data == '+') {
 			*dest = ' ';
 		}
-		else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
+		else if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
 				 && isxdigit((int) *(data + 2))) {
 			*dest = (char) php_htoi(data + 1);
 			data += 2;
-			len -= 2;
+			src_len -= 2;
 		} else {
 			*dest = *data;
 		}
@@ -601,7 +603,13 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
 		dest++;
 	}
 	*dest = '\0';
-	return dest - str;
+	return dest - dest_start;
+}
+
+/* {{{ php_url_decode */
+PHPAPI size_t php_url_decode(char *str, size_t len)
+{
+	return php_url_decode_ex(str, str, len);
 }
 /* }}} */

@@ -634,25 +642,24 @@ PHP_FUNCTION(rawurldecode)
 		Z_PARAM_STR(in_str)
 	ZEND_PARSE_PARAMETERS_END();

-	out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
-	ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
+	out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
+	ZSTR_LEN(out_str) = php_raw_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));

 	RETURN_NEW_STR(out_str);
 }
 /* }}} */

-/* {{{ php_raw_url_decode */
-PHPAPI size_t php_raw_url_decode(char *str, size_t len)
+PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len)
 {
-	char *dest = str;
-	char *data = str;
+	char *dest_start = dest;
+	const char *data = src;

-	while (len--) {
-		if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
+	while (src_len--) {
+		if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
 			&& isxdigit((int) *(data + 2))) {
 			*dest = (char) php_htoi(data + 1);
 			data += 2;
-			len -= 2;
+			src_len -= 2;
 		} else {
 			*dest = *data;
 		}
@@ -660,7 +667,13 @@ PHPAPI size_t php_raw_url_decode(char *str, size_t len)
 		dest++;
 	}
 	*dest = '\0';
-	return dest - str;
+	return dest - dest_start;
+}
+
+/* {{{ php_raw_url_decode */
+PHPAPI size_t php_raw_url_decode(char *str, size_t len)
+{
+	return php_raw_url_decode_ex(str, str, len);
 }
 /* }}} */

diff --git a/ext/standard/url.h b/ext/standard/url.h
index 4126ee6c6db..5c531c0086a 100644
--- a/ext/standard/url.h
+++ b/ext/standard/url.h
@@ -33,7 +33,9 @@ PHPAPI php_url *php_url_parse(char const *str);
 PHPAPI php_url *php_url_parse_ex(char const *str, size_t length);
 PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port);
 PHPAPI size_t php_url_decode(char *str, size_t len); /* return value: length of decoded string */
+PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len);
 PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length of decoded string */
+PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len);
 PHPAPI zend_string *php_url_encode(char const *s, size_t len);
 PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len);