Commit 66376389feb for php.net
commit 66376389feb55a1fc3afd38e78af7f67fc0ab374
Author: Máté Kocsis <kocsismate@woohoolabs.com>
Date: Tue Jul 1 21:53:33 2025 +0200
Update uriparser to commit 5f7c6d88c50f548d0c7f499c22d36f51d34775b3
While there, fix Windows build by adding UriResolve.c to the sources.
diff --git a/ext/uri/config.m4 b/ext/uri/config.m4
index 08dc044d8d2..3631ad3c5c0 100644
--- a/ext/uri/config.m4
+++ b/ext/uri/config.m4
@@ -11,8 +11,8 @@ AC_DEFINE([URI_ENABLE_ANSI], [1], [Define to 1 for enabling ANSI support of urip
AC_DEFINE([URI_NO_UNICODE], [1], [Define to 1 for disabling unicode support of uriparser.])
URIPARSER_DIR="uriparser"
-URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriEscape.c \
-$URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
+URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriCopy.c \
+$URIPARSER_DIR/src/UriEscape.c $URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
$URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/src/UriNormalizeBase.c \
$URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \
$URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c"
diff --git a/ext/uri/config.w32 b/ext/uri/config.w32
index 9c6af0cc5fa..8086b4b9bfc 100644
--- a/ext/uri/config.w32
+++ b/ext/uri/config.w32
@@ -5,5 +5,5 @@ AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uri
ADD_FLAG("CFLAGS_URI", "/D URI_STATIC_BUILD");
ADD_EXTENSION_DEP('uri', 'lexbor');
-ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriShorten.c", "uri");
+ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriCopy.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c UriShorten.c", "uri");
PHP_INSTALL_HEADERS("ext/uri", "php_lexbor.h php_uri.h php_uri_common.h uriparser/src uriparser/include");
diff --git a/ext/uri/uriparser/include/uriparser/Uri.h b/ext/uri/uriparser/include/uriparser/Uri.h
index 44bc5acc54b..f0f2ad9a34b 100644
--- a/ext/uri/uriparser/include/uriparser/Uri.h
+++ b/ext/uri/uriparser/include/uriparser/Uri.h
@@ -201,6 +201,17 @@ typedef struct URI_TYPE(QueryListStruct) {
} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */
+/**
+ * Checks if a URI has the host component set.
+ *
+ * @param uri <b>IN</b>: %URI to check
+ * @return <c>URI_TRUE</c> when host is set, <c>URI_FALSE</c> otherwise
+ *
+ * @since 0.9.9
+ */
+URI_PUBLIC UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri);
+
+
/**
* Parses a RFC 3986 %URI.
@@ -644,6 +655,36 @@ URI_PUBLIC int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri,
+/**
+ * Copies a %URI structure.
+ *
+ * @param destUri <b>OUT</b>: Output destination
+ * @param sourceUri <b>IN</b>: %URI to copy
+ * @param memory <b>IN</b>: Memory manager to use, NULL for default libc
+ * @return Error code or 0 on success
+ *
+ * @see uriCopyUriA
+ * @since 0.9.9
+ */
+URI_PUBLIC int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
+ const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory);
+
+
+
+/**
+ * Copies a %URI structure.
+ *
+ * @param destUri <b>OUT</b>: Output destination
+ * @param sourceUri <b>IN</b>: %URI to copy
+ * @return Error code or 0 on success
+ *
+ * @see uriCopyUriMmA
+ * @since 0.9.9
+ */
+URI_PUBLIC int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri, const URI_TYPE(Uri) * sourceUri);
+
+
+
/**
* Determines the components of a %URI that are not normalized.
*
diff --git a/ext/uri/uriparser/include/uriparser/UriBase.h b/ext/uri/uriparser/include/uriparser/UriBase.h
index dc3883e6516..46c02135bb1 100644
--- a/ext/uri/uriparser/include/uriparser/UriBase.h
+++ b/ext/uri/uriparser/include/uriparser/UriBase.h
@@ -258,7 +258,8 @@ typedef enum UriNormalizationMaskEnum {
URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */
URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */
URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */
- URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */
+ URI_NORMALIZE_FRAGMENT = 1 << 5, /**< Normalize fragment (fix uppercase percent-encodings) */
+ URI_NORMALIZE_PORT = 1 << 6 /**< Normalize port (drop leading zeros) @since 0.9.9 */
} UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */
diff --git a/ext/uri/uriparser/src/UriCommon.c b/ext/uri/uriparser/src/UriCommon.c
index 88e2767d71c..ccec5d4d5c8 100644
--- a/ext/uri/uriparser/src/UriCommon.c
+++ b/ext/uri/uriparser/src/UriCommon.c
@@ -119,6 +119,40 @@ int URI_FUNC(CompareRange)(
+/*
+ * Duplicates the characters spanned by sourceRange into memory obtained
+ * from the given memory manager and lets destRange span the duplicate.
+ *
+ * destRange may be the very same object as sourceRange: the source
+ * pointers are read completely before destRange is written.
+ *
+ * Returns URI_FALSE when the allocation fails (destRange is left as is),
+ * URI_TRUE otherwise.
+ */
+UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange,
+		const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory) {
+	const URI_CHAR * const sourceFirst = sourceRange->first;
+	const int charCount = (int)(sourceRange->afterLast - sourceFirst);
+	const int byteCount = charCount * sizeof(URI_CHAR);
+	URI_CHAR * const copy = memory->malloc(memory, byteCount);
+
+	if (copy != NULL) {
+		memcpy(copy, sourceFirst, byteCount);
+		destRange->first = copy;
+		destRange->afterLast = copy + charCount;
+		return URI_TRUE;
+	}
+
+	return URI_FALSE;
+}
+
+
+
+/*
+ * Copies sourceRange into destRange, allocating only when there are
+ * characters to duplicate:
+ *
+ *  - unset source (first == NULL): destRange becomes unset as well
+ *  - empty source (first == afterLast): destRange points at the shared
+ *    static placeholder, nothing is allocated
+ *  - anything else: the characters are duplicated through CopyRange
+ *
+ * useSafe is kept for signature compatibility only. Empty ranges used to
+ * be sent to CopyRange when useSafe was URI_FALSE, which requested a
+ * zero-byte allocation: that may yield NULL (wrongly reported as an
+ * out-of-memory condition) or a block that is never released by cleanup
+ * code that skips freeing empty ranges.
+ *
+ * Returns URI_FALSE on allocation failure, URI_TRUE otherwise.
+ */
+UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange,
+		const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory) {
+	(void)useSafe;  /* empty ranges never need an allocation */
+
+	if (sourceRange->first == NULL) {
+		destRange->first = NULL;
+		destRange->afterLast = NULL;
+	} else if (sourceRange->first == sourceRange->afterLast) {
+		destRange->first = URI_FUNC(SafeToPointTo);
+		destRange->afterLast = URI_FUNC(SafeToPointTo);
+	} else {
+		return URI_FUNC(CopyRange)(destRange, sourceRange, memory);
+	}
+
+	return URI_TRUE;
+}
+
+
+
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned, UriMemoryManager * memory) {
URI_TYPE(PathSegment) * walker;
@@ -189,7 +223,7 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
if (prev == NULL) {
/* Last and first */
- if (URI_FUNC(IsHostSet)(uri)) {
+ if (URI_FUNC(HasHost)(uri)) {
/* Replace "." with empty segment to represent trailing slash */
walker->text.first = URI_FUNC(SafeToPointTo);
walker->text.afterLast = URI_FUNC(SafeToPointTo);
@@ -463,7 +497,7 @@ URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
/* Checks if a URI has the host component set. */
-UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) {
+UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri) {
return (uri != NULL)
&& ((uri->hostText.first != NULL)
|| (uri->hostData.ip4 != NULL)
@@ -601,7 +635,7 @@ void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory) {
/* Fix path if only one empty segment */
if (!uri->absolutePath
- && !URI_FUNC(IsHostSet)(uri)
+ && !URI_FUNC(HasHost)(uri)
&& (uri->pathHead != NULL)
&& (uri->pathHead->next == NULL)
&& (uri->pathHead->text.first == uri->pathHead->text.afterLast)) {
diff --git a/ext/uri/uriparser/src/UriCommon.h b/ext/uri/uriparser/src/UriCommon.h
index 42311ddc98b..8dffab9f9f6 100644
--- a/ext/uri/uriparser/src/UriCommon.h
+++ b/ext/uri/uriparser/src/UriCommon.h
@@ -82,6 +82,11 @@ int URI_FUNC(CompareRange)(
const URI_TYPE(TextRange) * a,
const URI_TYPE(TextRange) * b);
+/* Duplicates the characters of sourceRange into freshly allocated memory
+ * and lets destRange span the duplicate; URI_FALSE on allocation failure. */
+UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange,
+ const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory);
+/* Like CopyRange, but an unset source range yields an unset destination and,
+ * when useSafe is set, an empty source range yields the static placeholder
+ * instead of an allocation. */
+UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange,
+ const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory);
+
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory);
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
@@ -91,8 +96,6 @@ unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig);
URI_CHAR URI_FUNC(HexToLetter)(unsigned int value);
URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase);
-UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri);
-
UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source,
UriMemoryManager * memory);
UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest,
diff --git a/ext/uri/uriparser/src/UriCopy.c b/ext/uri/uriparser/src/UriCopy.c
new file mode 100644
index 00000000000..0974ec5c040
--- /dev/null
+++ b/ext/uri/uriparser/src/UriCopy.c
@@ -0,0 +1,234 @@
+/*
+ * uriparser - RFC 3986 URI parsing library
+ *
+ * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
+ * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
+ * Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file UriCopy.c
+ * Holds the RFC 3986 %URI copying implementation.
+ * NOTE: This source file includes itself twice.
+ */
+
+/* What encodings are enabled? */
+#include <uriparser/UriDefsConfig.h>
+#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
+/* Include SELF twice */
+# ifdef URI_ENABLE_ANSI
+# define URI_PASS_ANSI 1
+# include "UriCopy.c"
+# undef URI_PASS_ANSI
+# endif
+# ifdef URI_ENABLE_UNICODE
+# define URI_PASS_UNICODE 1
+# include "UriCopy.c"
+# undef URI_PASS_UNICODE
+# endif
+#else
+# ifdef URI_PASS_ANSI
+# include <uriparser/UriDefsAnsi.h>
+# else
+# include <uriparser/UriDefsUnicode.h>
+# include <wchar.h>
+# endif
+
+
+
+#ifndef URI_DOXYGEN
+# include <uriparser/Uri.h>
+# include "UriCommon.h"
+# include "UriMemory.h"
+# include "UriNormalize.h"
+# include "UriCopy.h"
+#endif
+
+
+
+/*
+ * Releases what a partially completed copy has stored in uri so far.
+ *
+ * Components flagged in revertMask are handed to PreventLeakage first.
+ * After that the IPv4 host data is freed if set, otherwise the IPv6 host
+ * data if set (independent of revertMask). The port text is only touched
+ * when URI_NORMALIZE_PORT is flagged: it is freed unless it is empty and
+ * then reset to an unset range.
+ */
+static void URI_FUNC(PreventLeakageAfterCopy)(URI_TYPE(Uri) * uri,
+		unsigned int revertMask, UriMemoryManager * memory) {
+	const UriBool portCopied = (revertMask & URI_NORMALIZE_PORT) ? URI_TRUE : URI_FALSE;
+
+	URI_FUNC(PreventLeakage)(uri, revertMask, memory);
+
+	if (uri->hostData.ip4 == NULL) {
+		if (uri->hostData.ip6 != NULL) {
+			memory->free(memory, uri->hostData.ip6);
+			uri->hostData.ip6 = NULL;
+		}
+	} else {
+		memory->free(memory, uri->hostData.ip4);
+		uri->hostData.ip4 = NULL;
+	}
+
+	if (!portCopied) {
+		return;
+	}
+
+	/* Empty port text does not own an allocation */
+	if (uri->portText.first != uri->portText.afterLast) {
+		memory->free(memory, (URI_CHAR *)uri->portText.first);
+	}
+	uri->portText.first = NULL;
+	uri->portText.afterLast = NULL;
+}
+
+
+
+/*
+ * Creates a deep copy of sourceUri in destUri using the given memory manager.
+ *
+ * destUri is treated as uninitialized output and must be a different object
+ * than sourceUri. On success destUri owns all of its memory (owner is set
+ * to URI_TRUE). On allocation failure everything copied so far is released
+ * again before returning.
+ *
+ * Returns URI_SUCCESS, URI_ERROR_NULL when either URI pointer is NULL,
+ * URI_ERROR_MALLOC when an allocation fails, or whatever
+ * URI_CHECK_MEMORY_MANAGER returns when it rejects the memory manager.
+ */
+int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
+		const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory) {
+	unsigned int doneMask = URI_NORMALIZED;
+
+	if (sourceUri == NULL || destUri == NULL) {
+		return URI_ERROR_NULL;
+	}
+
+	URI_CHECK_MEMORY_MANAGER(memory); /* may return */
+
+	/* The failure path inspects the host data of destUri no matter how far
+	 * the copy got, so these members must never hold indeterminate values. */
+	destUri->hostData.ip4 = NULL;
+	destUri->hostData.ip6 = NULL;
+	destUri->hostData.ipFuture.first = NULL;
+	destUri->hostData.ipFuture.afterLast = NULL;
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->scheme, &sourceUri->scheme, URI_FALSE, memory) == URI_FALSE) {
+		/* Nothing has been allocated yet */
+		return URI_ERROR_MALLOC;
+	}
+
+	doneMask |= URI_NORMALIZE_SCHEME;
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->userInfo, &sourceUri->userInfo, URI_FALSE, memory) == URI_FALSE) {
+		goto fail;
+	}
+
+	doneMask |= URI_NORMALIZE_USER_INFO;
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostText, &sourceUri->hostText, URI_TRUE, memory) == URI_FALSE) {
+		goto fail;
+	}
+
+	doneMask |= URI_NORMALIZE_HOST;
+
+	if (sourceUri->hostData.ip4 != NULL) {
+		destUri->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4));
+		if (destUri->hostData.ip4 == NULL) {
+			goto fail;
+		}
+		*(destUri->hostData.ip4) = *(sourceUri->hostData.ip4);
+	}
+
+	if (sourceUri->hostData.ip6 != NULL) {
+		destUri->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6));
+		if (destUri->hostData.ip6 == NULL) {
+			goto fail;
+		}
+		*(destUri->hostData.ip6) = *(sourceUri->hostData.ip6);
+	}
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostData.ipFuture, &sourceUri->hostData.ipFuture, URI_FALSE, memory) == URI_FALSE) {
+		goto fail;
+	}
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->portText, &sourceUri->portText, URI_FALSE, memory) == URI_FALSE) {
+		goto fail;
+	}
+
+	doneMask |= URI_NORMALIZE_PORT;
+
+	destUri->pathHead = NULL;
+	destUri->pathTail = NULL;
+
+	{
+		const URI_TYPE(PathSegment) * sourceWalker = sourceUri->pathHead;
+
+		while (sourceWalker != NULL) {
+			URI_TYPE(PathSegment) * const destWalker = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
+			if (destWalker == NULL) {
+				goto fail;
+			}
+
+			destWalker->text.first = NULL;
+			destWalker->text.afterLast = NULL;
+			destWalker->next = NULL;
+			destWalker->reserved = NULL;
+
+			/* Link the segment into the list BEFORE copying its text so that
+			 * the failure path can reach it; a segment that is not linked
+			 * yet would be leaked when the text copy fails. */
+			if (destUri->pathTail == NULL) {
+				destUri->pathHead = destWalker;
+				doneMask |= URI_NORMALIZE_PATH;
+			} else {
+				destUri->pathTail->next = destWalker;
+			}
+			destUri->pathTail = destWalker;
+
+			if (URI_FUNC(CopyRangeAsNeeded)(&destWalker->text, &sourceWalker->text, URI_TRUE, memory) == URI_FALSE) {
+				goto fail;
+			}
+
+			sourceWalker = sourceWalker->next;
+		}
+	}
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->query, &sourceUri->query, URI_FALSE, memory) == URI_FALSE) {
+		goto fail;
+	}
+
+	doneMask |= URI_NORMALIZE_QUERY;
+
+	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->fragment, &sourceUri->fragment, URI_FALSE, memory) == URI_FALSE) {
+		goto fail;
+	}
+
+	destUri->absolutePath = sourceUri->absolutePath;
+	destUri->owner = URI_TRUE;
+	destUri->reserved = NULL;
+
+	return URI_SUCCESS;
+
+fail:
+	/* Release every component recorded in doneMask plus the host data */
+	URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
+	return URI_ERROR_MALLOC;
+}
+
+
+
+/*
+ * Convenience variant of CopyUriMm that passes NULL as the memory manager.
+ * Returns the result of CopyUriMm unchanged.
+ */
+int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri,
+		const URI_TYPE(Uri) * sourceUri) {
+	UriMemoryManager * const defaultMemory = NULL;
+
+	return URI_FUNC(CopyUriMm)(destUri, sourceUri, defaultMemory);
+}
+
+#endif
diff --git a/ext/uri/uriparser/src/UriCopy.h b/ext/uri/uriparser/src/UriCopy.h
new file mode 100644
index 00000000000..952b1df4f9c
--- /dev/null
+++ b/ext/uri/uriparser/src/UriCopy.h
@@ -0,0 +1,78 @@
+/*
+ * uriparser - RFC 3986 URI parsing library
+ *
+ * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
+ * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
+ * Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI)) \
+ || (defined(URI_PASS_UNICODE) && !defined(URI_COPY_H_UNICODE)) \
+ || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
+/* What encodings are enabled? */
+#include <uriparser/UriDefsConfig.h>
+#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
+/* Include SELF twice */
+# ifdef URI_ENABLE_ANSI
+# define URI_PASS_ANSI 1
+# include "UriCopy.h"
+# undef URI_PASS_ANSI
+# endif
+# ifdef URI_ENABLE_UNICODE
+# define URI_PASS_UNICODE 1
+# include "UriCopy.h"
+# undef URI_PASS_UNICODE
+# endif
+/* Only one pass for each encoding */
+#elif (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI) \
+ && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
+ && !defined(URI_COPY_H_UNICODE) && defined(URI_ENABLE_UNICODE))
+# ifdef URI_PASS_ANSI
+# define URI_COPY_H_ANSI 1
+# include <uriparser/UriDefsAnsi.h>
+# else
+# define URI_COPY_H_UNICODE 1
+# include <uriparser/UriDefsUnicode.h>
+# endif
+
+
+
+/* Library-internal declarations of the %URI copy functions; their contract
+ * is documented at the public declarations in uriparser/Uri.h. */
+int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
+ const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory);
+int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri,
+ const URI_TYPE(Uri) * sourceUri);
+
+#endif
+#endif
diff --git a/ext/uri/uriparser/src/UriNormalize.c b/ext/uri/uriparser/src/UriNormalize.c
index 0cf353f1111..56b19573665 100644
--- a/ext/uri/uriparser/src/UriNormalize.c
+++ b/ext/uri/uriparser/src/UriNormalize.c
@@ -109,12 +109,9 @@ static void URI_FUNC(LowercaseInplaceExceptPercentEncoding)(const URI_CHAR * fir
static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first,
const URI_CHAR ** afterLast, UriMemoryManager * memory);
-static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
- unsigned int revertMask, UriMemoryManager * memory);
-
-static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
+void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
unsigned int revertMask, UriMemoryManager * memory) {
if (revertMask & URI_NORMALIZE_SCHEME) {
/* NOTE: A scheme cannot be the empty string
@@ -407,15 +404,9 @@ static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
&& (range->first != NULL)
&& (range->afterLast != NULL)
&& (range->afterLast > range->first)) {
- const int lenInChars = (int)(range->afterLast - range->first);
- const int lenInBytes = lenInChars * sizeof(URI_CHAR);
- URI_CHAR * dup = memory->malloc(memory, lenInBytes);
- if (dup == NULL) {
- return URI_FALSE; /* Raises malloc error */
- }
- memcpy(dup, range->first, lenInBytes);
- range->first = dup;
- range->afterLast = dup + lenInChars;
+ if (URI_FUNC(CopyRange)(range, range, memory) == URI_FALSE) {
+ return URI_FALSE;
+ }
*doneMask |= maskTest;
}
return URI_TRUE;
@@ -557,6 +548,75 @@ int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) {
}
+/*
+ * Returns a pointer to the first character of the non-empty range
+ * [first, afterLast) that is not a leading '0'. If the range consists of
+ * zeros only, the rightmost zero is returned so that one digit remains.
+ */
+static const URI_CHAR * URI_FUNC(PastLeadingZeros)(const URI_CHAR * first, const URI_CHAR * afterLast) {
+	const URI_CHAR * cursor = first;
+
+	assert(first != NULL);
+	assert(afterLast != NULL);
+	assert(first != afterLast);
+
+	/* Walk over every leading zero */
+	for (; (cursor < afterLast) && (*cursor == _UT('0')); cursor++) {
+		/* nothing to do */
+	}
+
+	if (cursor != afterLast) {
+		/* Stopped at a character other than zero */
+		return cursor;
+	}
+
+	/* Zeros only; the range is not empty, so stepping back one
+	 * character lands on the rightmost zero */
+	assert(cursor > first);
+	cursor--;
+	assert(cursor[0] == _UT('0'));
+
+	return cursor;
+}
+
+
+
+/*
+ * Removes leading zeros from the writable range [first, *afterLast) by
+ * moving the remaining characters to the front, writing a terminator
+ * behind them and pulling *afterLast back accordingly. An empty range and
+ * a range without leading zeros to drop are left untouched.
+ */
+static void URI_FUNC(DropLeadingZerosInplace)(URI_CHAR * first, const URI_CHAR ** afterLast) {
+	const URI_CHAR * keepFrom;
+	size_t keepLen;
+
+	assert(first != NULL);
+	assert(afterLast != NULL);
+	assert(*afterLast != NULL);
+
+	if (first == *afterLast) {
+		return;
+	}
+
+	keepFrom = URI_FUNC(PastLeadingZeros)(first, *afterLast);
+	if (!(keepFrom > first)) {
+		/* Nothing to drop */
+		return;
+	}
+
+	keepLen = *afterLast - keepFrom;
+	/* Source and destination overlap, hence memmove */
+	memmove(first, keepFrom, keepLen * sizeof(URI_CHAR));
+	first[keepLen] = _UT('\0');
+	*afterLast = first + keepLen;
+}
+
+
+
+/*
+ * Drops leading zeros from the read-only range [*first, afterLast) by
+ * moving *first forward; no character is modified. An empty range is left
+ * untouched.
+ */
+static void URI_FUNC(AdvancePastLeadingZeros)(
+		const URI_CHAR ** first, const URI_CHAR * afterLast) {
+	assert(first != NULL);
+	assert(*first != NULL);
+	assert(afterLast != NULL);
+
+	if (*first != afterLast) {
+		/* Cut off leading zeros */
+		*first = URI_FUNC(PastLeadingZeros)(*first, afterLast);
+	}
+}
+
+
static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
unsigned int inMask, unsigned int * outMask,
@@ -658,6 +718,27 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
}
}
+ /* Port */
+ if (outMask != NULL) {
+ /* Is there a port even? */
+ if (uri->portText.first != NULL) {
+ /* Determine whether the port is already normalized, i.e. either "", "0" or no leading zeros */
+ const size_t portLen = uri->portText.afterLast - uri->portText.first;
+ if ((portLen > 1) && (uri->portText.first[0] == _UT('0'))) {
+ *outMask |= URI_NORMALIZE_PORT;
+ }
+ }
+ } else {
+ /* Normalize the port, i.e. drop leading zeros (except for string "0") */
+ if ((inMask & URI_NORMALIZE_PORT) && (uri->portText.first != NULL)) {
+ if (uri->owner) {
+ URI_FUNC(DropLeadingZerosInplace)((URI_CHAR *)uri->portText.first, &(uri->portText.afterLast));
+ } else {
+ URI_FUNC(AdvancePastLeadingZeros)(&(uri->portText.first), uri->portText.afterLast);
+ }
+ }
+ }
+
/* User info */
if (outMask != NULL) {
const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)(
diff --git a/ext/uri/uriparser/src/UriNormalize.h b/ext/uri/uriparser/src/UriNormalize.h
new file mode 100644
index 00000000000..cb58085b7d3
--- /dev/null
+++ b/ext/uri/uriparser/src/UriNormalize.h
@@ -0,0 +1,76 @@
+/*
+ * uriparser - RFC 3986 URI parsing library
+ *
+ * Copyright (C) 2018, Weijia Song <songweijia@gmail.com>
+ * Copyright (C) 2018, Sebastian Pipping <sebastian@pipping.org>
+ * Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI)) \
+ || (defined(URI_PASS_UNICODE) && !defined(URI_NORMALIZE_H_UNICODE)) \
+ || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
+/* What encodings are enabled? */
+#include <uriparser/UriDefsConfig.h>
+#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
+/* Include SELF twice */
+# ifdef URI_ENABLE_ANSI
+# define URI_PASS_ANSI 1
+# include "UriNormalize.h"
+# undef URI_PASS_ANSI
+# endif
+# ifdef URI_ENABLE_UNICODE
+# define URI_PASS_UNICODE 1
+# include "UriNormalize.h"
+# undef URI_PASS_UNICODE
+# endif
+/* Only one pass for each encoding */
+#elif (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI) \
+ && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
+ && !defined(URI_NORMALIZE_H_UNICODE) && defined(URI_ENABLE_UNICODE))
+# ifdef URI_PASS_ANSI
+# define URI_NORMALIZE_H_ANSI 1
+# include <uriparser/UriDefsAnsi.h>
+# else
+# define URI_NORMALIZE_H_UNICODE 1
+# include <uriparser/UriDefsUnicode.h>
+# endif
+
+
+
+/* Cleanup routine defined in UriNormalize.c; declared here so that other
+ * source files of the library (UriCopy.c) can call it as well. */
+void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
+ unsigned int revertMask, UriMemoryManager * memory);
+
+#endif
+#endif
diff --git a/ext/uri/uriparser/src/UriRecompose.c b/ext/uri/uriparser/src/UriRecompose.c
index 5027eca6cfa..1567efc81dc 100644
--- a/ext/uri/uriparser/src/UriRecompose.c
+++ b/ext/uri/uriparser/src/UriRecompose.c
@@ -152,7 +152,7 @@ static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest,
/* [05/19] endif; */
}
/* [06/19] if defined(authority) then */
- if (URI_FUNC(IsHostSet)(uri)) {
+ if (URI_FUNC(HasHost)(uri)) {
/* [07/19] append "//" to result; */
if (dest != NULL) {
if (written + 2 <= maxChars) {
@@ -422,7 +422,7 @@ static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest,
/* [10/19] append path to result; */
/* Slash needed here? */
if (uri->absolutePath || ((uri->pathHead != NULL)
- && URI_FUNC(IsHostSet)(uri))) {
+ && URI_FUNC(HasHost)(uri))) {
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("/"),
diff --git a/ext/uri/uriparser/src/UriResolve.c b/ext/uri/uriparser/src/UriResolve.c
index 80031a894d4..8e47e6af8c6 100644
--- a/ext/uri/uriparser/src/UriResolve.c
+++ b/ext/uri/uriparser/src/UriResolve.c
@@ -128,7 +128,7 @@ static int URI_FUNC(ResolveAbsolutePathFlag)(URI_TYPE(Uri) * absWork,
return URI_ERROR_NULL;
}
- if (URI_FUNC(IsHostSet)(absWork) && absWork->absolutePath) {
+ if (URI_FUNC(HasHost)(absWork) && absWork->absolutePath) {
/* Empty segment needed, instead? */
if (absWork->pathHead == NULL) {
URI_TYPE(PathSegment) * const segment = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
@@ -203,7 +203,7 @@ static int URI_FUNC(AddBaseUriImpl)(URI_TYPE(Uri) * absDest,
/* [06/32] else */
} else {
/* [07/32] if defined(R.authority) then */
- if (URI_FUNC(IsHostSet)(relSource)) {
+ if (URI_FUNC(HasHost)(relSource)) {
/* [08/32] T.authority = R.authority; */
if (!URI_FUNC(CopyAuthority)(absDest, relSource, memory)) {
return URI_ERROR_MALLOC;