Dev news

Commit 8fd69e15e0b for php.net

commit 8fd69e15e0bc623fe317e21261d88e176de43bbf
Author: Tim Düsterhus <tim@bastelstu.be>
Date:   Tue Dec 9 11:52:42 2025 +0100

    uri: Update to uriparser-0.9.9-79-gf47a7f0 (#20671)

    This is in preparation for importing a fix for the uriparser/uriparser#282
    security issue, which will likely depend on this refactoring to apply cleanly.

diff --git a/ext/uri/uriparser/src/UriCommon.c b/ext/uri/uriparser/src/UriCommon.c
index a594fcceed7..3644e8828f3 100644
--- a/ext/uri/uriparser/src/UriCommon.c
+++ b/ext/uri/uriparser/src/UriCommon.c
@@ -62,6 +62,7 @@
 #  ifndef URI_DOXYGEN
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>
@@ -468,32 +469,11 @@ UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,

 unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) {
     switch (hexdig) {
-    case _UT('0'):
-    case _UT('1'):
-    case _UT('2'):
-    case _UT('3'):
-    case _UT('4'):
-    case _UT('5'):
-    case _UT('6'):
-    case _UT('7'):
-    case _UT('8'):
-    case _UT('9'):
+    case URI_SET_DIGIT(_UT):
         return (unsigned char)(9 + hexdig - _UT('9'));
-
-    case _UT('a'):
-    case _UT('b'):
-    case _UT('c'):
-    case _UT('d'):
-    case _UT('e'):
-    case _UT('f'):
+    case URI_SET_HEX_LETTER_LOWER(_UT):
         return (unsigned char)(15 + hexdig - _UT('f'));
-
-    case _UT('A'):
-    case _UT('B'):
-    case _UT('C'):
-    case _UT('D'):
-    case _UT('E'):
-    case _UT('F'):
+    case URI_SET_HEX_LETTER_UPPER(_UT):
         return (unsigned char)(15 + hexdig - _UT('F'));

     default:
diff --git a/ext/uri/uriparser/src/UriEscape.c b/ext/uri/uriparser/src/UriEscape.c
index b23050783fb..a1763f97153 100644
--- a/ext/uri/uriparser/src/UriEscape.c
+++ b/ext/uri/uriparser/src/UriEscape.c
@@ -62,6 +62,7 @@
 #  ifndef URI_DOXYGEN
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
+#    include "UriSets.h"
 #  endif

 URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, UriBool spaceToPlus,
@@ -108,72 +109,7 @@ URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, const URI_CHAR * inAfter
             prevWasCr = URI_FALSE;
             break;

-        case _UT('a'): /* ALPHA */
-        case _UT('A'):
-        case _UT('b'):
-        case _UT('B'):
-        case _UT('c'):
-        case _UT('C'):
-        case _UT('d'):
-        case _UT('D'):
-        case _UT('e'):
-        case _UT('E'):
-        case _UT('f'):
-        case _UT('F'):
-        case _UT('g'):
-        case _UT('G'):
-        case _UT('h'):
-        case _UT('H'):
-        case _UT('i'):
-        case _UT('I'):
-        case _UT('j'):
-        case _UT('J'):
-        case _UT('k'):
-        case _UT('K'):
-        case _UT('l'):
-        case _UT('L'):
-        case _UT('m'):
-        case _UT('M'):
-        case _UT('n'):
-        case _UT('N'):
-        case _UT('o'):
-        case _UT('O'):
-        case _UT('p'):
-        case _UT('P'):
-        case _UT('q'):
-        case _UT('Q'):
-        case _UT('r'):
-        case _UT('R'):
-        case _UT('s'):
-        case _UT('S'):
-        case _UT('t'):
-        case _UT('T'):
-        case _UT('u'):
-        case _UT('U'):
-        case _UT('v'):
-        case _UT('V'):
-        case _UT('w'):
-        case _UT('W'):
-        case _UT('x'):
-        case _UT('X'):
-        case _UT('y'):
-        case _UT('Y'):
-        case _UT('z'):
-        case _UT('Z'):
-        case _UT('0'): /* DIGIT */
-        case _UT('1'):
-        case _UT('2'):
-        case _UT('3'):
-        case _UT('4'):
-        case _UT('5'):
-        case _UT('6'):
-        case _UT('7'):
-        case _UT('8'):
-        case _UT('9'):
-        case _UT('-'): /* "-" / "." / "_" / "~" */
-        case _UT('.'):
-        case _UT('_'):
-        case _UT('~'):
+        case URI_SET_UNRESERVED(_UT):
             /* Copy unmodified */
             write[0] = read[0];
             write++;
@@ -263,51 +199,9 @@ const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, UriBool plusToSpa

         case _UT('%'):
             switch (read[1]) {
-            case _UT('0'):
-            case _UT('1'):
-            case _UT('2'):
-            case _UT('3'):
-            case _UT('4'):
-            case _UT('5'):
-            case _UT('6'):
-            case _UT('7'):
-            case _UT('8'):
-            case _UT('9'):
-            case _UT('a'):
-            case _UT('b'):
-            case _UT('c'):
-            case _UT('d'):
-            case _UT('e'):
-            case _UT('f'):
-            case _UT('A'):
-            case _UT('B'):
-            case _UT('C'):
-            case _UT('D'):
-            case _UT('E'):
-            case _UT('F'):
+            case URI_SET_HEXDIG(_UT):
                 switch (read[2]) {
-                case _UT('0'):
-                case _UT('1'):
-                case _UT('2'):
-                case _UT('3'):
-                case _UT('4'):
-                case _UT('5'):
-                case _UT('6'):
-                case _UT('7'):
-                case _UT('8'):
-                case _UT('9'):
-                case _UT('a'):
-                case _UT('b'):
-                case _UT('c'):
-                case _UT('d'):
-                case _UT('e'):
-                case _UT('f'):
-                case _UT('A'):
-                case _UT('B'):
-                case _UT('C'):
-                case _UT('D'):
-                case _UT('E'):
-                case _UT('F'): {
+                case URI_SET_HEXDIG(_UT): {
                     /* Percent group found */
                     const unsigned char left = URI_FUNC(HexdigToInt)(read[1]);
                     const unsigned char right = URI_FUNC(HexdigToInt)(read[2]);
diff --git a/ext/uri/uriparser/src/UriIp4.c b/ext/uri/uriparser/src/UriIp4.c
index 162a75a556d..ae61141f7a3 100644
--- a/ext/uri/uriparser/src/UriIp4.c
+++ b/ext/uri/uriparser/src/UriIp4.c
@@ -68,6 +68,7 @@
 #    include <uriparser/UriIp4.h>
 #    include "UriIp4Base.h"
 #    include <uriparser/UriBase.h>
+#    include "UriSets.h"
 #  endif

 /* Prototypes */
@@ -194,16 +195,7 @@ URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser, const URI_CHAR * first,
     }

     switch (*first) {
-    case _UT('0'):
-    case _UT('1'):
-    case _UT('2'):
-    case _UT('3'):
-    case _UT('4'):
-    case _UT('5'):
-    case _UT('6'):
-    case _UT('7'):
-    case _UT('8'):
-    case _UT('9'):
+    case URI_SET_DIGIT(_UT):
         uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
         return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1,
                                                               afterLast);
@@ -272,16 +264,7 @@ URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser, const URI_CHAR * first,
     }

     switch (*first) {
-    case _UT('0'):
-    case _UT('1'):
-    case _UT('2'):
-    case _UT('3'):
-    case _UT('4'):
-    case _UT('5'):
-    case _UT('6'):
-    case _UT('7'):
-    case _UT('8'):
-    case _UT('9'):
+    case URI_SET_DIGIT(_UT):
         uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
         return first + 1;

diff --git a/ext/uri/uriparser/src/UriParse.c b/ext/uri/uriparser/src/UriParse.c
index db48b380464..ed851e94fbd 100644
--- a/ext/uri/uriparser/src/UriParse.c
+++ b/ext/uri/uriparser/src/UriParse.c
@@ -71,82 +71,9 @@
 #    include "UriCommon.h"
 #    include "UriMemory.h"
 #    include "UriParseBase.h"
+#    include "UriSets.h"
 #  endif

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
 static const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state,
                                                  const URI_CHAR * first,
                                                  const URI_CHAR * afterLast,
@@ -340,26 +267,7 @@ static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState
         return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast);
     }

-    case _UT('!'):
-    case _UT('$'):
-    case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(':'):
-    case _UT(';'):
-    case _UT('@'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_PCHAR(_UT):
         state->uri->userInfo.first = first; /* USERINFO BEGIN */
         return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory);

@@ -411,7 +319,7 @@ static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state,
     }

     switch (*first) {
-    case URI_SET_HEXDIG:
+    case URI_SET_HEXDIG(_UT):
         return URI_FUNC(ParseHexZero)(state, first + 1, afterLast);

     default:
@@ -433,26 +341,7 @@ static URI_INLINE const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState)
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(':'):
-    case _UT(';'):
-    case _UT('@'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_PCHAR(_UT):
         return URI_FUNC(ParsePathRootless)(state, first, afterLast, memory);

     case _UT('/'):
@@ -478,24 +367,9 @@ static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state,
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
     case _UT(':'):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT):
         return URI_FUNC(ParseIpFutStopGo)(state, first + 1, afterLast, memory);

     default:
@@ -517,24 +391,9 @@ static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
     case _UT(':'):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT):
         return URI_FUNC(ParseIpFutLoop)(state, first, afterLast, memory);

     default:
@@ -568,7 +427,7 @@ static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state,
     }

     switch (first[1]) {
-    case URI_SET_HEXDIG: {
+    case URI_SET_HEXDIG(_UT): {
         const URI_CHAR * afterIpFutLoop;
         const URI_CHAR * const afterHexZero =
             URI_FUNC(ParseHexZero)(state, first + 2, afterLast);
@@ -643,7 +502,7 @@ static URI_INLINE const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) *

     case _UT(':'):
     case _UT(']'):
-    case URI_SET_HEXDIG:
+    case URI_SET_HEXDIG(_UT):
         state->uri->hostData.ip6 = memory->malloc(
             memory, 1 * sizeof(UriIp6)); /* Freed when stopping on parse error */
         if (state->uri->hostData.ip6 == NULL) {
@@ -685,7 +544,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
             /* Eat rest of IPv4 address */
             for (;;) {
                 switch (*first) {
-                case URI_SET_DIGIT:
+                case URI_SET_DIGIT(_UT):
                     if (digitCount == 4) {
                         URI_FUNC(StopSyntax)(state, first, memory);
                         return NULL;
@@ -780,7 +639,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
             int walking = 1;
             do {
                 switch (*first) {
-                case URI_SET_HEX_LETTER_LOWER:
+                case URI_SET_HEX_LETTER_LOWER(_UT):
                     letterAmong = 1;
                     if (digitCount == 4) {
                         URI_FUNC(StopSyntax)(state, first, memory);
@@ -790,7 +649,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
                     digitCount++;
                     break;

-                case URI_SET_HEX_LETTER_UPPER:
+                case URI_SET_HEX_LETTER_UPPER(_UT):
                     letterAmong = 1;
                     if (digitCount == 4) {
                         URI_FUNC(StopSyntax)(state, first, memory);
@@ -800,7 +659,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
                     digitCount++;
                     break;

-                case URI_SET_DIGIT:
+                case URI_SET_DIGIT(_UT):
                     if (digitCount == 4) {
                         URI_FUNC(StopSyntax)(state, first, memory);
                         return NULL;
@@ -995,23 +854,8 @@ static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) *
     }

     case _UT('@'):
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('+'):
-    case _UT('='):
-    case _UT('-'):
-    case _UT('.'):
-    case _UT('_'):
-    case _UT('~'):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT):
         return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory);

     case _UT('/'): {
@@ -1118,24 +962,9 @@ static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state,
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
     case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA: {
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT): {
         const URI_CHAR * const afterPctSubUnres =
             URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
         if (afterPctSubUnres == NULL) {
@@ -1193,26 +1022,7 @@ URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * f
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(':'):
-    case _UT(';'):
-    case _UT('@'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_PCHAR(_UT):
         return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory);

     default:
@@ -1239,24 +1049,9 @@ static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) *
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
     case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA: {
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT): {
         const URI_CHAR * const afterPctSubUnres =
             URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
         if (afterPctSubUnres == NULL) {
@@ -1331,19 +1126,7 @@ static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * s
     }

     switch (*first) {
-    /* begin sub-delims */
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('\''):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('*'):
-    case _UT('+'):
-    case _UT(','):
-    case _UT(';'):
-    case _UT('='):
-    /* end sub-delims */
+    case URI_SET_SUB_DELIMS(_UT):
     /* begin unreserved (except alpha and digit) */
     case _UT('-'):
     case _UT('.'):
@@ -1351,12 +1134,12 @@ static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * s
     case _UT('~'):
     /* end unreserved (except alpha and digit) */
     case _UT(':'):
-    case URI_SET_ALPHA:
+    case URI_SET_ALPHA(_UT):
         state->uri->hostText.afterLast = NULL; /* Not a host, reset */
         state->uri->portText.first = NULL; /* Not a port, reset */
         return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast, memory);

-    case URI_SET_DIGIT:
+    case URI_SET_DIGIT(_UT):
         return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast, memory);

     case _UT('%'):
@@ -1399,24 +1182,9 @@ static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
     case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA: {
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT): {
         const URI_CHAR * const afterPctSubUnres =
             URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
         if (afterPctSubUnres == NULL) {
@@ -1522,26 +1290,7 @@ URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(':'):
-    case _UT(';'):
-    case _UT('@'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA: {
+    case URI_SET_PCHAR(_UT): {
         const URI_CHAR * const afterSegmentNz =
             URI_FUNC(ParseSegmentNz)(state, first, afterLast, memory);
         if (afterSegmentNz == NULL) {
@@ -1600,25 +1349,7 @@ static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state,
     case _UT('%'):
         return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);

-    case _UT(':'):
-    case _UT('@'):
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('+'):
-    case _UT('='):
-    case _UT('-'):
-    case _UT('.'):
-    case _UT('_'):
-    case _UT('~'):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
         return first + 1;

     default:
@@ -1652,14 +1383,14 @@ static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state,
     }

     switch (first[1]) {
-    case URI_SET_HEXDIG:
+    case URI_SET_HEXDIG(_UT):
         if (afterLast - first < 3) {
             URI_FUNC(StopSyntax)(state, afterLast, memory);
             return NULL;
         }

         switch (first[2]) {
-        case URI_SET_HEXDIG:
+        case URI_SET_HEXDIG(_UT):
             return first + 3;

         default:
@@ -1698,23 +1429,8 @@ static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state
     case _UT('%'):
         return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);

-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('+'):
-    case _UT('='):
-    case _UT('-'):
-    case _UT('.'):
-    case _UT('_'):
-    case _UT('~'):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA:
+    case URI_SET_SUB_DELIMS(_UT):
+    case URI_SET_UNRESERVED(_UT):
         return first + 1;

     default:
@@ -1735,7 +1451,7 @@ static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state,
     }

     switch (*first) {
-    case URI_SET_DIGIT:
+    case URI_SET_DIGIT(_UT):
         return URI_FUNC(ParsePort)(state, first + 1, afterLast);

     default:
@@ -1758,26 +1474,7 @@ static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state,
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(':'):
-    case _UT(';'):
-    case _UT('@'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA: {
+    case URI_SET_PCHAR(_UT): {
         const URI_CHAR * const afterPchar =
             URI_FUNC(ParsePchar)(state, first, afterLast, memory);
         if (afterPchar == NULL) {
@@ -1808,26 +1505,7 @@ static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state,
     }

     switch (*first) {
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('%'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('-'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT('.'):
-    case _UT(':'):
-    case _UT(';'):
-    case _UT('@'):
-    case _UT('\''):
-    case _UT('_'):
-    case _UT('~'):
-    case _UT('+'):
-    case _UT('='):
-    case URI_SET_DIGIT:
-    case URI_SET_ALPHA: {
+    case URI_SET_PCHAR(_UT): {
         const URI_CHAR * const afterPchar =
             URI_FUNC(ParsePchar)(state, first, afterLast, memory);
         if (afterPchar == NULL) {
@@ -1906,8 +1584,8 @@ static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState
     case _UT('.'):
     case _UT('+'):
     case _UT('-'):
-    case URI_SET_ALPHA:
-    case URI_SET_DIGIT:
+    case URI_SET_ALPHA(_UT):
+    case URI_SET_DIGIT(_UT):
         return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory);

     case _UT('%'): {
@@ -2002,22 +1680,12 @@ static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * stat
     }

     switch (*first) {
-    case URI_SET_ALPHA:
+    case URI_SET_ALPHA(_UT):
         state->uri->scheme.first = first; /* SCHEME BEGIN */
         return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory);

-    case URI_SET_DIGIT:
-    case _UT('!'):
-    case _UT('$'):
-    case _UT('&'):
-    case _UT('('):
-    case _UT(')'):
-    case _UT('*'):
-    case _UT(','):
-    case _UT(';'):
-    case _UT('\''):
-    case _UT('+'):
-    case _UT('='):
+    case URI_SET_DIGIT(_UT):
+    case URI_SET_SUB_DELIMS(_UT):
     case _UT('.'):
     case _UT('_'):
     case _UT('~'):
diff --git a/ext/uri/uriparser/src/UriSetFragment.c b/ext/uri/uriparser/src/UriSetFragment.c
index b9c5c53b042..4479391d859 100644
--- a/ext/uri/uriparser/src/UriSetFragment.c
+++ b/ext/uri/uriparser/src/UriSetFragment.c
@@ -62,104 +62,11 @@
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
 #    include "UriMemory.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
-#  define URI_SET_SUB_DELIMS \
-  _UT('!') : case _UT('$'): \
-  case _UT('&'): \
-  case _UT('\''): \
-  case _UT('('): \
-  case _UT(')'): \
-  case _UT('*'): \
-  case _UT('+'): \
-  case _UT(','): \
-  case _UT(';'): \
-  case _UT('=')
-
-#  define URI_SET_UNRESERVED \
-  URI_SET_ALPHA: \
-  case URI_SET_DIGIT: \
-  case _UT('-'): \
-  case _UT('.'): \
-  case _UT('_'): \
-  case _UT('~')
-
 UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
                                        const URI_CHAR * afterLast) {
     if ((first == NULL) || (afterLast == NULL)) {
@@ -173,7 +80,7 @@ UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
      */
     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_UNRESERVED:
+        case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
             break;

         /* pct-encoded */
@@ -182,13 +89,13 @@ UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
                 return URI_FALSE;
             }
             switch (first[1]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
             }
             switch (first[2]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
@@ -196,12 +103,6 @@ UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
             first += 2;
             break;

-        case URI_SET_SUB_DELIMS:
-            break;
-
-        /* ":" / "@" and "/" / "?" */
-        case _UT(':'):
-        case _UT('@'):
         case _UT('/'):
         case _UT('?'):
             break;
diff --git a/ext/uri/uriparser/src/UriSetHostRegName.c b/ext/uri/uriparser/src/UriSetHostRegName.c
index 61694b248ad..01bc4e47f16 100644
--- a/ext/uri/uriparser/src/UriSetHostRegName.c
+++ b/ext/uri/uriparser/src/UriSetHostRegName.c
@@ -63,102 +63,9 @@
 #    include "UriMemory.h"
 #    include "UriSetHostBase.h"
 #    include "UriSetHostCommon.h"
+#    include "UriSets.h"
 #  endif

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
-#  define URI_SET_SUB_DELIMS \
-  _UT('!') : case _UT('$'): \
-  case _UT('&'): \
-  case _UT('\''): \
-  case _UT('('): \
-  case _UT(')'): \
-  case _UT('*'): \
-  case _UT('+'): \
-  case _UT(','): \
-  case _UT(';'): \
-  case _UT('=')
-
-#  define URI_SET_UNRESERVED \
-  URI_SET_ALPHA: \
-  case URI_SET_DIGIT: \
-  case _UT('-'): \
-  case _UT('.'): \
-  case _UT('_'): \
-  case _UT('~')
-
 UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
                                           const URI_CHAR * afterLast) {
     if ((first == NULL) || (afterLast == NULL)) {
@@ -168,7 +75,7 @@ UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
     /* reg-name = *( unreserved / pct-encoded / sub-delims ) */
     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_UNRESERVED:
+        case URI_SET_UNRESERVED(_UT):
             break;

         /* pct-encoded */
@@ -177,13 +84,13 @@ UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
                 return URI_FALSE;
             }
             switch (first[1]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
             }
             switch (first[2]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
@@ -191,7 +98,7 @@ UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
             first += 2;
             break;

-        case URI_SET_SUB_DELIMS:
+        case URI_SET_SUB_DELIMS(_UT):
             break;

         default:
diff --git a/ext/uri/uriparser/src/UriSetPath.c b/ext/uri/uriparser/src/UriSetPath.c
index d9e8bec0aa8..17aef0fca42 100644
--- a/ext/uri/uriparser/src/UriSetPath.c
+++ b/ext/uri/uriparser/src/UriSetPath.c
@@ -62,104 +62,11 @@
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
 #    include "UriMemory.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
-#  define URI_SET_SUB_DELIMS \
-  _UT('!') : case _UT('$'): \
-  case _UT('&'): \
-  case _UT('\''): \
-  case _UT('('): \
-  case _UT(')'): \
-  case _UT('*'): \
-  case _UT('+'): \
-  case _UT(','): \
-  case _UT(';'): \
-  case _UT('=')
-
-#  define URI_SET_UNRESERVED \
-  URI_SET_ALPHA: \
-  case URI_SET_DIGIT: \
-  case _UT('-'): \
-  case _UT('.'): \
-  case _UT('_'): \
-  case _UT('~')
-
 UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast,
                                    UriBool hasHost) {
     if ((first == NULL) || (afterLast == NULL)) {
@@ -200,7 +107,7 @@ UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afte
      */
     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_UNRESERVED:
+        case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
             break;

         /* pct-encoded */
@@ -209,13 +116,13 @@ UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afte
                 return URI_FALSE;
             }
             switch (first[1]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
             }
             switch (first[2]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
@@ -223,12 +130,6 @@ UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afte
             first += 2;
             break;

-        case URI_SET_SUB_DELIMS:
-            break;
-
-        /* ":" / "@" and "/" */
-        case _UT(':'):
-        case _UT('@'):
         case _UT('/'):
             break;

diff --git a/ext/uri/uriparser/src/UriSetPort.c b/ext/uri/uriparser/src/UriSetPort.c
index 1c373013f66..5e2160e3097 100644
--- a/ext/uri/uriparser/src/UriSetPort.c
+++ b/ext/uri/uriparser/src/UriSetPort.c
@@ -62,21 +62,11 @@
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
 #    include "UriMemory.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
 UriBool URI_FUNC(IsWellFormedPort)(const URI_CHAR * first, const URI_CHAR * afterLast) {
     if ((first == NULL) || (afterLast == NULL)) {
         return URI_FALSE;
@@ -85,7 +75,7 @@ UriBool URI_FUNC(IsWellFormedPort)(const URI_CHAR * first, const URI_CHAR * afte
     /* NOTE: Grammar reads "port = *DIGIT" which includes the empty string. */
     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_DIGIT:
+        case URI_SET_DIGIT(_UT):
             break;
         default:
             return URI_FALSE;
diff --git a/ext/uri/uriparser/src/UriSetQuery.c b/ext/uri/uriparser/src/UriSetQuery.c
index a189c14bb1e..4f58c8286ed 100644
--- a/ext/uri/uriparser/src/UriSetQuery.c
+++ b/ext/uri/uriparser/src/UriSetQuery.c
@@ -62,104 +62,11 @@
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
 #    include "UriMemory.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
-#  define URI_SET_SUB_DELIMS \
-  _UT('!') : case _UT('$'): \
-  case _UT('&'): \
-  case _UT('\''): \
-  case _UT('('): \
-  case _UT(')'): \
-  case _UT('*'): \
-  case _UT('+'): \
-  case _UT(','): \
-  case _UT(';'): \
-  case _UT('=')
-
-#  define URI_SET_UNRESERVED \
-  URI_SET_ALPHA: \
-  case URI_SET_DIGIT: \
-  case _UT('-'): \
-  case _UT('.'): \
-  case _UT('_'): \
-  case _UT('~')
-
 UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * afterLast) {
     if ((first == NULL) || (afterLast == NULL)) {
         return URI_FALSE;
@@ -172,7 +79,7 @@ UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * aft
      */
     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_UNRESERVED:
+        case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
             break;

         /* pct-encoded */
@@ -181,13 +88,13 @@ UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * aft
                 return URI_FALSE;
             }
             switch (first[1]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
             }
             switch (first[2]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
@@ -195,12 +102,6 @@ UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * aft
             first += 2;
             break;

-        case URI_SET_SUB_DELIMS:
-            break;
-
-        /* ":" / "@" and "/" / "?" */
-        case _UT(':'):
-        case _UT('@'):
         case _UT('/'):
         case _UT('?'):
             break;
diff --git a/ext/uri/uriparser/src/UriSetScheme.c b/ext/uri/uriparser/src/UriSetScheme.c
index 9a21d45f263..3dfaf1e9f15 100644
--- a/ext/uri/uriparser/src/UriSetScheme.c
+++ b/ext/uri/uriparser/src/UriSetScheme.c
@@ -62,84 +62,11 @@
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
 #    include "UriMemory.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
 UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * afterLast) {
     if ((first == NULL) || (afterLast == NULL)) {
         return URI_FALSE;
@@ -154,7 +81,7 @@ UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * af
     }

     switch (first[0]) {
-    case URI_SET_ALPHA:
+    case URI_SET_ALPHA(_UT):
         break;

     default:
@@ -165,8 +92,8 @@ UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * af

     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_ALPHA:
-        case URI_SET_DIGIT:
+        case URI_SET_ALPHA(_UT):
+        case URI_SET_DIGIT(_UT):
         case _UT('+'):
         case _UT('-'):
         case _UT('.'):
diff --git a/ext/uri/uriparser/src/UriSetUserInfo.c b/ext/uri/uriparser/src/UriSetUserInfo.c
index af1ec41a076..7865e837deb 100644
--- a/ext/uri/uriparser/src/UriSetUserInfo.c
+++ b/ext/uri/uriparser/src/UriSetUserInfo.c
@@ -62,104 +62,11 @@
 #    include <uriparser/Uri.h>
 #    include "UriCommon.h"
 #    include "UriMemory.h"
+#    include "UriSets.h"
 #  endif

 #  include <assert.h>

-#  define URI_SET_DIGIT \
-  _UT('0') : case _UT('1'): \
-  case _UT('2'): \
-  case _UT('3'): \
-  case _UT('4'): \
-  case _UT('5'): \
-  case _UT('6'): \
-  case _UT('7'): \
-  case _UT('8'): \
-  case _UT('9')
-
-#  define URI_SET_HEX_LETTER_UPPER \
-  _UT('A') : case _UT('B'): \
-  case _UT('C'): \
-  case _UT('D'): \
-  case _UT('E'): \
-  case _UT('F')
-
-#  define URI_SET_HEX_LETTER_LOWER \
-  _UT('a') : case _UT('b'): \
-  case _UT('c'): \
-  case _UT('d'): \
-  case _UT('e'): \
-  case _UT('f')
-
-#  define URI_SET_HEXDIG \
-  URI_SET_DIGIT: \
-  case URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER
-
-#  define URI_SET_ALPHA \
-  URI_SET_HEX_LETTER_UPPER: \
-  case URI_SET_HEX_LETTER_LOWER: \
-  case _UT('g'): \
-  case _UT('G'): \
-  case _UT('h'): \
-  case _UT('H'): \
-  case _UT('i'): \
-  case _UT('I'): \
-  case _UT('j'): \
-  case _UT('J'): \
-  case _UT('k'): \
-  case _UT('K'): \
-  case _UT('l'): \
-  case _UT('L'): \
-  case _UT('m'): \
-  case _UT('M'): \
-  case _UT('n'): \
-  case _UT('N'): \
-  case _UT('o'): \
-  case _UT('O'): \
-  case _UT('p'): \
-  case _UT('P'): \
-  case _UT('q'): \
-  case _UT('Q'): \
-  case _UT('r'): \
-  case _UT('R'): \
-  case _UT('s'): \
-  case _UT('S'): \
-  case _UT('t'): \
-  case _UT('T'): \
-  case _UT('u'): \
-  case _UT('U'): \
-  case _UT('v'): \
-  case _UT('V'): \
-  case _UT('w'): \
-  case _UT('W'): \
-  case _UT('x'): \
-  case _UT('X'): \
-  case _UT('y'): \
-  case _UT('Y'): \
-  case _UT('z'): \
-  case _UT('Z')
-
-#  define URI_SET_SUB_DELIMS \
-  _UT('!') : case _UT('$'): \
-  case _UT('&'): \
-  case _UT('\''): \
-  case _UT('('): \
-  case _UT(')'): \
-  case _UT('*'): \
-  case _UT('+'): \
-  case _UT(','): \
-  case _UT(';'): \
-  case _UT('=')
-
-#  define URI_SET_UNRESERVED \
-  URI_SET_ALPHA: \
-  case URI_SET_DIGIT: \
-  case _UT('-'): \
-  case _UT('.'): \
-  case _UT('_'): \
-  case _UT('~')
-
 UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
                                        const URI_CHAR * afterLast) {
     if ((first == NULL) || (afterLast == NULL)) {
@@ -169,7 +76,7 @@ UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
     /* userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) */
     while (first < afterLast) {
         switch (first[0]) {
-        case URI_SET_UNRESERVED:
+        case URI_SET_UNRESERVED(_UT):
             break;

         /* pct-encoded */
@@ -178,13 +85,13 @@ UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
                 return URI_FALSE;
             }
             switch (first[1]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
             }
             switch (first[2]) {
-            case URI_SET_HEXDIG:
+            case URI_SET_HEXDIG(_UT):
                 break;
             default:
                 return URI_FALSE;
@@ -192,7 +99,7 @@ UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
             first += 2;
             break;

-        case URI_SET_SUB_DELIMS:
+        case URI_SET_SUB_DELIMS(_UT):
             break;

         /* ":" */
diff --git a/ext/uri/uriparser/src/UriSets.h b/ext/uri/uriparser/src/UriSets.h
new file mode 100644
index 00000000000..a6a2c46a14d
--- /dev/null
+++ b/ext/uri/uriparser/src/UriSets.h
@@ -0,0 +1,174 @@
+/*
+ * uriparser - RFC 3986 URI parsing library
+ *
+ * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source  and binary forms, with or without
+ * modification, are permitted provided  that the following conditions
+ * are met:
+ *
+ *     1. Redistributions  of  source  code   must  retain  the  above
+ *        copyright notice, this list  of conditions and the following
+ *        disclaimer.
+ *
+ *     2. Redistributions  in binary  form  must  reproduce the  above
+ *        copyright notice, this list  of conditions and the following
+ *        disclaimer  in  the  documentation  and/or  other  materials
+ *        provided with the distribution.
+ *
+ *     3. Neither the  name of the  copyright holder nor the  names of
+ *        its contributors may be used  to endorse or promote products
+ *        derived from  this software  without specific  prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND  ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING, BUT NOT
+ * LIMITED TO,  THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS
+ * FOR  A  PARTICULAR  PURPOSE  ARE  DISCLAIMED.  IN  NO  EVENT  SHALL
+ * THE  COPYRIGHT HOLDER  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA,  OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT  LIABILITY,  OR  TORT (INCLUDING  NEGLIGENCE  OR  OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file UriSets.h
+ * Holds character set definitions.
+ */
+
+// NOTE: We cannot use a regular include-once guard here because the
+//       file must support being included twice, e.g. from file UriParse.c.
+#if !defined(URI_SET_DIGIT)
+
+// clang-format off
+#  define URI_SET_DIGIT(ut) \
+       ut('0'):  \
+  case ut('1'): \
+  /* clang-format on */ \
+  case ut('2'): \
+  case ut('3'): \
+  case ut('4'): \
+  case ut('5'): \
+  case ut('6'): \
+  case ut('7'): \
+  case ut('8'): \
+  case ut('9')
+
+// clang-format off
+#  define URI_SET_HEX_LETTER_LOWER(ut) \
+       ut('a'): \
+  case ut('b'): \
+  /* clang-format on */ \
+  case ut('c'): \
+  case ut('d'): \
+  case ut('e'): \
+  case ut('f')
+
+// clang-format off
+#  define URI_SET_HEX_LETTER_UPPER(ut) \
+       ut('A'): \
+  case ut('B'): \
+  /* clang-format on */ \
+  case ut('C'): \
+  case ut('D'): \
+  case ut('E'): \
+  case ut('F')
+
+// clang-format off
+#  define URI_SET_HEXDIG(ut) \
+       URI_SET_DIGIT(ut): \
+  case URI_SET_HEX_LETTER_LOWER(ut): \
+  /* clang-format on */ \
+  case URI_SET_HEX_LETTER_UPPER(ut)
+
+// clang-format off
+#  define URI_SET_ALPHA(ut) \
+       URI_SET_HEX_LETTER_UPPER(ut): \
+  case URI_SET_HEX_LETTER_LOWER(ut): \
+  /* clang-format on */ \
+  case ut('g'): \
+  case ut('G'): \
+  case ut('h'): \
+  case ut('H'): \
+  case ut('i'): \
+  case ut('I'): \
+  case ut('j'): \
+  case ut('J'): \
+  case ut('k'): \
+  case ut('K'): \
+  case ut('l'): \
+  case ut('L'): \
+  case ut('m'): \
+  case ut('M'): \
+  case ut('n'): \
+  case ut('N'): \
+  case ut('o'): \
+  case ut('O'): \
+  case ut('p'): \
+  case ut('P'): \
+  case ut('q'): \
+  case ut('Q'): \
+  case ut('r'): \
+  case ut('R'): \
+  case ut('s'): \
+  case ut('S'): \
+  case ut('t'): \
+  case ut('T'): \
+  case ut('u'): \
+  case ut('U'): \
+  case ut('v'): \
+  case ut('V'): \
+  case ut('w'): \
+  case ut('W'): \
+  case ut('x'): \
+  case ut('X'): \
+  case ut('y'): \
+  case ut('Y'): \
+  case ut('z'): \
+  case ut('Z')
+
+// clang-format off
+#  define URI_SET_SUB_DELIMS(ut) \
+       ut('!'): \
+  case ut('$'): \
+  /* clang-format on */ \
+  case ut('&'): \
+  case ut('\''): \
+  case ut('('): \
+  case ut(')'): \
+  case ut('*'): \
+  case ut('+'): \
+  case ut(','): \
+  case ut(';'): \
+  case ut('=')
+
+// clang-format off
+#  define URI_SET_UNRESERVED(ut) \
+       URI_SET_ALPHA(ut): \
+  case URI_SET_DIGIT(ut): \
+  /* clang-format on */ \
+  case ut('-'): \
+  case ut('.'): \
+  case ut('_'): \
+  case ut('~')
+
+// clang-format off
+#  define URI_SET_PCHAR_WITHOUT_PERCENT(ut) \
+       URI_SET_UNRESERVED(ut): \
+  case URI_SET_SUB_DELIMS(ut): \
+  /* clang-format on */ \
+  case ut(':'): \
+  case ut('@')
+
+// clang-format off
+#  define URI_SET_PCHAR(ut) \
+       URI_SET_PCHAR_WITHOUT_PERCENT(ut): \
+  case ut('%')
+/* clang-format on */
+
+#endif  // ! defined(URI_SET_DIGIT)