Commit 8fd69e15e0b for php.net
commit 8fd69e15e0bc623fe317e21261d88e176de43bbf
Author: Tim Düsterhus <tim@bastelstu.be>
Date: Tue Dec 9 11:52:42 2025 +0100
uri: Update to uriparser-0.9.9-79-gf47a7f0 (#20671)
This is in preparation for importing a fix for the uriparser/uriparser#282
security issue, which will likely depend on this refactoring to apply cleanly.
diff --git a/ext/uri/uriparser/src/UriCommon.c b/ext/uri/uriparser/src/UriCommon.c
index a594fcceed7..3644e8828f3 100644
--- a/ext/uri/uriparser/src/UriCommon.c
+++ b/ext/uri/uriparser/src/UriCommon.c
@@ -62,6 +62,7 @@
# ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
+# include "UriSets.h"
# endif
# include <assert.h>
@@ -468,32 +469,11 @@ UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) {
switch (hexdig) {
- case _UT('0'):
- case _UT('1'):
- case _UT('2'):
- case _UT('3'):
- case _UT('4'):
- case _UT('5'):
- case _UT('6'):
- case _UT('7'):
- case _UT('8'):
- case _UT('9'):
+ case URI_SET_DIGIT(_UT):
return (unsigned char)(9 + hexdig - _UT('9'));
-
- case _UT('a'):
- case _UT('b'):
- case _UT('c'):
- case _UT('d'):
- case _UT('e'):
- case _UT('f'):
+ case URI_SET_HEX_LETTER_LOWER(_UT):
return (unsigned char)(15 + hexdig - _UT('f'));
-
- case _UT('A'):
- case _UT('B'):
- case _UT('C'):
- case _UT('D'):
- case _UT('E'):
- case _UT('F'):
+ case URI_SET_HEX_LETTER_UPPER(_UT):
return (unsigned char)(15 + hexdig - _UT('F'));
default:
diff --git a/ext/uri/uriparser/src/UriEscape.c b/ext/uri/uriparser/src/UriEscape.c
index b23050783fb..a1763f97153 100644
--- a/ext/uri/uriparser/src/UriEscape.c
+++ b/ext/uri/uriparser/src/UriEscape.c
@@ -62,6 +62,7 @@
# ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
+# include "UriSets.h"
# endif
URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, UriBool spaceToPlus,
@@ -108,72 +109,7 @@ URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, const URI_CHAR * inAfter
prevWasCr = URI_FALSE;
break;
- case _UT('a'): /* ALPHA */
- case _UT('A'):
- case _UT('b'):
- case _UT('B'):
- case _UT('c'):
- case _UT('C'):
- case _UT('d'):
- case _UT('D'):
- case _UT('e'):
- case _UT('E'):
- case _UT('f'):
- case _UT('F'):
- case _UT('g'):
- case _UT('G'):
- case _UT('h'):
- case _UT('H'):
- case _UT('i'):
- case _UT('I'):
- case _UT('j'):
- case _UT('J'):
- case _UT('k'):
- case _UT('K'):
- case _UT('l'):
- case _UT('L'):
- case _UT('m'):
- case _UT('M'):
- case _UT('n'):
- case _UT('N'):
- case _UT('o'):
- case _UT('O'):
- case _UT('p'):
- case _UT('P'):
- case _UT('q'):
- case _UT('Q'):
- case _UT('r'):
- case _UT('R'):
- case _UT('s'):
- case _UT('S'):
- case _UT('t'):
- case _UT('T'):
- case _UT('u'):
- case _UT('U'):
- case _UT('v'):
- case _UT('V'):
- case _UT('w'):
- case _UT('W'):
- case _UT('x'):
- case _UT('X'):
- case _UT('y'):
- case _UT('Y'):
- case _UT('z'):
- case _UT('Z'):
- case _UT('0'): /* DIGIT */
- case _UT('1'):
- case _UT('2'):
- case _UT('3'):
- case _UT('4'):
- case _UT('5'):
- case _UT('6'):
- case _UT('7'):
- case _UT('8'):
- case _UT('9'):
- case _UT('-'): /* "-" / "." / "_" / "~" */
- case _UT('.'):
- case _UT('_'):
- case _UT('~'):
+ case URI_SET_UNRESERVED(_UT):
/* Copy unmodified */
write[0] = read[0];
write++;
@@ -263,51 +199,9 @@ const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, UriBool plusToSpa
case _UT('%'):
switch (read[1]) {
- case _UT('0'):
- case _UT('1'):
- case _UT('2'):
- case _UT('3'):
- case _UT('4'):
- case _UT('5'):
- case _UT('6'):
- case _UT('7'):
- case _UT('8'):
- case _UT('9'):
- case _UT('a'):
- case _UT('b'):
- case _UT('c'):
- case _UT('d'):
- case _UT('e'):
- case _UT('f'):
- case _UT('A'):
- case _UT('B'):
- case _UT('C'):
- case _UT('D'):
- case _UT('E'):
- case _UT('F'):
+ case URI_SET_HEXDIG(_UT):
switch (read[2]) {
- case _UT('0'):
- case _UT('1'):
- case _UT('2'):
- case _UT('3'):
- case _UT('4'):
- case _UT('5'):
- case _UT('6'):
- case _UT('7'):
- case _UT('8'):
- case _UT('9'):
- case _UT('a'):
- case _UT('b'):
- case _UT('c'):
- case _UT('d'):
- case _UT('e'):
- case _UT('f'):
- case _UT('A'):
- case _UT('B'):
- case _UT('C'):
- case _UT('D'):
- case _UT('E'):
- case _UT('F'): {
+ case URI_SET_HEXDIG(_UT): {
/* Percent group found */
const unsigned char left = URI_FUNC(HexdigToInt)(read[1]);
const unsigned char right = URI_FUNC(HexdigToInt)(read[2]);
diff --git a/ext/uri/uriparser/src/UriIp4.c b/ext/uri/uriparser/src/UriIp4.c
index 162a75a556d..ae61141f7a3 100644
--- a/ext/uri/uriparser/src/UriIp4.c
+++ b/ext/uri/uriparser/src/UriIp4.c
@@ -68,6 +68,7 @@
# include <uriparser/UriIp4.h>
# include "UriIp4Base.h"
# include <uriparser/UriBase.h>
+# include "UriSets.h"
# endif
/* Prototypes */
@@ -194,16 +195,7 @@ URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser, const URI_CHAR * first,
}
switch (*first) {
- case _UT('0'):
- case _UT('1'):
- case _UT('2'):
- case _UT('3'):
- case _UT('4'):
- case _UT('5'):
- case _UT('6'):
- case _UT('7'):
- case _UT('8'):
- case _UT('9'):
+ case URI_SET_DIGIT(_UT):
uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1,
afterLast);
@@ -272,16 +264,7 @@ URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser, const URI_CHAR * first,
}
switch (*first) {
- case _UT('0'):
- case _UT('1'):
- case _UT('2'):
- case _UT('3'):
- case _UT('4'):
- case _UT('5'):
- case _UT('6'):
- case _UT('7'):
- case _UT('8'):
- case _UT('9'):
+ case URI_SET_DIGIT(_UT):
uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
return first + 1;
diff --git a/ext/uri/uriparser/src/UriParse.c b/ext/uri/uriparser/src/UriParse.c
index db48b380464..ed851e94fbd 100644
--- a/ext/uri/uriparser/src/UriParse.c
+++ b/ext/uri/uriparser/src/UriParse.c
@@ -71,82 +71,9 @@
# include "UriCommon.h"
# include "UriMemory.h"
# include "UriParseBase.h"
+# include "UriSets.h"
# endif
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
static const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state,
const URI_CHAR * first,
const URI_CHAR * afterLast,
@@ -340,26 +267,7 @@ static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState
return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast);
}
- case _UT('!'):
- case _UT('$'):
- case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(':'):
- case _UT(';'):
- case _UT('@'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_PCHAR(_UT):
state->uri->userInfo.first = first; /* USERINFO BEGIN */
return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory);
@@ -411,7 +319,7 @@ static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state,
}
switch (*first) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
return URI_FUNC(ParseHexZero)(state, first + 1, afterLast);
default:
@@ -433,26 +341,7 @@ static URI_INLINE const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState)
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(':'):
- case _UT(';'):
- case _UT('@'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_PCHAR(_UT):
return URI_FUNC(ParsePathRootless)(state, first, afterLast, memory);
case _UT('/'):
@@ -478,24 +367,9 @@ static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state,
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
case _UT(':'):
- case _UT(';'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT):
return URI_FUNC(ParseIpFutStopGo)(state, first + 1, afterLast, memory);
default:
@@ -517,24 +391,9 @@ static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
case _UT(':'):
- case _UT(';'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT):
return URI_FUNC(ParseIpFutLoop)(state, first, afterLast, memory);
default:
@@ -568,7 +427,7 @@ static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state,
}
switch (first[1]) {
- case URI_SET_HEXDIG: {
+ case URI_SET_HEXDIG(_UT): {
const URI_CHAR * afterIpFutLoop;
const URI_CHAR * const afterHexZero =
URI_FUNC(ParseHexZero)(state, first + 2, afterLast);
@@ -643,7 +502,7 @@ static URI_INLINE const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) *
case _UT(':'):
case _UT(']'):
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
state->uri->hostData.ip6 = memory->malloc(
memory, 1 * sizeof(UriIp6)); /* Freed when stopping on parse error */
if (state->uri->hostData.ip6 == NULL) {
@@ -685,7 +544,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
/* Eat rest of IPv4 address */
for (;;) {
switch (*first) {
- case URI_SET_DIGIT:
+ case URI_SET_DIGIT(_UT):
if (digitCount == 4) {
URI_FUNC(StopSyntax)(state, first, memory);
return NULL;
@@ -780,7 +639,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
int walking = 1;
do {
switch (*first) {
- case URI_SET_HEX_LETTER_LOWER:
+ case URI_SET_HEX_LETTER_LOWER(_UT):
letterAmong = 1;
if (digitCount == 4) {
URI_FUNC(StopSyntax)(state, first, memory);
@@ -790,7 +649,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
digitCount++;
break;
- case URI_SET_HEX_LETTER_UPPER:
+ case URI_SET_HEX_LETTER_UPPER(_UT):
letterAmong = 1;
if (digitCount == 4) {
URI_FUNC(StopSyntax)(state, first, memory);
@@ -800,7 +659,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * stat
digitCount++;
break;
- case URI_SET_DIGIT:
+ case URI_SET_DIGIT(_UT):
if (digitCount == 4) {
URI_FUNC(StopSyntax)(state, first, memory);
return NULL;
@@ -995,23 +854,8 @@ static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) *
}
case _UT('@'):
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('*'):
- case _UT(','):
- case _UT(';'):
- case _UT('\''):
- case _UT('+'):
- case _UT('='):
- case _UT('-'):
- case _UT('.'):
- case _UT('_'):
- case _UT('~'):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT):
return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory);
case _UT('/'): {
@@ -1118,24 +962,9 @@ static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state,
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(';'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA: {
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT): {
const URI_CHAR * const afterPctSubUnres =
URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
if (afterPctSubUnres == NULL) {
@@ -1193,26 +1022,7 @@ URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * f
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(':'):
- case _UT(';'):
- case _UT('@'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_PCHAR(_UT):
return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory);
default:
@@ -1239,24 +1049,9 @@ static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) *
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(';'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA: {
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT): {
const URI_CHAR * const afterPctSubUnres =
URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
if (afterPctSubUnres == NULL) {
@@ -1331,19 +1126,7 @@ static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * s
}
switch (*first) {
- /* begin sub-delims */
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('\''):
- case _UT('('):
- case _UT(')'):
- case _UT('*'):
- case _UT('+'):
- case _UT(','):
- case _UT(';'):
- case _UT('='):
- /* end sub-delims */
+ case URI_SET_SUB_DELIMS(_UT):
/* begin unreserved (except alpha and digit) */
case _UT('-'):
case _UT('.'):
@@ -1351,12 +1134,12 @@ static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * s
case _UT('~'):
/* end unreserved (except alpha and digit) */
case _UT(':'):
- case URI_SET_ALPHA:
+ case URI_SET_ALPHA(_UT):
state->uri->hostText.afterLast = NULL; /* Not a host, reset */
state->uri->portText.first = NULL; /* Not a port, reset */
return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast, memory);
- case URI_SET_DIGIT:
+ case URI_SET_DIGIT(_UT):
return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast, memory);
case _UT('%'):
@@ -1399,24 +1182,9 @@ static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(';'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA: {
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT): {
const URI_CHAR * const afterPctSubUnres =
URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
if (afterPctSubUnres == NULL) {
@@ -1522,26 +1290,7 @@ URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(':'):
- case _UT(';'):
- case _UT('@'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA: {
+ case URI_SET_PCHAR(_UT): {
const URI_CHAR * const afterSegmentNz =
URI_FUNC(ParseSegmentNz)(state, first, afterLast, memory);
if (afterSegmentNz == NULL) {
@@ -1600,25 +1349,7 @@ static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state,
case _UT('%'):
return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
- case _UT(':'):
- case _UT('@'):
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('*'):
- case _UT(','):
- case _UT(';'):
- case _UT('\''):
- case _UT('+'):
- case _UT('='):
- case _UT('-'):
- case _UT('.'):
- case _UT('_'):
- case _UT('~'):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
return first + 1;
default:
@@ -1652,14 +1383,14 @@ static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state,
}
switch (first[1]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
if (afterLast - first < 3) {
URI_FUNC(StopSyntax)(state, afterLast, memory);
return NULL;
}
switch (first[2]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
return first + 3;
default:
@@ -1698,23 +1429,8 @@ static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state
case _UT('%'):
return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('*'):
- case _UT(','):
- case _UT(';'):
- case _UT('\''):
- case _UT('+'):
- case _UT('='):
- case _UT('-'):
- case _UT('.'):
- case _UT('_'):
- case _UT('~'):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA:
+ case URI_SET_SUB_DELIMS(_UT):
+ case URI_SET_UNRESERVED(_UT):
return first + 1;
default:
@@ -1735,7 +1451,7 @@ static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state,
}
switch (*first) {
- case URI_SET_DIGIT:
+ case URI_SET_DIGIT(_UT):
return URI_FUNC(ParsePort)(state, first + 1, afterLast);
default:
@@ -1758,26 +1474,7 @@ static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state,
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(':'):
- case _UT(';'):
- case _UT('@'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA: {
+ case URI_SET_PCHAR(_UT): {
const URI_CHAR * const afterPchar =
URI_FUNC(ParsePchar)(state, first, afterLast, memory);
if (afterPchar == NULL) {
@@ -1808,26 +1505,7 @@ static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state,
}
switch (*first) {
- case _UT('!'):
- case _UT('$'):
- case _UT('%'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('-'):
- case _UT('*'):
- case _UT(','):
- case _UT('.'):
- case _UT(':'):
- case _UT(';'):
- case _UT('@'):
- case _UT('\''):
- case _UT('_'):
- case _UT('~'):
- case _UT('+'):
- case _UT('='):
- case URI_SET_DIGIT:
- case URI_SET_ALPHA: {
+ case URI_SET_PCHAR(_UT): {
const URI_CHAR * const afterPchar =
URI_FUNC(ParsePchar)(state, first, afterLast, memory);
if (afterPchar == NULL) {
@@ -1906,8 +1584,8 @@ static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState
case _UT('.'):
case _UT('+'):
case _UT('-'):
- case URI_SET_ALPHA:
- case URI_SET_DIGIT:
+ case URI_SET_ALPHA(_UT):
+ case URI_SET_DIGIT(_UT):
return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory);
case _UT('%'): {
@@ -2002,22 +1680,12 @@ static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * stat
}
switch (*first) {
- case URI_SET_ALPHA:
+ case URI_SET_ALPHA(_UT):
state->uri->scheme.first = first; /* SCHEME BEGIN */
return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory);
- case URI_SET_DIGIT:
- case _UT('!'):
- case _UT('$'):
- case _UT('&'):
- case _UT('('):
- case _UT(')'):
- case _UT('*'):
- case _UT(','):
- case _UT(';'):
- case _UT('\''):
- case _UT('+'):
- case _UT('='):
+ case URI_SET_DIGIT(_UT):
+ case URI_SET_SUB_DELIMS(_UT):
case _UT('.'):
case _UT('_'):
case _UT('~'):
diff --git a/ext/uri/uriparser/src/UriSetFragment.c b/ext/uri/uriparser/src/UriSetFragment.c
index b9c5c53b042..4479391d859 100644
--- a/ext/uri/uriparser/src/UriSetFragment.c
+++ b/ext/uri/uriparser/src/UriSetFragment.c
@@ -62,104 +62,11 @@
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
+# include "UriSets.h"
# endif
# include <assert.h>
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
-# define URI_SET_SUB_DELIMS \
- _UT('!') : case _UT('$'): \
- case _UT('&'): \
- case _UT('\''): \
- case _UT('('): \
- case _UT(')'): \
- case _UT('*'): \
- case _UT('+'): \
- case _UT(','): \
- case _UT(';'): \
- case _UT('=')
-
-# define URI_SET_UNRESERVED \
- URI_SET_ALPHA: \
- case URI_SET_DIGIT: \
- case _UT('-'): \
- case _UT('.'): \
- case _UT('_'): \
- case _UT('~')
-
UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
@@ -173,7 +80,7 @@ UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
*/
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_UNRESERVED:
+ case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
break;
/* pct-encoded */
@@ -182,13 +89,13 @@ UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
return URI_FALSE;
}
switch (first[1]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
}
switch (first[2]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
@@ -196,12 +103,6 @@ UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first,
first += 2;
break;
- case URI_SET_SUB_DELIMS:
- break;
-
- /* ":" / "@" and "/" / "?" */
- case _UT(':'):
- case _UT('@'):
case _UT('/'):
case _UT('?'):
break;
diff --git a/ext/uri/uriparser/src/UriSetHostRegName.c b/ext/uri/uriparser/src/UriSetHostRegName.c
index 61694b248ad..01bc4e47f16 100644
--- a/ext/uri/uriparser/src/UriSetHostRegName.c
+++ b/ext/uri/uriparser/src/UriSetHostRegName.c
@@ -63,102 +63,9 @@
# include "UriMemory.h"
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
+# include "UriSets.h"
# endif
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
-# define URI_SET_SUB_DELIMS \
- _UT('!') : case _UT('$'): \
- case _UT('&'): \
- case _UT('\''): \
- case _UT('('): \
- case _UT(')'): \
- case _UT('*'): \
- case _UT('+'): \
- case _UT(','): \
- case _UT(';'): \
- case _UT('=')
-
-# define URI_SET_UNRESERVED \
- URI_SET_ALPHA: \
- case URI_SET_DIGIT: \
- case _UT('-'): \
- case _UT('.'): \
- case _UT('_'): \
- case _UT('~')
-
UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
@@ -168,7 +75,7 @@ UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
/* reg-name = *( unreserved / pct-encoded / sub-delims ) */
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_UNRESERVED:
+ case URI_SET_UNRESERVED(_UT):
break;
/* pct-encoded */
@@ -177,13 +84,13 @@ UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
return URI_FALSE;
}
switch (first[1]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
}
switch (first[2]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
@@ -191,7 +98,7 @@ UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first,
first += 2;
break;
- case URI_SET_SUB_DELIMS:
+ case URI_SET_SUB_DELIMS(_UT):
break;
default:
diff --git a/ext/uri/uriparser/src/UriSetPath.c b/ext/uri/uriparser/src/UriSetPath.c
index d9e8bec0aa8..17aef0fca42 100644
--- a/ext/uri/uriparser/src/UriSetPath.c
+++ b/ext/uri/uriparser/src/UriSetPath.c
@@ -62,104 +62,11 @@
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
+# include "UriSets.h"
# endif
# include <assert.h>
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
-# define URI_SET_SUB_DELIMS \
- _UT('!') : case _UT('$'): \
- case _UT('&'): \
- case _UT('\''): \
- case _UT('('): \
- case _UT(')'): \
- case _UT('*'): \
- case _UT('+'): \
- case _UT(','): \
- case _UT(';'): \
- case _UT('=')
-
-# define URI_SET_UNRESERVED \
- URI_SET_ALPHA: \
- case URI_SET_DIGIT: \
- case _UT('-'): \
- case _UT('.'): \
- case _UT('_'): \
- case _UT('~')
-
UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast,
UriBool hasHost) {
if ((first == NULL) || (afterLast == NULL)) {
@@ -200,7 +107,7 @@ UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afte
*/
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_UNRESERVED:
+ case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
break;
/* pct-encoded */
@@ -209,13 +116,13 @@ UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afte
return URI_FALSE;
}
switch (first[1]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
}
switch (first[2]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
@@ -223,12 +130,6 @@ UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afte
first += 2;
break;
- case URI_SET_SUB_DELIMS:
- break;
-
- /* ":" / "@" and "/" */
- case _UT(':'):
- case _UT('@'):
case _UT('/'):
break;
diff --git a/ext/uri/uriparser/src/UriSetPort.c b/ext/uri/uriparser/src/UriSetPort.c
index 1c373013f66..5e2160e3097 100644
--- a/ext/uri/uriparser/src/UriSetPort.c
+++ b/ext/uri/uriparser/src/UriSetPort.c
@@ -62,21 +62,11 @@
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
+# include "UriSets.h"
# endif
# include <assert.h>
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
UriBool URI_FUNC(IsWellFormedPort)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
@@ -85,7 +75,7 @@ UriBool URI_FUNC(IsWellFormedPort)(const URI_CHAR * first, const URI_CHAR * afte
/* NOTE: Grammar reads "port = *DIGIT" which includes the empty string. */
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_DIGIT:
+ case URI_SET_DIGIT(_UT):
break;
default:
return URI_FALSE;
diff --git a/ext/uri/uriparser/src/UriSetQuery.c b/ext/uri/uriparser/src/UriSetQuery.c
index a189c14bb1e..4f58c8286ed 100644
--- a/ext/uri/uriparser/src/UriSetQuery.c
+++ b/ext/uri/uriparser/src/UriSetQuery.c
@@ -62,104 +62,11 @@
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
+# include "UriSets.h"
# endif
# include <assert.h>
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
-# define URI_SET_SUB_DELIMS \
- _UT('!') : case _UT('$'): \
- case _UT('&'): \
- case _UT('\''): \
- case _UT('('): \
- case _UT(')'): \
- case _UT('*'): \
- case _UT('+'): \
- case _UT(','): \
- case _UT(';'): \
- case _UT('=')
-
-# define URI_SET_UNRESERVED \
- URI_SET_ALPHA: \
- case URI_SET_DIGIT: \
- case _UT('-'): \
- case _UT('.'): \
- case _UT('_'): \
- case _UT('~')
-
UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
@@ -172,7 +79,7 @@ UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * aft
*/
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_UNRESERVED:
+ case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
break;
/* pct-encoded */
@@ -181,13 +88,13 @@ UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * aft
return URI_FALSE;
}
switch (first[1]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
}
switch (first[2]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
@@ -195,12 +102,6 @@ UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * aft
first += 2;
break;
- case URI_SET_SUB_DELIMS:
- break;
-
- /* ":" / "@" and "/" / "?" */
- case _UT(':'):
- case _UT('@'):
case _UT('/'):
case _UT('?'):
break;
diff --git a/ext/uri/uriparser/src/UriSetScheme.c b/ext/uri/uriparser/src/UriSetScheme.c
index 9a21d45f263..3dfaf1e9f15 100644
--- a/ext/uri/uriparser/src/UriSetScheme.c
+++ b/ext/uri/uriparser/src/UriSetScheme.c
@@ -62,84 +62,11 @@
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
+# include "UriSets.h"
# endif
# include <assert.h>
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
@@ -154,7 +81,7 @@ UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * af
}
switch (first[0]) {
- case URI_SET_ALPHA:
+ case URI_SET_ALPHA(_UT):
break;
default:
@@ -165,8 +92,8 @@ UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * af
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_ALPHA:
- case URI_SET_DIGIT:
+ case URI_SET_ALPHA(_UT):
+ case URI_SET_DIGIT(_UT):
case _UT('+'):
case _UT('-'):
case _UT('.'):
diff --git a/ext/uri/uriparser/src/UriSetUserInfo.c b/ext/uri/uriparser/src/UriSetUserInfo.c
index af1ec41a076..7865e837deb 100644
--- a/ext/uri/uriparser/src/UriSetUserInfo.c
+++ b/ext/uri/uriparser/src/UriSetUserInfo.c
@@ -62,104 +62,11 @@
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
+# include "UriSets.h"
# endif
# include <assert.h>
-# define URI_SET_DIGIT \
- _UT('0') : case _UT('1'): \
- case _UT('2'): \
- case _UT('3'): \
- case _UT('4'): \
- case _UT('5'): \
- case _UT('6'): \
- case _UT('7'): \
- case _UT('8'): \
- case _UT('9')
-
-# define URI_SET_HEX_LETTER_UPPER \
- _UT('A') : case _UT('B'): \
- case _UT('C'): \
- case _UT('D'): \
- case _UT('E'): \
- case _UT('F')
-
-# define URI_SET_HEX_LETTER_LOWER \
- _UT('a') : case _UT('b'): \
- case _UT('c'): \
- case _UT('d'): \
- case _UT('e'): \
- case _UT('f')
-
-# define URI_SET_HEXDIG \
- URI_SET_DIGIT: \
- case URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER
-
-# define URI_SET_ALPHA \
- URI_SET_HEX_LETTER_UPPER: \
- case URI_SET_HEX_LETTER_LOWER: \
- case _UT('g'): \
- case _UT('G'): \
- case _UT('h'): \
- case _UT('H'): \
- case _UT('i'): \
- case _UT('I'): \
- case _UT('j'): \
- case _UT('J'): \
- case _UT('k'): \
- case _UT('K'): \
- case _UT('l'): \
- case _UT('L'): \
- case _UT('m'): \
- case _UT('M'): \
- case _UT('n'): \
- case _UT('N'): \
- case _UT('o'): \
- case _UT('O'): \
- case _UT('p'): \
- case _UT('P'): \
- case _UT('q'): \
- case _UT('Q'): \
- case _UT('r'): \
- case _UT('R'): \
- case _UT('s'): \
- case _UT('S'): \
- case _UT('t'): \
- case _UT('T'): \
- case _UT('u'): \
- case _UT('U'): \
- case _UT('v'): \
- case _UT('V'): \
- case _UT('w'): \
- case _UT('W'): \
- case _UT('x'): \
- case _UT('X'): \
- case _UT('y'): \
- case _UT('Y'): \
- case _UT('z'): \
- case _UT('Z')
-
-# define URI_SET_SUB_DELIMS \
- _UT('!') : case _UT('$'): \
- case _UT('&'): \
- case _UT('\''): \
- case _UT('('): \
- case _UT(')'): \
- case _UT('*'): \
- case _UT('+'): \
- case _UT(','): \
- case _UT(';'): \
- case _UT('=')
-
-# define URI_SET_UNRESERVED \
- URI_SET_ALPHA: \
- case URI_SET_DIGIT: \
- case _UT('-'): \
- case _UT('.'): \
- case _UT('_'): \
- case _UT('~')
-
UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
@@ -169,7 +76,7 @@ UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
/* userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) */
while (first < afterLast) {
switch (first[0]) {
- case URI_SET_UNRESERVED:
+ case URI_SET_UNRESERVED(_UT):
break;
/* pct-encoded */
@@ -178,13 +85,13 @@ UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
return URI_FALSE;
}
switch (first[1]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
}
switch (first[2]) {
- case URI_SET_HEXDIG:
+ case URI_SET_HEXDIG(_UT):
break;
default:
return URI_FALSE;
@@ -192,7 +99,7 @@ UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first,
first += 2;
break;
- case URI_SET_SUB_DELIMS:
+ case URI_SET_SUB_DELIMS(_UT):
break;
/* ":" */
diff --git a/ext/uri/uriparser/src/UriSets.h b/ext/uri/uriparser/src/UriSets.h
new file mode 100644
index 00000000000..a6a2c46a14d
--- /dev/null
+++ b/ext/uri/uriparser/src/UriSets.h
@@ -0,0 +1,174 @@
+/*
+ * uriparser - RFC 3986 URI parsing library
+ *
+ * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file UriSets.h
+ * Holds character set definitions as macros that expand to lists of switch case labels.
+ */
+
+// NOTE: We cannot use a regular include-once guard here because the
+// file must support being included twice, e.g. from file UriParse.c.
+#if !defined(URI_SET_DIGIT)
+
+// clang-format off
+# define URI_SET_DIGIT(ut) /* case-label list for "0".."9"; ut wraps each literal; use as: case URI_SET_DIGIT(_UT): */ \
+ ut('0'): \
+ case ut('1'): \
+ /* clang-format on */ \
+ case ut('2'): \
+ case ut('3'): \
+ case ut('4'): \
+ case ut('5'): \
+ case ut('6'): \
+ case ut('7'): \
+ case ut('8'): \
+ case ut('9')
+
+// clang-format off
+# define URI_SET_HEX_LETTER_LOWER(ut) /* case-label list for the lowercase hex letters "a".."f" */ \
+ ut('a'): \
+ case ut('b'): \
+ /* clang-format on */ \
+ case ut('c'): \
+ case ut('d'): \
+ case ut('e'): \
+ case ut('f')
+
+// clang-format off
+# define URI_SET_HEX_LETTER_UPPER(ut) /* case-label list for the uppercase hex letters "A".."F" */ \
+ ut('A'): \
+ case ut('B'): \
+ /* clang-format on */ \
+ case ut('C'): \
+ case ut('D'): \
+ case ut('E'): \
+ case ut('F')
+
+// clang-format off
+# define URI_SET_HEXDIG(ut) /* case-label list for hex digits: URI_SET_DIGIT plus hex letters of either case */ \
+ URI_SET_DIGIT(ut): \
+ case URI_SET_HEX_LETTER_LOWER(ut): \
+ /* clang-format on */ \
+ case URI_SET_HEX_LETTER_UPPER(ut)
+
+// clang-format off
+# define URI_SET_ALPHA(ut) /* case-label list for all 52 ASCII letters: the hex letters plus g-z and G-Z below */ \
+ URI_SET_HEX_LETTER_UPPER(ut): \
+ case URI_SET_HEX_LETTER_LOWER(ut): \
+ /* clang-format on */ \
+ case ut('g'): \
+ case ut('G'): \
+ case ut('h'): \
+ case ut('H'): \
+ case ut('i'): \
+ case ut('I'): \
+ case ut('j'): \
+ case ut('J'): \
+ case ut('k'): \
+ case ut('K'): \
+ case ut('l'): \
+ case ut('L'): \
+ case ut('m'): \
+ case ut('M'): \
+ case ut('n'): \
+ case ut('N'): \
+ case ut('o'): \
+ case ut('O'): \
+ case ut('p'): \
+ case ut('P'): \
+ case ut('q'): \
+ case ut('Q'): \
+ case ut('r'): \
+ case ut('R'): \
+ case ut('s'): \
+ case ut('S'): \
+ case ut('t'): \
+ case ut('T'): \
+ case ut('u'): \
+ case ut('U'): \
+ case ut('v'): \
+ case ut('V'): \
+ case ut('w'): \
+ case ut('W'): \
+ case ut('x'): \
+ case ut('X'): \
+ case ut('y'): \
+ case ut('Y'): \
+ case ut('z'): \
+ case ut('Z')
+
+// clang-format off
+# define URI_SET_SUB_DELIMS(ut) /* case-label list for the eleven sub-delims characters enumerated below */ \
+ ut('!'): \
+ case ut('$'): \
+ /* clang-format on */ \
+ case ut('&'): \
+ case ut('\''): \
+ case ut('('): \
+ case ut(')'): \
+ case ut('*'): \
+ case ut('+'): \
+ case ut(','): \
+ case ut(';'): \
+ case ut('=')
+
+// clang-format off
+# define URI_SET_UNRESERVED(ut) /* case-label list for unreserved: URI_SET_ALPHA, URI_SET_DIGIT, hyphen, dot, underscore, tilde */ \
+ URI_SET_ALPHA(ut): \
+ case URI_SET_DIGIT(ut): \
+ /* clang-format on */ \
+ case ut('-'): \
+ case ut('.'): \
+ case ut('_'): \
+ case ut('~')
+
+// clang-format off
+# define URI_SET_PCHAR_WITHOUT_PERCENT(ut) /* URI_SET_UNRESERVED, URI_SET_SUB_DELIMS, colon and at-sign; the percent sign is left to the caller */ \
+ URI_SET_UNRESERVED(ut): \
+ case URI_SET_SUB_DELIMS(ut): \
+ /* clang-format on */ \
+ case ut(':'): \
+ case ut('@')
+
+// clang-format off
+# define URI_SET_PCHAR(ut) /* URI_SET_PCHAR_WITHOUT_PERCENT plus the percent sign */ \
+ URI_SET_PCHAR_WITHOUT_PERCENT(ut): \
+ case ut('%')
+/* clang-format on */
+
+#endif // ! defined(URI_SET_DIGIT)