Commit 2b5d69caca for openssl.org
commit 2b5d69caca23d8fc59c20b5a5bee10ef8c2524b7
Author: Bob Beck <beck@openssl.org>
Date: Fri Apr 24 17:22:32 2026 -0600
Deprecate UTF8_putc/getc() and use uint32_t for unicode
Unicode codepoints fit in 32 bits so we can use uint32_t
instead of unsigned long.
Also use OPENSSL_load_XX_be instead of manual shifting.
Reviewed-by: Viktor Dukhovni <viktor@openssl.org>
Reviewed-by: Norbert Pocs <norbertp@openssl.org>
MergeDate: Mon May 18 11:02:37 2026
(Merged from https://github.com/openssl/openssl/pull/30967)
diff --git a/CHANGES.md b/CHANGES.md
index cb6771d7bc..75bc601a9b 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -142,6 +142,12 @@ OpenSSL Releases
*William McCormack*
+ * The undocumented public functions `UTF8_putc()` and `UTF8_getc()`
+   have been deprecated and their functionality has been moved into
+   internal library functions. No public replacement is planned.
+
+ *Bob Beck*
+
* Added IKEV2 KDF (EVP_KDF-IKEV2KDF) implementation.
*Helen Zhang*
diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c
index 05b8ac8706..89ba832111 100644
--- a/crypto/asn1/a_mbstr.c
+++ b/crypto/asn1/a_mbstr.c
@@ -12,19 +12,20 @@
#include "internal/cryptlib.h"
#include "internal/unicode.h"
#include <openssl/asn1.h>
+#include <openssl/byteorder.h>
#include <crypto/asn1.h>
static int traverse_string(const unsigned char *p, int len, int inform,
- int (*rfunc)(unsigned long value, void *in),
+ int (*rfunc)(uint32_t value, void *in),
void *arg);
-static int in_utf8(unsigned long value, void *arg);
-static int out_utf8(unsigned long value, void *arg);
-static int type_str(unsigned long value, void *arg);
-static int cpy_asc(unsigned long value, void *arg);
-static int cpy_bmp(unsigned long value, void *arg);
-static int cpy_univ(unsigned long value, void *arg);
-static int cpy_utf8(unsigned long value, void *arg);
+static int in_utf8(uint32_t value, void *arg);
+static int out_utf8(uint32_t value, void *arg);
+static int type_str(uint32_t value, void *arg);
+static int cpy_asc(uint32_t value, void *arg);
+static int cpy_bmp(uint32_t value, void *arg);
+static int cpy_univ(uint32_t value, void *arg);
+static int cpy_utf8(uint32_t value, void *arg);
/*
* These functions take a string in UTF8, ASCII or multibyte form and a mask
@@ -42,7 +43,7 @@ int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len,
}
int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
- int inform, unsigned long mask,
+ int inform, unsigned long mask_in,
long minsize, long maxsize)
{
int str_type;
@@ -52,7 +53,8 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
ASN1_STRING *dest;
unsigned char *p;
int nchar;
- int (*cpyfunc)(unsigned long, void *) = NULL;
+ uint32_t mask = (uint32_t)mask_in;
+ int (*cpyfunc)(uint32_t, void *) = NULL;
if (len == -1) {
size_t len_s = strlen((const char *)in);
@@ -224,27 +226,25 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
*/
static int traverse_string(const unsigned char *p, int len, int inform,
- int (*rfunc)(unsigned long value, void *in),
+ int (*rfunc)(uint32_t value, void *in),
void *arg)
{
- unsigned long value;
+ uint32_t value;
int ret;
while (len) {
if (inform == MBSTRING_ASC) {
value = *p++;
len--;
} else if (inform == MBSTRING_BMP) {
- value = *p++ << 8;
- value |= *p++;
+ uint16_t tmp;
+ p = OPENSSL_load_u16_be(&tmp, p);
+ value = tmp;
len -= 2;
} else if (inform == MBSTRING_UNIV) {
- value = ((unsigned long)*p++) << 24;
- value |= ((unsigned long)*p++) << 16;
- value |= *p++ << 8;
- value |= *p++;
+ p = OPENSSL_load_u32_be(&value, p);
len -= 4;
} else {
- ret = UTF8_getc(p, len, &value);
+ ret = ossl_utf8_getc_internal(p, len, &value);
if (ret < 0)
return -1;
len -= ret;
@@ -263,7 +263,7 @@ static int traverse_string(const unsigned char *p, int len, int inform,
/* Just count number of characters */
-static int in_utf8(unsigned long value, void *arg)
+static int in_utf8(uint32_t value, void *arg)
{
int *nchar;
@@ -276,11 +276,11 @@ static int in_utf8(unsigned long value, void *arg)
/* Determine size of output as a UTF8 String */
-static int out_utf8(unsigned long value, void *arg)
+static int out_utf8(uint32_t value, void *arg)
{
int *outlen, len;
- len = UTF8_putc(NULL, -1, value);
+ len = ossl_utf8_putc_internal(NULL, -1, value);
if (len <= 0)
return len;
outlen = arg;
@@ -293,10 +293,10 @@ static int out_utf8(unsigned long value, void *arg)
* "mask".
*/
-static int type_str(unsigned long value, void *arg)
+static int type_str(uint32_t value, void *arg)
{
- unsigned long usable_types = *((unsigned long *)arg);
- unsigned long types = usable_types;
+ uint32_t usable_types = *((uint32_t *)arg);
+ uint32_t types = usable_types;
const int native = value > INT_MAX ? INT_MAX : ossl_fromascii(value);
/*
@@ -332,13 +332,13 @@ static int type_str(unsigned long value, void *arg)
types &= ~B_ASN1_UTF8STRING;
if (!types)
return -1;
- *((unsigned long *)arg) = types;
+ *((uint32_t *)arg) = types;
return 1;
}
/* Copy one byte per character ASCII like strings */
-static int cpy_asc(unsigned long value, void *arg)
+static int cpy_asc(uint32_t value, void *arg)
{
unsigned char **p, *q;
p = arg;
@@ -350,7 +350,7 @@ static int cpy_asc(unsigned long value, void *arg)
/* Copy two byte per character BMPStrings */
-static int cpy_bmp(unsigned long value, void *arg)
+static int cpy_bmp(uint32_t value, void *arg)
{
unsigned char **p, *q;
p = arg;
@@ -363,7 +363,7 @@ static int cpy_bmp(unsigned long value, void *arg)
/* Copy four byte per character UniversalStrings */
-static int cpy_univ(unsigned long value, void *arg)
+static int cpy_univ(uint32_t value, void *arg)
{
unsigned char **p, *q;
p = arg;
@@ -378,13 +378,13 @@ static int cpy_univ(unsigned long value, void *arg)
/* Copy to a UTF8String */
-static int cpy_utf8(unsigned long value, void *arg)
+static int cpy_utf8(uint32_t value, void *arg)
{
unsigned char **p;
int ret;
p = arg;
/* We already know there is enough room so pass 0xff as the length */
- ret = UTF8_putc(*p, 0xff, value);
+ ret = ossl_utf8_putc_internal(*p, 0xff, value);
if (ret < 0)
return ret;
*p += ret;
diff --git a/crypto/asn1/a_strex.c b/crypto/asn1/a_strex.c
index f36f526221..e488c87f5b 100644
--- a/crypto/asn1/a_strex.c
+++ b/crypto/asn1/a_strex.c
@@ -13,9 +13,11 @@
#include "internal/sizes.h"
#include "internal/unicode.h"
#include "crypto/asn1.h"
+#include <openssl/byteorder.h>
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/asn1.h>
+#include <inttypes.h>
#include "charmap.h"
@@ -57,11 +59,11 @@ typedef int char_io(void *arg, const void *buf, int len);
/*
* This function handles display of strings, one character at a time. It is
- * passed an unsigned long for each character because it could come from 2 or
+ * passed a uint32_t for each character because it could come from 2 or
* even 4 byte forms.
*/
-static int do_esc_char(unsigned long c, unsigned short flags, char *do_quotes,
+static int do_esc_char(uint32_t c, unsigned short flags, char *do_quotes,
char_io *io_ch, void *arg)
{
unsigned short chflgs;
@@ -71,13 +73,13 @@ static int do_esc_char(unsigned long c, unsigned short flags, char *do_quotes,
if (c > UNICODE_MAX)
return -1;
if (c > 0xffff) {
- BIO_snprintf(tmphex, sizeof(tmphex), "\\W%08lX", c);
+ BIO_snprintf(tmphex, sizeof(tmphex), "\\W%08" PRIX32, c);
if (!io_ch(arg, tmphex, 10))
return -1;
return 10;
}
if (c > 0xff) {
- BIO_snprintf(tmphex, sizeof(tmphex), "\\U%04lX", c);
+ BIO_snprintf(tmphex, sizeof(tmphex), "\\U%04" PRIX32, c);
if (!io_ch(arg, tmphex, 6))
return -1;
return 6;
@@ -131,14 +133,14 @@ static int do_esc_char(unsigned long c, unsigned short flags, char *do_quotes,
* appropriate.
*/
-static int do_buf(unsigned char *buf, int buflen,
+static int do_buf(const unsigned char *buf, int buflen,
int type, unsigned short flags, char *quotes, char_io *io_ch,
void *arg)
{
int i, outlen, len, charwidth;
unsigned short orflags;
- unsigned char *p, *q;
- unsigned long c;
+ const unsigned char *p, *q;
+ uint32_t c;
p = buf;
q = buf + buflen;
@@ -163,6 +165,8 @@ static int do_buf(unsigned char *buf, int buflen,
}
while (p != q) {
+ uint16_t tmp;
+
if (p == buf && flags & ASN1_STRFLGS_ESC_2253)
orflags = CHARTYPE_FIRST_ESC_2253;
else
@@ -170,15 +174,12 @@ static int do_buf(unsigned char *buf, int buflen,
switch (charwidth) {
case 4:
- c = ((unsigned long)*p++) << 24;
- c |= ((unsigned long)*p++) << 16;
- c |= ((unsigned long)*p++) << 8;
- c |= *p++;
+ p = OPENSSL_load_u32_be(&c, p);
break;
case 2:
- c = ((unsigned long)*p++) << 8;
- c |= *p++;
+ p = OPENSSL_load_u16_be(&tmp, p);
+ c = tmp;
break;
case 1:
@@ -186,7 +187,7 @@ static int do_buf(unsigned char *buf, int buflen,
break;
case 0:
- i = UTF8_getc(p, buflen, &c);
+ i = ossl_utf8_getc_internal(p, buflen, &c);
if (i < 0)
return -1; /* Invalid UTF8String */
buflen -= i;
@@ -199,7 +200,7 @@ static int do_buf(unsigned char *buf, int buflen,
orflags = CHARTYPE_LAST_ESC_2253;
if (type & BUF_TYPE_CONVUTF8) {
unsigned char utfbuf[6];
- int utflen = UTF8_putc(utfbuf, sizeof(utfbuf), c);
+ int utflen = ossl_utf8_putc_internal(utfbuf, sizeof(utfbuf), c);
if (utflen < 0)
return -1; /* error happened with UTF8 */
diff --git a/crypto/asn1/a_utf8.c b/crypto/asn1/a_utf8.c
index fb5999fa7f..76f5ccbda4 100644
--- a/crypto/asn1/a_utf8.c
+++ b/crypto/asn1/a_utf8.c
@@ -11,6 +11,7 @@
#include "internal/cryptlib.h"
#include "internal/unicode.h"
#include <openssl/asn1.h>
+#include <crypto/asn1.h>
/* UTF8 utilities */
@@ -25,10 +26,10 @@
* -4 = character encoded incorrectly (not minimal length).
*/
-int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
+int ossl_utf8_getc_internal(const unsigned char *str, int len, uint32_t *val)
{
const unsigned char *p;
- unsigned long value;
+ uint32_t value;
int ret;
if (len <= 0)
return 0;
@@ -82,6 +83,21 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
return ret;
}
+#if !defined(OPENSSL_NO_DEPRECATED_4_1)
+/*
+ * Deprecated public wrapper around ossl_utf8_getc_internal() that keeps the
+ * unsigned long output type of the public prototype.
+ *
+ * Returns the value returned by ossl_utf8_getc_internal(): 0 when len <= 0,
+ * a negative code on a decoding error, otherwise a positive count.
+ * *val is written only when the return value is positive; on a zero or
+ * negative return it is left untouched.
+ */
+int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
+{
+    uint32_t value = 0;
+    int ret;
+
+    ret = ossl_utf8_getc_internal(str, len, &value);
+
+    /*
+     * Negative returns are errors: storing here would overwrite the
+     * caller's variable with the placeholder 0 from above.
+     */
+    if (ret > 0)
+        *val = (unsigned long)value;
+
+    return ret;
+}
+#endif /* !defined(OPENSSL_NO_DEPRECATED_4_1) */
+
/*
* This takes a character 'value' and writes the UTF8 encoded value in 'str'
* where 'str' is a buffer containing 'len' characters. Returns the number of
@@ -90,7 +106,7 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
* characters. It will need at most 4 characters.
*/
-int UTF8_putc(unsigned char *str, int len, unsigned long value)
+int ossl_utf8_putc_internal(unsigned char *str, int len, uint32_t value)
{
if (!str)
len = 4; /* Maximum we will need */
@@ -135,3 +151,10 @@ int UTF8_putc(unsigned char *str, int len, unsigned long value)
}
return -2;
}
+
+#if !defined(OPENSSL_NO_DEPRECATED_4_1)
+/*
+ * Deprecated public wrapper around ossl_utf8_putc_internal() that keeps the
+ * unsigned long parameter type of the public prototype.
+ *
+ * Returns the value returned by ossl_utf8_putc_internal().
+ */
+int UTF8_putc(unsigned char *str, int len, unsigned long value)
+{
+    uint32_t cp = (uint32_t)value;
+
+    /*
+     * Where unsigned long is wider than 32 bits, a plain cast silently drops
+     * the high bits and can turn an out-of-range value into a valid code
+     * point (e.g. 0x100000041 -> 0x41).  Saturate instead: UINT32_MAX is
+     * above UNICODE_MAX, so the value stays outside the Unicode range and
+     * is expected to be rejected by the encoder like any other
+     * out-of-range input.
+     */
+    if ((unsigned long)cp != value)
+        cp = UINT32_MAX;
+
+    return ossl_utf8_putc_internal(str, len, cp);
+}
+#endif
diff --git a/crypto/pkcs12/p12_utl.c b/crypto/pkcs12/p12_utl.c
index 0b130fc8bd..c864c9c9aa 100644
--- a/crypto/pkcs12/p12_utl.c
+++ b/crypto/pkcs12/p12_utl.c
@@ -12,6 +12,7 @@
#include <openssl/pkcs12.h>
#include "p12_local.h"
#include "crypto/pkcs7/pk7_local.h"
+#include <crypto/asn1.h>
/* Cheap and nasty Unicode stuff */
@@ -79,13 +80,14 @@ unsigned char *OPENSSL_utf82uni(const char *asc, int asclen,
{
int ulen, i, j;
unsigned char *unitmp, *ret;
- unsigned long utf32chr = 0;
+ uint32_t utf32chr = 0;
if (asclen == -1)
asclen = (int)strlen(asc);
for (ulen = 0, i = 0; i < asclen; i += j) {
- j = UTF8_getc((const unsigned char *)asc + i, asclen - i, &utf32chr);
+ j = ossl_utf8_getc_internal((const unsigned char *)asc + i, asclen - i,
+ &utf32chr);
/*
* Following condition is somewhat opportunistic is sense that
@@ -121,7 +123,8 @@ unsigned char *OPENSSL_utf82uni(const char *asc, int asclen,
return NULL;
/* re-run the loop writing down UTF-16 characters in big-endian order */
for (unitmp = ret, i = 0; i < asclen; i += j) {
- j = UTF8_getc((const unsigned char *)asc + i, asclen - i, &utf32chr);
+ j = ossl_utf8_getc_internal((const unsigned char *)asc + i, asclen - i,
+ &utf32chr);
if (utf32chr >= 0x10000) { /* pair if UTF-16 characters */
unsigned int hi, lo;
@@ -149,7 +152,7 @@ unsigned char *OPENSSL_utf82uni(const char *asc, int asclen,
static int bmp_to_utf8(char *str, const unsigned char *utf16, int len)
{
- unsigned long utf32chr;
+ uint32_t utf32chr;
if (len == 0)
return 0;
@@ -175,9 +178,8 @@ static int bmp_to_utf8(char *str, const unsigned char *utf16, int len)
utf32chr += 0x10000;
}
- return UTF8_putc((unsigned char *)str, 4, utf32chr);
+ return ossl_utf8_putc_internal((unsigned char *)str, 4, utf32chr);
}
-
char *OPENSSL_uni2utf8(const unsigned char *uni, int unilen)
{
int asclen, i, j;
diff --git a/include/crypto/asn1.h b/include/crypto/asn1.h
index 6f969eab60..f7dc7852f6 100644
--- a/include/crypto/asn1.h
+++ b/include/crypto/asn1.h
@@ -192,4 +192,7 @@ int asn1_item_embed_d2i(ASN1_VALUE **pval, const unsigned char **in,
ASN1_TIME *ossl_asn1_time_from_tm(ASN1_TIME *s, struct tm *ts, int type);
+int ossl_utf8_getc_internal(const unsigned char *str, int len, uint32_t *val);
+int ossl_utf8_putc_internal(unsigned char *str, int len, uint32_t value);
+
#endif /* ndef OSSL_CRYPTO_ASN1_H */
diff --git a/include/internal/unicode.h b/include/internal/unicode.h
index d1dae6d245..f09bfc7320 100644
--- a/include/internal/unicode.h
+++ b/include/internal/unicode.h
@@ -12,11 +12,12 @@
#pragma once
#include <openssl/e_os2.h>
+#include <stdint.h>
typedef enum {
- SURROGATE_MIN = 0xd800UL,
- SURROGATE_MAX = 0xdfffUL,
- UNICODE_MAX = 0x10ffffUL,
+ SURROGATE_MIN = UINT32_C(0xd800),
+ SURROGATE_MAX = UINT32_C(0xdfff),
+ UNICODE_MAX = UINT32_C(0x10ffff),
UNICODE_LIMIT
} UNICODE_CONSTANTS;
diff --git a/include/openssl/asn1.h.in b/include/openssl/asn1.h.in
index 7beef17569..cce56701c9 100644
--- a/include/openssl/asn1.h.in
+++ b/include/openssl/asn1.h.in
@@ -618,8 +618,10 @@ DECLARE_ASN1_FUNCTIONS(ASN1_UTF8STRING)
DECLARE_ASN1_FUNCTIONS(ASN1_NULL)
DECLARE_ASN1_FUNCTIONS(ASN1_BMPSTRING)
-int UTF8_getc(const unsigned char *str, int len, unsigned long *val);
-int UTF8_putc(unsigned char *str, int len, unsigned long value);
+#if !defined(OPENSSL_NO_DEPRECATED_4_1)
+OSSL_DEPRECATEDIN_4_1 int UTF8_getc(const unsigned char *str, int len, unsigned long *val);
+OSSL_DEPRECATEDIN_4_1 int UTF8_putc(unsigned char *str, int len, unsigned long value);
+#endif /* !defined(OPENSSL_NO_DEPRECATED_4_1) */
/* clang-format off */
{-
diff --git a/test/asn1_internal_test.c b/test/asn1_internal_test.c
index 83b5a2c630..8edea7ca91 100644
--- a/test/asn1_internal_test.c
+++ b/test/asn1_internal_test.c
@@ -195,9 +195,9 @@ static int test_unicode_range(void)
static int test_invalid_utf8(void)
{
const unsigned char inv_utf8[] = "\xF4\x90\x80\x80";
- unsigned long val;
+ uint32_t val;
- if (!TEST_int_lt(UTF8_getc(inv_utf8, sizeof(inv_utf8), &val), 0))
+ if (!TEST_int_lt(ossl_utf8_getc_internal(inv_utf8, sizeof(inv_utf8), &val), 0))
return 0;
return 1;
}
diff --git a/util/libcrypto.num b/util/libcrypto.num
index 90c81f1fce..dbaac8b37f 100644
--- a/util/libcrypto.num
+++ b/util/libcrypto.num
@@ -2646,8 +2646,8 @@ i2d_ASN1_BMPSTRING 2644 4_0_0 EXIST::FUNCTION:
ASN1_BMPSTRING_free 2645 4_0_0 EXIST::FUNCTION:
ASN1_BMPSTRING_new 2646 4_0_0 EXIST::FUNCTION:
ASN1_BMPSTRING_it 2647 4_0_0 EXIST::FUNCTION:
-UTF8_getc 2648 4_0_0 EXIST::FUNCTION:
-UTF8_putc 2649 4_0_0 EXIST::FUNCTION:
+UTF8_getc 2648 4_0_0 EXIST::FUNCTION:DEPRECATEDIN_4_1
+UTF8_putc 2649 4_0_0 EXIST::FUNCTION:DEPRECATEDIN_4_1
d2i_ASN1_PRINTABLE 2650 4_0_0 EXIST::FUNCTION:
i2d_ASN1_PRINTABLE 2651 4_0_0 EXIST::FUNCTION:
ASN1_PRINTABLE_free 2652 4_0_0 EXIST::FUNCTION: