Commit 2b5d69caca for openssl.org

commit 2b5d69caca23d8fc59c20b5a5bee10ef8c2524b7
Author: Bob Beck <beck@openssl.org>
Date:   Fri Apr 24 17:22:32 2026 -0600

    Deprecate UTF8_putc/getc() and use uint32_t for unicode

    Unicode codepoints fit in 32 bits so we can use uint32_t
    instead of unsigned long.

    Also use OPENSSL_load_XX_be instead of manual shifting.

    Reviewed-by: Viktor Dukhovni <viktor@openssl.org>
    Reviewed-by: Norbert Pocs <norbertp@openssl.org>
    MergeDate: Mon May 18 11:02:37 2026
    (Merged from https://github.com/openssl/openssl/pull/30967)

diff --git a/CHANGES.md b/CHANGES.md
index cb6771d7bc..75bc601a9b 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -142,6 +142,12 @@ OpenSSL Releases

    *William McCormack*

+ * The undocumented public functions `UTF8_putc()` and `UTF8_getc()`
+   have been deprecated; their functionality has moved to functions
+   internal to the library. No public replacement is planned.
+
+   *Bob Beck*
+
  * Added IKEV2 KDF (EVP_KDF-IKEV2KDF) implementation.

    *Helen Zhang*
diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c
index 05b8ac8706..89ba832111 100644
--- a/crypto/asn1/a_mbstr.c
+++ b/crypto/asn1/a_mbstr.c
@@ -12,19 +12,20 @@
 #include "internal/cryptlib.h"
 #include "internal/unicode.h"
 #include <openssl/asn1.h>
+#include <openssl/byteorder.h>

 #include <crypto/asn1.h>

 static int traverse_string(const unsigned char *p, int len, int inform,
-    int (*rfunc)(unsigned long value, void *in),
+    int (*rfunc)(uint32_t value, void *in),
     void *arg);
-static int in_utf8(unsigned long value, void *arg);
-static int out_utf8(unsigned long value, void *arg);
-static int type_str(unsigned long value, void *arg);
-static int cpy_asc(unsigned long value, void *arg);
-static int cpy_bmp(unsigned long value, void *arg);
-static int cpy_univ(unsigned long value, void *arg);
-static int cpy_utf8(unsigned long value, void *arg);
+static int in_utf8(uint32_t value, void *arg);
+static int out_utf8(uint32_t value, void *arg);
+static int type_str(uint32_t value, void *arg);
+static int cpy_asc(uint32_t value, void *arg);
+static int cpy_bmp(uint32_t value, void *arg);
+static int cpy_univ(uint32_t value, void *arg);
+static int cpy_utf8(uint32_t value, void *arg);

 /*
  * These functions take a string in UTF8, ASCII or multibyte form and a mask
@@ -42,7 +43,7 @@ int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len,
 }

 int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
-    int inform, unsigned long mask,
+    int inform, unsigned long mask_in,
     long minsize, long maxsize)
 {
     int str_type;
@@ -52,7 +53,8 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
     ASN1_STRING *dest;
     unsigned char *p;
     int nchar;
-    int (*cpyfunc)(unsigned long, void *) = NULL;
+    uint32_t mask = (uint32_t)mask_in;
+    int (*cpyfunc)(uint32_t, void *) = NULL;
     if (len == -1) {
         size_t len_s = strlen((const char *)in);

@@ -224,27 +226,25 @@ int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
  */

 static int traverse_string(const unsigned char *p, int len, int inform,
-    int (*rfunc)(unsigned long value, void *in),
+    int (*rfunc)(uint32_t value, void *in),
     void *arg)
 {
-    unsigned long value;
+    uint32_t value;
     int ret;
     while (len) {
         if (inform == MBSTRING_ASC) {
             value = *p++;
             len--;
         } else if (inform == MBSTRING_BMP) {
-            value = *p++ << 8;
-            value |= *p++;
+            uint16_t tmp;
+            p = OPENSSL_load_u16_be(&tmp, p);
+            value = tmp;
             len -= 2;
         } else if (inform == MBSTRING_UNIV) {
-            value = ((unsigned long)*p++) << 24;
-            value |= ((unsigned long)*p++) << 16;
-            value |= *p++ << 8;
-            value |= *p++;
+            p = OPENSSL_load_u32_be(&value, p);
             len -= 4;
         } else {
-            ret = UTF8_getc(p, len, &value);
+            ret = ossl_utf8_getc_internal(p, len, &value);
             if (ret < 0)
                 return -1;
             len -= ret;
@@ -263,7 +263,7 @@ static int traverse_string(const unsigned char *p, int len, int inform,

 /* Just count number of characters */

-static int in_utf8(unsigned long value, void *arg)
+static int in_utf8(uint32_t value, void *arg)
 {
     int *nchar;

@@ -276,11 +276,11 @@ static int in_utf8(unsigned long value, void *arg)

 /* Determine size of output as a UTF8 String */

-static int out_utf8(unsigned long value, void *arg)
+static int out_utf8(uint32_t value, void *arg)
 {
     int *outlen, len;

-    len = UTF8_putc(NULL, -1, value);
+    len = ossl_utf8_putc_internal(NULL, -1, value);
     if (len <= 0)
         return len;
     outlen = arg;
@@ -293,10 +293,10 @@ static int out_utf8(unsigned long value, void *arg)
  * "mask".
  */

-static int type_str(unsigned long value, void *arg)
+static int type_str(uint32_t value, void *arg)
 {
-    unsigned long usable_types = *((unsigned long *)arg);
-    unsigned long types = usable_types;
+    uint32_t usable_types = *((uint32_t *)arg);
+    uint32_t types = usable_types;
     const int native = value > INT_MAX ? INT_MAX : ossl_fromascii(value);

     /*
@@ -332,13 +332,13 @@ static int type_str(unsigned long value, void *arg)
         types &= ~B_ASN1_UTF8STRING;
     if (!types)
         return -1;
-    *((unsigned long *)arg) = types;
+    *((uint32_t *)arg) = types;
     return 1;
 }

 /* Copy one byte per character ASCII like strings */

-static int cpy_asc(unsigned long value, void *arg)
+static int cpy_asc(uint32_t value, void *arg)
 {
     unsigned char **p, *q;
     p = arg;
@@ -350,7 +350,7 @@ static int cpy_asc(unsigned long value, void *arg)

 /* Copy two byte per character BMPStrings */

-static int cpy_bmp(unsigned long value, void *arg)
+static int cpy_bmp(uint32_t value, void *arg)
 {
     unsigned char **p, *q;
     p = arg;
@@ -363,7 +363,7 @@ static int cpy_bmp(unsigned long value, void *arg)

 /* Copy four byte per character UniversalStrings */

-static int cpy_univ(unsigned long value, void *arg)
+static int cpy_univ(uint32_t value, void *arg)
 {
     unsigned char **p, *q;
     p = arg;
@@ -378,13 +378,13 @@ static int cpy_univ(unsigned long value, void *arg)

 /* Copy to a UTF8String */

-static int cpy_utf8(unsigned long value, void *arg)
+static int cpy_utf8(uint32_t value, void *arg)
 {
     unsigned char **p;
     int ret;
     p = arg;
     /* We already know there is enough room so pass 0xff as the length */
-    ret = UTF8_putc(*p, 0xff, value);
+    ret = ossl_utf8_putc_internal(*p, 0xff, value);
     if (ret < 0)
         return ret;
     *p += ret;
diff --git a/crypto/asn1/a_strex.c b/crypto/asn1/a_strex.c
index f36f526221..e488c87f5b 100644
--- a/crypto/asn1/a_strex.c
+++ b/crypto/asn1/a_strex.c
@@ -13,9 +13,11 @@
 #include "internal/sizes.h"
 #include "internal/unicode.h"
 #include "crypto/asn1.h"
+#include <openssl/byteorder.h>
 #include <openssl/crypto.h>
 #include <openssl/x509.h>
 #include <openssl/asn1.h>
+#include <inttypes.h>

 #include "charmap.h"

@@ -57,11 +59,11 @@ typedef int char_io(void *arg, const void *buf, int len);

 /*
  * This function handles display of strings, one character at a time. It is
- * passed an unsigned long for each character because it could come from 2 or
+ * passed a uint32_t for each character because it could come from 2 or
  * even 4 byte forms.
  */

-static int do_esc_char(unsigned long c, unsigned short flags, char *do_quotes,
+static int do_esc_char(uint32_t c, unsigned short flags, char *do_quotes,
     char_io *io_ch, void *arg)
 {
     unsigned short chflgs;
@@ -71,13 +73,13 @@ static int do_esc_char(unsigned long c, unsigned short flags, char *do_quotes,
     if (c > UNICODE_MAX)
         return -1;
     if (c > 0xffff) {
-        BIO_snprintf(tmphex, sizeof(tmphex), "\\W%08lX", c);
+        BIO_snprintf(tmphex, sizeof(tmphex), "\\W%08" PRIX32, c);
         if (!io_ch(arg, tmphex, 10))
             return -1;
         return 10;
     }
     if (c > 0xff) {
-        BIO_snprintf(tmphex, sizeof(tmphex), "\\U%04lX", c);
+        BIO_snprintf(tmphex, sizeof(tmphex), "\\U%04" PRIX32, c);
         if (!io_ch(arg, tmphex, 6))
             return -1;
         return 6;
@@ -131,14 +133,14 @@ static int do_esc_char(unsigned long c, unsigned short flags, char *do_quotes,
  * appropriate.
  */

-static int do_buf(unsigned char *buf, int buflen,
+static int do_buf(const unsigned char *buf, int buflen,
     int type, unsigned short flags, char *quotes, char_io *io_ch,
     void *arg)
 {
     int i, outlen, len, charwidth;
     unsigned short orflags;
-    unsigned char *p, *q;
-    unsigned long c;
+    const unsigned char *p, *q;
+    uint32_t c;

     p = buf;
     q = buf + buflen;
@@ -163,6 +165,8 @@ static int do_buf(unsigned char *buf, int buflen,
     }

     while (p != q) {
+        uint16_t tmp;
+
         if (p == buf && flags & ASN1_STRFLGS_ESC_2253)
             orflags = CHARTYPE_FIRST_ESC_2253;
         else
@@ -170,15 +174,12 @@ static int do_buf(unsigned char *buf, int buflen,

         switch (charwidth) {
         case 4:
-            c = ((unsigned long)*p++) << 24;
-            c |= ((unsigned long)*p++) << 16;
-            c |= ((unsigned long)*p++) << 8;
-            c |= *p++;
+            p = OPENSSL_load_u32_be(&c, p);
             break;

         case 2:
-            c = ((unsigned long)*p++) << 8;
-            c |= *p++;
+            p = OPENSSL_load_u16_be(&tmp, p);
+            c = tmp;
             break;

         case 1:
@@ -186,7 +187,7 @@ static int do_buf(unsigned char *buf, int buflen,
             break;

         case 0:
-            i = UTF8_getc(p, buflen, &c);
+            i = ossl_utf8_getc_internal(p, buflen, &c);
             if (i < 0)
                 return -1; /* Invalid UTF8String */
             buflen -= i;
@@ -199,7 +200,7 @@ static int do_buf(unsigned char *buf, int buflen,
             orflags = CHARTYPE_LAST_ESC_2253;
         if (type & BUF_TYPE_CONVUTF8) {
             unsigned char utfbuf[6];
-            int utflen = UTF8_putc(utfbuf, sizeof(utfbuf), c);
+            int utflen = ossl_utf8_putc_internal(utfbuf, sizeof(utfbuf), c);

             if (utflen < 0)
                 return -1; /* error happened with UTF8 */
diff --git a/crypto/asn1/a_utf8.c b/crypto/asn1/a_utf8.c
index fb5999fa7f..76f5ccbda4 100644
--- a/crypto/asn1/a_utf8.c
+++ b/crypto/asn1/a_utf8.c
@@ -11,6 +11,7 @@
 #include "internal/cryptlib.h"
 #include "internal/unicode.h"
 #include <openssl/asn1.h>
+#include <crypto/asn1.h>

 /* UTF8 utilities */

@@ -25,10 +26,10 @@
  * -4 = character encoded incorrectly (not minimal length).
  */

-int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
+int ossl_utf8_getc_internal(const unsigned char *str, int len, uint32_t *val)
 {
     const unsigned char *p;
-    unsigned long value;
+    uint32_t value;
     int ret;
     if (len <= 0)
         return 0;
@@ -82,6 +83,21 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
     return ret;
 }

+#if !defined(OPENSSL_NO_DEPRECATED_4_1)
+/* Deprecated public wrapper around ossl_utf8_getc_internal(). */
+int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
+{
+    uint32_t value = 0;
+    int ret = ossl_utf8_getc_internal(str, len, &value);
+
+    /* Only store on success: ret is 0 for len <= 0 and negative on error. */
+    if (ret > 0)
+        *val = (unsigned long)value;
+
+    return ret;
+}
+#endif /* !defined(OPENSSL_NO_DEPRECATED_4_1) */
+
 /*
  * This takes a character 'value' and writes the UTF8 encoded value in 'str'
  * where 'str' is a buffer containing 'len' characters. Returns the number of
@@ -90,7 +106,7 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
  * characters. It will need at most 4 characters.
  */

-int UTF8_putc(unsigned char *str, int len, unsigned long value)
+int ossl_utf8_putc_internal(unsigned char *str, int len, uint32_t value)
 {
     if (!str)
         len = 4; /* Maximum we will need */
@@ -135,3 +151,17 @@ int UTF8_putc(unsigned char *str, int len, unsigned long value)
     }
     return -2;
 }
+
+#if !defined(OPENSSL_NO_DEPRECATED_4_1)
+/*
+ * Deprecated public wrapper around ossl_utf8_putc_internal(). Values above
+ * UNICODE_MAX are rejected before narrowing so that an out-of-range
+ * unsigned long cannot wrap into a valid code point when cast to uint32_t.
+ */
+int UTF8_putc(unsigned char *str, int len, unsigned long value)
+{
+    if (value > UNICODE_MAX)
+        return -2;
+    return ossl_utf8_putc_internal(str, len, (uint32_t)value);
+}
+#endif /* !defined(OPENSSL_NO_DEPRECATED_4_1) */
diff --git a/crypto/pkcs12/p12_utl.c b/crypto/pkcs12/p12_utl.c
index 0b130fc8bd..c864c9c9aa 100644
--- a/crypto/pkcs12/p12_utl.c
+++ b/crypto/pkcs12/p12_utl.c
@@ -12,6 +12,7 @@
 #include <openssl/pkcs12.h>
 #include "p12_local.h"
 #include "crypto/pkcs7/pk7_local.h"
+#include <crypto/asn1.h>

 /* Cheap and nasty Unicode stuff */

@@ -79,13 +80,14 @@ unsigned char *OPENSSL_utf82uni(const char *asc, int asclen,
 {
     int ulen, i, j;
     unsigned char *unitmp, *ret;
-    unsigned long utf32chr = 0;
+    uint32_t utf32chr = 0;

     if (asclen == -1)
         asclen = (int)strlen(asc);

     for (ulen = 0, i = 0; i < asclen; i += j) {
-        j = UTF8_getc((const unsigned char *)asc + i, asclen - i, &utf32chr);
+        j = ossl_utf8_getc_internal((const unsigned char *)asc + i, asclen - i,
+            &utf32chr);

         /*
          * Following condition is somewhat opportunistic is sense that
@@ -121,7 +123,8 @@ unsigned char *OPENSSL_utf82uni(const char *asc, int asclen,
         return NULL;
     /* re-run the loop writing down UTF-16 characters in big-endian order */
     for (unitmp = ret, i = 0; i < asclen; i += j) {
-        j = UTF8_getc((const unsigned char *)asc + i, asclen - i, &utf32chr);
+        j = ossl_utf8_getc_internal((const unsigned char *)asc + i, asclen - i,
+            &utf32chr);
         if (utf32chr >= 0x10000) { /* pair if UTF-16 characters */
             unsigned int hi, lo;

@@ -149,7 +152,7 @@ unsigned char *OPENSSL_utf82uni(const char *asc, int asclen,

 static int bmp_to_utf8(char *str, const unsigned char *utf16, int len)
 {
-    unsigned long utf32chr;
+    uint32_t utf32chr;

     if (len == 0)
         return 0;
@@ -175,9 +178,9 @@ static int bmp_to_utf8(char *str, const unsigned char *utf16, int len)
         utf32chr += 0x10000;
     }

-    return UTF8_putc((unsigned char *)str, 4, utf32chr);
+    return ossl_utf8_putc_internal((unsigned char *)str, 4, utf32chr);
 }

 char *OPENSSL_uni2utf8(const unsigned char *uni, int unilen)
 {
     int asclen, i, j;
diff --git a/include/crypto/asn1.h b/include/crypto/asn1.h
index 6f969eab60..f7dc7852f6 100644
--- a/include/crypto/asn1.h
+++ b/include/crypto/asn1.h
@@ -192,4 +192,7 @@ int asn1_item_embed_d2i(ASN1_VALUE **pval, const unsigned char **in,

 ASN1_TIME *ossl_asn1_time_from_tm(ASN1_TIME *s, struct tm *ts, int type);

+int ossl_utf8_getc_internal(const unsigned char *str, int len, uint32_t *val);
+int ossl_utf8_putc_internal(unsigned char *str, int len, uint32_t value);
+
 #endif /* ndef OSSL_CRYPTO_ASN1_H */
diff --git a/include/internal/unicode.h b/include/internal/unicode.h
index d1dae6d245..f09bfc7320 100644
--- a/include/internal/unicode.h
+++ b/include/internal/unicode.h
@@ -12,11 +12,12 @@
 #pragma once

 #include <openssl/e_os2.h>
+#include <stdint.h>

 typedef enum {
-    SURROGATE_MIN = 0xd800UL,
-    SURROGATE_MAX = 0xdfffUL,
-    UNICODE_MAX = 0x10ffffUL,
+    SURROGATE_MIN = UINT32_C(0xd800),
+    SURROGATE_MAX = UINT32_C(0xdfff),
+    UNICODE_MAX = UINT32_C(0x10ffff),
     UNICODE_LIMIT
 } UNICODE_CONSTANTS;

diff --git a/include/openssl/asn1.h.in b/include/openssl/asn1.h.in
index 7beef17569..cce56701c9 100644
--- a/include/openssl/asn1.h.in
+++ b/include/openssl/asn1.h.in
@@ -618,8 +618,10 @@ DECLARE_ASN1_FUNCTIONS(ASN1_UTF8STRING)
 DECLARE_ASN1_FUNCTIONS(ASN1_NULL)
 DECLARE_ASN1_FUNCTIONS(ASN1_BMPSTRING)

-int UTF8_getc(const unsigned char *str, int len, unsigned long *val);
-int UTF8_putc(unsigned char *str, int len, unsigned long value);
+#if !defined(OPENSSL_NO_DEPRECATED_4_1)
+OSSL_DEPRECATEDIN_4_1 int UTF8_getc(const unsigned char *str, int len, unsigned long *val);
+OSSL_DEPRECATEDIN_4_1 int UTF8_putc(unsigned char *str, int len, unsigned long value);
+#endif /* !defined(OPENSSL_NO_DEPRECATED_4_1) */

 /* clang-format off */
 {-
diff --git a/test/asn1_internal_test.c b/test/asn1_internal_test.c
index 83b5a2c630..8edea7ca91 100644
--- a/test/asn1_internal_test.c
+++ b/test/asn1_internal_test.c
@@ -195,9 +195,9 @@ static int test_unicode_range(void)
 static int test_invalid_utf8(void)
 {
     const unsigned char inv_utf8[] = "\xF4\x90\x80\x80";
-    unsigned long val;
+    uint32_t val;

-    if (!TEST_int_lt(UTF8_getc(inv_utf8, sizeof(inv_utf8), &val), 0))
+    if (!TEST_int_lt(ossl_utf8_getc_internal(inv_utf8, sizeof(inv_utf8), &val), 0))
         return 0;
     return 1;
 }
diff --git a/util/libcrypto.num b/util/libcrypto.num
index 90c81f1fce..dbaac8b37f 100644
--- a/util/libcrypto.num
+++ b/util/libcrypto.num
@@ -2646,8 +2646,8 @@ i2d_ASN1_BMPSTRING                      2644	4_0_0	EXIST::FUNCTION:
 ASN1_BMPSTRING_free                     2645	4_0_0	EXIST::FUNCTION:
 ASN1_BMPSTRING_new                      2646	4_0_0	EXIST::FUNCTION:
 ASN1_BMPSTRING_it                       2647	4_0_0	EXIST::FUNCTION:
-UTF8_getc                               2648	4_0_0	EXIST::FUNCTION:
-UTF8_putc                               2649	4_0_0	EXIST::FUNCTION:
+UTF8_getc                               2648	4_0_0	EXIST::FUNCTION:DEPRECATEDIN_4_1
+UTF8_putc                               2649	4_0_0	EXIST::FUNCTION:DEPRECATEDIN_4_1
 d2i_ASN1_PRINTABLE                      2650	4_0_0	EXIST::FUNCTION:
 i2d_ASN1_PRINTABLE                      2651	4_0_0	EXIST::FUNCTION:
 ASN1_PRINTABLE_free                     2652	4_0_0	EXIST::FUNCTION: