Commit d9b72b33 for guacamole.apache.org
commit d9b72b33944e6e9b247fe0302a1112ad9cc81fd7
Author: Bradley Bennett <bbennett@keepersecurity.com>
Date: Thu Apr 30 20:11:44 2026 -0400
GUACAMOLE-2272: VNC: add MacRoman clipboard encoding support.
diff --git a/src/common/common/defaults.h b/src/common/common/defaults.h
index 72c1e51e..e6c90ee6 100644
--- a/src/common/common/defaults.h
+++ b/src/common/common/defaults.h
@@ -20,6 +20,13 @@
#ifndef GUAC_COMMON_DEFAULTS_H
#define GUAC_COMMON_DEFAULTS_H
+/**
+ * Returns the number of elements in a statically-sized array.
+ *
+ * The argument must be a true array whose size is known at the point of use.
+ * If a pointer is passed instead (including an array parameter that has
+ * decayed to a pointer), sizeof(x) is the size of the pointer and the result
+ * is not an element count.
+ *
+ * The definition is skipped if ARRAY_SIZE has already been defined.
+ */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#endif
+
/**
* The default number of seconds to wait after sending the Wake-on-LAN packet
* for the destination host to start responding.
@@ -27,4 +34,3 @@
#define GUAC_WOL_DEFAULT_BOOT_WAIT_TIME 0
#endif /* GUAC_COMMON_DEFAULTS_H */
-
diff --git a/src/common/common/iconv.h b/src/common/common/iconv.h
index 43335eab..38b85788 100644
--- a/src/common/common/iconv.h
+++ b/src/common/common/iconv.h
@@ -20,6 +20,8 @@
#ifndef __GUAC_COMMON_ICONV_H
#define __GUAC_COMMON_ICONV_H
+#include "common/defaults.h"
+
/**
* Function which reads a character from the given string data, returning
* the Unicode codepoint read, updating the string pointer to point to the
@@ -118,6 +120,16 @@ guac_iconv_write GUAC_WRITE_CP1252;
*/
guac_iconv_write GUAC_WRITE_ISO8859_1;
+/**
+ * Read function for Mac OS Roman.
+ */
+guac_iconv_read GUAC_READ_MACROMAN;
+
+/**
+ * Write function for Mac OS Roman.
+ */
+guac_iconv_write GUAC_WRITE_MACROMAN;
+
/**
* Write function for UTF-8 which writes newline characters ('\n') as
* Windows-style newlines ("\r\n").
@@ -142,5 +154,17 @@ guac_iconv_write GUAC_WRITE_CP1252_CRLF;
*/
guac_iconv_write GUAC_WRITE_ISO8859_1_CRLF;
+/**
+ * Read function for Mac OS Roman which normalizes both bare carriage returns
+ * ('\r') and "\r\n" sequences to Unix-style newlines ('\n').
+ */
+guac_iconv_read GUAC_READ_MACROMAN_NORMALIZED;
+
+/**
+ * Write function for Mac OS Roman which writes newline characters ('\n') as
+ * Windows-style newlines ("\r\n").
+ */
+guac_iconv_write GUAC_WRITE_MACROMAN_CRLF;
+
#endif
diff --git a/src/common/iconv.c b/src/common/iconv.c
index 8ff772ae..9580d794 100644
--- a/src/common/iconv.c
+++ b/src/common/iconv.c
@@ -21,13 +21,24 @@
#include "common/iconv.h"
#include <guacamole/unicode.h>
-#include <stdint.h>
+
#include <endian.h>
+#include <stdint.h>
+#include <stdlib.h>
/**
- * Lookup table for Unicode code points, indexed by CP-1252 codepoint.
+ * CP1252 lookup tables.
+ *
+ * __GUAC_CP1252_CODEPOINT maps CP1252 byte values in the 0x80-0x9F exception
+ * range to Unicode codepoints.
+ *
+ * __GUAC_UNICODE_TO_CP1252 maps the Unicode codepoints representable in that
+ * exception range back to their byte values. Because only 27 codepoints in the
+ * range differ from their Unicode byte values, the reverse mapping is stored
+ * as a sparse array sorted by codepoint, allowing the CP1252 byte to be found
+ * with bsearch() in O(log n) time.
*/
-const static int __GUAC_RDP_CP1252_CODEPOINT[32] = {
+const static int __GUAC_CP1252_CODEPOINT[32] = {
0x20AC, /* 0x80 */
0xFFFD, /* 0x81 */
0x201A, /* 0x82 */
@@ -62,6 +73,351 @@ const static int __GUAC_RDP_CP1252_CODEPOINT[32] = {
0x0178, /* 0x9F */
};
+/**
+ * A single entry of the Unicode-to-CP1252 reverse lookup table, pairing a
+ * Unicode codepoint with the CP1252 byte that encodes it.
+ */
+typedef struct {
+ /* Unicode codepoint; the key compared by guac_cp1252_reverse_cmp() */
+ int codepoint;
+ /* CP1252 byte written by GUAC_WRITE_CP1252() when the codepoint matches */
+ unsigned char byte;
+} guac_cp1252_reverse_t;
+
+/**
+ * Reverse lookup table mapping Unicode codepoints to bytes within the CP1252
+ * 0x80-0x9F exception range. Entries MUST remain sorted by ascending
+ * codepoint, as GUAC_WRITE_CP1252() searches this table with bsearch(). The
+ * trailing comment on each entry repeats the CP1252 byte value.
+ */
+const static guac_cp1252_reverse_t __GUAC_UNICODE_TO_CP1252[27] = {
+ { 0x0152, 0x8C }, /* 0x8C */
+ { 0x0153, 0x9C }, /* 0x9C */
+ { 0x0160, 0x8A }, /* 0x8A */
+ { 0x0161, 0x9A }, /* 0x9A */
+ { 0x0178, 0x9F }, /* 0x9F */
+ { 0x017D, 0x8E }, /* 0x8E */
+ { 0x017E, 0x9E }, /* 0x9E */
+ { 0x0192, 0x83 }, /* 0x83 */
+ { 0x02C6, 0x88 }, /* 0x88 */
+ { 0x02DC, 0x98 }, /* 0x98 */
+ { 0x2013, 0x96 }, /* 0x96 */
+ { 0x2014, 0x97 }, /* 0x97 */
+ { 0x2018, 0x91 }, /* 0x91 */
+ { 0x2019, 0x92 }, /* 0x92 */
+ { 0x201A, 0x82 }, /* 0x82 */
+ { 0x201C, 0x93 }, /* 0x93 */
+ { 0x201D, 0x94 }, /* 0x94 */
+ { 0x201E, 0x84 }, /* 0x84 */
+ { 0x2020, 0x86 }, /* 0x86 */
+ { 0x2021, 0x87 }, /* 0x87 */
+ { 0x2022, 0x95 }, /* 0x95 */
+ { 0x2026, 0x85 }, /* 0x85 */
+ { 0x2030, 0x89 }, /* 0x89 */
+ { 0x2039, 0x8B }, /* 0x8B */
+ { 0x203A, 0x9B }, /* 0x9B */
+ { 0x20AC, 0x80 }, /* 0x80 */
+ { 0x2122, 0x99 } /* 0x99 */
+};
+
+/*
+ * MacRoman lookup tables.
+ *
+ * __GUAC_MACROMAN_CODEPOINT maps MacRoman byte values to Unicode codepoints,
+ * allowing the codepoint to be determined by direct array access.
+ *
+ * __GUAC_UNICODE_TO_MACROMAN maps the Unicode codepoints representable in
+ * MacRoman back to their byte values. Because the Unicode lookup space is much
+ * larger than 256 bytes, the reverse mapping is stored as a sparse array
+ * sorted by codepoint, allowing the MacRoman byte to be found with bsearch()
+ * in O(log n) time.
+ *
+ * To regenerate both tables, run:
+ * python3 generate_macroman_table.py
+ *
+ * where generate_macroman_table.py contains:
+ * MACROMAN_HIGH_RANGE = range(0x80, 0x100)
+ *
+ * # Forward table: MacRoman byte -> Unicode codepoint
+ * pairs = []
+ * print("const static int __GUAC_MACROMAN_CODEPOINT[128] = {")
+ * for mac_byte in MACROMAN_HIGH_RANGE:
+ * unicode_char = bytes([mac_byte]).decode('mac_roman')
+ * unicode_codepoint = ord(unicode_char)
+ * pairs.append((unicode_codepoint, mac_byte))
+ * trailing_comma = "," if mac_byte < 0xFF else ""
+ * print(f" 0x{unicode_codepoint:04X}{trailing_comma} "
+ * f"/ * 0x{mac_byte:02X} - {unicode_char} * /")
+ * print("};")
+ * print()
+ *
+ * # Reverse table: Unicode codepoint -> MacRoman byte, sorted by codepoint
+ * print("typedef struct {")
+ * print(" int codepoint;")
+ * print(" unsigned char byte;")
+ * print("} guac_macroman_reverse_t;")
+ * print()
+ * print("const static guac_macroman_reverse_t "
+ * "__GUAC_UNICODE_TO_MACROMAN[ARRAY_SIZE(__GUAC_MACROMAN_CODEPOINT)] = {")
+ * for unicode_codepoint, mac_byte in sorted(pairs):
+ * unicode_char = chr(unicode_codepoint)
+ * print(f" {{ 0x{unicode_codepoint:04X}, 0x{mac_byte:02X} }}, "
+ * f"/ * 0x{mac_byte:02X} - {unicode_char} * /")
+ * print("};")
+ */
+
+const static int __GUAC_MACROMAN_CODEPOINT[128] = {
+ 0x00C4, /* 0x80 - Ä */
+ 0x00C5, /* 0x81 - Å */
+ 0x00C7, /* 0x82 - Ç */
+ 0x00C9, /* 0x83 - É */
+ 0x00D1, /* 0x84 - Ñ */
+ 0x00D6, /* 0x85 - Ö */
+ 0x00DC, /* 0x86 - Ü */
+ 0x00E1, /* 0x87 - á */
+ 0x00E0, /* 0x88 - à */
+ 0x00E2, /* 0x89 - â */
+ 0x00E4, /* 0x8A - ä */
+ 0x00E3, /* 0x8B - ã */
+ 0x00E5, /* 0x8C - å */
+ 0x00E7, /* 0x8D - ç */
+ 0x00E9, /* 0x8E - é */
+ 0x00E8, /* 0x8F - è */
+ 0x00EA, /* 0x90 - ê */
+ 0x00EB, /* 0x91 - ë */
+ 0x00ED, /* 0x92 - í */
+ 0x00EC, /* 0x93 - ì */
+ 0x00EE, /* 0x94 - î */
+ 0x00EF, /* 0x95 - ï */
+ 0x00F1, /* 0x96 - ñ */
+ 0x00F3, /* 0x97 - ó */
+ 0x00F2, /* 0x98 - ò */
+ 0x00F4, /* 0x99 - ô */
+ 0x00F6, /* 0x9A - ö */
+ 0x00F5, /* 0x9B - õ */
+ 0x00FA, /* 0x9C - ú */
+ 0x00F9, /* 0x9D - ù */
+ 0x00FB, /* 0x9E - û */
+ 0x00FC, /* 0x9F - ü */
+ 0x2020, /* 0xA0 - † */
+ 0x00B0, /* 0xA1 - ° */
+ 0x00A2, /* 0xA2 - ¢ */
+ 0x00A3, /* 0xA3 - £ */
+ 0x00A7, /* 0xA4 - § */
+ 0x2022, /* 0xA5 - • */
+ 0x00B6, /* 0xA6 - ¶ */
+ 0x00DF, /* 0xA7 - ß */
+ 0x00AE, /* 0xA8 - ® */
+ 0x00A9, /* 0xA9 - © */
+ 0x2122, /* 0xAA - ™ */
+ 0x00B4, /* 0xAB - ´ */
+ 0x00A8, /* 0xAC - ¨ */
+ 0x2260, /* 0xAD - ≠ */
+ 0x00C6, /* 0xAE - Æ */
+ 0x00D8, /* 0xAF - Ø */
+ 0x221E, /* 0xB0 - ∞ */
+ 0x00B1, /* 0xB1 - ± */
+ 0x2264, /* 0xB2 - ≤ */
+ 0x2265, /* 0xB3 - ≥ */
+ 0x00A5, /* 0xB4 - ¥ */
+ 0x00B5, /* 0xB5 - µ */
+ 0x2202, /* 0xB6 - ∂ */
+ 0x2211, /* 0xB7 - ∑ */
+ 0x220F, /* 0xB8 - ∏ */
+ 0x03C0, /* 0xB9 - π */
+ 0x222B, /* 0xBA - ∫ */
+ 0x00AA, /* 0xBB - ª */
+ 0x00BA, /* 0xBC - º */
+ 0x03A9, /* 0xBD - Ω */
+ 0x00E6, /* 0xBE - æ */
+ 0x00F8, /* 0xBF - ø */
+ 0x00BF, /* 0xC0 - ¿ */
+ 0x00A1, /* 0xC1 - ¡ */
+ 0x00AC, /* 0xC2 - ¬ */
+ 0x221A, /* 0xC3 - √ */
+ 0x0192, /* 0xC4 - ƒ */
+ 0x2248, /* 0xC5 - ≈ */
+ 0x2206, /* 0xC6 - ∆ */
+ 0x00AB, /* 0xC7 - « */
+ 0x00BB, /* 0xC8 - » */
+ 0x2026, /* 0xC9 - … */
+ 0x00A0, /* 0xCA - */
+ 0x00C0, /* 0xCB - À */
+ 0x00C3, /* 0xCC - Ã */
+ 0x00D5, /* 0xCD - Õ */
+ 0x0152, /* 0xCE - Œ */
+ 0x0153, /* 0xCF - œ */
+ 0x2013, /* 0xD0 - – */
+ 0x2014, /* 0xD1 - — */
+ 0x201C, /* 0xD2 - “ */
+ 0x201D, /* 0xD3 - ” */
+ 0x2018, /* 0xD4 - ‘ */
+ 0x2019, /* 0xD5 - ’ */
+ 0x00F7, /* 0xD6 - ÷ */
+ 0x25CA, /* 0xD7 - ◊ */
+ 0x00FF, /* 0xD8 - ÿ */
+ 0x0178, /* 0xD9 - Ÿ */
+ 0x2044, /* 0xDA - ⁄ */
+ 0x20AC, /* 0xDB - € */
+ 0x2039, /* 0xDC - ‹ */
+ 0x203A, /* 0xDD - › */
+ 0xFB01, /* 0xDE - fi */
+ 0xFB02, /* 0xDF - fl */
+ 0x2021, /* 0xE0 - ‡ */
+ 0x00B7, /* 0xE1 - · */
+ 0x201A, /* 0xE2 - ‚ */
+ 0x201E, /* 0xE3 - „ */
+ 0x2030, /* 0xE4 - ‰ */
+ 0x00C2, /* 0xE5 - Â */
+ 0x00CA, /* 0xE6 - Ê */
+ 0x00C1, /* 0xE7 - Á */
+ 0x00CB, /* 0xE8 - Ë */
+ 0x00C8, /* 0xE9 - È */
+ 0x00CD, /* 0xEA - Í */
+ 0x00CE, /* 0xEB - Î */
+ 0x00CF, /* 0xEC - Ï */
+ 0x00CC, /* 0xED - Ì */
+ 0x00D3, /* 0xEE - Ó */
+ 0x00D4, /* 0xEF - Ô */
+ 0xF8FF, /* 0xF0 - */
+ 0x00D2, /* 0xF1 - Ò */
+ 0x00DA, /* 0xF2 - Ú */
+ 0x00DB, /* 0xF3 - Û */
+ 0x00D9, /* 0xF4 - Ù */
+ 0x0131, /* 0xF5 - ı */
+ 0x02C6, /* 0xF6 - ˆ */
+ 0x02DC, /* 0xF7 - ˜ */
+ 0x00AF, /* 0xF8 - ¯ */
+ 0x02D8, /* 0xF9 - ˘ */
+ 0x02D9, /* 0xFA - ˙ */
+ 0x02DA, /* 0xFB - ˚ */
+ 0x00B8, /* 0xFC - ¸ */
+ 0x02DD, /* 0xFD - ˝ */
+ 0x02DB, /* 0xFE - ˛ */
+ 0x02C7 /* 0xFF - ˇ */
+};
+
+typedef struct {
+ int codepoint;
+ unsigned char byte;
+} guac_macroman_reverse_t;
+
+const static guac_macroman_reverse_t __GUAC_UNICODE_TO_MACROMAN[ARRAY_SIZE(__GUAC_MACROMAN_CODEPOINT)] = {
+ { 0x00A0, 0xCA }, /* 0xCA - */
+ { 0x00A1, 0xC1 }, /* 0xC1 - ¡ */
+ { 0x00A2, 0xA2 }, /* 0xA2 - ¢ */
+ { 0x00A3, 0xA3 }, /* 0xA3 - £ */
+ { 0x00A5, 0xB4 }, /* 0xB4 - ¥ */
+ { 0x00A7, 0xA4 }, /* 0xA4 - § */
+ { 0x00A8, 0xAC }, /* 0xAC - ¨ */
+ { 0x00A9, 0xA9 }, /* 0xA9 - © */
+ { 0x00AA, 0xBB }, /* 0xBB - ª */
+ { 0x00AB, 0xC7 }, /* 0xC7 - « */
+ { 0x00AC, 0xC2 }, /* 0xC2 - ¬ */
+ { 0x00AE, 0xA8 }, /* 0xA8 - ® */
+ { 0x00AF, 0xF8 }, /* 0xF8 - ¯ */
+ { 0x00B0, 0xA1 }, /* 0xA1 - ° */
+ { 0x00B1, 0xB1 }, /* 0xB1 - ± */
+ { 0x00B4, 0xAB }, /* 0xAB - ´ */
+ { 0x00B5, 0xB5 }, /* 0xB5 - µ */
+ { 0x00B6, 0xA6 }, /* 0xA6 - ¶ */
+ { 0x00B7, 0xE1 }, /* 0xE1 - · */
+ { 0x00B8, 0xFC }, /* 0xFC - ¸ */
+ { 0x00BA, 0xBC }, /* 0xBC - º */
+ { 0x00BB, 0xC8 }, /* 0xC8 - » */
+ { 0x00BF, 0xC0 }, /* 0xC0 - ¿ */
+ { 0x00C0, 0xCB }, /* 0xCB - À */
+ { 0x00C1, 0xE7 }, /* 0xE7 - Á */
+ { 0x00C2, 0xE5 }, /* 0xE5 - Â */
+ { 0x00C3, 0xCC }, /* 0xCC - Ã */
+ { 0x00C4, 0x80 }, /* 0x80 - Ä */
+ { 0x00C5, 0x81 }, /* 0x81 - Å */
+ { 0x00C6, 0xAE }, /* 0xAE - Æ */
+ { 0x00C7, 0x82 }, /* 0x82 - Ç */
+ { 0x00C8, 0xE9 }, /* 0xE9 - È */
+ { 0x00C9, 0x83 }, /* 0x83 - É */
+ { 0x00CA, 0xE6 }, /* 0xE6 - Ê */
+ { 0x00CB, 0xE8 }, /* 0xE8 - Ë */
+ { 0x00CC, 0xED }, /* 0xED - Ì */
+ { 0x00CD, 0xEA }, /* 0xEA - Í */
+ { 0x00CE, 0xEB }, /* 0xEB - Î */
+ { 0x00CF, 0xEC }, /* 0xEC - Ï */
+ { 0x00D1, 0x84 }, /* 0x84 - Ñ */
+ { 0x00D2, 0xF1 }, /* 0xF1 - Ò */
+ { 0x00D3, 0xEE }, /* 0xEE - Ó */
+ { 0x00D4, 0xEF }, /* 0xEF - Ô */
+ { 0x00D5, 0xCD }, /* 0xCD - Õ */
+ { 0x00D6, 0x85 }, /* 0x85 - Ö */
+ { 0x00D8, 0xAF }, /* 0xAF - Ø */
+ { 0x00D9, 0xF4 }, /* 0xF4 - Ù */
+ { 0x00DA, 0xF2 }, /* 0xF2 - Ú */
+ { 0x00DB, 0xF3 }, /* 0xF3 - Û */
+ { 0x00DC, 0x86 }, /* 0x86 - Ü */
+ { 0x00DF, 0xA7 }, /* 0xA7 - ß */
+ { 0x00E0, 0x88 }, /* 0x88 - à */
+ { 0x00E1, 0x87 }, /* 0x87 - á */
+ { 0x00E2, 0x89 }, /* 0x89 - â */
+ { 0x00E3, 0x8B }, /* 0x8B - ã */
+ { 0x00E4, 0x8A }, /* 0x8A - ä */
+ { 0x00E5, 0x8C }, /* 0x8C - å */
+ { 0x00E6, 0xBE }, /* 0xBE - æ */
+ { 0x00E7, 0x8D }, /* 0x8D - ç */
+ { 0x00E8, 0x8F }, /* 0x8F - è */
+ { 0x00E9, 0x8E }, /* 0x8E - é */
+ { 0x00EA, 0x90 }, /* 0x90 - ê */
+ { 0x00EB, 0x91 }, /* 0x91 - ë */
+ { 0x00EC, 0x93 }, /* 0x93 - ì */
+ { 0x00ED, 0x92 }, /* 0x92 - í */
+ { 0x00EE, 0x94 }, /* 0x94 - î */
+ { 0x00EF, 0x95 }, /* 0x95 - ï */
+ { 0x00F1, 0x96 }, /* 0x96 - ñ */
+ { 0x00F2, 0x98 }, /* 0x98 - ò */
+ { 0x00F3, 0x97 }, /* 0x97 - ó */
+ { 0x00F4, 0x99 }, /* 0x99 - ô */
+ { 0x00F5, 0x9B }, /* 0x9B - õ */
+ { 0x00F6, 0x9A }, /* 0x9A - ö */
+ { 0x00F7, 0xD6 }, /* 0xD6 - ÷ */
+ { 0x00F8, 0xBF }, /* 0xBF - ø */
+ { 0x00F9, 0x9D }, /* 0x9D - ù */
+ { 0x00FA, 0x9C }, /* 0x9C - ú */
+ { 0x00FB, 0x9E }, /* 0x9E - û */
+ { 0x00FC, 0x9F }, /* 0x9F - ü */
+ { 0x00FF, 0xD8 }, /* 0xD8 - ÿ */
+ { 0x0131, 0xF5 }, /* 0xF5 - ı */
+ { 0x0152, 0xCE }, /* 0xCE - Œ */
+ { 0x0153, 0xCF }, /* 0xCF - œ */
+ { 0x0178, 0xD9 }, /* 0xD9 - Ÿ */
+ { 0x0192, 0xC4 }, /* 0xC4 - ƒ */
+ { 0x02C6, 0xF6 }, /* 0xF6 - ˆ */
+ { 0x02C7, 0xFF }, /* 0xFF - ˇ */
+ { 0x02D8, 0xF9 }, /* 0xF9 - ˘ */
+ { 0x02D9, 0xFA }, /* 0xFA - ˙ */
+ { 0x02DA, 0xFB }, /* 0xFB - ˚ */
+ { 0x02DB, 0xFE }, /* 0xFE - ˛ */
+ { 0x02DC, 0xF7 }, /* 0xF7 - ˜ */
+ { 0x02DD, 0xFD }, /* 0xFD - ˝ */
+ { 0x03A9, 0xBD }, /* 0xBD - Ω */
+ { 0x03C0, 0xB9 }, /* 0xB9 - π */
+ { 0x2013, 0xD0 }, /* 0xD0 - – */
+ { 0x2014, 0xD1 }, /* 0xD1 - — */
+ { 0x2018, 0xD4 }, /* 0xD4 - ‘ */
+ { 0x2019, 0xD5 }, /* 0xD5 - ’ */
+ { 0x201A, 0xE2 }, /* 0xE2 - ‚ */
+ { 0x201C, 0xD2 }, /* 0xD2 - “ */
+ { 0x201D, 0xD3 }, /* 0xD3 - ” */
+ { 0x201E, 0xE3 }, /* 0xE3 - „ */
+ { 0x2020, 0xA0 }, /* 0xA0 - † */
+ { 0x2021, 0xE0 }, /* 0xE0 - ‡ */
+ { 0x2022, 0xA5 }, /* 0xA5 - • */
+ { 0x2026, 0xC9 }, /* 0xC9 - … */
+ { 0x2030, 0xE4 }, /* 0xE4 - ‰ */
+ { 0x2039, 0xDC }, /* 0xDC - ‹ */
+ { 0x203A, 0xDD }, /* 0xDD - › */
+ { 0x2044, 0xDA }, /* 0xDA - ⁄ */
+ { 0x20AC, 0xDB }, /* 0xDB - € */
+ { 0x2122, 0xAA }, /* 0xAA - ™ */
+ { 0x2202, 0xB6 }, /* 0xB6 - ∂ */
+ { 0x2206, 0xC6 }, /* 0xC6 - ∆ */
+ { 0x220F, 0xB8 }, /* 0xB8 - ∏ */
+ { 0x2211, 0xB7 }, /* 0xB7 - ∑ */
+ { 0x221A, 0xC3 }, /* 0xC3 - √ */
+ { 0x221E, 0xB0 }, /* 0xB0 - ∞ */
+ { 0x222B, 0xBA }, /* 0xBA - ∫ */
+ { 0x2248, 0xC5 }, /* 0xC5 - ≈ */
+ { 0x2260, 0xAD }, /* 0xAD - ≠ */
+ { 0x2264, 0xB2 }, /* 0xB2 - ≤ */
+ { 0x2265, 0xB3 }, /* 0xB3 - ≥ */
+ { 0x25CA, 0xD7 }, /* 0xD7 - ◊ */
+ { 0xF8FF, 0xF0 }, /* 0xF0 - */
+ { 0xFB01, 0xDE }, /* 0xDE - fi */
+ { 0xFB02, 0xDF }, /* 0xDF - fl */
+};
+
int guac_iconv(guac_iconv_read* reader, const char** input, int in_remaining,
guac_iconv_write* writer, char** output, int out_remaining) {
@@ -123,7 +479,7 @@ int GUAC_READ_CP1252(const char** input, int remaining) {
/* Replace value with exception if not identical to ISO-8859-1 */
if (value >= 0x80 && value <= 0x9F)
- value = __GUAC_RDP_CP1252_CODEPOINT[value - 0x80];
+ value = __GUAC_CP1252_CODEPOINT[value - 0x80];
(*input)++;
return value;
@@ -203,6 +559,47 @@ int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) {
return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining);
}
+/**
+ * Reads one Mac OS Roman character from the given input and advances the
+ * input pointer past it.
+ *
+ * @param input
+ *     Pointer to the current read position. Exactly one byte is consumed.
+ *
+ * @param remaining
+ *     The number of bytes remaining in the input. This implementation does
+ *     not consult the value.
+ *
+ * @return
+ *     The Unicode codepoint corresponding to the byte read.
+ */
+int GUAC_READ_MACROMAN(const char** input, int remaining) {
+
+    /* Every MacRoman character occupies exactly one byte */
+    const unsigned char raw = (unsigned char) **input;
+    *input += 1;
+
+    /* The lower half (0x00-0x7F) coincides with ASCII and needs no lookup */
+    if (raw < 0x80)
+        return raw;
+
+    /* The upper half is translated through the lookup table. The bound check
+     * keeps the access in range should the table ever be resized. */
+    const size_t table_index = (size_t) (raw - 0x80);
+    if (table_index < ARRAY_SIZE(__GUAC_MACROMAN_CODEPOINT))
+        return __GUAC_MACROMAN_CODEPOINT[table_index];
+
+    return raw;
+
+}
+
+/**
+ * Reads one Mac OS Roman character, normalizing line endings: both a bare CR
+ * ('\r') and a CRLF pair ("\r\n") are returned as a single Unix newline
+ * ('\n'). Classic Mac OS uses bare CR as its line separator, so bare CR must
+ * be normalized in addition to CRLF pairs.
+ *
+ * @param input
+ *     Pointer to the current read position. One byte is consumed, or two if
+ *     a CRLF pair is read.
+ *
+ * @param remaining
+ *     The number of bytes remaining in the input, used to avoid peeking past
+ *     the end of the buffer when a CR is the final byte.
+ *
+ * @return
+ *     The Unicode codepoint read, with CR and CRLF replaced by '\n'.
+ */
+int GUAC_READ_MACROMAN_NORMALIZED(const char** input, int remaining) {
+
+    const char* input_start = *input;
+    int value = GUAC_READ_MACROMAN(input, remaining);
+
+    /* Anything other than CR passes through untouched */
+    if (value != '\r')
+        return value;
+
+    /* Only peek at the following byte if one actually exists. Without this
+     * check, a CR in the final position of the input would cause a read one
+     * byte beyond the end of the buffer. */
+    int peek_remaining = remaining - (int) (*input - input_start);
+    if (peek_remaining > 0) {
+
+        const char* peek_input = *input;
+        int peek_value = GUAC_READ_MACROMAN(&peek_input, peek_remaining);
+
+        /* Consume the following LF if this is a CRLF pair */
+        if (peek_value == '\n')
+            *input = peek_input;
+
+    }
+
+    /* Bare CR and CRLF both become a Unix newline */
+    return '\n';
+
+}
+
void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
*output += guac_utf8_write(value, *output, remaining);
}
@@ -219,29 +616,49 @@ void GUAC_WRITE_UTF16(char** output, int remaining, int value) {
}
-void GUAC_WRITE_CP1252(char** output, int remaining, int value) {
-
- /* If not in ISO-8859-1 part of CP1252, check lookup table */
- if ((value >= 0x80 && value <= 0x9F) || value > 0xFF) {
-
- int i;
- int replacement_value = '?';
- const int* codepoint = __GUAC_RDP_CP1252_CODEPOINT;
-
- /* Search lookup table for value */
- for (i=0x80; i<=0x9F; i++, codepoint++) {
- if (*codepoint == value) {
- replacement_value = i;
- break;
- }
- }
+/**
+ * bsearch() comparator which orders a Unicode codepoint relative to an entry
+ * of the CP1252 reverse-lookup table.
+ *
+ * @param key
+ *     Pointer to the int holding the Unicode codepoint being looked up.
+ *
+ * @param elem
+ *     Pointer to the guac_cp1252_reverse_t entry to compare against.
+ *
+ * @return
+ *     -1 if the codepoint is less than the entry's codepoint, 1 if it is
+ *     greater, or 0 if the two are equal.
+ */
+static int guac_cp1252_reverse_cmp(const void* key, const void* elem) {
+
+    const int wanted = *((const int*) key);
+    const int candidate = ((const guac_cp1252_reverse_t*) elem)->codepoint;
+
+    if (wanted < candidate)
+        return -1;
+
+    if (wanted > candidate)
+        return 1;
+
+    return 0;
+
+}
- /* Replace value with discovered value (or question mark) */
- value = replacement_value;
+void GUAC_WRITE_CP1252(char** output, int remaining, int value) {
+ /* CP1252 matches Unicode directly for 0x00-0x7F and 0xA0-0xFF. */
+ if ((value >= 0x00 && value < 0x80) || (value >= 0xA0 && value <= 0xFF)) {
+ *((unsigned char*) *output) = (unsigned char) value;
+ (*output)++;
+ return;
}
- *((unsigned char*) *output) = (unsigned char) value;
+ /* The 0x80-0x9F range is a sparse remapping, so the reverse mapping is
+ * stored as a codepoint-sorted table and searched with bsearch(). Values
+ * outside the direct-write ranges also land here: representable exception
+ * codepoints map to 0x80-0x9F, while everything else falls back to '?'. */
+ const guac_cp1252_reverse_t* match = bsearch(
+ &value, /* key */
+ __GUAC_UNICODE_TO_CP1252, /* base */
+ ARRAY_SIZE(__GUAC_UNICODE_TO_CP1252), /* nmemb */
+ sizeof(*__GUAC_UNICODE_TO_CP1252), /* size */
+ guac_cp1252_reverse_cmp /* compar */
+ );
+
+ /* Write the matched byte, or '?' if the codepoint is not representable. */
+ *((unsigned char*) *output) = (match != NULL) ? match->byte : '?';
(*output)++;
}
@@ -305,3 +722,54 @@ void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) {
guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value);
}
+/**
+ * bsearch() comparator which orders a Unicode codepoint relative to an entry
+ * of the MacRoman reverse-lookup table.
+ *
+ * @param key
+ *     Pointer to the int holding the Unicode codepoint being looked up.
+ *
+ * @param elem
+ *     Pointer to the guac_macroman_reverse_t entry to compare against.
+ *
+ * @return
+ *     -1 if the codepoint is less than the entry's codepoint, 1 if it is
+ *     greater, or 0 if the two are equal.
+ */
+static int guac_macroman_reverse_cmp(const void* key, const void* elem) {
+
+    const int wanted = *((const int*) key);
+    const int candidate = ((const guac_macroman_reverse_t*) elem)->codepoint;
+
+    if (wanted < candidate)
+        return -1;
+
+    if (wanted > candidate)
+        return 1;
+
+    return 0;
+
+}
+
+/**
+ * Writes the given Unicode codepoint as a single Mac OS Roman byte, advancing
+ * the output pointer by one. Codepoints which MacRoman cannot represent are
+ * written as '?'.
+ *
+ * @param output
+ *     Pointer to the current write position. Exactly one byte is written.
+ *
+ * @param remaining
+ *     The number of bytes remaining in the output buffer. This implementation
+ *     does not consult the value.
+ *
+ * @param value
+ *     The Unicode codepoint to encode.
+ */
+void GUAC_WRITE_MACROMAN(char** output, int remaining, int value) {
+
+    /* Assume the codepoint cannot be represented until proven otherwise */
+    unsigned char encoded = '?';
+
+    /* Codepoints within ASCII map onto the identical MacRoman byte */
+    if (value >= 0x00 && value <= 0x7F)
+        encoded = (unsigned char) value;
+
+    /* Everything else must be located in the reverse table, which is sorted
+     * by codepoint so that it can be binary-searched */
+    else {
+
+        const guac_macroman_reverse_t* found = bsearch(&value,
+                __GUAC_UNICODE_TO_MACROMAN,
+                ARRAY_SIZE(__GUAC_UNICODE_TO_MACROMAN),
+                sizeof(__GUAC_UNICODE_TO_MACROMAN[0]),
+                guac_macroman_reverse_cmp);
+
+        if (found != NULL)
+            encoded = found->byte;
+
+    }
+
+    /* MacRoman is a single-byte encoding: exactly one byte is emitted */
+    *((unsigned char*) *output) = encoded;
+    (*output)++;
+
+}
+
+/*
+ * Mac OS Roman writer declared in common/iconv.h as writing '\n' as "\r\n".
+ * Forwards the codepoint to guac_iconv_write_crlf() with GUAC_WRITE_MACROMAN
+ * as the underlying writer.
+ */
+void GUAC_WRITE_MACROMAN_CRLF(char** output, int remaining, int value) {
+ guac_iconv_write_crlf(GUAC_WRITE_MACROMAN, output, remaining, value);
+}
diff --git a/src/common/tests/iconv/convert-test-data.c b/src/common/tests/iconv/convert-test-data.c
index 2032e9d4..d927ef3f 100644
--- a/src/common/tests/iconv/convert-test-data.c
+++ b/src/common/tests/iconv/convert-test-data.c
@@ -20,6 +20,15 @@
#include "common/iconv.h"
#include "convert-test-data.h"
+/*
+ * Test fixtures for the iconv conversion matrix.
+ *
+ * Each entry provides the same logical text encoded in one supported character
+ * set, with variants covering mixed, Unix, and CRLF line endings. The tests in
+ * convert.c iterate over every source/target encoding pair and verify that
+ * guac_iconv() preserves the text exactly or normalizes line endings as
+ * requested while converting between encodings.
+ */
encoding_test_parameters test_params[NUM_SUPPORTED_ENCODINGS] = {
/*
@@ -147,6 +156,37 @@ encoding_test_parameters test_params[NUM_SUPPORTED_ENCODINGS] = {
"pap\xE0 \xE8 bello\r\n"
"pap\xE0 \xE8 bello"
)
+ },
+
+ /*
+ * MacRoman
+ */
+
+ {
+ "MacRoman",
+ GUAC_READ_MACROMAN, GUAC_READ_MACROMAN_NORMALIZED,
+ GUAC_WRITE_MACROMAN, GUAC_WRITE_MACROMAN_CRLF,
+ .test_mixed = TEST_STRING(
+ "pap\x88 \x8F bello\n"
+ "pap\x88 \x8F bello\r\n"
+ "pap\x88 \x8F bello\n"
+ "pap\x88 \x8F bello\r\n"
+ "pap\x88 \x8F bello"
+ ),
+ .test_unix = TEST_STRING(
+ "pap\x88 \x8F bello\n"
+ "pap\x88 \x8F bello\n"
+ "pap\x88 \x8F bello\n"
+ "pap\x88 \x8F bello\n"
+ "pap\x88 \x8F bello"
+ ),
+ .test_windows = TEST_STRING(
+ "pap\x88 \x8F bello\r\n"
+ "pap\x88 \x8F bello\r\n"
+ "pap\x88 \x8F bello\r\n"
+ "pap\x88 \x8F bello\r\n"
+ "pap\x88 \x8F bello"
+ )
}
};
diff --git a/src/common/tests/iconv/convert-test-data.h b/src/common/tests/iconv/convert-test-data.h
index f682cfb3..67cf18ab 100644
--- a/src/common/tests/iconv/convert-test-data.h
+++ b/src/common/tests/iconv/convert-test-data.h
@@ -110,7 +110,7 @@ typedef struct encoding_test_parameters {
/**
* The total number of encodings supported by guac_iconv().
*/
-#define NUM_SUPPORTED_ENCODINGS 4
+#define NUM_SUPPORTED_ENCODINGS 5
/**
* Test parameters for each supported encoding. The test strings included each
diff --git a/src/common/tests/iconv/convert.c b/src/common/tests/iconv/convert.c
index d177ba01..7a940072 100644
--- a/src/common/tests/iconv/convert.c
+++ b/src/common/tests/iconv/convert.c
@@ -21,6 +21,7 @@
#include "convert-test-data.h"
#include <CUnit/CUnit.h>
+#include <guacamole/mem.h>
#include <stdio.h>
/**
@@ -46,15 +47,18 @@ static void verify_conversion(
guac_iconv_read* reader, test_string* in_string,
guac_iconv_write* writer, test_string* out_string) {
- char output[4096];
- char input[4096];
+ char* input = guac_mem_alloc(in_string->size);
+ char* output = guac_mem_alloc(out_string->size);
const char* current_input = input;
char* current_output = output;
+ CU_ASSERT_PTR_NOT_NULL_FATAL(input);
+ CU_ASSERT_PTR_NOT_NULL_FATAL(output);
+
memcpy(input, in_string->buffer, in_string->size);
- guac_iconv(reader, ¤t_input, sizeof(input),
- writer, ¤t_output, sizeof(output));
+ guac_iconv(reader, ¤t_input, in_string->size,
+ writer, ¤t_output, out_string->size);
/* Verify output length */
CU_ASSERT_EQUAL(out_string->size, current_output - output);
@@ -65,6 +69,9 @@ static void verify_conversion(
/* Verify output content */
CU_ASSERT_EQUAL(0, memcmp(output, out_string->buffer, out_string->size));
+ guac_mem_free(output);
+ guac_mem_free(input);
+
}
/**
@@ -127,3 +134,112 @@ void test_iconv__normalize_crlf() {
}
}
+/**
+ * Verifies that MacRoman encoding and decoding are symmetrical for every byte
+ * value: decoding a byte with GUAC_READ_MACROMAN and re-encoding the result
+ * with GUAC_WRITE_MACROMAN must reproduce the original byte.
+ */
+void test_iconv__macroman_encode_decode_symmetrical() {
+    /* Verify symmetry for each value in the MacRoman encoding range, i.e. the
+     * lookup value matches the reverse lookup value. */
+    for (int i = 0x00; i <= 0xFF; i++) {
+
+        /* Build a one-byte encoded input buffer, then view it through the
+         * reader's const char* interface. */
+        unsigned char input[] = { i };
+        const char* current_input = (const char*) input;
+
+        /* Decode one MacRoman byte to its Unicode codepoint. */
+        int codepoint = GUAC_READ_MACROMAN(&current_input, sizeof(input));
+
+        char output[4];
+        char* current_output = output;
+
+        /* Re-encode that codepoint and verify the original byte is restored. */
+        GUAC_WRITE_MACROMAN(&current_output, sizeof(output), codepoint);
+
+        /* Exactly one byte must have been read and one byte written */
+        CU_ASSERT_EQUAL(1, current_input - (const char*) input);
+        CU_ASSERT_EQUAL(1, current_output - output);
+        CU_ASSERT_EQUAL((unsigned char) i, (unsigned char) output[0]);
+
+    }
+}
+
+/**
+ * Verifies that codepoints outside the valid Unicode range written to
+ * CP1252 degrade to '?' instead of being truncated.
+ */
+void test_iconv__cp1252_invalid_codepoint() {
+    /* Exercise several clearly invalid Unicode codepoints. */
+    const int invalid_codepoints[] = { -1, 0x110000, 0x123456 };
+
+    for (int i = 0; i < (int) ARRAY_SIZE(invalid_codepoints); i++) {
+
+        char output[4];
+        char* current_output = output;
+        int actual_value;
+
+        /* Invalid codepoints must fall back to '?' rather than truncating. */
+        GUAC_WRITE_CP1252(&current_output, sizeof(output), invalid_codepoints[i]);
+        actual_value = (unsigned char) output[0];
+
+        /* Exactly one byte must have been written, and it must be '?' */
+        CU_ASSERT_EQUAL(1, current_output - output);
+        CU_ASSERT_EQUAL((unsigned char) '?', actual_value);
+
+    }
+}
+
+/**
+ * Verifies that GUAC_READ_MACROMAN_NORMALIZED normalizes both bare CR (\r)
+ * and CRLF (\r\n) sequences to Unix newlines (\n).
+ */
+void test_iconv__normalize_cr() {
+
+    /* Input contains a bare CR, a CRLF pair, and a plain LF. sizeof() on both
+     * arrays counts the terminating NUL, so the NUL is part of the converted
+     * and compared data. */
+    const unsigned char input_buf[] = "line1\rline2\r\nline3\n";
+    const unsigned char expected[] = "line1\nline2\nline3\n";
+
+    char* input = guac_mem_alloc(sizeof(input_buf));
+    char* output = guac_mem_alloc(sizeof(expected));
+
+    CU_ASSERT_PTR_NOT_NULL_FATAL(input);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(output);
+
+    memcpy(input, input_buf, sizeof(input_buf));
+
+    const char* current_input = input;
+    char* current_output = output;
+
+    guac_iconv(GUAC_READ_MACROMAN_NORMALIZED, &current_input, sizeof(input_buf),
+            GUAC_WRITE_UTF8, &current_output, sizeof(expected));
+
+    /* The output must match the expected text in both length and content */
+    CU_ASSERT_EQUAL((int) sizeof(expected), current_output - output);
+    CU_ASSERT_EQUAL(0, memcmp(output, expected, sizeof(expected)));
+
+    guac_mem_free(output);
+    guac_mem_free(input);
+
+}
+
+/**
+ * Verifies that codepoints outside the valid Unicode range written to
+ * MacRoman degrade to '?' instead of being truncated.
+ */
+void test_iconv__macroman_invalid_codepoint() {
+    /* Exercise several clearly invalid Unicode codepoints. */
+    const int invalid_codepoints[] = { -1, 0x110000, 0x123456 };
+
+    for (int i = 0; i < (int) ARRAY_SIZE(invalid_codepoints); i++) {
+
+        char output[4];
+        char* current_output = output;
+        int actual_value;
+
+        /* Invalid codepoints must fall back to '?' rather than truncating. */
+        GUAC_WRITE_MACROMAN(&current_output, sizeof(output), invalid_codepoints[i]);
+        actual_value = (unsigned char) output[0];
+
+        /* Exactly one byte must have been written, and it must be '?' */
+        CU_ASSERT_EQUAL(1, current_output - output);
+        CU_ASSERT_EQUAL((unsigned char) '?', actual_value);
+
+    }
+}
diff --git a/src/protocols/vnc/clipboard.c b/src/protocols/vnc/clipboard.c
index 3d4a400d..4af5b458 100644
--- a/src/protocols/vnc/clipboard.c
+++ b/src/protocols/vnc/clipboard.c
@@ -26,6 +26,7 @@
#include "vnc.h"
#include <guacamole/client.h>
+#include <guacamole/mem.h>
#include <guacamole/stream.h>
#include <guacamole/user.h>
#include <rfb/rfbclient.h>
@@ -65,6 +66,13 @@ int guac_vnc_set_clipboard_encoding(guac_client* client,
return 1;
}
+ /* MacRoman */
+ if (strcmp(name, "MacRoman") == 0) {
+ vnc_client->clipboard_reader = GUAC_READ_MACROMAN;
+ vnc_client->clipboard_writer = GUAC_WRITE_MACROMAN;
+ return 1;
+ }
+
/* If encoding unrecognized, warn and default to ISO8859-1 */
guac_client_log(client, GUAC_LOG_WARNING,
"Encoding '%s' is invalid. Defaulting to ISO8859-1.", name);
@@ -102,6 +110,7 @@ int guac_vnc_clipboard_blob_handler(guac_user* user, guac_stream* stream,
int guac_vnc_clipboard_end_handler(guac_user* user, guac_stream* stream) {
guac_vnc_client* vnc_client = (guac_vnc_client*) user->client->data;
+ guac_client* client = user->client;
rfbClient* rfb_client = vnc_client->rfb_client;
/* Send via VNC only if finished connecting */
@@ -133,7 +142,13 @@ int guac_vnc_clipboard_end_handler(guac_user* user, guac_stream* stream) {
}
/* Fall back to classic clipboard with encoding conversion */
- char output_data[GUAC_COMMON_CLIPBOARD_MAX_LENGTH];
+ char* output_data = guac_mem_alloc(GUAC_COMMON_CLIPBOARD_MAX_LENGTH);
+ if (output_data == NULL) {
+ guac_client_log(client, GUAC_LOG_WARNING,
+ "Clipboard conversion failed: unable to allocate output "
+ "buffer.");
+ return 1;
+ }
const char* input = vnc_client->clipboard->buffer;
char* output = output_data;
@@ -141,10 +156,11 @@ int guac_vnc_clipboard_end_handler(guac_user* user, guac_stream* stream) {
/* Convert clipboard contents */
guac_iconv(GUAC_READ_UTF8, &input, vnc_client->clipboard->length,
- writer, &output, sizeof(output_data));
+ writer, &output, GUAC_COMMON_CLIPBOARD_MAX_LENGTH);
SendClientCutText(rfb_client, output_data, output - output_data);
+ guac_mem_free(output_data);
return 0;
}
@@ -157,7 +173,13 @@ void guac_vnc_cut_text(rfbClient* client, const char* text, int textlen) {
if (vnc_client->settings->disable_copy)
return;
- char received_data[GUAC_COMMON_CLIPBOARD_MAX_LENGTH];
+ char* received_data = guac_mem_alloc(GUAC_COMMON_CLIPBOARD_MAX_LENGTH);
+ if (received_data == NULL) {
+ guac_client_log(gc, GUAC_LOG_WARNING,
+ "Clipboard conversion failed: unable to allocate receive "
+ "buffer.");
+ return;
+ }
const char* input = text;
char* output = received_data;
@@ -165,13 +187,15 @@ void guac_vnc_cut_text(rfbClient* client, const char* text, int textlen) {
/* Convert clipboard contents */
guac_iconv(reader, &input, textlen,
- GUAC_WRITE_UTF8, &output, sizeof(received_data));
+ GUAC_WRITE_UTF8, &output, GUAC_COMMON_CLIPBOARD_MAX_LENGTH);
/* Send converted data */
guac_common_clipboard_reset(vnc_client->clipboard, "text/plain");
guac_common_clipboard_append(vnc_client->clipboard, received_data, output - received_data);
guac_common_clipboard_send(vnc_client->clipboard, gc);
+ guac_mem_free(received_data);
+
}
void guac_vnc_cut_text_utf8(rfbClient* client, const char* text, int textlen) {
@@ -183,7 +207,13 @@ void guac_vnc_cut_text_utf8(rfbClient* client, const char* text, int textlen) {
if (vnc_client->settings->disable_copy)
return;
- char received_data[GUAC_COMMON_CLIPBOARD_MAX_LENGTH];
+ char* received_data = guac_mem_alloc(GUAC_COMMON_CLIPBOARD_MAX_LENGTH);
+ if (received_data == NULL) {
+ guac_client_log(gc, GUAC_LOG_WARNING,
+ "Clipboard conversion failed: unable to allocate UTF-8 "
+ "receive buffer.");
+ return;
+ }
const char* input = text;
char* output = received_data;
@@ -192,11 +222,13 @@ void guac_vnc_cut_text_utf8(rfbClient* client, const char* text, int textlen) {
* GUAC_COMMON_CLIPBOARD_MAX_LENGTH and replaces invalid lead bytes
* with the Unicode replacement character (U+FFFD, ?) */
guac_iconv(GUAC_READ_UTF8, &input, textlen,
- GUAC_WRITE_UTF8, &output, sizeof(received_data));
+ GUAC_WRITE_UTF8, &output, GUAC_COMMON_CLIPBOARD_MAX_LENGTH);
/* Send converted data */
guac_common_clipboard_reset(vnc_client->clipboard, "text/plain");
guac_common_clipboard_append(vnc_client->clipboard, received_data, output - received_data);
guac_common_clipboard_send(vnc_client->clipboard, gc);
+ guac_mem_free(received_data);
+
}
diff --git a/src/protocols/vnc/clipboard.h b/src/protocols/vnc/clipboard.h
index 48e08ab5..35360524 100644
--- a/src/protocols/vnc/clipboard.h
+++ b/src/protocols/vnc/clipboard.h
@@ -34,7 +34,7 @@
*
* @param name
* The name of the encoding to use for all clipboard data. Valid values
- * are: "ISO8859-1", "UTF-8", "UTF-16", "CP1252", or NULL.
+ * are: "ISO8859-1", "UTF-8", "UTF-16", "CP1252", "MacRoman", or NULL.
*
* @return
* Zero if the chosen encoding is standard for VNC, or non-zero if the VNC
diff --git a/src/protocols/vnc/settings.c b/src/protocols/vnc/settings.c
index 11d3075f..f3baf2d3 100644
--- a/src/protocols/vnc/settings.c
+++ b/src/protocols/vnc/settings.c
@@ -175,9 +175,9 @@ enum VNC_ARGS_IDX {
/**
* The encoding to use for clipboard data sent to the VNC server if we are
- * going to be deviating from the standard (which mandates ISO 8829-1).
- * Valid values are "ISO8829-1" (the only legal value with respect to the
- * VNC standard), "UTF-8", "UTF-16", and "CP2252".
+ * going to be deviating from the standard (which mandates ISO 8859-1).
+ * Valid values are "ISO8859-1" (the only legal value with respect to the
+ * VNC standard), "UTF-8", "UTF-16", "CP1252", and "MacRoman".
*/
IDX_CLIPBOARD_ENCODING,
@@ -276,8 +276,8 @@ enum VNC_ARGS_IDX {
IDX_SFTP_PASSPHRASE,
/**
- * The base64-encode public key to use when authentication with the SSH
- * server for SFTP using key-based authentication.
+ * The base64-encoded public key to use when authenticating with
+ * the SSH server for SFTP using key-based authentication.
*/
IDX_SFTP_PUBLIC_KEY,