Commit 84c180d88b for php.net

commit 84c180d88b291dda66a53073516f49f7ab046f82
Author: Alex Dowad <alexinbeijing@gmail.com>
Date:   Sat Sep 19 18:04:27 2020 +0200

    Add test suite for ISO-8859-x encoding verification and conversion

diff --git a/ext/mbstring/tests/data/8859-1.txt b/ext/mbstring/tests/data/8859-1.txt
new file mode 100644
index 0000000000..3a55afefc4
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-1.txt
@@ -0,0 +1,292 @@
+# 8859-1.TXT
+# Date: 2015-12-02 20:19:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-1:1998 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-1:1998 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-1 order.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x00A1	#	INVERTED EXCLAMATION MARK
+0xA2	0x00A2	#	CENT SIGN
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA5	0x00A5	#	YEN SIGN
+0xA6	0x00A6	#	BROKEN BAR
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x00AA	#	FEMININE ORDINAL INDICATOR
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x00AC	#	NOT SIGN
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x00AE	#	REGISTERED SIGN
+0xAF	0x00AF	#	MACRON
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x00B4	#	ACUTE ACCENT
+0xB5	0x00B5	#	MICRO SIGN
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x00B8	#	CEDILLA
+0xB9	0x00B9	#	SUPERSCRIPT ONE
+0xBA	0x00BA	#	MASCULINE ORDINAL INDICATOR
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x00BC	#	VULGAR FRACTION ONE QUARTER
+0xBD	0x00BD	#	VULGAR FRACTION ONE HALF
+0xBE	0x00BE	#	VULGAR FRACTION THREE QUARTERS
+0xBF	0x00BF	#	INVERTED QUESTION MARK
+0xC0	0x00C0	#	LATIN CAPITAL LETTER A WITH GRAVE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x00C3	#	LATIN CAPITAL LETTER A WITH TILDE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x00C8	#	LATIN CAPITAL LETTER E WITH GRAVE
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x00CA	#	LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x00CC	#	LATIN CAPITAL LETTER I WITH GRAVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0	0x00D0	#	LATIN CAPITAL LETTER ETH (Icelandic)
+0xD1	0x00D1	#	LATIN CAPITAL LETTER N WITH TILDE
+0xD2	0x00D2	#	LATIN CAPITAL LETTER O WITH GRAVE
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xD9	0x00D9	#	LATIN CAPITAL LETTER U WITH GRAVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x00DD	#	LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE	0x00DE	#	LATIN CAPITAL LETTER THORN (Icelandic)
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S (German)
+0xE0	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x00E3	#	LATIN SMALL LETTER A WITH TILDE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x00EA	#	LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF0	0x00F0	#	LATIN SMALL LETTER ETH (Icelandic)
+0xF1	0x00F1	#	LATIN SMALL LETTER N WITH TILDE
+0xF2	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xF9	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x00FD	#	LATIN SMALL LETTER Y WITH ACUTE
+0xFE	0x00FE	#	LATIN SMALL LETTER THORN (Icelandic)
+0xFF	0x00FF	#	LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/mbstring/tests/data/8859-10.txt b/ext/mbstring/tests/data/8859-10.txt
new file mode 100644
index 0000000000..0ec67d72ee
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-10.txt
@@ -0,0 +1,292 @@
+# 8859-10.TXT
+# Date: 2015-12-02 21:53:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-10:1998 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 October 11 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-10:1998 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-10 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-10 order.
+#
+#	Version history
+#	1.0 version new.
+#   1.1 corrected mistake in mapping of 0xA4
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0104	#	LATIN CAPITAL LETTER A WITH OGONEK
+0xA2	0x0112	#	LATIN CAPITAL LETTER E WITH MACRON
+0xA3	0x0122	#	LATIN CAPITAL LETTER G WITH CEDILLA
+0xA4	0x012A	#	LATIN CAPITAL LETTER I WITH MACRON
+0xA5	0x0128	#	LATIN CAPITAL LETTER I WITH TILDE
+0xA6	0x0136	#	LATIN CAPITAL LETTER K WITH CEDILLA
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x013B	#	LATIN CAPITAL LETTER L WITH CEDILLA
+0xA9	0x0110	#	LATIN CAPITAL LETTER D WITH STROKE
+0xAA	0x0160	#	LATIN CAPITAL LETTER S WITH CARON
+0xAB	0x0166	#	LATIN CAPITAL LETTER T WITH STROKE
+0xAC	0x017D	#	LATIN CAPITAL LETTER Z WITH CARON
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x016A	#	LATIN CAPITAL LETTER U WITH MACRON
+0xAF	0x014A	#	LATIN CAPITAL LETTER ENG
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x0105	#	LATIN SMALL LETTER A WITH OGONEK
+0xB2	0x0113	#	LATIN SMALL LETTER E WITH MACRON
+0xB3	0x0123	#	LATIN SMALL LETTER G WITH CEDILLA
+0xB4	0x012B	#	LATIN SMALL LETTER I WITH MACRON
+0xB5	0x0129	#	LATIN SMALL LETTER I WITH TILDE
+0xB6	0x0137	#	LATIN SMALL LETTER K WITH CEDILLA
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x013C	#	LATIN SMALL LETTER L WITH CEDILLA
+0xB9	0x0111	#	LATIN SMALL LETTER D WITH STROKE
+0xBA	0x0161	#	LATIN SMALL LETTER S WITH CARON
+0xBB	0x0167	#	LATIN SMALL LETTER T WITH STROKE
+0xBC	0x017E	#	LATIN SMALL LETTER Z WITH CARON
+0xBD	0x2015	#	HORIZONTAL BAR
+0xBE	0x016B	#	LATIN SMALL LETTER U WITH MACRON
+0xBF	0x014B	#	LATIN SMALL LETTER ENG
+0xC0	0x0100	#	LATIN CAPITAL LETTER A WITH MACRON
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x00C3	#	LATIN CAPITAL LETTER A WITH TILDE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x012E	#	LATIN CAPITAL LETTER I WITH OGONEK
+0xC8	0x010C	#	LATIN CAPITAL LETTER C WITH CARON
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x0118	#	LATIN CAPITAL LETTER E WITH OGONEK
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x0116	#	LATIN CAPITAL LETTER E WITH DOT ABOVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0	0x00D0	#	LATIN CAPITAL LETTER ETH (Icelandic)
+0xD1	0x0145	#	LATIN CAPITAL LETTER N WITH CEDILLA
+0xD2	0x014C	#	LATIN CAPITAL LETTER O WITH MACRON
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x0168	#	LATIN CAPITAL LETTER U WITH TILDE
+0xD8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xD9	0x0172	#	LATIN CAPITAL LETTER U WITH OGONEK
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x00DD	#	LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE	0x00DE	#	LATIN CAPITAL LETTER THORN (Icelandic)
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S (German)
+0xE0	0x0101	#	LATIN SMALL LETTER A WITH MACRON
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x00E3	#	LATIN SMALL LETTER A WITH TILDE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x012F	#	LATIN SMALL LETTER I WITH OGONEK
+0xE8	0x010D	#	LATIN SMALL LETTER C WITH CARON
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x0119	#	LATIN SMALL LETTER E WITH OGONEK
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x0117	#	LATIN SMALL LETTER E WITH DOT ABOVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF0	0x00F0	#	LATIN SMALL LETTER ETH (Icelandic)
+0xF1	0x0146	#	LATIN SMALL LETTER N WITH CEDILLA
+0xF2	0x014D	#	LATIN SMALL LETTER O WITH MACRON
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x0169	#	LATIN SMALL LETTER U WITH TILDE
+0xF8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xF9	0x0173	#	LATIN SMALL LETTER U WITH OGONEK
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x00FD	#	LATIN SMALL LETTER Y WITH ACUTE
+0xFE	0x00FE	#	LATIN SMALL LETTER THORN (Icelandic)
+0xFF	0x0138	#	LATIN SMALL LETTER KRA
diff --git a/ext/mbstring/tests/data/8859-11.txt b/ext/mbstring/tests/data/8859-11.txt
new file mode 100644
index 0000000000..5334e3c276
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-11.txt
@@ -0,0 +1,286 @@
+# 8859-11.TXT
+# Date: 2015-12-02 21:55:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-11:2001 to Unicode
+#	Unicode version:  3.2
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             2002 October 7 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-11:2001 characters map into Unicode.
+#
+#	ISO/IEC 8859-11:2001 is equivalent to TIS 620-2533 (1990) with
+#	the addition of 0xA0 NO-BREAK SPACE.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-11 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-11 order.
+#
+#	Version history:
+#		2002 October 7  Created
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0E01	#	THAI CHARACTER KO KAI
+0xA2	0x0E02	#	THAI CHARACTER KHO KHAI
+0xA3	0x0E03	#	THAI CHARACTER KHO KHUAT
+0xA4	0x0E04	#	THAI CHARACTER KHO KHWAI
+0xA5	0x0E05	#	THAI CHARACTER KHO KHON
+0xA6	0x0E06	#	THAI CHARACTER KHO RAKHANG
+0xA7	0x0E07	#	THAI CHARACTER NGO NGU
+0xA8	0x0E08	#	THAI CHARACTER CHO CHAN
+0xA9	0x0E09	#	THAI CHARACTER CHO CHING
+0xAA	0x0E0A	#	THAI CHARACTER CHO CHANG
+0xAB	0x0E0B	#	THAI CHARACTER SO SO
+0xAC	0x0E0C	#	THAI CHARACTER CHO CHOE
+0xAD	0x0E0D	#	THAI CHARACTER YO YING
+0xAE	0x0E0E	#	THAI CHARACTER DO CHADA
+0xAF	0x0E0F	#	THAI CHARACTER TO PATAK
+0xB0	0x0E10	#	THAI CHARACTER THO THAN
+0xB1	0x0E11	#	THAI CHARACTER THO NANGMONTHO
+0xB2	0x0E12	#	THAI CHARACTER THO PHUTHAO
+0xB3	0x0E13	#	THAI CHARACTER NO NEN
+0xB4	0x0E14	#	THAI CHARACTER DO DEK
+0xB5	0x0E15	#	THAI CHARACTER TO TAO
+0xB6	0x0E16	#	THAI CHARACTER THO THUNG
+0xB7	0x0E17	#	THAI CHARACTER THO THAHAN
+0xB8	0x0E18	#	THAI CHARACTER THO THONG
+0xB9	0x0E19	#	THAI CHARACTER NO NU
+0xBA	0x0E1A	#	THAI CHARACTER BO BAIMAI
+0xBB	0x0E1B	#	THAI CHARACTER PO PLA
+0xBC	0x0E1C	#	THAI CHARACTER PHO PHUNG
+0xBD	0x0E1D	#	THAI CHARACTER FO FA
+0xBE	0x0E1E	#	THAI CHARACTER PHO PHAN
+0xBF	0x0E1F	#	THAI CHARACTER FO FAN
+0xC0	0x0E20	#	THAI CHARACTER PHO SAMPHAO
+0xC1	0x0E21	#	THAI CHARACTER MO MA
+0xC2	0x0E22	#	THAI CHARACTER YO YAK
+0xC3	0x0E23	#	THAI CHARACTER RO RUA
+0xC4	0x0E24	#	THAI CHARACTER RU
+0xC5	0x0E25	#	THAI CHARACTER LO LING
+0xC6	0x0E26	#	THAI CHARACTER LU
+0xC7	0x0E27	#	THAI CHARACTER WO WAEN
+0xC8	0x0E28	#	THAI CHARACTER SO SALA
+0xC9	0x0E29	#	THAI CHARACTER SO RUSI
+0xCA	0x0E2A	#	THAI CHARACTER SO SUA
+0xCB	0x0E2B	#	THAI CHARACTER HO HIP
+0xCC	0x0E2C	#	THAI CHARACTER LO CHULA
+0xCD	0x0E2D	#	THAI CHARACTER O ANG
+0xCE	0x0E2E	#	THAI CHARACTER HO NOKHUK
+0xCF	0x0E2F	#	THAI CHARACTER PAIYANNOI
+0xD0	0x0E30	#	THAI CHARACTER SARA A
+0xD1	0x0E31	#	THAI CHARACTER MAI HAN-AKAT
+0xD2	0x0E32	#	THAI CHARACTER SARA AA
+0xD3	0x0E33	#	THAI CHARACTER SARA AM
+0xD4	0x0E34	#	THAI CHARACTER SARA I
+0xD5	0x0E35	#	THAI CHARACTER SARA II
+0xD6	0x0E36	#	THAI CHARACTER SARA UE
+0xD7	0x0E37	#	THAI CHARACTER SARA UEE
+0xD8	0x0E38	#	THAI CHARACTER SARA U
+0xD9	0x0E39	#	THAI CHARACTER SARA UU
+0xDA	0x0E3A	#	THAI CHARACTER PHINTHU
+0xDF	0x0E3F	#	THAI CURRENCY SYMBOL BAHT
+0xE0	0x0E40	#	THAI CHARACTER SARA E
+0xE1	0x0E41	#	THAI CHARACTER SARA AE
+0xE2	0x0E42	#	THAI CHARACTER SARA O
+0xE3	0x0E43	#	THAI CHARACTER SARA AI MAIMUAN
+0xE4	0x0E44	#	THAI CHARACTER SARA AI MAIMALAI
+0xE5	0x0E45	#	THAI CHARACTER LAKKHANGYAO
+0xE6	0x0E46	#	THAI CHARACTER MAIYAMOK
+0xE7	0x0E47	#	THAI CHARACTER MAITAIKHU
+0xE8	0x0E48	#	THAI CHARACTER MAI EK
+0xE9	0x0E49	#	THAI CHARACTER MAI THO
+0xEA	0x0E4A	#	THAI CHARACTER MAI TRI
+0xEB	0x0E4B	#	THAI CHARACTER MAI CHATTAWA
+0xEC	0x0E4C	#	THAI CHARACTER THANTHAKHAT
+0xED	0x0E4D	#	THAI CHARACTER NIKHAHIT
+0xEE	0x0E4E	#	THAI CHARACTER YAMAKKAN
+0xEF	0x0E4F	#	THAI CHARACTER FONGMAN
+0xF0	0x0E50	#	THAI DIGIT ZERO
+0xF1	0x0E51	#	THAI DIGIT ONE
+0xF2	0x0E52	#	THAI DIGIT TWO
+0xF3	0x0E53	#	THAI DIGIT THREE
+0xF4	0x0E54	#	THAI DIGIT FOUR
+0xF5	0x0E55	#	THAI DIGIT FIVE
+0xF6	0x0E56	#	THAI DIGIT SIX
+0xF7	0x0E57	#	THAI DIGIT SEVEN
+0xF8	0x0E58	#	THAI DIGIT EIGHT
+0xF9	0x0E59	#	THAI DIGIT NINE
+0xFA	0x0E5A	#	THAI CHARACTER ANGKHANKHU
+0xFB	0x0E5B	#	THAI CHARACTER KHOMUT
diff --git a/ext/mbstring/tests/data/8859-13.txt b/ext/mbstring/tests/data/8859-13.txt
new file mode 100644
index 0000000000..b613dc00e1
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-13.txt
@@ -0,0 +1,291 @@
+# 8859-13.TXT
+# Date: 2015-12-02 22:03:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-13:1998  to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-13:1998 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-13 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-13 order.
+#
+#	Version history
+#   1.0 version: created
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x201D	#	RIGHT DOUBLE QUOTATION MARK
+0xA2	0x00A2	#	CENT SIGN
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA5	0x201E	#	DOUBLE LOW-9 QUOTATION MARK
+0xA6	0x00A6	#	BROKEN BAR
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x0156	#	LATIN CAPITAL LETTER R WITH CEDILLA
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x00AC	#	NOT SIGN
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x00AE	#	REGISTERED SIGN
+0xAF	0x00C6	#	LATIN CAPITAL LETTER AE
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x201C	#	LEFT DOUBLE QUOTATION MARK
+0xB5	0x00B5	#	MICRO SIGN
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xB9	0x00B9	#	SUPERSCRIPT ONE
+0xBA	0x0157	#	LATIN SMALL LETTER R WITH CEDILLA
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x00BC	#	VULGAR FRACTION ONE QUARTER
+0xBD	0x00BD	#	VULGAR FRACTION ONE HALF
+0xBE	0x00BE	#	VULGAR FRACTION THREE QUARTERS
+0xBF	0x00E6	#	LATIN SMALL LETTER AE
+0xC0	0x0104	#	LATIN CAPITAL LETTER A WITH OGONEK
+0xC1	0x012E	#	LATIN CAPITAL LETTER I WITH OGONEK
+0xC2	0x0100	#	LATIN CAPITAL LETTER A WITH MACRON
+0xC3	0x0106	#	LATIN CAPITAL LETTER C WITH ACUTE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x0118	#	LATIN CAPITAL LETTER E WITH OGONEK
+0xC7	0x0112	#	LATIN CAPITAL LETTER E WITH MACRON
+0xC8	0x010C	#	LATIN CAPITAL LETTER C WITH CARON
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x0179	#	LATIN CAPITAL LETTER Z WITH ACUTE
+0xCB	0x0116	#	LATIN CAPITAL LETTER E WITH DOT ABOVE
+0xCC	0x0122	#	LATIN CAPITAL LETTER G WITH CEDILLA
+0xCD	0x0136	#	LATIN CAPITAL LETTER K WITH CEDILLA
+0xCE	0x012A	#	LATIN CAPITAL LETTER I WITH MACRON
+0xCF	0x013B	#	LATIN CAPITAL LETTER L WITH CEDILLA
+0xD0	0x0160	#	LATIN CAPITAL LETTER S WITH CARON
+0xD1	0x0143	#	LATIN CAPITAL LETTER N WITH ACUTE
+0xD2	0x0145	#	LATIN CAPITAL LETTER N WITH CEDILLA
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x014C	#	LATIN CAPITAL LETTER O WITH MACRON
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x0172	#	LATIN CAPITAL LETTER U WITH OGONEK
+0xD9	0x0141	#	LATIN CAPITAL LETTER L WITH STROKE
+0xDA	0x015A	#	LATIN CAPITAL LETTER S WITH ACUTE
+0xDB	0x016A	#	LATIN CAPITAL LETTER U WITH MACRON
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x017B	#	LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0xDE	0x017D	#	LATIN CAPITAL LETTER Z WITH CARON
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S (German)
+0xE0	0x0105	#	LATIN SMALL LETTER A WITH OGONEK
+0xE1	0x012F	#	LATIN SMALL LETTER I WITH OGONEK
+0xE2	0x0101	#	LATIN SMALL LETTER A WITH MACRON
+0xE3	0x0107	#	LATIN SMALL LETTER C WITH ACUTE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x0119	#	LATIN SMALL LETTER E WITH OGONEK
+0xE7	0x0113	#	LATIN SMALL LETTER E WITH MACRON
+0xE8	0x010D	#	LATIN SMALL LETTER C WITH CARON
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x017A	#	LATIN SMALL LETTER Z WITH ACUTE
+0xEB	0x0117	#	LATIN SMALL LETTER E WITH DOT ABOVE
+0xEC	0x0123	#	LATIN SMALL LETTER G WITH CEDILLA
+0xED	0x0137	#	LATIN SMALL LETTER K WITH CEDILLA
+0xEE	0x012B	#	LATIN SMALL LETTER I WITH MACRON
+0xEF	0x013C	#	LATIN SMALL LETTER L WITH CEDILLA
+0xF0	0x0161	#	LATIN SMALL LETTER S WITH CARON
+0xF1	0x0144	#	LATIN SMALL LETTER N WITH ACUTE
+0xF2	0x0146	#	LATIN SMALL LETTER N WITH CEDILLA
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x014D	#	LATIN SMALL LETTER O WITH MACRON
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x0173	#	LATIN SMALL LETTER U WITH OGONEK
+0xF9	0x0142	#	LATIN SMALL LETTER L WITH STROKE
+0xFA	0x015B	#	LATIN SMALL LETTER S WITH ACUTE
+0xFB	0x016B	#	LATIN SMALL LETTER U WITH MACRON
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x017C	#	LATIN SMALL LETTER Z WITH DOT ABOVE
+0xFE	0x017E	#	LATIN SMALL LETTER Z WITH CARON
+0xFF	0x2019	#	RIGHT SINGLE QUOTATION MARK
diff --git a/ext/mbstring/tests/data/8859-14.txt b/ext/mbstring/tests/data/8859-14.txt
new file mode 100644
index 0000000000..3c3a02466b
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-14.txt
@@ -0,0 +1,292 @@
+# 8859-14.TXT
+# Date: 2015-12-02 22:05:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-14:1998 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
+#			  Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-14:1998 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-14 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-14 order.
+#
+#	Version history
+#   1.0 version: created
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x1E02	#	LATIN CAPITAL LETTER B WITH DOT ABOVE
+0xA2	0x1E03	#	LATIN SMALL LETTER B WITH DOT ABOVE
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x010A	#	LATIN CAPITAL LETTER C WITH DOT ABOVE
+0xA5	0x010B	#	LATIN SMALL LETTER C WITH DOT ABOVE
+0xA6	0x1E0A	#	LATIN CAPITAL LETTER D WITH DOT ABOVE
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x1E80	#	LATIN CAPITAL LETTER W WITH GRAVE
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x1E82	#	LATIN CAPITAL LETTER W WITH ACUTE
+0xAB	0x1E0B	#	LATIN SMALL LETTER D WITH DOT ABOVE
+0xAC	0x1EF2	#	LATIN CAPITAL LETTER Y WITH GRAVE
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x00AE	#	REGISTERED SIGN
+0xAF	0x0178	#	LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xB0	0x1E1E	#	LATIN CAPITAL LETTER F WITH DOT ABOVE
+0xB1	0x1E1F	#	LATIN SMALL LETTER F WITH DOT ABOVE
+0xB2	0x0120	#	LATIN CAPITAL LETTER G WITH DOT ABOVE
+0xB3	0x0121	#	LATIN SMALL LETTER G WITH DOT ABOVE
+0xB4	0x1E40	#	LATIN CAPITAL LETTER M WITH DOT ABOVE
+0xB5	0x1E41	#	LATIN SMALL LETTER M WITH DOT ABOVE
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x1E56	#	LATIN CAPITAL LETTER P WITH DOT ABOVE
+0xB8	0x1E81	#	LATIN SMALL LETTER W WITH GRAVE
+0xB9	0x1E57	#	LATIN SMALL LETTER P WITH DOT ABOVE
+0xBA	0x1E83	#	LATIN SMALL LETTER W WITH ACUTE
+0xBB	0x1E60	#	LATIN CAPITAL LETTER S WITH DOT ABOVE
+0xBC	0x1EF3	#	LATIN SMALL LETTER Y WITH GRAVE
+0xBD	0x1E84	#	LATIN CAPITAL LETTER W WITH DIAERESIS
+0xBE	0x1E85	#	LATIN SMALL LETTER W WITH DIAERESIS
+0xBF	0x1E61	#	LATIN SMALL LETTER S WITH DOT ABOVE
+0xC0	0x00C0	#	LATIN CAPITAL LETTER A WITH GRAVE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x00C3	#	LATIN CAPITAL LETTER A WITH TILDE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x00C8	#	LATIN CAPITAL LETTER E WITH GRAVE
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x00CA	#	LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x00CC	#	LATIN CAPITAL LETTER I WITH GRAVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0	0x0174	#	LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0xD1	0x00D1	#	LATIN CAPITAL LETTER N WITH TILDE
+0xD2	0x00D2	#	LATIN CAPITAL LETTER O WITH GRAVE
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x1E6A	#	LATIN CAPITAL LETTER T WITH DOT ABOVE
+0xD8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xD9	0x00D9	#	LATIN CAPITAL LETTER U WITH GRAVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x00DD	#	LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE	0x0176	#	LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x00E3	#	LATIN SMALL LETTER A WITH TILDE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x00EA	#	LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF0	0x0175	#	LATIN SMALL LETTER W WITH CIRCUMFLEX
+0xF1	0x00F1	#	LATIN SMALL LETTER N WITH TILDE
+0xF2	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x1E6B	#	LATIN SMALL LETTER T WITH DOT ABOVE
+0xF8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xF9	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x00FD	#	LATIN SMALL LETTER Y WITH ACUTE
+0xFE	0x0177	#	LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0xFF	0x00FF	#	LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/mbstring/tests/data/8859-15.txt b/ext/mbstring/tests/data/8859-15.txt
new file mode 100644
index 0000000000..f21a763600
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-15.txt
@@ -0,0 +1,294 @@
+# 8859-15.TXT
+# Date: 2015-12-02 22:06:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-15:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
+#			  Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-15:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-15 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-15 order.
+#
+#	Version history
+#
+#	Version history
+#   1.0 version: created
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x00A1	#	INVERTED EXCLAMATION MARK
+0xA2	0x00A2	#	CENT SIGN
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x20AC	#	EURO SIGN
+0xA5	0x00A5	#	YEN SIGN
+0xA6	0x0160	#	LATIN CAPITAL LETTER S WITH CARON
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x0161	#	LATIN SMALL LETTER S WITH CARON
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x00AA	#	FEMININE ORDINAL INDICATOR
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x00AC	#	NOT SIGN
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x00AE	#	REGISTERED SIGN
+0xAF	0x00AF	#	MACRON
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x017D	#	LATIN CAPITAL LETTER Z WITH CARON
+0xB5	0x00B5	#	MICRO SIGN
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x017E	#	LATIN SMALL LETTER Z WITH CARON
+0xB9	0x00B9	#	SUPERSCRIPT ONE
+0xBA	0x00BA	#	MASCULINE ORDINAL INDICATOR
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x0152	#	LATIN CAPITAL LIGATURE OE
+0xBD	0x0153	#	LATIN SMALL LIGATURE OE
+0xBE	0x0178	#	LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xBF	0x00BF	#	INVERTED QUESTION MARK
+0xC0	0x00C0	#	LATIN CAPITAL LETTER A WITH GRAVE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x00C3	#	LATIN CAPITAL LETTER A WITH TILDE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x00C8	#	LATIN CAPITAL LETTER E WITH GRAVE
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x00CA	#	LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x00CC	#	LATIN CAPITAL LETTER I WITH GRAVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0	0x00D0	#	LATIN CAPITAL LETTER ETH
+0xD1	0x00D1	#	LATIN CAPITAL LETTER N WITH TILDE
+0xD2	0x00D2	#	LATIN CAPITAL LETTER O WITH GRAVE
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xD9	0x00D9	#	LATIN CAPITAL LETTER U WITH GRAVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x00DD	#	LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE	0x00DE	#	LATIN CAPITAL LETTER THORN
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x00E3	#	LATIN SMALL LETTER A WITH TILDE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x00EA	#	LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF0	0x00F0	#	LATIN SMALL LETTER ETH
+0xF1	0x00F1	#	LATIN SMALL LETTER N WITH TILDE
+0xF2	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xF9	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x00FD	#	LATIN SMALL LETTER Y WITH ACUTE
+0xFE	0x00FE	#	LATIN SMALL LETTER THORN
+0xFF	0x00FF	#	LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/mbstring/tests/data/8859-16.txt b/ext/mbstring/tests/data/8859-16.txt
new file mode 100644
index 0000000000..4a05225a78
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-16.txt
@@ -0,0 +1,293 @@
+# 8859-16.TXT
+# Date: 2015-12-02 22:08:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-16:2001 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             2001 July 26 (header updated: 2015 December 02)
+#	Authors:          Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
+#
+#	Copyright (c) 1999-2001 Unicode, Inc.  All Rights reserved.
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-16:2001 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-16 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-16 order.
+#
+#	Version history
+#   1.0 version: created
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0104	#	LATIN CAPITAL LETTER A WITH OGONEK
+0xA2	0x0105	#	LATIN SMALL LETTER A WITH OGONEK
+0xA3	0x0141	#	LATIN CAPITAL LETTER L WITH STROKE
+0xA4	0x20AC	#	EURO SIGN
+0xA5	0x201E	#	DOUBLE LOW-9 QUOTATION MARK
+0xA6	0x0160	#	LATIN CAPITAL LETTER S WITH CARON
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x0161	#	LATIN SMALL LETTER S WITH CARON
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x0218	#	LATIN CAPITAL LETTER S WITH COMMA BELOW
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x0179	#	LATIN CAPITAL LETTER Z WITH ACUTE
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x017A	#	LATIN SMALL LETTER Z WITH ACUTE
+0xAF	0x017B	#	LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x010C	#	LATIN CAPITAL LETTER C WITH CARON
+0xB3	0x0142	#	LATIN SMALL LETTER L WITH STROKE
+0xB4	0x017D	#	LATIN CAPITAL LETTER Z WITH CARON
+0xB5	0x201D	#	RIGHT DOUBLE QUOTATION MARK
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x017E	#	LATIN SMALL LETTER Z WITH CARON
+0xB9	0x010D	#	LATIN SMALL LETTER C WITH CARON
+0xBA	0x0219	#	LATIN SMALL LETTER S WITH COMMA BELOW
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x0152	#	LATIN CAPITAL LIGATURE OE
+0xBD	0x0153	#	LATIN SMALL LIGATURE OE
+0xBE	0x0178	#	LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xBF	0x017C	#	LATIN SMALL LETTER Z WITH DOT ABOVE
+0xC0	0x00C0	#	LATIN CAPITAL LETTER A WITH GRAVE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x0102	#	LATIN CAPITAL LETTER A WITH BREVE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x0106	#	LATIN CAPITAL LETTER C WITH ACUTE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x00C8	#	LATIN CAPITAL LETTER E WITH GRAVE
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x00CA	#	LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x00CC	#	LATIN CAPITAL LETTER I WITH GRAVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0	0x0110	#	LATIN CAPITAL LETTER D WITH STROKE
+0xD1	0x0143	#	LATIN CAPITAL LETTER N WITH ACUTE
+0xD2	0x00D2	#	LATIN CAPITAL LETTER O WITH GRAVE
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x0150	#	LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x015A	#	LATIN CAPITAL LETTER S WITH ACUTE
+0xD8	0x0170	#	LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0xD9	0x00D9	#	LATIN CAPITAL LETTER U WITH GRAVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x0118	#	LATIN CAPITAL LETTER E WITH OGONEK
+0xDE	0x021A	#	LATIN CAPITAL LETTER T WITH COMMA BELOW
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x0103	#	LATIN SMALL LETTER A WITH BREVE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x0107	#	LATIN SMALL LETTER C WITH ACUTE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x00EA	#	LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF0	0x0111	#	LATIN SMALL LETTER D WITH STROKE
+0xF1	0x0144	#	LATIN SMALL LETTER N WITH ACUTE
+0xF2	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x0151	#	LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x015B	#	LATIN SMALL LETTER S WITH ACUTE
+0xF8	0x0171	#	LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0xF9	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x0119	#	LATIN SMALL LETTER E WITH OGONEK
+0xFE	0x021B	#	LATIN SMALL LETTER T WITH COMMA BELOW
+0xFF	0x00FF	#	LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/mbstring/tests/data/8859-2.txt b/ext/mbstring/tests/data/8859-2.txt
new file mode 100644
index 0000000000..20da9ba993
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-2.txt
@@ -0,0 +1,292 @@
+# 8859-2.TXT
+# Date: 2015-12-02 21:34:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO 8859-2:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-2:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-2 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-2 order.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0104	#	LATIN CAPITAL LETTER A WITH OGONEK
+0xA2	0x02D8	#	BREVE
+0xA3	0x0141	#	LATIN CAPITAL LETTER L WITH STROKE
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA5	0x013D	#	LATIN CAPITAL LETTER L WITH CARON
+0xA6	0x015A	#	LATIN CAPITAL LETTER S WITH ACUTE
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x0160	#	LATIN CAPITAL LETTER S WITH CARON
+0xAA	0x015E	#	LATIN CAPITAL LETTER S WITH CEDILLA
+0xAB	0x0164	#	LATIN CAPITAL LETTER T WITH CARON
+0xAC	0x0179	#	LATIN CAPITAL LETTER Z WITH ACUTE
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x017D	#	LATIN CAPITAL LETTER Z WITH CARON
+0xAF	0x017B	#	LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x0105	#	LATIN SMALL LETTER A WITH OGONEK
+0xB2	0x02DB	#	OGONEK
+0xB3	0x0142	#	LATIN SMALL LETTER L WITH STROKE
+0xB4	0x00B4	#	ACUTE ACCENT
+0xB5	0x013E	#	LATIN SMALL LETTER L WITH CARON
+0xB6	0x015B	#	LATIN SMALL LETTER S WITH ACUTE
+0xB7	0x02C7	#	CARON
+0xB8	0x00B8	#	CEDILLA
+0xB9	0x0161	#	LATIN SMALL LETTER S WITH CARON
+0xBA	0x015F	#	LATIN SMALL LETTER S WITH CEDILLA
+0xBB	0x0165	#	LATIN SMALL LETTER T WITH CARON
+0xBC	0x017A	#	LATIN SMALL LETTER Z WITH ACUTE
+0xBD	0x02DD	#	DOUBLE ACUTE ACCENT
+0xBE	0x017E	#	LATIN SMALL LETTER Z WITH CARON
+0xBF	0x017C	#	LATIN SMALL LETTER Z WITH DOT ABOVE
+0xC0	0x0154	#	LATIN CAPITAL LETTER R WITH ACUTE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x0102	#	LATIN CAPITAL LETTER A WITH BREVE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x0139	#	LATIN CAPITAL LETTER L WITH ACUTE
+0xC6	0x0106	#	LATIN CAPITAL LETTER C WITH ACUTE
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x010C	#	LATIN CAPITAL LETTER C WITH CARON
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x0118	#	LATIN CAPITAL LETTER E WITH OGONEK
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x011A	#	LATIN CAPITAL LETTER E WITH CARON
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x010E	#	LATIN CAPITAL LETTER D WITH CARON
+0xD0	0x0110	#	LATIN CAPITAL LETTER D WITH STROKE
+0xD1	0x0143	#	LATIN CAPITAL LETTER N WITH ACUTE
+0xD2	0x0147	#	LATIN CAPITAL LETTER N WITH CARON
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x0150	#	LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x0158	#	LATIN CAPITAL LETTER R WITH CARON
+0xD9	0x016E	#	LATIN CAPITAL LETTER U WITH RING ABOVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x0170	#	LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x00DD	#	LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE	0x0162	#	LATIN CAPITAL LETTER T WITH CEDILLA
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x0155	#	LATIN SMALL LETTER R WITH ACUTE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x0103	#	LATIN SMALL LETTER A WITH BREVE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x013A	#	LATIN SMALL LETTER L WITH ACUTE
+0xE6	0x0107	#	LATIN SMALL LETTER C WITH ACUTE
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x010D	#	LATIN SMALL LETTER C WITH CARON
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x0119	#	LATIN SMALL LETTER E WITH OGONEK
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x011B	#	LATIN SMALL LETTER E WITH CARON
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x010F	#	LATIN SMALL LETTER D WITH CARON
+0xF0	0x0111	#	LATIN SMALL LETTER D WITH STROKE
+0xF1	0x0144	#	LATIN SMALL LETTER N WITH ACUTE
+0xF2	0x0148	#	LATIN SMALL LETTER N WITH CARON
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x0151	#	LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x0159	#	LATIN SMALL LETTER R WITH CARON
+0xF9	0x016F	#	LATIN SMALL LETTER U WITH RING ABOVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x0171	#	LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x00FD	#	LATIN SMALL LETTER Y WITH ACUTE
+0xFE	0x0163	#	LATIN SMALL LETTER T WITH CEDILLA
+0xFF	0x02D9	#	DOT ABOVE
diff --git a/ext/mbstring/tests/data/8859-3.txt b/ext/mbstring/tests/data/8859-3.txt
new file mode 100644
index 0000000000..5a6c5de547
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-3.txt
@@ -0,0 +1,285 @@
+# 8859-3.TXT
+# Date: 2015-12-02 21:39:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-3:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-3:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-3 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-3 order.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0126	#	LATIN CAPITAL LETTER H WITH STROKE
+0xA2	0x02D8	#	BREVE
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA6	0x0124	#	LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x0130	#	LATIN CAPITAL LETTER I WITH DOT ABOVE
+0xAA	0x015E	#	LATIN CAPITAL LETTER S WITH CEDILLA
+0xAB	0x011E	#	LATIN CAPITAL LETTER G WITH BREVE
+0xAC	0x0134	#	LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAF	0x017B	#	LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x0127	#	LATIN SMALL LETTER H WITH STROKE
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x00B4	#	ACUTE ACCENT
+0xB5	0x00B5	#	MICRO SIGN
+0xB6	0x0125	#	LATIN SMALL LETTER H WITH CIRCUMFLEX
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x00B8	#	CEDILLA
+0xB9	0x0131	#	LATIN SMALL LETTER DOTLESS I
+0xBA	0x015F	#	LATIN SMALL LETTER S WITH CEDILLA
+0xBB	0x011F	#	LATIN SMALL LETTER G WITH BREVE
+0xBC	0x0135	#	LATIN SMALL LETTER J WITH CIRCUMFLEX
+0xBD	0x00BD	#	VULGAR FRACTION ONE HALF
+0xBF	0x017C	#	LATIN SMALL LETTER Z WITH DOT ABOVE
+0xC0	0x00C0	#	LATIN CAPITAL LETTER A WITH GRAVE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x010A	#	LATIN CAPITAL LETTER C WITH DOT ABOVE
+0xC6	0x0108	#	LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x00C8	#	LATIN CAPITAL LETTER E WITH GRAVE
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x00CA	#	LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x00CC	#	LATIN CAPITAL LETTER I WITH GRAVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD1	0x00D1	#	LATIN CAPITAL LETTER N WITH TILDE
+0xD2	0x00D2	#	LATIN CAPITAL LETTER O WITH GRAVE
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x0120	#	LATIN CAPITAL LETTER G WITH DOT ABOVE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x011C	#	LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+0xD9	0x00D9	#	LATIN CAPITAL LETTER U WITH GRAVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x016C	#	LATIN CAPITAL LETTER U WITH BREVE
+0xDE	0x015C	#	LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x010B	#	LATIN SMALL LETTER C WITH DOT ABOVE
+0xE6	0x0109	#	LATIN SMALL LETTER C WITH CIRCUMFLEX
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x00EA	#	LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF1	0x00F1	#	LATIN SMALL LETTER N WITH TILDE
+0xF2	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x0121	#	LATIN SMALL LETTER G WITH DOT ABOVE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x011D	#	LATIN SMALL LETTER G WITH CIRCUMFLEX
+0xF9	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x016D	#	LATIN SMALL LETTER U WITH BREVE
+0xFE	0x015D	#	LATIN SMALL LETTER S WITH CIRCUMFLEX
+0xFF	0x02D9	#	DOT ABOVE
diff --git a/ext/mbstring/tests/data/8859-4.txt b/ext/mbstring/tests/data/8859-4.txt
new file mode 100644
index 0000000000..e2f5e725fd
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-4.txt
@@ -0,0 +1,292 @@
+# 8859-4.TXT
+# Date: 2015-12-02 21:41:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-4:1998 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-4:1998 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-4 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-4 order.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0104	#	LATIN CAPITAL LETTER A WITH OGONEK
+0xA2	0x0138	#	LATIN SMALL LETTER KRA
+0xA3	0x0156	#	LATIN CAPITAL LETTER R WITH CEDILLA
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA5	0x0128	#	LATIN CAPITAL LETTER I WITH TILDE
+0xA6	0x013B	#	LATIN CAPITAL LETTER L WITH CEDILLA
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x0160	#	LATIN CAPITAL LETTER S WITH CARON
+0xAA	0x0112	#	LATIN CAPITAL LETTER E WITH MACRON
+0xAB	0x0122	#	LATIN CAPITAL LETTER G WITH CEDILLA
+0xAC	0x0166	#	LATIN CAPITAL LETTER T WITH STROKE
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x017D	#	LATIN CAPITAL LETTER Z WITH CARON
+0xAF	0x00AF	#	MACRON
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x0105	#	LATIN SMALL LETTER A WITH OGONEK
+0xB2	0x02DB	#	OGONEK
+0xB3	0x0157	#	LATIN SMALL LETTER R WITH CEDILLA
+0xB4	0x00B4	#	ACUTE ACCENT
+0xB5	0x0129	#	LATIN SMALL LETTER I WITH TILDE
+0xB6	0x013C	#	LATIN SMALL LETTER L WITH CEDILLA
+0xB7	0x02C7	#	CARON
+0xB8	0x00B8	#	CEDILLA
+0xB9	0x0161	#	LATIN SMALL LETTER S WITH CARON
+0xBA	0x0113	#	LATIN SMALL LETTER E WITH MACRON
+0xBB	0x0123	#	LATIN SMALL LETTER G WITH CEDILLA
+0xBC	0x0167	#	LATIN SMALL LETTER T WITH STROKE
+0xBD	0x014A	#	LATIN CAPITAL LETTER ENG
+0xBE	0x017E	#	LATIN SMALL LETTER Z WITH CARON
+0xBF	0x014B	#	LATIN SMALL LETTER ENG
+0xC0	0x0100	#	LATIN CAPITAL LETTER A WITH MACRON
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x00C3	#	LATIN CAPITAL LETTER A WITH TILDE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x012E	#	LATIN CAPITAL LETTER I WITH OGONEK
+0xC8	0x010C	#	LATIN CAPITAL LETTER C WITH CARON
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x0118	#	LATIN CAPITAL LETTER E WITH OGONEK
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x0116	#	LATIN CAPITAL LETTER E WITH DOT ABOVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x012A	#	LATIN CAPITAL LETTER I WITH MACRON
+0xD0	0x0110	#	LATIN CAPITAL LETTER D WITH STROKE
+0xD1	0x0145	#	LATIN CAPITAL LETTER N WITH CEDILLA
+0xD2	0x014C	#	LATIN CAPITAL LETTER O WITH MACRON
+0xD3	0x0136	#	LATIN CAPITAL LETTER K WITH CEDILLA
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xD9	0x0172	#	LATIN CAPITAL LETTER U WITH OGONEK
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x0168	#	LATIN CAPITAL LETTER U WITH TILDE
+0xDE	0x016A	#	LATIN CAPITAL LETTER U WITH MACRON
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x0101	#	LATIN SMALL LETTER A WITH MACRON
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x00E3	#	LATIN SMALL LETTER A WITH TILDE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x012F	#	LATIN SMALL LETTER I WITH OGONEK
+0xE8	0x010D	#	LATIN SMALL LETTER C WITH CARON
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x0119	#	LATIN SMALL LETTER E WITH OGONEK
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x0117	#	LATIN SMALL LETTER E WITH DOT ABOVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x012B	#	LATIN SMALL LETTER I WITH MACRON
+0xF0	0x0111	#	LATIN SMALL LETTER D WITH STROKE
+0xF1	0x0146	#	LATIN SMALL LETTER N WITH CEDILLA
+0xF2	0x014D	#	LATIN SMALL LETTER O WITH MACRON
+0xF3	0x0137	#	LATIN SMALL LETTER K WITH CEDILLA
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xF9	0x0173	#	LATIN SMALL LETTER U WITH OGONEK
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x0169	#	LATIN SMALL LETTER U WITH TILDE
+0xFE	0x016B	#	LATIN SMALL LETTER U WITH MACRON
+0xFF	0x02D9	#	DOT ABOVE
diff --git a/ext/mbstring/tests/data/8859-5.txt b/ext/mbstring/tests/data/8859-5.txt
new file mode 100644
index 0000000000..a574c3feaf
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-5.txt
@@ -0,0 +1,292 @@
+# 8859-5.TXT
+# Date: 2015-12-02 21:43:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO 8859-5:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-5:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-5 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-5 order.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x0401	#	CYRILLIC CAPITAL LETTER IO
+0xA2	0x0402	#	CYRILLIC CAPITAL LETTER DJE
+0xA3	0x0403	#	CYRILLIC CAPITAL LETTER GJE
+0xA4	0x0404	#	CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xA5	0x0405	#	CYRILLIC CAPITAL LETTER DZE
+0xA6	0x0406	#	CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0xA7	0x0407	#	CYRILLIC CAPITAL LETTER YI
+0xA8	0x0408	#	CYRILLIC CAPITAL LETTER JE
+0xA9	0x0409	#	CYRILLIC CAPITAL LETTER LJE
+0xAA	0x040A	#	CYRILLIC CAPITAL LETTER NJE
+0xAB	0x040B	#	CYRILLIC CAPITAL LETTER TSHE
+0xAC	0x040C	#	CYRILLIC CAPITAL LETTER KJE
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x040E	#	CYRILLIC CAPITAL LETTER SHORT U
+0xAF	0x040F	#	CYRILLIC CAPITAL LETTER DZHE
+0xB0	0x0410	#	CYRILLIC CAPITAL LETTER A
+0xB1	0x0411	#	CYRILLIC CAPITAL LETTER BE
+0xB2	0x0412	#	CYRILLIC CAPITAL LETTER VE
+0xB3	0x0413	#	CYRILLIC CAPITAL LETTER GHE
+0xB4	0x0414	#	CYRILLIC CAPITAL LETTER DE
+0xB5	0x0415	#	CYRILLIC CAPITAL LETTER IE
+0xB6	0x0416	#	CYRILLIC CAPITAL LETTER ZHE
+0xB7	0x0417	#	CYRILLIC CAPITAL LETTER ZE
+0xB8	0x0418	#	CYRILLIC CAPITAL LETTER I
+0xB9	0x0419	#	CYRILLIC CAPITAL LETTER SHORT I
+0xBA	0x041A	#	CYRILLIC CAPITAL LETTER KA
+0xBB	0x041B	#	CYRILLIC CAPITAL LETTER EL
+0xBC	0x041C	#	CYRILLIC CAPITAL LETTER EM
+0xBD	0x041D	#	CYRILLIC CAPITAL LETTER EN
+0xBE	0x041E	#	CYRILLIC CAPITAL LETTER O
+0xBF	0x041F	#	CYRILLIC CAPITAL LETTER PE
+0xC0	0x0420	#	CYRILLIC CAPITAL LETTER ER
+0xC1	0x0421	#	CYRILLIC CAPITAL LETTER ES
+0xC2	0x0422	#	CYRILLIC CAPITAL LETTER TE
+0xC3	0x0423	#	CYRILLIC CAPITAL LETTER U
+0xC4	0x0424	#	CYRILLIC CAPITAL LETTER EF
+0xC5	0x0425	#	CYRILLIC CAPITAL LETTER HA
+0xC6	0x0426	#	CYRILLIC CAPITAL LETTER TSE
+0xC7	0x0427	#	CYRILLIC CAPITAL LETTER CHE
+0xC8	0x0428	#	CYRILLIC CAPITAL LETTER SHA
+0xC9	0x0429	#	CYRILLIC CAPITAL LETTER SHCHA
+0xCA	0x042A	#	CYRILLIC CAPITAL LETTER HARD SIGN
+0xCB	0x042B	#	CYRILLIC CAPITAL LETTER YERU
+0xCC	0x042C	#	CYRILLIC CAPITAL LETTER SOFT SIGN
+0xCD	0x042D	#	CYRILLIC CAPITAL LETTER E
+0xCE	0x042E	#	CYRILLIC CAPITAL LETTER YU
+0xCF	0x042F	#	CYRILLIC CAPITAL LETTER YA
+0xD0	0x0430	#	CYRILLIC SMALL LETTER A
+0xD1	0x0431	#	CYRILLIC SMALL LETTER BE
+0xD2	0x0432	#	CYRILLIC SMALL LETTER VE
+0xD3	0x0433	#	CYRILLIC SMALL LETTER GHE
+0xD4	0x0434	#	CYRILLIC SMALL LETTER DE
+0xD5	0x0435	#	CYRILLIC SMALL LETTER IE
+0xD6	0x0436	#	CYRILLIC SMALL LETTER ZHE
+0xD7	0x0437	#	CYRILLIC SMALL LETTER ZE
+0xD8	0x0438	#	CYRILLIC SMALL LETTER I
+0xD9	0x0439	#	CYRILLIC SMALL LETTER SHORT I
+0xDA	0x043A	#	CYRILLIC SMALL LETTER KA
+0xDB	0x043B	#	CYRILLIC SMALL LETTER EL
+0xDC	0x043C	#	CYRILLIC SMALL LETTER EM
+0xDD	0x043D	#	CYRILLIC SMALL LETTER EN
+0xDE	0x043E	#	CYRILLIC SMALL LETTER O
+0xDF	0x043F	#	CYRILLIC SMALL LETTER PE
+0xE0	0x0440	#	CYRILLIC SMALL LETTER ER
+0xE1	0x0441	#	CYRILLIC SMALL LETTER ES
+0xE2	0x0442	#	CYRILLIC SMALL LETTER TE
+0xE3	0x0443	#	CYRILLIC SMALL LETTER U
+0xE4	0x0444	#	CYRILLIC SMALL LETTER EF
+0xE5	0x0445	#	CYRILLIC SMALL LETTER HA
+0xE6	0x0446	#	CYRILLIC SMALL LETTER TSE
+0xE7	0x0447	#	CYRILLIC SMALL LETTER CHE
+0xE8	0x0448	#	CYRILLIC SMALL LETTER SHA
+0xE9	0x0449	#	CYRILLIC SMALL LETTER SHCHA
+0xEA	0x044A	#	CYRILLIC SMALL LETTER HARD SIGN
+0xEB	0x044B	#	CYRILLIC SMALL LETTER YERU
+0xEC	0x044C	#	CYRILLIC SMALL LETTER SOFT SIGN
+0xED	0x044D	#	CYRILLIC SMALL LETTER E
+0xEE	0x044E	#	CYRILLIC SMALL LETTER YU
+0xEF	0x044F	#	CYRILLIC SMALL LETTER YA
+0xF0	0x2116	#	NUMERO SIGN
+0xF1	0x0451	#	CYRILLIC SMALL LETTER IO
+0xF2	0x0452	#	CYRILLIC SMALL LETTER DJE
+0xF3	0x0453	#	CYRILLIC SMALL LETTER GJE
+0xF4	0x0454	#	CYRILLIC SMALL LETTER UKRAINIAN IE
+0xF5	0x0455	#	CYRILLIC SMALL LETTER DZE
+0xF6	0x0456	#	CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0xF7	0x0457	#	CYRILLIC SMALL LETTER YI
+0xF8	0x0458	#	CYRILLIC SMALL LETTER JE
+0xF9	0x0459	#	CYRILLIC SMALL LETTER LJE
+0xFA	0x045A	#	CYRILLIC SMALL LETTER NJE
+0xFB	0x045B	#	CYRILLIC SMALL LETTER TSHE
+0xFC	0x045C	#	CYRILLIC SMALL LETTER KJE
+0xFD	0x00A7	#	SECTION SIGN
+0xFE	0x045E	#	CYRILLIC SMALL LETTER SHORT U
+0xFF	0x045F	#	CYRILLIC SMALL LETTER DZHE
diff --git a/ext/mbstring/tests/data/8859-6.txt b/ext/mbstring/tests/data/8859-6.txt
new file mode 100644
index 0000000000..fed0af3631
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-6.txt
@@ -0,0 +1,249 @@
+# 8859-6.TXT
+# Date: 2015-12-02 21:44:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO 8859-6:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-6:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-6 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-6 order.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#       0x30..0x39 remapped to the ASCII digits (U+0030..U+0039) instead
+#       of the Arabic digits (U+0660..U+0669).
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA4	0x00A4	#	CURRENCY SIGN
+0xAC	0x060C	#	ARABIC COMMA
+0xAD	0x00AD	#	SOFT HYPHEN
+0xBB	0x061B	#	ARABIC SEMICOLON
+0xBF	0x061F	#	ARABIC QUESTION MARK
+0xC1	0x0621	#	ARABIC LETTER HAMZA
+0xC2	0x0622	#	ARABIC LETTER ALEF WITH MADDA ABOVE
+0xC3	0x0623	#	ARABIC LETTER ALEF WITH HAMZA ABOVE
+0xC4	0x0624	#	ARABIC LETTER WAW WITH HAMZA ABOVE
+0xC5	0x0625	#	ARABIC LETTER ALEF WITH HAMZA BELOW
+0xC6	0x0626	#	ARABIC LETTER YEH WITH HAMZA ABOVE
+0xC7	0x0627	#	ARABIC LETTER ALEF
+0xC8	0x0628	#	ARABIC LETTER BEH
+0xC9	0x0629	#	ARABIC LETTER TEH MARBUTA
+0xCA	0x062A	#	ARABIC LETTER TEH
+0xCB	0x062B	#	ARABIC LETTER THEH
+0xCC	0x062C	#	ARABIC LETTER JEEM
+0xCD	0x062D	#	ARABIC LETTER HAH
+0xCE	0x062E	#	ARABIC LETTER KHAH
+0xCF	0x062F	#	ARABIC LETTER DAL
+0xD0	0x0630	#	ARABIC LETTER THAL
+0xD1	0x0631	#	ARABIC LETTER REH
+0xD2	0x0632	#	ARABIC LETTER ZAIN
+0xD3	0x0633	#	ARABIC LETTER SEEN
+0xD4	0x0634	#	ARABIC LETTER SHEEN
+0xD5	0x0635	#	ARABIC LETTER SAD
+0xD6	0x0636	#	ARABIC LETTER DAD
+0xD7	0x0637	#	ARABIC LETTER TAH
+0xD8	0x0638	#	ARABIC LETTER ZAH
+0xD9	0x0639	#	ARABIC LETTER AIN
+0xDA	0x063A	#	ARABIC LETTER GHAIN
+0xE0	0x0640	#	ARABIC TATWEEL
+0xE1	0x0641	#	ARABIC LETTER FEH
+0xE2	0x0642	#	ARABIC LETTER QAF
+0xE3	0x0643	#	ARABIC LETTER KAF
+0xE4	0x0644	#	ARABIC LETTER LAM
+0xE5	0x0645	#	ARABIC LETTER MEEM
+0xE6	0x0646	#	ARABIC LETTER NOON
+0xE7	0x0647	#	ARABIC LETTER HEH
+0xE8	0x0648	#	ARABIC LETTER WAW
+0xE9	0x0649	#	ARABIC LETTER ALEF MAKSURA
+0xEA	0x064A	#	ARABIC LETTER YEH
+0xEB	0x064B	#	ARABIC FATHATAN
+0xEC	0x064C	#	ARABIC DAMMATAN
+0xED	0x064D	#	ARABIC KASRATAN
+0xEE	0x064E	#	ARABIC FATHA
+0xEF	0x064F	#	ARABIC DAMMA
+0xF0	0x0650	#	ARABIC KASRA
+0xF1	0x0651	#	ARABIC SHADDA
+0xF2	0x0652	#	ARABIC SUKUN
diff --git a/ext/mbstring/tests/data/8859-7.txt b/ext/mbstring/tests/data/8859-7.txt
new file mode 100644
index 0000000000..49131a4d94
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-7.txt
@@ -0,0 +1,299 @@
+# 8859-7.TXT
+# Date: 2015-12-02 21:47:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO 8859-7:2003 to Unicode
+#	Unicode version:  4.0
+#	Table version:    3.0
+#	Table format:     Format A
+#	Date:             2003-Nov-12 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO 8859-7:2003 characters map into Unicode.
+#
+#	ISO 8859-7:1987 is equivalent to ISO-IR-126, ELOT 928,
+#	and ECMA 118. ISO 8859-7:2003 adds two currency signs
+#	and one other character not in the earlier standard.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO 8859-7 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO 8859-7 order.
+#
+#	Version history
+#	1.0 version updates 0.1 version by adding mappings for all
+#	control characters.
+#	Remap 0xA1 to U+2018 (instead of 0x02BD) to match text of 8859-7
+#	Remap 0xA2 to U+2019 (instead of 0x02BC) to match text of 8859-7
+#
+#	2.0 version updates 1.0 version by adding mappings for the
+#	three newly added characters 0xA4, 0xA5, 0xAA.
+#
+#   3.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x2018	#	LEFT SINGLE QUOTATION MARK
+0xA2	0x2019	#	RIGHT SINGLE QUOTATION MARK
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x20AC	#	EURO SIGN
+0xA5	0x20AF	#	DRACHMA SIGN
+0xA6	0x00A6	#	BROKEN BAR
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x037A	#	GREEK YPOGEGRAMMENI
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x00AC	#	NOT SIGN
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAF	0x2015	#	HORIZONTAL BAR
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x0384	#	GREEK TONOS
+0xB5	0x0385	#	GREEK DIALYTIKA TONOS
+0xB6	0x0386	#	GREEK CAPITAL LETTER ALPHA WITH TONOS
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x0388	#	GREEK CAPITAL LETTER EPSILON WITH TONOS
+0xB9	0x0389	#	GREEK CAPITAL LETTER ETA WITH TONOS
+0xBA	0x038A	#	GREEK CAPITAL LETTER IOTA WITH TONOS
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x038C	#	GREEK CAPITAL LETTER OMICRON WITH TONOS
+0xBD	0x00BD	#	VULGAR FRACTION ONE HALF
+0xBE	0x038E	#	GREEK CAPITAL LETTER UPSILON WITH TONOS
+0xBF	0x038F	#	GREEK CAPITAL LETTER OMEGA WITH TONOS
+0xC0	0x0390	#	GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0xC1	0x0391	#	GREEK CAPITAL LETTER ALPHA
+0xC2	0x0392	#	GREEK CAPITAL LETTER BETA
+0xC3	0x0393	#	GREEK CAPITAL LETTER GAMMA
+0xC4	0x0394	#	GREEK CAPITAL LETTER DELTA
+0xC5	0x0395	#	GREEK CAPITAL LETTER EPSILON
+0xC6	0x0396	#	GREEK CAPITAL LETTER ZETA
+0xC7	0x0397	#	GREEK CAPITAL LETTER ETA
+0xC8	0x0398	#	GREEK CAPITAL LETTER THETA
+0xC9	0x0399	#	GREEK CAPITAL LETTER IOTA
+0xCA	0x039A	#	GREEK CAPITAL LETTER KAPPA
+0xCB	0x039B	#	GREEK CAPITAL LETTER LAMDA
+0xCC	0x039C	#	GREEK CAPITAL LETTER MU
+0xCD	0x039D	#	GREEK CAPITAL LETTER NU
+0xCE	0x039E	#	GREEK CAPITAL LETTER XI
+0xCF	0x039F	#	GREEK CAPITAL LETTER OMICRON
+0xD0	0x03A0	#	GREEK CAPITAL LETTER PI
+0xD1	0x03A1	#	GREEK CAPITAL LETTER RHO
+0xD3	0x03A3	#	GREEK CAPITAL LETTER SIGMA
+0xD4	0x03A4	#	GREEK CAPITAL LETTER TAU
+0xD5	0x03A5	#	GREEK CAPITAL LETTER UPSILON
+0xD6	0x03A6	#	GREEK CAPITAL LETTER PHI
+0xD7	0x03A7	#	GREEK CAPITAL LETTER CHI
+0xD8	0x03A8	#	GREEK CAPITAL LETTER PSI
+0xD9	0x03A9	#	GREEK CAPITAL LETTER OMEGA
+0xDA	0x03AA	#	GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+0xDB	0x03AB	#	GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+0xDC	0x03AC	#	GREEK SMALL LETTER ALPHA WITH TONOS
+0xDD	0x03AD	#	GREEK SMALL LETTER EPSILON WITH TONOS
+0xDE	0x03AE	#	GREEK SMALL LETTER ETA WITH TONOS
+0xDF	0x03AF	#	GREEK SMALL LETTER IOTA WITH TONOS
+0xE0	0x03B0	#	GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+0xE1	0x03B1	#	GREEK SMALL LETTER ALPHA
+0xE2	0x03B2	#	GREEK SMALL LETTER BETA
+0xE3	0x03B3	#	GREEK SMALL LETTER GAMMA
+0xE4	0x03B4	#	GREEK SMALL LETTER DELTA
+0xE5	0x03B5	#	GREEK SMALL LETTER EPSILON
+0xE6	0x03B6	#	GREEK SMALL LETTER ZETA
+0xE7	0x03B7	#	GREEK SMALL LETTER ETA
+0xE8	0x03B8	#	GREEK SMALL LETTER THETA
+0xE9	0x03B9	#	GREEK SMALL LETTER IOTA
+0xEA	0x03BA	#	GREEK SMALL LETTER KAPPA
+0xEB	0x03BB	#	GREEK SMALL LETTER LAMDA
+0xEC	0x03BC	#	GREEK SMALL LETTER MU
+0xED	0x03BD	#	GREEK SMALL LETTER NU
+0xEE	0x03BE	#	GREEK SMALL LETTER XI
+0xEF	0x03BF	#	GREEK SMALL LETTER OMICRON
+0xF0	0x03C0	#	GREEK SMALL LETTER PI
+0xF1	0x03C1	#	GREEK SMALL LETTER RHO
+0xF2	0x03C2	#	GREEK SMALL LETTER FINAL SIGMA
+0xF3	0x03C3	#	GREEK SMALL LETTER SIGMA
+0xF4	0x03C4	#	GREEK SMALL LETTER TAU
+0xF5	0x03C5	#	GREEK SMALL LETTER UPSILON
+0xF6	0x03C6	#	GREEK SMALL LETTER PHI
+0xF7	0x03C7	#	GREEK SMALL LETTER CHI
+0xF8	0x03C8	#	GREEK SMALL LETTER PSI
+0xF9	0x03C9	#	GREEK SMALL LETTER OMEGA
+0xFA	0x03CA	#	GREEK SMALL LETTER IOTA WITH DIALYTIKA
+0xFB	0x03CB	#	GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+0xFC	0x03CC	#	GREEK SMALL LETTER OMICRON WITH TONOS
+0xFD	0x03CD	#	GREEK SMALL LETTER UPSILON WITH TONOS
+0xFE	0x03CE	#	GREEK SMALL LETTER OMEGA WITH TONOS
diff --git a/ext/mbstring/tests/data/8859-8.txt b/ext/mbstring/tests/data/8859-8.txt
new file mode 100644
index 0000000000..ff068f7083
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-8.txt
@@ -0,0 +1,258 @@
+# 8859-8.TXT
+# Date: 2015-12-02 21:50:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-8:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             2000-Jan-03 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-8:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-8 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-8 order.
+#
+#	Version history
+#	1.0 version updates 0.1 version by adding mappings for all
+#	control characters.
+#   1.1 version updates to the published 8859-8:1999, correcting
+#          the mapping of 0xAF and adding mappings for LRM and RLM.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA2	0x00A2	#	CENT SIGN
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA5	0x00A5	#	YEN SIGN
+0xA6	0x00A6	#	BROKEN BAR
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x00D7	#	MULTIPLICATION SIGN
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x00AC	#	NOT SIGN
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x00AE	#	REGISTERED SIGN
+0xAF	0x00AF	#	MACRON
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x00B4	#	ACUTE ACCENT
+0xB5	0x00B5	#	MICRO SIGN
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x00B8	#	CEDILLA
+0xB9	0x00B9	#	SUPERSCRIPT ONE
+0xBA	0x00F7	#	DIVISION SIGN
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x00BC	#	VULGAR FRACTION ONE QUARTER
+0xBD	0x00BD	#	VULGAR FRACTION ONE HALF
+0xBE	0x00BE	#	VULGAR FRACTION THREE QUARTERS
+0xDF	0x2017	#	DOUBLE LOW LINE
+0xE0	0x05D0	#	HEBREW LETTER ALEF
+0xE1	0x05D1	#	HEBREW LETTER BET
+0xE2	0x05D2	#	HEBREW LETTER GIMEL
+0xE3	0x05D3	#	HEBREW LETTER DALET
+0xE4	0x05D4	#	HEBREW LETTER HE
+0xE5	0x05D5	#	HEBREW LETTER VAV
+0xE6	0x05D6	#	HEBREW LETTER ZAYIN
+0xE7	0x05D7	#	HEBREW LETTER HET
+0xE8	0x05D8	#	HEBREW LETTER TET
+0xE9	0x05D9	#	HEBREW LETTER YOD
+0xEA	0x05DA	#	HEBREW LETTER FINAL KAF
+0xEB	0x05DB	#	HEBREW LETTER KAF
+0xEC	0x05DC	#	HEBREW LETTER LAMED
+0xED	0x05DD	#	HEBREW LETTER FINAL MEM
+0xEE	0x05DE	#	HEBREW LETTER MEM
+0xEF	0x05DF	#	HEBREW LETTER FINAL NUN
+0xF0	0x05E0	#	HEBREW LETTER NUN
+0xF1	0x05E1	#	HEBREW LETTER SAMEKH
+0xF2	0x05E2	#	HEBREW LETTER AYIN
+0xF3	0x05E3	#	HEBREW LETTER FINAL PE
+0xF4	0x05E4	#	HEBREW LETTER PE
+0xF5	0x05E5	#	HEBREW LETTER FINAL TSADI
+0xF6	0x05E6	#	HEBREW LETTER TSADI
+0xF7	0x05E7	#	HEBREW LETTER QOF
+0xF8	0x05E8	#	HEBREW LETTER RESH
+0xF9	0x05E9	#	HEBREW LETTER SHIN
+0xFA	0x05EA	#	HEBREW LETTER TAV
+0xFD	0x200E	#	LEFT-TO-RIGHT MARK
+0xFE	0x200F	#	RIGHT-TO-LEFT MARK
diff --git a/ext/mbstring/tests/data/8859-9.txt b/ext/mbstring/tests/data/8859-9.txt
new file mode 100644
index 0000000000..d0c9ff94e4
--- /dev/null
+++ b/ext/mbstring/tests/data/8859-9.txt
@@ -0,0 +1,294 @@
+# 8859-9.TXT
+# Date: 2015-12-02 21:51:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+#	Name:             ISO/IEC 8859-9:1999 to Unicode
+#	Unicode version:  3.0
+#	Table version:    2.0
+#	Table format:     Format A
+#	Date:             1999 July 27 (header updated: 2015 December 02)
+#	Authors:          Ken Whistler <ken@unicode.org>
+#
+#	General notes:
+#
+#	This table contains the data the Unicode Consortium has on how
+#       ISO/IEC 8859-9:1999 characters map into Unicode.
+#
+#	Format:  Three tab-separated columns
+#		 Column #1 is the ISO/IEC 8859-9 code (in hex as 0xXX)
+#		 Column #2 is the Unicode (in hex as 0xXXXX)
+#		 Column #3 the Unicode name (follows a comment sign, '#')
+#
+#	The entries are in ISO/IEC 8859-9 order.
+#
+#	ISO/IEC 8859-9 is also equivalent to ISO-IR-148.
+#
+#	Version history
+#   1.0 version: updates 0.1 version by adding mappings for all
+#       control characters.
+#   2.0 version: updates to copyright notice and terms of use; no
+#       changes to character mappings
+#
+#	Updated versions of this file may be found in:
+#		http://www.unicode.org/Public/MAPPINGS/
+#
+#	Any comments or problems, contact us at:
+#       http://www.unicode.org/reporting.html
+#
+0x00	0x0000	#	NULL
+0x01	0x0001	#	START OF HEADING
+0x02	0x0002	#	START OF TEXT
+0x03	0x0003	#	END OF TEXT
+0x04	0x0004	#	END OF TRANSMISSION
+0x05	0x0005	#	ENQUIRY
+0x06	0x0006	#	ACKNOWLEDGE
+0x07	0x0007	#	BELL
+0x08	0x0008	#	BACKSPACE
+0x09	0x0009	#	HORIZONTAL TABULATION
+0x0A	0x000A	#	LINE FEED
+0x0B	0x000B	#	VERTICAL TABULATION
+0x0C	0x000C	#	FORM FEED
+0x0D	0x000D	#	CARRIAGE RETURN
+0x0E	0x000E	#	SHIFT OUT
+0x0F	0x000F	#	SHIFT IN
+0x10	0x0010	#	DATA LINK ESCAPE
+0x11	0x0011	#	DEVICE CONTROL ONE
+0x12	0x0012	#	DEVICE CONTROL TWO
+0x13	0x0013	#	DEVICE CONTROL THREE
+0x14	0x0014	#	DEVICE CONTROL FOUR
+0x15	0x0015	#	NEGATIVE ACKNOWLEDGE
+0x16	0x0016	#	SYNCHRONOUS IDLE
+0x17	0x0017	#	END OF TRANSMISSION BLOCK
+0x18	0x0018	#	CANCEL
+0x19	0x0019	#	END OF MEDIUM
+0x1A	0x001A	#	SUBSTITUTE
+0x1B	0x001B	#	ESCAPE
+0x1C	0x001C	#	FILE SEPARATOR
+0x1D	0x001D	#	GROUP SEPARATOR
+0x1E	0x001E	#	RECORD SEPARATOR
+0x1F	0x001F	#	UNIT SEPARATOR
+0x20	0x0020	#	SPACE
+0x21	0x0021	#	EXCLAMATION MARK
+0x22	0x0022	#	QUOTATION MARK
+0x23	0x0023	#	NUMBER SIGN
+0x24	0x0024	#	DOLLAR SIGN
+0x25	0x0025	#	PERCENT SIGN
+0x26	0x0026	#	AMPERSAND
+0x27	0x0027	#	APOSTROPHE
+0x28	0x0028	#	LEFT PARENTHESIS
+0x29	0x0029	#	RIGHT PARENTHESIS
+0x2A	0x002A	#	ASTERISK
+0x2B	0x002B	#	PLUS SIGN
+0x2C	0x002C	#	COMMA
+0x2D	0x002D	#	HYPHEN-MINUS
+0x2E	0x002E	#	FULL STOP
+0x2F	0x002F	#	SOLIDUS
+0x30	0x0030	#	DIGIT ZERO
+0x31	0x0031	#	DIGIT ONE
+0x32	0x0032	#	DIGIT TWO
+0x33	0x0033	#	DIGIT THREE
+0x34	0x0034	#	DIGIT FOUR
+0x35	0x0035	#	DIGIT FIVE
+0x36	0x0036	#	DIGIT SIX
+0x37	0x0037	#	DIGIT SEVEN
+0x38	0x0038	#	DIGIT EIGHT
+0x39	0x0039	#	DIGIT NINE
+0x3A	0x003A	#	COLON
+0x3B	0x003B	#	SEMICOLON
+0x3C	0x003C	#	LESS-THAN SIGN
+0x3D	0x003D	#	EQUALS SIGN
+0x3E	0x003E	#	GREATER-THAN SIGN
+0x3F	0x003F	#	QUESTION MARK
+0x40	0x0040	#	COMMERCIAL AT
+0x41	0x0041	#	LATIN CAPITAL LETTER A
+0x42	0x0042	#	LATIN CAPITAL LETTER B
+0x43	0x0043	#	LATIN CAPITAL LETTER C
+0x44	0x0044	#	LATIN CAPITAL LETTER D
+0x45	0x0045	#	LATIN CAPITAL LETTER E
+0x46	0x0046	#	LATIN CAPITAL LETTER F
+0x47	0x0047	#	LATIN CAPITAL LETTER G
+0x48	0x0048	#	LATIN CAPITAL LETTER H
+0x49	0x0049	#	LATIN CAPITAL LETTER I
+0x4A	0x004A	#	LATIN CAPITAL LETTER J
+0x4B	0x004B	#	LATIN CAPITAL LETTER K
+0x4C	0x004C	#	LATIN CAPITAL LETTER L
+0x4D	0x004D	#	LATIN CAPITAL LETTER M
+0x4E	0x004E	#	LATIN CAPITAL LETTER N
+0x4F	0x004F	#	LATIN CAPITAL LETTER O
+0x50	0x0050	#	LATIN CAPITAL LETTER P
+0x51	0x0051	#	LATIN CAPITAL LETTER Q
+0x52	0x0052	#	LATIN CAPITAL LETTER R
+0x53	0x0053	#	LATIN CAPITAL LETTER S
+0x54	0x0054	#	LATIN CAPITAL LETTER T
+0x55	0x0055	#	LATIN CAPITAL LETTER U
+0x56	0x0056	#	LATIN CAPITAL LETTER V
+0x57	0x0057	#	LATIN CAPITAL LETTER W
+0x58	0x0058	#	LATIN CAPITAL LETTER X
+0x59	0x0059	#	LATIN CAPITAL LETTER Y
+0x5A	0x005A	#	LATIN CAPITAL LETTER Z
+0x5B	0x005B	#	LEFT SQUARE BRACKET
+0x5C	0x005C	#	REVERSE SOLIDUS
+0x5D	0x005D	#	RIGHT SQUARE BRACKET
+0x5E	0x005E	#	CIRCUMFLEX ACCENT
+0x5F	0x005F	#	LOW LINE
+0x60	0x0060	#	GRAVE ACCENT
+0x61	0x0061	#	LATIN SMALL LETTER A
+0x62	0x0062	#	LATIN SMALL LETTER B
+0x63	0x0063	#	LATIN SMALL LETTER C
+0x64	0x0064	#	LATIN SMALL LETTER D
+0x65	0x0065	#	LATIN SMALL LETTER E
+0x66	0x0066	#	LATIN SMALL LETTER F
+0x67	0x0067	#	LATIN SMALL LETTER G
+0x68	0x0068	#	LATIN SMALL LETTER H
+0x69	0x0069	#	LATIN SMALL LETTER I
+0x6A	0x006A	#	LATIN SMALL LETTER J
+0x6B	0x006B	#	LATIN SMALL LETTER K
+0x6C	0x006C	#	LATIN SMALL LETTER L
+0x6D	0x006D	#	LATIN SMALL LETTER M
+0x6E	0x006E	#	LATIN SMALL LETTER N
+0x6F	0x006F	#	LATIN SMALL LETTER O
+0x70	0x0070	#	LATIN SMALL LETTER P
+0x71	0x0071	#	LATIN SMALL LETTER Q
+0x72	0x0072	#	LATIN SMALL LETTER R
+0x73	0x0073	#	LATIN SMALL LETTER S
+0x74	0x0074	#	LATIN SMALL LETTER T
+0x75	0x0075	#	LATIN SMALL LETTER U
+0x76	0x0076	#	LATIN SMALL LETTER V
+0x77	0x0077	#	LATIN SMALL LETTER W
+0x78	0x0078	#	LATIN SMALL LETTER X
+0x79	0x0079	#	LATIN SMALL LETTER Y
+0x7A	0x007A	#	LATIN SMALL LETTER Z
+0x7B	0x007B	#	LEFT CURLY BRACKET
+0x7C	0x007C	#	VERTICAL LINE
+0x7D	0x007D	#	RIGHT CURLY BRACKET
+0x7E	0x007E	#	TILDE
+0x7F	0x007F	#	DELETE
+0x80	0x0080	#	<control>
+0x81	0x0081	#	<control>
+0x82	0x0082	#	<control>
+0x83	0x0083	#	<control>
+0x84	0x0084	#	<control>
+0x85	0x0085	#	<control>
+0x86	0x0086	#	<control>
+0x87	0x0087	#	<control>
+0x88	0x0088	#	<control>
+0x89	0x0089	#	<control>
+0x8A	0x008A	#	<control>
+0x8B	0x008B	#	<control>
+0x8C	0x008C	#	<control>
+0x8D	0x008D	#	<control>
+0x8E	0x008E	#	<control>
+0x8F	0x008F	#	<control>
+0x90	0x0090	#	<control>
+0x91	0x0091	#	<control>
+0x92	0x0092	#	<control>
+0x93	0x0093	#	<control>
+0x94	0x0094	#	<control>
+0x95	0x0095	#	<control>
+0x96	0x0096	#	<control>
+0x97	0x0097	#	<control>
+0x98	0x0098	#	<control>
+0x99	0x0099	#	<control>
+0x9A	0x009A	#	<control>
+0x9B	0x009B	#	<control>
+0x9C	0x009C	#	<control>
+0x9D	0x009D	#	<control>
+0x9E	0x009E	#	<control>
+0x9F	0x009F	#	<control>
+0xA0	0x00A0	#	NO-BREAK SPACE
+0xA1	0x00A1	#	INVERTED EXCLAMATION MARK
+0xA2	0x00A2	#	CENT SIGN
+0xA3	0x00A3	#	POUND SIGN
+0xA4	0x00A4	#	CURRENCY SIGN
+0xA5	0x00A5	#	YEN SIGN
+0xA6	0x00A6	#	BROKEN BAR
+0xA7	0x00A7	#	SECTION SIGN
+0xA8	0x00A8	#	DIAERESIS
+0xA9	0x00A9	#	COPYRIGHT SIGN
+0xAA	0x00AA	#	FEMININE ORDINAL INDICATOR
+0xAB	0x00AB	#	LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC	0x00AC	#	NOT SIGN
+0xAD	0x00AD	#	SOFT HYPHEN
+0xAE	0x00AE	#	REGISTERED SIGN
+0xAF	0x00AF	#	MACRON
+0xB0	0x00B0	#	DEGREE SIGN
+0xB1	0x00B1	#	PLUS-MINUS SIGN
+0xB2	0x00B2	#	SUPERSCRIPT TWO
+0xB3	0x00B3	#	SUPERSCRIPT THREE
+0xB4	0x00B4	#	ACUTE ACCENT
+0xB5	0x00B5	#	MICRO SIGN
+0xB6	0x00B6	#	PILCROW SIGN
+0xB7	0x00B7	#	MIDDLE DOT
+0xB8	0x00B8	#	CEDILLA
+0xB9	0x00B9	#	SUPERSCRIPT ONE
+0xBA	0x00BA	#	MASCULINE ORDINAL INDICATOR
+0xBB	0x00BB	#	RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC	0x00BC	#	VULGAR FRACTION ONE QUARTER
+0xBD	0x00BD	#	VULGAR FRACTION ONE HALF
+0xBE	0x00BE	#	VULGAR FRACTION THREE QUARTERS
+0xBF	0x00BF	#	INVERTED QUESTION MARK
+0xC0	0x00C0	#	LATIN CAPITAL LETTER A WITH GRAVE
+0xC1	0x00C1	#	LATIN CAPITAL LETTER A WITH ACUTE
+0xC2	0x00C2	#	LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3	0x00C3	#	LATIN CAPITAL LETTER A WITH TILDE
+0xC4	0x00C4	#	LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5	0x00C5	#	LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6	0x00C6	#	LATIN CAPITAL LETTER AE
+0xC7	0x00C7	#	LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8	0x00C8	#	LATIN CAPITAL LETTER E WITH GRAVE
+0xC9	0x00C9	#	LATIN CAPITAL LETTER E WITH ACUTE
+0xCA	0x00CA	#	LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB	0x00CB	#	LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC	0x00CC	#	LATIN CAPITAL LETTER I WITH GRAVE
+0xCD	0x00CD	#	LATIN CAPITAL LETTER I WITH ACUTE
+0xCE	0x00CE	#	LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF	0x00CF	#	LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0	0x011E	#	LATIN CAPITAL LETTER G WITH BREVE
+0xD1	0x00D1	#	LATIN CAPITAL LETTER N WITH TILDE
+0xD2	0x00D2	#	LATIN CAPITAL LETTER O WITH GRAVE
+0xD3	0x00D3	#	LATIN CAPITAL LETTER O WITH ACUTE
+0xD4	0x00D4	#	LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5	0x00D5	#	LATIN CAPITAL LETTER O WITH TILDE
+0xD6	0x00D6	#	LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7	0x00D7	#	MULTIPLICATION SIGN
+0xD8	0x00D8	#	LATIN CAPITAL LETTER O WITH STROKE
+0xD9	0x00D9	#	LATIN CAPITAL LETTER U WITH GRAVE
+0xDA	0x00DA	#	LATIN CAPITAL LETTER U WITH ACUTE
+0xDB	0x00DB	#	LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC	0x00DC	#	LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD	0x0130	#	LATIN CAPITAL LETTER I WITH DOT ABOVE
+0xDE	0x015E	#	LATIN CAPITAL LETTER S WITH CEDILLA
+0xDF	0x00DF	#	LATIN SMALL LETTER SHARP S
+0xE0	0x00E0	#	LATIN SMALL LETTER A WITH GRAVE
+0xE1	0x00E1	#	LATIN SMALL LETTER A WITH ACUTE
+0xE2	0x00E2	#	LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3	0x00E3	#	LATIN SMALL LETTER A WITH TILDE
+0xE4	0x00E4	#	LATIN SMALL LETTER A WITH DIAERESIS
+0xE5	0x00E5	#	LATIN SMALL LETTER A WITH RING ABOVE
+0xE6	0x00E6	#	LATIN SMALL LETTER AE
+0xE7	0x00E7	#	LATIN SMALL LETTER C WITH CEDILLA
+0xE8	0x00E8	#	LATIN SMALL LETTER E WITH GRAVE
+0xE9	0x00E9	#	LATIN SMALL LETTER E WITH ACUTE
+0xEA	0x00EA	#	LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB	0x00EB	#	LATIN SMALL LETTER E WITH DIAERESIS
+0xEC	0x00EC	#	LATIN SMALL LETTER I WITH GRAVE
+0xED	0x00ED	#	LATIN SMALL LETTER I WITH ACUTE
+0xEE	0x00EE	#	LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF	0x00EF	#	LATIN SMALL LETTER I WITH DIAERESIS
+0xF0	0x011F	#	LATIN SMALL LETTER G WITH BREVE
+0xF1	0x00F1	#	LATIN SMALL LETTER N WITH TILDE
+0xF2	0x00F2	#	LATIN SMALL LETTER O WITH GRAVE
+0xF3	0x00F3	#	LATIN SMALL LETTER O WITH ACUTE
+0xF4	0x00F4	#	LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5	0x00F5	#	LATIN SMALL LETTER O WITH TILDE
+0xF6	0x00F6	#	LATIN SMALL LETTER O WITH DIAERESIS
+0xF7	0x00F7	#	DIVISION SIGN
+0xF8	0x00F8	#	LATIN SMALL LETTER O WITH STROKE
+0xF9	0x00F9	#	LATIN SMALL LETTER U WITH GRAVE
+0xFA	0x00FA	#	LATIN SMALL LETTER U WITH ACUTE
+0xFB	0x00FB	#	LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC	0x00FC	#	LATIN SMALL LETTER U WITH DIAERESIS
+0xFD	0x0131	#	LATIN SMALL LETTER DOTLESS I
+0xFE	0x015F	#	LATIN SMALL LETTER S WITH CEDILLA
+0xFF	0x00FF	#	LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/mbstring/tests/encoding_tests.inc b/ext/mbstring/tests/encoding_tests.inc
new file mode 100644
index 0000000000..43c6addee6
--- /dev/null
+++ b/ext/mbstring/tests/encoding_tests.inc
@@ -0,0 +1,98 @@
+<?php
+
+// Common code for tests which focus on conversion and verification of text
+// in some specific encoding
+
+function dbgPrint($str) {
+    $result = '';
+    if (mb_check_encoding($str, 'ASCII'))
+        $result .= '"' . $str . '" ';
+    return $result . "(" . bin2hex($str) . ")";
+}
+
+function identifyValidString($goodString, $encoding) {
+    $result = mb_check_encoding($goodString, $encoding);
+    if (!$result)
+        die("mb_check_encoding failed on good $encoding string: " . dbgPrint($goodString));
+}
+
+function identifyInvalidString($badString, $encoding) {
+    $result = mb_check_encoding($badString, $encoding);
+    if ($result)
+        die("mb_check_encoding passed on bad $encoding string: " . dbgPrint($badString));
+}
+
+function testConversion($fromString, $toString, $fromEncoding, $toEncoding) {
+    $result = mb_convert_encoding($fromString, $toEncoding, $fromEncoding);
+    if ($result !== $toString)
+        die("mb_convert_encoding not working on $fromEncoding input: " . dbgPrint($fromString) . "\nExpected $toEncoding: " . dbgPrint($toString) . "\nActually got: " . dbgPrint($result));
+}
+
+function testValidConversion($fromString, $toString, $fromEncoding, $toEncoding) {
+    $illegalChars = mb_get_info('illegal_chars');
+    testConversion($fromString, $toString, $fromEncoding, $toEncoding);
+    if (mb_get_info('illegal_chars') !== $illegalChars)
+        die("mb_convert_encoding incremented illegal_chars on valid $fromEncoding string: " . dbgPrint($fromString) . " when converting to $toEncoding");
+}
+
+function convertValidString($fromString, $toString, $fromEncoding, $toEncoding, $bothWays = true) {
+    testValidConversion($fromString, $toString, $fromEncoding, $toEncoding);
+    if ($bothWays)
+        testValidConversion($toString, $fromString, $toEncoding, $fromEncoding);
+}
+
+function convertInvalidString($fromString, $toString, $fromEncoding, $toEncoding) {
+    $illegalChars = mb_get_info('illegal_chars');
+    testConversion($fromString, $toString, $fromEncoding, $toEncoding);
+    if (mb_get_info('illegal_chars') <= $illegalChars)
+        die("mb_convert_encoding did not increment illegal_chars on invalid $fromEncoding string: " . dbgPrint($fromString) . " when converting to $toEncoding");
+}
+
+function testValidString($fromString, $toString, $fromEncoding, $toEncoding, $bothWays = true) {
+    identifyValidString($fromString, $fromEncoding);
+    convertValidString($fromString, $toString, $fromEncoding, $toEncoding, $bothWays);
+}
+
+function testInvalidString($fromString, $toString, $fromEncoding, $toEncoding) {
+    identifyInvalidString($fromString, $fromEncoding);
+    convertInvalidString($fromString, $toString, $fromEncoding, $toEncoding);
+}
+
+// Only for encodings where valid characters can be concatenated together in any
+// way, without any escape sequences
+// Also only for encodings which can be converted losslessly to and from $toEncoding
+function testAllValidChars($charMap, $fromEncoding, $toEncoding) {
+    $goodChars = array_keys($charMap);
+    shuffle($goodChars);
+    while (!empty($goodChars)) {
+        $length = min(rand(5,10), count($goodChars));
+        $fromString = $toString = '';
+        while ($length--) {
+            $goodChar = array_pop($goodChars);
+            $fromString .= $goodChar;
+            $toString .= $charMap[$goodChar];
+        }
+
+        testValidString($fromString, $toString, $fromEncoding, $toEncoding);
+    }
+}
+
+function testAllInvalidChars($badChars, $charMap, $fromEncoding, $toEncoding, $replacement) {
+    $badChars = array_keys($badChars);
+    shuffle($badChars);
+    $goodChars = array_keys($charMap);
+    shuffle($goodChars);
+    while (!empty($badChars)) {
+        if (empty($goodChars)) {
+            $goodChars = array_keys($charMap);
+            shuffle($goodChars);
+        }
+        $goodChar   = array_pop($goodChars);
+        $fromString = array_pop($badChars) . $goodChar;
+        $toString   = $replacement . $charMap[$goodChar];
+
+        testInvalidString($fromString, $toString, $fromEncoding, $toEncoding);
+    }
+}
+
+?>
diff --git a/ext/mbstring/tests/iso8859_encodings.phpt b/ext/mbstring/tests/iso8859_encodings.phpt
new file mode 100644
index 0000000000..2ad647a589
--- /dev/null
+++ b/ext/mbstring/tests/iso8859_encodings.phpt
@@ -0,0 +1,75 @@
+--TEST--
+Exhaustive test of verification and conversion of ISO-8859-X text
+--SKIPIF--
+<?php
+extension_loaded('mbstring') or die('skip mbstring not available');
+if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
+?>
+--FILE--
+<?php
+srand(678); // Make results consistent
+include('encoding_tests.inc');
+mb_substitute_character(0x25); // '%'
+
+function testValid($from, $to, $n) {
+    testValidString($from, $to, "ISO-8859-$n", 'UTF-16BE');
+}
+function testInvalid($from, $to, $n) {
+    testInvalidString($from, $to, "ISO-8859-$n", 'UTF-16BE');
+}
+
+for ($n = 1; $n <= 16; $n++) {
+    if ($n == 11 || $n == 12)
+        continue;
+
+    $toUnicode = array();
+    $fromUnicode = array();
+
+    $fp = fopen(realpath(__DIR__ . "/data/8859-$n.txt"), 'r+');
+    while ($line = fgets($fp, 256)) {
+        if ($line[0] == '#')
+            continue;
+
+        if (sscanf($line, "0x%x\t0x%x", $byte, $codepoint) == 2) {
+            $toUnicode[$byte] = pack('n', $codepoint);
+            $fromUnicode[$codepoint] = chr($byte);
+        }
+    }
+
+    // Try all ISO-8859-{$n} bytes (these are single-byte encodings)
+    for ($i = 0; $i < 256; $i++) {
+        if (isset($toUnicode[$i])) {
+            testValid(chr($i), $toUnicode[$i], $n);
+        } else {
+            testInvalid(chr($i), "\x00%", $n);
+        }
+    }
+
+    // Try all Unicode codepoints up to 0xFFFF
+    for ($i = 0; $i <= 0xFFFF; $i++) {
+        if (isset($fromUnicode[$i])) {
+            testValid($fromUnicode[$i], pack('n', $i), $n);
+        } else if (($i & 0xfc00) != 0xd800) {
+            convertInvalidString(pack('n', $i), "%", 'UTF-16BE', "ISO-8859-{$n}");
+        }
+    }
+
+    echo "All is fine and dandy for ISO-8859-$n text\n";
+}
+
+?>
+--EXPECT--
+All is fine and dandy for ISO-8859-1 text
+All is fine and dandy for ISO-8859-2 text
+All is fine and dandy for ISO-8859-3 text
+All is fine and dandy for ISO-8859-4 text
+All is fine and dandy for ISO-8859-5 text
+All is fine and dandy for ISO-8859-6 text
+All is fine and dandy for ISO-8859-7 text
+All is fine and dandy for ISO-8859-8 text
+All is fine and dandy for ISO-8859-9 text
+All is fine and dandy for ISO-8859-10 text
+All is fine and dandy for ISO-8859-13 text
+All is fine and dandy for ISO-8859-14 text
+All is fine and dandy for ISO-8859-15 text
+All is fine and dandy for ISO-8859-16 text