# MySQL bug#43593 dump/backup/restore/upgrade tools fails # # The patch is intended to help to those users: # - Having indexes on columns with collations # utf8_general_ci or ucs2_general_ci # - Having German letter SHARP S (SZLIG) in these columns # - Upgrading from MySQL from versions 5.0.x or # 5.1.23 (and earlier) to version 5.1.24 (and higher). # # This patch introduces new collations utf8_general50_ci # and ucs2_general50_ci which reproduce the "old" # sorting order provided by pre-5.1.24 versions of xxx_general_ci. # # In order to start using new MySQL-5.1.24+ please do the following: # # - Start new version of mysqld # - Convert all affected tables using this query (in case of utf8): # # ALTER TABLE t1 CONVERT TO CHARACTER SET utf8 COLLATE utf8_general50_ci; # # Or if you need to apply changes per-column level, use this example: # # ALTER TABLE t1 MODIFY c1 CHAR(N) CHARACTER SET utf8 COLLATE utf8_general50_ci; # # (Make sure you're using the old data type and size, # NULL/NOT NULL constraints, etc). # === modified file 'mysys/charset-def.c' --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -22,6 +22,9 @@ init_compiled_charsets() that only adds those that he wants */ +extern CHARSET_INFO my_charset_ucs2_general50_ci; +extern CHARSET_INFO my_charset_utf8_general50_ci; + #ifdef HAVE_UCA_COLLATIONS #ifdef HAVE_CHARSET_ucs2 @@ -205,6 +208,7 @@ add_compiled_collation(&my_charset_ucs2_general_ci); add_compiled_collation(&my_charset_ucs2_bin); add_compiled_collation(&my_charset_ucs2_general_mysql500_ci); + add_compiled_collation(&my_charset_ucs2_general50_ci); #ifdef HAVE_UCA_COLLATIONS add_compiled_collation(&my_charset_ucs2_unicode_ci); add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci); @@ -238,6 +242,7 @@ add_compiled_collation(&my_charset_utf8_general_ci); add_compiled_collation(&my_charset_utf8_bin); add_compiled_collation(&my_charset_utf8_general_mysql500_ci); + add_compiled_collation(&my_charset_utf8_general50_ci); #ifdef HAVE_UTF8_GENERAL_CS add_compiled_collation(&my_charset_utf8_general_cs); #endif --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -3188,6 +3188,42 @@ }; + +extern MY_UNICASE_INFO *my_unicase_general50[256]; + +CHARSET_INFO my_charset_ucs2_general50_ci= +{ + 159,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_general50_ci", /* name */ + "", /* comment */ + NULL, /* tailoring */ + ctype_ucs2, /* ctype */ + to_lower_ucs2, /* to_lower */ + to_upper_ucs2, /* to_upper */ + to_upper_ucs2, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_general50, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_ucs2_handler, + &my_collation_ucs2_general_ci_handler +}; + + CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -192,6 +192,138 @@ {0x00DE,0x00FE,0x00DE}, {0x0178,0x00FF,0x0059} }; +static MY_UNICASE_INFO plane00_general50[]={ + {0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001}, + {0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003}, + {0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005}, + {0x0006,0x0006,0x0006}, {0x0007,0x0007,0x0007}, + {0x0008,0x0008,0x0008}, {0x0009,0x0009,0x0009}, + {0x000A,0x000A,0x000A}, {0x000B,0x000B,0x000B}, + {0x000C,0x000C,0x000C}, {0x000D,0x000D,0x000D}, + {0x000E,0x000E,0x000E}, {0x000F,0x000F,0x000F}, + {0x0010,0x0010,0x0010}, {0x0011,0x0011,0x0011}, + {0x0012,0x0012,0x0012}, {0x0013,0x0013,0x0013}, + {0x0014,0x0014,0x0014}, {0x0015,0x0015,0x0015}, + {0x0016,0x0016,0x0016}, {0x0017,0x0017,0x0017}, + {0x0018,0x0018,0x0018}, {0x0019,0x0019,0x0019}, + {0x001A,0x001A,0x001A}, {0x001B,0x001B,0x001B}, + {0x001C,0x001C,0x001C}, {0x001D,0x001D,0x001D}, + {0x001E,0x001E,0x001E}, {0x001F,0x001F,0x001F}, + {0x0020,0x0020,0x0020}, {0x0021,0x0021,0x0021}, + {0x0022,0x0022,0x0022}, {0x0023,0x0023,0x0023}, + {0x0024,0x0024,0x0024}, {0x0025,0x0025,0x0025}, + {0x0026,0x0026,0x0026}, {0x0027,0x0027,0x0027}, + {0x0028,0x0028,0x0028}, {0x0029,0x0029,0x0029}, + {0x002A,0x002A,0x002A}, {0x002B,0x002B,0x002B}, + {0x002C,0x002C,0x002C}, {0x002D,0x002D,0x002D}, + {0x002E,0x002E,0x002E}, {0x002F,0x002F,0x002F}, + {0x0030,0x0030,0x0030}, {0x0031,0x0031,0x0031}, + {0x0032,0x0032,0x0032}, {0x0033,0x0033,0x0033}, + {0x0034,0x0034,0x0034}, {0x0035,0x0035,0x0035}, + {0x0036,0x0036,0x0036}, {0x0037,0x0037,0x0037}, + {0x0038,0x0038,0x0038}, {0x0039,0x0039,0x0039}, + {0x003A,0x003A,0x003A}, {0x003B,0x003B,0x003B}, + {0x003C,0x003C,0x003C}, {0x003D,0x003D,0x003D}, + {0x003E,0x003E,0x003E}, {0x003F,0x003F,0x003F}, + {0x0040,0x0040,0x0040}, {0x0041,0x0061,0x0041}, + {0x0042,0x0062,0x0042}, {0x0043,0x0063,0x0043}, + {0x0044,0x0064,0x0044}, {0x0045,0x0065,0x0045}, + {0x0046,0x0066,0x0046}, {0x0047,0x0067,0x0047}, + {0x0048,0x0068,0x0048}, {0x0049,0x0069,0x0049}, + {0x004A,0x006A,0x004A}, {0x004B,0x006B,0x004B}, + {0x004C,0x006C,0x004C}, {0x004D,0x006D,0x004D}, + {0x004E,0x006E,0x004E}, {0x004F,0x006F,0x004F}, + {0x0050,0x0070,0x0050}, {0x0051,0x0071,0x0051}, + {0x0052,0x0072,0x0052}, {0x0053,0x0073,0x0053}, + {0x0054,0x0074,0x0054}, {0x0055,0x0075,0x0055}, + {0x0056,0x0076,0x0056}, {0x0057,0x0077,0x0057}, + {0x0058,0x0078,0x0058}, {0x0059,0x0079,0x0059}, + {0x005A,0x007A,0x005A}, {0x005B,0x005B,0x005B}, + {0x005C,0x005C,0x005C}, {0x005D,0x005D,0x005D}, + {0x005E,0x005E,0x005E}, {0x005F,0x005F,0x005F}, + {0x0060,0x0060,0x0060}, {0x0041,0x0061,0x0041}, + {0x0042,0x0062,0x0042}, {0x0043,0x0063,0x0043}, + {0x0044,0x0064,0x0044}, {0x0045,0x0065,0x0045}, + {0x0046,0x0066,0x0046}, {0x0047,0x0067,0x0047}, + {0x0048,0x0068,0x0048}, {0x0049,0x0069,0x0049}, + {0x004A,0x006A,0x004A}, {0x004B,0x006B,0x004B}, + {0x004C,0x006C,0x004C}, {0x004D,0x006D,0x004D}, + {0x004E,0x006E,0x004E}, {0x004F,0x006F,0x004F}, + {0x0050,0x0070,0x0050}, {0x0051,0x0071,0x0051}, + {0x0052,0x0072,0x0052}, {0x0053,0x0073,0x0053}, + {0x0054,0x0074,0x0054}, {0x0055,0x0075,0x0055}, + {0x0056,0x0076,0x0056}, {0x0057,0x0077,0x0057}, + {0x0058,0x0078,0x0058}, {0x0059,0x0079,0x0059}, + {0x005A,0x007A,0x005A}, {0x007B,0x007B,0x007B}, + {0x007C,0x007C,0x007C}, {0x007D,0x007D,0x007D}, + {0x007E,0x007E,0x007E}, {0x007F,0x007F,0x007F}, + {0x0080,0x0080,0x0080}, {0x0081,0x0081,0x0081}, + {0x0082,0x0082,0x0082}, {0x0083,0x0083,0x0083}, + {0x0084,0x0084,0x0084}, {0x0085,0x0085,0x0085}, + {0x0086,0x0086,0x0086}, {0x0087,0x0087,0x0087}, + {0x0088,0x0088,0x0088}, {0x0089,0x0089,0x0089}, + {0x008A,0x008A,0x008A}, {0x008B,0x008B,0x008B}, + {0x008C,0x008C,0x008C}, {0x008D,0x008D,0x008D}, + {0x008E,0x008E,0x008E}, {0x008F,0x008F,0x008F}, + {0x0090,0x0090,0x0090}, {0x0091,0x0091,0x0091}, + {0x0092,0x0092,0x0092}, {0x0093,0x0093,0x0093}, + {0x0094,0x0094,0x0094}, {0x0095,0x0095,0x0095}, + {0x0096,0x0096,0x0096}, {0x0097,0x0097,0x0097}, + {0x0098,0x0098,0x0098}, {0x0099,0x0099,0x0099}, + {0x009A,0x009A,0x009A}, {0x009B,0x009B,0x009B}, + {0x009C,0x009C,0x009C}, {0x009D,0x009D,0x009D}, + {0x009E,0x009E,0x009E}, {0x009F,0x009F,0x009F}, + {0x00A0,0x00A0,0x00A0}, {0x00A1,0x00A1,0x00A1}, + {0x00A2,0x00A2,0x00A2}, {0x00A3,0x00A3,0x00A3}, + {0x00A4,0x00A4,0x00A4}, {0x00A5,0x00A5,0x00A5}, + {0x00A6,0x00A6,0x00A6}, {0x00A7,0x00A7,0x00A7}, + {0x00A8,0x00A8,0x00A8}, {0x00A9,0x00A9,0x00A9}, + {0x00AA,0x00AA,0x00AA}, {0x00AB,0x00AB,0x00AB}, + {0x00AC,0x00AC,0x00AC}, {0x00AD,0x00AD,0x00AD}, + {0x00AE,0x00AE,0x00AE}, {0x00AF,0x00AF,0x00AF}, + {0x00B0,0x00B0,0x00B0}, {0x00B1,0x00B1,0x00B1}, + {0x00B2,0x00B2,0x00B2}, {0x00B3,0x00B3,0x00B3}, + {0x00B4,0x00B4,0x00B4}, {0x039C,0x00B5,0x039C}, + {0x00B6,0x00B6,0x00B6}, {0x00B7,0x00B7,0x00B7}, + {0x00B8,0x00B8,0x00B8}, {0x00B9,0x00B9,0x00B9}, + {0x00BA,0x00BA,0x00BA}, {0x00BB,0x00BB,0x00BB}, + {0x00BC,0x00BC,0x00BC}, {0x00BD,0x00BD,0x00BD}, + {0x00BE,0x00BE,0x00BE}, {0x00BF,0x00BF,0x00BF}, + {0x00C0,0x00E0,0x0041}, {0x00C1,0x00E1,0x0041}, + {0x00C2,0x00E2,0x0041}, {0x00C3,0x00E3,0x0041}, + {0x00C4,0x00E4,0x0041}, {0x00C5,0x00E5,0x0041}, + {0x00C6,0x00E6,0x00C6}, {0x00C7,0x00E7,0x0043}, + {0x00C8,0x00E8,0x0045}, {0x00C9,0x00E9,0x0045}, + {0x00CA,0x00EA,0x0045}, {0x00CB,0x00EB,0x0045}, + {0x00CC,0x00EC,0x0049}, {0x00CD,0x00ED,0x0049}, + {0x00CE,0x00EE,0x0049}, {0x00CF,0x00EF,0x0049}, + {0x00D0,0x00F0,0x00D0}, {0x00D1,0x00F1,0x004E}, + {0x00D2,0x00F2,0x004F}, {0x00D3,0x00F3,0x004F}, + {0x00D4,0x00F4,0x004F}, {0x00D5,0x00F5,0x004F}, + {0x00D6,0x00F6,0x004F}, {0x00D7,0x00D7,0x00D7}, + {0x00D8,0x00F8,0x00D8}, {0x00D9,0x00F9,0x0055}, + {0x00DA,0x00FA,0x0055}, {0x00DB,0x00FB,0x0055}, + {0x00DC,0x00FC,0x0055}, {0x00DD,0x00FD,0x0059}, + {0x00DE,0x00FE,0x00DE}, {0x00DF,0x00DF,0x00DF}, + {0x00C0,0x00E0,0x0041}, {0x00C1,0x00E1,0x0041}, + {0x00C2,0x00E2,0x0041}, {0x00C3,0x00E3,0x0041}, + {0x00C4,0x00E4,0x0041}, {0x00C5,0x00E5,0x0041}, + {0x00C6,0x00E6,0x00C6}, {0x00C7,0x00E7,0x0043}, + {0x00C8,0x00E8,0x0045}, {0x00C9,0x00E9,0x0045}, + {0x00CA,0x00EA,0x0045}, {0x00CB,0x00EB,0x0045}, + {0x00CC,0x00EC,0x0049}, {0x00CD,0x00ED,0x0049}, + {0x00CE,0x00EE,0x0049}, {0x00CF,0x00EF,0x0049}, + {0x00D0,0x00F0,0x00D0}, {0x00D1,0x00F1,0x004E}, + {0x00D2,0x00F2,0x004F}, {0x00D3,0x00F3,0x004F}, + {0x00D4,0x00F4,0x004F}, {0x00D5,0x00F5,0x004F}, + {0x00D6,0x00F6,0x004F}, {0x00F7,0x00F7,0x00F7}, + {0x00D8,0x00F8,0x00D8}, {0x00D9,0x00F9,0x0055}, + {0x00DA,0x00FA,0x0055}, {0x00DB,0x00FB,0x0055}, + {0x00DC,0x00FC,0x0055}, {0x00DD,0x00FD,0x0059}, + {0x00DE,0x00FE,0x00DE}, {0x0178,0x00FF,0x0059} +}; + + /* Almost similar to plane00, but maps sorting order @@ -1718,6 +1850,48 @@ /* + general50: to reproduce old utf8_general_ci behaviour + before we fixed Bug#27877. +*/ +MY_UNICASE_INFO *my_unicase_general50[256]={ + plane00_general50, + plane01, plane02, plane03, plane04, plane05, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F, + NULL, plane21, NULL, NULL, plane24, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, planeFF + +}; + + +/* Turkish lower/upper mapping: 1. LOWER(0x0049 LATIN CAPITAL LETTER I) -> 0x0131 LATIN SMALL LETTER DOTLESS I @@ -3023,6 +3197,39 @@ }; +CHARSET_INFO my_charset_utf8_general50_ci= +{ + 253,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */ + "utf8", /* cs name */ + "utf8_general50_ci", /* name */ + "", /* comment */ + NULL, /* tailoring */ + ctype_utf8, /* ctype */ + to_lower_utf8, /* to_lower */ + to_upper_utf8, /* to_upper */ + to_upper_utf8, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_general50, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 3, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8_handler, + &my_collation_ci_handler +}; + + CHARSET_INFO my_charset_utf8_bin= { 83,0,0, /* number */ --- /dev/null +++ b/mysql-test/t/percona_ucs2_general50_ci.test @@ -0,0 +1,12 @@ +# +# Test that ucs2_general50_ci provides pre-5.1.24 utf8_general_ci behavior, +# i.e. SHARP S is only equal to itself. +# + +--source include/have_ucs2.inc + +SET NAMES latin1; + +SET collation_connection='ucs2_general50_ci'; + +--source include/ctype_german.inc --- /dev/null +++ b/mysql-test/t/percona_utf8_general50_ci.test @@ -0,0 +1,9 @@ +# +# Test that utf8_general50_ci provides pre-5.1.24 utf8_general_ci behavior, +# i.e. SHARP S is only equal to itself. +# + +SET NAMES utf8; + +SET collation_connection='utf8_general50_ci'; +--source include/ctype_german.inc --- /dev/null +++ b/mysql-test/r/percona_utf8_general50_ci.result @@ -0,0 +1,38 @@ +SET NAMES utf8; +SET collation_connection='utf8_general50_ci'; +drop table if exists t1; +create table t1 as select repeat(' ', 64) as s1; +select collation(s1) from t1; +collation(s1) +utf8_general50_ci +delete from t1; +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); +select s1, hex(s1) from t1 order by s1, binary s1; +s1 hex(s1) +a 61 +ä C3A4 +ae 6165 +o 6F +ö C3B6 +oe 6F65 +s 73 +ss 7373 +u 75 +ü C3BC +ue 7565 +ß C39F +select group_concat(s1 order by binary s1) from t1 group by s1; +group_concat(s1 order by binary s1) +a,ä +ae +o,ö +oe +s +ss +u,ü +ue +ß +drop table t1; Binary files /dev/null and b/mysql-test/r/percona_ucs2_general50_ci.result differ