Skip to content

Commit 1710b64

Browse files
committed
MDEV-26743 InnoDB: CHAR+nopad does not work well
The patch for "MDEV-25440: Indexed CHAR ... broken with NO_PAD collations" fixed these scenarios from MDEV-26743:
- Basic Latin letter vs equal accented letter
- Two letters vs equal (but space padded) expansion

However, this scenario was still broken:
- Basic Latin letter (but followed by an ignorable character) vs equal accented letter

Fix:

When processing, for a NOPAD collation, a string with trailing ignorable characters, like:
  '<non-ignorable><ignorable><ignorable>'
the string gets virtually converted to:
  '<non-ignorable><ignorable><ignorable><space><space><space>...'

After the fix the code works differently in these two cases:
1. <space> fits into the "nchars" limit
2. <space> does not fit into the "nchars" limit

Details:

1. If "nchars" is large enough (4+ in this example), return weights as follows:
  '[weight-for-non-ignorable, 1 char] [weight-for-space-character, 3 chars]'
i.e. the weight for the virtual trailing space character now indicates that it corresponds to a total of 3 characters:
- two ignorable characters
- one virtual trailing space character

2. If "nchars" is small (3), then the virtual trailing space character does not fit into the "nchars" limit, so return 0x00 as weight, e.g.:
  '[weight-for-non-ignorable, 1 char] [0x00, 2 chars]'

Adding corresponding MTR tests and unit tests.
1 parent d6872f9 commit 1710b64

File tree

4 files changed

+131
-1
lines changed

4 files changed

+131
-1
lines changed

mysql-test/suite/innodb/r/no_pad.result

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,49 @@ ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
55
INSERT INTO t1 VALUES ('',2);
66
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
77
DROP TABLE t1;
8+
#
9+
# MDEV-26743 InnoDB: CHAR+nopad does not work well
10+
#
11+
#
12+
# Basic Latin letter vs equal accented letter
13+
#
14+
SET NAMES utf8mb3;
15+
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
16+
INSERT INTO t1 VALUES ('a'),('ä');
17+
ERROR 23000: Duplicate entry 'ä' for key 'PRIMARY'
18+
DROP TABLE t1;
19+
#
20+
# Two letters vs equal (but space padded) expansion
21+
#
22+
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
23+
INSERT INTO t1 VALUES ('ss'),('ß');
24+
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
25+
SELECT HEX(a) FROM t1;
26+
HEX(a)
27+
7373
28+
C39F20
29+
SET sql_mode=DEFAULT;
30+
DROP TABLE t1;
31+
#
32+
# Basic Latin letter (but followed by an ignorable character) vs equal accented letter
33+
#
34+
SET NAMES utf8mb3;
35+
CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
36+
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
37+
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
38+
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
39+
HEX(a)
40+
610120
41+
C3A42020
42+
SET sql_mode=DEFAULT;
43+
DROP TABLE t1;
44+
SET NAMES utf8mb3;
45+
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
46+
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
47+
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
48+
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
49+
HEX(a)
50+
6101
51+
C3A420
52+
SET sql_mode=DEFAULT;
53+
DROP TABLE t1;

mysql-test/suite/innodb/t/no_pad.test

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,49 @@ ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
88
INSERT INTO t1 VALUES ('',2);
99
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
1010
DROP TABLE t1;
11+
12+
13+
--echo #
14+
--echo # MDEV-26743 InnoDB: CHAR+nopad does not work well
15+
--echo #
16+
17+
--echo #
18+
--echo # Basic Latin letter vs equal accented letter
19+
--echo #
20+
21+
SET NAMES utf8mb3;
22+
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
23+
--error ER_DUP_ENTRY
24+
INSERT INTO t1 VALUES ('a'),('ä');
25+
DROP TABLE t1;
26+
27+
--echo #
28+
--echo # Two letters vs equal (but space padded) expansion
29+
--echo #
30+
31+
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
32+
INSERT INTO t1 VALUES ('ss'),('ß');
33+
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
34+
SELECT HEX(a) FROM t1;
35+
SET sql_mode=DEFAULT;
36+
DROP TABLE t1;
37+
38+
--echo #
39+
--echo # Basic Latin letter (but followed by an ignorable character) vs equal accented letter
40+
--echo #
41+
42+
SET NAMES utf8mb3;
43+
CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
44+
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
45+
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
46+
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
47+
SET sql_mode=DEFAULT;
48+
DROP TABLE t1;
49+
50+
SET NAMES utf8mb3;
51+
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
52+
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
53+
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
54+
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
55+
SET sql_mode=DEFAULT;
56+
DROP TABLE t1;

strings/ctype-uca.inl

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,20 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
335335
flags & MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES ?
336336
my_space_weight(scanner->level) : 0;
337337

338-
res.nchars= 1;
339338
(*generated)++;
339+
res.nchars++; /* Count all ignorable characters and the padded space */
340+
if (res.nchars > nchars)
341+
{
342+
/*
343+
We scanned a number of ignorable characters at the end of the
344+
string and reached the "nchars" limit, so the virtual padded space
345+
does not fit. This is possible with CONCAT('a', x'00') with
346+
nchars=2 on the second iteration when we scan the x'00'.
347+
*/
348+
if (scanner->cs->state & MY_CS_NOPAD)
349+
res.weight= 0;
350+
res.nchars= (uint) nchars;
351+
}
340352
}
341353
else if (res.nchars > nchars)
342354
{

unittest/strings/strings-t.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]=
911911
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TCHAR, 0},
912912
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TCHAR, 0},
913913

914+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
915+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
916+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0},
917+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
918+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
919+
920+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
921+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
922+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0},
923+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
924+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0},
925+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
926+
914927
{{NULL, 0}, {NULL, 0}, 0, 0, 0}
915928
};
916929

@@ -938,6 +951,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]=
938951
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TVCHAR, 0},
939952
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TVCHAR, 0},
940953

954+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
955+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
956+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1},
957+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
958+
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
959+
960+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
961+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
962+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1},
963+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, -1},
964+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0},
965+
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
966+
941967
{{NULL, 0}, {NULL, 0}, 0, 0, 0}
942968
};
943969

0 commit comments

Comments
 (0)