Skip to content

Commit 30b4bb4

Browse files
committed
MDEV-31068 Reuse duplicate case conversion code in ctype-utf8.c and ctype-ucs2.c
1 parent 2230c2e commit 30b4bb4

File tree

3 files changed: 78 additions and 118 deletions

strings/ctype-ucs2.c

Lines changed: 6 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -1284,24 +1284,6 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
12841284
const char charset_name_utf16le[]= "utf16le";
12851285
#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1)
12861286

1287-
static inline void
1288-
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1289-
{
1290-
MY_UNICASE_CHARACTER *page;
1291-
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1292-
*wc= page[*wc & 0xFF].tolower;
1293-
}
1294-
1295-
1296-
static inline void
1297-
my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1298-
{
1299-
MY_UNICASE_CHARACTER *page;
1300-
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1301-
*wc= page[*wc & 0xFF].toupper;
1302-
}
1303-
1304-
13051287
static inline void
13061288
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
13071289
{
@@ -1335,7 +1317,7 @@ my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
13351317
while ((src < srcend) &&
13361318
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
13371319
{
1338-
my_toupper_utf16(uni_plane, &wc);
1320+
my_toupper_unicode(uni_plane, &wc);
13391321
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
13401322
break;
13411323
src+= res;
@@ -1393,7 +1375,7 @@ my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
13931375
while ((src < srcend) &&
13941376
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
13951377
{
1396-
my_tolower_utf16(uni_plane, &wc);
1378+
my_tolower_unicode(uni_plane, &wc);
13971379
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
13981380
break;
13991381
src+= res;
@@ -2196,24 +2178,6 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
21962178
}
21972179

21982180

2199-
static inline void
2200-
my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2201-
{
2202-
MY_UNICASE_CHARACTER *page;
2203-
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
2204-
*wc= page[*wc & 0xFF].tolower;
2205-
}
2206-
2207-
2208-
static inline void
2209-
my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2210-
{
2211-
MY_UNICASE_CHARACTER *page;
2212-
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
2213-
*wc= page[*wc & 0xFF].toupper;
2214-
}
2215-
2216-
22172181
static inline void
22182182
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
22192183
{
@@ -2256,7 +2220,7 @@ my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
22562220
while ((src < srcend) &&
22572221
(res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
22582222
{
2259-
my_toupper_utf32(uni_plane, &wc);
2223+
my_toupper_unicode(uni_plane, &wc);
22602224
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
22612225
break;
22622226
src+= res;
@@ -2312,7 +2276,7 @@ my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
23122276

23132277
while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
23142278
{
2315-
my_tolower_utf32(uni_plane,&wc);
2279+
my_tolower_unicode(uni_plane,&wc);
23162280
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
23172281
break;
23182282
src+= res;
@@ -3118,24 +3082,6 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
31183082
}
31193083

31203084

3121-
static inline void
3122-
my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3123-
{
3124-
MY_UNICASE_CHARACTER *page;
3125-
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3126-
*wc= page[*wc & 0xFF].tolower;
3127-
}
3128-
3129-
3130-
static inline void
3131-
my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3132-
{
3133-
MY_UNICASE_CHARACTER *page;
3134-
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3135-
*wc= page[*wc & 0xFF].toupper;
3136-
}
3137-
3138-
31393085
static inline void
31403086
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
31413087
{
@@ -3157,7 +3103,7 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
31573103
while ((src < srcend) &&
31583104
(res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
31593105
{
3160-
my_toupper_ucs2(uni_plane, &wc);
3106+
my_toupper_unicode_bmp(uni_plane, &wc);
31613107
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend))
31623108
break;
31633109
src+= res;
@@ -3208,7 +3154,7 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
32083154
while ((src < srcend) &&
32093155
(res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
32103156
{
3211-
my_tolower_ucs2(uni_plane, &wc);
3157+
my_tolower_unicode_bmp(uni_plane, &wc);
32123158
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend))
32133159
break;
32143160
src+= res;

strings/ctype-unidata.h

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,62 @@ extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256];
2424
extern MY_UNICASE_CHARACTER my_unicase_mysql500_page00[256];
2525
extern MY_UNICASE_CHARACTER *my_unicase_mysql500_pages[256];
2626

27+
28+
static inline my_wc_t my_u300_tolower_7bit(uchar ch)
29+
{
30+
return my_unicase_default_page00[ch].tolower;
31+
}
32+
33+
static inline my_wc_t my_u300_toupper_7bit(uchar ch)
34+
{
35+
return my_unicase_default_page00[ch].toupper;
36+
}
37+
38+
39+
static inline void
40+
my_tolower_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
41+
{
42+
const MY_UNICASE_CHARACTER *page;
43+
DBUG_ASSERT(*wc <= uni_plane->maxchar);
44+
if ((page= uni_plane->page[*wc >> 8]))
45+
*wc= page[*wc & 0xFF].tolower;
46+
}
47+
48+
49+
static inline void
50+
my_toupper_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
51+
{
52+
const MY_UNICASE_CHARACTER *page;
53+
DBUG_ASSERT(*wc <= uni_plane->maxchar);
54+
if ((page= uni_plane->page[*wc >> 8]))
55+
*wc= page[*wc & 0xFF].toupper;
56+
}
57+
58+
59+
static inline void
60+
my_tolower_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
61+
{
62+
if (*wc <= uni_plane->maxchar)
63+
{
64+
const MY_UNICASE_CHARACTER *page;
65+
if ((page= uni_plane->page[(*wc >> 8)]))
66+
*wc= page[*wc & 0xFF].tolower;
67+
}
68+
}
69+
70+
71+
static inline void
72+
my_toupper_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
73+
{
74+
if (*wc <= uni_plane->maxchar)
75+
{
76+
const MY_UNICASE_CHARACTER *page;
77+
if ((page= uni_plane->page[(*wc >> 8)]))
78+
*wc= page[*wc & 0xFF].toupper;
79+
}
80+
}
81+
82+
2783
size_t my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights);
2884
size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend);
2985

strings/ctype-utf8.c

Lines changed: 16 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -5207,24 +5207,6 @@ static int my_uni_utf8mb3_no_range(CHARSET_INFO *cs __attribute__((unused)),
52075207
}
52085208

52095209

5210-
static inline void
5211-
my_tolower_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
5212-
{
5213-
MY_UNICASE_CHARACTER *page;
5214-
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
5215-
*wc= page[*wc & 0xFF].tolower;
5216-
}
5217-
5218-
5219-
static inline void
5220-
my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
5221-
{
5222-
MY_UNICASE_CHARACTER *page;
5223-
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
5224-
*wc= page[*wc & 0xFF].toupper;
5225-
}
5226-
5227-
52285210
static size_t my_caseup_utf8mb3(CHARSET_INFO *cs,
52295211
const char *src, size_t srclen,
52305212
char *dst, size_t dstlen)
@@ -5239,7 +5221,7 @@ static size_t my_caseup_utf8mb3(CHARSET_INFO *cs,
52395221
while ((src < srcend) &&
52405222
(srcres= my_utf8mb3_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0)
52415223
{
5242-
my_toupper_utf8mb3(uni_plane, &wc);
5224+
my_toupper_unicode_bmp(uni_plane, &wc);
52435225
if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
52445226
break;
52455227
src+= srcres;
@@ -5292,7 +5274,7 @@ static size_t my_caseup_str_utf8mb3(CHARSET_INFO *cs, char *src)
52925274
while (*src &&
52935275
(srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0)
52945276
{
5295-
my_toupper_utf8mb3(uni_plane, &wc);
5277+
my_toupper_unicode_bmp(uni_plane, &wc);
52965278
if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0)
52975279
break;
52985280
src+= srcres;
@@ -5317,7 +5299,7 @@ static size_t my_casedn_utf8mb3(CHARSET_INFO *cs,
53175299
while ((src < srcend) &&
53185300
(srcres= my_utf8mb3_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0)
53195301
{
5320-
my_tolower_utf8mb3(uni_plane, &wc);
5302+
my_tolower_unicode_bmp(uni_plane, &wc);
53215303
if ((dstres= my_uni_utf8mb3(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
53225304
break;
53235305
src+= srcres;
@@ -5338,7 +5320,7 @@ static size_t my_casedn_str_utf8mb3(CHARSET_INFO *cs, char *src)
53385320
while (*src &&
53395321
(srcres= my_utf8mb3_uni_no_range(cs, &wc, (uchar *) src)) > 0)
53405322
{
5341-
my_tolower_utf8mb3(uni_plane, &wc);
5323+
my_tolower_unicode_bmp(uni_plane, &wc);
53425324
if ((dstres= my_uni_utf8mb3_no_range(cs, wc, (uchar*) dst)) <= 0)
53435325
break;
53445326
src+= srcres;
@@ -5397,7 +5379,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
53975379
It represents a single byte character.
53985380
Convert it into weight according to collation.
53995381
*/
5400-
s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
5382+
s_wc= my_u300_tolower_7bit((uchar) s[0]);
54015383
s++;
54025384
}
54035385
else
@@ -5430,7 +5412,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
54305412
s+= res;
54315413

54325414
/* Convert Unicode code into weight according to collation */
5433-
my_tolower_utf8mb3(uni_plane, &s_wc);
5415+
my_tolower_unicode_bmp(uni_plane, &s_wc);
54345416
}
54355417

54365418

@@ -5439,7 +5421,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
54395421
if ((uchar) t[0] < 128)
54405422
{
54415423
/* Convert single byte character into weight */
5442-
t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
5424+
t_wc= my_u300_tolower_7bit((uchar) t[0]);
54435425
t++;
54445426
}
54455427
else
@@ -5450,7 +5432,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
54505432
t+= res;
54515433

54525434
/* Convert code into weight */
5453-
my_tolower_utf8mb3(uni_plane, &t_wc);
5435+
my_tolower_unicode_bmp(uni_plane, &t_wc);
54545436
}
54555437

54565438
/* Now we have two weights, let's compare them */
@@ -7678,30 +7660,6 @@ my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)),
76787660
}
76797661

76807662

7681-
static inline void
7682-
my_tolower_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
7683-
{
7684-
if (*wc <= uni_plane->maxchar)
7685-
{
7686-
MY_UNICASE_CHARACTER *page;
7687-
if ((page= uni_plane->page[(*wc >> 8)]))
7688-
*wc= page[*wc & 0xFF].tolower;
7689-
}
7690-
}
7691-
7692-
7693-
static inline void
7694-
my_toupper_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
7695-
{
7696-
if (*wc <= uni_plane->maxchar)
7697-
{
7698-
MY_UNICASE_CHARACTER *page;
7699-
if ((page= uni_plane->page[(*wc >> 8)]))
7700-
*wc= page[*wc & 0xFF].toupper;
7701-
}
7702-
}
7703-
7704-
77057663
static size_t
77067664
my_caseup_utf8mb4(CHARSET_INFO *cs, const char *src, size_t srclen,
77077665
char *dst, size_t dstlen)
@@ -7717,7 +7675,7 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, const char *src, size_t srclen,
77177675
(srcres= my_mb_wc_utf8mb4(cs, &wc,
77187676
(uchar *) src, (uchar*) srcend)) > 0)
77197677
{
7720-
my_toupper_utf8mb4(uni_plane, &wc);
7678+
my_toupper_unicode(uni_plane, &wc);
77217679
if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
77227680
break;
77237681
src+= srcres;
@@ -7784,7 +7742,7 @@ my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src)
77847742
while (*src &&
77857743
(srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0)
77867744
{
7787-
my_toupper_utf8mb4(uni_plane, &wc);
7745+
my_toupper_unicode(uni_plane, &wc);
77887746
if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0)
77897747
break;
77907748
src+= srcres;
@@ -7811,7 +7769,7 @@ my_casedn_utf8mb4(CHARSET_INFO *cs,
78117769
(srcres= my_mb_wc_utf8mb4(cs, &wc,
78127770
(uchar*) src, (uchar*) srcend)) > 0)
78137771
{
7814-
my_tolower_utf8mb4(uni_plane, &wc);
7772+
my_tolower_unicode(uni_plane, &wc);
78157773
if ((dstres= my_wc_mb_utf8mb4(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
78167774
break;
78177775
src+= srcres;
@@ -7833,7 +7791,7 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src)
78337791
while (*src &&
78347792
(srcres= my_mb_wc_utf8mb4_no_range(cs, &wc, (uchar *) src)) > 0)
78357793
{
7836-
my_tolower_utf8mb4(uni_plane, &wc);
7794+
my_tolower_unicode(uni_plane, &wc);
78377795
if ((dstres= my_wc_mb_utf8mb4_no_range(cs, wc, (uchar*) dst)) <= 0)
78387796
break;
78397797
src+= srcres;
@@ -7888,7 +7846,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
78887846
It represents a single byte character.
78897847
Convert it into weight according to collation.
78907848
*/
7891-
s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
7849+
s_wc= my_u300_tolower_7bit((uchar) s[0]);
78927850
s++;
78937851
}
78947852
else
@@ -7903,7 +7861,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
79037861
return strcmp(s, t);
79047862
s+= res;
79057863

7906-
my_tolower_utf8mb4(uni_plane, &s_wc);
7864+
my_tolower_unicode(uni_plane, &s_wc);
79077865
}
79087866

79097867

@@ -7912,7 +7870,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
79127870
if ((uchar) t[0] < 128)
79137871
{
79147872
/* Convert single byte character into weight */
7915-
t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
7873+
t_wc= my_u300_tolower_7bit((uchar) t[0]);
79167874
t++;
79177875
}
79187876
else
@@ -7922,7 +7880,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
79227880
return strcmp(s, t);
79237881
t+= res;
79247882

7925-
my_tolower_utf8mb4(uni_plane, &t_wc);
7883+
my_tolower_unicode(uni_plane, &t_wc);
79267884
}
79277885

79287886
/* Now we have two weights, let's compare them */

0 commit comments

Comments
 (0)