Skip to content

Commit af30861

Browse files
author
Fabrice Bellard
committed
fixed regexp case insensitive flag
1 parent aac2464 commit af30861

File tree

6 files changed

+519
-255
lines changed

6 files changed

+519
-255
lines changed

libregexp.c

Lines changed: 1 addition & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@
3434
/*
3535
TODO:
3636
37-
- Add full unicode canonicalize rules for character ranges (not
38-
really useful but needed for exact "ignorecase" compatibility).
39-
4037
- Add a lock step execution mode (=linear time execution guaranteed)
4138
when the regular expression is "simple" i.e. no backreference nor
4239
complicated lookahead. The opcodes are designed for this execution
@@ -120,33 +117,6 @@ static int dbuf_insert(DynBuf *s, int pos, int len)
120117
return 0;
121118
}
122119

123-
/* canonicalize with the specific JS regexp rules */
124-
static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
125-
{
126-
uint32_t res[LRE_CC_RES_LEN_MAX];
127-
int len;
128-
if (is_utf16) {
129-
if (likely(c < 128)) {
130-
if (c >= 'A' && c <= 'Z')
131-
c = c - 'A' + 'a';
132-
} else {
133-
lre_case_conv(res, c, 2);
134-
c = res[0];
135-
}
136-
} else {
137-
if (likely(c < 128)) {
138-
if (c >= 'a' && c <= 'z')
139-
c = c - 'a' + 'A';
140-
} else {
141-
/* legacy regexp: to upper case if single char >= 128 */
142-
len = lre_case_conv(res, c, FALSE);
143-
if (len == 1 && res[0] >= 128)
144-
c = res[0];
145-
}
146-
}
147-
return c;
148-
}
149-
150120
static const uint16_t char_range_d[] = {
151121
1,
152122
0x0030, 0x0039 + 1,
@@ -245,31 +215,6 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
245215
return -1;
246216
}
247217

248-
static int cr_canonicalize(CharRange *cr)
249-
{
250-
CharRange a;
251-
uint32_t pt[2];
252-
int i, ret;
253-
254-
cr_init(&a, cr->mem_opaque, lre_realloc);
255-
pt[0] = 'a';
256-
pt[1] = 'z' + 1;
257-
ret = cr_op(&a, cr->points, cr->len, pt, 2, CR_OP_INTER);
258-
if (ret)
259-
goto fail;
260-
/* convert to upper case */
261-
/* XXX: the generic unicode case would be much more complicated
262-
and not really useful */
263-
for(i = 0; i < a.len; i++) {
264-
a.points[i] += 'A' - 'a';
265-
}
266-
/* Note: for simplicity we keep the lower case ranges */
267-
ret = cr_union1(cr, a.points, a.len);
268-
fail:
269-
cr_free(&a);
270-
return ret;
271-
}
272-
273218
#ifdef DUMP_REOP
274219
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
275220
int buf_len)
@@ -922,7 +867,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
922867
}
923868
}
924869
if (s->ignore_case) {
925-
if (cr_canonicalize(cr))
870+
if (cr_regexp_canonicalize(cr, s->is_utf16))
926871
goto memory_error;
927872
}
928873
if (invert) {

libunicode-table.h

Lines changed: 61 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3779,72 +3779,70 @@ static const uint8_t unicode_prop_Changes_When_Titlecased1_table[22] = {
37793779
0x8b, 0x80, 0x8e, 0x80, 0xae, 0x80,
37803780
};
37813781

3782-
static const uint8_t unicode_prop_Changes_When_Casefolded1_table[33] = {
3783-
0x40, 0xde, 0x80, 0xcf, 0x80, 0x97, 0x80, 0x44,
3784-
0x3c, 0x80, 0x59, 0x11, 0x80, 0x40, 0xe4, 0x3f,
3785-
0x3f, 0x87, 0x89, 0x11, 0x05, 0x02, 0x11, 0x80,
3786-
0xa9, 0x11, 0x80, 0x60, 0xdb, 0x07, 0x86, 0x8b,
3787-
0x84,
3782+
static const uint8_t unicode_prop_Changes_When_Casefolded1_table[29] = {
3783+
0x41, 0xef, 0x80, 0x41, 0x9e, 0x80, 0x9e, 0x80,
3784+
0x5a, 0xe4, 0x83, 0x40, 0xb5, 0x00, 0x00, 0x00,
3785+
0x80, 0xde, 0x06, 0x06, 0x80, 0x8a, 0x09, 0x81,
3786+
0x89, 0x10, 0x81, 0x8d, 0x80,
37883787
};
37893788

3790-
static const uint8_t unicode_prop_Changes_When_NFKC_Casefolded1_table[451] = {
3789+
static const uint8_t unicode_prop_Changes_When_NFKC_Casefolded1_table[447] = {
37913790
0x40, 0x9f, 0x06, 0x00, 0x01, 0x00, 0x01, 0x12,
3792-
0x10, 0x82, 0x9f, 0x80, 0xcf, 0x01, 0x80, 0x8b,
3793-
0x07, 0x80, 0xfb, 0x01, 0x01, 0x80, 0xa5, 0x80,
3794-
0x40, 0xbb, 0x88, 0x9e, 0x29, 0x84, 0xda, 0x08,
3795-
0x81, 0x89, 0x80, 0xa3, 0x04, 0x02, 0x04, 0x08,
3796-
0x80, 0xc9, 0x82, 0x9c, 0x80, 0x41, 0x93, 0x80,
3797-
0x40, 0x93, 0x80, 0xd7, 0x83, 0x42, 0xde, 0x87,
3798-
0xfb, 0x08, 0x80, 0xd2, 0x01, 0x80, 0xa1, 0x11,
3799-
0x80, 0x40, 0xfc, 0x81, 0x42, 0xd4, 0x80, 0xfe,
3800-
0x80, 0xa7, 0x81, 0xad, 0x80, 0xb5, 0x80, 0x88,
3801-
0x03, 0x03, 0x03, 0x80, 0x8b, 0x80, 0x88, 0x00,
3802-
0x26, 0x80, 0x90, 0x80, 0x88, 0x03, 0x03, 0x03,
3803-
0x80, 0x8b, 0x80, 0x41, 0x41, 0x80, 0xe1, 0x81,
3804-
0x46, 0x52, 0x81, 0xd4, 0x84, 0x45, 0x1b, 0x10,
3805-
0x8a, 0x80, 0x91, 0x80, 0x9b, 0x8c, 0x80, 0xa1,
3806-
0xa4, 0x40, 0xd9, 0x80, 0x40, 0xd5, 0x00, 0x00,
3807-
0x00, 0x00, 0x00, 0x00, 0x01, 0x3f, 0x3f, 0x87,
3808-
0x89, 0x11, 0x04, 0x00, 0x29, 0x04, 0x12, 0x80,
3809-
0x88, 0x12, 0x80, 0x88, 0x11, 0x11, 0x04, 0x08,
3810-
0x8f, 0x00, 0x20, 0x8b, 0x12, 0x2a, 0x08, 0x0b,
3811-
0x00, 0x07, 0x82, 0x8c, 0x06, 0x92, 0x81, 0x9a,
3812-
0x80, 0x8c, 0x8a, 0x80, 0xd6, 0x18, 0x10, 0x8a,
3813-
0x01, 0x0c, 0x0a, 0x00, 0x10, 0x11, 0x02, 0x06,
3814-
0x05, 0x1c, 0x85, 0x8f, 0x8f, 0x8f, 0x88, 0x80,
3815-
0x40, 0xa1, 0x08, 0x81, 0x40, 0xf7, 0x81, 0x41,
3816-
0x34, 0xd5, 0x99, 0x9a, 0x45, 0x20, 0x80, 0xe6,
3817-
0x82, 0xe4, 0x80, 0x41, 0x9e, 0x81, 0x40, 0xf0,
3818-
0x80, 0x41, 0x2e, 0x80, 0xd2, 0x80, 0x8b, 0x40,
3819-
0xd5, 0xa9, 0x80, 0xb4, 0x00, 0x82, 0xdf, 0x09,
3820-
0x80, 0xde, 0x80, 0xb0, 0xdd, 0x82, 0x8d, 0xdf,
3821-
0x9e, 0x80, 0xa7, 0x87, 0xae, 0x80, 0x41, 0x7f,
3822-
0x60, 0x72, 0x9b, 0x81, 0x40, 0xd1, 0x80, 0x40,
3823-
0x80, 0x12, 0x81, 0x43, 0x61, 0x83, 0x88, 0x80,
3824-
0x60, 0x4d, 0x95, 0x41, 0x0d, 0x08, 0x00, 0x81,
3825-
0x89, 0x00, 0x00, 0x09, 0x82, 0xc3, 0x81, 0xe9,
3826-
0xa5, 0x86, 0x8b, 0x24, 0x00, 0x97, 0x04, 0x00,
3827-
0x01, 0x01, 0x80, 0xeb, 0xa0, 0x41, 0x6a, 0x91,
3828-
0xbf, 0x81, 0xb5, 0xa7, 0x8c, 0x82, 0x99, 0x95,
3829-
0x94, 0x81, 0x8b, 0x80, 0x92, 0x03, 0x1a, 0x00,
3830-
0x80, 0x40, 0x86, 0x08, 0x80, 0x9f, 0x99, 0x40,
3831-
0x83, 0x15, 0x0d, 0x0d, 0x0a, 0x16, 0x06, 0x80,
3832-
0x88, 0x47, 0x87, 0x20, 0xa9, 0x80, 0x88, 0x60,
3833-
0xb4, 0xe4, 0x83, 0x54, 0xb9, 0x86, 0x8d, 0x87,
3834-
0xbf, 0x85, 0x42, 0x3e, 0xd4, 0x80, 0xc6, 0x01,
3835-
0x08, 0x09, 0x0b, 0x80, 0x8b, 0x00, 0x06, 0x80,
3836-
0xc0, 0x03, 0x0f, 0x06, 0x80, 0x9b, 0x03, 0x04,
3837-
0x00, 0x16, 0x80, 0x41, 0x53, 0x81, 0x41, 0x23,
3838-
0x81, 0xb1, 0x48, 0x2f, 0xbd, 0x4d, 0x91, 0x18,
3839-
0x9a, 0x01, 0x00, 0x08, 0x80, 0x89, 0x03, 0x00,
3840-
0x00, 0x28, 0x18, 0x00, 0x00, 0x02, 0x01, 0x00,
3841-
0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x0b,
3842-
0x06, 0x03, 0x03, 0x00, 0x80, 0x89, 0x80, 0x90,
3843-
0x22, 0x04, 0x80, 0x90, 0x42, 0x43, 0x8a, 0x84,
3844-
0x9e, 0x80, 0x9f, 0x99, 0x82, 0xa2, 0x80, 0xee,
3845-
0x82, 0x8c, 0xab, 0x83, 0x88, 0x31, 0x49, 0x9d,
3846-
0x89, 0x60, 0xfc, 0x05, 0x42, 0x1d, 0x6b, 0x05,
3847-
0xe1, 0x4f, 0xff,
3791+
0x10, 0x82, 0xf3, 0x80, 0x8b, 0x80, 0x40, 0x84,
3792+
0x01, 0x01, 0x80, 0xa2, 0x01, 0x80, 0x40, 0xbb,
3793+
0x88, 0x9e, 0x29, 0x84, 0xda, 0x08, 0x81, 0x89,
3794+
0x80, 0xa3, 0x04, 0x02, 0x04, 0x08, 0x07, 0x80,
3795+
0x9e, 0x80, 0xa0, 0x82, 0x9c, 0x80, 0x42, 0x28,
3796+
0x80, 0xd7, 0x83, 0x42, 0xde, 0x87, 0xfb, 0x08,
3797+
0x80, 0xd2, 0x01, 0x80, 0xa1, 0x11, 0x80, 0x40,
3798+
0xfc, 0x81, 0x42, 0xd4, 0x80, 0xfe, 0x80, 0xa7,
3799+
0x81, 0xad, 0x80, 0xb5, 0x80, 0x88, 0x03, 0x03,
3800+
0x03, 0x80, 0x8b, 0x80, 0x88, 0x00, 0x26, 0x80,
3801+
0x90, 0x80, 0x88, 0x03, 0x03, 0x03, 0x80, 0x8b,
3802+
0x80, 0x41, 0x41, 0x80, 0xe1, 0x81, 0x46, 0x52,
3803+
0x81, 0xd4, 0x84, 0x45, 0x1b, 0x10, 0x8a, 0x80,
3804+
0x91, 0x80, 0x9b, 0x8c, 0x80, 0xa1, 0xa4, 0x40,
3805+
0xd5, 0x83, 0x40, 0xb5, 0x00, 0x00, 0x00, 0x80,
3806+
0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
3807+
0xb7, 0x05, 0x00, 0x13, 0x05, 0x11, 0x02, 0x0c,
3808+
0x11, 0x00, 0x00, 0x0c, 0x15, 0x05, 0x08, 0x8f,
3809+
0x00, 0x20, 0x8b, 0x12, 0x2a, 0x08, 0x0b, 0x00,
3810+
0x07, 0x82, 0x8c, 0x06, 0x92, 0x81, 0x9a, 0x80,
3811+
0x8c, 0x8a, 0x80, 0xd6, 0x18, 0x10, 0x8a, 0x01,
3812+
0x0c, 0x0a, 0x00, 0x10, 0x11, 0x02, 0x06, 0x05,
3813+
0x1c, 0x85, 0x8f, 0x8f, 0x8f, 0x88, 0x80, 0x40,
3814+
0xa1, 0x08, 0x81, 0x40, 0xf7, 0x81, 0x41, 0x34,
3815+
0xd5, 0x99, 0x9a, 0x45, 0x20, 0x80, 0xe6, 0x82,
3816+
0xe4, 0x80, 0x41, 0x9e, 0x81, 0x40, 0xf0, 0x80,
3817+
0x41, 0x2e, 0x80, 0xd2, 0x80, 0x8b, 0x40, 0xd5,
3818+
0xa9, 0x80, 0xb4, 0x00, 0x82, 0xdf, 0x09, 0x80,
3819+
0xde, 0x80, 0xb0, 0xdd, 0x82, 0x8d, 0xdf, 0x9e,
3820+
0x80, 0xa7, 0x87, 0xae, 0x80, 0x41, 0x7f, 0x60,
3821+
0x72, 0x9b, 0x81, 0x40, 0xd1, 0x80, 0x40, 0x80,
3822+
0x12, 0x81, 0x43, 0x61, 0x83, 0x88, 0x80, 0x60,
3823+
0x4d, 0x95, 0x41, 0x0d, 0x08, 0x00, 0x81, 0x89,
3824+
0x00, 0x00, 0x09, 0x82, 0xc3, 0x81, 0xe9, 0xc2,
3825+
0x00, 0x97, 0x04, 0x00, 0x01, 0x01, 0x80, 0xeb,
3826+
0xa0, 0x41, 0x6a, 0x91, 0xbf, 0x81, 0xb5, 0xa7,
3827+
0x8c, 0x82, 0x99, 0x95, 0x94, 0x81, 0x8b, 0x80,
3828+
0x92, 0x03, 0x1a, 0x00, 0x80, 0x40, 0x86, 0x08,
3829+
0x80, 0x9f, 0x99, 0x40, 0x83, 0x15, 0x0d, 0x0d,
3830+
0x0a, 0x16, 0x06, 0x80, 0x88, 0x47, 0x87, 0x20,
3831+
0xa9, 0x80, 0x88, 0x60, 0xb4, 0xe4, 0x83, 0x54,
3832+
0xb9, 0x86, 0x8d, 0x87, 0xbf, 0x85, 0x42, 0x3e,
3833+
0xd4, 0x80, 0xc6, 0x01, 0x08, 0x09, 0x0b, 0x80,
3834+
0x8b, 0x00, 0x06, 0x80, 0xc0, 0x03, 0x0f, 0x06,
3835+
0x80, 0x9b, 0x03, 0x04, 0x00, 0x16, 0x80, 0x41,
3836+
0x53, 0x81, 0x41, 0x23, 0x81, 0xb1, 0x48, 0x2f,
3837+
0xbd, 0x4d, 0x91, 0x18, 0x9a, 0x01, 0x00, 0x08,
3838+
0x80, 0x89, 0x03, 0x00, 0x00, 0x28, 0x18, 0x00,
3839+
0x00, 0x02, 0x01, 0x00, 0x08, 0x00, 0x00, 0x00,
3840+
0x00, 0x01, 0x00, 0x0b, 0x06, 0x03, 0x03, 0x00,
3841+
0x80, 0x89, 0x80, 0x90, 0x22, 0x04, 0x80, 0x90,
3842+
0x42, 0x43, 0x8a, 0x84, 0x9e, 0x80, 0x9f, 0x99,
3843+
0x82, 0xa2, 0x80, 0xee, 0x82, 0x8c, 0xab, 0x83,
3844+
0x88, 0x31, 0x49, 0x9d, 0x89, 0x60, 0xfc, 0x05,
3845+
0x42, 0x1d, 0x6b, 0x05, 0xe1, 0x4f, 0xff,
38483846
};
38493847

38503848
static const uint8_t unicode_prop_ASCII_Hex_Digit_table[5] = {

0 commit comments

Comments
 (0)