@@ -66,7 +66,7 @@ typedef struct {
6666 const uint8_t * buf_end ;
6767 const uint8_t * buf_start ;
6868 int re_flags ;
69- BOOL is_utf16 ;
69+ BOOL is_unicode ;
7070 BOOL ignore_case ;
7171 BOOL dotall ;
7272 int capture_count ;
@@ -224,7 +224,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
224224
225225 assert (buf_len >= RE_HEADER_LEN );
226226
227- re_flags = buf [0 ];
227+ re_flags = buf [0 ];
228228 bc_len = get_u32 (buf + 3 );
229229 assert (bc_len + RE_HEADER_LEN <= buf_len );
230230 printf ("flags: 0x%x capture_count=%d stack_size=%d\n" ,
@@ -696,10 +696,10 @@ static int get_class_atom(REParseState *s, CharRange *cr,
696696 if ((c >= 'a' && c <= 'z' ) ||
697697 (c >= 'A' && c <= 'Z' ) ||
698698 (((c >= '0' && c <= '9' ) || c == '_' ) &&
699- inclass && !s -> is_utf16 )) { /* Annex B.1.4 */
699+ inclass && !s -> is_unicode )) { /* Annex B.1.4 */
700700 c &= 0x1f ;
701701 p ++ ;
702- } else if (s -> is_utf16 ) {
702+ } else if (s -> is_unicode ) {
703703 goto invalid_escape ;
704704 } else {
705705 /* otherwise return '\' and 'c' */
@@ -710,7 +710,7 @@ static int get_class_atom(REParseState *s, CharRange *cr,
710710#ifdef CONFIG_ALL_UNICODE
711711 case 'p' :
712712 case 'P' :
713- if (s -> is_utf16 ) {
713+ if (s -> is_unicode ) {
714714 if (parse_unicode_property (s , cr , & p , (c == 'P' )))
715715 return -1 ;
716716 c = CLASS_RANGE_BASE ;
@@ -720,14 +720,14 @@ static int get_class_atom(REParseState *s, CharRange *cr,
720720#endif
721721 default :
722722 p -- ;
723- ret = lre_parse_escape (& p , s -> is_utf16 * 2 );
723+ ret = lre_parse_escape (& p , s -> is_unicode * 2 );
724724 if (ret >= 0 ) {
725725 c = ret ;
726726 } else {
727727 if (ret == -2 && * p != '\0' && strchr ("^$\\.*+?()[]{}|/" , * p )) {
728728 /* always valid to escape these characters */
729729 goto normal_char ;
730- } else if (s -> is_utf16 ) {
730+ } else if (s -> is_unicode ) {
731731 invalid_escape :
732732 return re_parse_error (s , "invalid escape sequence in regular expression" );
733733 } else {
@@ -749,7 +749,7 @@ static int get_class_atom(REParseState *s, CharRange *cr,
749749 /* normal char */
750750 if (c >= 128 ) {
751751 c = unicode_from_utf8 (p , UTF8_CHAR_LEN_MAX , & p );
752- if ((unsigned )c > 0xffff && !s -> is_utf16 ) {
752+ if ((unsigned )c > 0xffff && !s -> is_unicode ) {
753753 /* XXX: should handle non BMP-1 code points */
754754 return re_parse_error (s , "malformed unicode char" );
755755 }
@@ -811,11 +811,13 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
811811 cr_init (cr , s -> opaque , lre_realloc );
812812 p = * pp ;
813813 p ++ ; /* skip '[' */
814+
814815 invert = FALSE;
815816 if (* p == '^' ) {
816817 p ++ ;
817818 invert = TRUE;
818819 }
820+
819821 for (;;) {
820822 if (* p == ']' )
821823 break ;
@@ -825,7 +827,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
825827 if (* p == '-' && p [1 ] != ']' ) {
826828 const uint8_t * p0 = p + 1 ;
827829 if (c1 >= CLASS_RANGE_BASE ) {
828- if (s -> is_utf16 ) {
830+ if (s -> is_unicode ) {
829831 cr_free (cr1 );
830832 goto invalid_class_range ;
831833 }
@@ -837,7 +839,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
837839 goto fail ;
838840 if (c2 >= CLASS_RANGE_BASE ) {
839841 cr_free (cr1 );
840- if (s -> is_utf16 ) {
842+ if (s -> is_unicode ) {
841843 goto invalid_class_range ;
842844 }
843845 /* Annex B: match '-' character */
@@ -866,7 +868,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
866868 }
867869 }
868870 if (s -> ignore_case ) {
869- if (cr_regexp_canonicalize (cr , s -> is_utf16 ))
871+ if (cr_regexp_canonicalize (cr , s -> is_unicode ))
870872 goto memory_error ;
871873 }
872874 if (invert ) {
@@ -1161,7 +1163,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
11611163 re_emit_op (s , REOP_prev );
11621164 break ;
11631165 case '{' :
1164- if (s -> is_utf16 ) {
1166+ if (s -> is_unicode ) {
11651167 return re_parse_error (s , "syntax error" );
11661168 } else if (!is_digit (p [1 ])) {
11671169 /* Annex B: we accept '{' not followed by digits as a
@@ -1213,7 +1215,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
12131215 lookahead :
12141216 /* Annex B allows lookahead to be used as an atom for
12151217 the quantifiers */
1216- if (!s -> is_utf16 && !is_backward_lookahead ) {
1218+ if (!s -> is_unicode && !is_backward_lookahead ) {
12171219 last_atom_start = s -> byte_code .size ;
12181220 last_capture_count = s -> capture_count ;
12191221 }
@@ -1289,15 +1291,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
12891291 /* annex B: we tolerate invalid group names in non
12901292 unicode mode if there is no named capture
12911293 definition */
1292- if (s -> is_utf16 || re_has_named_captures (s ))
1294+ if (s -> is_unicode || re_has_named_captures (s ))
12931295 return re_parse_error (s , "expecting group name" );
12941296 else
12951297 goto parse_class_atom ;
12961298 }
12971299 p1 += 3 ;
12981300 if (re_parse_group_name (s -> u .tmp_buf , sizeof (s -> u .tmp_buf ),
12991301 & p1 )) {
1300- if (s -> is_utf16 || re_has_named_captures (s ))
1302+ if (s -> is_unicode || re_has_named_captures (s ))
13011303 return re_parse_error (s , "invalid group name" );
13021304 else
13031305 goto parse_class_atom ;
@@ -1308,7 +1310,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
13081310 after (inefficient, but hopefully not common */
13091311 c = re_parse_captures (s , & dummy_res , s -> u .tmp_buf );
13101312 if (c < 0 ) {
1311- if (s -> is_utf16 || re_has_named_captures (s ))
1313+ if (s -> is_unicode || re_has_named_captures (s ))
13121314 return re_parse_error (s , "group name not defined" );
13131315 else
13141316 goto parse_class_atom ;
@@ -1320,7 +1322,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
13201322 case '0' :
13211323 p += 2 ;
13221324 c = 0 ;
1323- if (s -> is_utf16 ) {
1325+ if (s -> is_unicode ) {
13241326 if (is_digit (* p )) {
13251327 return re_parse_error (s , "invalid decimal escape in regular expression" );
13261328 }
@@ -1342,7 +1344,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
13421344
13431345 c = parse_digits (& p , FALSE);
13441346 if (c < 0 || (c >= s -> capture_count && c >= re_count_captures (s ))) {
1345- if (!s -> is_utf16 ) {
1347+ if (!s -> is_unicode ) {
13461348 /* Annex B.1.4: accept legacy octal */
13471349 p = q ;
13481350 if (* p <= '7' ) {
@@ -1384,7 +1386,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
13841386 break ;
13851387 case ']' :
13861388 case '}' :
1387- if (s -> is_utf16 )
1389+ if (s -> is_unicode )
13881390 return re_parse_error (s , "syntax error" );
13891391 goto parse_class_atom ;
13901392 default :
@@ -1406,7 +1408,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
14061408 return -1 ;
14071409 } else {
14081410 if (s -> ignore_case )
1409- c = lre_canonicalize (c , s -> is_utf16 );
1411+ c = lre_canonicalize (c , s -> is_unicode );
14101412 if (c <= 0xffff )
14111413 re_emit_op_u16 (s , REOP_char , c );
14121414 else
@@ -1442,7 +1444,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
14421444 /* As an extension (see ES6 annex B), we accept '{' not
14431445 followed by digits as a normal atom */
14441446 if (!is_digit (p [1 ])) {
1445- if (s -> is_utf16 )
1447+ if (s -> is_unicode )
14461448 goto invalid_quant_count ;
14471449 break ;
14481450 }
@@ -1461,7 +1463,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
14611463 quant_max = INT32_MAX ; /* infinity */
14621464 }
14631465 }
1464- if (* p != '}' && !s -> is_utf16 ) {
1466+ if (* p != '}' && !s -> is_unicode ) {
14651467 /* Annex B: normal atom if invalid '{' syntax */
14661468 p = p1 ;
14671469 break ;
@@ -1753,7 +1755,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
17531755 s -> buf_end = s -> buf_ptr + buf_len ;
17541756 s -> buf_start = s -> buf_ptr ;
17551757 s -> re_flags = re_flags ;
1756- s -> is_utf16 = ((re_flags & LRE_FLAG_UTF16 ) != 0 );
1758+ s -> is_unicode = ((re_flags & LRE_FLAG_UNICODE ) != 0 );
17571759 is_sticky = ((re_flags & LRE_FLAG_STICKY ) != 0 );
17581760 s -> ignore_case = ((re_flags & LRE_FLAG_IGNORECASE ) != 0 );
17591761 s -> dotall = ((re_flags & LRE_FLAG_DOTALL ) != 0 );
@@ -1861,11 +1863,11 @@ static BOOL is_word_char(uint32_t c)
18611863 } \
18621864 } while (0)
18631865
1864- #define PEEK_CHAR (c , cptr , cbuf_end ) \
1865- do { \
1866- if (cbuf_type == 0) { \
1867- c = cptr[0]; \
1868- } else { \
1866+ #define PEEK_CHAR (c , cptr , cbuf_end ) \
1867+ do { \
1868+ if (cbuf_type == 0) { \
1869+ c = cptr[0]; \
1870+ } else { \
18691871 uint32_t __c1; \
18701872 c = ((uint16_t *)cptr)[0]; \
18711873 if (c >= 0xd800 && c < 0xdc00 && \
@@ -1875,18 +1877,18 @@ static BOOL is_word_char(uint32_t c)
18751877 c = (((c & 0x3ff) << 10) | (__c1 & 0x3ff)) + 0x10000; \
18761878 } \
18771879 } \
1878- } \
1880+ } \
18791881 } while (0)
18801882
1881- #define PEEK_PREV_CHAR (c , cptr , cbuf_start ) \
1882- do { \
1883- if (cbuf_type == 0) { \
1884- c = cptr[-1]; \
1885- } else { \
1883+ #define PEEK_PREV_CHAR (c , cptr , cbuf_start ) \
1884+ do { \
1885+ if (cbuf_type == 0) { \
1886+ c = cptr[-1]; \
1887+ } else { \
18861888 uint32_t __c1; \
18871889 c = ((uint16_t *)cptr)[-1]; \
18881890 if (c >= 0xdc00 && c < 0xe000 && \
1889- cbuf_type == 2 && (cptr - 4) >= cbuf_start) { \
1891+ cbuf_type == 2 && (cptr - 4) >= cbuf_start) { \
18901892 __c1 = ((uint16_t *)cptr)[-2]; \
18911893 if (__c1 >= 0xd800 && __c1 < 0xdc00 ) { \
18921894 c = (((__c1 & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
@@ -1895,15 +1897,15 @@ static BOOL is_word_char(uint32_t c)
18951897 } \
18961898 } while (0)
18971899
1898- #define GET_PREV_CHAR (c , cptr , cbuf_start ) \
1899- do { \
1900- if (cbuf_type == 0) { \
1901- cptr--; \
1902- c = cptr[0]; \
1903- } else { \
1900+ #define GET_PREV_CHAR (c , cptr , cbuf_start ) \
1901+ do { \
1902+ if (cbuf_type == 0) { \
1903+ cptr--; \
1904+ c = cptr[0]; \
1905+ } else { \
19041906 uint32_t __c1; \
19051907 cptr -= 2; \
1906- c = ((uint16_t *)cptr)[0]; \
1908+ c = ((uint16_t *)cptr)[0]; \
19071909 if (c >= 0xdc00 && c < 0xe000 && \
19081910 cbuf_type == 2 && cptr > cbuf_start) { \
19091911 __c1 = ((uint16_t *)cptr)[-1]; \
@@ -1915,12 +1917,12 @@ static BOOL is_word_char(uint32_t c)
19151917 } \
19161918 } while (0)
19171919
1918- #define PREV_CHAR (cptr , cbuf_start ) \
1919- do { \
1920- if (cbuf_type == 0) { \
1921- cptr--; \
1922- } else { \
1923- cptr -= 2; \
1920+ #define PREV_CHAR (cptr , cbuf_start ) \
1921+ do { \
1922+ if (cbuf_type == 0) { \
1923+ cptr--; \
1924+ } else { \
1925+ cptr -= 2; \
19241926 if (cbuf_type == 2) { \
19251927 c = ((uint16_t *)cptr)[0]; \
19261928 if (c >= 0xdc00 && c < 0xe000 && cptr > cbuf_start) { \
@@ -1959,7 +1961,7 @@ typedef struct {
19591961 int stack_size_max ;
19601962 BOOL multi_line ;
19611963 BOOL ignore_case ;
1962- BOOL is_utf16 ;
1964+ BOOL is_unicode ;
19631965 void * opaque ; /* used for stack overflow check */
19641966
19651967 size_t state_size ;
@@ -2105,7 +2107,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
21052107 goto no_match ;
21062108 GET_CHAR (c , cptr , cbuf_end );
21072109 if (s -> ignore_case ) {
2108- c = lre_canonicalize (c , s -> is_utf16 );
2110+ c = lre_canonicalize (c , s -> is_unicode );
21092111 }
21102112 if (val != c )
21112113 goto no_match ;
@@ -2260,8 +2262,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22602262 GET_CHAR (c1 , cptr1 , cptr1_end );
22612263 GET_CHAR (c2 , cptr , cbuf_end );
22622264 if (s -> ignore_case ) {
2263- c1 = lre_canonicalize (c1 , s -> is_utf16 );
2264- c2 = lre_canonicalize (c2 , s -> is_utf16 );
2265+ c1 = lre_canonicalize (c1 , s -> is_unicode );
2266+ c2 = lre_canonicalize (c2 , s -> is_unicode );
22652267 }
22662268 if (c1 != c2 )
22672269 goto no_match ;
@@ -2274,8 +2276,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22742276 GET_PREV_CHAR (c1 , cptr1 , cptr1_start );
22752277 GET_PREV_CHAR (c2 , cptr , s -> cbuf );
22762278 if (s -> ignore_case ) {
2277- c1 = lre_canonicalize (c1 , s -> is_utf16 );
2278- c2 = lre_canonicalize (c2 , s -> is_utf16 );
2279+ c1 = lre_canonicalize (c1 , s -> is_unicode );
2280+ c2 = lre_canonicalize (c2 , s -> is_unicode );
22792281 }
22802282 if (c1 != c2 )
22812283 goto no_match ;
@@ -2294,7 +2296,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22942296 goto no_match ;
22952297 GET_CHAR (c , cptr , cbuf_end );
22962298 if (s -> ignore_case ) {
2297- c = lre_canonicalize (c , s -> is_utf16 );
2299+ c = lre_canonicalize (c , s -> is_unicode );
22982300 }
22992301 idx_min = 0 ;
23002302 low = get_u16 (pc + 0 * 4 );
@@ -2334,7 +2336,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
23342336 goto no_match ;
23352337 GET_CHAR (c , cptr , cbuf_end );
23362338 if (s -> ignore_case ) {
2337- c = lre_canonicalize (c , s -> is_utf16 );
2339+ c = lre_canonicalize (c , s -> is_unicode );
23382340 }
23392341 idx_min = 0 ;
23402342 low = get_u32 (pc + 0 * 8 );
@@ -2426,13 +2428,13 @@ int lre_exec(uint8_t **capture,
24262428 re_flags = bc_buf [RE_HEADER_FLAGS ];
24272429 s -> multi_line = (re_flags & LRE_FLAG_MULTILINE ) != 0 ;
24282430 s -> ignore_case = (re_flags & LRE_FLAG_IGNORECASE ) != 0 ;
2429- s -> is_utf16 = (re_flags & LRE_FLAG_UTF16 ) != 0 ;
2431+ s -> is_unicode = (re_flags & LRE_FLAG_UNICODE ) != 0 ;
24302432 s -> capture_count = bc_buf [RE_HEADER_CAPTURE_COUNT ];
24312433 s -> stack_size_max = bc_buf [RE_HEADER_STACK_SIZE ];
24322434 s -> cbuf = cbuf ;
24332435 s -> cbuf_end = cbuf + (clen << cbuf_type );
24342436 s -> cbuf_type = cbuf_type ;
2435- if (s -> cbuf_type == 1 && s -> is_utf16 )
2437+ if (s -> cbuf_type == 1 && s -> is_unicode )
24362438 s -> cbuf_type = 2 ;
24372439 s -> opaque = opaque ;
24382440
0 commit comments