Mathias Bynens | 34faeb4 | 2023-01-16 07:56:07 | [diff] [blame] | 1 | <!DOCTYPE html> |
| 2 | <meta charset="utf-8"> |
| 3 | <!-- |
| 4 | Copyright (C) 2017 the V8 project authors. All rights reserved. |
| 5 | This code is governed by the BSD license found in the LICENSE file. |
| 6 | --> |
<title>JavaScript RegExp lookbehind assertions</title>
| 8 | <script src="/resources/testharness.js"></script> |
| 9 | <script src="/resources/testharnessreport.js"></script> |
| 10 | |
| 11 | <script> |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/alternations.js
  //
  // Each row is [subject, pattern, expected match array, assertion label].
  const cases = [
    ["xabcd", /.*(?<=(..|...|....))(.*)/, ["xabcd", "cd", ""], "#1"],
    ["xabcd", /.*(?<=(xx|...|....))(.*)/, ["xabcd", "bcd", ""], "#2"],
    ["xxabcd", /.*(?<=(xx|...))(.*)/, ["xxabcd", "bcd", ""], "#3"],
    ["xxabcd", /.*(?<=(xx|xxx))(.*)/, ["xxabcd", "xx", "abcd"], "#4"],
  ];
  for (const [subject, pattern, expected, label] of cases) {
    assert_array_equals(subject.match(pattern), expected, label);
  }
}, "Alternations are tried left to right, with no backtracking into a lookbehind");
| 19 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/back-references.js

  // Rows that must match: [subject, pattern, expected match array, label].
  const matching = [
    ["abb", /(.)(?<=(\1\1))/, ["b", "b", "bb"], "#1"],
    ["abB", /(.)(?<=(\1\1))/i, ["B", "B", "bB"], "#2"],
    ["aabAaBa", /((\w)\w)(?<=\1\2\1)/i, ["aB", "aB", "a"], "#3"],
    ["aabAaBa", /(\w(\w))(?<=\1\2\1)/i, ["Ba", "Ba", "a"], "#4"],
    ["abaBbAa", /(?=(\w))(?<=(\1))./i, ["b", "b", "B"], "#5"],
    [" 'foo' ", /(?<=(.))(\w+)(?=\1)/, ["foo", "'", "foo"], "#6"],
    [" \"foo\" ", /(?<=(.))(\w+)(?=\1)/, ["foo", "\"", "foo"], "#7"],
    ["abbb", /(.)(?<=\1\1\1)/, ["b", "b"], "#8"],
    ["fababab", /(..)(?<=\1\1\1)/, ["ab", "ab"], "#9"],
  ];
  for (const [subject, pattern, expected, label] of matching) {
    assert_array_equals(subject.match(pattern), expected, label);
  }

  // Rows that must not match: [subject, pattern, label].
  const nonMatching = [
    [" .foo\" ", /(?<=(.))(\w+)(?=\1)/, "#10"],
    ["ab", /(.)(?<=\1\1\1)/, "#11"],
    ["abb", /(.)(?<=\1\1\1)/, "#12"],
    ["ab", /(..)(?<=\1\1\1)/, "#13"],
    ["abb", /(..)(?<=\1\1\1)/, "#14"],
    ["aabb", /(..)(?<=\1\1\1)/, "#15"],
    ["abab", /(..)(?<=\1\1\1)/, "#16"],
    ["fabxbab", /(..)(?<=\1\1\1)/, "#17"],
    ["faxabab", /(..)(?<=\1\1\1)/, "#18"],
  ];
  for (const [subject, pattern, label] of nonMatching) {
    assert_equals(subject.match(pattern), null, label);
  }
}, "Back-references");
| 41 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/back-references-to-captures.js
  //
  // In #1-#5 the back-reference \1 is written before the group it refers to,
  // both inside the same lookbehind; #6 uses the conventional order.
  const expectMatch = (subject, pattern, expected, label) =>
    assert_array_equals(subject.match(pattern), expected, label);
  const expectNoMatch = (subject, pattern, label) =>
    assert_equals(subject.match(pattern), null, label);

  expectMatch("abcCd", /(?<=\1(\w))d/i, ["d", "C"], "#1");
  expectMatch("abxxd", /(?<=\1([abx]))d/, ["d", "x"], "#2");
  expectMatch("ababc", /(?<=\1(\w+))c/, ["c", "ab"], "#3");
  expectMatch("ababbc", /(?<=\1(\w+))c/, ["c", "b"], "#4");
  expectNoMatch("ababdc", /(?<=\1(\w+))c/, "#5");
  expectMatch("ababc", /(?<=(\w+)\1)c/, ["c", "abab"], "#6");
}, "Back-references to captures inside the lookbehind");
| 51 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/captures.js
  const str = "abcdef";

  // Each row is [pattern, expected result of str.match(pattern), label].
  // Rows #8 and #9 use the g flag, so match() returns every match rather than
  // a single match plus its captures.
  const cases = [
    [/(?<=(c))def/, ["def", "c"], "#1"],
    [/(?<=(\w{2}))def/, ["def", "bc"], "#2"],
    [/(?<=(\w(\w)))def/, ["def", "bc", "c"], "#3"],
    [/(?<=(\w){3})def/, ["def", "a"], "#4"],
    [/(?<=(bc)|(cd))./, ["d", "bc", undefined], "#5"],
    [/(?<=([ab]{1,2})\D|(abc))\w/, ["c", "a", undefined], "#6"],
    [/\D(?<=([ab]+))(\w)/, ["ab", "a", "b"], "#7"],
    [/(?<=b|c)\w/g, ["c", "d"], "#8"],
    [/(?<=[b-e])\w{2}/g, ["cd", "ef"], "#9"],
  ];
  for (const [pattern, expected, label] of cases) {
    assert_array_equals(str.match(pattern), expected, label);
  }
}, "Capturing matches");
| 65 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/captures-negative.js
  //
  // The group inside the negative lookbehind is expected to be reported as
  // undefined in the overall match result.
  const result = "abcdef".match(/(?<!(^|[ab]))\w{2}/);
  const expected = ["de", undefined];
  assert_array_equals(result, expected);
}, "Captures inside negative lookbehind");
| 70 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/do-not-backtrack.js
  //
  // The lookbehind captures "abc", so the trailing \1 cannot match. The engine
  // must not re-enter the lookbehind to capture just "bc" instead.
  const subject = "abcdbc";
  const pattern = /(?<=([abc]+)).\1/;
  assert_equals(subject.match(pattern), null);
}, "Do not backtrack into a lookbehind");
| 77 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/greedy-loop.js
  //
  // Each row is [subject, pattern, expected match array, label]. In every row
  // the quantified group inside the lookbehind captures the longest run.
  const cases = [
    ["abbbbbbc", /(?<=(b+))c/, ["c", "bbbbbb"], "#1"],
    ["ab1234c", /(?<=(b\d+))c/, ["c", "b1234"], "#2"],
    ["ab12b23b34c", /(?<=((?:b\d{2})+))c/, ["c", "b12b23b34"], "#3"],
  ];
  for (const [subject, pattern, expected, label] of cases) {
    assert_array_equals(subject.match(pattern), expected, label);
  }
}, "Greedy loop");
| 84 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/misc.js

  // Subjects that must not match: [subject, pattern, label].
  const nonMatching = [
    ["abcdef", /(?<=$abc)def/, "#1"],
    ["fno", /^f.o(?<=foo)$/, "#2"],
    ["foo", /^foo(?<!foo)$/, "#3"],
    ["foo", /^f.o(?<!foo)$/, "#4"],
  ];
  for (const [subject, pattern, label] of nonMatching) {
    assert_equals(subject.match(pattern), null, label);
  }

  // Subjects that must match in full: [subject, pattern, label].
  const wholeMatches = [
    ["foo", /^foo(?<=foo)$/, "#5"],
    ["foo", /^f.o(?<=foo)$/, "#6"],
    ["fno", /^f.o(?<!foo)$/, "#7"],
    ["foooo", /^foooo(?<=fo+)$/, "#8"],
    ["foooo", /^foooo(?<=fo*)$/, "#9"],
  ];
  for (const [subject, pattern, label] of wholeMatches) {
    assert_array_equals(subject.match(pattern), [subject], label);
  }

  // A back-reference to the group it is nested in, with and without the i
  // flag: [pattern, subject, label].
  const selfReferences = [
    [/(abc\1)/, "abc", "#10"],
    [/(abc\1)/, "abc\u1234", "#11"],
    [/(abc\1)/i, "abc", "#12"],
    [/(abc\1)/i, "abc\u1234", "#13"],
  ];
  for (const [pattern, subject, label] of selfReferences) {
    assert_array_equals(pattern.exec(subject), ["abc", "abc"], label);
  }
}, "Miscellaneous");
| 101 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/mutual-recursive.js
  //
  // Each row is [pattern, subject, expected exec() result, label]. The groups
  // inside each lookbehind refer to one another (or to themselves) by number.
  const cases = [
    [/(?<=a(.\2)b(\1)).{4}/, "aabcacbc", ["cacb", "a", ""], "#1"],
    [/(?<=a(\2)b(..\1))b/, "aacbacb", ["b", "ac", "ac"], "#2"],
    [/(?<=(?:\1b)(aa))./, "aabaax", ["x", "aa"], "#3"],
    [/(?<=(?:\1|b)(aa))./, "aaaax", ["x", "aa"], "#4"],
  ];
  for (const [pattern, subject, expected, label] of cases) {
    assert_array_equals(pattern.exec(subject), expected, label);
  }
}, "Mutual recursive capture/back references");
| 109 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/negative.js
  const subject = "abcdef";

  // Patterns whose first match in the subject is "abc": [pattern, label].
  const matchingAbc = [
    [/(?<!abc)\w\w\w/, "#1"],
    [/(?<!a.c)\w\w\w/, "#2"],
    [/(?<!a\wc)\w\w\w/, "#3"],
    [/(?<!a[a-z])\w\w\w/, "#4"],
    [/(?<!a[a-z]{2})\w\w\w/, "#5"],
  ];
  for (const [pattern, label] of matchingAbc) {
    assert_array_equals(subject.match(pattern), ["abc"], label);
  }

  // Patterns the negative lookbehind must reject entirely: [pattern, label].
  const rejected = [
    [/(?<!abc)def/, "#6"],
    [/(?<!a.c)def/, "#7"],
    [/(?<!a\wc)def/, "#8"],
    [/(?<!a[a-z][a-z])def/, "#9"],
    [/(?<!a[a-z]{2})def/, "#10"],
    [/(?<!a{1}b{1})cde/, "#11"],
    [/(?<!a{1}[a-z]{2})def/, "#12"],
  ];
  for (const [pattern, label] of rejected) {
    assert_equals(subject.match(pattern), null, label);
  }
}, "Negative lookbehinds");
| 125 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/nested-lookaround.js
  //
  // Lookaheads and negative lookbehinds nested inside a lookbehind.
  const expectMatch = (subject, pattern, expected, label) =>
    assert_array_equals(subject.match(pattern), expected, label);
  const expectNoMatch = (subject, pattern, label) =>
    assert_equals(subject.match(pattern), null, label);

  expectMatch("abcdef", /(?<=ab(?=c)\wd)\w\w/, ["ef"], "#1");
  expectMatch("abcdef", /(?<=a(?=([^a]{2})d)\w{3})\w\w/, ["ef", "bc"], "#2");
  expectMatch("abcdef", /(?<=a(?=([bc]{2}(?<!a{2}))d)\w{3})\w\w/, ["ef", "bc"], "#3");
  expectMatch("faaao", /^faaao?(?<=^f[oa]+(?=o))/, ["faaa"], "#4");
  expectNoMatch("abcdef", /(?<=a(?=([bc]{2}(?<!a*))d)\w{3})\w\w/, "#5");
}, "Nested lookaround");
| 134 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/simple-fixed-length.js

  // Rows that must not match: [subject, pattern, label].
  const nonMatching = [
    ["b", /^.(?<=a)/, "#1"],
    ["boo", /^f\w\w(?<=\woo)/, "#2"],
    ["fao", /^f\w\w(?<=\woo)/, "#3"],
    ["foa", /^f\w\w(?<=\woo)/, "#4"],
  ];
  for (const [subject, pattern, label] of nonMatching) {
    assert_equals(subject.match(pattern), null, label);
  }

  // Rows that must match: [subject, pattern, expected match array, label].
  const matching = [
    ["a", /^.(?<=a)/, ["a"], "#5"],
    ["foo1", /^f..(?<=.oo)/, ["foo"], "#6"],
    ["foo2", /^f\w\w(?<=\woo)/, ["foo"], "#7"],
    ["abcdef", /(?<=abc)\w\w\w/, ["def"], "#8"],
    ["abcdef", /(?<=a.c)\w\w\w/, ["def"], "#9"],
    ["abcdef", /(?<=a\wc)\w\w\w/, ["def"], "#10"],
    ["abcdef", /(?<=a[a-z])\w\w\w/, ["cde"], "#11"],
    ["abcdef", /(?<=a[a-z][a-z])\w\w\w/, ["def"], "#12"],
    ["abcdef", /(?<=a[a-z]{2})\w\w\w/, ["def"], "#13"],
    ["abcdef", /(?<=a{1})\w\w\w/, ["bcd"], "#14"],
    ["abcdef", /(?<=a{1}b{1})\w\w\w/, ["cde"], "#15"],
    ["abcdef", /(?<=a{1}[a-z]{2})\w\w\w/, ["def"], "#16"],
  ];
  for (const [subject, pattern, expected, label] of matching) {
    assert_array_equals(subject.match(pattern), expected, label);
  }
}, "Simple fixed-length matches");
| 154 | |
test(t => {
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/sliced-strings.js
  //
  // Every subject is produced by slice(), and the lookbehind back-references a
  // capture that is longer than the text available before the match position,
  // so no match is possible.
  // NOTE(review): presumably this guards against an engine reading before the
  // start of a sliced string (hence "oob_subject") - confirm against upstream.
  //
  // Assertions are numbered like the other tests in this file so that a
  // failure report identifies which one failed.
  const oob_subject = "abcdefghijklmnabcdefghijklmn".slice(14);
  assert_equals(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/i), null, "#1");
  assert_equals(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/), null, "#2");
  assert_equals("abcdefgabcdefg".slice(1).match(/(?=(abcdefg))(?<=\1)/), null, "#3");
}, "Sliced strings");
| 162 | |
test(t => {
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/start-of-line.js
  //
  // Exercises ^ (and $) inside lookbehinds, with and without the m flag.
  assert_equals("abcdef".match(/(?<=^[^a-c]{3})def/), null, "#1");
  // #2 and #3: the patterns previously began with a stray `"` (/"^foooo.../).
  // A literal quote followed by ^ can never match here, so both assertions
  // passed without evaluating the lookbehind. With the quote removed they
  // check that ^ inside the lookbehind rejects a run of o's that does not
  // begin at the start of the input.
  assert_equals("foooo".match(/^foooo(?<=^o+)$/), null, "#2");
  assert_equals("foooo".match(/^foooo(?<=^o*)$/), null, "#3");
  assert_array_equals("abcdef".match(/(?<=^abc)def/), ["def"], "#4");
  assert_array_equals("abcdef".match(/(?<=^[a-c]{3})def/), ["def"], "#5");
  assert_array_equals("xyz\nabcdef".match(/(?<=^[a-c]{3})def/m), ["def"], "#6");
  assert_array_equals("ab\ncd\nefg".match(/(?<=^)\w+/gm), ["ab", "cd", "efg"], "#7");
  assert_array_equals("ab\ncd\nefg".match(/\w+(?<=$)/gm), ["ab", "cd", "efg"], "#8");
  assert_array_equals("ab\ncd\nefg".match(/(?<=^)\w+(?<=$)/gm), ["ab", "cd", "efg"], "#9");
  assert_array_equals("foo".match(/^foo(?<=^fo+)$/), ["foo"], "#10");
  assert_array_equals("foooo".match(/^foooo(?<=^fo*)/), ["foooo"], "#11");
  assert_array_equals("foo".match(/^(f)oo(?<=^\1o+)$/), ["foo", "f"], "#12");
  assert_array_equals("foo".match(/^(f)oo(?<=^\1o+)$/i), ["foo", "f"], "#13");
  assert_array_equals("foo\u1234".match(/^(f)oo(?<=^\1o+).$/i), ["foo\u1234", "f"], "#14");
  assert_array_equals("abcdefdef".match(/(?<=^\w+)def/), ["def"], "#15");
  assert_array_equals("abcdefdef".match(/(?<=^\w+)def/g), ["def", "def"], "#16");
}, "Start of line matches");
| 182 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/sticky.js
  //
  // Despite the test name, both regexes carry the global (g) flag, not the
  // sticky (y) flag. Each second exec() call therefore resumes from the
  // lastIndex left behind by the first, which is why call order matters here.
  const subject = "abcdefdef";

  const anchoredLookbehind = /(?<=^(\w+))def/g;
  assert_array_equals(anchoredLookbehind.exec(subject), ["def", "abc"], "#1");
  assert_array_equals(anchoredLookbehind.exec(subject), ["def", "abcdef"], "#2");

  const nonWordBoundary = /\Bdef/g;
  assert_array_equals(nonWordBoundary.exec(subject), ["def"], "#3");
  assert_array_equals(nonWordBoundary.exec(subject), ["def"], "#4");
}, "Sticky matches");
| 192 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/variable-length.js
  //
  // Both lookbehinds use a * quantifier, so their length is not fixed.
  // Each row is [pattern, label]; every pattern must match "def".
  const subject = "abcdef";
  const cases = [
    [/(?<=[a|b|c]*)[^a|b|c]{3}/, "#1"],
    [/(?<=\w*)[^a|b|c]{3}/, "#2"],
  ];
  for (const [pattern, label] of cases) {
    assert_array_equals(subject.match(pattern), ["def"], label);
  }
}, "Variable-length matches");
| 198 | |
test(() => {
  // Ported from:
  // https://github.com/tc39/test262/blob/main/test/built-ins/RegExp/lookBehind/word-boundary.js
  //
  // \b and \B used inside lookbehinds.
  const expectDef = (subject, pattern, label) =>
    assert_array_equals(subject.match(pattern), ["def"], label);

  expectDef("abc def", /(?<=\b)[d-f]{3}/, "#1");
  expectDef("ab cdef", /(?<=\B)\w{3}/, "#2");
  expectDef("ab cdef", /(?<=\B)(?<=c(?<=\w))\w{3}/, "#3");

  const noBoundaryResult = "abcdef".match(/(?<=\b)[d-f]{3}/);
  assert_equals(noBoundaryResult, null, "#4");
}, "Word boundary matches");
| 206 | </script> |