@@ -1970,9 +1970,11 @@ void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
19701970 }
19711971}
19721972
1973+ namespace {
1974+
19731975// Check for [0-9A-Z_a-z].
1974- static void EmitWordCheck (RegExpMacroAssembler* assembler, Label* word,
1975- Label* non_word, bool fall_through_on_word) {
1976+ void EmitWordCheck (RegExpMacroAssembler* assembler, Label* word,
1977+ Label* non_word, bool fall_through_on_word) {
19761978 if (assembler->CheckSpecialCharacterClass (
19771979 fall_through_on_word ? 'w' : 'W' ,
19781980 fall_through_on_word ? non_word : word)) {
@@ -1994,24 +1996,37 @@ static void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
19941996
19951997// Emit the code to check for a ^ in multiline mode (1-character lookbehind
19961998// that matches newline or the start of input).
1997- static void EmitHat (RegExpCompiler* compiler, RegExpNode* on_success,
1998- Trace* trace) {
1999+ void EmitHat (RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
19992000 RegExpMacroAssembler* assembler = compiler->macro_assembler ();
2000- // We will be loading the previous character into the current character
2001- // register.
2001+
2002+ // We will load the previous character into the current character register.
20022003 Trace new_trace (*trace);
20032004 new_trace.InvalidateCurrentCharacter ();
20042005
2006+ // A positive (> 0) cp_offset means we've already successfully matched a
2007+ // non-empty-width part of the pattern, and thus cannot be at or before the
2008+ // start of the subject string. We can thus skip both at-start and
2009+ // bounds-checks when loading the one-character lookbehind.
2010+ const bool may_be_at_or_before_subject_string_start =
2011+ new_trace.cp_offset () <= 0 ;
2012+
20052013 Label ok;
2006- if (new_trace.cp_offset () == 0 ) {
2007- // The start of input counts as a newline in this context, so skip to
2008- // ok if we are at the start.
2009- assembler->CheckAtStart (&ok);
2014+ if (may_be_at_or_before_subject_string_start) {
2015+ // The start of input counts as a newline in this context, so skip to ok if
2016+ // we are at the start.
2017+ // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
2018+ // that currently does not support a non-zero cp_offset.
2019+ Label not_at_start;
2020+ assembler->CheckNotAtStart (new_trace.cp_offset (), &not_at_start);
2021+ assembler->GoTo (&ok);
2022+ assembler->Bind (&not_at_start);
20102023 }
2011- // We already checked that we are not at the start of input so it must be
2012- // OK to load the previous character.
2024+
2025+ // If we've already checked that we are not at the start of input, it's okay
2026+ // to load the previous character without bounds checks.
2027+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
20132028 assembler->LoadCurrentCharacter (new_trace.cp_offset () - 1 ,
2014- new_trace.backtrack (), false );
2029+ new_trace.backtrack (), can_skip_bounds_check );
20152030 if (!assembler->CheckSpecialCharacterClass ('n' , new_trace.backtrack ())) {
20162031 // Newline means \n, \r, 0x2028 or 0x2029.
20172032 if (!compiler->one_byte ()) {
@@ -2024,6 +2039,8 @@ static void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success,
20242039 on_success->Emit (compiler, &new_trace);
20252040}
20262041
2042+ } // namespace
2043+
20272044// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
20282045void AssertionNode::EmitBoundaryCheck (RegExpCompiler* compiler, Trace* trace) {
20292046 RegExpMacroAssembler* assembler = compiler->macro_assembler ();
@@ -2080,21 +2097,35 @@ void AssertionNode::BacktrackIfPrevious(
20802097 Trace new_trace (*trace);
20812098 new_trace.InvalidateCurrentCharacter ();
20822099
2083- Label fall_through, dummy;
2084-
2100+ Label fall_through;
20852101 Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack ()
20862102 : &fall_through;
20872103 Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
20882104 : new_trace.backtrack ();
20892105
2090- if (new_trace.cp_offset () == 0 ) {
2106+ // A positive (> 0) cp_offset means we've already successfully matched a
2107+ // non-empty-width part of the pattern, and thus cannot be at or before the
2108+ // start of the subject string. We can thus skip both at-start and
2109+ // bounds-checks when loading the one-character lookbehind.
2110+ const bool may_be_at_or_before_subject_string_start =
2111+ new_trace.cp_offset () <= 0 ;
2112+
2113+ if (may_be_at_or_before_subject_string_start) {
20912114 // The start of input counts as a non-word character, so the question is
20922115 // decided if we are at the start.
2093- assembler->CheckAtStart (non_word);
2094- }
2095- // We already checked that we are not at the start of input so it must be
2096- // OK to load the previous character.
2097- assembler->LoadCurrentCharacter (new_trace.cp_offset () - 1 , &dummy, false );
2116+ // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
2117+ // that currently does not support a non-zero cp_offset.
2118+ Label not_at_start;
2119+ assembler->CheckNotAtStart (new_trace.cp_offset (), &not_at_start);
2120+ assembler->GoTo (non_word);
2121+ assembler->Bind (&not_at_start);
2122+ }
2123+
2124+ // If we've already checked that we are not at the start of input, it's okay
2125+ // to load the previous character without bounds checks.
2126+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2127+ assembler->LoadCurrentCharacter (new_trace.cp_offset () - 1 , non_word,
2128+ can_skip_bounds_check);
20982129 EmitWordCheck (assembler, word, non_word, backtrack_if_previous == kIsNonWord );
20992130
21002131 assembler->Bind (&fall_through);
0 commit comments