@@ -641,6 +641,8 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
641641 }
642642 StrSearcherImpl :: TwoWay ( ref mut searcher) => {
643643 let is_long = searcher. memory == usize:: MAX ;
644+ // write out `true` and `false` cases to encourage the compiler
645+ // to specialize the two cases separately.
644646 if is_long {
645647 searcher. next :: < MatchOnly > ( self . haystack . as_bytes ( ) ,
646648 self . needle . as_bytes ( ) ,
@@ -653,8 +655,8 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
653655 }
654656 }
655657 }
656-
657658}
659+
658660unsafe impl < ' a , ' b > ReverseSearcher < ' a > for StrSearcher < ' a , ' b > {
659661 #[ inline]
660662 fn next_back ( & mut self ) -> SearchStep {
@@ -709,6 +711,7 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
709711 }
710712 StrSearcherImpl :: TwoWay ( ref mut searcher) => {
711713 let is_long = searcher. memory == usize:: MAX ;
714+ // write out `true` and `false`, like `next_match`
712715 if is_long {
713716 searcher. next_back :: < MatchOnly > ( self . haystack . as_bytes ( ) ,
714717 self . needle . as_bytes ( ) ,
@@ -723,8 +726,7 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
723726 }
724727}
725728
726- /// The internal state of an iterator that searches for matches of a substring
727- /// within a larger string using two-way search
729+ /// The internal state of the two-way substring search algorithm.
728730#[ derive( Clone , Debug ) ]
729731struct TwoWaySearcher {
730732 // constants
@@ -741,7 +743,9 @@ struct TwoWaySearcher {
741743 // variables
742744 position : usize ,
743745 end : usize ,
746+ /// index into needle before which we have already matched
744747 memory : usize ,
748+ /// index into needle after which we have already matched
745749 memory_back : usize ,
746750}
747751
@@ -841,9 +845,6 @@ impl TwoWaySearcher {
841845 // is large.
842846 if & needle[ ..crit_pos] == & needle[ period.. period + crit_pos] {
843847 // short period case -- the period is exact
844- let byteset = needle[ ..period] . iter ( )
845- . fold ( 0 , |a, & b| ( 1 << ( b & 0x3f ) ) | a) ;
846-
847848 // compute a separate critical factorization for the reversed needle
848849 // x = u' v' where |v'| < period(x).
849850 //
@@ -860,26 +861,26 @@ impl TwoWaySearcher {
860861 crit_pos : crit_pos,
861862 crit_pos_back : crit_pos_back,
862863 period : period,
863- byteset : byteset ,
864+ byteset : Self :: byteset_create ( & needle [ ..period ] ) ,
864865
865866 position : 0 ,
866867 end : end,
867868 memory : 0 ,
868- // memory_back after which we have already matched
869869 memory_back : needle. len ( ) ,
870870 }
871871 } else {
872872 // long period case -- we have an approximation to the actual period,
873873 // and don't use memorization.
874-
875- let byteset = needle. iter ( )
876- . fold ( 0 , |a, & b| ( 1 << ( b & 0x3f ) ) | a) ;
874+ //
875+ // Approximate the period by lower bound max(|u|, |v|) + 1.
876+ // The critical factorization is efficient to use for both forward and
877+ // reverse search.
877878
878879 TwoWaySearcher {
879880 crit_pos : crit_pos,
880881 crit_pos_back : crit_pos,
881882 period : cmp:: max ( crit_pos, needle. len ( ) - crit_pos) + 1 ,
882- byteset : byteset ,
883+ byteset : Self :: byteset_create ( needle ) ,
883884
884885 position : 0 ,
885886 end : end,
@@ -889,6 +890,11 @@ impl TwoWaySearcher {
889890 }
890891 }
891892
893+ #[ inline]
894+ fn byteset_create ( bytes : & [ u8 ] ) -> u64 {
895+ bytes. iter ( ) . fold ( 0 , |a, & b| ( 1 << ( b & 0x3f ) ) | a)
896+ }
897+
892898 #[ inline( always) ]
893899 fn byteset_contains ( & self , byte : u8 ) -> bool {
894900 ( self . byteset >> ( ( byte & 0x3f ) as usize ) ) & 1 != 0
@@ -976,9 +982,9 @@ impl TwoWaySearcher {
976982 // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v)
977983 // is a critical factorization, so is (reverse(v), reverse(u)).
978984 //
979- // For the short period case, using memorization, we rely on |u| < period(x).
980- // For this case we have computed a critical factorization x = u' v'
981- // where |v'| < period(x) instead (field `crit_pos_back`) .
985+ // For the reverse case we have computed a critical factorization x = u' v'
986+ // (field `crit_pos_back`). We need |u| < period(x) for the forward case and
987+ // thus |v'| < period(x) for the reverse .
982988 //
983989 // To search in reverse through the haystack, we search forward through
984990 // a reversed haystack with a reversed needle, matching first u' and then v'.
@@ -1018,7 +1024,7 @@ impl TwoWaySearcher {
10181024
10191025 // See if the left part of the needle matches
10201026 let crit = if long_period { self . crit_pos_back }
1021- else { cmp:: min ( self . crit_pos_back , self . memory_back ) } ;
1027+ else { cmp:: min ( self . crit_pos_back , self . memory_back ) } ;
10221028 for i in ( 0 ..crit) . rev ( ) {
10231029 if needle[ i] != haystack[ self . end - needle. len ( ) + i] {
10241030 self . end -= self . crit_pos_back - i;
@@ -1031,7 +1037,7 @@ impl TwoWaySearcher {
10311037
10321038 // See if the right part of the needle matches
10331039 let needle_end = if long_period { needle. len ( ) }
1034- else { self . memory_back } ;
1040+ else { self . memory_back } ;
10351041 for i in self . crit_pos_back ..needle_end {
10361042 if needle[ i] != haystack[ self . end - needle. len ( ) + i] {
10371043 self . end -= self . period ;
@@ -1070,7 +1076,8 @@ impl TwoWaySearcher {
10701076 fn maximal_suffix ( arr : & [ u8 ] , order_greater : bool ) -> ( usize , usize ) {
10711077 let mut left = 0 ; // Corresponds to i in the paper
10721078 let mut right = 1 ; // Corresponds to j in the paper
1073- let mut offset = 0 ; // Corresponds to k in the paper
1079+ let mut offset = 0 ; // Corresponds to k in the paper, but starting at 0
1080+ // to match 0-based indexing.
10741081 let mut period = 1 ; // Corresponds to p in the paper
10751082
10761083 while let Some ( & a) = arr. get ( right + offset) {
@@ -1117,7 +1124,8 @@ impl TwoWaySearcher {
11171124 {
11181125 let mut left = 0 ; // Corresponds to i in the paper
11191126 let mut right = 1 ; // Corresponds to j in the paper
1120- let mut offset = 0 ; // Corresponds to k in the paper
1127+ let mut offset = 0 ; // Corresponds to k in the paper, but starting at 0
1128+ // to match 0-based indexing.
11211129 let mut period = 1 ; // Corresponds to p in the paper
11221130 let n = arr. len ( ) ;
11231131
0 commit comments