1+ """
2+ Boyer-Moore string-search algorithm.
3+
4+ Author: Wenru Dong
5+ """
6+
7+ from typing import List , Tuple
8+
9+ SIZE = 256
10+
11+ def _generate_bad_character_table (pattern : str ) -> List [int ]:
12+ bc = [- 1 ] * SIZE
13+ for i , char in enumerate (pattern ):
14+ bc [ord (char )] = i
15+ return bc
16+
17+
18+ def _generate_good_suffix_table (pattern : str ) -> Tuple [List [bool ], List [int ]]:
19+ m = len (pattern )
20+ # prefix[k] records whether the last k-character suffix of pattern
21+ # can match with the first k-character prefix of pattern.
22+ # suffix[k] records the starting index of the last substring of
23+ # pattern that can match with the last k-character suffix of pattern.
24+ prefix , suffix = [False ] * m , [- 1 ] * m
25+ # For each substring patter[:i+1], we find the common suffix with
26+ # pattern, and the starting index of this common suffix.
27+ # This way we can re-write previous suffix[k] to record the index
28+ # as large as possible, hence the last substring.
29+ for i in range (m - 1 ):
30+ j = i # starting index of the common suffix
31+ k = 0 # length of the common suffix
32+ while j >= 0 and pattern [j ] == pattern [~ k ]:
33+ j -= 1
34+ k += 1
35+ suffix [k ] = j + 1
36+ if j == - 1 : prefix [k ] = True
37+ return (prefix , suffix )
38+
39+
40+ def _move_by_good_suffix (bad_character_index : int , suffix : List [int ], prefix : List [bool ]) -> int :
41+ k = len (suffix ) - 1 - bad_character_index
42+ if suffix [k ] != - 1 : return bad_character_index - suffix [k ] + 1
43+ # Test from k - 1
44+ for r , can_match_prefix in enumerate (reversed (prefix [:k ]), bad_character_index + 2 ):
45+ if can_match_prefix : return r
46+ return len (suffix )
47+
48+
def bm(s: str, pattern: str) -> int:
    """Find ``pattern`` in ``s`` using the Boyer-Moore algorithm.

    Args:
        s: The text to search in.
        pattern: The string to search for.

    Returns:
        The index of the first occurrence of ``pattern`` in ``s``, or -1 if
        it does not occur. An empty ``pattern`` matches at index 0.
    """
    bc = _generate_bad_character_table(pattern)
    prefix, suffix = _generate_good_suffix_table(pattern)
    n, m = len(s), len(pattern)
    i = 0  # current alignment of pattern[0] within s
    while i <= n - m:
        # Compare right-to-left; j ends at the mismatch position or -1.
        j = m - 1
        while j >= 0 and s[i + j] == pattern[j]:
            j -= 1
        if j < 0:
            return i

        # Bad-character rule. A text character whose code point lies beyond
        # the table cannot occur in the pattern, so treat it as "absent"
        # (-1) instead of indexing out of range.
        code = ord(s[i + j])
        last_seen = bc[code] if code < len(bc) else -1
        x = j - last_seen

        # Good-suffix rule only applies when at least one character matched.
        y = _move_by_good_suffix(j, suffix, prefix) if j < m - 1 else 0

        i += max(x, y)
    return -1
67+
68+
if __name__ == "__main__":
    # Demo 1: print the index at which the pattern is found.
    s, pattern = "Here is a simple example", "example"
    print(bm(s, pattern))

    # Demo 2: cross-check the result against the built-in str.find.
    s, pattern = "abcdcccdc", "cccd"
    print(bm(s, pattern) == s.find(pattern))
0 commit comments