Skip to content

Commit 9066ab5

Browse files
Merge pull request wangzheng0822#191 from KPatr1ck/kmp
KMP implementation in python
2 parents 76aec62 + b0feaf8 commit 9066ab5

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

python/34_kmp/kmp_.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/python
2+
# -*- coding: UTF-8 -*-
3+
4+
5+
def kmp(main, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``main``.

    Knuth-Morris-Pratt string matching: the text is scanned once from left
    to right, and on a mismatch the pattern is shifted according to the
    failure table produced by ``get_next`` instead of restarting the
    comparison from scratch.

    :param main: the text to search in; must be a ``str``
    :param pattern: the substring to look for; must be a ``str``
    :return: the start index of the first match in ``main``; ``0`` when
        ``pattern`` is empty; ``-1`` when there is no match
    :raises AssertionError: if either argument is not a ``str``
    """
    assert isinstance(main, str) and isinstance(pattern, str)

    n, m = len(main), len(pattern)

    if m == 0:
        return 0
    if n <= m:
        # The pattern cannot fit more than once; only an exact match counts.
        return 0 if main == pattern else -1

    # fail[k] is the end index of the longest proper prefix of
    # pattern[:k+1] that is also its suffix, or -1 when there is none.
    fail = get_next(pattern)

    j = 0  # index of the next pattern character to compare
    for i, ch in enumerate(main):
        # On a mismatch, fall back through ever shorter prefixes of
        # pattern[:j] that are also suffixes of it. fail[j-1] + 1 is the
        # length of that prefix; when fail[j-1] == -1 this restarts the
        # comparison at pattern[0].
        while j > 0 and ch != pattern[j]:
            j = fail[j - 1] + 1

        if ch == pattern[j]:
            if j == m - 1:
                # Whole pattern matched; i is the index of its last character.
                return i - m + 1
            j += 1
    return -1
36+
37+
38+
def get_next(pattern):
    """Build the KMP failure ("next") table for ``pattern``.

    ``table[i]`` is the end index of the longest proper prefix of
    ``pattern[:i+1]`` that is also a suffix of it, or ``-1`` when no such
    prefix exists.

    How ``table[i]`` is derived from ``table[0] .. table[i-1]``:

    1. Start from the longest matching prefix for position ``i-1``; its end
       index is ``table[i-1]``.
    2. If the character following that prefix equals ``pattern[i]``, the
       prefix is simply extended by one, so ``table[i] = table[i-1] + 1``.
    3. Otherwise fall back to the next-longest matching prefix, whose end
       index is ``table[table[i-1]]``, and try again.
    4. The fall-back stops when the following character matches or when
       ``-1`` is reached. ``-1`` means no prefix is left, so ``pattern[i]``
       is compared against the first character of the pattern.

    The last entry is intentionally not computed and stays ``-1``: the loop
    stops at index ``m - 2``, and the search in ``kmp`` only reads entries
    up to that index.

    :param pattern: the pattern string
    :return: a list of ``len(pattern)`` ints; an empty list for an empty
        pattern
    """
    m = len(pattern)
    # Every entry starts at -1, which is already the correct value for
    # index 0. No explicit write to table[0] is made, so an empty pattern
    # yields [] instead of raising IndexError.
    table = [-1] * m

    for i in range(1, m - 1):
        j = table[i - 1]  # longest matching prefix for position i-1
        while j != -1 and pattern[j + 1] != pattern[i]:
            j = table[j]  # fall back to the next-longest matching prefix

        # The loop exits either with j == -1, in which case pattern[0] is
        # compared with pattern[i], or with pattern[j+1] == pattern[i].
        if pattern[j + 1] == pattern[i]:
            j += 1
        table[i] = j

    return table
75+
76+
77+
if __name__ == '__main__':
    # Demo: print the result of the built-in str.find next to the result of
    # kmp for the same text and pattern so the two can be compared.
    m_str = "aabbbbaaabbababbabbbabaaabb"
    p_str = "abbabbbabaa"

    print('--- search ---')
    print('[Built-in Functions] result:', m_str.find(p_str))
    print('[kmp] result:', kmp(m_str, p_str))

0 commit comments

Comments
 (0)