Skip to content

Commit c7375ba

Browse files
committed
Create KMP.py
1 parent 8bc4a95 commit c7375ba

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed

TextProcessingChpt/KMP.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
2+
3+
def find_KMP(T, P):
    """Locate pattern P inside text T with the Knuth-Morris-Pratt scan.

    Returns the lowest index of T at which P begins, or -1 when P does
    not occur in T. An empty P is reported as matching at index 0.
    """
    text_len = len(T)
    pat_len = len(P)
    if not pat_len:
        return 0

    # Failure table: tells us how far the pattern can be shifted after a
    # mismatch without re-reading text characters.
    fail = compute_kmp_fail(P)
    last = pat_len - 1

    t_pos = 0  # current index into T
    p_pos = 0  # current index into P (== number of characters matched)
    while t_pos < text_len:
        if T[t_pos] == P[p_pos]:
            if p_pos == last:
                # Whole pattern matched; report where it started in T.
                return t_pos - last
            t_pos += 1
            p_pos += 1
            continue

        if p_pos > 0:
            # Partial match failed: reuse the longest prefix of P that is
            # also a suffix of what has been matched so far.
            p_pos = fail[p_pos - 1]
        else:
            # Nothing matched at this text position; move on.
            t_pos += 1

    return -1  # text exhausted without a full match
30+
31+
"""
32+
a t c a m a l g a m a m a l g a m a t i o n
33+
a |m a l g a m a t i o n
34+
a |m a l g a m a t i o n
35+
a m a l g a m a|t i o n
36+
a m a|l g a m a t i o n
37+
a m a l g a m a t i o n
38+
"""
39+
40+
def compute_kmp_fail(P):
    """Build and return the KMP failure table for pattern P.

    The result is a list with one entry per character of P. Entry i is
    the length of the longest proper prefix of P[:i + 1] that is also a
    suffix of it (0 when there is none). An empty P yields an empty list.
    """
    size = len(P)
    table = [0] * size  # start by assuming no overlap anywhere

    pos = 1      # index whose table entry is being determined
    matched = 0  # length of the prefix currently matched against P[..pos]
    while pos < size:
        if P[pos] == P[matched]:
            # Prefix grows by one character.
            matched += 1
            table[pos] = matched
            pos += 1
        elif matched > 0:
            # Fall back to the next-shorter prefix that is still a suffix.
            matched = table[matched - 1]
        else:
            # No prefix ends at pos; its entry stays 0.
            pos += 1

    return table
60+
61+
62+
"""
63+
a m a l g a m a t i o n
64+
0 1 2 3 4 5 6 7 8 9 10 11
65+
66+
m == a
67+
p[1] == p[0] - false j+=1 = j= 2 - fail -> [0,0,0,0,0,0,0,0,0,0,0,0]
68+
j=2
69+
k=0
70+
71+
a == a
72+
p[2] == P[0] - true fail[2] = 1 - fail -> [0,0,1,0,0,0,0,0,0,0,0,0]
73+
j+=1 - j=3
74+
k+=1 = k=1
75+
76+
l == m
77+
p[3] == p[1] - false k = fail[k-1] - k = fail[1-1] = 0 - fail -> [0,0,1,0,0,0,0,0,0,0,0,0]
78+
j=3
79+
k=0
80+
81+
.
82+
.
83+
.
84+
.
85+
86+
fail = [0,0,1,0,0,1,2,3,0,0,0,0]
87+
88+
89+
"""

0 commit comments

Comments
 (0)