- 思路
- example
- haystack = 'abcabcabd', size = n
- needle = 'abcabd', size = m, 模式(基准)串
- output: 3
- 暴力法:
-
(haystack)
-
(needle: 失败1)
-
(needle: 失败2)
-
(needle: 失败3)
-
(needle: 成功)
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
n, m = len(haystack), len(needle)
for i in range(n-m+1):
if haystack[i:i+m] == needle:
return i
return -1
- KMP
-
双指针. i: haystack index; cur: needle (模式串)index
- 暴力法中的失败1和失败2尝试可以跳过。以失败1为例:
- abcabcabd (haystack)
-
abcabd (虽然模式串中第一个字符a就匹配失败,但从另一个角度模式串黑体字符前面的位置与haystack是不匹配的)
-
abcab (失败1发生时前面的子串, 成功的匹配需要needle中的字符去match haystack中的当前字符c -- 假设指标为)
- 考虑模式串(因为失败前面的位置两个串有相同部分),利用对称性 (前缀 = 后缀 的最大长度)
- 前缀: x, ..., i (以ith 结尾,x > 0) 对应的子串
- 后缀:0, ..., y (以0th开头, y < i) 对应的子串
- 上面的例子,abcab,最长“公共”前后缀长度 = 2 (模式串中下一个待匹配位置为2)
- 从而马上进入到(needle: 成功)的情况。
- 关键:建立模式串的next 数组(模式串的最长公共前后缀长度数组). 这样当模式串的ith位置与haystack[z]匹配失败的时候,调用next[i-1]可得到模式串中准备与haystack[z]比较的位置。(必然有next[i-1] < i) (回退)。
- abcabd 模式串
- 000120 next (回退)数组
-
next数组 暴力版,
def getNext(s): # s: 模式串
nxt = [0]*len(s)
for i in range(1, len(s)):
for k in range(i, 0, -1):
if s[:k] == s[i-k+1:i+1]:
break
nxt[i] = k
return nxt
- 优化版next数组见下面代码。
- 思想与strStr主体函数一样,但是在getNext中是模式子串自己与自己的匹配!
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
def getNext(s):
nxt = [0]*len(s)
cur, i = 0, 1
while i < len(s):
if s[cur] == s[i]:
nxt[i] = cur + 1
cur += 1
i += 1
elif cur != 0:
cur = nxt[cur-1]
# i remain unchanged
else: # cur = 0
nxt[i] = 0
i += 1
# cur remain unchanged, = 0
return nxt
n, m = len(haystack), len(needle)
nxt = getNext(needle)
i, cur = 0, 0 # i: haystack中待比较位置,cur: needle中待比较位置
while i < n:
if haystack[i] == needle[cur]:
i += 1
cur += 1
elif cur != 0:
cur = nxt[cur-1]
# i remain unchanged
else: # cur = 0
i += 1
# cur remain unchanged
if cur == m:
return i - m
return -1
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
def getNext(needle):
m = len(needle)
nxt = [0 for _ in range(m)]
j, i = 0, 1
while i < m:
if needle[i] == needle[j]:
nxt[i] = j + 1
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return nxt
n, m = len(haystack), len(needle)
j = 0 # index in needle
i = 0 # index in haystack
nxt = getNext(needle)
while i < n:
if haystack[i] == needle[j]:
i += 1
j += 1
if j == m:
return i - m
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return -1
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
def compute_next(s):
n = len(s)
nxt = [0 for _ in range(n)]
i = 1
j = 0
while i < n:
if s[i] == s[j]:
nxt[i] = j + 1
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
nxt[i] = 0
i += 1
return nxt
m, n = len(haystack), len(needle)
nxt = compute_next(needle)
i, j = 0, 0
while i < m:
if haystack[i] == needle[j]:
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
if j == n:
return i - n
return -1
- 思路
- example
- 暴力法: 穷举子串的长度
- s[j] vs s[j-i] for j in range(i, n)
- 复杂度. 时间:, 空间:
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
n = len(s)
for i in range(1, n//2+1): # i: length of substring
if n % i == 0:
flag = False
for j in range(i, n):
if s[j] != s[j-i]:
flag = True
break
if flag == False:
return True
return False
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
n = len(s)
for k in range(1, n//2+1):
if n % k != 0:
continue
i = k
while i < n:
if s[i-k:i] != s[i:i+k]:
break
i += k
if i == n:
return True
return False
- 利用KMP
- 假设s = s's', 那到s + s = s's's's', 去掉s+s的第一个和最后一个字符。
- 假设s' = ab, 那到s+s = abababab,
- 去掉s+s的第一个和最后一个字符: t = bababa, 我们可以在t中找到一个s子串。
- 下面的版本还可以接着优化(TBA).
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
def getNext(s): # pattern string
nxt = [0]*len(s)
cur = 0 #
i = 1 #
while i < len(s):
if s[i] == s[cur]:
nxt[i] = cur + 1
i += 1
cur += 1
elif cur != 0:
cur = nxt[cur-1]
else:
nxt[i] = 0
i += 1
return nxt
#
nxt = getNext(s)
ss = s + s
ss = ss[1:len(ss)-1]
n, m = len(ss), len(s)
i, cur = 0, 0 # i: ss index; cur: s index
while i < n:
if ss[i] == s[cur]:
i += 1
cur += 1
elif cur != 0:
cur = nxt[cur-1]
else:
i += 1
if cur == m:
return True
return False
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
def getNext(s):
n = len(s)
nxt = [0 for _ in range(n)]
j, i = 0, 1
while i < n:
if s[i] == s[j]:
nxt[i] = j + 1
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return nxt
ss = s + s
ss = ss[1:len(ss)-1]
n, m = len(ss), len(s)
j, i = 0, 0
nxt = getNext(s)
while i < n:
if ss[i] == s[j]:
i += 1
j += 1
if j == m:
return True
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return False