algorithm-base/animation-simulation/数据结构和算法/BM.md

307 lines
15 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

## BM(Boyer-Moore)
> **[tan45du_one](https://raw.githubusercontent.com/tan45du/tan45du.github.io/master/个人微信.15egrcgqd94w.jpg)** ,备注 github + 题目 + 问题 向我反馈
>
>
>
> <u>[****](https://raw.githubusercontent.com/tan45du/test/master/微信图片_20210320152235.2pthdebvh1c0.png)</u> 两个平台同步,想要和题友一起刷题,互相监督的同学,可以在我的小屋点击<u>[**刷题小队**](https://raw.githubusercontent.com/tan45du/test/master/微信图片_20210320152235.2pthdebvh1c0.png)</u>进入。
BF BF
![](https://img-blog.csdnimg.cn/20210401200433751.png)
BF abcdex abcde x
![BM2](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/BM2.141fhslg6vek.png)
![](https://img-blog.csdnimg.cn/20210401200635476.png)
###
BF BM
![BM4](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/BM4.2mayfaccj3i0.png)
BM ******** f , T f f
![BM5](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/BM5.31j3sja7vsq0.png)
![](https://img-blog.csdnimg.cn/20210401200838199.png)
f , f, T , f
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/坏字符移动.kl5k3nnzkcg.png)
d d
![2](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/换字符对其2.4xdb38am9e60.png)
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/坏字符原则.781vhv3vm280.png)
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/两个坏字符.1a6hcs8ildkw.png)
****
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/坏字符匹配不按规则.1y45278xg1vk.png)
****** babac** ********
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/坏字符移动规则.48oh1msdypy0.png)
![bug](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/换字符bug.24av6jslzh40.png)
###
BM
![](https://img-blog.csdnimg.cn/20210401201215799.png)
使
BM cac cac ****
绿
![](https://img-blog.csdnimg.cn/20210401201254453.png)
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/好后缀中间.7b6m6ki25l00.png)
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/比较.4m9ci1x1c1e0.png)
****
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/不完全重合.6oayqd0dre00.png)
![](https://img-blog.csdnimg.cn/20210319204004219.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzMzODg1OTI0,size_16,color_FFFFFF,t_70#pic_center)
![](https://img-blog.csdnimg.cn/202103191939263.gif)
1.********
2.****
3.
> Boyer R SMoore J S. A fast string searching algorithmJ. Communications of the ACM197710 762-772.
** 0 **
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/五好后缀.6wvqxa4um040.png)
AC KMP
Java Code:
```java
class Solution {
public int strStr(String haystack, String needle) {
char[] hay = haystack.toCharArray();
char[] need = needle.toCharArray();
int haylen = haystack.length();
int needlen = need.length;
return bm(hay,haylen,need,needlen);
}
//用来求坏字符情况下移动位数
private static void badChar(char[] b, int m, int[] bc) {
//初始化
for (int i = 0; i < 256; ++i) {
bc[i] = -1;
}
//m 代表模式串的长度,如果有两个 a,则后面那个会覆盖前面那个
for (int i = 0; i < m; ++i) {
int ascii = (int)b[i];
bc[ascii] = i;//下标
}
}
//用来求好后缀条件下的移动位数
private static void goodSuffix (char[] b, int m, int[] suffix,boolean[] prefix) {
//初始化
for (int i = 0; i < m; ++i) {
suffix[i] = -1;
prefix[i] = false;
}
for (int i = 0; i < m - 1; ++i) {
int j = i;
int k = 0;
while (j >= 0 && b[j] == b[m-1-k]) {
--j;
++k;
suffix[k] = j + 1;
}
if (j == -1) prefix[k] = true;
}
}
public static int bm (char[] a, int n, char[] b, int m) {
int[] bc = new int[256];//创建一个数组用来保存最右边字符的下标
badChar(b,m,bc);
//用来保存各种长度好后缀的最右位置的数组
int[] suffix_index = new int[m];
//判断是否是头部如果是头部则true
boolean[] ispre = new boolean[m];
goodSuffix(b,m,suffix_index,ispre);
int i = 0;//第一个匹配字符
//注意结束条件
while (i <= n-m) {
int j;
//从后往前匹配,匹配失败,找到坏字符
for (j = m - 1; j >= 0; --j) {
if (a[i+j] != b[j]) break;
}
//模式串遍历完毕,匹配成功
if (j < 0) {
return i;
}
//下面为匹配失败时,如何处理
//求出坏字符规则下移动的位数,就是我们坏字符下标减最右边的下标
int x = j - bc[(int)a[i+j]];
int y = 0;
//好后缀情况,求出好后缀情况下的移动位数,如果不含有好后缀的话,则按照坏字符来
if (y < m-1 && m - 1 - j > 0) {
y = move(j, m, suffix_index,ispre);
}
//移动
i = i + Math.max(x,y);
}
return -1;
}
// j代表坏字符的下标
private static int move (int j, int m, int[] suffix_index, boolean[] ispre) {
//好后缀长度
int k = m - 1 - j;
//如果含有长度为 k 的好后缀,返回移动位数,
if (suffix_index[k] != -1) return j - suffix_index[k] + 1;
//找头部为好后缀子串的最大长度,从长度最大的子串开始
for (int r = j + 2; r <= m-1; ++r) {
//如果是头部
if (ispre[m-r] == true) {
return r;
}
}
//如果没有发现好后缀匹配的串,或者头部为好后缀子串,则移动到 m 位,也就是匹配串的长度
return m;
}
}
```
Python Code:
```python
from typing import List
class Solution:
def strStr(self, haystack: str, needle: str)->int:
haylen = len(haystack)
needlen = len(needle)
return self.bm(haystack, haylen, needle, needlen)
#
def badChar(self, b: str, m: int, bc: List[int]):
#
for i in range(0, 256):
bc[i] = -1
# m a
for i in range(0, m,):
ascii = ord(b[i])
bc[ascii] = i#
#
def goodSuffix(self, b: str, m: int, suffix: List[int], prefix: List[bool]):
#
for i in range(0, m):
suffix[i] = -1
prefix[i] = False
for i in range(0, m - 1):
j = i
k = 0
while j >= 0 and b[j] == b[m - 1 - k]:
j -= 1
k += 1
suffix[k] = j + 1
if j == -1:
prefix[k] = True
def bm(self, a: str, n: int, b: str, m: int)->int:
bc = [0] * 256#
self.badChar(b, m, bc)
#
suffix_index = [0] * m
# True
ispre = [False] * m
self.goodSuffix(b, m, suffix_index, ispre)
i = 0#
#
while i <= n - m:
#
j = m - 1
while j >= 0:
if a[i + j] != b[j]:
break
j -= 1
#
if j < 0:
return i
#
#
x = j - bc[ord(a[i + j])]
y = 0
# ,
if y < m - 1 and m - 1 - j > 0:
y = self.move(j, m, suffix_index, ispre)
#
i += max(x, y)
return -1
# j
def move(j: int, m: int, suffix_index: List[int], ispre: List[bool])->int:
#
k = m - 1 - j
# k
if suffix_index[k] != -1:
return j - suffix_index[k] + 1
#
for r in range(j + 2, m):
# //如果是头部
if ispre[m - r] == True:
return r
# m
return m
```
![](https://cdn.jsdelivr.net/gh/tan45du/photobed@master/photo/头缀函数.145da63ig3s0.png)