内存字符串暴力搜索定位代码
其它优秀的字符串搜索代码:点击
使用说明:
一般都是四个参数,
参数1: 你要搜索的缓冲区
参数2: 参数1缓冲区的大小
参数3: 要搜索的字符串
参数4: 参数3的缓冲大小
代码实现
search.h
#pragma once
/*
function:
Boyer-Moore字符匹配算法
Param:
@text 要搜索的缓冲区开始
@n 要搜索的缓冲区大小
@pattern 需要匹配的字符串
@m 需要匹配的字符串长度
*/
int BinarySearch(unsigned char *text, int n, unsigned char *pattern, int m);
.cpp实现
使用BinarySearch即可.
#pragma once
#include "search.h"
#define maxNum 256
#ifndef MIN
# define MIN(A,B) ((A)<(B)?(A):(B))
#endif
#ifndef MAX
# define MAX(A,B) ((A)>(B)?(A):(B))
#endif
void PreBmBc(unsigned char *pattern, int m, int bmBc[])
{
int i;
for (i = 0; i < 256; i++) {//一个字符占八位,共256个字符,把所有字符都覆盖到,这里的初始化是将所有字符失配时的移动距离都赋值为m
bmBc[i] = m;
}
for (i = 0; i < m - 1; i++) {//针对模式串pattern中存在的每一个字符,计算出它们最靠右的(非最后一个字符)地方距离串末尾的距离,即它们失配时该移动的距离,这一操作更新了初始化中一些字符的移动距离
bmBc[pattern[i]] = m - 1 - i;
}
}
/*
function:
旧版的好后缀辅助数组(好后缀长度)求解方法
Param:
@pattern 需要匹配的字符串
@suff 好后缀辅助数组
@m 需要匹配的字符串长度
*/
void suffix_old(char *pattern, int m, int suff[])
{
int i, j;
suff[m - 1] = m;
for (i = m - 2; i >= 0; i--) {
j = i;
while (j >= 0 && pattern[j] == pattern[m - 1 - i + j]) j--;
suff[i] = i - j;
}
}
/*
function:
新版的好后缀辅助数组(好后缀长度)求解方法
Param:
@pattern 需要匹配的字符串
@suff 好后缀辅助数组
@m 需要匹配的字符串长度
*/
void suffix(unsigned char *pattern, int m, int suff[]) {
int f, g, i;
suff[m - 1] = m;
g = m - 1;
for (i = m - 2; i >= 0; --i) {
if (i > g&&suff[i + m - 1 - f] < i - g)
suff[i] = suff[i + m - 1 - f];
else {
if (i < g)
g = i;
f = i;
while (g >= 0 && pattern[g] == pattern[g + m - 1 - f])
--g;
suff[i] = f - g;
}
}
}
/*
function:
好后缀数组求解方法
Param:
@pattern 需要匹配的字符串
@bmGs 好后缀数组
@m 需要匹配的字符串长度
*/
void PreBmGs(unsigned char *pattern, int m, int bmGs[])
{
int i, j;
int suff[maxNum];
// 计算后缀数组
suffix(pattern, m, suff);
// 先全部赋值为m,包含Case3
for (i = 0; i < m; i++) {
bmGs[i] = m;
}
// Case2
j = 0;
for (i = m - 1; i >= 0; i--) {
if (suff[i] == i + 1) {
for (; j < m - 1 - i; j++) {
if (bmGs[j] == m)
bmGs[j] = m - 1 - i;
}
}
}
// Case1
for (i = 0; i <= m - 2; i++) {
bmGs[m - 1 - suff[i]] = m - 1 - i;
}
}
/*
function:
Boyer-Moore字符匹配算法
Param:
@text 文本内容
@n 文本内容长度
@pattern 需要匹配的字符串
@m 需要匹配的字符串长度
*/
int BinarySearch(unsigned char *text, int n, unsigned char *pattern, int m)
{
int * bmBc = new int[maxNum];
int * bmGs = new int[m];
PreBmBc(pattern, m, bmBc);
PreBmGs(pattern, m, bmGs);
int i, pos;
pos = 0;
while (pos <= n - m) {
for (i = m - 1; i >= 0 && pattern[i] == text[i + pos]; i--);
if (i < 0) {
delete bmGs;
delete bmBc;
return &text[pos] - text;
}
else {
pos += MAX(bmBc[text[i + pos]] - m + 1 + i, bmGs[i]);
}
}
return -1;
}
1.1 Boyer-Moore实现
上面的代码是有注释,也是这个相同实现
void preBmBc(char *x, int m, int bmBc[]) {
int i;
for (i = 0; i < ASIZE; ++i)
bmBc[i] = m;
for (i = 0; i < m - 1; ++i)
bmBc[x[i]] = m - i - 1;
}
void suffixes(char *x, int m, int *suff) {
int f, g, i;
suff[m - 1] = m;
g = m - 1;
for (i = m - 2; i >= 0; --i) {
if (i > g && suff[i + m - 1 - f] < i - g)
suff[i] = suff[i + m - 1 - f];
else {
if (i < g)
g = i;
f = i;
while (g >= 0 && x[g] == x[g + m - 1 - f])
--g;
suff[i] = f - g;
}
}
}
void preBmGs(char *x, int m, int bmGs[]) {
int i, j, suff[XSIZE];
suffixes(x, m, suff);
for (i = 0; i < m; ++i)
bmGs[i] = m;
j = 0;
for (i = m - 1; i >= 0; --i)
if (suff[i] == i + 1)
for (; j < m - 1 - i; ++j)
if (bmGs[j] == m)
bmGs[j] = m - 1 - i;
for (i = 0; i <= m - 2; ++i)
bmGs[m - 1 - suff[i]] = m - 1 - i;
}
void BM(char *x, int m, char *y, int n) {
int i, j, bmGs[XSIZE], bmBc[ASIZE];
/* Preprocessing */
preBmGs(x, m, bmGs);
preBmBc(x, m, bmBc);
/* Searching */
j = 0;
while (j <= n - m) {
for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
if (i < 0) {
OUTPUT(j);
j += bmGs[0];
}
else
j += MAX(bmGs[i], bmBc[y[i + j]] - m + 1 + i);
}
}
1.2 简化版Tuned Boyer-Moore
void TUNEDBM(char *x, int m, char *y, int n) {
int j, k, shift, bmBc[ASIZE];
/* Preprocessing */
preBmBc(x, m, bmBc);
shift = bmBc[x[m - 1]];
bmBc[x[m - 1]] = 0;
memset(y + n, x[m - 1], m);
/* Searching */
j = 0;
while (j < n) {
k = bmBc[y[j + m -1]];
while (k != 0) {
j += k; k = bmBc[y[j + m -1]];
j += k; k = bmBc[y[j + m -1]];
j += k; k = bmBc[y[j + m -1]];
}
if (memcmp(x, y + j, m - 1) == 0 && j < n)
OUTPUT(j);
j += shift; /* shift */
}
}
1.3 KMP
int attempt(char *y, char *x, int m, int start, int wall) {
int k;
k = wall - start;
while (k < m && x[k] == y[k + start])
++k;
return(k);
}
void KMPSKIP(char *x, int m, char *y, int n) {
int i, j, k, kmpStart, per, start, wall;
int kmpNext[XSIZE], list[XSIZE], mpNext[XSIZE],
z[ASIZE];
/* Preprocessing */
preMp(x, m, mpNext);
preKmp(x, m, kmpNext);
memset(z, -1, ASIZE*sizeof(int));
memset(list, -1, m*sizeof(int));
z[x[0]] = 0;
for (i = 1; i < m; ++i) {
list[i] = z[x[i]];
z[x[i]] = i;
}
/* Searching */
wall = 0;
per = m - kmpNext[m];
i = j = -1;
do {
j += m;
} while (j < n && z[y[j]] < 0);
if (j >= n)
return;
i = z[y[j]];
start = j - i;
while (start <= n - m) {
if (start > wall)
wall = start;
k = attempt(y, x, m, start, wall);
wall = start + k;
if (k == m) {
OUTPUT(start);
i -= per;
}
else
i = list[i];
if (i < 0) {
do {
j += m;
} while (j < n && z[y[j]] < 0);
if (j >= n)
return;
i = z[y[j]];
}
kmpStart = start + k - kmpNext[k];
k = kmpNext[k];
start = j - i;
while (start < kmpStart ||
(kmpStart < start && start < wall)) {
if (start < kmpStart) {
i = list[i];
if (i < 0) {
do {
j += m;
} while (j < n && z[y[j]] < 0);
if (j >= n)
return;
i = z[y[j]];
}
start = j - i;
}
else {
kmpStart += (k - mpNext[k]);
k = mpNext[k];
}
}
}
}