内存字符串暴力搜索定位代码

目录

内存字符串暴力搜索定位代码

其它优秀的字符串搜索代码:点击

使用说明:

搜索函数一般都有四个参数:

参数1: 你要搜索的缓冲区

参数2: 参数1缓冲区的大小

参数3: 要搜索的字符串

参数4: 参数3的缓冲大小

代码实现

search.h

#pragma once


/*
function:
		Boyer-Moore pattern search over a raw byte buffer.
		(Despite its name, this function does not perform a binary search.)
Param:
@text    start of the buffer to search
@n       size of the buffer to search, in bytes
@pattern byte sequence to look for
@m       length of the byte sequence, in bytes
Return:
		offset of the match from the start of text, or -1 when nothing matches
*/
int BinarySearch(unsigned char *text, int n, unsigned char *pattern, int m);

.cpp实现

使用BinarySearch即可.

#pragma once
#include "search.h"


// Number of distinct byte values; size of the per-byte lookup table.
#define maxNum 256

// Smaller of two values. Each argument may be evaluated more than once,
// so do not pass expressions with side effects.
#ifndef MIN
# define MIN(A,B) ((A)<(B)?(A):(B))
#endif
// Larger of two values. Each argument may be evaluated more than once,
// so do not pass expressions with side effects.
#ifndef MAX
# define MAX(A,B) ((A)>(B)?(A):(B))
#endif

/*
function:
		Build the bad-character shift table.
Param:
@pattern byte sequence being searched for
@m       length of pattern in bytes
@bmBc    output table with 256 entries, one per possible byte value
*/
void PreBmBc(unsigned  char *pattern, int m, int bmBc[])
{
	int value;
	int distance;
	unsigned char *cursor;

	// A byte has 8 bits, so 256 entries cover every value. By default a
	// mismatching byte shifts the pattern by its whole length.
	value = 0;
	while (value < 256) {
		bmBc[value] = m;
		++value;
	}

	// For every pattern byte except the last one, store the distance from
	// that position to the end of the pattern. Walking left to right means
	// the rightmost occurrence of a byte wins.
	cursor = pattern;
	for (distance = m - 1; distance > 0; --distance) {
		bmBc[*cursor] = distance;
		++cursor;
	}
}
/*
function:
		Older, quadratic way of computing the good-suffix helper table
		(suffix lengths).
Param:
@pattern byte sequence being searched for
@m       length of pattern in bytes
@suff    output table with m entries; suff[i] is the length of the longest
         run ending at pattern[i] that equals a suffix of the pattern
*/
void suffix_old(char *pattern, int m, int suff[])
{
	int i, j;

	// An empty pattern has no entries; without this check the next line
	// would write suff[-1].
	if (m <= 0)
		return;

	// The whole pattern is trivially a suffix of itself.
	suff[m - 1] = m;
	for (i = m - 2; i >= 0; i--) {
		// Compare leftwards from position i against the pattern's tail.
		j = i;
		while (j >= 0 && pattern[j] == pattern[m - 1 - i + j]) j--;
		suff[i] = i - j;
	}
}
/*
function:
		Newer way of computing the good-suffix helper table (suffix
		lengths). It reuses previously computed entries instead of
		re-comparing bytes whenever possible.
Param:
@pattern byte sequence being searched for
@m       length of pattern in bytes
@suff    output table with m entries; suff[i] is the length of the longest
         run ending at pattern[i] that equals a suffix of the pattern
*/
void suffix(unsigned char *pattern, int m, int suff[]) {
	// f: position where the most recent explicit comparison run started.
	// It is only read after it has been assigned in the else branch, but
	// it is initialised anyway so the code never depends on that ordering.
	int f = 0;
	// g: one position left of the leftmost byte matched by that run.
	int g;
	int i;

	// An empty pattern has no entries; without this check the next line
	// would write suff[-1].
	if (m <= 0)
		return;

	// The whole pattern is trivially a suffix of itself.
	suff[m - 1] = m;
	g = m - 1;
	for (i = m - 2; i >= 0; --i) {
		// Position i lies inside the previously matched window and the
		// mirrored entry fits entirely within it: copy the known value.
		if (i > g&&suff[i + m - 1 - f] < i - g)
			suff[i] = suff[i + m - 1 - f];
		else {
			// Otherwise start (or extend) an explicit comparison run.
			if (i < g)
				g = i;
			f = i;
			while (g >= 0 && pattern[g] == pattern[g + m - 1 - f])
				--g;
			suff[i] = f - g;
		}
	}
}
/*
function:
		好后缀数组求解方法
Param:
@pattern 需要匹配的字符串
@bmGs 好后缀数组
@m 需要匹配的字符串长度
*/
void PreBmGs(unsigned char *pattern, int m, int bmGs[])
{
	int i, j;
	int suff[maxNum];
	// 计算后缀数组
	suffix(pattern, m, suff);
	// 先全部赋值为m,包含Case3
	for (i = 0; i < m; i++) {
		bmGs[i] = m;
	}
	// Case2
	j = 0;
	for (i = m - 1; i >= 0; i--) {
		if (suff[i] == i + 1) {
			for (; j < m - 1 - i; j++) {
				if (bmGs[j] == m)
					bmGs[j] = m - 1 - i;
			}
		}
	}
	// Case1
	for (i = 0; i <= m - 2; i++) {
		bmGs[m - 1 - suff[i]] = m - 1 - i;
	}
}
/*
function:
		Boyer-Moore字符匹配算法
Param:
@text 文本内容
@n 文本内容长度
@pattern 需要匹配的字符串
@m 需要匹配的字符串长度
*/
int BinarySearch(unsigned char *text, int n, unsigned char *pattern, int m)
{
	int * bmBc = new int[maxNum];
	int * bmGs = new int[m];
	PreBmBc(pattern, m, bmBc);
	PreBmGs(pattern, m, bmGs);
	int i, pos;
	pos = 0;
	while (pos <= n - m) {
		for (i = m - 1; i >= 0 && pattern[i] == text[i + pos]; i--);
		if (i < 0) {
			delete bmGs;
			delete bmBc;
			return &text[pos] - text;
		}
		else {
			pos += MAX(bmBc[text[i + pos]] - m + 1 + i, bmGs[i]);
		}
	}
	return -1;
}

1.1 Boyer-Moore实现

上面带注释的代码与下面的实现相同:

/* Alphabet size: one table entry per possible byte value. */
#ifndef ASIZE
#define ASIZE 256
#endif

/*
 * Build the bad-character shift table for pattern x of length m.
 *
 * bmBc must have ASIZE entries. Every entry defaults to m; for each pattern
 * byte except the last, the entry becomes the distance from its rightmost
 * occurrence to the end of the pattern.
 */
void preBmBc(char *x, int m, int bmBc[]) {
   int i;

   for (i = 0; i < ASIZE; ++i)
      bmBc[i] = m;
   /* Plain char may be signed: convert to unsigned char before indexing so
      bytes >= 0x80 do not produce a negative index. */
   for (i = 0; i < m - 1; ++i)
      bmBc[(unsigned char)x[i]] = m - i - 1;
}
 
 
/*
 * Compute the suffix-length table for pattern x of length m.
 *
 * suff must have m entries. On return suff[i] is the length of the longest
 * run ending at x[i] that equals a suffix of x. Previously computed entries
 * are reused whenever the position falls inside the last matched window.
 */
void suffixes(char *x, int m, int *suff) {
   /* f: start of the most recent explicit comparison run. It is only read
      after the else branch has assigned it, but is initialised so the code
      never depends on that ordering.
      g: one position left of the leftmost byte matched by that run. */
   int f = 0, g, i;

   /* An empty pattern has no entries; without this check the next
      statement would write suff[-1]. */
   if (m <= 0)
      return;

   suff[m - 1] = m;
   g = m - 1;
   for (i = m - 2; i >= 0; --i) {
      /* Inside the known window and the mirrored entry fits: copy it. */
      if (i > g && suff[i + m - 1 - f] < i - g)
         suff[i] = suff[i + m - 1 - f];
      else {
         /* Otherwise compare bytes explicitly, extending the window. */
         if (i < g)
            g = i;
         f = i;
         while (g >= 0 && x[g] == x[g + m - 1 - f])
            --g;
         suff[i] = f - g;
      }
   }
}
 
/*
 * Build the good-suffix shift table for pattern x of length m.
 *
 * bmGs must have m entries. The scratch table is a local array of XSIZE
 * entries filled by suffixes(), so m must not exceed XSIZE.
 */
void preBmGs(char *x, int m, int bmGs[]) {
   int suff[XSIZE];
   int pos;
   int fill;

   suffixes(x, m, suff);

   /* Default: shift by the whole pattern length. */
   for (pos = 0; pos < m; ++pos) {
      bmGs[pos] = m;
   }

   /* When x[0..pos] is also a suffix of x, every still-default entry left
      of the matching tail gets the shift m - 1 - pos. fill only moves
      forward, so each entry is visited at most once. */
   fill = 0;
   for (pos = m - 1; pos >= 0; --pos) {
      if (suff[pos] != pos + 1) {
         continue;
      }
      while (fill < m - 1 - pos) {
         if (bmGs[fill] == m) {
            bmGs[fill] = m - 1 - pos;
         }
         ++fill;
      }
   }

   /* A suffix of length suff[pos] also ends at pos: a mismatch just left
      of that suffix can shift by m - 1 - pos. */
   for (pos = 0; pos < m - 1; ++pos) {
      bmGs[m - 1 - suff[pos]] = m - 1 - pos;
   }
}
 
 
/*
 * Boyer-Moore search: report every occurrence of pattern x (length m) in
 * text y (length n) through OUTPUT(position).
 *
 * The shift tables are local arrays, so m must be between 1 and XSIZE;
 * other lengths are rejected without searching.
 */
void BM(char *x, int m, char *y, int n) {
   int i, j, bmGs[XSIZE], bmBc[ASIZE];

   /* bmGs has XSIZE entries and bmGs[0] is read after a match, so an empty
      or oversized pattern cannot be handled. */
   if (m <= 0 || m > XSIZE)
      return;

   /* Preprocessing */
   preBmGs(x, m, bmGs);
   preBmBc(x, m, bmBc);

   /* Searching */
   j = 0;
   while (j <= n - m) {
      /* Compare right to left; i stops at the mismatch or at -1. */
      for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
      if (i < 0) {
         OUTPUT(j);
         j += bmGs[0];
      }
      else
         /* Plain char may be signed: index the table with the byte value
            as unsigned char so bytes >= 0x80 stay in range. */
         j += MAX(bmGs[i], bmBc[(unsigned char)y[i + j]] - m + 1 + i);
   }
}

1.2 简化版Tuned Boyer-Moore

/*
 * Tuned Boyer-Moore search: report every occurrence of pattern x (length m)
 * in text y (length n) through OUTPUT(position).
 *
 * The function writes m sentinel bytes starting at y[n], so the caller must
 * provide a writable buffer with room for at least n + m bytes.
 */
void TUNEDBM(char *x, int m, char *y, int n) {
   int j, k, shift, bmBc[ASIZE];
   unsigned char last;

   /* x[m - 1] is read below; an empty pattern has no last byte. */
   if (m <= 0)
      return;

   /* Plain char may be signed: all table lookups go through unsigned char
      so bytes >= 0x80 stay in range. */
   last = (unsigned char)x[m - 1];

   /* Preprocessing */
   preBmBc(x, m, bmBc);
   shift = bmBc[last];
   /* A zero shift for the last pattern byte terminates the skip loop. */
   bmBc[last] = 0;
   /* Sentinel: guarantees the skip loop stops before leaving the buffer. */
   memset(y + n, last, m);

   /* Searching */
   j = 0;
   while (j <= n - m) {
      k = bmBc[(unsigned char)y[j + m - 1]];
      /* Unrolled skip loop: once k becomes 0 the remaining steps are no-ops. */
      while (k != 0) {
         j += k; k = bmBc[(unsigned char)y[j + m - 1]];
         j += k; k = bmBc[(unsigned char)y[j + m - 1]];
         j += k; k = bmBc[(unsigned char)y[j + m - 1]];
      }
      /* Only windows that lie entirely inside the real text count. A window
         starting after n - m would overlap the sentinel bytes and could be
         reported as a match that does not exist in the text. */
      if (j <= n - m && memcmp(x, y + j, m - 1) == 0)
         OUTPUT(j);
      j += shift;                          /* shift */
   }
}

1.3 KMP Skip Search(基于KMP的跳跃搜索)

/*
 * Extend a partial match of pattern x (length m) against text y.
 *
 * The window starts at y[start]; the first wall - start bytes are taken as
 * already matched. Returns how many leading pattern bytes match in total
 * (m means the whole pattern matches).
 */
int attempt(char *y, char *x, int m, int start, int wall) {
   int matched = wall - start;

   for (; matched < m; ++matched) {
      if (x[matched] != y[matched + start]) {
         break;
      }
   }
   return matched;
}


/*
 * KmpSkip search: report every occurrence of pattern x (length m) in text y
 * (length n) through OUTPUT(position).
 *
 * Candidate window starts come from two sources that are kept in step:
 * a per-byte bucket list over the pattern (z/list) and the shift tables
 * filled by preMp()/preKmp().
 * NOTE(review): preMp and preKmp are not defined in this listing; they are
 * presumably the Morris-Pratt and Knuth-Morris-Pratt preprocessing routines.
 *
 * kmpNext[m] is read, so m must be between 1 and XSIZE - 1; other lengths
 * are rejected without searching.
 */
void KMPSKIP(char *x, int m, char *y, int n) {
   int i, j, k, kmpStart, per, start, wall;
   int kmpNext[XSIZE], list[XSIZE], mpNext[XSIZE],
       z[ASIZE];
   /* Plain char may be signed: the byte-indexed table z is always accessed
      through unsigned views so bytes >= 0x80 stay in range. */
   const unsigned char *ux = (const unsigned char *)x;
   const unsigned char *uy = (const unsigned char *)y;

   if (m <= 0 || m >= XSIZE)
      return;

   /* Preprocessing */
   preMp(x, m, mpNext);
   preKmp(x, m, kmpNext);
   /* z[c]: highest index in x holding byte c, or -1 when c is absent.
      list[i]: next lower index holding the same byte as x[i], or -1. */
   memset(z, -1, ASIZE*sizeof(int));
   memset(list, -1, m*sizeof(int));
   z[ux[0]] = 0;
   for (i = 1; i < m; ++i) {
      list[i] = z[ux[i]];
      z[ux[i]] = i;
   }

   /* Searching */
   wall = 0;
   /* NOTE(review): presumably the period of the pattern; depends on what
      preKmp stores in kmpNext[m]. */
   per = m - kmpNext[m];
   i = j = -1;
   /* Probe every m-th text byte until one occurs in the pattern. */
   do {
      j += m;
   } while (j < n && z[uy[j]] < 0);
   if (j >= n)
     return;
   i = z[uy[j]];
   start = j - i;
   while (start <= n - m) {
      /* wall marks how far the text is already known to match. */
      if (start > wall)
         wall = start;
      k = attempt(y, x, m, start, wall);
      wall = start + k;
      if (k == m) {
         OUTPUT(start);
         i -= per;
      }
      else
         i = list[i];
      /* Bucket exhausted: move on to the next probe position. */
      if (i < 0) {
         do {
            j += m;
         } while (j < n && z[uy[j]] < 0);
         if (j >= n)
            return;
         i = z[uy[j]];
      }
      kmpStart = start + k - kmpNext[k];
      k = kmpNext[k];
      start = j - i;
      /* Advance whichever candidate is behind until the bucket-based start
         and the table-based start agree or start is at/after the wall. */
      while (start < kmpStart ||
             (kmpStart < start && start < wall)) {
         if (start < kmpStart) {
            i = list[i];
            if (i < 0) {
               do {
                  j += m;
               } while (j < n && z[uy[j]] < 0);
               if (j >= n)
                  return;
               i = z[uy[j]];
            }
            start = j - i;
         }
         else {
            kmpStart += (k - mpNext[k]);
            k = mpNext[k];
         }
      }
   }
}
上一篇:解析正则表达式中的.*和.*?的含义


下一篇:【java】正则表达式