根据阮一峰的博客
http://www.ruanyifeng.com/blog/2013/05/boyer-moore_string_search_algorithm.html
试写算法。
使用好后缀的方法比较复杂,暂未实现。
只实现了通过坏字符的方法,事实上Boyer-Moore-Horspool算法是简化版的Boyer-Moore算法,它只使用坏字符规则。
具体代码如下:
- class Program
- {
- static void Main(string[] args)
- {
- string source = "HERE IS A SIMPLE EXAMPLE";
- string pattern = "EXAMPLE";
- BoyerMoore(source.ToCharArray(), pattern.ToCharArray());
- }
- static int BoyerMoore(char[] source, char[] pattern)
- {
- Dictionary<char, int> PostionDictionary = GetPostionDictionary(pattern);
- int pattern_length = pattern.Length;
- int start_pointer = 0;
- int end_pointer = start_pointer + pattern_length - 1;
- int offset = 0;
- int offset_for_print = 0;
- while (end_pointer <= source.Length)
- {
- Console.WriteLine(source);
- Console.WriteLine(new String(pattern).PadLeft(offset_for_print + pattern_length,'-'));
- char[] Chars = subChars(source, start_pointer, end_pointer);
- char badChar;
- int badPostion;
- if (checkIsEqual_badChar(pattern, Chars, out badChar, out badPostion))
- {
- Console.WriteLine(start_pointer + "," + end_pointer);
- start_pointer += pattern_length;
- end_pointer = start_pointer + pattern_length - 1;
- offset_for_print += pattern_length;
- continue;
- }
- else
- {
- if (PostionDictionary.ContainsKey(badChar))
- {
- //计算偏移量
- offset = badPostion - PostionDictionary[badChar];
- }
- else
- {
- offset = badPostion + 1;
- }
- start_pointer += offset;
- end_pointer = start_pointer + pattern_length - 1;
- offset_for_print += offset;
- }
- }
- return 0;
- }
- static bool checkIsEqual_badChar(char[] pattern, char[] Chars, out char badChar, out int badPostion)
- {
- if (pattern.Length != Chars.Length)
- {
- throw new Exception("length error");
- }
- for (int i = pattern.Length - 1; i >= 0; i--)
- {
- if (pattern[i] != Chars[i])
- {
- badChar = Chars[i];
- badPostion = i;
- return false;
- }
- }
- badChar = '\0';
- badPostion = -1;
- return true;
- }
- /// <summary>
- /// 根据起始位置和结束位置,给出子串
- /// </summary>
- /// <param name="source"></param>
- /// <param name="start"></param>
- /// <param name="end"></param>
- /// <returns></returns>
- static char[] subChars(char[] source, int start, int end)
- {
- char[] c = new char[end - start + 1];
- for (int i = start; i <= end; i++)
- {
- c[i - start] = source[i];
- }
- return c;
- }
- /// <summary>
- /// 生成坏字符位置表
- /// </summary>
- /// <param name="chars"></param>
- /// <returns></returns>
- static Dictionary<char, int> GetPostionDictionary(char[] chars)
- {
- Dictionary<char, int> d = new Dictionary<char, int>();
- for (int i = 0; i < chars.Length; i++)
- {
- if (d.ContainsKey(chars[i]) == false)
- {
- d.Add(chars[i], i);
- }
- else//存在则覆盖
- {
- d[chars[i]] = i;
- }
- }
- return d;
- }
- }
测试数据输出图:
本文转自cnn23711151CTO博客,原文链接:http://blog.51cto.com/cnn237111/1195682 ,如需转载请自行联系原作者