九十三.字符串匹配 KMP、suffix array 、RabinKarp (字符串算法问题(二))

暴力解法:

import java.util.Scanner;

public class LianXi {
		
	public static int index(String s, String p){
		int i = 0;
		int sc = i;
		int j = 0;
		while(sc < s.length()){
			if(s.charAt(sc) == p.charAt(j)){
				sc++;
				j++;
				if(j == p.length())
					return i;
			}
			else{
					i++;
					sc = i;
					j = 0;
				}
		    }
		return -1;
	 }
	
	public static void main(String[] args){
		Scanner in = new Scanner(System.in);
		String s = in.next();
		String p = in.next();
		int res = index(s,p);
		System.out.println(res);
	}
 }

九十三.字符串匹配 KMP、suffix array 、RabinKarp (字符串算法问题(二))
KMP:

import java.util.Scanner;

public class LianXi {
		
	public static int index(String s, String p){
		if(s.length() == 0 || p.length() == 0)
			return -1;
		if(p.length() > s.length())
			return -1;
	
		int[] next = next(p);
		int i = 0; //s位置
		int j = 0; //p位置
		int slen = s.length();
		int plen = p.length();
		
		while(i < slen){
			//①如果 j==-1,或者当前字符串匹配成功,(即s[i] == p[j]),都令i++,j++
			//j=-1,即next[0]=-1,说明p的第一位和i在这个位置无法匹配,这时i,j都增加1,i移位,j从0开始
			if(j==-1 || s.charAt(i) == p.charAt(j)){
				i++;
				j++;
			}else{
				//②如果 j!= -1, 且当前字符匹失败(即s[i]!=p[j]),则令i不变,j=next[j]
				//next[j]即为j所对应的next值
				j = next[j];
			}
			if(j == plen)
				return (i - j);
		}
		return -1;
	 }
	
	public static int[] next(String ps){
		int plength = ps.length();
		int[] next = new int[plength];
		char[] p = ps.toCharArray();
		next[0] = -1;
		if(ps.length() == 1)
			return next;
		next[1] = 0;
		
		int j = 1;
		int k = next[j]; // 看看位置j的最长匹配前缀在哪里
        
		while(j < plength - 1){
			//现在要推出next[j+1],检查j和k位置上的关系即可
			if(k < 0 || p[j] == p[k]){
				next[++j] = ++k;
			}else{
				k = next[k];
			}
		}
		return next;
	}
	
	public static void main(String[] args){
		Scanner in = new Scanner(System.in);
		String s = in.next();
		String p = in.next();
		int res = index(s,p);
		System.out.println(res);
	}
 }

九十三.字符串匹配 KMP、suffix array 、RabinKarp (字符串算法问题(二))

后缀数组:

import java.util.Arrays;

public class Match03_SuffixArray {

  public static int[] getHeight(String src, Suff[] sa) {
    // Suff[] sa =getSa2(src);
    int strLength = src.length();
    int[] rk = new int[strLength];
    //将rank表示为不重复的排名即0~n-1
    for (int i = 0; i < strLength; i++) {
      rk[sa[i].index] = i;
    }
    int[] height = new int[strLength];
    int k = 0;
    for (int i = 0; i < strLength; i++) {
      int rk_i = rk[i];//i后缀的排名
      if (rk_i == 0) {
        height[0] = 0;
        continue;
      }
      int rk_i_1 = rk_i - 1;
      int j = sa[rk_i_1].index;//j是i串字典序靠前的串的下标
      if (k > 0) k--;

      for (; j + k < strLength && i + k < strLength; k++) {
        if (src.charAt(j + k) != src.charAt(i + k))
          break;
      }
      height[rk_i] = k;
    }
    return height;
  }

  public static Suff[] getSa2(String src) {
    int n = src.length();
    Suff[] sa = new Suff[n];
    for (int i = 0; i < n; i++) {
      sa[i] = new Suff(src.charAt(i), i, src);//存单个字符,接下来排序
    }
    Arrays.sort(sa);

    /**rk是下标到排名的映射*/
    int[] rk = new int[n];//suffix array
    rk[sa[0].index] = 1;
    for (int i = 1; i < n; i++) {
      rk[sa[i].index] = rk[sa[i - 1].index];
      if (sa[i].c != sa[i - 1].c) rk[sa[i].index]++;
    }
    //倍增法
    for (int k = 2; rk[sa[n - 1].index] < n; k *= 2) {

      final int kk = k;
      Arrays.sort(sa, (o1, o2) -> {
        //不是基于字符串比较,而是利用之前的rank
        int i = o1.index;
        int j = o2.index;
        if (rk[i] == rk[j]) {//如果第一关键字相同
          if (i + kk / 2 >= n || j + kk / 2 >= n)
            return -(i - j);//如果某个后缀不具有第二关键字,那肯定较小,索引靠后的更小
          return rk[i + kk / 2] - rk[j + kk / 2];

        } else {
          return rk[i] - rk[j];
        }
      });
      /*---排序 end---*/
      // 更新rank
      rk[sa[0].index] = 1;
      for (int i = 1; i < n; i++) {
        int i1 = sa[i].index;
        int i2 = sa[i - 1].index;
        rk[i1] = rk[i2];
        try {
          if (!src.substring(i1, i1 + kk).equals(src.substring(i2, i2 + kk)))
            rk[i1]++;
        } catch (Exception e) {
          rk[i1]++;
        }
      }
    }

    return sa;
  }

  public static class Suff implements Comparable<Suff> {
    public char c;//后缀内容
    private String src;
    public int index;//后缀的起始下标

    public Suff(char c, int index, String src) {
      this.c = c;
      this.index = index;
      this.src = src;
    }

    @Override
    public int compareTo(Suff o2) {
      return this.c - o2.c;
    }

    @Override
    public String toString() {
      return "Suff{" +
          "char='" + src.substring(index) + '\'' +
          ", index=" + index +
          '}';
    }
  }
}

RabinKarp

public class Match01_RabinKarp {
  public static void main(String[] args) {
    String s = "ABABABA";
    String p = "ABA";
    match(p, s);
  }

  private static void match(String p, String s) {
    long hash_p = hash(p);//p的hash值
    long[] hashOfS = hash(s, p.length());
    match(hash_p, hashOfS);
  }

  private static void match(long hash_p, long[] hash_s) {
    for (int i = 0; i < hash_s.length; i++) {
      if (hash_s[i] == hash_p) {
        System.out.println("match:" + i);
      }
    }
  }

  final static long seed = 31;

  static long[] hash(final String s, final int n) {
    long[] res = new long[s.length() - n + 1];
    //前m个字符的hash
    res[0] = hash(s.substring(0, n));
    for (int i = n; i < s.length(); i++) {
      char newChar = s.charAt(i);
      char ochar = s.charAt(i - n);
      //前n个字符的hash*seed-前n字符的第一字符*seed的n次方
      long v = (res[i - n] * seed + newChar - Case11_NExponent.ex2(seed, n) * ochar) % Long.MAX_VALUE;
      res[i - n + 1] = v;
    }
    return res;
  }

  static long hash(String str) {
    long h = 0;
    for (int i = 0; i != str.length(); ++i) {
      h = seed * h + str.charAt(i);
    }
    return h % Long.MAX_VALUE;
  }
}

上一篇:2021-10-29


下一篇:无向图的邻接表表示求度和两种遍历-----数据结构与算法笔记