// leetcode 1044
class Solution { int p = 13331; int maxn = (int)3e4+5; long[] H = new long[maxn]; // 前i个字符的hashcode long[] P = new long[maxn]; // 次方数组 public String longestDupSubstring(String s) { int n = s.length(); P[0] = 1; for (int i = 1; i <= n; i++) { H[i] = H[i-1] * p + s.charAt(i-1); P[i] = P[i-1] * p; } String ans = ""; int left = 0, right = n; while (left < right) { // 寻找最后一个满足check的len,保证最大 int mid = left + ((right - left + 1) >> 1); String tmp = check(s, mid); if (tmp.length() != 0) { // 存在 left = mid; } else { right = mid - 1; } ans = tmp.length() > 0? tmp:ans; } String tmp = check(s, left); ans = tmp.length() > 0 && tmp.length() > ans.length()? tmp: ans; return ans; } // 判断某个长度的子串是否为重复子串 public String check(String s, int len) { int n = s.length(); Set<Long> set = new HashSet<>(); for (int i = 1; i + len - 1 <= n; i++) { int j = i + len - 1; long hash = H[j] - H[i-1] * P[j-i+1]; if (set.contains(hash)) return s.substring(i - 1, i + len - 1); set.add(hash); } return ""; } }
字符串哈希 + 前缀和
/** * https://blog.csdn.net/SHU15121856/article/details/109553503 * 字符串Hash + 前缀和 * eg. abcde * H[4] = a * p^4 + b * p^3 + c * p^2 + d * p^1 + e * p^0 * H[l:r] = H[r] - H[l] * P^(r-l+1) */ class Solution { int P = 13331; // P进制,经验值(131、13331) int maxn = (int) 1e5+5; int[] H = new int[maxn]; // H[i]表示前i个字符的哈希值 int[] p = new int[maxn]; // 次方数组 public List<String> findRepeatedDnaSequences(String s) { int n = s.length(); List<String> resultList = new LinkedList<>(); p[0] = 1; for (int i = 1; i <= n; i++) { H[i] = H[i-1] * P + s.charAt(i-1); p[i] = p[i-1] * P; } Map<Integer, Integer> map = new HashMap<>(); // key为子串的Hash值 for (int i = 1; i + 10 -1 <= n; i++) { int j = i + 10 -1; int hash = H[j] - H[i-1] * p[j-i+1]; int cnt = map.getOrDefault(hash, 0); if (cnt == 1) resultList.add(s.substring(i-1, i+10-1)); map.put(hash, cnt+1); } return resultList; } }