|
| 1 | +package class44; |
| 2 | + |
| 3 | +// 最长公共子串问题是面试常见题目之一 |
| 4 | +// 假设str1长度N,str2长度M |
| 5 | +// 因为最优解的难度所限,一般在面试场上回答出O(N*M)的解法已经是比较优秀了 |
| 6 | +// 因为得到O(N*M)的解法,就已经需要用到动态规划了 |
| 7 | +// 但其实这个问题的最优解是O(N+M),为了达到这个复杂度可是不容易 |
| 8 | +// 首先需要用到DC3算法得到后缀数组(sa) |
| 9 | +// 进而用sa数组去生成height数组 |
| 10 | +// 而且在生成的时候,还有一个不回退的优化,都非常不容易理解 |
| 11 | +// 这就是后缀数组在面试算法中的地位 : 德高望重的噩梦 |
| 12 | +public class Code04_LongestCommonSubstringConquerByHeight { |
| 13 | + |
| 14 | +public static int lcs1(String s1, String s2) { |
| 15 | +if (s1 == null || s2 == null || s1.length() == 0 || s2.length() == 0) { |
| 16 | +return 0; |
| 17 | +} |
| 18 | +char[] str1 = s1.toCharArray(); |
| 19 | +char[] str2 = s2.toCharArray(); |
| 20 | +int row = 0; |
| 21 | +int col = str2.length - 1; |
| 22 | +int max = 0; |
| 23 | +while (row < str1.length) { |
| 24 | +int i = row; |
| 25 | +int j = col; |
| 26 | +int len = 0; |
| 27 | +while (i < str1.length && j < str2.length) { |
| 28 | +if (str1[i] != str2[j]) { |
| 29 | +len = 0; |
| 30 | +} else { |
| 31 | +len++; |
| 32 | +} |
| 33 | +if (len > max) { |
| 34 | +max = len; |
| 35 | +} |
| 36 | +i++; |
| 37 | +j++; |
| 38 | +} |
| 39 | +if (col > 0) { |
| 40 | +col--; |
| 41 | +} else { |
| 42 | +row++; |
| 43 | +} |
| 44 | +} |
| 45 | +return max; |
| 46 | +} |
| 47 | + |
| 48 | +public static int lcs2(String s1, String s2) { |
| 49 | +if (s1 == null || s2 == null || s1.length() == 0 || s2.length() == 0) { |
| 50 | +return 0; |
| 51 | +} |
| 52 | +char[] str1 = s1.toCharArray(); |
| 53 | +char[] str2 = s2.toCharArray(); |
| 54 | +int N = str1.length; |
| 55 | +int M = str2.length; |
| 56 | +int min = str1[0]; |
| 57 | +int max = str1[0]; |
| 58 | +for (int i = 1; i < N; i++) { |
| 59 | +min = Math.min(min, str1[i]); |
| 60 | +max = Math.max(max, str1[i]); |
| 61 | +} |
| 62 | +for (int i = 0; i < M; i++) { |
| 63 | +min = Math.min(min, str2[i]); |
| 64 | +max = Math.max(max, str2[i]); |
| 65 | +} |
| 66 | +int[] all = new int[N + M + 1]; |
| 67 | +int index = 0; |
| 68 | +for (int i = 0; i < N; i++) { |
| 69 | +all[index++] = str1[i] - min + 2; |
| 70 | +} |
| 71 | +all[index++] = 1; |
| 72 | +for (int i = 0; i < M; i++) { |
| 73 | +all[index++] = str2[i] - min + 2; |
| 74 | +} |
| 75 | +DC3 dc3 = new DC3(all, max - min + 2); |
| 76 | +int n = all.length; |
| 77 | +int[] sa = dc3.sa; |
| 78 | +int[] height = dc3.height; |
| 79 | +int ans = 0; |
| 80 | +for (int i = 1; i < n; i++) { |
| 81 | +int up = sa[i - 1]; |
| 82 | +int down = sa[i]; |
| 83 | +if (Math.min(up, down) < N && Math.max(up, down) > N) { |
| 84 | +ans = Math.max(ans, height[i]); |
| 85 | +} |
| 86 | +} |
| 87 | +return ans; |
| 88 | +} |
| 89 | + |
| 90 | +public static class DC3 { |
| 91 | + |
| 92 | +public int[] sa; |
| 93 | + |
| 94 | +public int[] rank; |
| 95 | + |
| 96 | +public int[] height; |
| 97 | + |
| 98 | +public DC3(int[] nums, int max) { |
| 99 | +sa = sa(nums, max); |
| 100 | +rank = rank(); |
| 101 | +height = height(nums); |
| 102 | +} |
| 103 | + |
| 104 | +private int[] sa(int[] nums, int max) { |
| 105 | +int n = nums.length; |
| 106 | +int[] arr = new int[n + 3]; |
| 107 | +for (int i = 0; i < n; i++) { |
| 108 | +arr[i] = nums[i]; |
| 109 | +} |
| 110 | +return skew(arr, n, max); |
| 111 | +} |
| 112 | + |
| 113 | +private int[] skew(int[] nums, int n, int K) { |
| 114 | +int n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2; |
| 115 | +int[] s12 = new int[n02 + 3], sa12 = new int[n02 + 3]; |
| 116 | +for (int i = 0, j = 0; i < n + (n0 - n1); ++i) { |
| 117 | +if (0 != i % 3) { |
| 118 | +s12[j++] = i; |
| 119 | +} |
| 120 | +} |
| 121 | +radixPass(nums, s12, sa12, 2, n02, K); |
| 122 | +radixPass(nums, sa12, s12, 1, n02, K); |
| 123 | +radixPass(nums, s12, sa12, 0, n02, K); |
| 124 | +int name = 0, c0 = -1, c1 = -1, c2 = -1; |
| 125 | +for (int i = 0; i < n02; ++i) { |
| 126 | +if (c0 != nums[sa12[i]] || c1 != nums[sa12[i] + 1] || c2 != nums[sa12[i] + 2]) { |
| 127 | +name++; |
| 128 | +c0 = nums[sa12[i]]; |
| 129 | +c1 = nums[sa12[i] + 1]; |
| 130 | +c2 = nums[sa12[i] + 2]; |
| 131 | +} |
| 132 | +if (1 == sa12[i] % 3) { |
| 133 | +s12[sa12[i] / 3] = name; |
| 134 | +} else { |
| 135 | +s12[sa12[i] / 3 + n0] = name; |
| 136 | +} |
| 137 | +} |
| 138 | +if (name < n02) { |
| 139 | +sa12 = skew(s12, n02, name); |
| 140 | +for (int i = 0; i < n02; i++) { |
| 141 | +s12[sa12[i]] = i + 1; |
| 142 | +} |
| 143 | +} else { |
| 144 | +for (int i = 0; i < n02; i++) { |
| 145 | +sa12[s12[i] - 1] = i; |
| 146 | +} |
| 147 | +} |
| 148 | +int[] s0 = new int[n0], sa0 = new int[n0]; |
| 149 | +for (int i = 0, j = 0; i < n02; i++) { |
| 150 | +if (sa12[i] < n0) { |
| 151 | +s0[j++] = 3 * sa12[i]; |
| 152 | +} |
| 153 | +} |
| 154 | +radixPass(nums, s0, sa0, 0, n0, K); |
| 155 | +int[] sa = new int[n]; |
| 156 | +for (int p = 0, t = n0 - n1, k = 0; k < n; k++) { |
| 157 | +int i = sa12[t] < n0 ? sa12[t] * 3 + 1 : (sa12[t] - n0) * 3 + 2; |
| 158 | +int j = sa0[p]; |
| 159 | +if (sa12[t] < n0 ? leq(nums[i], s12[sa12[t] + n0], nums[j], s12[j / 3]) |
| 160 | +: leq(nums[i], nums[i + 1], s12[sa12[t] - n0 + 1], nums[j], nums[j + 1], s12[j / 3 + n0])) { |
| 161 | +sa[k] = i; |
| 162 | +t++; |
| 163 | +if (t == n02) { |
| 164 | +for (k++; p < n0; p++, k++) { |
| 165 | +sa[k] = sa0[p]; |
| 166 | +} |
| 167 | +} |
| 168 | +} else { |
| 169 | +sa[k] = j; |
| 170 | +p++; |
| 171 | +if (p == n0) { |
| 172 | +for (k++; t < n02; t++, k++) { |
| 173 | +sa[k] = sa12[t] < n0 ? sa12[t] * 3 + 1 : (sa12[t] - n0) * 3 + 2; |
| 174 | +} |
| 175 | +} |
| 176 | +} |
| 177 | +} |
| 178 | +return sa; |
| 179 | +} |
| 180 | + |
| 181 | +private void radixPass(int[] nums, int[] input, int[] output, int offset, int n, int k) { |
| 182 | +int[] cnt = new int[k + 1]; |
| 183 | +for (int i = 0; i < n; ++i) { |
| 184 | +cnt[nums[input[i] + offset]]++; |
| 185 | +} |
| 186 | +for (int i = 0, sum = 0; i < cnt.length; ++i) { |
| 187 | +int t = cnt[i]; |
| 188 | +cnt[i] = sum; |
| 189 | +sum += t; |
| 190 | +} |
| 191 | +for (int i = 0; i < n; ++i) { |
| 192 | +output[cnt[nums[input[i] + offset]]++] = input[i]; |
| 193 | +} |
| 194 | +} |
| 195 | + |
| 196 | +private boolean leq(int a1, int a2, int b1, int b2) { |
| 197 | +return a1 < b1 || (a1 == b1 && a2 <= b2); |
| 198 | +} |
| 199 | + |
| 200 | +private boolean leq(int a1, int a2, int a3, int b1, int b2, int b3) { |
| 201 | +return a1 < b1 || (a1 == b1 && leq(a2, a3, b2, b3)); |
| 202 | +} |
| 203 | + |
| 204 | +private int[] rank() { |
| 205 | +int n = sa.length; |
| 206 | +int[] ans = new int[n]; |
| 207 | +for (int i = 0; i < n; i++) { |
| 208 | +ans[sa[i]] = i; |
| 209 | +} |
| 210 | +return ans; |
| 211 | +} |
| 212 | + |
| 213 | +private int[] height(int[] s) { |
| 214 | +int n = s.length; |
| 215 | +int[] ans = new int[n]; |
| 216 | +for (int i = 0, k = 0; i < n; ++i) { |
| 217 | +if (rank[i] != 0) { |
| 218 | +if (k > 0) { |
| 219 | +--k; |
| 220 | +} |
| 221 | +int j = sa[rank[i] - 1]; |
| 222 | +while (i + k < n && j + k < n && s[i + k] == s[j + k]) { |
| 223 | +++k; |
| 224 | +} |
| 225 | +ans[rank[i]] = k; |
| 226 | +} |
| 227 | +} |
| 228 | +return ans; |
| 229 | +} |
| 230 | + |
| 231 | +} |
| 232 | + |
| 233 | +// for test |
| 234 | +public static String randomNumberString(int len, int range) { |
| 235 | +char[] str = new char[len]; |
| 236 | +for (int i = 0; i < len; i++) { |
| 237 | +str[i] = (char) ((int) (Math.random() * range) + 'a'); |
| 238 | +} |
| 239 | +return String.valueOf(str); |
| 240 | +} |
| 241 | + |
| 242 | +public static void main(String[] args) { |
| 243 | +int len = 30; |
| 244 | +int range = 5; |
| 245 | +int testTime = 100000; |
| 246 | +System.out.println("功能测试开始"); |
| 247 | +for (int i = 0; i < testTime; i++) { |
| 248 | +int N1 = (int) (Math.random() * len); |
| 249 | +int N2 = (int) (Math.random() * len); |
| 250 | +String str1 = randomNumberString(N1, range); |
| 251 | +String str2 = randomNumberString(N2, range); |
| 252 | +int ans1 = lcs1(str1, str2); |
| 253 | +int ans2 = lcs2(str1, str2); |
| 254 | +if (ans1 != ans2) { |
| 255 | +System.out.println("Oops!"); |
| 256 | +} |
| 257 | +} |
| 258 | +System.out.println("功能测试结束"); |
| 259 | +System.out.println("=========="); |
| 260 | + |
| 261 | +System.out.println("性能测试开始"); |
| 262 | +len = 80000; |
| 263 | +range = 26; |
| 264 | +long start; |
| 265 | +long end; |
| 266 | + |
| 267 | +String str1 = randomNumberString(len, range); |
| 268 | +String str2 = randomNumberString(len, range); |
| 269 | + |
| 270 | +start = System.currentTimeMillis(); |
| 271 | +int ans1 = lcs1(str1, str2); |
| 272 | +end = System.currentTimeMillis(); |
| 273 | +System.out.println("方法1结果 : " + ans1 + " , 运行时间 : " + (end - start) + " ms"); |
| 274 | + |
| 275 | +start = System.currentTimeMillis(); |
| 276 | +int ans2 = lcs2(str1, str2); |
| 277 | +end = System.currentTimeMillis(); |
| 278 | +System.out.println("方法2结果 : " + ans2 + " , 运行时间 : " + (end - start) + " ms"); |
| 279 | + |
| 280 | +System.out.println("性能测试结束"); |
| 281 | + |
| 282 | +} |
| 283 | + |
| 284 | +} |
0 commit comments