|
1 | 1 | package com.github.myibu.algorithm.compress;
|
2 | 2 |
|
3 |
| -public class LZ77 { |
| 3 | +import java.util.Arrays; |
| 4 | + |
| 5 | +/** |
| 6 | + * LZ77 compress algorithm |
| 7 | + * @author myibu |
| 8 | + * Created on 2021/10/11 |
| 9 | + */ |
| 10 | +public class LZ77Compressor implements Compressor { |
| 11 | + private static int DEFAULT_SEARCH_BUFFER_LENGTH = 7; |
| 12 | + private static int DEFAULT_LOOK_AHEAD_WINDOW_LENGTH = 5; |
| 13 | + |
| 14 | + /** |
| 15 | + * S is the length of the search buffer |
| 16 | + */ |
| 17 | + private int s; |
| 18 | + /** |
| 19 | + * L is the length of the look ahead window |
| 20 | + */ |
| 21 | + private int l; |
| 22 | + |
| 23 | + public LZ77Compressor() { |
| 24 | + s = DEFAULT_SEARCH_BUFFER_LENGTH; |
| 25 | + l = DEFAULT_LOOK_AHEAD_WINDOW_LENGTH; |
| 26 | + } |
| 27 | + |
| 28 | + /** |
| 29 | + * while look-ahead buffer is not empty |
| 30 | + * go backwards in search buffer to find longest match of the look-ahead buffer |
| 31 | + * if match found |
| 32 | + * print: (offset from window boundary, length of match, next symbol in look ahead buffer); |
| 33 | + * shift window by length+1; |
| 34 | + * else |
| 35 | + * print: (0, 0, first symbol in look-ahead buffer); |
| 36 | + * shift window by 1; |
| 37 | + * fi |
| 38 | + * end while |
| 39 | + * @param in_data input |
| 40 | + * @param in_len length of input |
| 41 | + * @param out_data output |
| 42 | + * @return offset in output |
| 43 | + */ |
| 44 | + @Override |
| 45 | + public int compress(byte[] in_data, int in_len, byte[] out_data) { |
| 46 | + // no need to compress |
| 47 | + if (l > in_len) { |
| 48 | + System.arraycopy(in_data, 0, out_data, 0, in_len); |
| 49 | + return in_len; |
| 50 | + } |
| 51 | + // search buffer |
| 52 | + byte[] sBuf = new byte[s]; |
| 53 | + // look ahead window |
| 54 | + byte[] lWindow = new byte[l]; |
| 55 | + int sp = 0, lp = l, ip = 0, op = 0; |
| 56 | + while (lWindow.length > 0 && ip < in_len) { |
| 57 | + // abracadabrad |
| 58 | + // update search buffer |
| 59 | + int sStart = 0, sEnd = sp < s ? sp : s; |
| 60 | + for (int i = sStart; i < sEnd; i++) { |
| 61 | + System.out.println("ip=" + ip + ", i=" + i + ", sEnd=" + sEnd + ", sp=" + sp); |
| 62 | + sBuf[i] = in_data[ip - i - 1]; |
| 63 | + } |
| 64 | + // update look ahead window |
| 65 | + int lStart = 0, lEnd = ip + l < in_len ? l : in_len - ip; |
| 66 | + for (int i = lStart; i < lEnd; i++) { |
| 67 | + lWindow[i] = in_data[ip + i]; |
| 68 | + } |
| 69 | + System.out.println("all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow)); |
| 70 | + |
| 71 | + int llStart = sEnd - 1, rrStart = 0, llEnd = 0, rrEnd = (lp = lEnd); |
| 72 | + int minMatched = 1, minIndex = 0; |
| 73 | + while (llStart >= 0) { |
| 74 | + int matched = 0, left = llStart, right = rrStart; |
| 75 | + while (left >= 0 && right < rrEnd && sBuf[left--] == lWindow[right++]) { |
| 76 | + matched++; |
| 77 | + } |
| 78 | + if (matched >= minMatched) { |
| 79 | + minIndex = llStart; |
| 80 | + minMatched = matched; |
| 81 | + } |
| 82 | + llStart--; |
| 83 | + } |
| 84 | + System.out.println("minIndex=" + minIndex + ", all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow)); |
| 85 | + // matched |
| 86 | + if (minIndex > 0) { |
| 87 | +// byte[] tuple = String.format("(%d,%d,%s)", minIndex + 1, minMatched, new String(new byte[]{lWindow[minMatched]})).getBytes(); |
| 88 | +// System.arraycopy(tuple, 0, out_data, (op++) * tuple.length, tuple.length); |
| 89 | + System.out.println(String.format("(%d, %d, %s)", minIndex + 1, minMatched, new String(new byte[]{lWindow[minMatched]}))); |
| 90 | + sp += (minMatched + 1); |
| 91 | +// if (sp > s) { |
| 92 | +// sp = s-1; |
| 93 | +// } |
| 94 | + ip += (minMatched + 1); |
| 95 | + } else { |
| 96 | + sp++; |
| 97 | +// if (sp > s) { |
| 98 | +// sp = s-1; |
| 99 | +// } |
| 100 | + ip++; |
| 101 | +// byte[] tuple = String.format("(%d,%d,%s)", 0, 0, new String(new byte[]{lWindow[0]})).getBytes(); |
| 102 | +// System.arraycopy(tuple, 0, out_data, (op++) * tuple.length, tuple.length); |
| 103 | + System.out.println(String.format("(%d, %d, %s)", 0, 0, new String(new byte[]{lWindow[0]}))); |
| 104 | + } |
| 105 | + } |
| 106 | + return 0; |
| 107 | + } |
| 108 | + |
| 109 | +// private int indexOf(int llStart, int rrStart, int llEnd, int rrEnd, byte[] sBuf, byte[] lWindow) { |
| 110 | +// int minMatched = 1, minIndex = 0; |
| 111 | +// while (llStart >= 0) { |
| 112 | +// int matched = 0, left = llStart, right = rrStart; |
| 113 | +// while (left >= 0 && right < rrEnd && sBuf[left--] == lWindow[right++]) { |
| 114 | +// matched++; |
| 115 | +// } |
| 116 | +// if (matched >= minMatched) { |
| 117 | +// minIndex = llStart; |
| 118 | +// minMatched = matched; |
| 119 | +// } |
| 120 | +// llStart--; |
| 121 | +// } |
| 122 | +// System.out.println("minIndex=" + minIndex + ", all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow)); |
| 123 | +// return minIndex; |
| 124 | +// } |
| 125 | + |
| 126 | + /** |
| 127 | + * for each token (offset, length, symbol) |
| 128 | + * if offset = 0 then |
| 129 | + * print symbol; |
| 130 | + * else |
| 131 | + * go reverse in previous output by offset characters and copy |
| 132 | + * character wise for length symbols; |
| 133 | + * print symbol; |
| 134 | + * fi |
| 135 | + * next |
| 136 | + * @param in_data input |
| 137 | + * @param in_len length of input |
| 138 | + * @param out_data output |
| 139 | + * @return offset in output |
| 140 | + */ |
| 141 | + @Override |
| 142 | + public int decompress(byte[] in_data, int in_len, byte[] out_data) { |
| 143 | + return 0; |
| 144 | + } |
4 | 145 | }
|
0 commit comments