Skip to content

Commit 285263e

Browse files
committed
add lz77 compress
1 parent f795639 commit 285263e

File tree

5 files changed

+243
-9
lines changed

5 files changed

+243
-9
lines changed

docs/LZ77.pdf

169 KB
Binary file not shown.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,26 @@
11
package com.github.myibu.algorithm.compress;
22

3+
/**
 * Contract shared by the byte-array compressors in this package: one method
 * to compress a buffer and one to restore it.
 *
 * @author myibu
 * Created on 2021/10/11
 */
public interface Compressor {

    /**
     * Compresses the first {@code in_len} bytes of {@code in_data} into
     * {@code out_data}.
     *
     * @param in_data  buffer holding the uncompressed input
     * @param in_len   number of bytes of {@code in_data} to read
     * @param out_data buffer that receives the compressed bytes
     * @return offset in {@code out_data} just past the last byte written
     */
    int compress(byte[] in_data, int in_len, byte[] out_data);

    /**
     * Decompresses the first {@code in_len} bytes of {@code in_data} into
     * {@code out_data}.
     *
     * @param in_data  buffer holding the compressed input
     * @param in_len   number of bytes of {@code in_data} to read
     * @param out_data buffer that receives the restored bytes
     * @return offset in {@code out_data} just past the last byte written
     */
    int decompress(byte[] in_data, int in_len, byte[] out_data);
}
Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,145 @@
11
package com.github.myibu.algorithm.compress;
22

3-
public class LZ77 {
3+
import java.util.Arrays;
4+
5+
/**
6+
* LZ77 compress algorithm
7+
* @author myibu
8+
* Created on 2021/10/11
9+
*/
10+
public class LZ77Compressor implements Compressor {
11+
private static int DEFAULT_SEARCH_BUFFER_LENGTH = 7;
12+
private static int DEFAULT_LOOK_AHEAD_WINDOW_LENGTH = 5;
13+
14+
/**
15+
* S is the length of the search buffer
16+
*/
17+
private int s;
18+
/**
19+
* L is the length of the look ahead window
20+
*/
21+
private int l;
22+
23+
public LZ77Compressor() {
24+
s = DEFAULT_SEARCH_BUFFER_LENGTH;
25+
l = DEFAULT_LOOK_AHEAD_WINDOW_LENGTH;
26+
}
27+
28+
/**
29+
* while look-ahead buffer is not empty
30+
* go backwards in search buffer to find longest match of the look-ahead buffer
31+
* if match found
32+
* print: (offset from window boundary, length of match, next symbol in look ahead buffer);
33+
* shift window by length+1;
34+
* else
35+
* print: (0, 0, first symbol in look-ahead buffer);
36+
* shift window by 1;
37+
* fi
38+
* end while
39+
* @param in_data input
40+
* @param in_len length of input
41+
* @param out_data output
42+
* @return offset in output
43+
*/
44+
@Override
45+
public int compress(byte[] in_data, int in_len, byte[] out_data) {
46+
// no need to compress
47+
if (l > in_len) {
48+
System.arraycopy(in_data, 0, out_data, 0, in_len);
49+
return in_len;
50+
}
51+
// search buffer
52+
byte[] sBuf = new byte[s];
53+
// look ahead window
54+
byte[] lWindow = new byte[l];
55+
int sp = 0, lp = l, ip = 0, op = 0;
56+
while (lWindow.length > 0 && ip < in_len) {
57+
// abracadabrad
58+
// update search buffer
59+
int sStart = 0, sEnd = sp < s ? sp : s;
60+
for (int i = sStart; i < sEnd; i++) {
61+
System.out.println("ip=" + ip + ", i=" + i + ", sEnd=" + sEnd + ", sp=" + sp);
62+
sBuf[i] = in_data[ip - i - 1];
63+
}
64+
// update look ahead window
65+
int lStart = 0, lEnd = ip + l < in_len ? l : in_len - ip;
66+
for (int i = lStart; i < lEnd; i++) {
67+
lWindow[i] = in_data[ip + i];
68+
}
69+
System.out.println("all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow));
70+
71+
int llStart = sEnd - 1, rrStart = 0, llEnd = 0, rrEnd = (lp = lEnd);
72+
int minMatched = 1, minIndex = 0;
73+
while (llStart >= 0) {
74+
int matched = 0, left = llStart, right = rrStart;
75+
while (left >= 0 && right < rrEnd && sBuf[left--] == lWindow[right++]) {
76+
matched++;
77+
}
78+
if (matched >= minMatched) {
79+
minIndex = llStart;
80+
minMatched = matched;
81+
}
82+
llStart--;
83+
}
84+
System.out.println("minIndex=" + minIndex + ", all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow));
85+
// matched
86+
if (minIndex > 0) {
87+
// byte[] tuple = String.format("(%d,%d,%s)", minIndex + 1, minMatched, new String(new byte[]{lWindow[minMatched]})).getBytes();
88+
// System.arraycopy(tuple, 0, out_data, (op++) * tuple.length, tuple.length);
89+
System.out.println(String.format("(%d, %d, %s)", minIndex + 1, minMatched, new String(new byte[]{lWindow[minMatched]})));
90+
sp += (minMatched + 1);
91+
// if (sp > s) {
92+
// sp = s-1;
93+
// }
94+
ip += (minMatched + 1);
95+
} else {
96+
sp++;
97+
// if (sp > s) {
98+
// sp = s-1;
99+
// }
100+
ip++;
101+
// byte[] tuple = String.format("(%d,%d,%s)", 0, 0, new String(new byte[]{lWindow[0]})).getBytes();
102+
// System.arraycopy(tuple, 0, out_data, (op++) * tuple.length, tuple.length);
103+
System.out.println(String.format("(%d, %d, %s)", 0, 0, new String(new byte[]{lWindow[0]})));
104+
}
105+
}
106+
return 0;
107+
}
108+
109+
// private int indexOf(int llStart, int rrStart, int llEnd, int rrEnd, byte[] sBuf, byte[] lWindow) {
110+
// int minMatched = 1, minIndex = 0;
111+
// while (llStart >= 0) {
112+
// int matched = 0, left = llStart, right = rrStart;
113+
// while (left >= 0 && right < rrEnd && sBuf[left--] == lWindow[right++]) {
114+
// matched++;
115+
// }
116+
// if (matched >= minMatched) {
117+
// minIndex = llStart;
118+
// minMatched = matched;
119+
// }
120+
// llStart--;
121+
// }
122+
// System.out.println("minIndex=" + minIndex + ", all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow));
123+
// return minIndex;
124+
// }
125+
126+
/**
127+
* for each token (offset, length, symbol)
128+
* if offset = 0 then
129+
* print symbol;
130+
* else
131+
* go reverse in previous output by offset characters and copy
132+
* character wise for length symbols;
133+
* print symbol;
134+
* fi
135+
* next
136+
* @param in_data input
137+
* @param in_len length of input
138+
* @param out_data output
139+
* @return offset in output
140+
*/
141+
@Override
142+
public int decompress(byte[] in_data, int in_len, byte[] out_data) {
143+
return 0;
144+
}
4145
}

src/main/java/com/github/myibu/algorithm/compress/LZFCompressor.java

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@
22

33
import java.nio.charset.StandardCharsets;
44

5-
public class LZFCompressor {
5+
/**
6+
* LZF compress algorithm
7+
* @author myibu
8+
* Created on 2021/10/11
9+
*/
10+
public class LZFCompressor implements Compressor {
611
private static final int LZF_HSLOT_BIAS = 0;
712

813
private static final int HLOG = 16;
@@ -24,8 +29,9 @@ private static int IDX(int h) {
2429
return ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1));
2530
}
2631

27-
public int compress(byte[] in_data, int in_len,
28-
byte[] out_data, int out_len) {
32+
@Override
33+
public int compress(byte[] in_data, int in_len, byte[] out_data) {
34+
int out_len = out_data.length;
2935
if (in_len == 0 || out_len == 0)
3036
return 0;
3137
int ip = 0, op = 0;
@@ -125,4 +131,51 @@ public int compress(byte[] in_data, int in_len,
125131

126132
return op;
127133
}
134+
135+
@Override
136+
public int decompress(byte[] in_data, int in_len, byte[] out_data) {
137+
int out_len = out_data.length;
138+
int ip = 0, op = 0;
139+
while (ip < in_len) {
140+
int ctrl;
141+
ctrl = in_data[ip++];
142+
143+
if (ctrl < (1 << 5)) /* literal run */ {
144+
ctrl++;
145+
146+
if (op + ctrl > out_len) {
147+
//SET_ERRNO (E2BIG);
148+
return 0;
149+
}
150+
151+
do
152+
out_data[op++] = in_data[ip++];
153+
while ((--ctrl) > 0);
154+
}
155+
else /* back reference */ {
156+
int len = ctrl >> 5;
157+
int ref = op - ((ctrl & 0x1f) << 8) - 1;
158+
if (len == 7)
159+
len += in_data[ip++];
160+
161+
ref -= in_data[ip++];
162+
163+
if (op + len + 2 > out_len) {
164+
//SET_ERRNO (E2BIG);
165+
return 0;
166+
}
167+
168+
if (ref < 0) {
169+
//SET_ERRNO (EINVAL);
170+
return 0;
171+
}
172+
out_data[op++] = out_data[ref++];
173+
out_data[op++] = out_data[ref++];
174+
do
175+
out_data[op++] = out_data[ref++];
176+
while ((--len) > 0);
177+
}
178+
}
179+
return op;
180+
}
128181
}

src/test/java/com/github/myibu/algorithm/AlgorithmTest.java

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package com.github.myibu.algorithm;
22

3+
import com.github.myibu.algorithm.compress.Compressor;
4+
import com.github.myibu.algorithm.compress.LZ77Compressor;
35
import com.github.myibu.algorithm.compress.LZFCompressor;
46
import com.github.myibu.algorithm.data.Bits;
57
import com.github.myibu.algorithm.data.Bytes;
@@ -143,15 +145,31 @@ public void testLZFCompressor() {
143145
* decimal: 01-49-49-32-00-00-50-64-00-00-51-32-00-01-51-51
144146
*
145147
* 111112222233333344444
146-
* '01-31-31-20-00-00-32-40-00-00-33-60-00-00-34-20-00-00-34'
148+
* hex: 01-31-31-20-00-00-32-40-00-00-33-60-00-00-34-20-00-00-34
149+
* decimal: 01-49-49-32-00-00-50-64-00-00-51-96-00-00-52-32-00-00-52
150+
*
151+
* this is a test
152+
* hex: 04-74-68-69-73-20-20-02-05-61-20-74-65-73-74
153+
* decimal: 04-116-104-105-115-32-32-02-05-97-32-116-101-115-116
147154
*/
148-
// [1, 49, 49, 32, 0, 32, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
155+
// [4, 116, 104, 105, 115, 32, 32, 2, 5, 97, 32, 116, 101, 115, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
149156
// byte[] in_data = "1111122222".getBytes(StandardCharsets.UTF_8);
150-
151-
byte[] in_data = "111112222233333344444".getBytes(StandardCharsets.UTF_8);
157+
// byte[] in_data = "111112222233333344444".getBytes(StandardCharsets.UTF_8);
158+
// byte[] in_data = "this is a test".getBytes(StandardCharsets.UTF_8);
159+
byte[] in_data = "Type \"help\", \"copyright\", \"credits\" or \"license\" for more information.".getBytes(StandardCharsets.UTF_8);
152160
byte[] out_data = new byte[in_data.length*2];
153161
LZFCompressor com = new LZFCompressor();
154-
com.compress(in_data, in_data.length, out_data, out_data.length);
162+
int op = com.compress(in_data, in_data.length, out_data);
163+
byte[] decompress_data = new byte[out_data.length * 2];
164+
op = com.decompress(out_data, op, decompress_data);
155165
System.out.println(Arrays.toString(out_data));
156166
}
167+
168+
@Test
169+
public void testLZ77Compressor() {
170+
byte[] in_data = "abracadabrad".getBytes(StandardCharsets.UTF_8);
171+
byte[] out_data = new byte[in_data.length*2];
172+
Compressor compressor = new LZ77Compressor();
173+
compressor.compress(in_data, in_data.length, out_data);
174+
}
157175
}

0 commit comments

Comments
 (0)