gauravbisen1
diff --git a/‎src/main/java/com/thealgorithms/compression/LZ77.java‎
Lines changed: 168 additions & 0 deletions b/‎src/main/java/com/thealgorithms/compression/LZ77.java‎
Lines changed: 168 additions & 0 deletions
diff --git a/‎src/main/java/com/thealgorithms/compression/LZ78.java‎
Lines changed: 136 additions & 0 deletions b/‎src/main/java/com/thealgorithms/compression/LZ78.java‎
Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,168 @@
+package com.thealgorithms.compression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An implementation of the Lempel-Ziv 77 (LZ77) compression algorithm.
+ * <p>
+ * LZ77 is a lossless data compression algorithm that works by finding repeated
+ * occurrences of data in a sliding window. It replaces subsequent occurrences
+ * with references (offset, length) to the first occurrence within the window.
+ * </p>
+ * <p>
+ * This implementation uses a simple sliding window and lookahead buffer approach.
+ * Output format is a sequence of tuples (offset, length, next_character).
+ * </p>
+ * <p>
+ * Time Complexity: O(n*W) in this naive implementation, where n is the input length
+ * and W is the window size, due to the search for the longest match. More advanced
+ * data structures (like suffix trees) can improve this.
+ * </p>
+ * <p>
+ * References:
+ * <ul>
+ * <li><a href="https://en.wikipedia.org/wiki/LZ77_and_LZ78#LZ77">Wikipedia: LZ77</a></li>
+ * </ul>
+ * </p>
+ */
+public final class LZ77 {
+
+ private static final int DEFAULT_WINDOW_SIZE = 4096;
+ private static final int DEFAULT_LOOKAHEAD_BUFFER_SIZE = 16;
+ private static final char END_OF_STREAM = '\u0000';
+ private LZ77() {
+ }
+
+ /**
+ * Represents a token in the LZ77 compressed output.
+ * Stores the offset back into the window, the length of the match,
+ * and the next character after the match (or END_OF_STREAM if at end).
+ */
+ public record Token(int offset, int length, char nextChar) {
+ }
+
+ /**
+ * Compresses the input text using the LZ77 algorithm.
+ *
+ * @param text The input string to compress. Must not be null.
+ * @param windowSize The size of the sliding window (search buffer). Must be positive.
+ * @param lookaheadBufferSize The size of the lookahead buffer. Must be positive.
+ * @return A list of {@link Token} objects representing the compressed data.
+ * @throws IllegalArgumentException if windowSize or lookaheadBufferSize are not positive.
+ */
+ public static List<Token> compress(String text, int windowSize, int lookaheadBufferSize) {
+ if (text == null) {
+ return new ArrayList<>();
+ }
+ if (windowSize <= 0 || lookaheadBufferSize <= 0) {
+ throw new IllegalArgumentException("Window size and lookahead buffer size must be positive.");
+ }
+
+ List<Token> compressedOutput = new ArrayList<>();
+ int currentPosition = 0;
+
+ while (currentPosition < text.length()) {
+ int bestMatchDistance = 0;
+ int bestMatchLength = 0;
+
+ // Define the start of the search window
+ int searchBufferStart = Math.max(0, currentPosition - windowSize);
+ // Define the end of the lookahead buffer (don't go past text length)
+ int lookaheadEnd = Math.min(currentPosition + lookaheadBufferSize, text.length());
+
+ // Search for the longest match in the window
+ for (int i = searchBufferStart; i < currentPosition; i++) {
+ int currentMatchLength = 0;
+
+ // Check how far the match extends into the lookahead buffer
+ // This allows for overlapping matches (e.g., "aaa" can match with offset 1)
+ while (currentPosition + currentMatchLength < lookaheadEnd) {
+ int sourceIndex = i + currentMatchLength;
+
+ // Handle overlapping matches (run-length encoding within LZ77)
+ // When we've matched beyond our starting position, wrap around using modulo
+ if (sourceIndex >= currentPosition) {
+ int offset = currentPosition - i;
+ sourceIndex = i + (currentMatchLength % offset);
+ }
+
+ if (text.charAt(sourceIndex) == text.charAt(currentPosition + currentMatchLength)) {
+ currentMatchLength++;
+ } else {
+ break;
+ }
+ }
+
+ // If this match is longer than the best found so far
+ if (currentMatchLength > bestMatchLength) {
+ bestMatchLength = currentMatchLength;
+ bestMatchDistance = currentPosition - i; // Calculate offset from current position
+ }
+ }
+
+ char nextChar;
+ if (currentPosition + bestMatchLength < text.length()) {
+ nextChar = text.charAt(currentPosition + bestMatchLength);
+ } else {
+ nextChar = END_OF_STREAM;
+ }
+
+ // Add the token to the output
+ compressedOutput.add(new Token(bestMatchDistance, bestMatchLength, nextChar));
+
+ // Move the current position forward
+ // If we're at the end and had a match, just move by the match length
+ if (nextChar == END_OF_STREAM) {
+ currentPosition += bestMatchLength;
+ } else {
+ currentPosition += bestMatchLength + 1;
+ }
+ }
+
+ return compressedOutput;
+ }
+
+ /**
+ * Compresses the input text using the LZ77 algorithm with default buffer sizes.
+ *
+ * @param text The input string to compress. Must not be null.
+ * @return A list of {@link Token} objects representing the compressed data.
+ */
+ public static List<Token> compress(String text) {
+ return compress(text, DEFAULT_WINDOW_SIZE, DEFAULT_LOOKAHEAD_BUFFER_SIZE);
+ }
+
+ /**
+ * Decompresses a list of LZ77 tokens back into the original string.
+ *
+ * @param compressedData The list of {@link Token} objects. Must not be null.
+ * @return The original, uncompressed string.
+ */
+ public static String decompress(List<Token> compressedData) {
+ if (compressedData == null) {
+ return "";
+ }
+
+ StringBuilder decompressedText = new StringBuilder();
+
+ for (Token token : compressedData) {
+ // Copy matched characters from the sliding window
+ if (token.length > 0) {
+ int startIndex = decompressedText.length() - token.offset;
+
+ // Handle overlapping matches (e.g., when length > offset)
+ for (int i = 0; i < token.length; i++) {
+ decompressedText.append(decompressedText.charAt(startIndex + i));
+ }
+ }
+
+ // Append the next character (if not END_OF_STREAM)
+ if (token.nextChar != END_OF_STREAM) {
+ decompressedText.append(token.nextChar);
+ }
+ }
+
+ return decompressedText.toString();
+ }
+}
@@ -0,0 +1,136 @@
+package com.thealgorithms.compression;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An implementation of the Lempel-Ziv 78 (LZ78) compression algorithm.
+ * <p>
+ * LZ78 is a dictionary-based lossless data compression algorithm. It processes
+ * input data sequentially, building a dictionary of phrases encountered so far.
+ * It outputs pairs (dictionary_index, next_character), representing
+ * the longest match found in the dictionary plus the character that follows it.
+ * </p>
+ * <p>
+ * This implementation builds the dictionary dynamically during compression.
+ * The dictionary index 0 represents the empty string (no prefix).
+ * </p>
+ * <p>
+ * Time Complexity: O(n) on average for compression and decompression, assuming
+ * efficient dictionary lookups (using a HashMap), where n is the
+ * length of the input string.
+ * </p>
+ * <p>
+ * References:
+ * <ul>
+ * <li><a href="https://en.wikipedia.org/wiki/LZ77_and_LZ78#LZ78">Wikipedia: LZ78</a></li>
+ * </ul>
+ * </p>
+ */
+public final class LZ78 {
+
+ /**
+ * Special character used to mark end of stream when needed.
+ */
+ private static final char END_OF_STREAM = '\u0000';
+
+ /**
+ * Private constructor to prevent instantiation of this utility class.
+ */
+ private LZ78() {
+ }
+
+ /**
+ * Represents a token in the LZ78 compressed output.
+ * Stores the index of the matching prefix in the dictionary and the next character.
+ * Index 0 represents the empty string (no prefix).
+ */
+ public record Token(int index, char nextChar) {
+ }
+
+ /**
+ * A node in the dictionary trie structure.
+ * Each node represents a phrase and can have child nodes for extended phrases.
+ */
+ private static final class TrieNode {
+ Map<Character, TrieNode> children = new HashMap<>();
+ int index = -1; // -1 means not assigned yet
+ }
+
+ /**
+ * Compresses the input text using the LZ78 algorithm.
+ *
+ * @param text The input string to compress. Must not be null.
+ * @return A list of {@link Token} objects representing the compressed data.
+ */
+ public static List<Token> compress(String text) {
+ if (text == null || text.isEmpty()) {
+ return new ArrayList<>();
+ }
+
+ List<Token> compressedOutput = new ArrayList<>();
+ TrieNode root = new TrieNode();
+ int nextDictionaryIndex = 1;
+
+ TrieNode currentNode = root;
+ int lastMatchedIndex = 0;
+
+ for (int i = 0; i < text.length(); i++) {
+ char currentChar = text.charAt(i);
+
+ if (currentNode.children.containsKey(currentChar)) {
+ currentNode = currentNode.children.get(currentChar);
+ lastMatchedIndex = currentNode.index;
+ } else {
+ // Output: (index of longest matching prefix, current character)
+ compressedOutput.add(new Token(lastMatchedIndex, currentChar));
+
+ TrieNode newNode = new TrieNode();
+ newNode.index = nextDictionaryIndex++;
+ currentNode.children.put(currentChar, newNode);
+
+ currentNode = root;
+ lastMatchedIndex = 0;
+ }
+ }
+
+ // Handle remaining phrase at end of input
+ if (currentNode != root) {
+ compressedOutput.add(new Token(lastMatchedIndex, END_OF_STREAM));
+ }
+
+ return compressedOutput;
+ }
+
+ /**
+ * Decompresses a list of LZ78 tokens back into the original string.
+ *
+ * @param compressedData The list of {@link Token} objects. Must not be null.
+ * @return The original, uncompressed string.
+ */
+ public static String decompress(List<Token> compressedData) {
+ if (compressedData == null || compressedData.isEmpty()) {
+ return "";
+ }
+
+ StringBuilder decompressedText = new StringBuilder();
+ Map<Integer, String> dictionary = new HashMap<>();
+ int nextDictionaryIndex = 1;
+
+ for (Token token : compressedData) {
+ String prefix = (token.index == 0) ? "" : dictionary.get(token.index);
+
+ if (token.nextChar == END_OF_STREAM) {
+ decompressedText.append(prefix);
+ } else {
+ String currentPhrase = prefix + token.nextChar;
+ decompressedText.append(currentPhrase);
+ dictionary.put(nextDictionaryIndex++, currentPhrase);
+ }
+ }
+
+ return decompressedText.toString();
+ }
+}