fast-pack
diff --git a/‎python_bindings/examples.ipynb‎
Lines changed: 190 additions & 0 deletions b/‎python_bindings/examples.ipynb‎
Lines changed: 190 additions & 0 deletions
@@ -0,0 +1,190 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from pyfastpfor import *\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Compression ratio: 0.34668\n"
+ ]
+ }
+ ],
+ "source": [
+ "arrSize = 128 * 32\n",
+ "maxVal = 2048\n",
+ "# 1. Example without data differencing\n",
+ "\n",
+ "# All arrays the library use must be contiguous-memory C-style numpy arrays\n",
+ "inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')\n",
+ "inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "# To be on the safe side, let's reserve plenty of additional memory:\n",
+ "# sometimes the size of compressed data is not smaller than the size \n",
+ "# of the original one\n",
+ "inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "# Obtain a codec by name\n",
+ "codec = getCodec('simdbinarypacking')\n",
+ "\n",
+ "# Compress data\n",
+ "compSize = codec.encodeArray(inp, arrSize, inpComp, len(inpComp))\n",
+ " \n",
+ "print('Compression ratio: %g' % (float(compSize)/arrSize))\n",
+ "\n",
+ "# Decompress data\n",
+ "assert(arrSize == codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize))\n",
+ "assert(np.all(inpCompDecomp == inp))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Compression ratio: 0.691406\n"
+ ]
+ }
+ ],
+ "source": [
+ "arrSize = 128 * 32\n",
+ "maxVal = 1024 * 1024 * 1024 * 2\n",
+ "\n",
+ "# 2. Example with slower data differencing\n",
+ "\n",
+ "# All arrays the library use must be contiguous-memory C-style numpy arrays\n",
+ "inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')\n",
+ "inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "inp.sort()\n",
+ "inpCopy = np.array(inp, copy = True, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "# To be on the safe side, let's reserve plenty of additional memory:\n",
+ "# sometimes the size of compressed data is not smaller than the size \n",
+ "# of the original one\n",
+ "inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "# Carry out dafa differencing to convert a sorted sequence of large numbers\n",
+ "# into a sequence of small numbers (differences between adjacent numbers)\n",
+ "delta1(inpCopy, arrSize)\n",
+ "\n",
+ "\n",
+ "# Obtain a codec by name\n",
+ "codec = getCodec('simdbinarypacking')\n",
+ "\n",
+ "# Compress data\n",
+ "compSize = codec.encodeArray(inpCopy, arrSize, inpComp, len(inpComp))\n",
+ " \n",
+ "print('Compression ratio: %g' % (float(compSize)/arrSize))\n",
+ "\n",
+ "# Decompress data\n",
+ "assert(arrSize == codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize))\n",
+ "# Reverse differencing by computing the prefix sum\n",
+ "prefixSum1(inpCompDecomp, arrSize)\n",
+ "\n",
+ "assert(np.all(inpCompDecomp == inp))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Compression ratio: 0.72168\n"
+ ]
+ }
+ ],
+ "source": [
+ "arrSize = 128 * 32\n",
+ "maxVal = 1024 * 1024 * 1024 * 2\n",
+ "\n",
+ "# 3. Example with faster but coarser data differencing\n",
+ "\n",
+ "# All arrays the library use must be contiguous-memory C-style numpy arrays\n",
+ "inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')\n",
+ "inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "inp.sort()\n",
+ "inpCopy = np.array(inp, copy = True, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "# To be on the safe side, let's reserve plenty of additional memory:\n",
+ "# sometimes the size of compressed data is not smaller than the size \n",
+ "# of the original one\n",
+ "inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')\n",
+ "\n",
+ "# Carry out dafa differencing to convert a sorted sequence of large numbers\n",
+ "# into a sequence of small numbers (differences between numbers that are 4 indices apart)\n",
+ "delta4(inpCopy, arrSize)\n",
+ "\n",
+ "\n",
+ "# Obtain a codec by name\n",
+ "codec = getCodec('simdbinarypacking')\n",
+ "\n",
+ "# Compress data\n",
+ "compSize = codec.encodeArray(inpCopy, arrSize, inpComp, len(inpComp))\n",
+ " \n",
+ "print('Compression ratio: %g' % (float(compSize)/arrSize))\n",
+ "\n",
+ "# Decompress data\n",
+ "assert(arrSize == codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize))\n",
+ "# Reverse differencing by computing the prefix sum\n",
+ "prefixSum4(inpCompDecomp, arrSize)\n",
+ "\n",
+ "assert(np.all(inpCompDecomp == inp))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}