C class for LZARI compression algorithm
Author:
QUERW)
This is a LZARI algorithm based on data compressed class. Haruhiko Okumura implemented this algorithm in C language on July 4, 1989. But the above uses some global or static variables, which is inconvenient to use under MFC. I have rewritten it into a C class that makes it easy to compress and decompress. More importantly, I added two interfaces, which can be compressed / unzip a memory buffer, not just a file.
A total of 5 external interfaces:
Compressed / decompressed file
Void Compress (const char * lpszinfile, const char * lpszoutfile);
Void Uncompress (const char * lpszoutfile);
The parameters are at a glance, which can be used in this two interfaces:
LZARI LZARI;
LZARI.compress ("Show.Bmp", "Show.Liz"); // Compressed file show.bmp to show.liz
// lzari.uncompress ("show.liz", "show.bmp"); // Unzip file show.liz to show.bmp
It's that simple.
2. Compressed / decompressed memory buffer
Void Compress (const byte * pinbuffer, int ninlength, int & noutLength);
Void Uncompress (const byte * pinbuffer, int ninlength, int & noutLength);
The parameters of the two interfaces are not difficult to understand, and the input pointers and lengths are passed, and the LZARI returns a read-only output pointer and length. Users don't have to worry about the problem of memory allocation, when not need to use the output results, call Release ( ), The following is an example:
LZARI LZARI;
BYTE * poutbuffer = null;
INT NOUTSIZE = 0;
Char szinbuffer [] = "this is a class for compress and uncompress";
Lzari.com (Szinbuffer, Strlen (Szinbuffer), Poutbuffer, NoutSize; // Compressed Pinbuffer
//
// Do something with PoutBuffer
//
LZARI.RELEASE ();
3. Release the memory and empty the sign.
Void release ();
If you want a LZARI class instance to perform both compression operations and decompression operations, call Release () before the latter operation call;
As follows:
LZARI LZARI;
Lzari.compress (Pinbuffer, Nister, Poutbuffer, NoutSize); // Compressed Pinbuffer
//
// Do something with PoutBuffer
//
LZARI.RELEASE ();
Lzari.uncompress (pinbuffer2, ninsize2, poutbuffer2, noutsize2); // Unzip Pinbuffer2
//
// ...
//
LZARI.RELEASE ();
Please pay attention to don't call this:
Lzari.compress (Pinbuffer, Nister, Poutbuffer, NoutSize); // Compressed Pinbuffer
//
// Do something with PoutBuffer
//
LZARI.RELEASE ();
Lzari.uncompress (poutbuffer, noutsize, poutbuffer2, noutsize2); // Separate the result of the first compression
Because the PoutBuffer's pointer is invalid. If you do not call Release (), PoutBuffer and PoutBuffer2 will lead to confusion. It is best to use two class instances. As follows: LZARI LZARI;
LZARI UNLZARI;
Lzari.compress (Pinbuffer, Nister, Poutbuffer, NoutSize); // Compressed Pinbuffer
//
// ...
//
Unlzari.uncompress (poutbuffer, noutsize, poutbuffer2, noutsize2); // Separate the result of the first compression
//
// ...
//
LZARI.RELEASE ();
Unlzari.release ();
Due to the vector template for STL in the program, add the following line in stdafx.h:
#include
Of course, this class does not depend on the MFC, which can be used in any C program.
In addition, the effect of Lzari compression is more than ZIP, and the gap is approximately 5% to 10%, and the compression speed is basically.
Note: Please don't ask me if you have questions about the algorithm, I don't know :) Other questions Welcome to QUERW@sina.com
/ ************************************************** ************* LZARI.C - A Data Compression Program (Tab = 4 spaces) ********************** *************************************** 4/7/1989 Haruhiko Okumura Use, DiStribute, And Modify This Program Freely. PLEELY. PLEELYS. PC-VAN Science Nifty-Serve PAF01022 CompuServe 74050, 1022 ********************************* *************************************** /
/ ********** BIT I / O *********** / # ifndef _file_h_compression_lzari_ # define _file_h_compression_lzari _ // # Pragma Warning (Disable: 4786) // # include
#define _output_status
#define N 4096 / * size of ring buffer * / # define F 60 / * upper limit for match_length * / # define THRESHOLD 2 / * encode string into position and length if match_length is greater than this * / # define NIL N / * Index for root of binary search trees * // ********* Arithmetic compression ********** /
/ * If you are not family, you Should Read IE Witten, RM NEAL, AND JG CLEARY, Communications of the ACM, VOL. 30, PP. 520-540 (1987), from Which Much Have Been Borrowed. * /
#define m 15
/ * Q1 (= 2 to the m) MUST Be Sufficiently Large, But Not So Large as the unsigned long 4 * q1 * (q1 - 1) overflows. * / # Define Q1 (1 ul << m) #define Q2 (2 * Q1) #define q3 (3 * q1) #define Q4 (4 * q1) #define max_cum (Q1 - 1)
#define n_char (256 - Threshold F)
class LZARI {public: LZARI (); virtual ~ LZARI (); protected: FILE * infile, * outfile; unsigned long textsize; unsigned long codesize; unsigned long printcount; unsigned char text_buf [N F - 1]; / * ring Buffer of Size N, with extra f-1 bytes to facilitate string comparison * / int match_position; int match_length; / * of longest match. these Areset by the insertnode () procedure. * / int LSON [N 1]; int in [N 257]; INT DAD [N 1]; / * Left & Right Children & Parents - The constitude binary search tree. * /
/ * CHARACTER CODE = 0, 1, ..., n_char - 1 * /
unsigned long low; unsigned long high; unsigned long value; int shifts; / * counts for magnifying low and high around Q2 * / int char_to_sym [N_CHAR]; int sym_to_char [N_CHAR 1]; unsigned int sym_freq [N_CHAR 1]; / * frequency for symbols * / unsigned int sym_cum [n_char 1]; / * cumulative freq for symbols * / unsigned int position_cum [n 1]; / * cumulative freq for positions * /
// compress in membory; bool m_bmem;
Std :: Vector
Const byte * m_pinbuffer; int m_ninlength; int m_nincur;
Unsigned int buffer_putbit, mask_putbit; unsigned int buffer_getbit, mask_getbit;
Private: void error (char * message); void Putbit (int bit); / * Output One bit (bit = 0, 1) * / void flushbitbuffer (void); / * send remaining bits * / int getBit (void); / * GET One bit (0 or 1) * /
/ *********** LZSS with multiple binary trees ********** / void inittree (void); / * Initialize Trees * / void insertnode (int R); Void deletenode p); / * delete node p from tree * / void startmodel (void); / * Initialize model * / void updatemodel (int sym); void output (int bit); / * Output 1 bit, Followed by its complements * / void encodeChar (int ch); void EncodePosition (int position); void encodeEnd (void); int BinarySearchSym (unsigned int x); int BinarySearchPos (unsigned int x); void StartDecode (void); int DecodeChar (void); int DecodePosition (void);
Void Encode (Void); Void Decode (Void);
Public: void compress (const char * lpszoutfile); void uncompress (const char * lpszoutfile);
void Compress (const BYTE * pInBuffer, int nInLength, const BYTE * & pOutBuffer, int & nOutLength); void UnCompress (const BYTE * pInBuffer, int nInLength, const BYTE * & pOutBuffer, int & nOutLength); void Release ();};
# *******************************************************************************************************************************TION ******************** LZARI.C - A Data Compression Program (Tab = 4 spaces) ************** ******************************************************** 4 / 7/1989 Haruhiko Okumura Use, distribute, and modify this program freely. Please send me your improved versions. PC-VAN SCIENCE NIFTY-Serve PAF01022 CompuServe 74050,1022 **************** **************************************************** /
/ ************************************************** ****************** LZARI.CPP - A Data Compression Class Created: 2004/104 Created: 4: 10: 2004 16:44 File Base: Lzari File Ext: CPP Author: u 文 (querw@sina.com) Purpose: As mentioned above, LZARI.C provides the implementation of the LZARI compression algorithm, based on LZARI.c I made it a C class for easy use ***** *********************************************************** ********************* / # include "stdafx.h" // # include
Lzari :: lzari () {infile = null; outfile = null;
Textsize = 0; CODESIZE = 0; printcount = 0;
Low = 0; high = q4; value = 0; shifts = 0; / * counts for magnifying low and high around q2 * / m_bmem = false;
m_pinbuffer = null; m_ninlength = 0; m_nincur = 0;
// m_poutbuffer = null; m_noutLength = 0; // m_noutcur = 0;
Buffer_putbit = 0; mask_putbit = 128;
Buffer_getbit = 0; mask_getbit = 0;
}
Lzari :: ~ lzari () {release ();
Void Lzari :: Error (Char * Message) {# ifdef_output_status printf ("/ n% s / n", message); # Endif // EXIT (exit_failure); int E = 1; throw e;}
Void Lzari :: Putbit (int bit) / * Output One bit (bit = 0, 1) * / {if (bit) buffer_putbit | = mask_putbit; if ((Mask_putbit >> = 1) == 0) {IF (! m_bmem) {IF (buffer_putbit, outfile == EOF) Error ("Write Error");} else {// if (m_noutcur == m_noutlength) error ("Write Error"); // m_poutbuffer [m_noutcur ] = Buffer; m_outbuffer.push_back (buffer_putbit); // m_noutcur ;} buffer_putbit = 0; Mask_putbit = 128; CODESize ;}}
Void Lzari :: FlushbitBuffer (void) / * send remaining bits * / {INT i; for (i = 0; i <7; i ) Putbit (0);} int LZARI :: getBit (void) / * get one bit (0 OR 1) * / {IF ((Mask_Getbit >> = 1) == 0) {if (! M_bmem) buffer_getbit = getc (infile); else buffer_getbit = m_pinbuffer [m_nincur ]; mask_getbit = 128;} return Buffer_getbit & mask_getbit)! = 0);
/ ********** lzss with multiple binary trees ********** /
Void lzari :: inittree (void) / * initialize trees * / {INT i;
/ * For i = 0 to n - 1, rson [i] and lson [i] will be the right and left children of node i. These Nodes Need not be initialized. Also, DAD [i] is the Parent of Node i ...................................................................................................................................................................................................................................................................................................... Initialized to nil. Note The 4056 Trees. * /
For (i = n 1; i <= n 256; i ) rson [i] = nil; / * root * / for (i = 0; i Void Lzari :: INSERTNODE (INT R) / * INSERTS STRING OF LENGTH F, TEXT_BUF [R.R F-1], INTO One of the Trees (Text_buf [R] 'TH THEE) And Returns The Longeest-Match Position and length via the global variables match_position and match_length. If match_length = F, then removes the old node in favor of the new one, because the old one will be deleted sooner. Note r plays double role, as tree node and position in buffer. * / {INT I, P, CMP, TEMP; unsigned char * key; CMP = 1; key = & text_buf [r]; p = n 1 key [0]; rson [r] = lson [r] = nil; match_length = 0; for (;;;;) {ix (cmp> = 0 ) {IF (rson [p]! = Nil) P = rson [p]; else {RSON [P] = R; DAD [} else {IF (LSON [P]! = Nil P = LSON [P]; ELSE {LSON [P] = R; DAD [R] = P; Return;}} for (i = 1; i / ********** Arithmetic compression ********** / / * If you are not family, you Should Read IE Witten, RM NEAL, AND JG CLEARY, Communications of the ACM, VOL. 30, PP. 520-540 (1987), from Which Much Have Been Borrowed. * // * Character code = 0, 1, ..., n_char - 1 * / Void Lzari :: StartModel (Void) / * Initialize Model * / {INT CH, SYM, I; SYM_CUM [N_CHAR] = 0; for (SYM = n_char; SYM> = 1; SYM--) {CH = SYM - 1 ; Char_to_sym [ch] = SYM; SYM_TO_CHAR [SYM] = CH; SYM_FREQ [SM] = 1; SYM_CUM [SYM - 1] = SYM_CUM [SYM] SYM_FREQ [SYM];} SYM_FREQ [0] = 0; / * Sentinel (! = SYM_FREQ [1]) * / position_cum [n] = 0; for (i = n; i> = 1; I -) POSITION_CUM [i - 1] = position_cum [i] 1000 / (i 200 ); / * Empirical distribution function * / / * please devise a better mechanism! * /} Void Lzari :: UpdateModel (int SM) {INT I, C, CH_I, CH_SYM; IF (Sym_cum [0]> = max_cum) {c = 0; for (i = n_char; i> 0; I -) {SYM_CUM [i] = c; c = (Sym_FREQ [i] = (SYM_FREQ [i] 1) >> 1);} sym_cum [0] = C;} for (i = SYM; SYM_FREQ [i] == SYM_FREQ [i - 1]; I -; if (i Void Lzari :: Output (int bit) / * Output 1 bit, Followed by its complements * / {putbit (bit); for (; shifts> 0; shifts - Putbit (! bit); Void Lzari :: EncodeChar (int CH) {Int Sym; unsigned long int te SYM = char_to_sym [ch]; Range = high - low; high = low (Range * Sym_cum [SYM - 1]) / SYM_CUM [0]; Low = (Range * Sym_cum [SYM]) / SYM_CUM [0]; For (;;) {if (high <= Q2) OUTPUT (0); Else if (low> = Q2) {OUTPUT (1); low - = Q2; high - = Q2;} else if (low> = Q1 && high <= q3) {Shifts ; low - = q1; high - = q1;} else break; low = low; high = high;} UpdateModel (SYM);} Void Lzari :: EncodePosition (int position) { Unsigned long int range; Range = high - low; high = low (Range * position_cum [position]) / position_cum [0]; low = (Range * position_cum [position 1]) / position_cum [0]; for (;;) {ix (High <= Q2) OUTPUT (0); Else IF (Low> = Q2) {OUTPUT (1); Low - = Q2; HIGH - = Q2;} Else IF (Low> = Q1 && High <= Q3) { Shifts ; low - = q1; high - = q1;} else break; low = low; high = high;}} Void Lzari :: EncodeEnd (Void) {Shifts ; if (Low INT LZARI :: binarysearchsym (unsigned int x) / * 1 if x> = sym_cum [1], n_char if Sym_cum [n_char]> x, i such That Sym_cum [i - 1]> x> = sym_cum [i] 安 otWise * / {INT I, J, K; I = 1; j = n_char; while (i INT LZARI :: binarysearchpos (unsigned int x) / * 0 if x> = position_cum [1], n - 1 if position_cum [n]> x, i such That position_cum [i]> x> = position_cum [i 1] Otherwise * / {INT I, J, K; I = 1; J = N; While (i For (i = 0; i INT LZARI :: DecodeChar (void) {Int Sym, CH; unsigned long - low; system = binarysearchsym ((unsigned int) ((value - low 1) * SYM_CUM [0] - 1) / RANGE); high = low (Range * Sym_cum [SYM - 1]) / SYM_CUM [0]; Low = (Range * Sym_cum [SYM]) / SYM_CUM [0]; for (;;;;) {ix Low> = Q2) {Value - = Q2; Low - = Q2; high - = Q2;} else if (low> = q1 && hard <= q3) {value - = Q1; low - = q1; high - = q1 } Else if (high> Q2) Break; low = low; high = high; value = 2 * value getbit ();} ch = SYM_TO_CHAR [SYM]; UPDATEMODEL (SYM); Return CH;} INT LZARI :: DecodePosition (void) {INT position; unsigned long- = high - low; position = binarysearchpos (((value - low 1) * position_cum [0] - 1) / Range ))); High = low (range * position_cum [position]) / position_cum [0]; low = (Range * position_cum [position 1]) / position_cum [0]; for (;;) {if (low> = Q2) {Value - = Q2; Low - = Q2; HIGH - = Q2;} else if (low> = q1 && high <= Q3) {value - = q1; low - = q1; high - = q1;} Else if (high> Q2) Break; low = low; high = high; value = 2 * value getbit ();} return position;} / ********* Encode and decode ** ******** / Void Lzari :: Encode (void) {INT I, C, LEN, R, S, LAST_MATCH_LENGTH; if (!, 0L, seek_ek_ek); TextSize = ftell (Infile); if (fwrite (& Textsize) Sizeof textsize, 1, outfile <1) Error ("Write Error"); / * Output Size of Text * / CODESize = SizeOf Textsize; if (Textsize == 0) Return; Rewind (Infile); TextSize = 0; } else {textsize = m_nInLength; m_OutBuffer.resize (sizeof textsize); memcpy (& m_OutBuffer [0], & textsize, sizeof textsize); // m_nOutCur = sizeof textsize; codesize = sizeof textsize; if (textsize == 0) return M_Nincur = 0; TextSize = 0;} startModel (); inittree (); s = 0; r = n - f; for (i = s; i R = (R 1) & (n - 1); INSERTNODE (R);}} else {for (i = 0; I Void Lzari :: Decode (void) {INT I, J, K, R, C; Unsigned long int count If (! m_bmem) {if (& TextSize, Sizeof Textsize, 1, Infile <1) Error ("Read Error"); / * read size of text * /} else {if (m_ninlength Void Lzari :: Compress (const char * lpszoutfile) {m_bmem = false; infile = fopen (lpszinfile, "rb"); outfile = fopen (lpszoutfile, "wb"); if (Infile && outfile) { Encode (); fclose (infile); infile (outfile); infile = null; outfile = null;}} void lzari :: uncompress (const char * lpszinfile, const char * lpszoutfile) {m_bmem = false; Infile = fopen (LPSZINFILE, "RB"); OUTFILE = FOPEN (LPSZoutFile, "WB"); if (Infile && outfile) {decode (); fclose (infile); fclose (outfile); infile = null; outfile = null }} Void Lzari :: Compress (const Byte * Pinbuffer, INT NINLENGTH, INT BYTE * & Poutbuffer, INT & NOUTLENGTH) {m_pinbuffer = pinbuffer; m_ninlength = ninlength; m_nincur = 0; // m_noutcur = 0; m_bmem = true; encode (); poutbuffer = & m_outbuffler [0]; NoutLength = m_outbuffer.size (); Void Lzari :: uncompress (const byte * pinbuffer, int ninlength, inv "{m_pinbuffer = pinbuffer; m_ninLength = ninlength; m_nincur = 0; m_bmem = true; decode (); poutbuffer = & m_outbuffler [0]; noutLength = m_outbuffer.size (); m_outbuffer.push_back (0);} Void Lzari :: Release () {if (! m_outbuffer.empty ()) {infile = null; outfile = null; textsize = 0; CODESIZE = 0; printcount = 0; low = 0; high = Q4; value = 0; SHIFTS = 0; m_bmem = false; m_pinbuffer = 0; m_nincur = 0; m_outbuffer.clear (); m_noutLength = 0; Buffer_putbit = 0; mask_putbit = 128; buffer_getbit = 0; mask_getbit = 0;}}