// Copyright (C) 2004 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_LZ77_BUFFER_KERNEl_2_ #define DLIB_LZ77_BUFFER_KERNEl_2_ #include "lz77_buffer_kernel_abstract.h" #include "../algs.h" namespace dlib { template < typename sliding_buffer > class lz77_buffer_kernel_2 { /*! REQUIREMENTS ON sliding_buffer sliding_buffer must be an implementation of sliding_buffer/sliding_buffer_kernel_abstract.h and must be instantiated to contain unsigned char data INITIAL VALUE history_limit == defined by constructor arguments lookahead_limit == defined by constructor arguments history_size == 0 lookahead_size == 0 buffer.size() == history_limit + lookahead_limit buffer[i] == 0 for all valid i nodes == an array of history_limit-3 nodes id_table == an array of buffer.size() pointers hash_table == an array of buffer.size() pointers and all are set to 0 mask == buffer.size() - 1 next_free_node == 0 CONVENTION history_limit == get_history_buffer_limit() lookahead_limit == get_lookahead_buffer_limit() history_size == get_history_buffer_size() lookahead_limit == get_lookahead_buffer_size() buffer.size() == history_limit + lookahead_limit lookahead_buffer(i) == buffer[lookahead_limit-1-i] history_buffer(i) == buffer[lookahead_limit+i] hash_table[hash(a,b,c,d)] points to the head of a linked list. Each node in this linked list tells the location in the buffer of a string that begins with abcd or a string who's first four letters have the same hash. The linked list is terminated by a node with a null next pointer. hash_table[i] == 0 if there is no linked list for this element of the hash table. each node in the hash table is allocated from the array nodes. When adding a node to hash_table: if (if all nodes aren't already in the hash_table) then { the next node to use is nodes[next_free_node]. } else { recycle nodes from the hash_table itself. This works because when we add new nodes we also have to remove nodes. } if (there is a node defined with an id of i) then { if (id_table[i] != 0) then id_table[i]->next->id == i else hash_table[some_hash]->id == i } !*/ public: lz77_buffer_kernel_2 ( unsigned long total_limit_, unsigned long lookahead_limit_ ); virtual ~lz77_buffer_kernel_2 ( ); void clear( ); void add ( unsigned char symbol ); void find_match ( unsigned long& index, unsigned long& length, unsigned long min_match_length ); inline unsigned long get_history_buffer_limit ( ) const { return history_limit; } inline unsigned long get_lookahead_buffer_limit ( ) const { return lookahead_limit; } inline unsigned long get_history_buffer_size ( ) const { return history_size; } inline unsigned long get_lookahead_buffer_size ( ) const { return lookahead_size; } inline unsigned char lookahead_buffer ( unsigned long index ) const { return buffer[lookahead_limit-1-index]; } inline unsigned char history_buffer ( unsigned long index ) const { return buffer[lookahead_limit+index]; } inline void shift_buffers ( unsigned long N ) { shift_buffer(N); } private: inline unsigned long hash ( unsigned char a, unsigned char b, unsigned char c, unsigned char d ) const /*! ensures - returns a hash of the 4 arguments and the hash is in the range !*/ { unsigned long B = b << 3; unsigned long C = c << 6; unsigned long D = d << 9; unsigned long temp = a + B; temp += C; temp += D; return (temp&mask); /**/ } void shift_buffer ( unsigned long N ); /*! requires - N <= lookahead_size ensuers - #lookahead_size == lookahead_size - N - if (history_size+N < history_limit) then - #history_size == history_size+N - else - #history_size == history_limit - for all i where 0 <= i < N: #history_buffer(N-1-i) == lookahead_buffer(i) - for all i where 0 <= i < #history_size-N: #history_buffer(N+i) == history_buffer(i) - for all i where 0 <= i < #lookahead_size #lookahead_buffer(i) == lookahead_buffer(N+i) !*/ // member data sliding_buffer buffer; unsigned long lookahead_limit; unsigned long history_limit; struct node { unsigned long id; node* next; }; node** hash_table; node* nodes; node** id_table; unsigned long next_free_node; unsigned long mask; unsigned long lookahead_size; unsigned long history_size; // restricted functions lz77_buffer_kernel_2(lz77_buffer_kernel_2<sliding_buffer>&); // copy constructor lz77_buffer_kernel_2<sliding_buffer>& operator=(lz77_buffer_kernel_2<sliding_buffer>&); // assignment operator }; // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // member function definitions // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename sliding_buffer > lz77_buffer_kernel_2<sliding_buffer>:: lz77_buffer_kernel_2 ( unsigned long total_limit_, unsigned long lookahead_limit_ ) : lookahead_size(0), history_size(0) { buffer.set_size(total_limit_); lookahead_limit = lookahead_limit_; history_limit = buffer.size() - lookahead_limit_; nodes = new node[history_limit-3]; try { id_table = new node*[buffer.size()]; } catch (...) { delete [] nodes; throw; } try { hash_table = new node*[buffer.size()]; } catch (...) { delete [] id_table; delete [] nodes; throw; } mask = buffer.size()-1; next_free_node = 0; node** start = hash_table; node** end = hash_table + buffer.size(); while (start != end) { *start = 0; ++start; } for (unsigned long i = 0; i < buffer.size(); ++i) buffer[i] = 0; } // ---------------------------------------------------------------------------------------- template < typename sliding_buffer > lz77_buffer_kernel_2<sliding_buffer>:: ~lz77_buffer_kernel_2 ( ) { delete [] nodes; delete [] hash_table; delete [] id_table; } // ---------------------------------------------------------------------------------------- template < typename sliding_buffer > void lz77_buffer_kernel_2<sliding_buffer>:: clear( ) { lookahead_size = 0; history_size = 0; next_free_node = 0; node** start = hash_table; node** end = hash_table + buffer.size(); while (start != end) { *start = 0; ++start; } } // ---------------------------------------------------------------------------------------- template < typename sliding_buffer > void lz77_buffer_kernel_2<sliding_buffer>:: shift_buffer ( unsigned long N ) { unsigned long old_history_size = history_size; unsigned long temp = history_size+N; unsigned long new_nodes; // the number of nodes to pull from the nodes array unsigned long recycled_nodes; // the number of nodes to pull from hash_table lookahead_size -= N; if (temp <= history_limit) { if (history_size <= 3) { if ((3-history_size) >= N) new_nodes = 0; else new_nodes = N - (3-history_size); } else { new_nodes = N; } recycled_nodes = 0; history_size = temp; } else { if (history_size != history_limit) { new_nodes = history_limit - history_size; recycled_nodes = temp - history_limit; history_size = history_limit; } else { new_nodes = 0; recycled_nodes = N; } } unsigned long i = lookahead_limit + 2; // if there are any "new" nodes to add to the hash table if (new_nodes != 0) { unsigned long stop = i - new_nodes; for (; i > stop; --i) { nodes[next_free_node].next = 0; nodes[next_free_node].id = buffer.get_element_id(i); id_table[nodes[next_free_node].id] = 0; unsigned long new_hash = hash(buffer[i],buffer[i-1],buffer[i-2],buffer[i-3]); if (hash_table[new_hash] != 0) id_table[hash_table[new_hash]->id] = &nodes[next_free_node]; nodes[next_free_node].next = hash_table[new_hash]; hash_table[new_hash] = &nodes[next_free_node]; ++next_free_node; } } // if (new_nodes != 0) unsigned long stop = i - recycled_nodes; unsigned long old = old_history_size-1+lookahead_limit; for (; i > stop; --i) { // find the next node to recycle in hash_table node* recycled_node; unsigned long old_id = buffer.get_element_id(old); // find the node with id old_id if (id_table[old_id] == 0) { unsigned long old_hash = hash(buffer[old],buffer[old-1],buffer[old-2],buffer[old-3]); recycled_node = hash_table[old_hash]; // fill the gap left by removing this node hash_table[old_hash] = recycled_node->next; } else { recycled_node = id_table[old_id]->next; // fill the gap left by removing this node id_table[old_id]->next = recycled_node->next; } --old; recycled_node->next = 0; recycled_node->id = buffer.get_element_id(i); id_table[recycled_node->id] = 0; unsigned long new_hash = hash(buffer[i],buffer[i-1],buffer[i-2],buffer[i-3]); if (hash_table[new_hash] != 0) id_table[hash_table[new_hash]->id] = recycled_node; recycled_node->next = hash_table[new_hash]; hash_table[new_hash] = recycled_node; } // for (; i > stop; --i) buffer.rotate_left(N); } // ---------------------------------------------------------------------------------------- template < typename sliding_buffer > void lz77_buffer_kernel_2<sliding_buffer>:: add ( unsigned char symbol ) { if (lookahead_size == lookahead_limit) { shift_buffer(1); } buffer[lookahead_limit-1-lookahead_size] = symbol; ++lookahead_size; } // ---------------------------------------------------------------------------------------- template < typename sliding_buffer > void lz77_buffer_kernel_2<sliding_buffer>:: find_match ( unsigned long& index, unsigned long& length, unsigned long min_match_length ) { unsigned long match_length = 0; // the length of the longest match we find unsigned long match_index = 0; // the index of the longest match we find const unsigned long hash_value = hash(lookahead_buffer(0), lookahead_buffer(1), lookahead_buffer(2), lookahead_buffer(3) ); node* temp = hash_table[hash_value]; while (temp != 0) { // current position in the history buffer unsigned long hpos = buffer.get_element_index(temp->id)-lookahead_limit; // current position in the lookahead buffer unsigned long lpos = 0; // find length of this match while (history_buffer(hpos) == lookahead_buffer(lpos)) { ++lpos; if (hpos == 0) break; --hpos; if (lpos == lookahead_size) break; } if (lpos > match_length) { match_length = lpos; match_index = buffer.get_element_index(temp->id)-lookahead_limit; // if this is the longest possible match then stop looking if (lpos == lookahead_limit) break; } temp = temp->next; } // while (temp != 0) // if we found a match that was long enough then report it if (match_length >= min_match_length) { shift_buffer(match_length); index = match_index; length = match_length; } else { length = 0; } } // ---------------------------------------------------------------------------------------- } #endif // DLIB_LZ77_BUFFER_KERNEl_2_