// Copyright (C) 2005 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_ #define DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_ #include "../algs.h" #include "entropy_encoder_model_kernel_abstract.h" #include "../assert.h" namespace dlib { namespace eemk5 { struct node { node* next; node* child_context; node* parent_context; unsigned short symbol; unsigned short count; unsigned short total; unsigned short escapes; }; } template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > class entropy_encoder_model_kernel_5 { /*! REQUIREMENTS ON total_nodes - 4096 < total_nodes - this is the total number of nodes that we will use in the tree REQUIREMENTS ON order - 0 <= order - this is the maximum depth-1 the tree will be allowed to go (note that the root level is depth 0). GENERAL NOTES This implementation follows more or less the implementation strategy laid out by Alistair Moffat in his paper Implementing the PPM data compression scheme. Published in IEEE Transactions on Communications, 38(11):1917-1921, 1990. The escape method used will be method D. This also uses Dmitry Shkarin's Information Inheritance scheme. (described in "PPM: one step to practicality" and "Improving the Efficiency of the PPM Algorithm") INITIAL VALUE - root == pointer to an array of total_nodes nodes - next_node == 1 - cur == root - cur_order = 0 - root->next == 0 - root->parent_context == 0 - root->child_context == 0 - root->escapes == 0 - root->total == 0 - stack_size == 0 - exc_used == false - for all i: exc[i] == 0 CONVENTION - pop() == stack[stack_size-1].n and stack[stack_size-1].nc - exc_used == something_is_excluded() - is_excluded(symbol) == bit symbol&0x1F from exc[symbol>>5] - &get_entropy_encoder() == coder - root == pointer to an array of total_nodes nodes. this is also the root of the tree. - if (next_node < total_nodes) then - next_node == the next node in root that has not yet been allocated - root->next == 0 - root->parent_context == 0 - for every node in the tree: { - NOTATION: - The "context" of a node is the string of symbols seen when you go from the root of the tree down (down though child context pointers) to the node, including the symbol at the node itself. (note that the context of the root node is "" or the empty string) - A set of nodes is in the same "context set" if all the node's contexts are of length n and all the node's contexts share the same prefix of length n-1. - The "child context set" of a node is a set of nodes with contexts that are one symbol longer and prefixed by the node's context. For example, if a node has a context "abc" then the nodes for contexts "abca", "abcb", "abcc", etc. are all in the child context set of the node. - The "parent context" of a node is the context that is one symbol shorter than the node's context and includes the symbol in the node. So the parent context of a node with context "abcd" would be the context "bcd". - if (next != 0) then - next == pointer to the next node in the same context set - if (child_context != 0) then - child_context == pointer to the first node of the child context set for this node. - escapes > 0 - if (parent_context != 0) then - parent_context == pointer to the parent context of this node. - else - this node is the root node of the tree - if (this is not the root node) then - symbol == the symbol represented with this node - count == the number of times this symbol has been seen in its parent context. - else - the root doesn't have a symbol. i.e. the context for the root node is "" or the empty string. - total == The sum of the counts of all the nodes in the child context set + escapes. - escapes == the escape count for the context represented by the node. - count > 0 } - cur_order < order - cur_order == the depth of the node cur in the tree. (note that the root node has depth 0) - cur == pointer to the node in the tree who's context matches the most recent symbols we have seen. !*/ typedef eemk5::node node; public: typedef entropy_encoder entropy_encoder_type; entropy_encoder_model_kernel_5 ( entropy_encoder& coder ); virtual ~entropy_encoder_model_kernel_5 ( ); inline void clear( ); inline void encode ( unsigned long symbol ); entropy_encoder& get_entropy_encoder ( ) { return coder; } static unsigned long get_alphabet_size ( ) { return alphabet_size; } private: inline eemk5::node* allocate_node ( ); /*! requires - space_left() == true ensures - returns a pointer to a new node !*/ inline bool space_left ( ) const; /*! ensures - returns true if there is at least 1 free node left. - returns false otherwise !*/ inline void exclude ( unsigned short symbol ); /*! ensures - #is_excluded(symbol) == true - #something_is_excluded() == true !*/ inline bool something_is_excluded ( ); /*! ensures - returns true if some symbol has been excluded. returns false otherwise !*/ inline bool is_excluded ( unsigned short symbol ); /*! ensures - if (symbol has been excluded) then - returns true - else - returns false !*/ inline void clear_exclusions ( ); /*! ensures - for all symbols #is_excluded(symbol) == false - #something_is_excluded() == true !*/ inline void scale_counts ( node* n ); /*! ensures - divides all the counts in the child context set of n by 2. - none of the nodes in the child context set will have a count of 0 !*/ inline void push ( node* n, node* nc ); /*! requires - stack_size < order ensures - #pop(a,b): a == n && b == nc !*/ inline void pop ( node*& n, node*& nc ); /*! requires - stack_size > 0 ensures - returns the two nodes at the top of the stack !*/ struct nodes { node* n; node* nc; }; unsigned long next_node; entropy_encoder& coder; node* root; node* cur; unsigned long cur_order; unsigned long exc[alphabet_size/32+1]; bool exc_used; nodes stack[order+1]; unsigned long stack_size; // restricted functions entropy_encoder_model_kernel_5(entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>&); // copy constructor entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>& operator=(entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>&); // assignment operator }; // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // member function definitions // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: entropy_encoder_model_kernel_5 ( entropy_encoder& coder_ ) : next_node(1), coder(coder_), cur_order(0), stack_size(0) { COMPILE_TIME_ASSERT( 1 < alphabet_size && alphabet_size < 65535 ); COMPILE_TIME_ASSERT( 4096 < total_nodes ); root = new node[total_nodes]; cur = root; root->child_context = 0; root->escapes = 0; root->next = 0; root->parent_context = 0; root->total = 0; clear_exclusions(); } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: ~entropy_encoder_model_kernel_5 ( ) { delete [] root; } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: clear( ) { next_node = 1; root->child_context = 0; root->escapes = 0; root->total = 0; cur = root; cur_order = 0; stack_size = 0; clear_exclusions(); } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: encode ( unsigned long sym ) { unsigned short symbol = static_cast<unsigned short>(sym); node* temp = cur; cur = 0; unsigned short low_count, high_count, total_count; node* new_node = 0; // local_order will track the level of temp in the tree unsigned long local_order = cur_order; unsigned short c; // c == t(a|sk) unsigned short t; // t == T(sk) if (something_is_excluded()) clear_exclusions(); while (true) { low_count = 0; high_count = 0; if (space_left()) { total_count = temp->total; if (total_count > 0) { // check if we need to scale the counts if (total_count > 10000) { scale_counts(temp); total_count = temp->total; } // find the symbol we are looking for and put a pointer to it // into found_symbol. If it isn't found then found_symbol == 0. // also, low_count and high_count will be correctly set. node* n = temp->child_context; node* found_symbol = 0; node* last = 0; if (something_is_excluded()) { node* templast = 0; while (true) { if (is_excluded(n->symbol) == false) { exclude(n->symbol); if (found_symbol == 0) { high_count += n->count; if (n->symbol == symbol) { found_symbol = n; last = templast; low_count = high_count - n->count; } } } else { total_count -= n->count; } if (n->next == 0) break; templast = n; n = n->next; } } else { while (true) { high_count += n->count; exclude(n->symbol); if (n->symbol == symbol) { found_symbol = n; low_count = high_count - n->count; break; } if (n->next == 0) break; last = n; n = n->next; } } // if we found the symbol if (found_symbol) { n = found_symbol; if (new_node != 0) { new_node->parent_context = found_symbol; } coder.encode(low_count,high_count,total_count); c = n->count += 8; t = temp->total += 8; // move this node to the front if (last) { last->next = n->next; n->next = temp->child_context; temp->child_context = n; } if (cur == 0) { if (local_order >= order) { cur = n->parent_context; cur_order = local_order; } else { cur_order = local_order+1; cur = n; } } break; } // if we hit the end of the context set without finding the symbol else { // finish excluding all the symbols while (n->next) { exclude(n->symbol); n = n->next; } if (new_node != 0) { new_node->parent_context = allocate_node(); new_node = new_node->parent_context; } else { new_node = allocate_node(); } n->next = new_node; // write an escape to a lower context coder.encode(high_count,total_count,total_count); } } else // if (total_count == 0) { // this means that temp->child_context == 0 so we should make // a new node here. if (new_node != 0) { new_node->parent_context = allocate_node(); new_node = new_node->parent_context; } else { new_node = allocate_node(); } temp->child_context = new_node; } if (cur == 0 && local_order < order) { cur = new_node; cur_order = local_order+1; } // fill out the new node new_node->child_context = 0; new_node->escapes = 0; new_node->next = 0; new_node->total = 0; push(new_node,temp); if (temp != root) { temp = temp->parent_context; --local_order; continue; } t = 2056; c = 8; // since this is the root we are going to the order-(-1) context // so we can just take care of that here. new_node->parent_context = root; coder.encode(symbol,symbol+1,alphabet_size); if (cur == 0) { cur = root; cur_order = 0; } break; } else { // there isn't enough space so we should throw away the tree clear(); temp = cur; local_order = cur_order; cur = 0; new_node = 0; } } // while (true) // initialize the counts and symbol for any new nodes we have added // to the tree. node* n, *nc; while (stack_size > 0) { pop(n,nc); n->symbol = static_cast<unsigned short>(symbol); // if nc is not a determnistic context if (nc->total) { unsigned long temp2 = t-c+nc->total - nc->escapes - nc->escapes; unsigned long temp = nc->total; temp *= c; temp /= (temp2|1); // this oring by 1 is just to make sure that temp2 is never zero temp += 2; if (temp > 50000) temp = 50000; n->count = static_cast<unsigned short>(temp); nc->escapes += 4; nc->total += static_cast<unsigned short>(temp) + 4; } else { n->count = 3 + 5*(c)/(t-c); nc->escapes = 4; nc->total = n->count + 4; } while (nc->total > 10000) { scale_counts(nc); } } } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // private member function definitions // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > eemk5::node* entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: allocate_node ( ) { node* temp; temp = root + next_node; ++next_node; return temp; } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: space_left ( ) const { return (next_node < total_nodes); } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: exclude ( unsigned short symbol ) { exc_used = true; unsigned long temp = 1; temp <<= symbol&0x1F; exc[symbol>>5] |= temp; } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: is_excluded ( unsigned short symbol ) { unsigned long temp = 1; temp <<= symbol&0x1F; return ((exc[symbol>>5]&temp) != 0); } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: clear_exclusions ( ) { exc_used = false; for (unsigned long i = 0; i < alphabet_size/32+1; ++i) { exc[i] = 0; } } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: something_is_excluded ( ) { return exc_used; } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: push ( node* n, node* nc ) { stack[stack_size].n = n; stack[stack_size].nc = nc; ++stack_size; } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: pop ( node*& n, node*& nc ) { --stack_size; n = stack[stack_size].n; nc = stack[stack_size].nc; } // ---------------------------------------------------------------------------------------- template < unsigned long alphabet_size, typename entropy_encoder, unsigned long total_nodes, unsigned long order > void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: scale_counts ( node* temp ) { if (temp->escapes > 1) temp->escapes >>= 1; temp->total = temp->escapes; node* n = temp->child_context; while (n != 0) { if (n->count > 1) n->count >>= 1; temp->total += n->count; n = n->next; } } // ---------------------------------------------------------------------------------------- } #endif // DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_