// Copyright (C) 2008 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_LISfh_ #define DLIB_LISfh_ #include <vector> #include "linearly_independent_subset_finder_abstract.h" #include "../matrix.h" #include "function.h" #include "../std_allocator.h" #include "../algs.h" #include "../serialize.h" #include "../is_kind.h" #include "../string.h" #include "../rand.h" namespace dlib { // ---------------------------------------------------------------------------------------- template <typename kernel_type> class linearly_independent_subset_finder { /*! INITIAL VALUE - min_strength == 0 - min_vect_idx == 0 - K_inv.size() == 0 - K.size() == 0 - dictionary.size() == 0 CONVENTION - max_dictionary_size() == my_max_dictionary_size - get_kernel() == kernel - minimum_tolerance() == min_tolerance - size() == dictionary.size() - get_dictionary() == mat(dictionary) - K.nr() == dictionary.size() - K.nc() == dictionary.size() - for all valid r,c: - K(r,c) == kernel(dictionary[r], dictionary[c]) - K_inv == inv(K) - if (dictionary.size() == my_max_dictionary_size) then - for all valid 0 < i < dictionary.size(): - Let STRENGTHS[i] == the delta you would get for dictionary[i] (i.e. Approximately Linearly Dependent value) if you removed dictionary[i] from this object and then tried to add it back in. - min_strength == the minimum value from STRENGTHS - min_vect_idx == the index of the element in STRENGTHS with the smallest value !*/ public: typedef typename kernel_type::scalar_type scalar_type; typedef typename kernel_type::sample_type sample_type; typedef typename kernel_type::sample_type type; typedef typename kernel_type::mem_manager_type mem_manager_type; linearly_independent_subset_finder ( ) : my_max_dictionary_size(100), min_tolerance(0.001) { clear_dictionary(); } linearly_independent_subset_finder ( const kernel_type& kernel_, unsigned long max_dictionary_size_, scalar_type min_tolerance_ = 0.001 ) : kernel(kernel_), my_max_dictionary_size(max_dictionary_size_), min_tolerance(min_tolerance_) { // make sure requires clause is not broken DLIB_ASSERT(min_tolerance_ > 0 && max_dictionary_size_ > 1, "\tlinearly_independent_subset_finder()" << "\n\tinvalid argument to constructor" << "\n\tmin_tolerance_: " << min_tolerance_ << "\n\tmax_dictionary_size_: " << max_dictionary_size_ << "\n\tthis: " << this ); clear_dictionary(); } unsigned long max_dictionary_size() const { return my_max_dictionary_size; } const kernel_type& get_kernel ( ) const { return kernel; } scalar_type minimum_tolerance( ) const { return min_tolerance; } void set_minimum_tolerance ( scalar_type min_tol ) { // make sure requires clause is not broken DLIB_ASSERT(min_tol > 0, "\tlinearly_independent_subset_finder::set_minimum_tolerance()" << "\n\tinvalid argument to this function" << "\n\tmin_tol: " << min_tol << "\n\tthis: " << this ); min_tolerance = min_tol; } void clear_dictionary () { dictionary.clear(); min_strength = 0; min_vect_idx = 0; K_inv.set_size(0,0); K.set_size(0,0); } scalar_type projection_error ( const sample_type& x ) const { const scalar_type kx = kernel(x,x); if (dictionary.size() == 0) { return kx; } else { // fill in k k.set_size(dictionary.size()); for (long r = 0; r < k.nr(); ++r) k(r) = kernel(x,dictionary[r]); // compute the error we would have if we approximated the new x sample // with the dictionary. That is, do the ALD test from the KRLS paper. a = K_inv*k; scalar_type delta = kx - trans(k)*a; return delta; } } bool add ( const sample_type& x ) { const scalar_type kx = kernel(x,x); if (dictionary.size() == 0) { // just ignore this sample if it is the zero vector (or really close to being zero) if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) { // set initial state since this is the first sample we have seen K_inv.set_size(1,1); K_inv(0,0) = 1/kx; K.set_size(1,1); K(0,0) = kx; dictionary.push_back(x); return true; } return false; } else { // fill in k k.set_size(dictionary.size()); for (long r = 0; r < k.nr(); ++r) k(r) = kernel(x,dictionary[r]); // compute the error we would have if we approximated the new x sample // with the dictionary. That is, do the ALD test from the KRLS paper. a = K_inv*k; scalar_type delta = kx - trans(k)*a; // if this new vector is approximately linearly independent of the vectors // in our dictionary. if (delta > min_strength && delta > min_tolerance) { if (dictionary.size() == my_max_dictionary_size) { // if we have never computed the min_strength then we should compute it if (min_strength == 0) recompute_min_strength(); const long i = min_vect_idx; // replace the min strength vector with x. Put the new vector onto the end of // dictionary and remove the vector at position i. dictionary.erase(dictionary.begin()+i); dictionary.push_back(x); // compute reduced K_inv. // Remove the i'th vector from the inverse kernel matrix. This formula is basically // just the reverse of the way K_inv is updated by equation 3.14 below. temp = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); // recompute these guys since they were computed with the old // kernel matrix k2 = remove_row(k,i); a2 = temp*k2; delta = kx - trans(k2)*a2; // now update temp with the new dictionary vector // update the middle part of the matrix set_subm(K_inv, get_rect(temp)) = temp + a2*trans(a2)/delta; // update the right column of the matrix set_subm(K_inv, 0, temp.nr(),temp.nr(),1) = -a2/delta; // update the bottom row of the matrix set_subm(K_inv, temp.nr(), 0, 1, temp.nr()) = trans(-a2/delta); // update the bottom right corner of the matrix K_inv(temp.nr(), temp.nc()) = 1/delta; // now update the kernel matrix K set_subm(K,get_rect(temp)) = removerc(K, i,i); set_subm(K, 0, K.nr()-1,K.nr()-1,1) = k2; // update the bottom row of the matrix set_subm(K, K.nr()-1, 0, 1, K.nr()-1) = trans(k2); K(K.nr()-1, K.nc()-1) = kx; // now we have to recompute the min_strength in this case recompute_min_strength(); } else { // update K_inv by computing the new one in the temp matrix (equation 3.14 from Engel) temp.set_size(K_inv.nr()+1, K_inv.nc()+1); // update the middle part of the matrix set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; // update the right column of the matrix set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; // update the bottom row of the matrix set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); // update the bottom right corner of the matrix temp(K_inv.nr(), K_inv.nc()) = 1/delta; // put temp into K_inv temp.swap(K_inv); // update K (the kernel matrix) temp.set_size(K.nr()+1, K.nc()+1); set_subm(temp, get_rect(K)) = K; // update the right column of the matrix set_subm(temp, 0, K.nr(),K.nr(),1) = k; // update the bottom row of the matrix set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); temp(K.nr(), K.nc()) = kx; // put temp into K temp.swap(K); // add x to the dictionary dictionary.push_back(x); } return true; } else { return false; } } } void swap ( linearly_independent_subset_finder& item ) { exchange(kernel, item.kernel); dictionary.swap(item.dictionary); exchange(min_strength, item.min_strength); exchange(min_vect_idx, item.min_vect_idx); K_inv.swap(item.K_inv); K.swap(item.K); exchange(my_max_dictionary_size, item.my_max_dictionary_size); exchange(min_tolerance, item.min_tolerance); // non-state temp members a.swap(item.a); k.swap(item.k); a2.swap(item.a2); k2.swap(item.k2); temp.swap(item.temp); } unsigned long size ( ) const { return dictionary.size(); } const matrix<sample_type,0,1,mem_manager_type> get_dictionary ( ) const { return mat(dictionary); } friend void serialize(const linearly_independent_subset_finder& item, std::ostream& out) { serialize(item.kernel, out); serialize(item.dictionary, out); serialize(item.min_strength, out); serialize(item.min_vect_idx, out); serialize(item.K_inv, out); serialize(item.K, out); serialize(item.my_max_dictionary_size, out); serialize(item.min_tolerance, out); } friend void deserialize(linearly_independent_subset_finder& item, std::istream& in) { deserialize(item.kernel, in); deserialize(item.dictionary, in); deserialize(item.min_strength, in); deserialize(item.min_vect_idx, in); deserialize(item.K_inv, in); deserialize(item.K, in); deserialize(item.my_max_dictionary_size, in); deserialize(item.min_tolerance, in); } const sample_type& operator[] ( unsigned long index ) const { return dictionary[index]; } const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix ( ) const { return K; } const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix ( ) const { return K_inv; } private: typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; typedef std::vector<scalar_type,alloc_scalar_type> scalar_vector_type; void recompute_min_strength ( ) /*! ensures - recomputes the min_strength and min_vect_idx values so that they are correct with respect to the CONVENTION !*/ { min_strength = std::numeric_limits<scalar_type>::max(); // here we loop over each dictionary vector and compute what its delta would be if // we were to remove it from the dictionary and then try to add it back in. for (unsigned long i = 0; i < dictionary.size(); ++i) { // compute a2 = K_inv*k but where dictionary vector i has been removed a2 = (removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i)) * (remove_row(colm(K,i),i)); scalar_type delta = K(i,i) - trans(remove_row(colm(K,i),i))*a2; if (delta < min_strength) { min_strength = delta; min_vect_idx = i; } } } kernel_type kernel; dictionary_vector_type dictionary; scalar_type min_strength; unsigned long min_vect_idx; matrix<scalar_type,0,0,mem_manager_type> K_inv; matrix<scalar_type,0,0,mem_manager_type> K; unsigned long my_max_dictionary_size; scalar_type min_tolerance; // temp variables here just so we don't have to reconstruct them over and over. Thus, // they aren't really part of the state of this object. mutable matrix<scalar_type,0,1,mem_manager_type> a, a2; mutable matrix<scalar_type,0,1,mem_manager_type> k, k2; mutable matrix<scalar_type,0,0,mem_manager_type> temp; }; // ---------------------------------------------------------------------------------------- template <typename kernel_type> void swap(linearly_independent_subset_finder<kernel_type>& a, linearly_independent_subset_finder<kernel_type>& b) { a.swap(b); } // ---------------------------------------------------------------------------------------- template < typename T > const matrix_op<op_array_to_mat<linearly_independent_subset_finder<T> > > mat ( const linearly_independent_subset_finder<T>& m ) { typedef op_array_to_mat<linearly_independent_subset_finder<T> > op; return matrix_op<op>(op(m)); } // ---------------------------------------------------------------------------------------- namespace impl { template < typename kernel_type, typename vector_type, typename rand_type > void fill_lisf ( linearly_independent_subset_finder<kernel_type>& lisf, const vector_type& samples, rand_type& rnd, int sampling_size ) { // make sure requires clause is not broken DLIB_ASSERT(is_vector(samples) && sampling_size > 0, "\t void fill_lisf()" << "\n\t invalid arguments to this function" << "\n\t is_vector(samples): " << is_vector(samples) << "\n\t sampling_size: " << sampling_size ); // no need to do anything if there aren't any samples if (samples.size() == 0) return; typedef typename kernel_type::scalar_type scalar_type; // Start out by guessing what a reasonable projection error tolerance is. We will use // the biggest projection error we see in a small sample. scalar_type tol = 0; for (int i = 0; i < sampling_size; ++i) { const unsigned long idx = rnd.get_random_32bit_number()%samples.size(); const scalar_type temp = lisf.projection_error(samples(idx)); if (temp > tol) tol = temp; } const scalar_type min_tol = lisf.minimum_tolerance(); // run many rounds of random sampling. In each round we drop the tolerance lower. while (tol >= min_tol && lisf.size() < lisf.max_dictionary_size()) { tol *= 0.5; lisf.set_minimum_tolerance(std::max(tol, min_tol)); int add_failures = 0; // Keep picking random samples and adding them into the lisf. Stop when we either // fill it up or can't find any more samples with projection error larger than the // current tolerance. while (lisf.size() < lisf.max_dictionary_size() && add_failures < sampling_size) { if (lisf.add(samples(rnd.get_random_32bit_number()%samples.size())) == false) { ++add_failures; } } } // set this back to its original value lisf.set_minimum_tolerance(min_tol); } } template < typename kernel_type, typename vector_type > void fill_lisf ( linearly_independent_subset_finder<kernel_type>& lisf, const vector_type& samples ) { dlib::rand rnd; impl::fill_lisf(lisf, mat(samples),rnd, 2000); } template < typename kernel_type, typename vector_type, typename rand_type > typename enable_if<is_rand<rand_type> >::type fill_lisf ( linearly_independent_subset_finder<kernel_type>& lisf, const vector_type& samples, rand_type& rnd, const int sampling_size = 2000 ) { impl::fill_lisf(lisf, mat(samples),rnd, sampling_size); } template < typename kernel_type, typename vector_type, typename rand_type > typename disable_if<is_rand<rand_type> >::type fill_lisf ( linearly_independent_subset_finder<kernel_type>& lisf, const vector_type& samples, rand_type random_seed, const int sampling_size = 2000 ) { dlib::rand rnd; rnd.set_seed(cast_to_string(random_seed)); impl::fill_lisf(lisf, mat(samples), rnd, sampling_size); } // ---------------------------------------------------------------------------------------- } #endif // DLIB_LISfh_