// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_SVM_C_EKm_TRAINER_Hh_
#define DLIB_SVM_C_EKm_TRAINER_Hh_

#include "../algs.h"
#include "function.h"
#include "kernel.h"
#include "empirical_kernel_map.h"
#include "svm_c_linear_trainer.h"
#include "svm_c_ekm_trainer_abstract.h"
#include "../statistics.h"
#include "../rand.h"
#include <vector>

namespace dlib
{
    template <
        typename K
        >
    class svm_c_ekm_trainer
    {
    public:
        typedef K kernel_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;
        typedef decision_function<kernel_type> trained_function_type;

        svm_c_ekm_trainer (
        )
        {
            verbose = false;
            ekm_stale = true;

            initial_basis_size = 10;
            basis_size_increment = 50;
            max_basis_size = 300;
        }

        explicit svm_c_ekm_trainer (
            const scalar_type& C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t svm_c_ekm_trainer::svm_c_ekm_trainer()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );

            ocas.set_c(C);
            verbose = false;
            ekm_stale = true;

            initial_basis_size = 10;
            basis_size_increment = 50;
            max_basis_size = 300;
        }

        void set_epsilon (
            scalar_type eps
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps > 0,
                "\t void svm_c_ekm_trainer::set_epsilon()"
                << "\n\t eps must be greater than 0"
                << "\n\t eps:  " << eps
                << "\n\t this: " << this
                );

            ocas.set_epsilon(eps);
        }

        const scalar_type get_epsilon (
        ) const
        {
            return ocas.get_epsilon();
        }

        void set_max_iterations (
            unsigned long max_iter
        )
        {
            ocas.set_max_iterations(max_iter);
        }

        unsigned long get_max_iterations (
        )
        {
            return ocas.get_max_iterations();
        }

        void be_verbose (
        )
        {
            verbose = true;
            ocas.be_quiet();
        }

        void be_very_verbose (
        )
        {
            verbose = true;
            ocas.be_verbose();
        }

        void be_quiet (
        )
        {
            verbose = false;
            ocas.be_quiet();
        }

        void set_oca (
            const oca& item
        )
        {
            ocas.set_oca(item);
        }

        const oca get_oca (
        ) const
        {
            return ocas.get_oca();
        }

        const kernel_type get_kernel (
        ) const
        {
            return kern;
        }

        void set_kernel (
            const kernel_type& k
        )
        {
            kern = k;
            ekm_stale = true;
        }

        template <typename T>
        void set_basis (
            const T& basis_samples
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(basis_samples.size() > 0 && is_vector(mat(basis_samples)),
                "\tvoid svm_c_ekm_trainer::set_basis(basis_samples)"
                << "\n\t You have to give a non-empty set of basis_samples and it must be a vector"
                << "\n\t basis_samples.size():          " << basis_samples.size()
                << "\n\t is_vector(mat(basis_samples)): " << is_vector(mat(basis_samples))
                << "\n\t this: " << this
                );

            basis = mat(basis_samples);
            ekm_stale = true;
        }

        bool basis_loaded (
        ) const
        {
            return (basis.size() != 0);
        }

        void clear_basis (
        )
        {
            basis.set_size(0);
            ekm.clear();
            ekm_stale = true;
        }
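        /*
            For reference, a minimal sketch of supplying your own basis rather
            than letting the trainer grow one automatically.  Here samples is a
            hypothetical std::vector of training points, and randomly_subsample()
            comes from dlib's statistics tools (included above via
            "../statistics.h"):

                svm_c_ekm_trainer<kernel_type> trainer;
                trainer.set_basis(randomly_subsample(samples, 300));
                // basis_loaded() == true from here on, so train() will use
                // this basis instead of building one itself.
        */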
        unsigned long get_max_basis_size (
        ) const
        {
            return max_basis_size;
        }

        void set_max_basis_size (
            unsigned long max_basis_size_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(max_basis_size_ > 0,
                "\t void svm_c_ekm_trainer::set_max_basis_size()"
                << "\n\t max_basis_size_ must be greater than 0"
                << "\n\t max_basis_size_: " << max_basis_size_
                << "\n\t this:            " << this
                );

            max_basis_size = max_basis_size_;
            if (initial_basis_size > max_basis_size)
                initial_basis_size = max_basis_size;
        }

        unsigned long get_initial_basis_size (
        ) const
        {
            return initial_basis_size;
        }

        void set_initial_basis_size (
            unsigned long initial_basis_size_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(initial_basis_size_ > 0,
                "\t void svm_c_ekm_trainer::set_initial_basis_size()"
                << "\n\t initial_basis_size_ must be greater than 0"
                << "\n\t initial_basis_size_: " << initial_basis_size_
                << "\n\t this:                " << this
                );

            initial_basis_size = initial_basis_size_;
            if (initial_basis_size > max_basis_size)
                max_basis_size = initial_basis_size;
        }

        unsigned long get_basis_size_increment (
        ) const
        {
            return basis_size_increment;
        }

        void set_basis_size_increment (
            unsigned long basis_size_increment_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(basis_size_increment_ > 0,
                "\t void svm_c_ekm_trainer::set_basis_size_increment()"
                << "\n\t basis_size_increment_ must be greater than 0"
                << "\n\t basis_size_increment_: " << basis_size_increment_
                << "\n\t this: " << this
                );

            basis_size_increment = basis_size_increment_;
        }

        void set_c (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_ekm_trainer::set_c()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );

            ocas.set_c(C);
        }

        const scalar_type get_c_class1 (
        ) const
        {
            return ocas.get_c_class1();
        }

        const scalar_type get_c_class2 (
        ) const
        {
            return ocas.get_c_class2();
        }

        void set_c_class1 (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_ekm_trainer::set_c_class1()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );

            ocas.set_c_class1(C);
        }

        void set_c_class2 (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_ekm_trainer::set_c_class2()"
                << "\n\t C must be greater than 0"
                << "\n\t C:    " << C
                << "\n\t this: " << this
                );

            ocas.set_c_class2(C);
        }

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            scalar_type obj;
            if (basis_loaded())
                return do_train_user_basis(mat(x),mat(y),obj);
            else
                return do_train_auto_basis(mat(x),mat(y),obj);
        }

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y,
            scalar_type& svm_objective
        ) const
        {
            if (basis_loaded())
                return do_train_user_basis(mat(x),mat(y),svm_objective);
            else
                return do_train_auto_basis(mat(x),mat(y),svm_objective);
        }
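        /*
            A minimal usage sketch.  The kernel, its parameter, and the data
            containers below are hypothetical placeholders, not part of this
            class:

                typedef matrix<double,0,1> sample_type;
                typedef radial_basis_kernel<sample_type> kernel_type;

                std::vector<sample_type> samples;
                std::vector<double> labels;   // must contain only +1 and -1
                // ... fill samples and labels ...

                svm_c_ekm_trainer<kernel_type> trainer;
                trainer.set_kernel(kernel_type(0.1));
                trainer.set_c(10);
                decision_function<kernel_type> df = trainer.train(samples, labels);
        */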
    private:

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train_user_basis (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y,
            scalar_type& svm_objective
        ) const
        /*!
            requires
                - basis_loaded() == true
            ensures
                - trains an SVM with the user supplied basis
        !*/
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
                "\t decision_function svm_c_ekm_trainer::train(x,y)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.nr(): " << x.nr()
                << "\n\t y.nr(): " << y.nr()
                << "\n\t x.nc(): " << x.nc()
                << "\n\t y.nc(): " << y.nc()
                << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y)
                );

            if (ekm_stale)
            {
                ekm.load(kern, basis);
                ekm_stale = false;
            }

            // project all the samples with the ekm
            running_stats<scalar_type> rs;
            std::vector<matrix<scalar_type,0,1, mem_manager_type> > proj_samples;
            proj_samples.reserve(x.size());
            for (long i = 0; i < x.size(); ++i)
            {
                if (verbose)
                {
                    scalar_type err;
                    proj_samples.push_back(ekm.project(x(i), err));
                    rs.add(err);
                }
                else
                {
                    proj_samples.push_back(ekm.project(x(i)));
                }
            }

            if (verbose)
            {
                std::cout << "\nMean EKM projection error: " << rs.mean() << std::endl;
                std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl;
            }

            // now do the training
            decision_function<linear_kernel<matrix<scalar_type,0,1, mem_manager_type> > > df;
            df = ocas.train(proj_samples, y, svm_objective);

            if (verbose)
            {
                std::cout << "Final svm objective: " << svm_objective << std::endl;
            }

            decision_function<kernel_type> final_df;
            final_df = ekm.convert_to_decision_function(df.basis_vectors(0));
            final_df.b = df.b;
            return final_df;
        }

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train_auto_basis (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y,
            scalar_type& svm_objective
        ) const
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
                "\t decision_function svm_c_ekm_trainer::train(x,y)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.nr(): " << x.nr()
                << "\n\t y.nr(): " << y.nr()
                << "\n\t x.nc(): " << x.nc()
                << "\n\t y.nc(): " << y.nc()
                << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y)
                );

            std::vector<matrix<scalar_type,0,1, mem_manager_type> > proj_samples(x.size());
            decision_function<linear_kernel<matrix<scalar_type,0,1, mem_manager_type> > > df;

            // we will use a linearly_independent_subset_finder to store our basis set
            linearly_independent_subset_finder<kernel_type> lisf(get_kernel(), max_basis_size);

            dlib::rand rnd;

            // first pick the initial basis set randomly
            for (unsigned long i = 0; i < 10*initial_basis_size && lisf.size() < initial_basis_size; ++i)
            {
                lisf.add(x(rnd.get_random_32bit_number()%x.size()));
            }

            ekm.load(lisf);

            // first project all samples into the span of the current basis
            for (long i = 0; i < x.size(); ++i)
            {
                proj_samples[i] = ekm.project(x(i));
            }

            svm_c_linear_trainer<linear_kernel<matrix<scalar_type,0,1,mem_manager_type> > > trainer(ocas);

            const scalar_type min_epsilon = trainer.get_epsilon();
            // While we are determining what the basis set will be we are going to use a very
            // loose stopping condition.  We will tighten it back up before producing the
            // final decision_function.
            trainer.set_epsilon(0.2);

            scalar_type prev_svm_objective = std::numeric_limits<scalar_type>::max();

            empirical_kernel_map<kernel_type> prev_ekm;

            // This loop is where we try to generate a basis for SVM training.  We will
            // do this by repeatedly training the SVM and adding a few points which violate
            // the margin to the basis in each iteration.
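            // In outline, each pass below: (1) trains a linear SVM on the
            // projected samples, halving epsilon whenever the objective fails
            // to improve, (2) stops once the objective can no longer be
            // reduced, (3) scans randomly chosen samples and adds margin
            // violators (i.e. samples with df(x)*y < 1) to the basis when they
            // are linearly independent of it, and (4) reprojects every sample
            // into the span of the enlarged basis.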
            while (true)
            {
                // if the basis is already as big as it's going to get then just do the most
                // accurate training right now.
                if (lisf.size() == max_basis_size)
                    trainer.set_epsilon(min_epsilon);

                while (true)
                {
                    // now do the training
                    df = trainer.train(proj_samples, y, svm_objective);

                    if (svm_objective < prev_svm_objective)
                        break;

                    // If the training didn't reduce the objective more than last time then
                    // try lowering the epsilon and doing it again.
                    if (trainer.get_epsilon() > min_epsilon)
                    {
                        trainer.set_epsilon(std::max(trainer.get_epsilon()*0.5, min_epsilon));
                        if (verbose)
                            std::cout << " *** Reducing epsilon to " << trainer.get_epsilon() << std::endl;
                    }
                    else
                        break;
                }

                if (verbose)
                {
                    std::cout << "svm objective: " << svm_objective << std::endl;
                    std::cout << "basis size:    " << lisf.size() << std::endl;
                }

                // if we failed to make progress on this iteration then we are done
                if (svm_objective >= prev_svm_objective)
                    break;

                prev_svm_objective = svm_objective;

                // now add more elements to the basis
                unsigned long count = 0;
                for (unsigned long j = 0;
                     (j < 100*basis_size_increment) && (count < basis_size_increment) && (lisf.size() < max_basis_size);
                     ++j)
                {
                    // pick a random sample
                    const unsigned long idx = rnd.get_random_32bit_number()%x.size();
                    // If it is a margin violator then it is useful to add it into the basis set.
                    if (df(proj_samples[idx])*y(idx) < 1)
                    {
                        // Add the sample into the basis set if it is linearly independent of all the
                        // vectors already in the basis set.
                        if (lisf.add(x(idx)))
                        {
                            ++count;
                        }
                    }
                }
                // if we couldn't add any more basis vectors then stop
                if (count == 0)
                {
                    if (verbose)
                        std::cout << "Stopping, couldn't add more basis vectors." << std::endl;
                    break;
                }

                // Project all the samples into the span of our newly enlarged basis.  We will do
                // this using the special transformation in the EKM that lets us project from a
                // smaller basis set to a larger one without needing to reevaluate kernel functions
                // we have already computed.
                ekm.swap(prev_ekm);
                ekm.load(lisf);
                projection_function<kernel_type> proj_part;
                matrix<double> prev_to_new;
                prev_ekm.get_transformation_to(ekm, prev_to_new, proj_part);

                matrix<scalar_type,0,1, mem_manager_type> temp;
                for (long i = 0; i < x.size(); ++i)
                {
                    // assign to temporary to avoid memory allocation that would result if we
                    // assigned this expression straight into proj_samples[i]
                    temp = prev_to_new*proj_samples[i] + proj_part(x(i));
                    proj_samples[i] = temp;
                }
            }
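            // A note on the update used above, based on the documented contract of
            // empirical_kernel_map::get_transformation_to(): for any sample x,
            //    ekm.project(x) == prev_to_new*prev_ekm.project(x) + proj_part(x)
            // up to numerical error, which is why the old projections can be reused
            // rather than recomputing kernel products against the old basis vectors.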
            // Reproject all the data samples using the final basis.  We could just use what we
            // already have but the recursive thing done above to compute the proj_samples
            // might have accumulated a little numerical error.  So let's just be safe.
            running_stats<scalar_type> rs, rs_margin;
            for (long i = 0; i < x.size(); ++i)
            {
                if (verbose)
                {
                    scalar_type err;
                    proj_samples[i] = ekm.project(x(i),err);
                    rs.add(err);
                    // if this point is within the margin
                    if (df(proj_samples[i])*y(i) < 1)
                        rs_margin.add(err);
                }
                else
                {
                    proj_samples[i] = ekm.project(x(i));
                }
            }

            // do the final training
            trainer.set_epsilon(min_epsilon);
            df = trainer.train(proj_samples, y, svm_objective);

            if (verbose)
            {
                std::cout << "\nMean EKM projection error: " << rs.mean() << std::endl;
                std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl;
                std::cout << "Mean EKM projection error for margin violators: " << rs_margin.mean() << std::endl;
                std::cout << "Standard deviation of EKM projection error for margin violators: "
                          << ((rs_margin.current_n()>1)?rs_margin.stddev():0) << std::endl;

                std::cout << "Final svm objective: " << svm_objective << std::endl;
            }

            decision_function<kernel_type> final_df;
            final_df = ekm.convert_to_decision_function(df.basis_vectors(0));
            final_df.b = df.b;

            // we don't need the ekm anymore so clear it out
            ekm.clear();

            return final_df;
        }

        /*!
            CONVENTION
                - if (ekm_stale) then
                    - kern or basis have changed since the last time
                      they were loaded into the ekm
        !*/

        svm_c_linear_trainer<linear_kernel<matrix<scalar_type,0,1,mem_manager_type> > > ocas;
        bool verbose;

        kernel_type kern;
        unsigned long max_basis_size;
        unsigned long basis_size_increment;
        unsigned long initial_basis_size;

        matrix<sample_type,0,1,mem_manager_type> basis;
        mutable empirical_kernel_map<kernel_type> ekm;
        mutable bool ekm_stale;

    };

}

#endif // DLIB_SVM_C_EKm_TRAINER_Hh_