dlib C++ Library - shape_predictor

// Copyright (C) 2014  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_

#include "full_object_detection_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../pixel.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class shape_predictor
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool that takes in an image region containing some object
                and outputs a set of point locations that define the pose of the object.
                The classic example of this is human face pose prediction, where you take
                an image of a human face as input and are expected to identify the
                locations of important facial landmarks such as the corners of the mouth
                and eyes, tip of the nose, and so forth.

                To create useful instantiations of this object you need to use the
                shape_predictor_trainer object defined in the
                shape_predictor_trainer_abstract.h file to train a shape_predictor using a
                set of training images, each annotated with shapes you want to predict.

            THREAD SAFETY
                No synchronization is required when using this object.  In particular, a
                single instance of this object can be used from multiple threads at the
                same time.  
        !*/

    public:

        shape_predictor (
        );
        /*!
            ensures
                - #num_parts() == 0
                - #num_features() == 0
        !*/

        unsigned long num_parts (
        ) const;
        /*!
            ensures
                - returns the number of parts in the shapes predicted by this object.
        !*/

        unsigned long num_features (
        ) const;
        /*!
            ensures
                - Returns the dimensionality of the feature vector output by operator().
                  This number is the total number of trees in this object times the number
                  of leaves on each tree.  
        !*/

        template <typename image_type, typename T, typename U>
        full_object_detection operator()(
            const image_type& img,
            const rectangle& rect,
            std::vector<std::pair<T,U> >& feats
        ) const;
        /*!
            requires
                - image_type == an image object that implements the interface defined in
                  dlib/image_processing/generic_image.h 
                - T is some unsigned integral type (e.g. unsigned int).
                - U is any scalar type capable of storing the value 1 (e.g. float).
            ensures
                - Runs the shape prediction algorithm on the part of the image contained in
                  the given bounding rectangle.  So it will try and fit the shape model to
                  the contents of the given rectangle in the image.  For example, if there
                  is a human face inside the rectangle and you use a face landmarking shape
                  model then this function will return the locations of the face landmarks
                  as the parts.  So the return value is a full_object_detection DET such
                  that:
                    - DET.get_rect() == rect
                    - DET.num_parts() == num_parts()
                    - for all valid i:
                        - DET.part(i) == the location in img for the i-th part of the shape
                          predicted by this object.
                - #feats == a sparse vector that records which leaf each tree used to make
                  the shape prediction.   Moreover, it is an indicator vector, Therefore,
                  for all valid i:
                    - #feats[i].second == 1
                  Further, #feats is a vector from the space of num_features() dimensional
                  vectors.  The output shape positions can be represented as the dot
                  product between #feats and a weight vector.  Therefore, #feats encodes
                  all the information from img that was used to predict the returned shape
                  object.
        !*/

        template <typename image_type>
        full_object_detection operator()(
            const image_type& img,
            const rectangle& rect
        ) const;
        /*!
            requires
                - image_type == an image object that implements the interface defined in
                  dlib/image_processing/generic_image.h 
            ensures
                - Calling this function is equivalent to calling (*this)(img, rect, ignored)
                  where the 3d argument is discarded.
        !*/

    };

    void serialize (const shape_predictor& item, std::ostream& out);
    void deserialize (shape_predictor& item, std::istream& in);
    /*!
        provides serialization support
    !*/

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename image_array
        >
    double test_shape_predictor (
        const shape_predictor& sp,
        const image_array& images,
        const std::vector<std::vector<full_object_detection> >& objects,
        const std::vector<std::vector<double> >& scales
    );
    /*!
        requires
            - image_array is a dlib::array of image objects where each image object
              implements the interface defined in dlib/image_processing/generic_image.h 
            - images.size() == objects.size()
            - for all valid i and j:
                - objects[i][j].num_parts() == sp.num_parts()
            - if (scales.size() != 0) then
                - There must be a scale value for each full_object_detection in objects.
                  That is, it must be the case that:
                    - scales.size() == objects.size()
                    - for all valid i:
                        - scales[i].size() == objects[i].size()
        ensures
            - Tests the given shape_predictor by running it on each of the given objects and
              checking how well it recovers the part positions.  In particular, for all 
              valid i and j we perform:
                sp(images[i], objects[i][j].get_rect())
              and compare the result with the truth part positions in objects[i][j].  We
              then return the average distance (measured in pixels) between a predicted
              part location and its true position.  
            - Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT are
              simply ignored.
            - if (scales.size() != 0) then
                - Each time we compute the distance between a predicted part location and
                  its true location in objects[i][j] we divide the distance by
                  scales[i][j].  Therefore, if you want the reported error to be the
                  average pixel distance then give an empty scales vector, but if you want
                  the returned value to be something else like the average distance
                  normalized by some feature of each object (e.g. the interocular distance)
                  then you can supply those normalizing values via scales.
    !*/

    template <
        typename image_array
        >
    double test_shape_predictor (
        const shape_predictor& sp,
        const image_array& images,
        const std::vector<std::vector<full_object_detection> >& objects
    );
    /*!
        requires
            - image_array is a dlib::array of image objects where each image object
              implements the interface defined in dlib/image_processing/generic_image.h 
            - images.size() == objects.size()
            - for all valid i and j:
                - objects[i][j].num_parts() == sp.num_parts()
        ensures
            - returns test_shape_predictor(sp, images, objects, no_scales) where no_scales
              is an empty vector.  So this is just a convenience function for calling the
              above test_shape_predictor() routine without a scales argument.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_