// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
//
// NOTE: This file is interface documentation only.  The guard macro is #undef'd
// immediately before the #ifdef below, so the preprocessor always discards
// everything between the #ifdef and the matching #endif.
#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_

#include "full_object_detection_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../pixel.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class shape_predictor
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool that takes in an image region containing some
                object and outputs a set of point locations that define the pose of the
                object.  The classic example of this is human face pose prediction,
                where you take an image of a human face as input and are expected to
                identify the locations of important facial landmarks such as the
                corners of the mouth and eyes, tip of the nose, and so forth.

                To create useful instantiations of this object you need to use the
                shape_predictor_trainer object defined in the
                shape_predictor_trainer_abstract.h file to train a shape_predictor
                using a set of training images, each annotated with shapes you want to
                predict.

            THREAD SAFETY
                No synchronization is required when using this object.  In particular,
                a single instance of this object can be used from multiple threads at
                the same time.
        !*/

    public:

        shape_predictor (
        );
        /*!
            ensures
                - #num_parts() == 0
                - #num_features() == 0
        !*/

        unsigned long num_parts (
        ) const;
        /*!
            ensures
                - returns the number of parts in the shapes predicted by this object.
        !*/

        unsigned long num_features (
        ) const;
        /*!
            ensures
                - Returns the dimensionality of the feature vector output by
                  operator().  This number is the total number of trees in this object
                  times the number of leaves on each tree.
        !*/

        template <typename image_type, typename T, typename U>
        full_object_detection operator()(
            const image_type& img,
            const rectangle& rect,
            std::vector<std::pair<T,U> >& feats
        ) const;
        /*!
            requires
                - image_type == an image object that implements the interface defined
                  in dlib/image_processing/generic_image.h
                - T is some unsigned integral type (e.g. unsigned int).
                - U is any scalar type capable of storing the value 1 (e.g. float).
            ensures
                - Runs the shape prediction algorithm on the part of the image
                  contained in the given bounding rectangle.  So it will try to fit the
                  shape model to the contents of the given rectangle in the image.  For
                  example, if there is a human face inside the rectangle and you use a
                  face landmarking shape model then this function will return the
                  locations of the face landmarks as the parts.  So the return value is
                  a full_object_detection DET such that:
                    - DET.get_rect() == rect
                    - DET.num_parts() == num_parts()
                    - for all valid i:
                        - DET.part(i) == the location in img for the i-th part of the
                          shape predicted by this object.
                - #feats == a sparse vector that records which leaf each tree used to
                  make the shape prediction.  Moreover, it is an indicator vector.
                  Therefore, for all valid i:
                    - #feats[i].second == 1
                  Further, #feats is a vector from the space of num_features()
                  dimensional vectors.  The output shape positions can be represented
                  as the dot product between #feats and a weight vector.  Therefore,
                  #feats encodes all the information from img that was used to predict
                  the returned shape object.
        !*/

        template <typename image_type>
        full_object_detection operator()(
            const image_type& img,
            const rectangle& rect
        ) const;
        /*!
            requires
                - image_type == an image object that implements the interface defined
                  in dlib/image_processing/generic_image.h
            ensures
                - Calling this function is equivalent to calling
                  (*this)(img, rect, ignored) where the 3rd argument is discarded.
                  See the overload above for the full contract on the returned
                  full_object_detection.
        !*/

    };

    void serialize (const shape_predictor& item, std::ostream& out);
    void deserialize (shape_predictor& item, std::istream& in);
    /*!
        provides serialization support
    !*/

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename image_array
        >
    double test_shape_predictor (
        const shape_predictor& sp,
        const image_array& images,
        const std::vector<std::vector<full_object_detection> >& objects,
        const std::vector<std::vector<double> >& scales
    );
    /*!
        requires
            - image_array is a dlib::array of image objects where each image object
              implements the interface defined in
              dlib/image_processing/generic_image.h
            - images.size() == objects.size()
            - for all valid i and j:
                - objects[i][j].num_parts() == sp.num_parts()
            - if (scales.size() != 0) then
                - There must be a scale value for each full_object_detection in
                  objects.  That is, it must be the case that:
                    - scales.size() == objects.size()
                    - for all valid i:
                        - scales[i].size() == objects[i].size()
        ensures
            - Tests the given shape_predictor by running it on each of the given
              objects and checking how well it recovers the part positions.  In
              particular, for all valid i and j we perform:
                sp(images[i], objects[i][j].get_rect())
              and compare the result with the truth part positions in objects[i][j].
              We then return the average distance (measured in pixels) between a
              predicted part location and its true position.
            - Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT
              are simply ignored.
            - if (scales.size() != 0) then
                - Each time we compute the distance between a predicted part location
                  and its true location in objects[i][j] we divide the distance by
                  scales[i][j].  Therefore, if you want the reported error to be the
                  average pixel distance then give an empty scales vector, but if you
                  want the returned value to be something else like the average
                  distance normalized by some feature of each object (e.g. the
                  interocular distance) then you can supply those normalizing values
                  via scales.
    !*/

    template <
        typename image_array
        >
    double test_shape_predictor (
        const shape_predictor& sp,
        const image_array& images,
        const std::vector<std::vector<full_object_detection> >& objects
    );
    /*!
        requires
            - image_array is a dlib::array of image objects where each image object
              implements the interface defined in
              dlib/image_processing/generic_image.h
            - images.size() == objects.size()
            - for all valid i and j:
                - objects[i][j].num_parts() == sp.num_parts()
        ensures
            - returns test_shape_predictor(sp, images, objects, no_scales) where
              no_scales is an empty vector.  So this is just a convenience function
              for calling the above test_shape_predictor() routine without a scales
              argument.  With an empty scales vector, the above routine's description
              says the reported error is the average pixel distance.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_