// Copyright (C) 2011 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_SCAN_IMaGE_PYRAMID_Hh_ #define DLIB_SCAN_IMaGE_PYRAMID_Hh_ #include "scan_image_pyramid_abstract.h" #include "../matrix.h" #include "../geometry.h" #include "scan_image.h" #include "../array2d.h" #include <vector> #include "full_object_detection.h" #include "../image_processing/generic_image.h" namespace dlib { // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > class scan_image_pyramid : noncopyable { public: typedef matrix<double,0,1> feature_vector_type; typedef Pyramid_type pyramid_type; typedef Feature_extractor_type feature_extractor_type; scan_image_pyramid ( ); template < typename image_type > void load ( const image_type& img ); inline bool is_loaded_with_image ( ) const; inline void copy_configuration( const feature_extractor_type& fe ); inline void copy_configuration ( const scan_image_pyramid& item ); const Feature_extractor_type& get_feature_extractor ( ) const { return feats_config; } void add_detection_template ( const rectangle& object_box, const std::vector<rectangle>& stationary_feature_extraction_regions, const std::vector<rectangle>& movable_feature_extraction_regions ); void add_detection_template ( const rectangle& object_box, const std::vector<rectangle>& stationary_feature_extraction_regions ); inline unsigned long get_num_detection_templates ( ) const; inline unsigned long get_num_movable_components_per_detection_template ( ) const; inline unsigned long get_num_stationary_components_per_detection_template ( ) const; inline unsigned long get_num_components_per_detection_template ( ) const; inline long get_num_dimensions ( ) const; unsigned long get_max_pyramid_levels ( ) const; void set_max_pyramid_levels ( unsigned long max_levels ); inline unsigned long get_max_detections_per_template ( ) const; void set_min_pyramid_layer_size ( unsigned long width, unsigned long height ); inline unsigned long get_min_pyramid_layer_width ( ) const; inline unsigned long get_min_pyramid_layer_height ( ) const; void set_max_detections_per_template ( unsigned long max_dets ); void detect ( const feature_vector_type& w, std::vector<std::pair<double, rectangle> >& dets, const double thresh ) const; void get_feature_vector ( const full_object_detection& obj, feature_vector_type& psi ) const; full_object_detection get_full_object_detection ( const rectangle& rect, const feature_vector_type& w ) const; const rectangle get_best_matching_rect ( const rectangle& rect ) const; template <typename T, typename U> friend void serialize ( const scan_image_pyramid<T,U>& item, std::ostream& out ); template <typename T, typename U> friend void deserialize ( scan_image_pyramid<T,U>& item, std::istream& in ); private: static bool compare_pair_rect ( const std::pair<double, rectangle>& a, const std::pair<double, rectangle>& b ) { return a.first < b.first; } struct detection_template { rectangle object_box; // always centered at (0,0) std::vector<rectangle> rects; // template with respect to (0,0) std::vector<rectangle> movable_rects; }; friend void serialize(const detection_template& item, std::ostream& out) { int version = 1; serialize(version, out); serialize(item.object_box, out); serialize(item.rects, out); serialize(item.movable_rects, out); } friend void deserialize(detection_template& item, std::istream& in) { int version = 0; deserialize(version, in); if (version != 1) throw serialization_error("Unexpected version found while deserializing a dlib::scan_image_pyramid::detection_template object."); deserialize(item.object_box, in); deserialize(item.rects, in); deserialize(item.movable_rects, in); } void get_mapped_rect_and_metadata ( const unsigned long number_pyramid_levels, rectangle rect, rectangle& mapped_rect, detection_template& best_template, rectangle& object_box, unsigned long& best_level, unsigned long& detection_template_idx ) const; double get_match_score ( rectangle r1, rectangle r2 ) const { // make the rectangles overlap as much as possible before computing the match score. r1 = move_rect(r1, r2.tl_corner()); return (r1.intersect(r2).area())/(double)(r1 + r2).area(); } void test_coordinate_transforms() { for (long x = -10; x <= 10; x += 10) { for (long y = -10; y <= 10; y += 10) { const rectangle rect = centered_rect(x,y,5,6); rectangle a; a = feats_config.image_to_feat_space(rect); if (a.width() > 10000000 || a.height() > 10000000 ) { DLIB_CASSERT(false, "The image_to_feat_space() routine is outputting rectangles of an implausibly " << "\nlarge size. This means there is probably a bug in your feature extractor."); } a = feats_config.feat_to_image_space(rect); if (a.width() > 10000000 || a.height() > 10000000 ) { DLIB_CASSERT(false, "The feat_to_image_space() routine is outputting rectangles of an implausibly " << "\nlarge size. This means there is probably a bug in your feature extractor."); } } } } feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements. array<feature_extractor_type> feats; std::vector<detection_template> det_templates; unsigned long max_dets_per_template; unsigned long max_pyramid_levels; unsigned long min_pyramid_layer_width; unsigned long min_pyramid_layer_height; }; // ---------------------------------------------------------------------------------------- template <typename T, typename U> void serialize ( const scan_image_pyramid<T,U>& item, std::ostream& out ) { int version = 3; serialize(version, out); serialize(item.feats_config, out); serialize(item.feats, out); serialize(item.det_templates, out); serialize(item.max_dets_per_template, out); serialize(item.max_pyramid_levels, out); serialize(item.min_pyramid_layer_width, out); serialize(item.min_pyramid_layer_height, out); serialize(item.get_num_dimensions(), out); } // ---------------------------------------------------------------------------------------- template <typename T, typename U> void deserialize ( scan_image_pyramid<T,U>& item, std::istream& in ) { int version = 0; deserialize(version, in); if (version != 3) throw serialization_error("Unsupported version found when deserializing a scan_image_pyramid object."); deserialize(item.feats_config, in); deserialize(item.feats, in); deserialize(item.det_templates, in); deserialize(item.max_dets_per_template, in); deserialize(item.max_pyramid_levels, in); deserialize(item.min_pyramid_layer_width, in); deserialize(item.min_pyramid_layer_height, in); // When developing some feature extractor, it's easy to accidentally change its // number of dimensions and then try to deserialize data from an older version of // your extractor into the current code. This check is here to catch that kind of // user error. long dims; deserialize(dims, in); if (item.get_num_dimensions() != dims) throw serialization_error("Number of dimensions in serialized scan_image_pyramid doesn't match the expected number."); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // scan_image_pyramid member functions // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: scan_image_pyramid ( ) : max_dets_per_template(10000), max_pyramid_levels(1000), min_pyramid_layer_width(20), min_pyramid_layer_height(20) { } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > template < typename image_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: load ( const image_type& img ) { unsigned long levels = 0; rectangle rect = get_rect(img); // figure out how many pyramid levels we should be using based on the image size pyramid_type pyr; do { rect = pyr.rect_down(rect); ++levels; } while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height && levels < max_pyramid_levels); if (feats.max_size() < levels) feats.set_max_size(levels); feats.set_size(levels); for (unsigned long i = 0; i < feats.size(); ++i) feats[i].copy_configuration(feats_config); // build our feature pyramid feats[0].load(img); if (feats.size() > 1) { image_type temp1, temp2; pyr(img, temp1); feats[1].load(temp1); swap(temp1,temp2); for (unsigned long i = 2; i < feats.size(); ++i) { pyr(temp2, temp1); feats[i].load(temp1); swap(temp1,temp2); } } } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_max_detections_per_template ( ) const { return max_dets_per_template; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: set_max_detections_per_template ( unsigned long max_dets ) { // make sure requires clause is not broken DLIB_ASSERT(max_dets > 0 , "\t void scan_image_pyramid::set_max_detections_per_template()" << "\n\t The max number of possible detections can't be zero. " << "\n\t max_dets: " << max_dets << "\n\t this: " << this ); max_dets_per_template = max_dets; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > bool scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: is_loaded_with_image ( ) const { return feats.size() != 0; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: copy_configuration( const feature_extractor_type& fe ) { test_coordinate_transforms(); feats_config.copy_configuration(fe); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: copy_configuration ( const scan_image_pyramid& item ) { feats_config.copy_configuration(item.feats_config); det_templates = item.det_templates; max_dets_per_template = item.max_dets_per_template; max_pyramid_levels = item.max_pyramid_levels; min_pyramid_layer_width = item.min_pyramid_layer_width; min_pyramid_layer_height = item.min_pyramid_layer_height; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: add_detection_template ( const rectangle& object_box, const std::vector<rectangle>& stationary_feature_extraction_regions, const std::vector<rectangle>& movable_feature_extraction_regions ) { #ifdef ENABLE_ASSERTS // make sure requires clause is not broken DLIB_ASSERT((get_num_detection_templates() == 0 || (get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size() && get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size())) && center(object_box) == point(0,0), "\t void scan_image_pyramid::add_detection_template()" << "\n\t The number of rects in this new detection template doesn't match " << "\n\t the number in previous detection templates." << "\n\t get_num_stationary_components_per_detection_template(): " << get_num_stationary_components_per_detection_template() << "\n\t stationary_feature_extraction_regions.size(): " << stationary_feature_extraction_regions.size() << "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template() << "\n\t movable_feature_extraction_regions.size(): " << movable_feature_extraction_regions.size() << "\n\t this: " << this ); for (unsigned long i = 0; i < movable_feature_extraction_regions.size(); ++i) { DLIB_ASSERT(center(movable_feature_extraction_regions[i]) == point(0,0), "Invalid inputs were given to this function." << "\n\t center(movable_feature_extraction_regions["<<i<<"]): " << center(movable_feature_extraction_regions[i]) << "\n\t this: " << this ); } #endif detection_template temp; temp.object_box = object_box; temp.rects = stationary_feature_extraction_regions; temp.movable_rects = movable_feature_extraction_regions; det_templates.push_back(temp); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: add_detection_template ( const rectangle& object_box, const std::vector<rectangle>& stationary_feature_extraction_regions ) { // an empty set of movable feature regions const std::vector<rectangle> movable_feature_extraction_regions; add_detection_template(object_box, stationary_feature_extraction_regions, movable_feature_extraction_regions); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_num_detection_templates ( ) const { return det_templates.size(); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_num_stationary_components_per_detection_template ( ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 , "\t unsigned long scan_image_pyramid::get_num_stationary_components_per_detection_template()" << "\n\t You need to give some detection templates before calling this function. " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t this: " << this ); return det_templates[0].rects.size(); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_num_movable_components_per_detection_template ( ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 , "\t unsigned long scan_image_pyramid::get_num_movable_components_per_detection_template()" << "\n\t You need to give some detection templates before calling this function. " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t this: " << this ); return det_templates[0].movable_rects.size(); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_num_components_per_detection_template ( ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 , "\t unsigned long scan_image_pyramid::get_num_components_per_detection_template()" << "\n\t You need to give some detection templates before calling this function. " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t this: " << this ); return get_num_movable_components_per_detection_template() + get_num_stationary_components_per_detection_template(); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_num_dimensions ( ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 , "\t long scan_image_pyramid::get_num_dimensions()" << "\n\t You need to give some detection templates before calling this function. " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t this: " << this ); return feats_config.get_num_dimensions()*get_num_components_per_detection_template() + get_num_detection_templates(); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_max_pyramid_levels ( ) const { return max_pyramid_levels; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: set_max_pyramid_levels ( unsigned long max_levels ) { // make sure requires clause is not broken DLIB_ASSERT(max_levels > 0 , "\t void scan_image_pyramid::set_max_pyramid_levels()" << "\n\t You can't have zero levels. " << "\n\t max_levels: " << max_levels << "\n\t this: " << this ); max_pyramid_levels = max_levels; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: detect ( const feature_vector_type& w, std::vector<std::pair<double, rectangle> >& dets, const double thresh ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 && is_loaded_with_image() && w.size() >= get_num_dimensions(), "\t void scan_image_pyramid::detect()" << "\n\t Invalid inputs were given to this function " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t is_loaded_with_image(): " << is_loaded_with_image() << "\n\t w.size(): " << w.size() << "\n\t get_num_dimensions(): " << get_num_dimensions() << "\n\t this: " << this ); dets.clear(); array<array2d<double> > saliency_images; saliency_images.set_max_size(get_num_components_per_detection_template()); saliency_images.set_size(get_num_components_per_detection_template()); std::vector<std::pair<unsigned int,rectangle> > stationary_region_rects(get_num_stationary_components_per_detection_template()); std::vector<std::pair<unsigned int,rectangle> > movable_region_rects(get_num_movable_components_per_detection_template()); pyramid_type pyr; std::vector<std::pair<double, point> > point_dets; // for all pyramid levels for (unsigned long l = 0; l < feats.size(); ++l) { for (unsigned long i = 0; i < saliency_images.size(); ++i) { saliency_images[i].set_size(feats[l].nr(), feats[l].nc()); const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*i; // build saliency images for pyramid level l for (long r = 0; r < feats[l].nr(); ++r) { for (long c = 0; c < feats[l].nc(); ++c) { const typename feature_extractor_type::descriptor_type& descriptor = feats[l](r,c); double sum = 0; for (unsigned long k = 0; k < descriptor.size(); ++k) { sum += w(descriptor[k].first + offset)*descriptor[k].second; } saliency_images[i][r][c] = sum; } } } // now search the saliency images for (unsigned long i = 0; i < det_templates.size(); ++i) { const point offset = -feats[l].image_to_feat_space(point(0,0)); for (unsigned long j = 0; j < stationary_region_rects.size(); ++j) { stationary_region_rects[j] = std::make_pair(j, translate_rect(feats[l].image_to_feat_space(det_templates[i].rects[j]),offset)); } for (unsigned long j = 0; j < movable_region_rects.size(); ++j) { // Scale the size of the movable rectangle but make sure its center // stays at point(0,0). const rectangle temp = feats[l].image_to_feat_space(det_templates[i].movable_rects[j]); movable_region_rects[j] = std::make_pair(j+stationary_region_rects.size(), centered_rect(point(0,0),temp.width(), temp.height())); } // Scale the object box into the feature extraction image, but keeping it // centered at point(0,0). rectangle scaled_object_box = feats[l].image_to_feat_space(det_templates[i].object_box); scaled_object_box = centered_rect(point(0,0),scaled_object_box.width(), scaled_object_box.height()); // Each detection template gets its own special threshold in addition to // the global detection threshold. This allows us to model the fact that // some detection templates might be more prone to false alarming or since // their size is different naturally require a larger or smaller threshold // (since they integrate over a larger or smaller region of the image). const double template_specific_thresh = w(i); scan_image_movable_parts(point_dets, saliency_images, scaled_object_box, stationary_region_rects, movable_region_rects, thresh+template_specific_thresh, max_dets_per_template); // convert all the point detections into rectangles at the original image scale and coordinate system for (unsigned long j = 0; j < point_dets.size(); ++j) { const double score = point_dets[j].first-template_specific_thresh; point p = point_dets[j].second; p = feats[l].feat_to_image_space(p); rectangle rect = translate_rect(det_templates[i].object_box, p); rect = pyr.rect_up(rect, l); dets.push_back(std::make_pair(score, rect)); } } } std::sort(dets.rbegin(), dets.rend(), compare_pair_rect); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_best_matching_rect ( const rectangle& rect ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 , "\t const rectangle scan_image_pyramid::get_best_matching_rect()" << "\n\t Invalid inputs were given to this function " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t this: " << this ); rectangle mapped_rect, object_box; detection_template best_template; unsigned long best_level, junk; get_mapped_rect_and_metadata(max_pyramid_levels, rect, mapped_rect, best_template, object_box, best_level, junk); return mapped_rect; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_mapped_rect_and_metadata ( const unsigned long number_pyramid_levels, rectangle rect, rectangle& mapped_rect, detection_template& best_template, rectangle& object_box, unsigned long& best_level, unsigned long& detection_template_idx ) const { pyramid_type pyr; // Figure out the pyramid level which best matches rect against one of our // detection template object boxes. best_level = 0; double best_match_score = -1; // Find the best matching detection template for rect for (unsigned long l = 0; l < number_pyramid_levels; ++l) { const rectangle temp = pyr.rect_down(rect,l); if (temp.area() <= 1) break; // At this pyramid level, what matches best? for (unsigned long t = 0; t < det_templates.size(); ++t) { const double match_score = get_match_score(det_templates[t].object_box, temp); if (match_score > best_match_score) { best_match_score = match_score; best_level = l; best_template = det_templates[t]; detection_template_idx = t; } } } // Now we translate best_template into the right spot (it should be centered at the location // determined by rect) and convert it into the feature image coordinate system. rect = pyr.rect_down(rect,best_level); const point offset = -feats_config.image_to_feat_space(point(0,0)); const point origin = feats_config.image_to_feat_space(center(rect)) + offset; for (unsigned long k = 0; k < best_template.rects.size(); ++k) { rectangle temp = best_template.rects[k]; temp = feats_config.image_to_feat_space(temp); temp = translate_rect(temp, origin); best_template.rects[k] = temp; } for (unsigned long k = 0; k < best_template.movable_rects.size(); ++k) { rectangle temp = best_template.movable_rects[k]; temp = feats_config.image_to_feat_space(temp); temp = centered_rect(point(0,0), temp.width(), temp.height()); best_template.movable_rects[k] = temp; } const rectangle scaled_object_box = feats_config.image_to_feat_space(best_template.object_box); object_box = centered_rect(origin-offset, scaled_object_box.width(), scaled_object_box.height()); // The input rectangle was mapped to one of the detection templates. Reverse the process // to figure out what the mapped rectangle is in the original input space. mapped_rect = translate_rect(best_template.object_box, feats_config.feat_to_image_space(origin-offset)); mapped_rect = pyr.rect_up(mapped_rect, best_level); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > full_object_detection scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_full_object_detection ( const rectangle& rect, const feature_vector_type& w ) const { // fill in movable part positions. rectangle mapped_rect; detection_template best_template; unsigned long best_level, junk; rectangle object_box; get_mapped_rect_and_metadata(feats.size(), rect, mapped_rect, best_template, object_box, best_level, junk); Pyramid_type pyr; array2d<double> saliency_image, sum_img; double total_temp_score = 0; // convert into feature space. object_box = object_box.intersect(get_rect(feats[best_level])); std::vector<point> movable_parts; movable_parts.reserve(get_num_movable_components_per_detection_template()); for (unsigned long i = 0; i < get_num_movable_components_per_detection_template(); ++i) { // make the saliency_image for the ith movable part. const rectangle part_rect = best_template.movable_rects[i]; const rectangle area = grow_rect(object_box, part_rect.width()/2, part_rect.height()/2).intersect(get_rect(feats[best_level])); saliency_image.set_size(area.height(), area.width()); const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*(i+get_num_stationary_components_per_detection_template()); // build saliency image for pyramid level best_level for (long r = area.top(); r <= area.bottom(); ++r) { for (long c = area.left(); c <= area.right(); ++c) { const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c); double sum = 0; for (unsigned long k = 0; k < descriptor.size(); ++k) { sum += w(descriptor[k].first + offset)*descriptor[k].second; } saliency_image[r-area.top()][c-area.left()] = sum; } } sum_img.set_size(saliency_image.nr(), saliency_image.nc()); sum_filter_assign(saliency_image, sum_img, part_rect); // Figure out where the maximizer is in sum_img. Note that we // only look in the part of sum_img that corresponds to a location inside // object_box. rectangle valid_area = get_rect(sum_img); valid_area.left() += object_box.left() - area.left(); valid_area.top() += object_box.top() - area.top(); valid_area.right() += object_box.right() - area.right(); valid_area.bottom() += object_box.bottom() - area.bottom(); double max_val = 0; point max_loc; for (long r = valid_area.top(); r <= valid_area.bottom(); ++r) { for (long c = valid_area.left(); c <= valid_area.right(); ++c) { if (sum_img[r][c] > max_val) { //if (object_box.contains(point(c,r) + area.tl_corner())) { max_loc = point(c,r); max_val = sum_img[r][c]; } } } } if (max_val <= 0) { max_loc = OBJECT_PART_NOT_PRESENT; } else { total_temp_score += max_val; // convert max_loc back into feature image space from our cropped image. max_loc += area.tl_corner(); // now convert from feature space to image space. max_loc = feats[best_level].feat_to_image_space(max_loc); max_loc = pyr.point_up(max_loc, best_level); max_loc = nearest_point(rect, max_loc); } movable_parts.push_back(max_loc); } return full_object_detection(rect, movable_parts); } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_feature_vector ( const full_object_detection& obj, feature_vector_type& psi ) const { // make sure requires clause is not broken DLIB_ASSERT(get_num_detection_templates() > 0 && is_loaded_with_image() && psi.size() >= get_num_dimensions() && obj.num_parts() == get_num_movable_components_per_detection_template(), "\t void scan_image_pyramid::get_feature_vector()" << "\n\t Invalid inputs were given to this function " << "\n\t get_num_detection_templates(): " << get_num_detection_templates() << "\n\t is_loaded_with_image(): " << is_loaded_with_image() << "\n\t psi.size(): " << psi.size() << "\n\t get_num_dimensions(): " << get_num_dimensions() << "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template() << "\n\t obj.num_parts(): " << obj.num_parts() << "\n\t this: " << this ); DLIB_ASSERT(all_parts_in_rect(obj), "\t void scan_image_pyramid::get_feature_vector()" << "\n\t Invalid inputs were given to this function " << "\n\t obj.get_rect(): " << obj.get_rect() << "\n\t this: " << this ); rectangle mapped_rect; detection_template best_template; unsigned long best_level, detection_template_idx; rectangle object_box; get_mapped_rect_and_metadata(feats.size(), obj.get_rect(), mapped_rect, best_template, object_box, best_level, detection_template_idx); psi(detection_template_idx) -= 1; Pyramid_type pyr; // put the movable rects at the places indicated by obj. std::vector<rectangle> rects = best_template.rects; for (unsigned long i = 0; i < obj.num_parts(); ++i) { if (obj.part(i) != OBJECT_PART_NOT_PRESENT) { // map from the original image to scaled feature space. point loc = feats[best_level].image_to_feat_space(pyr.point_down(obj.part(i), best_level)); // Make sure the movable part always stays within the object_box. // Otherwise it would be at a place that the detect() function can never // look. loc = nearest_point(object_box, loc); rects.push_back(translate_rect(best_template.movable_rects[i], loc)); } else { // add an empty rectangle since this part wasn't observed. rects.push_back(rectangle()); } } // pull features out of all the boxes in rects. for (unsigned long j = 0; j < rects.size(); ++j) { const rectangle rect = rects[j].intersect(get_rect(feats[best_level])); const unsigned long template_region_id = j; const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*template_region_id; for (long r = rect.top(); r <= rect.bottom(); ++r) { for (long c = rect.left(); c <= rect.right(); ++c) { const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c); for (unsigned long k = 0; k < descriptor.size(); ++k) { psi(descriptor[k].first + offset) += descriptor[k].second; } } } } } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > void scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: set_min_pyramid_layer_size ( unsigned long width, unsigned long height ) { // make sure requires clause is not broken DLIB_ASSERT(width > 0 && height > 0 , "\t void scan_image_pyramid::set_min_pyramid_layer_size()" << "\n\t These sizes can't be zero. " << "\n\t width: " << width << "\n\t height: " << height << "\n\t this: " << this ); min_pyramid_layer_width = width; min_pyramid_layer_height = height; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_min_pyramid_layer_width ( ) const { return min_pyramid_layer_width; } // ---------------------------------------------------------------------------------------- template < typename Pyramid_type, typename Feature_extractor_type > unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>:: get_min_pyramid_layer_height ( ) const { return min_pyramid_layer_height; } // ---------------------------------------------------------------------------------------- } #endif // DLIB_SCAN_IMaGE_PYRAMID_Hh_