dlib C++ Library - edge_list_graphs

// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_
#ifdef DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_

#include <vector>
#include "../string.h"
#include "sample_pair_abstract.h"
#include "ordered_sample_pair_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type,
        typename distance_function_type,
        typename alloc,
        typename T
        >
    void find_percent_shortest_edges_randomly (
        const vector_type& samples,
        const distance_function_type& dist_funct,
        const double percent,
        const unsigned long num,
        const T& random_seed,
        std::vector<sample_pair, alloc>& out
    );
    /*!
        requires
            - 0 < percent <= 1
            - num > 0
            - random_seed must be convertible to a string by dlib::cast_to_string()
            - dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
              to a floating point number 
        ensures
            - This function randomly samples the space of pairs of integers between
              0 and samples.size()-1 inclusive.  For each of these pairs, (i,j), a
              sample_pair is created as follows:    
                sample_pair(i, j, dist_funct(samples[i], samples[j]))
              num such sample_pair objects are generated, duplicates and pairs with distance
              values == infinity are removed, and then the top percent of them with the 
              smallest distance are stored into out.  
            - #out.size() <= num*percent 
            - contains_duplicate_pairs(#out) == false
            - for all valid i:
                - #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
                - #out[i].distance() < std::numeric_limits<double>::infinity()
            - random_seed is used to seed the random number generator used by this 
              function.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type,
        typename distance_function_type,
        typename alloc,
        typename T
        >
    void find_approximate_k_nearest_neighbors (
        const vector_type& samples,
        const distance_function_type& dist_funct,
        const unsigned long k,
        const unsigned long num,
        const T& random_seed,
        std::vector<sample_pair, alloc>& out
    );
    /*!
        requires
            - k > 0
            - num > 0
            - random_seed must be convertible to a string by dlib::cast_to_string()
            - dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
              to a floating point number 
        ensures
            - This function computes an approximate form of k nearest neighbors. As num grows 
              larger the output of this function converges to the output of the 
              find_k_nearest_neighbors() function defined below.
            - Specifically, this function randomly samples the space of pairs of integers between
              0 and samples.size()-1 inclusive.  For each of these pairs, (i,j), a
              sample_pair is created as follows:    
                sample_pair(i, j, dist_funct(samples[i], samples[j]))
              num such sample_pair objects are generated and then exact k-nearest-neighbors
              is performed amongst these sample_pairs and the results are stored into #out.
              Note that samples with an infinite distance between them are considered to 
              be not connected at all.
            - contains_duplicate_pairs(#out) == false
            - for all valid i:
                - #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
                - #out[i].distance() < std::numeric_limits<double>::infinity()
            - random_seed is used to seed the random number generator used by this 
              function.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type,
        typename distance_function_type,
        typename alloc
        >
    void find_k_nearest_neighbors (
        const vector_type& samples,
        const distance_function_type& dist_funct,
        const unsigned long k,
        std::vector<sample_pair, alloc>& out
    );
    /*!
        requires
            - k > 0
            - dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
              to a floating point number 
        ensures
            - #out == a set of sample_pair objects that represent all the k nearest 
              neighbors in samples according to the given distance function dist_funct.  
              Note that samples with an infinite distance between them are considered to 
              be not connected at all.
            - for all valid i:
                - #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
                - #out[i].distance() < std::numeric_limits<double>::infinity()
            - contains_duplicate_pairs(#out) == false
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type 
        >
    bool contains_duplicate_pairs (
        const vector_type& pairs
    );
    /*!
        requires
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - if (pairs contains any elements that are equal according to operator==) then
                - returns true
            - else
                - returns false
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type 
        >
    unsigned long max_index_plus_one (
        const vector_type& pairs
    );
    /*!
        requires
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - if (pairs.size() == 0) then
                - returns 0
            - else
                - returns a number N such that: 
                    - for all i:  pairs[i].index1()   <  N && pairs[i].index2()   <  N
                    - for some j: pairs[j].index1()+1 == N || pairs[j].index2()+1 == N
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type
        >
    void remove_long_edges (
        vector_type& pairs,
        double distance_threshold
    );
    /*!
        requires
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - Removes all elements of pairs that have a distance value greater than the
              given threshold.
            - #pairs.size() <= pairs.size()
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type
        >
    void remove_short_edges (
        vector_type& pairs,
        double distance_threshold
    );
    /*!
        requires
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - Removes all elements of pairs that have a distance value less than the
              given threshold.
            - #pairs.size() <= pairs.size()
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type
        >
    void remove_percent_longest_edges (
        vector_type& pairs,
        double percent 
    );
    /*!
        requires
            - 0 <= percent < 1
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - Removes the given upper percentage of the longest edges in pairs.  I.e.
              this function removes the long edges from pairs.
            - #pairs.size() == (1-percent)*pairs.size()
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type
        >
    void remove_percent_shortest_edges (
        vector_type& pairs,
        double percent 
    );
    /*!
        requires
            - 0 <= percent < 1
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - Removes the given upper percentage of the shortest edges in pairs.  I.e.
              this function removes the short edges from pairs.
            - #pairs.size() == (1-percent)*pairs.size()
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type
        >
    void remove_duplicate_edges (
        vector_type& pairs
    );
    /*!
        requires
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - Removes any duplicate edges from pairs.  That is, for all elements of pairs,
              A and B, such that A == B, only one of A or B will be in pairs after this
              function terminates.
            - #pairs.size() <= pairs.size()
            - is_ordered_by_index(#pairs) == true
            - contains_duplicate_pairs(#pairs) == false
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename vector_type
        >
    bool is_ordered_by_index (
        const vector_type& edges
    );
    /*!
        requires
            - vector_type == a type with an interface compatible with std::vector and it
              must in turn contain objects with an interface compatible with
              dlib::sample_pair or dlib::ordered_sample_pair.
        ensures
            - returns true if and only if the contents of edges are in sorted order
              according to order_by_index().  That is, we return true if calling
              std::stable_sort(edges.begin(), edges.end(), &order_by_index<T>) would not
              change the ordering of elements of edges.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename alloc1, 
        typename alloc2
        >
    void find_neighbor_ranges (
        const std::vector<ordered_sample_pair,alloc1>& edges,
        std::vector<std::pair<unsigned long, unsigned long>,alloc2>& neighbors
    );
    /*!
        requires
            - is_ordered_by_index(edges) == true
              (i.e. edges is sorted so that all the edges for a particular node are grouped
              together)
        ensures
            - This function takes a graph, represented by its list of edges, and finds the
              ranges that contain the edges for each node in the graph.  In particular,
              #neighbors[i] will tell you which edges correspond to the ith node in the
              graph.
            - #neighbors.size() == max_index_plus_one(edges)
              (i.e. neighbors will have an entry for each node in the graph defined by the
              list of edges)
            - for all valid i:
                - all elements of edges such that their index1() value == i are in the
                  range [neighbors[i].first, neighbors[i].second).  That is, for all k such
                  that neighbors[i].first <= k < neighbors[i].second:
                    - edges[k].index1() == i.
                    - all edges outside this range have an index1() value != i
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename alloc1, 
        typename alloc2
        >
    void convert_unordered_to_ordered (
        const std::vector<sample_pair,alloc1>& edges,
        std::vector<ordered_sample_pair,alloc2>& out_edges
    );
    /*!
        ensures
            - interprets edges a defining an undirected graph. 
            - This function populates out_edges with a directed graph that represents the
              same graph as the one in edges.  In particular, this means that for all valid
              i we have the following:
                - if (edges[i].index1() != edges[i].index2()) then
                    - #out_edges contains two edges corresponding to edges[i].  They
                      represent the two directions of this edge.  The distance value from
                      edges[i] is also copied into the output edges.
                - else
                    - #out_edges contains one edge corresponding to edges[i] since this is
                      a self edge.  The distance value from edges[i] is also copied into
                      the output edge.
            - max_index_plus_one(edges) == max_index_plus_one(#out_edges) 
              (i.e. both graphs have the same number of nodes)
            - In all but the most trivial cases, we will have is_ordered_by_index(#out_edges) == false
            - contains_duplicate_pairs(#out_edges) == contains_duplicate_pairs(edges)
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_