// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
#ifdef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_

#include "../matrix.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename EXP
        >
    unsigned long bottom_up_cluster (
        const matrix_exp<EXP>& dists,
        std::vector<unsigned long>& labels,
        unsigned long min_num_clusters,
        double max_dist = std::numeric_limits<double>::infinity()
    );
    /*!
        requires
            - dists.nr() == dists.nc()
            - min_num_clusters > 0
            - dists == trans(dists)
              (l.e. dists should be symmetric)
        ensures
            - Runs a bottom up agglomerative clustering algorithm.   
            - Interprets dists as a matrix that gives the distances between dists.nr()
              items.  In particular, we take dists(i,j) to be the distance between the ith
              and jth element of some set.  This function clusters the elements of this set
              into at least min_num_clusters (or dists.nr() if there aren't enough
              elements).  Additionally, within each cluster, the maximum pairwise distance
              between any two cluster elements is <= max_dist.
            - returns the number of clusters found.
            - #labels.size() == dists.nr()
            - for all valid i:
                - #labels[i] == the cluster ID of the node with index i (i.e. the node
                  corresponding to the distances dists(i,*)).  
                - 0 <= #labels[i] < the number of clusters found
                  (i.e. cluster IDs are assigned contiguously and start at 0) 
    !*/

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    struct snl_range
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object represents an interval on the real number line.  It is used
                to store the outputs of the segment_number_line() routine defined below.
        !*/

        snl_range(
        );
        /*!
            ensures
                - #lower == 0
                - #upper == 0
        !*/

        snl_range(
            double val
        );
        /*!
            ensures
                - #lower == val 
                - #upper == val 
        !*/

        snl_range(
            double l, 
            double u
        );
        /*!
            requires
                - l <= u
            ensures
                - #lower == l 
                - #upper == u 
        !*/

        double lower;
        double upper;

        double width(
        ) const { return upper-lower; }
        /*!
            ensures
                - returns the width of this interval on the number line.
        !*/

        bool operator<(const snl_range& item) const { return lower < item.lower; }
        /*!
            ensures
                - provides a total ordering of snl_range objects assuming they are
                  non-overlapping.
        !*/
    };

    std::ostream& operator<< (std::ostream& out, const snl_range& item );
    /*!
        ensures
            - prints item to out in the form [lower,upper].
    !*/

// ----------------------------------------------------------------------------------------

    std::vector<snl_range> segment_number_line (
        const std::vector<double>& x,
        const double max_range_width
    );
    /*!
        requires
            - max_range_width >= 0
        ensures
            - Finds a clustering of the values in x and returns the ranges that define the
              clustering.  This routine uses a combination of bottom up clustering and a
              simple greedy scan to try and find the most compact set of ranges that
              contain all the values in x.  
            - This routine has approximately linear runtime.
            - Every value in x will be contained inside one of the returned snl_range
              objects;
            - All returned snl_range object's will have a width() <= max_range_width and
              will also be non-overlapping.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_