// Copyright (C) 2005  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_TOKENIZER_KERNEl_1_
#define DLIB_TOKENIZER_KERNEl_1_

#include <string>
#include <iosfwd>
#include <climits>
#include "../algs.h"
#include "tokenizer_kernel_abstract.h"

namespace dlib
{

    class tokenizer_kernel_1 
    {
        /*!
            INITIAL VALUE
                - in == 0
                - streambuf == 0
                - have_peeked == false
                - head == "_" + lowercase_letters() + uppercase_letters()
                - body == "_" + lowercase_letters() + uppercase_letters() + numbers()
                - headset == pointer to an array of UCHAR_MAX bools and set according 
                  to the CONVENTION.
                - bodyset == pointer to an array of UCHAR_MAX bools and set according 
                  to the CONVENTION.

            CONVENTION  
                - if (stream_is_set()) then
                    - get_stream() == *in
                    - streambuf == in->rdbuf()
                - else
                    - in == 0
                    - streambuf == 0

                - body == get_identifier_body()
                - head == get_identifier_head()

                - if (the char x appears in head) then
                    - headset[static_cast<unsigned char>(x)] == true
                - else
                    - headset[static_cast<unsigned char>(x)] == false

                - if (the char x appears in body) then
                    - bodyset[static_cast<unsigned char>(x)] == true
                - else
                    - bodyset[static_cast<unsigned char>(x)] == false

                - if (have_peeked) then
                    - next_token == the next token to be returned from get_token()
                    - next_type == the type of token in peek_token
        !*/

    public:

        // The name of this enum is irrelevant but on some compilers (gcc on MAC OS X) not having it named
        // causes an error for whatever reason
        enum some_random_name
        {
            END_OF_LINE,
            END_OF_FILE,
            IDENTIFIER,
            CHAR,
            NUMBER,
            WHITE_SPACE
        };

        tokenizer_kernel_1 (        
        );

        virtual ~tokenizer_kernel_1 (
        );

        void clear(
        );

        void set_stream (
            std::istream& in
        );

        bool stream_is_set (
        ) const;

        std::istream& get_stream (
        ) const;

        void get_token (
            int& type,
            std::string& token
        );

        void swap (
            tokenizer_kernel_1& item
        );

        void set_identifier_token (
            const std::string& head,
            const std::string& body
        );

        int peek_type (
        ) const;

        const std::string& peek_token (
        ) const;

        const std::string get_identifier_head (
        ) const;

        const std::string get_identifier_body (
        ) const;

        const std::string lowercase_letters (
        ) const;

        const std::string uppercase_letters (
        ) const;

        const std::string numbers (
        ) const;

    private:

        // restricted functions
        tokenizer_kernel_1(const tokenizer_kernel_1&);        // copy constructor
        tokenizer_kernel_1& operator=(const tokenizer_kernel_1&);    // assignment operator


        // data members
        std::istream* in;
        std::streambuf* streambuf;
        std::string head;
        std::string body;
        bool* headset;
        bool* bodyset;

        mutable std::string next_token;
        mutable int next_type;
        mutable bool have_peeked;
    };    

    inline void swap (
        tokenizer_kernel_1& a, 
        tokenizer_kernel_1& b 
    ) { a.swap(b); }   

}

#ifdef NO_MAKEFILE
#include "tokenizer_kernel_1.cpp"
#endif

#endif // DLIB_TOKENIZER_KERNEl_1