#pragma once

#include "filedb/database_tables.h"

#include <cstddef>
#include <cstdint>
#include <deque>
#include <istream>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <variant>

// The Tango Database RFC[1] contains a specification for the filedb format,
// although it is unfortunately a little ambiguous in places.
//
// TODO: Write a more formal specification of the file format and update the
// Tango RFC with it.
//
// The format is a little tricky to parse because it includes unquoted strings
// and the set of characters allowed in an unquoted string is context-sensitive.
//
// For example, before a ':' unquoted strings make up
// device/attribute/class/property names and can only include '_', letters and
// digits.  In contrast, in most cases after a ':' unquoted strings constitute
// values and can be anything.  The exception to this is the server records,
// where the unquoted strings after the ':' must be part of device names.
//
// The basic strategy to solve this is to use a Tokenizer/Parser pair, where
// the parser pulls tokens from the Tokenizer.  The Parser then calls
// Tokenizer::unquoted_is_identifier() and Tokenizer::unquoted_is_string() to
// set which mode the Tokenizer uses for unquoted strings.
//
// [1] https://tango-controls.readthedocs.io/projects/rfc/en/latest/6/Database.html

namespace FileDb
{

namespace detail
{

// A single lexical token, together with the position where it starts.
//
// Token is an aggregate, so it can be brace-initialised member-wise in the
// order (kind, str, col, lineno).  The default member initialisers below only
// guarantee that a default-initialised Token (`Token t;`) holds determinate
// values; they are the same values that `Token{}` produces.
struct Token
{
    enum class Kind
    {
        STRING,
        COMMA,
        COLON,
        SLASH,
        ARROW,
        END_OF_FILE,
        ERROR,
    };

    Kind kind = Kind::STRING;
    // value if kind == STRING
    // error message if kind == ERROR
    // undefined otherwise
    std::string str;

    // column where this token starts, 1-indexed (0 until a position is set)
    std::uint16_t col = 0;
    // line number where this token starts, 1-indexed (0 until a position is set)
    std::uint16_t lineno = 0;
};

// Streaming tokenizer for the filedb format
//
// Characters are supplied one at a time through feed().  How unquoted strings
// are delimited depends on the current mode, which is switched with
// unquoted_is_identifier() / unquoted_is_string(); identifier mode is the
// initial mode.
class Tokenizer
{
  public:
    // Supply the next input character.
    // NOTE(review): presumably returns a Token once one has been completed and
    // std::nullopt while more input is needed -- confirm against the
    // implementation.
    std::optional<Token> feed(char c) noexcept;

    // Unquoted strings will be treated as identifiers, so can include anything
    // except control characters (ASCII <= 32) and ':', '/', ',', '\\', '->'.
    void unquoted_is_identifier() noexcept;

    // Unquoted strings can include anything except control characters (ASCII <=
    // 32), ',' and '\\'.
    void unquoted_is_string() noexcept;

  private:
    enum class State
    {
        TOKEN_BEGIN,
        UNQUOTED_STRING,
        QUOTED_STRING,
        LINE_COMMENT,
        DONE,
    };

    // Make a token at the current column and line number
    // The created token will be saved to m_current_token
    Token make_token(Token::Kind kind) noexcept;
    // Make an ERROR token with the provided error message
    // The created token will be saved to m_current_token
    template <typename... Args>
    Token make_error(Args &&...args) noexcept;

    // Look at the character at index of the lookahead buffer
    char peek(std::size_t index = 0) const noexcept;
    // Consume the next character from the lookahead buffer
    void consume() noexcept;
    // Return true if c is found at index of the lookahead buffer
    bool is_char(char c, std::size_t index = 0) const noexcept;
    // Return true if seq is found at index of the lookahead buffer
    bool is_sequence(std::string_view seq, std::size_t index = 0) const noexcept;
    // Return true if whitespace to be skipped is found at index of the lookahead buffer
    bool is_whitespace(std::size_t index = 0) const noexcept;
    // Return true if index of the lookahead buffer is at end of file
    bool is_eof(std::size_t index = 0) const noexcept;
    // Return true if the next character in the lookahead buffer means we are at
    // the end of an unquoted string
    bool is_end_of_unquoted() const noexcept;

    // Lookahead buffer of pending input.
    // NOTE(review): elements are int rather than char, presumably so that an
    // end-of-file marker can be stored alongside characters -- confirm.
    std::deque<int> m_lookahead;

    // For error reporting
    std::uint16_t m_col = 1;
    std::uint16_t m_lineno = 1;

    State m_state = State::TOKEN_BEGIN;
    bool m_unquoted_is_identifier = true;

    // Holds one of:
    // - the STRING token we are building when m_state == UNQUOTED_STRING or QUOTED_STRING
    // - the token we sent previously if m_state == TOKEN_BEGIN
    // - the END_OF_FILE/ERROR token if m_state == DONE
    //
    // Value-initialised so that its kind/col/lineno are never indeterminate
    // before the first token has been made.
    Token m_current_token{};
};

// Event-based parser for the filedb format.
//
// Each call to next_event() yields the next record from the file, or EndOfFile.
// The input stream is held by reference, so it must outlive the Parser.
class Parser
{
  public:
    // in: stream to parse; stored by reference, not copied
    // filename: only used when building error messages
    explicit Parser(std::istream &in, std::string filename = "<none>") :
        // filename is a by-value sink parameter, so move it rather than copy it
        m_filename{std::move(filename)},
        m_stream{in}
    {
    }

    using EndOfFile = std::monostate;

    using RecordEvent = std::variant<EndOfFile,
                                     ServerRecord,
                                     DevicePropertyRecord,
                                     ClassPropertyRecord,
                                     DeviceAttributePropertyRecord,
                                     ClassAttributePropertyRecord,
                                     FreeObjectPropertyRecord>;

    // Return the next event from the file.  Throws a DevFailed on error.
    RecordEvent next_event();

  private:
    // Get the token at index.
    //
    // Expects index < m_lookahead.size()
    const Token &peek(std::size_t index = 0);

    // Consume the first token in the lookahead
    void consume();

    // Add the next token from the file to the lookahead.  Throws a DevFailed on error.
    void next_token();

    bool is_end_of_record();

    // Throw error pointing to character at token, shifted by offset columns
    template <typename... Args>
    [[noreturn]] void throw_at_token(const char *reason, std::size_t offset, const Token &token, Args &&...args);

    // Throw error pointing to character at token
    template <typename... Args>
    [[noreturn]] void throw_at_token(const char *reason, const Token &token, Args &&...args)
    {
        // Delegate to the offset overload with no column shift
        throw_at_token(reason, 0, token, std::forward<Args>(args)...);
    }

    Tokenizer m_tokenizer;
    std::deque<Token> m_lookahead;
    std::string m_filename; // For error messages
    std::istream &m_stream;
};

} // namespace detail

} // namespace FileDb
