use nom::{
branch::alt,
bytes::complete::{is_a, is_not, take},
bytes::complete::take_while,
character::complete::{anychar, char, digit0, digit1, not_line_ending, one_of},
combinator::{map, opt, recognize, value},
error::context,
multi::{many0, many1, many_till},
Parser,
sequence::{delimited, tuple},
};
use nom_supreme::ParserExt;
use nom_supreme::tag::complete::{tag, tag_no_case};
pub use enums::*;
use crate::{TokenizationError, TokenizationResult};
use crate::Span;
pub mod enums;
pub mod traits;
/// First-letter alphabet for value identifiers (`Identifier::Identifier`).
pub const LOWER_ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz";
/// First-letter alphabet for type identifiers (`Identifier::Type`).
pub const UPPER_ALPHABET: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// Expands to a single `value($token, parser)` literal parser.
// Three flavors:
// - `exact`: wraps the tag in `exact_literal_token` (an identity wrapper,
//   kept so all arms share the same call shape).
// - default: a bare `tag` — matches as a prefix (e.g. "fun" also matches
//   the start of "function").
// - `soft`: via `soft_literal_token`, the tag must form a whole identifier.
macro_rules! include_literal {
(exact $tag:literal => $token:expr) => {
value($token, exact_literal_token(tag($tag)))
};
($tag:literal => $token:expr) => {
value($token, tag($tag))
};
(soft $tag:literal => $token:expr) => {
value($token, soft_literal_token($tag))
};
}
// Expands a `[specifier] "tag" => Token,` list into a tuple of
// `include_literal!` parsers, suitable for passing directly to `alt`.
// Entries are tried in declaration order, so longer tags must come first.
macro_rules! include_literals {
{$($($specifier:ident)* $tag:literal => $token:expr,)+} => {
($(include_literal!($($specifier)* $tag => $token),)+)
};
}
/// Identity wrapper around `parser`: forwards every input unchanged.
///
/// Exists only so the `exact` arm of `include_literal!` has the same call
/// shape as `soft_literal_token` in the `soft` arm.
pub fn exact_literal_token<'t, O, P: Parser<&'t str, O, TokenizationError<'t>>>(
    mut parser: P,
) -> impl FnMut(&'t str) -> TokenizationResult<O> {
    move |rest| parser.parse(rest)
}
/// Matches `literal` only when it forms a *complete* identifier token, so
/// e.g. `struct` is recognized but `structure` is not split into
/// `struct` + `ure`.
pub fn soft_literal_token(literal: Span) -> impl Parser<Span, Span, TokenizationError> {
    // Both identifier variants carry the raw matched text.
    let as_text = |parsed| match parsed {
        Identifier::Type(text) | Identifier::Identifier(text) => text,
    };
    identifier
        .map(as_text)
        .verify(move |&word| word == literal)
}
fn ignore(input: Span) -> TokenizationResult<Ignore> {
let comment = not_line_ending
.cut()
.preceded_by(tag("//"))
.recognize()
.map(Ignore::Comment);
let multiline_comment = tag("/*")
.precedes(many_till(anychar, tag("*/")).cut())
.recognize()
.map(Ignore::MultilineComment);
context(
"ignore",
alt((
comment,
value(Ignore::Whitespace, many1(is_a(" \t"))),
value(Ignore::Newline, many1(is_a("\n\r"))),
multiline_comment,
)),
)(input)
}
/// Parses a keyword token.
///
/// Plain entries use a bare `tag` and therefore match as *prefixes*:
/// "iffy" lexes as `Keyword::If` followed by identifier "fy".
/// NOTE(review): the `soft` entries below are protected against exactly
/// that (they must form a whole identifier) — confirm the hard/soft split
/// is deliberate for `fun`, `val`, `if`, etc.
fn keyword(input: Span) -> TokenizationResult<Keyword> {
    context(
        "keyword",
        alt(include_literals! {
            // Hard keywords: matched verbatim, prefix-style.
            "fun" => Keyword::Fun,
            "val" => Keyword::Val,
            "var" => Keyword::Var,
            "if" => Keyword::If,
            "elif" => Keyword::Elif,
            "else" => Keyword::Else,
            "match" => Keyword::Match,
            "while" => Keyword::While,
            "module" => Keyword::Module,
            "extend" => Keyword::Extend,
            "\\" => Keyword::Lambda,
            // Soft keywords: only match when they are a complete identifier.
            soft "abstract" => Keyword::Abstract,
            soft "trait" => Keyword::Trait,
            soft "struct" => Keyword::Struct,
            soft "class" => Keyword::Class,
            soft "enum" => Keyword::Enum,
            soft "foreign" => Keyword::Foreign,
            soft "type" => Keyword::TypeAlias,
            soft "with" => Keyword::With,
        }),
    )(input)
}
/// Parses a punctuation symbol.
///
/// Entries are tried in order, so `::` must precede `:`.
/// NOTE(review): `_` (TypeGap) is a bare `tag`, and `symbol` is tried
/// before `identifier` in `token`, so `_foo` would lex as `TypeGap`
/// followed by identifier `foo` — confirm that is intended.
fn symbol(input: Span) -> TokenizationResult<Symbol> {
    context(
        "symbol",
        alt(include_literals! {
            "," => Symbol::Comma,
            ";" => Symbol::Semicolon,
            "{" => Symbol::LBrace,
            "}" => Symbol::RBrace,
            "[" => Symbol::LBracket,
            "]" => Symbol::RBracket,
            "(" => Symbol::LParen,
            ")" => Symbol::RParen,
            "_" => Symbol::TypeGap,
            // Longest match first: `::` before `:`.
            "::" => Symbol::DoubleColon,
            ":" => Symbol::Colon,
        }),
    )(input)
}
fn identifier(input: Span) -> TokenizationResult<Identifier> {
let identifier_parser = |alphabet| {
recognize(tuple((
tag("_").opt(),
one_of(alphabet),
take_while(|it: char| it == '_' || it.is_alphanumeric()),
)))
};
context(
"identifier",
alt((
map(identifier_parser(LOWER_ALPHABET), Identifier::Identifier),
map(identifier_parser(UPPER_ALPHABET), Identifier::Type),
)),
)(input)
}
fn literal(input: Span) -> TokenizationResult<Literal> {
fn number_parser<'a>(
prefix: &'static str,
alphabet: &'static str,
) -> impl Parser<Span<'a>, &'a str, TokenizationError<'a>> {
tag_no_case(prefix)
.precedes(
tuple((
one_of(alphabet),
many0(one_of(alphabet).or(one_of("0_"))),
one_of(alphabet).or(char('0')),
))
.recognize()
.or(one_of(alphabet).or(char('0')).recognize())
.cut(),
)
.recognize()
}
let binary = number_parser("0b", "1");
let octal = number_parser("0c", "1234567");
let hex = number_parser("0x", "123456789ABCDEF");
let floating = recognize(tuple((
opt(one_of("-+")),
alt((
tuple((digit1, opt(tuple((char('.'), digit0))))).recognize(),
tuple((char('.'), digit1)).recognize(),
)),
opt(tuple((tag_no_case("e"), opt(one_of("-+")), digit1))),
)));
let char_p = delimited(char('\''), char('\'').not().recognize(), char('\''));
let string = delimited(char('"'), is_not(r#"""#).opt(), char('"'));
context(
"literal",
alt((
map(binary, Literal::Binary),
map(octal, Literal::Octal),
map(hex, Literal::Hex),
map(floating, Literal::Floating),
map(char_p, Literal::Char),
map(string, |it| Literal::String(it.unwrap_or_default())),
)),
)(input)
}
/// Parses an operator token, grouped by category (`Math`, `Comparison`,
/// `Logic`, `Bit`).
///
/// Within each group, entries are tried in order, so longer operators are
/// listed before their prefixes (`**` before `*`, `<=>`/`<=` before `<`,
/// `==` before `=`, `||` before `|`, `&&` before `&`).
fn operator(input: Span) -> TokenizationResult<Operator> {
    context(
        "operator",
        alt((
            // Uncategorized structural operators.
            alt(include_literals! {
                "." => Operator::Dot,
                "=>" => Operator::Flow,
            }),
            map(
                alt(include_literals! {
                    "+" => MathOperator::Plus,
                    "-" => MathOperator::Sub,
                    "**" => MathOperator::Pow,
                    "*" => MathOperator::Times,
                    "/" => MathOperator::Div,
                    "%" => MathOperator::Mod,
                }),
                Operator::Math,
            ),
            map(
                alt(include_literals! {
                    "<=>" => ComparisonOperator::Spaceship,
                    "==" => ComparisonOperator::Equiv,
                    "=" => ComparisonOperator::Equals,
                    "!=" => ComparisonOperator::NotEquiv,
                    ">=" => ComparisonOperator::GreaterEquals,
                    ">" => ComparisonOperator::Greater,
                    "<=" => ComparisonOperator::LessEquals,
                    "<" => ComparisonOperator::Less,
                }),
                Operator::Comparison,
            ),
            map(
                // `||`/`&&` must be tried before the single-char bit forms,
                // which is guaranteed by this group preceding `Bit` below.
                alt(include_literals! {
                    "||" => LogicOperator::OrLogic,
                    "&&" => LogicOperator::AndLogic,
                    "!" => LogicOperator::NotLogic,
                }),
                Operator::Logic,
            ),
            map(
                alt(include_literals! {
                    "|" => BitOperator::OrBit,
                    "&" => BitOperator::AndBit,
                    "^" => BitOperator::XorBit,
                    "~" => BitOperator::NotBit,
                }),
                Operator::Bit,
            ),
        )),
    )(input)
}
/// Parses one token from the head of `input`.
///
/// Categories are tried in a fixed order — trivia first, keywords before
/// identifiers, literals before operators. Any single character that fits
/// no category is consumed as `Token::Unknown` so the lexer always makes
/// progress.
pub(crate) fn token(input: Span) -> TokenizationResult<Token> {
    let known = alt((
        map(ignore, Token::Ignore),
        map(keyword, Token::Keyword),
        map(symbol, Token::Symbol),
        map(identifier, Token::Identifier),
        map(literal, Token::Literal),
        map(operator, Token::Operator),
    ));
    let fallback = value(Token::Unknown, take(1usize));
    context("lexer", known.or(fallback))(input)
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use std::fmt::Debug;
    use nom::Finish;
    use test_case::test_case;
    #[allow(unused_imports)]
    use crate::lexer::{ignore, Ignore};
    use crate::TokenizationResult;
    // Each case evaluates a parser eagerly and hands its result to
    // `test_parser`, which checks both the produced value and the
    // unconsumed remainder of the input.
    #[test_case(ignore("// hello world!"), Ignore::Comment("// hello world!"), None; "ignore_parser_comment")]
    #[test_case(
    ignore("//hello world!\nthis is not comment"),
    Ignore::Comment("//hello world!"),
    Some("\nthis is not comment"); "ignore_parser_comment_another_line")]
    #[test_case(
    ignore("/* this is\nmultiline comment */"),
    Ignore::MultilineComment("/* this is\nmultiline comment */"),
    None; "ignore_parser_multiline_comment"
    )]
    #[test_case(
    ignore("/* this is\nmultiline comment */ this is not"),
    Ignore::MultilineComment("/* this is\nmultiline comment */"),
    Some(" this is not"); "ignore_parser_multiline_comment_with_rest"
    )]
    #[test_case(ignore("\n\n\n"), Ignore::Newline, None; "ignore_parser_newline")]
    #[test_case(ignore(" \t"), Ignore::Whitespace, None; "ignore_parser_whitespace")]
    fn test_parser<T: PartialEq + Debug>(
        result: TokenizationResult<T>,
        expected: T,
        // `None` means the parser must consume the entire input.
        expected_rest: Option<&'static str>,
    ) {
        let (rest, data) = result.finish().unwrap();
        assert_eq!(rest, expected_rest.unwrap_or(""));
        assert_eq!(data, expected);
    }
}