1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
//! <p align="center">
//!      <img src="https://raw.github.com/maciejhirsz/logos/master/logos.png?sanitize=true" width="60%" alt="Logos">
//! </p>
//!
//! ## Create ridiculously fast Lexers.
//!
//! This is the `#[derive]` macro crate; [for documentation, go to the main crate](https://docs.rs/logos).

// The `quote!` macro requires deep recursion.
#![recursion_limit = "196"]

extern crate syn;
extern crate quote;
extern crate proc_macro;
extern crate proc_macro2;

mod util;
mod tree;
mod regex;
mod handlers;
mod generator;

use util::OptionExt;
use handlers::Handlers;
use generator::Generator;

use regex::Regex;
use quote::quote;
use proc_macro::TokenStream;
use proc_macro2::TokenTree;
use syn::{ItemEnum, Fields, LitStr};

/// Derives `::logos::Logos` for an enum made of unit variants.
///
/// Variants are configured through the following attributes:
///
/// * `#[error]` – the variant assigned to `lex.token` by the generated `_error` handler.
/// * `#[end]` – the variant assigned to `lex.token` by the generated `eof` handler.
/// * `#[token = "..."]` – the string is turned into a pattern with `Regex::sequence`.
/// * `#[regex = "..."]` – the string is turned into a pattern with `Regex::from`.
///
/// The generated `impl` sets `SIZE` to the number of variants, `ERROR` to the
/// `#[error]` variant, and makes `lexicon()` return a reference to the table of
/// handlers produced from the registered patterns.
///
/// # Panics
///
/// Panics (reported by the compiler as a derive failure) when:
///
/// * the input is not an enum,
/// * a variant has an explicit discriminant or carries fields,
/// * a `#[token]` / `#[regex]` attribute is not of the form `= "string literal"`,
/// * no `#[error]` or no `#[end]` variant was declared.
#[proc_macro_derive(Logos, attributes(error, end, token, regex))]
pub fn logos(input: TokenStream) -> TokenStream {
    let item: ItemEnum = syn::parse(input).expect("#[derive(Logos)] can only be applied to enums");

    let size = item.variants.len();
    let name = &item.ident;

    let mut error = None;
    let mut end = None;

    let mut handlers = Handlers::new();

    for variant in &item.variants {
        if variant.discriminant.is_some() {
            panic!("`{}::{}` has a discriminant value set. This is not allowed for Tokens.", name, variant.ident);
        }

        match variant.fields {
            Fields::Unit => {},
            _ => panic!("`{}::{}` has fields. This is not allowed for Tokens.", name, variant.ident),
        }

        // NOTE: every branch below ends with `break`, so only the first Logos
        // attribute found on a variant is honoured; later ones are ignored.
        for attr in &variant.attrs {
            let ident = &attr.path.segments[0].ident;

            if ident == "error" {
                // NOTE(review): `OptionExt::insert` is defined in `util`; judging by the
                // message it presumably rejects a second declaration — confirm there.
                error.insert(&variant.ident, "Only one #[error] variant can be declared.");

                break;
            }

            if ident == "end" {
                end.insert(&variant.ident, "Only one #[end] variant can be declared.");

                break;
            }

            let is_regex = ident == "regex";

            if is_regex || ident == "token" {
                let pattern = attribute_literal(attr);

                let regex = if is_regex {
                    Regex::from(&pattern)
                } else {
                    Regex::sequence(&pattern)
                };

                handlers.insert(regex, &variant.ident);

                break;
            }
        }
    }

    let error = error.expect("Missing #[error] token variant.");
    let end = end.expect("Missing #[end] token variant.");

    let mut generator = Generator::new(name);

    // Turn every handler into the expression stored in the table returned by `lexicon()`.
    let handlers = handlers.into_iter().map(|handler| {
        use handlers::Handler;

        match handler {
            Handler::Eof        => quote! { Some(eof) },
            Handler::Error      => quote! { Some(_error) },
            Handler::Whitespace => quote! { None },
            Handler::Tree(tree) => generator.print_tree(tree),
        }
    }).collect::<Vec<_>>();

    // Must be read after the handlers were printed: `print_tree` borrows the
    // generator mutably while the table is being built.
    let fns = generator.fns();

    let tokens = quote! {
        impl ::logos::Logos for #name {
            type Extras = ();

            const SIZE: usize = #size;
            const ERROR: Self = #name::#error;

            fn lexicon<'a, S: ::logos::Source>() -> &'a ::logos::Lexicon<::logos::Lexer<Self, S>> {
                use ::logos::internal::LexerInternal;

                type Lexer<S> = ::logos::Lexer<#name, S>;

                fn eof<S: ::logos::Source>(lex: &mut Lexer<S>) {
                    lex.token = #name::#end;
                }

                fn _error<S: ::logos::Source>(lex: &mut Lexer<S>) {
                    lex.bump();

                    lex.token = #name::#error;
                }

                #fns

                &[#(#handlers),*]
            }
        }
    };

    TokenStream::from(tokens)
}

/// Extracts the string out of a `#[token = "..."]` or `#[regex = "..."]` attribute.
///
/// The attribute's token stream has to consist of exactly a `=` punctuation
/// followed by a string literal; anything else panics with a message naming
/// the offending attribute.
fn attribute_literal(attr: &syn::Attribute) -> String {
    let name = &attr.path.segments[0].ident;
    let mut tts = attr.tts.clone().into_iter();

    match tts.next() {
        Some(TokenTree::Punct(ref punct)) if punct.as_char() == '=' => {},
        Some(invalid) => panic!("#[{}] Expected '=', got {}", name, invalid),
        None => panic!("#[{}] Expected '=' followed by a literal string", name),
    }

    let value = match tts.next() {
        Some(TokenTree::Literal(literal)) => {
            // Re-parse the raw literal through `syn` so that escapes are resolved
            // and non-string literals are rejected.
            syn::parse::<LitStr>(quote!{ #literal }.into())
                .unwrap_or_else(|err| panic!("#[{}] value must be a literal string: {:?}", name, err))
                .value()
        },
        Some(invalid) => panic!("#[{}] Invalid value: {}", name, invalid),
        None => panic!("#[{}] Missing literal string after '='", name),
    };

    assert!(tts.next().is_none(), "#[{}] Unexpected token after the literal string", name);

    value
}