1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
extern crate proc_macro;
extern crate proc_macro2;
extern crate syn;
extern crate quote;

use proc_macro::TokenStream;
use proc_macro2::TokenStream as TokenStream2;
use syn::{
    ItemEnum,
    Fields,
    Ident,
    Lit,
    Path
};
use quote::quote;

#[proc_macro_derive(Lexable, attributes(end, error, token, regex, token_start, token_end, skip, prio))]
pub fn derive_lexable(input: TokenStream) -> TokenStream {
    let item: ItemEnum = syn::parse(input).expect("Only Enums can be used as a TokenType.");

    let enum_size = item.variants.len();
    let name = &item.ident;

    let mut match_statements: Vec<TokenStream2> = Vec::new();
    let mut skip_statements: Vec<TokenStream2> = Vec::new();
    let mut regex_init_statements: Vec<TokenStream2> = Vec::new();
    let mut inclusive_statements: Vec<TokenStream2> = Vec::new();
    let mut prio_statements: Vec<TokenStream2> = Vec::new();

    let token_attr_ident = syn::parse_str::<Ident>("token").unwrap();
    let regex_attr_ident = syn::parse_str::<Ident>("regex").unwrap();
    let skip_attr_ident = syn::parse_str::<Ident>("skip").unwrap();
    let error_attr_ident = syn::parse_str::<Ident>("error").unwrap();
    let end_attr_ident = syn::parse_str::<Ident>("end").unwrap();
    let token_start_ident = syn::parse_str::<Ident>("token_start").unwrap();
    let token_end_ident = syn::parse_str::<Ident>("token_end").unwrap();
    let prio_ident = syn::parse_str::<Ident>("prio").unwrap();

    let mut end_set = false;
    let mut error_set = false;

    let mut err_accessor = syn::parse_str::<Ident>("error").unwrap();
    let mut end_accessor = syn::parse_str::<Ident>("end").unwrap();

    for variant in &item.variants {
        let variant_ident = &variant.ident;
        let accessor = format!("{}::{}", name, variant_ident);

        if variant.discriminant.is_some() {
            panic!("`{}::{}` has a discriminant, this is not allowed for a TokenType.", name, variant.ident);
        }
        match variant.fields {
            Fields::Unit => {},
            _ => panic!("`{}::{}` has fields, this is not allowed for a TokenType.", name, variant.ident),
        }

        let mut token_end_val = String::new();
        let mut token_start_val = String::new();

        for attr in &variant.attrs {
            let (attr_ident, attr_lit) = read_attribute(attr);
            
            // If this token variant is matched by a literal
            if attr_ident == token_attr_ident {
                if let Some(Lit::Str(literal)) = attr_lit {
                    let literal_value = literal.value();

                    let match_statement = quote! {
                        if input == #literal_value {
                            matches.push(#name::#variant_ident);
                        }
                    };

                    match_statements.push(match_statement);
                } else {
                    panic!("Value for token attribute must be a string literal.");
                }
            }
            // If this token variant is matched by a regex
            else if attr_ident == regex_attr_ident {
                if let Some(Lit::Str(literal)) = attr_lit {
                    let mut literal_value = literal.value();

                    literal_value.insert_str(0, "^");
                    literal_value += "$";

                    let regex_ident_string = format!("{}_regex", variant_ident);
                    let regex_ident = syn::parse_str::<Ident>(&regex_ident_string).expect("Unknown parse error.");

                    let regex_init_statement = quote! {
                        static ref #regex_ident : Regex = Regex::new(#literal_value).unwrap();
                    };
                    
                    let match_statement = quote! {
                        if #regex_ident.is_match(input) {
                            matches.push(#name::#variant_ident);
                        }
                    };

                    regex_init_statements.push(regex_init_statement);
                    match_statements.push(match_statement);
                } else {
                    panic!("Value for regex attribute must be a string literal.");
                }
            }
            else if attr_ident == end_attr_ident {
                if end_set {
                    panic!("Only one end variant can be defined for a TokenType.");
                }
                end_set = true;
                end_accessor = variant_ident.clone();
            }
            else if attr_ident == error_attr_ident {
                if error_set {
                    panic!("Only one error variant can be defined for a TokenType.");
                }
                error_set = true;
                err_accessor = variant_ident.clone();
            }
            // If this token variant should be skipped
            else if attr_ident == skip_attr_ident {
                let skip_statement = quote! {
                    if *self == #name::#variant_ident {
                        return true;
                    }
                };

                skip_statements.push(skip_statement);
            }

            else if attr_ident == token_start_ident {
                if let Some(Lit::Str(literal)) = attr_lit {
                    token_start_val = literal.value();
                }
            }

            else if attr_ident == token_end_ident {
                if let Some(Lit::Str(literal)) = attr_lit {
                    token_end_val = literal.value();
                }
            }

            else if attr_ident == prio_ident {
                if let Some(Lit::Int(literal)) = attr_lit {
                    let prio: i8 = literal.base10_parse().expect("Priority needs to be an 8-bit signed integer.");
                    let prio_statement = quote! {
                        if *self == #name::#variant_ident {
                            return #prio;
                        }
                    };
                    prio_statements.push(prio_statement);
                }
            }
        }

        if !token_start_val.is_empty() && !token_end_val.is_empty() {
            let match_statement = quote! {
                if input.starts_with(#token_start_val) {
                    if !input[0..input.len() - 1].ends_with(#token_end_val) {
                        matches.push(#name::#variant_ident);
                    }
                }
            };
            let inclusive_statement = quote! {
                if *self == #name::#variant_ident {
                    return true;
                }
            };
            match_statements.push(match_statement);
            inclusive_statements.push(inclusive_statement);
        }
    }

    if !end_set {
        panic!("You need to specify an end variant for a TokenType.");
    }
    if !error_set {
        panic!("You need to specify an error variant for a TokenType.");
    }

    let token_stream = quote! {
        impl Lexable for #name {
            fn lexer<'source, S>(source: S) -> Lexer<#name, S>
            where S: Source<'source> {
                let mut ret = Lexer::new(source);
                ret.advance();
                ret
            }

            fn match_token(input: &str) -> Vec<#name> {
                let mut matches: Vec<#name> = Vec::new();
                
                lazy_static! {
                    #(
                        #regex_init_statements
                    )*
                }

                #(
                    #match_statements
                )*

                matches
            }

            fn get_end_variant() -> #name {
                #name::#end_accessor
            }

            fn get_error_variant() -> #name {
                #name::#err_accessor
            }

            fn should_skip(&self) -> bool {
                #(
                    #skip_statements
                )*

                false
            }

            fn is_inclusive(&self) -> bool {
                #(
                    #inclusive_statements
                )*

                false
            }

            fn prio(&self) -> i8 {
                #(
                    #prio_statements
                )*
                
                0
            }
        }
    };
    token_stream.into()
}

/// Splits an attribute into its name and, when it has one, its literal value.
///
/// * `#[name = <literal>]` yields `(name, Some(literal))`.
/// * `#[name]` yields `(name, None)`.
/// * `#[name(...)]` yields `(name, None)`. List-style attributes such as `#[allow(..)]` or
///   `#[doc(hidden)]` are thereby reported by name only, so a caller that does not know the
///   name can simply ignore them instead of the whole derive being aborted.
///
/// # Panics
///
/// Panics when the attribute cannot be parsed as a meta item, or when its path is not a single
/// identifier (for example `#[rustfmt::skip]`).
fn read_attribute(attr: &syn::Attribute) -> (Ident, Option<Lit>) {
    let meta = attr.parse_meta().expect("Attribute malformed: Meta parsing failed.");

    let (path, lit) = match meta {
        syn::Meta::NameValue(args) => (args.path, Some(args.lit)),
        syn::Meta::Path(path) => (path, None),
        syn::Meta::List(list) => (list.path, None),
    };

    let ident = path
        .get_ident()
        .cloned()
        .expect("Attribute malformed: Parsing of path to ident failed.");
    (ident, lit)
}