regen/dynamic/
dyn_lang.rs

1//! Dynamic language implementation
2
3use std::collections::HashSet;
4
5use regex::Regex;
6
7use crate::core::{Language, TokenRule};
8use crate::sdk::lex::{self, TokenizerOutput};
9use crate::sdk::{ASTParser, TokenStream};
10
11use super::{DynAstNode, DynTok};
12
13impl Language {
14    /// Dynamically tokenize a source code string based on this language
15    pub fn dyn_tokenize(&self, src: &str) -> TokenizerOutput<DynTok> {
16        // Generate token rules dynamically
17        let mut extract_token_rules = HashSet::new();
18        for token in &self.tokens {
19            if token.is_extract {
20                extract_token_rules.insert(token.name.clone());
21            }
22        }
23        let token_rules = &self
24            .token_rules
25            .iter()
26            .map(|r| match r {
27                TokenRule::IgnoreLiteral(literal) => {
28                    lex::Rule::Literal(literal.to_string(), lex::Target::Ignore)
29                }
30                TokenRule::IgnoreRegExp(regex) => {
31                    let regex = format!("^{regex}");
32                    lex::Rule::Regex(Regex::new(&regex).unwrap(), lex::Target::Ignore)
33                }
34                TokenRule::Literal(token_type, literal) => {
35                    if extract_token_rules.contains(token_type) {
36                        lex::Rule::Literal(
37                            literal.to_string(),
38                            lex::Target::Extract(format!("T{token_type}")),
39                        )
40                    } else {
41                        lex::Rule::Literal(
42                            literal.to_string(),
43                            lex::Target::Keep(format!("T{token_type}")),
44                        )
45                    }
46                }
47                TokenRule::RegExp(token_type, regex) => {
48                    let regex = format!("^{regex}");
49                    if extract_token_rules.contains(token_type) {
50                        lex::Rule::Regex(
51                            Regex::new(&regex).unwrap(),
52                            lex::Target::Extract(format!("T{token_type}")),
53                        )
54                    } else {
55                        lex::Rule::Regex(
56                            Regex::new(&regex).unwrap(),
57                            lex::Target::Keep(format!("T{token_type}")),
58                        )
59                    }
60                }
61            })
62            .collect::<Vec<_>>();
63
64        lex::run_tokenizer(src, "".to_owned(), token_rules)
65    }
66}
67
/// Generate a dynamic AST from a token stream based on this language
impl ASTParser for Language {
    // Token and AST node types the dynamic parser produces.
    type T = DynTok;
    type A = DynAstNode;

    /// Parse the token stream into a dynamic AST rooted at this
    /// language's `target` rule, delegating to `DynAstNode::parse_rule`.
    /// Presumably returns `None` when parsing fails — confirm against the
    /// `ASTParser` trait docs.
    fn parse_ast(&self, ts: &mut TokenStream<Self::T>) -> Option<Self::A> {
        DynAstNode::parse_rule(ts, self, &self.target)
    }
}