// regen/dynamic/dyn_lang.rs
use std::collections::HashSet;
4
5use regex::Regex;
6
7use crate::core::{Language, TokenRule};
8use crate::sdk::lex::{self, TokenizerOutput};
9use crate::sdk::{ASTParser, TokenStream};
10
11use super::{DynAstNode, DynTok};
12
13impl Language {
14 pub fn dyn_tokenize(&self, src: &str) -> TokenizerOutput<DynTok> {
16 let mut extract_token_rules = HashSet::new();
18 for token in &self.tokens {
19 if token.is_extract {
20 extract_token_rules.insert(token.name.clone());
21 }
22 }
23 let token_rules = &self
24 .token_rules
25 .iter()
26 .map(|r| match r {
27 TokenRule::IgnoreLiteral(literal) => {
28 lex::Rule::Literal(literal.to_string(), lex::Target::Ignore)
29 }
30 TokenRule::IgnoreRegExp(regex) => {
31 let regex = format!("^{regex}");
32 lex::Rule::Regex(Regex::new(®ex).unwrap(), lex::Target::Ignore)
33 }
34 TokenRule::Literal(token_type, literal) => {
35 if extract_token_rules.contains(token_type) {
36 lex::Rule::Literal(
37 literal.to_string(),
38 lex::Target::Extract(format!("T{token_type}")),
39 )
40 } else {
41 lex::Rule::Literal(
42 literal.to_string(),
43 lex::Target::Keep(format!("T{token_type}")),
44 )
45 }
46 }
47 TokenRule::RegExp(token_type, regex) => {
48 let regex = format!("^{regex}");
49 if extract_token_rules.contains(token_type) {
50 lex::Rule::Regex(
51 Regex::new(®ex).unwrap(),
52 lex::Target::Extract(format!("T{token_type}")),
53 )
54 } else {
55 lex::Rule::Regex(
56 Regex::new(®ex).unwrap(),
57 lex::Target::Keep(format!("T{token_type}")),
58 )
59 }
60 }
61 })
62 .collect::<Vec<_>>();
63
64 lex::run_tokenizer(src, "".to_owned(), token_rules)
65 }
66}
67
68impl ASTParser for Language {
70 type T = DynTok;
71 type A = DynAstNode;
72
73 fn parse_ast(&self, ts: &mut TokenStream<Self::T>) -> Option<Self::A> {
74 DynAstNode::parse_rule(ts, self, &self.target)
75 }
76}