highlighter_core/
lexer.rs1use regex::{Regex, Error, CaptureLocations};
2
3use crate::language::{Language, Scope};
4
5#[derive(Clone, Debug, PartialEq)]
7pub struct Token {
8 pub scope: Scope,
10
11 pub value: String,
13}
14
15pub struct TokenContext {
17 tokens: Vec<Token>,
19}
20
21impl TokenContext {
22 pub fn token<Str: Into<String>>(&mut self, scope: Scope, value: Str) {
24 self.tokens.push(Token { scope, value: value.into() });
25 }
26}
27
28pub type TokenHandler = fn(CaptureLocations, &String, &mut TokenContext);
30
31struct HandledPattern {
33 regex: Regex,
35
36 handler: TokenHandler,
38}
39
40enum Pattern {
42 Plain(Scope, Regex),
44
45 Handled(HandledPattern),
47}
48
49pub struct LexerContext {
51 patterns: Vec<Pattern>,
53}
54
55impl LexerContext {
56 #[inline]
58 fn new() -> Result<Self, Error> {
59 Ok(Self { patterns: Vec::new() })
60 }
61
62 pub fn token<Str: Into<String>>(&mut self, scope: Scope, pattern: Str) -> Result<(), Error> {
63 self.patterns.push(Pattern::Plain(scope, Regex::new(&pattern.into())?));
65 Ok(())
66 }
67
68 pub fn advanced_token<Str: Into<String>>(&mut self, pattern: Str, handler: TokenHandler) -> Result<(), Error> {
70 self.patterns.push(Pattern::Handled(HandledPattern { regex: Regex::new(&pattern.into())?, handler }));
72 Ok(())
73 }
74}
75
76pub struct Lexer {
78 ctx: LexerContext,
80}
81
82impl Lexer {
83 pub fn new<L: Language>(language: L) -> Result<Self, Error> {
85 let mut ctx = LexerContext::new()?;
86
87 language.init(&mut ctx)?;
88
89 Ok(Self { ctx })
90 }
91
92 pub fn lex(&self, str: &str) -> Vec<Token> {
94 let mut i = 0;
95 let mut tokens = TokenContext { tokens: Vec::new() };
96
97 'str_iter: while i < str.len() {
98 for pattern in &self.ctx.patterns {
99 match pattern {
100 Pattern::Plain(scope, regex) => {
101 let mut captures = regex.capture_locations();
102
103 if let Some(m) = regex.captures_read_at(&mut captures, str, i) {
104 if m.start() != i {
105 continue;
106 }
107
108 i = m.end();
109 tokens.token(*scope, m.as_str());
110 continue 'str_iter;
111 }
112 },
113 Pattern::Handled(pattern) => {
114 let regex = &pattern.regex;
115 let mut captures = pattern.regex.capture_locations();
116
117 if let Some(m) = regex.captures_read_at(&mut captures, str, i) {
118 if m.start() != i {
119 continue;
120 }
121
122 i = m.end();
123 (pattern.handler)(captures, &str.to_owned(), &mut tokens);
124 continue 'str_iter;
125 }
126 },
127 }
128 }
129
130 tokens.token(Scope::None, str.chars().nth(i).unwrap());
131 i += 1;
132 }
133
134 tokens.tokens
135 }
136}