1
2
3
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::ops::Range;
7
8use super::error::{Error, ErrorKind};
9
10use super::error_strategy::error_listener::ErrorListener;
11use super::lexer_rule::LexerRule;
12use super::position::Position;
13use super::token::Token;
14
15
16pub trait Lexer {
18 fn iter(&self) -> TokenIter ;
19
20
21 fn get_all_on_channel_tokens(&self, channel: usize) -> Vec<Token> {
22 self.iter().filter(|token| token.channel == channel).collect::<Vec<_>>()
23 }
24
25 fn get_all_tokens(&self) -> Vec<Token> {
26 self.iter().collect::<Vec<_>>()
27 }
28
29 fn scan_all_tokens_and_group_by_channel(&mut self) -> HashMap<usize, Vec<Token>> {
30 let mut ret: HashMap<usize, Vec<Token>> = HashMap::new();
31 for token in self.iter() {
32 if ret.contains_key(&token.channel) {
33 ret.get_mut(&token.channel).unwrap().push(token);
34 } else {
35 ret.insert(token.channel, vec![token]);
36 }
37 }
38 ret
39 }
40
41
42}
43
44
45pub struct TokenIter<'a> {
47 pub input: &'a str, pub rules: &'a [LexerRule],
50 pub error_listeners: &'a [Box<dyn ErrorListener>],
51
52 pub ranges: Vec<Range<usize>>,
54
55
56
57 pub cursor: usize, pub token_index: usize, }
61
62
63
64
65impl<'a> TokenIter<'a> {
66
67 pub fn get_current_position(&self) -> Position {
68 self.get_position_from_char_index(self.cursor)
69 }
70
71 fn get_position_from_char_index(&self, char_index: usize) -> Position {
72 let line = self.ranges.binary_search_by(|range| {
73 if range.end <= char_index {
74 Ordering::Less
75 } else if range.start > char_index {
76 Ordering::Greater
77 } else {
78 Ordering::Equal
79 }
80 }).unwrap();
81 let range = self.ranges[line].clone();
82
83 Position::new(line, char_index - range.start)
84 }
85
86 pub fn lexer_match(&mut self) -> Result<Token, Error> {
88 if self.cursor >= self.input.len() {
89 return Err(Error::new(ErrorKind::LexerScanOverflow, "LexerScanOverflow",
90 self.get_position_from_char_index(self.cursor),
91 self.get_position_from_char_index(self.cursor)));
92 }
93
94 let mut len = 0;
96 let mut start = self.input.len();
97 let mut stop = start;
98
99 let mut meta: Option<LexerRule> = None;
100
101 for lexer_meta in self.rules.iter() {
102 if ! lexer_meta.rule.is_match_at(self.input, self.cursor) { continue; }
104
105 let result = lexer_meta.rule.find_at(self.input, self.cursor) ;
106 if let Some(result) = result {
107 if result.start() < start || result.start() == start && result.end() - result.start() > len {
108 meta = Some(lexer_meta.clone());
109 start = result.start();
110 stop = result.end();
111 len = result.end() - result.start();
112 }
113 }
114 }
115
116 if let None = meta {
118 return Err(Error::new(ErrorKind::LexerNoMatch, "",
119 self.get_position_from_char_index(self.cursor),
120 self.get_position_from_char_index(self.cursor)));
121 }
122
123
124 if start != self.cursor {
126 for listener in self.error_listeners.iter() {
127 listener.syntax_error();
128 }
129 todo!()
130 }
131
132 let text = String::from(&self.input[start..stop]);
134
135
136
137 let meta = meta.unwrap();
138 let token = Token::new(meta.token_type, &meta.token_name, &text,
139 self.get_position_from_char_index(start),
140 self.get_position_from_char_index(stop), self.token_index,
141 meta.channel,
142 self.cursor,
143 self.cursor + len);
144
145 self.cursor = stop;
146
147
148 if meta.skip {
150 return self.lexer_match();
151 }
152
153 self.token_index += 1;
154 return Ok(token);
155 }
156
157
158 pub fn reset(&mut self) {
159 self.cursor = 0;
160 self.token_index = 1;
161 }
162
163
164
165
166 pub fn new(input: &'a str, rules: &'a [LexerRule], error_listeners: &'a [Box<dyn ErrorListener>]) -> Self {
167 let mut st = 0;
168 let ranges = input.split("\n").map(|f| {
169 let ed = st + f.len() + 1; let ret = st..ed;
171 st = ed;
172 ret
173 }).collect::<Vec<_>>();
174
175 Self {
176 input, rules, error_listeners, cursor: 0, token_index: 1,
177 ranges,
178 }
179 }
180}
181
182
183impl Iterator for TokenIter<'_> {
184 type Item = Token;
185
186 fn next(&mut self) -> Option<Self::Item> {
187 match self.lexer_match() {
188 Ok(token) => Some(token),
189 Err(_) => None,
190 }
191 }
192}
193