oak_core/helpers/lexing.rs

use crate::{
    Language, Lexer, TokenType,
    errors::{OakDiagnostics, OakError},
    helpers::{create_file, json_from_path, source_from_path},
    source::Source,
};
use serde::{Deserialize, Serialize};
use serde_json::{Serializer, ser::PrettyFormatter};
use std::{
    path::{Path, PathBuf},
    sync::{Arc, Mutex},
    thread,
    time::{Duration, Instant},
};
use walkdir::WalkDir;

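/// Snapshot-tests a lexer against a directory of source files. Each matching
/// file is lexed and the result is compared with a `.lexed.json` snapshot
/// stored next to it; set `REGENERATE_TESTS=1` to rewrite the snapshots.
///
/// A minimal usage sketch; `MyLang` and `MyLexer` are hypothetical stand-ins
/// for a concrete `Language` and `Lexer` implementation:
///
/// ```ignore
/// let tester = LexerTester::new("tests/lexer")
///     .with_extension("mylang")
///     .with_timeout(Duration::from_secs(5));
/// tester.run_tests::<MyLang, _>(&MyLexer::default())?;
/// ```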
pub struct LexerTester {
    root: PathBuf,
    extensions: Vec<String>,
    timeout: Duration,
}

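/// The snapshot format: overall success flag, number of recorded tokens,
/// the tokens themselves (capped at 100), and diagnostics rendered as strings.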
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct LexerTestExpected {
    success: bool,
    count: usize,
    tokens: Vec<TokenData>,
    errors: Vec<String>,
}

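/// One recorded token: its kind (via `Debug`), the matched source text, and
/// the original byte span.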
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct TokenData {
    kind: String,
    text: String,
    start: usize,
    end: usize,
}

impl LexerTester {
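    /// Creates a tester rooted at `root` with no extensions registered and a
    /// default per-file timeout of 10 seconds.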
    pub fn new<P: AsRef<Path>>(root: P) -> Self {
        Self { root: root.as_ref().to_path_buf(), extensions: vec![], timeout: Duration::from_secs(10) }
    }

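    /// Registers a file extension (without the leading dot) to include in the
    /// test run.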
    pub fn with_extension(mut self, extension: impl ToString) -> Self {
        self.extensions.push(extension.to_string());
        self
    }

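    /// Overrides the per-file timeout applied to each lexer invocation.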
    pub fn with_timeout(mut self, time: Duration) -> Self {
        self.timeout = time;
        self
    }

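    /// Runs the lexer over every discovered test file. Snapshot mismatches and
    /// missing snapshots are reported as errors; with `REGENERATE_TESTS=1` the
    /// snapshots are rewritten and the run fails with `test_regenerated` so the
    /// changes are surfaced for review.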
    pub fn run_tests<L, Lex>(self, lexer: &Lex) -> Result<(), OakError>
    where
        L: Language + Send + Sync + 'static,
        L::TokenType: Serialize + std::fmt::Debug + Send + Sync,
        Lex: Lexer<L> + Send + Sync + 'static + Clone,
    {
        let test_files = self.find_test_files()?;
        let force_regenerated = std::env::var("REGENERATE_TESTS").unwrap_or_else(|_| "0".to_string()) == "1";
        let mut regenerated_any = false;

        for file_path in test_files {
            println!("Testing file: {}", file_path.display());
            regenerated_any |= self.test_single_file::<L, Lex>(&file_path, lexer, force_regenerated)?;
        }

        // Fail the run after a forced regeneration so rewritten snapshots are reviewed.
        if regenerated_any && force_regenerated { Err(OakError::test_regenerated(self.root)) } else { Ok(()) }
    }

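    /// Walks `root` recursively and collects every file whose extension is
    /// registered, skipping generated `.parsed.json`, `.lexed.json`, and
    /// `.built.json` outputs.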
    fn find_test_files(&self) -> Result<Vec<PathBuf>, OakError> {
        let mut files = Vec::new();

        for entry in WalkDir::new(&self.root) {
            let entry = entry.map_err(|e| OakError::custom_error(&format!("Failed to walk {}: {}", self.root.display(), e)))?;
            let path = entry.path();

            if path.is_file() {
                if let Some(ext) = path.extension() {
                    let ext_str = ext.to_str().unwrap_or("");
                    if self.extensions.iter().any(|e| e == ext_str) {
                        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
                        // Skip snapshot outputs produced by earlier runs.
                        let is_output_file = file_name.ends_with(".parsed.json") || file_name.ends_with(".lexed.json") || file_name.ends_with(".built.json");

                        if !is_output_file {
                            files.push(path.to_path_buf());
                        }
                    }
                }
            }
        }

        Ok(files)
    }

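    /// Lexes a single file on a worker thread so the run can be timed out,
    /// then compares the result against its `.lexed.json` snapshot or
    /// regenerates it. Returns `true` if the snapshot was rewritten under
    /// forced regeneration.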
    fn test_single_file<L, Lex>(&self, file_path: &Path, lexer: &Lex, force_regenerated: bool) -> Result<bool, OakError>
    where
        L: Language + Send + Sync + 'static,
        L::TokenType: Serialize + std::fmt::Debug + Send + Sync,
        Lex: Lexer<L> + Send + Sync + 'static + Clone,
    {
        let source = source_from_path(file_path)?;

        let result = Arc::new(Mutex::new(None));
        let result_clone = Arc::clone(&result);

        let lexer_clone = lexer.clone();
        let source_arc = Arc::new(source);
        let source_clone = Arc::clone(&source_arc);

        // Lex on a worker thread so the main thread can enforce the timeout.
        let handle = thread::spawn(move || {
            let mut cache = crate::parser::ParseSession::<L>::default();
            let output = lexer_clone.lex(&*source_clone, &[], &mut cache);
            let mut result = result_clone.lock().unwrap();
            *result = Some(output);
        });

        // Poll until the worker finishes or the timeout elapses; a timed-out
        // worker thread is abandoned, not joined.
        let start_time = Instant::now();
        let timeout_occurred = loop {
            if handle.is_finished() {
                break false;
            }

            if start_time.elapsed() > self.timeout {
                break true;
            }

            thread::sleep(Duration::from_millis(10));
        };

        if timeout_occurred {
            return Err(OakError::custom_error(&format!("Lexer test timed out after {:?} for file: {}", self.timeout, file_path.display())));
        }

        let OakDiagnostics { result: tokens_result, mut diagnostics } = {
            let result_guard = result.lock().unwrap();
            match result_guard.as_ref() {
                Some(output) => output.clone(),
                // `None` here means the worker panicked before storing its output.
                None => return Err(OakError::custom_error("Failed to get lexer result")),
            }
        };

        // A failed lex or any reported diagnostic marks the run as unsuccessful.
        let mut success = true;
        let tokens = match tokens_result {
            Ok(tokens) => tokens,
            Err(e) => {
                success = false;
                diagnostics.push(e);
                triomphe::Arc::from_iter(Vec::new())
            }
        };

        if !diagnostics.is_empty() {
            success = false;
        }

        // Record only significant tokens, capped at the first 100. Spans are
        // clamped to the source length before slicing out the token text.
        let tokens: Vec<TokenData> = tokens
            .iter()
            .filter(|token| !token.kind.is_ignored())
            .map(|token| {
                let len = source_arc.as_ref().length();
                let start = token.span.start.min(len);
                let end = token.span.end.min(len).max(start);
                let text = source_arc.as_ref().get_text_in((start..end).into()).to_string();
                TokenData { kind: format!("{:?}", token.kind), text, start: token.span.start, end: token.span.end }
            })
            .take(100)
            .collect();

        let errors: Vec<String> = diagnostics.iter().map(|e| e.to_string()).collect();
        let test_result = LexerTestExpected { success, count: tokens.len(), tokens, errors };

        // The snapshot lives next to the source, e.g. `foo.x` -> `foo.x.lexed.json`.
        let expected_file = file_path.with_extension(format!("{}.lexed.json", file_path.extension().unwrap_or_default().to_str().unwrap_or("")));

        let mut regenerated = false;
        if expected_file.exists() && !force_regenerated {
            let expected: LexerTestExpected = json_from_path(&expected_file)?;

            if test_result != expected {
                return Err(OakError::test_failure(file_path.to_path_buf(), format!("{:#?}", expected), format!("{:#?}", test_result)));
            }
        }
        else {
            // Write (or rewrite) the snapshot.
            let file = create_file(&expected_file)?;
            let mut writer = Serializer::with_formatter(file, PrettyFormatter::with_indent(b" "));
            test_result.serialize(&mut writer)?;

            if force_regenerated {
                regenerated = true;
            }
            else {
                // A newly created snapshot still fails the run so it gets reviewed.
                return Err(OakError::test_regenerated(expected_file));
            }
        }

        Ok(regenerated)
    }
}