1pub use arborium_highlight;
29pub use arborium_tree_sitter as tree_sitter;
30
31use std::collections::HashSet;
32use std::fs;
33use std::path::{Path, PathBuf};
34
35use arborium_highlight::{CompiledGrammar, GrammarConfig, ParseContext};
36use arborium_tree_sitter::Language;
37use arborium_tree_sitter::{Node, Parser, Tree};
38use tree_sitter_language::LanguageFn;
39
40pub use arborium_theme::CAPTURE_NAMES as HIGHLIGHT_NAMES_FULL;
42
/// One test entry as read from a corpus file, before it is tied to the file
/// it came from.
#[derive(Debug, Default)]
struct CorpusTest {
    /// Text following the `===` marker that opens the test.
    name: String,
    /// Raw lines collected from the `--- input` section.
    input: String,
    /// Whitespace-separated tokens collected from the `--- contains` section.
    contains: Vec<String>,
    /// Raw lines collected from the `--- sexp` section, when one is present.
    expected_sexp: Option<String>,
}
50
/// A single corpus test together with the file it was loaded from.
#[derive(Debug, Clone)]
pub struct CorpusCase {
    /// Corpus file this case was read from; used in failure messages.
    pub file: PathBuf,
    /// Name of the test inside the corpus file.
    pub name: String,
    /// Source text handed to the parser.
    pub input: String,
    /// Node kinds that must appear somewhere in the parsed tree.
    pub contains: Vec<String>,
    /// Expected S-expression of the root node, compared after trimming.
    pub expected_sexp: Option<String>,
}
59
/// Error reported by the corpus harness; carries a ready-to-print message.
#[derive(Debug)]
pub struct HarnessError {
    message: String,
}

impl HarnessError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for HarnessError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for HarnessError {}

/// Result alias used throughout the harness; the success type defaults to `()`.
type HarnessResult<T = ()> = Result<T, HarnessError>;
82
83pub fn test_grammar(
103 language: impl Into<Language>,
104 name: &str,
105 highlights_query: &str,
106 injections_query: &str,
107 _locals_query: &str,
108 crate_dir: &str,
109) {
110 let language: Language = language.into();
111 let config = GrammarConfig {
113 language,
114 highlights_query,
115 injections_query,
116 locals_query: "", };
118
119 let grammar = CompiledGrammar::new(config).unwrap_or_else(|e| {
121 panic!(
122 "Query validation failed for {}: {:?}\n\
123 This usually means highlights.scm references a node type that doesn't exist in the grammar.\n\
124 Check the grammar's node-types.json to see valid node types.",
125 name, e
126 );
127 });
128
129 let mut ctx = ParseContext::for_grammar(&grammar).unwrap_or_else(|e| {
131 panic!("Failed to create parse context for {}: {:?}", name, e);
132 });
133
134 let crate_path = Path::new(crate_dir);
136 let kdl_path = crate_path.join("arborium.kdl");
137 let samples: Vec<_> = if kdl_path.exists() {
138 parse_samples_from_kdl(&kdl_path)
139 .into_iter()
140 .map(|p| crate_path.join(p))
141 .collect()
142 } else {
143 vec![]
144 };
145
146 if samples.is_empty() {
147 return;
149 }
150
151 for sample_path in &samples {
153 let sample_code = fs::read_to_string(sample_path).unwrap_or_else(|e| {
154 panic!(
155 "Failed to read sample file {} for {}: {}",
156 sample_path.display(),
157 name,
158 e
159 );
160 });
161
162 let result = grammar.parse(&mut ctx, &sample_code);
164
165 let highlight_count = result.spans.len();
167
168 if highlight_count == 0 {
170 panic!(
171 "No highlights produced for {} in {}.\n\
172 Sample has {} bytes.\n\
173 This likely means the highlights.scm query doesn't match anything in the sample.",
174 sample_path.display(),
175 name,
176 sample_code.len()
177 );
178 }
179 }
180}
181
182pub fn test_corpus(language: LanguageFn, name: &str, crate_dir: &str) {
205 let cases = collect_corpus_cases(crate_dir).unwrap_or_else(|e| {
206 panic!(
207 "Failed to gather corpus cases for {} (crate dir {}): {}",
208 name, crate_dir, e
209 )
210 });
211
212 for case in &cases {
213 if let Err(err) = run_corpus_case(language, name, case) {
214 panic!(
215 "Corpus failure for {} / {} (file {}): {}",
216 name,
217 case.name,
218 case.file.display(),
219 err
220 );
221 }
222 }
223}
224
/// Lists the `.txt` files directly inside `<crate_dir>/corpus`, sorted by path.
///
/// Returns an empty list when the directory does not exist or cannot be
/// read. Directory entries that fail to load are skipped, as are
/// non-regular files and files with any other extension.
pub fn corpus_files(crate_dir: &str) -> Vec<PathBuf> {
    let corpus_dir = Path::new(crate_dir).join("corpus");
    if !corpus_dir.exists() {
        return Vec::new();
    }

    let Ok(listing) = fs::read_dir(&corpus_dir) else {
        return Vec::new();
    };

    let mut found = Vec::new();
    for entry in listing {
        let Ok(entry) = entry else {
            continue;
        };
        let candidate = entry.path();
        if candidate.is_file() && candidate.extension().is_some_and(|ext| ext == "txt") {
            found.push(candidate);
        }
    }

    found.sort();
    found
}
244
245pub fn collect_corpus_cases(crate_dir: &str) -> HarnessResult<Vec<CorpusCase>> {
247 let files = corpus_files(crate_dir);
248 if files.is_empty() {
249 return Ok(Vec::new());
250 }
251
252 let mut cases = Vec::new();
253 for path in files {
254 let content = fs::read_to_string(&path).map_err(|e| {
255 HarnessError::new(format!(
256 "Failed to read corpus file {}: {}",
257 path.display(),
258 e
259 ))
260 })?;
261
262 let tests = parse_corpus(&content).map_err(|e| {
263 HarnessError::new(format!(
264 "Failed to parse corpus file {}: {}",
265 path.display(),
266 e
267 ))
268 })?;
269
270 if tests.is_empty() {
271 return Err(HarnessError::new(format!(
272 "Corpus file {} contains no tests",
273 path.display()
274 )));
275 }
276
277 for test in tests {
278 cases.push(CorpusCase {
279 file: path.clone(),
280 name: test.name,
281 input: test.input,
282 contains: test.contains,
283 expected_sexp: test.expected_sexp,
284 });
285 }
286 }
287
288 Ok(cases)
289}
290
291pub fn run_corpus_file(language: LanguageFn, name: &str, path: &Path) -> HarnessResult<()> {
293 let content = fs::read_to_string(path).map_err(|e| {
294 HarnessError::new(format!(
295 "Failed to read corpus file {} for {}: {}",
296 path.display(),
297 name,
298 e
299 ))
300 })?;
301
302 let tests = parse_corpus(&content).map_err(|e| {
303 HarnessError::new(format!(
304 "Failed to parse corpus file {} for {}: {}",
305 path.display(),
306 name,
307 e
308 ))
309 })?;
310
311 if tests.is_empty() {
312 return Err(HarnessError::new(format!(
313 "Corpus file {} for {} contains no tests",
314 path.display(),
315 name
316 )));
317 }
318
319 for test in tests {
320 let case = CorpusCase {
321 file: path.to_path_buf(),
322 name: test.name,
323 input: test.input,
324 contains: test.contains,
325 expected_sexp: test.expected_sexp,
326 };
327 run_corpus_case(language, name, &case)?;
328 }
329
330 Ok(())
331}
332
333pub fn run_corpus_case(language: LanguageFn, name: &str, case: &CorpusCase) -> HarnessResult<()> {
335 run_corpus_case_with_tree(language, name, case).map(|_| ())
336}
337
338pub fn run_corpus_case_with_tree(
340 language: LanguageFn,
341 name: &str,
342 case: &CorpusCase,
343) -> HarnessResult<String> {
344 let tree = parse_case(language, name, case)?;
345 let root = tree.root_node();
346
347 if let Some(expected) = &case.expected_sexp {
348 let actual = root.to_sexp();
349 if actual.trim() != expected.trim() {
350 return Err(HarnessError::new(format!(
351 "S-expression mismatch for {} / {} (file {})\n--- input ---\n{}\n--- expected ---\n{}\n--- actual ---\n{}",
352 name,
353 case.name,
354 case.file.display(),
355 case.input,
356 expected,
357 actual
358 )));
359 }
360 }
361
362 if !case.contains.is_empty() {
363 let mut seen: HashSet<&str> = HashSet::new();
364 collect_kinds(root, &mut seen);
365
366 for kind in &case.contains {
367 if !seen.contains(kind.as_str()) {
368 return Err(HarnessError::new(format!(
369 "Expected node kind `{}` not found for {} / {} (file {})\n--- input ---\n{}\n--- seen ---\n{:?}\n--- sexp ---\n{}",
370 kind,
371 name,
372 case.name,
373 case.file.display(),
374 case.input,
375 seen,
376 root.to_sexp()
377 )));
378 }
379 }
380 }
381
382 Ok(root.to_sexp())
383}
384
385fn parse_case(language: LanguageFn, name: &str, case: &CorpusCase) -> HarnessResult<Tree> {
386 if case.input.trim().is_empty() {
387 return Err(HarnessError::new(format!(
388 "Corpus test {} / {} (file {}) is missing an `--- input` section",
389 name,
390 case.name,
391 case.file.display()
392 )));
393 }
394
395 let language = Language::from(language);
396 let mut parser = Parser::new();
397 parser
398 .set_language(&language)
399 .map_err(|e| HarnessError::new(format!("Failed to set language for {}: {:?}", name, e)))?;
400
401 let tree = parser.parse(&case.input, None).ok_or_else(|| {
402 HarnessError::new(format!(
403 "Parser returned no tree for {} / {} (file {})",
404 name,
405 case.name,
406 case.file.display()
407 ))
408 })?;
409
410 let root = tree.root_node();
411 if root.has_error() {
412 return Err(HarnessError::new(format!(
413 "Parse errors for {} / {} (file {})\n--- input ---\n{}\n--- sexp ---\n{}",
414 name,
415 case.name,
416 case.file.display(),
417 case.input,
418 root.to_sexp()
419 )));
420 }
421
422 Ok(tree)
423}
424
425fn collect_kinds(node: Node, out: &mut HashSet<&str>) {
426 out.insert(node.kind());
427 let mut cursor = node.walk();
428 for child in node.children(&mut cursor) {
429 collect_kinds(child, out);
430 }
431}
432
433fn parse_corpus(content: &str) -> HarnessResult<Vec<CorpusTest>> {
434 let mut tests: Vec<CorpusTest> = Vec::new();
435 let mut current: Option<CorpusTest> = None;
436 let mut section: Option<String> = None;
437
438 for (idx, chunk) in content.split_inclusive('\n').enumerate() {
439 let line = chunk
440 .strip_suffix('\n')
441 .map(|l| l.strip_suffix('\r').unwrap_or(l))
442 .unwrap_or(chunk);
443 let trimmed = line.trim_end();
444
445 if let Some(name) = trimmed.strip_prefix("===") {
446 if let Some(t) = current.take() {
447 tests.push(t);
448 }
449 current = Some(CorpusTest {
450 name: name.trim().to_string(),
451 ..CorpusTest::default()
452 });
453 section = None;
454 continue;
455 }
456
457 if let Some(sec) = trimmed.strip_prefix("---") {
458 section = Some(sec.trim().to_string());
459 continue;
460 }
461
462 let Some(test) = current.as_mut() else {
463 if trimmed.is_empty() || trimmed.starts_with('#') {
465 continue;
466 }
467 return Err(HarnessError::new(format!(
468 "Unexpected content before first test at line {}: {}",
469 idx + 1,
470 trimmed
471 )));
472 };
473
474 match section.as_deref() {
475 Some("input") => test.input.push_str(chunk),
476 Some("sexp") => {
477 let expected = test.expected_sexp.get_or_insert_with(String::new);
478 expected.push_str(chunk);
479 }
480 Some("contains") => {
481 for tok in trimmed.split_whitespace() {
482 test.contains.push(tok.to_string());
483 }
484 }
485 Some(other) => {
486 return Err(HarnessError::new(format!(
487 "Unknown section `{}` at line {}",
488 other,
489 idx + 1
490 )));
491 }
492 None => {
493 if trimmed.is_empty() || trimmed.starts_with('#') {
494 continue;
495 }
496 return Err(HarnessError::new(format!(
497 "Content outside a section at line {}: {}",
498 idx + 1,
499 trimmed
500 )));
501 }
502 }
503 }
504
505 if let Some(t) = current.take() {
506 tests.push(t);
507 }
508
509 Ok(tests)
510}
511
/// Extracts the sample paths listed in an `arborium.kdl` file.
///
/// This is a line-oriented scan, not a KDL parser. A line that starts with
/// `sample` and contains `{` opens a sample block, and the block ends when
/// its braces balance again. Inside a block, the first double-quoted value on
/// each line starting with `path` is collected; empty values are ignored.
/// A block may open and close on one line (`sample { path "x" }`), and a
/// `path` entry may share a line with the closing brace.
///
/// Returns an empty list when the file cannot be read.
fn parse_samples_from_kdl(path: &Path) -> Vec<String> {
    /// Returns the first non-empty double-quoted value of a `path ...` entry.
    fn quoted_path_value(entry: &str) -> Option<&str> {
        if !entry.starts_with("path") {
            return None;
        }
        let (_, after_open) = entry.split_once('"')?;
        let (value, _) = after_open.split_once('"')?;
        if value.is_empty() {
            None
        } else {
            Some(value)
        }
    }

    let Ok(content) = fs::read_to_string(path) else {
        return Vec::new();
    };

    let mut samples = Vec::new();
    // Nesting depth inside the current sample block; 0 means "outside".
    let mut brace_depth: usize = 0;

    for line in content.lines() {
        let trimmed = line.trim();

        // Pick the part of the line that may hold a `path` entry: the whole
        // line inside a block, or the text after `{` on a block header.
        let entry = if brace_depth > 0 {
            trimmed
        } else {
            match trimmed.split_once('{') {
                Some((_, inline_body)) if trimmed.starts_with("sample") => {
                    inline_body.trim_start()
                }
                _ => continue,
            }
        };

        if let Some(value) = quoted_path_value(entry) {
            samples.push(value.to_string());
        }

        // Count braces on every block line, the header included, so a
        // one-line block closes itself instead of swallowing later lines.
        let opened = trimmed.matches('{').count();
        let closed = trimmed.matches('}').count();
        brace_depth = (brace_depth + opened).saturating_sub(closed);
    }

    samples
}
560
561pub const HIGHLIGHT_NAMES: &[&str] = arborium_theme::CAPTURE_NAMES;