Skip to main content

arborium_test_harness/
lib.rs

1//! Test harness for arborium grammar crates.
2//!
3//! This crate provides utilities for testing tree-sitter grammars and their queries.
4//!
5//! # Usage
6//!
7//! In your grammar crate's lib.rs tests:
8//!
9//! ```ignore
10//! #[cfg(test)]
11//! mod tests {
12//!     use super::*;
13//!
14//!     #[test]
15//!     fn test_grammar() {
16//!         arborium_test_harness::test_grammar(
17//!             language(),
18//!             "rust",
19//!             HIGHLIGHTS_QUERY,
20//!             INJECTIONS_QUERY,
21//!             LOCALS_QUERY,
22//!             env!("CARGO_MANIFEST_DIR"),
23//!         );
24//!     }
25//! }
26//! ```
27
28pub use arborium_highlight;
29pub use arborium_tree_sitter as tree_sitter;
30
31use std::collections::HashSet;
32use std::fs;
33use std::path::{Path, PathBuf};
34
35use arborium_highlight::{CompiledGrammar, GrammarConfig, ParseContext};
36use arborium_tree_sitter::Language;
37use arborium_tree_sitter::{Node, Parser, Tree};
38use tree_sitter_language::LanguageFn;
39
// Re-export CAPTURE_NAMES from arborium-theme as HIGHLIGHT_NAMES_FULL for convenience
41pub use arborium_theme::CAPTURE_NAMES as HIGHLIGHT_NAMES_FULL;
42
/// One `=== name` entry as read from a corpus file, before it is tied to the
/// file it came from.
#[derive(Debug, Default)]
struct CorpusTest {
    /// Text following the `===` marker, trimmed.
    name: String,
    /// Raw lines collected under `--- input`, line endings included.
    input: String,
    /// Whitespace-separated tokens collected under `--- contains`.
    contains: Vec<String>,
    /// Raw lines collected under `--- sexp`; `None` when no such line was seen.
    expected_sexp: Option<String>,
}
50
/// A single corpus test case together with the file it was loaded from.
#[derive(Debug, Clone)]
pub struct CorpusCase {
    /// Corpus file this case was read from (used in failure messages).
    pub file: PathBuf,
    /// Name given after the `===` marker.
    pub name: String,
    /// Source text handed to the parser.
    pub input: String,
    /// Node kinds that must each appear somewhere in the parse tree.
    pub contains: Vec<String>,
    /// Expected root s-expression, compared after trimming both sides.
    pub expected_sexp: Option<String>,
}
59
/// Error produced by the corpus helpers; carries a human-readable message only.
#[derive(Debug)]
pub struct HarnessError {
    message: String,
}

impl HarnessError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for HarnessError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for HarnessError {}

/// Result alias used throughout the harness; the success type defaults to `()`.
type HarnessResult<T = ()> = Result<T, HarnessError>;
82
83/// Tests a grammar by validating its queries and highlighting all samples.
84///
85/// This function:
86/// 1. Validates that the queries compile correctly
87/// 2. Finds sample files in the samples/ directory
88/// 3. Highlights each sample file and verifies we get highlights
89///
90/// # Arguments
91///
92/// * `language` - The tree-sitter Language
93/// * `name` - The grammar name (e.g., "rust")
94/// * `highlights_query` - The highlights.scm content
95/// * `injections_query` - The injections.scm content
96/// * `locals_query` - The locals.scm content (currently unused by arborium-highlight)
97/// * `crate_dir` - Path to the crate directory (use `env!("CARGO_MANIFEST_DIR")`)
98///
99/// # Panics
100///
101/// Panics if query validation fails, highlighting produces errors, or no highlights are found.
102pub fn test_grammar(
103    language: impl Into<Language>,
104    name: &str,
105    highlights_query: &str,
106    injections_query: &str,
107    _locals_query: &str,
108    crate_dir: &str,
109) {
110    let language: Language = language.into();
111    // Create grammar config
112    let config = GrammarConfig {
113        language,
114        highlights_query,
115        injections_query,
116        locals_query: "", // Not used by arborium-highlight yet
117    };
118
119    // Validate queries compile by creating the grammar
120    let grammar = CompiledGrammar::new(config).unwrap_or_else(|e| {
121        panic!(
122            "Query validation failed for {}: {:?}\n\
123             This usually means highlights.scm references a node type that doesn't exist in the grammar.\n\
124             Check the grammar's node-types.json to see valid node types.",
125            name, e
126        );
127    });
128
129    // Create a parse context for this grammar
130    let mut ctx = ParseContext::for_grammar(&grammar).unwrap_or_else(|e| {
131        panic!("Failed to create parse context for {}: {:?}", name, e);
132    });
133
134    // Find samples from arborium.kdl
135    let crate_path = Path::new(crate_dir);
136    let kdl_path = crate_path.join("arborium.kdl");
137    let samples: Vec<_> = if kdl_path.exists() {
138        parse_samples_from_kdl(&kdl_path)
139            .into_iter()
140            .map(|p| crate_path.join(p))
141            .collect()
142    } else {
143        vec![]
144    };
145
146    if samples.is_empty() {
147        // No samples - just verify query compiles (already done above)
148        return;
149    }
150
151    // Test each sample - must produce at least one highlight
152    for sample_path in &samples {
153        let sample_code = fs::read_to_string(sample_path).unwrap_or_else(|e| {
154            panic!(
155                "Failed to read sample file {} for {}: {}",
156                sample_path.display(),
157                name,
158                e
159            );
160        });
161
162        // Parse with the grammar
163        let result = grammar.parse(&mut ctx, &sample_code);
164
165        // Count highlight spans
166        let highlight_count = result.spans.len();
167
168        // Verify we got highlights
169        if highlight_count == 0 {
170            panic!(
171                "No highlights produced for {} in {}.\n\
172                 Sample has {} bytes.\n\
173                 This likely means the highlights.scm query doesn't match anything in the sample.",
174                sample_path.display(),
175                name,
176                sample_code.len()
177            );
178        }
179    }
180}
181
182/// Runs corpus-style parsing tests for a grammar.
183///
184/// The harness looks for a `corpus/` directory at the crate root and reads all
185/// `*.txt` files in it. Each file contains one or more test cases in a simple
186/// format:
187///
188/// ```text
189/// === test name
190/// --- input
191/// node 1;
192/// --- contains
193/// raw_string
194/// quoted_string
195/// --- sexp
196/// (document ...)
197/// ```
198///
199/// Only `input` is required. `contains` and `sexp` are optional:
200/// - `contains`: node kinds that must appear at least once in the parse tree.
201/// - `sexp`: expected root s-expression (exact match).
202///
203/// This does **not** use `tree-sitter test`; it's a lightweight Rust runner.
204pub fn test_corpus(language: LanguageFn, name: &str, crate_dir: &str) {
205    let cases = collect_corpus_cases(crate_dir).unwrap_or_else(|e| {
206        panic!(
207            "Failed to gather corpus cases for {} (crate dir {}): {}",
208            name, crate_dir, e
209        )
210    });
211
212    for case in &cases {
213        if let Err(err) = run_corpus_case(language, name, case) {
214            panic!(
215                "Corpus failure for {} / {} (file {}): {}",
216                name,
217                case.name,
218                case.file.display(),
219                err
220            );
221        }
222    }
223}
224
/// Lists the `.txt` files directly inside `<crate_dir>/corpus`, sorted by path.
///
/// Returns an empty list when the directory does not exist or cannot be read.
/// Directory entries that fail to load are skipped.
pub fn corpus_files(crate_dir: &str) -> Vec<PathBuf> {
    let corpus_dir = Path::new(crate_dir).join("corpus");
    if !corpus_dir.exists() {
        return Vec::new();
    }

    let Ok(listing) = fs::read_dir(&corpus_dir) else {
        return Vec::new();
    };

    let mut found = Vec::new();
    for entry in listing.flatten() {
        let candidate = entry.path();
        // Only regular files whose extension is exactly `txt` qualify.
        if candidate.is_file() && candidate.extension().is_some_and(|ext| ext == "txt") {
            found.push(candidate);
        }
    }
    found.sort();
    found
}
244
245/// Parse every corpus file and yield a case per `=== test`.
246pub fn collect_corpus_cases(crate_dir: &str) -> HarnessResult<Vec<CorpusCase>> {
247    let files = corpus_files(crate_dir);
248    if files.is_empty() {
249        return Ok(Vec::new());
250    }
251
252    let mut cases = Vec::new();
253    for path in files {
254        let content = fs::read_to_string(&path).map_err(|e| {
255            HarnessError::new(format!(
256                "Failed to read corpus file {}: {}",
257                path.display(),
258                e
259            ))
260        })?;
261
262        let tests = parse_corpus(&content).map_err(|e| {
263            HarnessError::new(format!(
264                "Failed to parse corpus file {}: {}",
265                path.display(),
266                e
267            ))
268        })?;
269
270        if tests.is_empty() {
271            return Err(HarnessError::new(format!(
272                "Corpus file {} contains no tests",
273                path.display()
274            )));
275        }
276
277        for test in tests {
278            cases.push(CorpusCase {
279                file: path.clone(),
280                name: test.name,
281                input: test.input,
282                contains: test.contains,
283                expected_sexp: test.expected_sexp,
284            });
285        }
286    }
287
288    Ok(cases)
289}
290
291/// Execute all tests defined in a single corpus file.
292pub fn run_corpus_file(language: LanguageFn, name: &str, path: &Path) -> HarnessResult<()> {
293    let content = fs::read_to_string(path).map_err(|e| {
294        HarnessError::new(format!(
295            "Failed to read corpus file {} for {}: {}",
296            path.display(),
297            name,
298            e
299        ))
300    })?;
301
302    let tests = parse_corpus(&content).map_err(|e| {
303        HarnessError::new(format!(
304            "Failed to parse corpus file {} for {}: {}",
305            path.display(),
306            name,
307            e
308        ))
309    })?;
310
311    if tests.is_empty() {
312        return Err(HarnessError::new(format!(
313            "Corpus file {} for {} contains no tests",
314            path.display(),
315            name
316        )));
317    }
318
319    for test in tests {
320        let case = CorpusCase {
321            file: path.to_path_buf(),
322            name: test.name,
323            input: test.input,
324            contains: test.contains,
325            expected_sexp: test.expected_sexp,
326        };
327        run_corpus_case(language, name, &case)?;
328    }
329
330    Ok(())
331}
332
333/// Execute a single corpus test case.
334pub fn run_corpus_case(language: LanguageFn, name: &str, case: &CorpusCase) -> HarnessResult<()> {
335    run_corpus_case_with_tree(language, name, case).map(|_| ())
336}
337
338/// Run a corpus test case and return the parsed tree's s-expression.
339pub fn run_corpus_case_with_tree(
340    language: LanguageFn,
341    name: &str,
342    case: &CorpusCase,
343) -> HarnessResult<String> {
344    let tree = parse_case(language, name, case)?;
345    let root = tree.root_node();
346
347    if let Some(expected) = &case.expected_sexp {
348        let actual = root.to_sexp();
349        if actual.trim() != expected.trim() {
350            return Err(HarnessError::new(format!(
351                "S-expression mismatch for {} / {} (file {})\n--- input ---\n{}\n--- expected ---\n{}\n--- actual ---\n{}",
352                name,
353                case.name,
354                case.file.display(),
355                case.input,
356                expected,
357                actual
358            )));
359        }
360    }
361
362    if !case.contains.is_empty() {
363        let mut seen: HashSet<&str> = HashSet::new();
364        collect_kinds(root, &mut seen);
365
366        for kind in &case.contains {
367            if !seen.contains(kind.as_str()) {
368                return Err(HarnessError::new(format!(
369                    "Expected node kind `{}` not found for {} / {} (file {})\n--- input ---\n{}\n--- seen ---\n{:?}\n--- sexp ---\n{}",
370                    kind,
371                    name,
372                    case.name,
373                    case.file.display(),
374                    case.input,
375                    seen,
376                    root.to_sexp()
377                )));
378            }
379        }
380    }
381
382    Ok(root.to_sexp())
383}
384
385fn parse_case(language: LanguageFn, name: &str, case: &CorpusCase) -> HarnessResult<Tree> {
386    if case.input.trim().is_empty() {
387        return Err(HarnessError::new(format!(
388            "Corpus test {} / {} (file {}) is missing an `--- input` section",
389            name,
390            case.name,
391            case.file.display()
392        )));
393    }
394
395    let language = Language::from(language);
396    let mut parser = Parser::new();
397    parser
398        .set_language(&language)
399        .map_err(|e| HarnessError::new(format!("Failed to set language for {}: {:?}", name, e)))?;
400
401    let tree = parser.parse(&case.input, None).ok_or_else(|| {
402        HarnessError::new(format!(
403            "Parser returned no tree for {} / {} (file {})",
404            name,
405            case.name,
406            case.file.display()
407        ))
408    })?;
409
410    let root = tree.root_node();
411    if root.has_error() {
412        return Err(HarnessError::new(format!(
413            "Parse errors for {} / {} (file {})\n--- input ---\n{}\n--- sexp ---\n{}",
414            name,
415            case.name,
416            case.file.display(),
417            case.input,
418            root.to_sexp()
419        )));
420    }
421
422    Ok(tree)
423}
424
425fn collect_kinds(node: Node, out: &mut HashSet<&str>) {
426    out.insert(node.kind());
427    let mut cursor = node.walk();
428    for child in node.children(&mut cursor) {
429        collect_kinds(child, out);
430    }
431}
432
433fn parse_corpus(content: &str) -> HarnessResult<Vec<CorpusTest>> {
434    let mut tests: Vec<CorpusTest> = Vec::new();
435    let mut current: Option<CorpusTest> = None;
436    let mut section: Option<String> = None;
437
438    for (idx, chunk) in content.split_inclusive('\n').enumerate() {
439        let line = chunk
440            .strip_suffix('\n')
441            .map(|l| l.strip_suffix('\r').unwrap_or(l))
442            .unwrap_or(chunk);
443        let trimmed = line.trim_end();
444
445        if let Some(name) = trimmed.strip_prefix("===") {
446            if let Some(t) = current.take() {
447                tests.push(t);
448            }
449            current = Some(CorpusTest {
450                name: name.trim().to_string(),
451                ..CorpusTest::default()
452            });
453            section = None;
454            continue;
455        }
456
457        if let Some(sec) = trimmed.strip_prefix("---") {
458            section = Some(sec.trim().to_string());
459            continue;
460        }
461
462        let Some(test) = current.as_mut() else {
463            // Allow blank lines and comments before first test.
464            if trimmed.is_empty() || trimmed.starts_with('#') {
465                continue;
466            }
467            return Err(HarnessError::new(format!(
468                "Unexpected content before first test at line {}: {}",
469                idx + 1,
470                trimmed
471            )));
472        };
473
474        match section.as_deref() {
475            Some("input") => test.input.push_str(chunk),
476            Some("sexp") => {
477                let expected = test.expected_sexp.get_or_insert_with(String::new);
478                expected.push_str(chunk);
479            }
480            Some("contains") => {
481                for tok in trimmed.split_whitespace() {
482                    test.contains.push(tok.to_string());
483                }
484            }
485            Some(other) => {
486                return Err(HarnessError::new(format!(
487                    "Unknown section `{}` at line {}",
488                    other,
489                    idx + 1
490                )));
491            }
492            None => {
493                if trimmed.is_empty() || trimmed.starts_with('#') {
494                    continue;
495                }
496                return Err(HarnessError::new(format!(
497                    "Content outside a section at line {}: {}",
498                    idx + 1,
499                    trimmed
500                )));
501            }
502        }
503    }
504
505    if let Some(t) = current.take() {
506        tests.push(t);
507    }
508
509    Ok(tests)
510}
511
/// Parse sample paths from arborium.kdl
///
/// Looks for `sample { ... }` blocks and collects the quoted value of every
/// `path "..."` entry inside them, in file order. Both layouts are handled:
///
/// ```text
/// sample {
///     path "samples/a.rs"
/// }
/// sample { path "samples/b.rs" }
/// ```
///
/// This is a line-based scan, not a KDL parser: braces are counted textually
/// (including any that appear inside quoted strings), and a `path` entry is
/// only recognised at the start of a line or directly after the opening brace
/// of a `sample` header.
///
/// Returns an empty list when the file cannot be read. Empty path values are
/// skipped.
fn parse_samples_from_kdl(path: &Path) -> Vec<String> {
    let Ok(content) = fs::read_to_string(path) else {
        return Vec::new();
    };

    let mut samples = Vec::new();
    let mut in_sample_block = false;
    let mut brace_depth: usize = 0;

    for line in content.lines() {
        let trimmed = line.trim();

        // `body` is the part of the line that lies inside the sample block.
        let body = if in_sample_block {
            trimmed
        } else if trimmed.starts_with("sample") {
            let Some(open) = trimmed.find('{') else {
                continue;
            };
            in_sample_block = true;
            brace_depth = 1;
            // Keep whatever follows the opening brace so that single-line
            // blocks such as `sample { path "x" }` are not lost.
            trimmed[open + 1..].trim_start()
        } else {
            continue;
        };

        // Extract the path before updating the depth, so that a line which
        // also closes the block (`path "x" }`) still contributes its path.
        if let Some(value) = quoted_path_value(body) {
            samples.push(value.to_string());
        }

        brace_depth += body.matches('{').count();
        brace_depth = brace_depth.saturating_sub(body.matches('}').count());
        if brace_depth == 0 {
            in_sample_block = false;
        }
    }

    samples
}

/// Returns the first double-quoted value of a `path "..."` entry, or `None`
/// when `entry` does not start with `path`, lacks a quoted value, or the value
/// is empty.
fn quoted_path_value(entry: &str) -> Option<&str> {
    let rest = entry.strip_prefix("path")?;
    let (_, after_open) = rest.split_once('"')?;
    let (value, _) = after_open.split_once('"')?;
    (!value.is_empty()).then_some(value)
}
560
561/// Standard highlight names used by arborium.
562///
563/// **Deprecated**: Use [`arborium_theme::CAPTURE_NAMES`] instead, which is the
564/// canonical source of truth for all capture names.
565///
566/// This constant is kept for backwards compatibility.
567pub const HIGHLIGHT_NAMES: &[&str] = arborium_theme::CAPTURE_NAMES;