Skip to main content

normalize_languages/
parsers.rs

1//! Tree-sitter parser singleton and convenience functions.
2//!
3//! Provides a global `GrammarLoader` singleton so that grammars are loaded once
4//! and shared across all call sites. This is the canonical way to parse source
5//! code with tree-sitter in the normalize ecosystem.
6//!
7//! # Lifetime Safety
8//!
9//! The singleton is stored in a `'static OnceLock`, so the backing shared
10//! libraries are never unloaded. This satisfies the lifetime requirement
11//! documented in [`GrammarLoader`].
12//!
13//! # Missing-grammar reporting
14//!
15//! When a grammar fails to load (not installed, ABI mismatch, etc.), we emit
16//! a single user-visible warning to stderr (deduplicated per process) and
17//! record the failure in a process-wide tracker so callers like
18//! `normalize structure rebuild` can summarise affected files. Use
19//! [`try_get_grammar`] / [`parse_with_grammar`] / [`parser_for`] to get the
20//! warning automatically; call [`report_missing_grammar`] directly if you
21//! call [`GrammarLoader::get`] yourself.
22
23use crate::{GrammarLoadError, GrammarLoader};
24use std::collections::HashMap;
25use std::sync::{Arc, Mutex, OnceLock};
26use tree_sitter::Parser;
27
28/// Global grammar loader singleton — avoids reloading grammars for each parse.
29static GRAMMAR_LOADER: OnceLock<Arc<GrammarLoader>> = OnceLock::new();
30
31/// Tracks grammars that have failed to load this process. Maps grammar name to
32/// (warned_already, failure_count). The first failure prints a stderr warning;
33/// subsequent failures only bump the count so callers can produce a summary
34/// without spamming the user.
35static MISSING_GRAMMARS: OnceLock<Mutex<HashMap<String, MissingGrammarRecord>>> = OnceLock::new();
36
/// Internal bookkeeping kept per grammar name in the missing-grammar tracker.
#[derive(Clone, Debug)]
struct MissingGrammarRecord {
    /// How many load attempts for this grammar have failed so far.
    pub count: usize,
    /// Text of the most recent load error; surfaced in the summary.
    pub detail: String,
}
44
/// One row of the missing-grammar summary, as produced by
/// [`take_missing_grammars`].
#[derive(Clone, Debug)]
pub struct MissingGrammar {
    /// Name of the grammar that could not be loaded, e.g. `"go"`.
    pub name: String,
    /// How many files / call sites requested this grammar and failed.
    pub count: usize,
    /// Human-readable description of the failure
    /// (e.g. "not found in search paths").
    pub detail: String,
}
55
56fn missing_grammars() -> &'static Mutex<HashMap<String, MissingGrammarRecord>> {
57    MISSING_GRAMMARS.get_or_init(|| Mutex::new(HashMap::new()))
58}
59
60/// Record a grammar load failure and emit a one-shot stderr warning.
61///
62/// Subsequent calls with the same `name` only increment the failure count —
63/// the user only sees the warning once per process per missing grammar. Use
64/// [`take_missing_grammars`] at the end of a long-running command to print a
65/// summary of affected files.
66pub fn report_missing_grammar(name: &str, err: &GrammarLoadError) {
67    let detail = format!("{err}");
68    let mut map = missing_grammars().lock().unwrap_or_else(|e| e.into_inner());
69    let entry = map
70        .entry(name.to_string())
71        .or_insert_with(|| MissingGrammarRecord {
72            count: 0,
73            detail: detail.clone(),
74        });
75    let first_time = entry.count == 0;
76    entry.count += 1;
77    entry.detail = detail;
78    if first_time {
79        eprintln!("warning: tree-sitter grammar '{name}' could not be loaded: {err}");
80        eprintln!("    Run: normalize grammars install");
81        eprintln!("    Or:  normalize grammars install --force  (if grammars are stale)");
82    }
83}
84
85/// Drain and return the missing-grammar tracker.
86///
87/// Returns one entry per grammar that failed to load this process. The
88/// internal counter is reset, so a subsequent rebuild starts fresh.
89pub fn take_missing_grammars() -> Vec<MissingGrammar> {
90    let mut map = missing_grammars().lock().unwrap_or_else(|e| e.into_inner());
91    let drained: Vec<MissingGrammar> = map
92        .drain()
93        .map(|(name, rec)| MissingGrammar {
94            name,
95            count: rec.count,
96            detail: rec.detail,
97        })
98        .collect();
99    drained
100}
101
102/// Peek at the missing-grammar tracker without resetting it.
103pub fn peek_missing_grammars() -> Vec<MissingGrammar> {
104    let map = missing_grammars().lock().unwrap_or_else(|e| e.into_inner());
105    map.iter()
106        .map(|(name, rec)| MissingGrammar {
107            name: name.clone(),
108            count: rec.count,
109            detail: rec.detail.clone(),
110        })
111        .collect()
112}
113
114/// Get the global grammar loader singleton.
115pub fn grammar_loader() -> Arc<GrammarLoader> {
116    GRAMMAR_LOADER
117        .get_or_init(|| Arc::new(GrammarLoader::new()))
118        .clone()
119}
120
121/// Try to load a grammar, surfacing missing-grammar failures as a one-shot
122/// stderr warning. Returns `None` on any load failure (caller can short-circuit
123/// like `?`).
124pub fn try_get_grammar(grammar: &str) -> Option<tree_sitter::Language> {
125    match grammar_loader().get(grammar) {
126        Ok(lang) => Some(lang),
127        Err(err) => {
128            report_missing_grammar(grammar, &err);
129            None
130        }
131    }
132}
133
134/// Create a parser for a specific grammar.
135///
136/// The grammar name should match tree-sitter grammar names
137/// (e.g., "python", "rust", "typescript"). Emits a warning to stderr on the
138/// first call where the grammar fails to load.
139pub fn parser_for(grammar: &str) -> Option<Parser> {
140    let language = try_get_grammar(grammar)?;
141    let mut parser = Parser::new();
142    parser.set_language(&language).ok()?;
143    Some(parser)
144}
145
146/// Parse source code with a specific grammar.
147///
148/// The grammar name should match tree-sitter grammar names
149/// (e.g., "python", "rust", "typescript"). Emits a warning to stderr on the
150/// first call where the grammar fails to load.
151pub fn parse_with_grammar(grammar: &str, source: &str) -> Option<tree_sitter::Tree> {
152    let mut parser = parser_for(grammar)?;
153    parser.parse(source, None)
154}
155
156/// List grammars available in external search paths.
157pub fn available_external_grammars() -> Vec<String> {
158    grammar_loader().available_external()
159}
160
161/// List grammars available in external search paths, with their file paths.
162pub fn available_external_grammars_with_paths() -> Vec<(String, std::path::PathBuf)> {
163    grammar_loader().available_external_with_paths()
164}