normalize_languages/parsers.rs
1//! Tree-sitter parser singleton and convenience functions.
2//!
3//! Provides a global `GrammarLoader` singleton so that grammars are loaded once
4//! and shared across all call sites. This is the canonical way to parse source
5//! code with tree-sitter in the normalize ecosystem.
6//!
7//! # Lifetime Safety
8//!
9//! The singleton is stored in a `'static OnceLock`, so the backing shared
10//! libraries are never unloaded. This satisfies the lifetime requirement
11//! documented in [`GrammarLoader`].
12//!
13//! # Missing-grammar reporting
14//!
15//! When a grammar fails to load (not installed, ABI mismatch, etc.), we emit
16//! a single user-visible warning to stderr (deduplicated per process) and
17//! record the failure in a process-wide tracker so callers like
18//! `normalize structure rebuild` can summarise affected files. Use
19//! [`try_get_grammar`] / [`parse_with_grammar`] / [`parser_for`] to get the
20//! warning automatically; call [`report_missing_grammar`] directly if you
21//! call [`GrammarLoader::get`] yourself.
22
23use crate::{GrammarLoadError, GrammarLoader};
24use std::collections::HashMap;
25use std::sync::{Arc, Mutex, OnceLock};
26use tree_sitter::Parser;
27
28/// Global grammar loader singleton — avoids reloading grammars for each parse.
29static GRAMMAR_LOADER: OnceLock<Arc<GrammarLoader>> = OnceLock::new();
30
31/// Tracks grammars that have failed to load this process. Maps grammar name to
32/// (warned_already, failure_count). The first failure prints a stderr warning;
33/// subsequent failures only bump the count so callers can produce a summary
34/// without spamming the user.
35static MISSING_GRAMMARS: OnceLock<Mutex<HashMap<String, MissingGrammarRecord>>> = OnceLock::new();
36
/// Per-grammar bookkeeping kept inside the missing-grammar tracker.
#[derive(Clone, Debug)]
struct MissingGrammarRecord {
    /// How many load failures have been recorded for this grammar.
    pub count: usize,
    /// Text of the most recent load error (shown in the summary).
    pub detail: String,
}
44
/// One row of the missing-grammar summary, as produced by
/// [`take_missing_grammars`] and [`peek_missing_grammars`].
#[derive(Clone, Debug)]
pub struct MissingGrammar {
    /// Name of the grammar that could not be loaded, e.g. `"go"`.
    pub name: String,
    /// How many times (files / call sites) this grammar was requested and
    /// failed to load.
    pub count: usize,
    /// Human-readable description of the failure
    /// (e.g. "not found in search paths").
    pub detail: String,
}
55
56fn missing_grammars() -> &'static Mutex<HashMap<String, MissingGrammarRecord>> {
57 MISSING_GRAMMARS.get_or_init(|| Mutex::new(HashMap::new()))
58}
59
60/// Record a grammar load failure and emit a one-shot stderr warning.
61///
62/// Subsequent calls with the same `name` only increment the failure count —
63/// the user only sees the warning once per process per missing grammar. Use
64/// [`take_missing_grammars`] at the end of a long-running command to print a
65/// summary of affected files.
66pub fn report_missing_grammar(name: &str, err: &GrammarLoadError) {
67 let detail = format!("{err}");
68 let mut map = missing_grammars().lock().unwrap_or_else(|e| e.into_inner());
69 let entry = map
70 .entry(name.to_string())
71 .or_insert_with(|| MissingGrammarRecord {
72 count: 0,
73 detail: detail.clone(),
74 });
75 let first_time = entry.count == 0;
76 entry.count += 1;
77 entry.detail = detail;
78 if first_time {
79 eprintln!("warning: tree-sitter grammar '{name}' could not be loaded: {err}");
80 eprintln!(" Run: normalize grammars install");
81 eprintln!(" Or: normalize grammars install --force (if grammars are stale)");
82 }
83}
84
85/// Drain and return the missing-grammar tracker.
86///
87/// Returns one entry per grammar that failed to load this process. The
88/// internal counter is reset, so a subsequent rebuild starts fresh.
89pub fn take_missing_grammars() -> Vec<MissingGrammar> {
90 let mut map = missing_grammars().lock().unwrap_or_else(|e| e.into_inner());
91 let drained: Vec<MissingGrammar> = map
92 .drain()
93 .map(|(name, rec)| MissingGrammar {
94 name,
95 count: rec.count,
96 detail: rec.detail,
97 })
98 .collect();
99 drained
100}
101
102/// Peek at the missing-grammar tracker without resetting it.
103pub fn peek_missing_grammars() -> Vec<MissingGrammar> {
104 let map = missing_grammars().lock().unwrap_or_else(|e| e.into_inner());
105 map.iter()
106 .map(|(name, rec)| MissingGrammar {
107 name: name.clone(),
108 count: rec.count,
109 detail: rec.detail.clone(),
110 })
111 .collect()
112}
113
114/// Get the global grammar loader singleton.
115pub fn grammar_loader() -> Arc<GrammarLoader> {
116 GRAMMAR_LOADER
117 .get_or_init(|| Arc::new(GrammarLoader::new()))
118 .clone()
119}
120
121/// Try to load a grammar, surfacing missing-grammar failures as a one-shot
122/// stderr warning. Returns `None` on any load failure (caller can short-circuit
123/// like `?`).
124pub fn try_get_grammar(grammar: &str) -> Option<tree_sitter::Language> {
125 match grammar_loader().get(grammar) {
126 Ok(lang) => Some(lang),
127 Err(err) => {
128 report_missing_grammar(grammar, &err);
129 None
130 }
131 }
132}
133
134/// Create a parser for a specific grammar.
135///
136/// The grammar name should match tree-sitter grammar names
137/// (e.g., "python", "rust", "typescript"). Emits a warning to stderr on the
138/// first call where the grammar fails to load.
139pub fn parser_for(grammar: &str) -> Option<Parser> {
140 let language = try_get_grammar(grammar)?;
141 let mut parser = Parser::new();
142 parser.set_language(&language).ok()?;
143 Some(parser)
144}
145
146/// Parse source code with a specific grammar.
147///
148/// The grammar name should match tree-sitter grammar names
149/// (e.g., "python", "rust", "typescript"). Emits a warning to stderr on the
150/// first call where the grammar fails to load.
151pub fn parse_with_grammar(grammar: &str, source: &str) -> Option<tree_sitter::Tree> {
152 let mut parser = parser_for(grammar)?;
153 parser.parse(source, None)
154}
155
156/// List grammars available in external search paths.
157pub fn available_external_grammars() -> Vec<String> {
158 grammar_loader().available_external()
159}
160
161/// List grammars available in external search paths, with their file paths.
162pub fn available_external_grammars_with_paths() -> Vec<(String, std::path::PathBuf)> {
163 grammar_loader().available_external_with_paths()
164}