Skip to main content

harn_hostlib/ast/
mod.rs

1//! AST host capability.
2//!
3//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
4//! The implementation is fully wired so AST builtins share one canonical
5//! wire format.
6//!
7//! ## Wire format
8//!
9//! - Row/column coordinates are **0-based** across all three builtins,
10//!   matching tree-sitter's native `Point` representation. `parse_file`,
11//!   `symbols`, and `outline` share one convention.
12//! - `parse_file` emits a flat node list with `parent_id` rather than
13//!   nested children — keeps the wire JSON-serializable without inflating
14//!   it with object copies.
15//! - `symbols` and `outline` carry a `signature` string (e.g.
16//!   `"fn foo(bar: i32)"`) on every entry.
17//!
18//! ## Languages
19//!
20//! [`language::Language`] covers the general-purpose languages
21//! (Harn, TypeScript/TSX, JavaScript/JSX, Python, Go, Rust, Java, C, C++,
22//! C#, Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig, Elixir, Lua, Haskell, R)
23//! plus data/markup/config grammars (JSON, YAML, TOML, CSS, HTML, SQL,
24//! Markdown). The latter support the query-driven edit primitives but
25//! carry no symbol-graph projection — see
26//! [`language::Language::edit_capabilities`] for the per-language matrix.
27//! Adding/dropping languages requires coordinated schema, fixture, and
28//! host-bridge updates.
29//!
30
31use std::sync::Arc;
32
33use harn_vm::VmValue;
34
35use crate::code_index::SharedIndex;
36use crate::error::HostlibError;
37use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
38
39mod apply_node;
40mod batch_apply;
41mod bracket_balance;
42mod capabilities;
43mod dry_run;
44mod edit_common;
45mod function_body;
46mod fuzzy;
47mod imports;
48mod insert_at_anchor;
49mod language;
50mod mutation;
51mod outline;
52mod parse;
53mod parse_errors;
54mod search;
55mod structural_diff;
56mod symbols;
57mod symbols_call;
58mod types;
59mod undefined_names;
60mod unified_diff;
61
62pub use language::{EditCapabilities, Language, TEXT_PATCH_FALLBACK};
63pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
64
65/// Programmatic entry point to the AST builtins. Embedders typically go
66/// through the registered builtins, but tests and tools that want
67/// strongly-typed access can use these helpers directly.
68pub mod api {
69    use std::path::Path;
70
71    use tree_sitter::Tree;
72
73    use crate::error::HostlibError;
74
75    use super::language::Language;
76    use super::outline::build_outline;
77    use super::parse::{parse_source, read_source};
78    use super::symbols::extract;
79    use super::types::{OutlineItem, Symbol};
80
81    /// Parse `path` (with optional language hint) and return its symbols.
82    pub fn symbols(
83        path: &Path,
84        language_hint: Option<&str>,
85    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
86        let language = detect(path, language_hint)?;
87        let source = read_source(&path.to_string_lossy(), 0)?;
88        let tree = parse_source(&source, language)?;
89        Ok((language, extract(&tree, &source, language)))
90    }
91
92    /// Parse `path` and return a hierarchical outline.
93    pub fn outline(
94        path: &Path,
95        language_hint: Option<&str>,
96    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
97        let (language, symbols) = symbols(path, language_hint)?;
98        Ok((language, build_outline(symbols)))
99    }
100
101    /// Parse a source `str` for `language` and return its symbols. Useful
102    /// for unit tests where the input lives in-memory rather than on disk.
103    pub fn symbols_from_source(
104        source: &str,
105        language: Language,
106    ) -> Result<Vec<Symbol>, HostlibError> {
107        let tree = parse_source(source, language)?;
108        Ok(extract(&tree, source, language))
109    }
110
111    /// Parse a source `str` for `language` and return the raw tree-sitter
112    /// tree. Used by the typed symbol graph in
113    /// [`crate::code_index::symbol_graph`] to sweep for call sites
114    /// without re-doing the work the AST symbol extractor already did.
115    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
116        parse_source(source, language)
117    }
118
119    /// Parse `source` once, then return the tree plus the symbol list
120    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
121    /// avoid paying the parse cost twice when it needs both products.
122    pub fn parse_with_symbols(
123        source: &str,
124        language: Language,
125    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
126        let tree = parse_source(source, language)?;
127        let symbols = extract(&tree, source, language);
128        Ok((tree, symbols))
129    }
130
131    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
132        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
133            builtin: "ast::api",
134            param: "language",
135            message: format!(
136                "could not infer a tree-sitter grammar for `{}` \
137                 (extension or `language` field unrecognized)",
138                path.display()
139            ),
140        })
141    }
142}
143
/// AST capability handle.
///
/// The handle is stateless: tree-sitter parsers are constructed per call
/// (cheap relative to grammar lookup), so there is nothing for the
/// capability itself to own.
///
/// `Debug` is derived so the handle can appear in diagnostics and inside
/// other `#[derive(Debug)]` types.
#[derive(Debug, Default)]
pub struct AstCapability;
149
150/// AST capability registered with access to the shared code-index state.
151///
152/// Most AST builtins are stateless, but `ast.dry_run` can preview
153/// `rename_symbol` plan ops only when it can delegate to the typed
154/// symbol graph owned by `code_index`.
155pub struct AstCapabilityWithCodeIndex {
156    code_index: SharedIndex,
157}
158
159impl AstCapabilityWithCodeIndex {
160    /// Build an AST capability that can delegate dry-run rename previews
161    /// to the supplied code-index state.
162    pub fn new(code_index: SharedIndex) -> Self {
163        Self { code_index }
164    }
165}
166
167impl HostlibCapability for AstCapability {
168    fn module_name(&self) -> &'static str {
169        "ast"
170    }
171
172    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
173        register_ast_builtins(registry, None);
174    }
175}
176
177impl HostlibCapability for AstCapabilityWithCodeIndex {
178    fn module_name(&self) -> &'static str {
179        "ast"
180    }
181
182    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
183        register_ast_builtins(registry, Some(self.code_index.clone()));
184    }
185}
186
187fn register_ast_builtins(registry: &mut BuiltinRegistry, code_index: Option<SharedIndex>) {
188    register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
189    register(
190        registry,
191        "hostlib_ast_symbols",
192        "symbols",
193        symbols_call::run,
194    );
195    register(registry, "hostlib_ast_outline", "outline", outline::run);
196    register(
197        registry,
198        "hostlib_ast_parse_errors",
199        "parse_errors",
200        parse_errors::run,
201    );
202    register(
203        registry,
204        "hostlib_ast_undefined_names",
205        "undefined_names",
206        undefined_names::run,
207    );
208    register(
209        registry,
210        "hostlib_ast_function_body",
211        "function_body",
212        function_body::run_single,
213    );
214    register(
215        registry,
216        "hostlib_ast_function_bodies",
217        "function_bodies",
218        function_body::run_bulk,
219    );
220    register(
221        registry,
222        "hostlib_ast_extract_imports",
223        "extract_imports",
224        imports::run,
225    );
226    register(
227        registry,
228        "hostlib_ast_symbol_extract",
229        "symbol_extract",
230        mutation::run_extract,
231    );
232    register(
233        registry,
234        "hostlib_ast_symbol_delete",
235        "symbol_delete",
236        mutation::run_delete,
237    );
238    register(
239        registry,
240        "hostlib_ast_symbol_replace",
241        "symbol_replace",
242        mutation::run_replace,
243    );
244    register(
245        registry,
246        "hostlib_ast_bracket_balance",
247        "bracket_balance",
248        bracket_balance::run,
249    );
250    // These two write edited source back to disk, so they share the
251    // deterministic-tools gate with `tools::*` file I/O.
252    register_gated(
253        registry,
254        "hostlib_ast_apply_node",
255        "apply_node",
256        apply_node::run,
257    );
258    register_gated(
259        registry,
260        "hostlib_ast_insert_at_anchor",
261        "insert_at_anchor",
262        insert_at_anchor::run,
263    );
264    // Multi-file codemod runner. Writes when `dry_run: false`, so it shares
265    // the deterministic-tools write gate with the other mutating builtins.
266    register_gated(
267        registry,
268        "hostlib_ast_batch_apply",
269        "batch_apply",
270        batch_apply::run,
271    );
272    register_dry_run(registry, code_index);
273    // Read-only structural search: shares the query machinery with
274    // `apply_node` but never writes, so it carries no deterministic-tools
275    // gate.
276    register(registry, "hostlib_ast_search", "search", search::run);
277    register(
278        registry,
279        "hostlib_ast_structural_diff",
280        "structural_diff",
281        structural_diff::run,
282    );
283    register(
284        registry,
285        "hostlib_ast_capabilities",
286        "capabilities",
287        capabilities::run,
288    );
289}
290
291fn register(
292    registry: &mut BuiltinRegistry,
293    name: &'static str,
294    method: &'static str,
295    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
296) {
297    let handler: SyncHandler = Arc::new(runner);
298    registry.register(RegisteredBuiltin {
299        name,
300        module: "ast",
301        method,
302        handler,
303    });
304}
305
306fn register_dry_run(registry: &mut BuiltinRegistry, code_index: Option<SharedIndex>) {
307    match code_index {
308        Some(index) => {
309            let handler: SyncHandler =
310                Arc::new(move |args| dry_run::run_with_code_index(Some(&index), args));
311            registry.register(RegisteredBuiltin {
312                name: "hostlib_ast_dry_run",
313                module: "ast",
314                method: "dry_run",
315                handler,
316            });
317        }
318        None => register(registry, "hostlib_ast_dry_run", "dry_run", dry_run::run),
319    }
320}
321
322fn register_gated(
323    registry: &mut BuiltinRegistry,
324    name: &'static str,
325    method: &'static str,
326    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
327) {
328    registry.register(RegisteredBuiltin {
329        name,
330        module: "ast",
331        method,
332        handler: crate::tools::permissions::gated_handler(name, runner),
333    });
334}