Skip to main content

harn_hostlib/ast/
mod.rs

//! AST host capability.
//!
//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
//! The implementation is fully wired so AST builtins share one canonical
//! wire format.
//!
//! ## Wire format
//!
//! - Row/column coordinates are **0-based** across all three builtins,
//!   matching tree-sitter's native `Point` representation. `parse_file`,
//!   `symbols`, and `outline` share one convention.
//! - `parse_file` emits a flat node list with `parent_id` rather than
//!   nested children — keeps the wire JSON-serializable without inflating
//!   it with object copies.
//! - `symbols` and `outline` carry a `signature` string (e.g.
//!   `"fn foo(bar: i32)"`) on every entry.
//!
//! ## Languages
//!
//! [`language::Language`] covers the general-purpose languages
//! (TypeScript/TSX, JavaScript/JSX, Python, Go, Rust, Java, C, C++, C#,
//! Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig, Elixir, Lua, Haskell, R)
//! plus data/markup/config grammars (JSON, YAML, TOML, CSS, HTML, SQL,
//! Markdown). The latter support the query-driven edit primitives but
//! carry no symbol-graph projection — see
//! [`language::Language::edit_capabilities`] for the per-language matrix.
//! Adding/dropping languages requires coordinated schema, fixture, and
//! host-bridge updates.
//!
30
31use std::sync::Arc;
32
33use harn_vm::VmValue;
34
35use crate::code_index::SharedIndex;
36use crate::error::HostlibError;
37use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
38
39mod apply_node;
40mod bracket_balance;
41mod capabilities;
42mod dry_run;
43mod edit_common;
44mod function_body;
45mod fuzzy;
46mod imports;
47mod insert_at_anchor;
48mod language;
49mod mutation;
50mod outline;
51mod parse;
52mod parse_errors;
53mod structural_diff;
54mod symbols;
55mod symbols_call;
56mod types;
57mod undefined_names;
58mod unified_diff;
59
60pub use language::{EditCapabilities, Language, TEXT_PATCH_FALLBACK};
61pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
62
63/// Programmatic entry point to the AST builtins. Embedders typically go
64/// through the registered builtins, but tests and tools that want
65/// strongly-typed access can use these helpers directly.
66pub mod api {
67    use std::path::Path;
68
69    use tree_sitter::Tree;
70
71    use crate::error::HostlibError;
72
73    use super::language::Language;
74    use super::outline::build_outline;
75    use super::parse::{parse_source, read_source};
76    use super::symbols::extract;
77    use super::types::{OutlineItem, Symbol};
78
79    /// Parse `path` (with optional language hint) and return its symbols.
80    pub fn symbols(
81        path: &Path,
82        language_hint: Option<&str>,
83    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
84        let language = detect(path, language_hint)?;
85        let source = read_source(&path.to_string_lossy(), 0)?;
86        let tree = parse_source(&source, language)?;
87        Ok((language, extract(&tree, &source, language)))
88    }
89
90    /// Parse `path` and return a hierarchical outline.
91    pub fn outline(
92        path: &Path,
93        language_hint: Option<&str>,
94    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
95        let (language, symbols) = symbols(path, language_hint)?;
96        Ok((language, build_outline(symbols)))
97    }
98
99    /// Parse a source `str` for `language` and return its symbols. Useful
100    /// for unit tests where the input lives in-memory rather than on disk.
101    pub fn symbols_from_source(
102        source: &str,
103        language: Language,
104    ) -> Result<Vec<Symbol>, HostlibError> {
105        let tree = parse_source(source, language)?;
106        Ok(extract(&tree, source, language))
107    }
108
109    /// Parse a source `str` for `language` and return the raw tree-sitter
110    /// tree. Used by the typed symbol graph in
111    /// [`crate::code_index::symbol_graph`] to sweep for call sites
112    /// without re-doing the work the AST symbol extractor already did.
113    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
114        parse_source(source, language)
115    }
116
117    /// Parse `source` once, then return the tree plus the symbol list
118    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
119    /// avoid paying the parse cost twice when it needs both products.
120    pub fn parse_with_symbols(
121        source: &str,
122        language: Language,
123    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
124        let tree = parse_source(source, language)?;
125        let symbols = extract(&tree, source, language);
126        Ok((tree, symbols))
127    }
128
129    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
130        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
131            builtin: "ast::api",
132            param: "language",
133            message: format!(
134                "could not infer a tree-sitter grammar for `{}` \
135                 (extension or `language` field unrecognized)",
136                path.display()
137            ),
138        })
139    }
140}
141
/// AST capability handle. Stateless; tree-sitter parsers are constructed
/// per-call (cheap relative to grammar lookup) so the capability itself
/// has nothing to own.
///
/// Because the handle is a field-less unit struct it derives `Debug`,
/// `Clone` and `Copy` in addition to `Default`, so it can be logged and
/// passed around by value freely.
#[derive(Debug, Clone, Copy, Default)]
pub struct AstCapability;
147
148/// AST capability registered with access to the shared code-index state.
149///
150/// Most AST builtins are stateless, but `ast.dry_run` can preview
151/// `rename_symbol` plan ops only when it can delegate to the typed
152/// symbol graph owned by `code_index`.
153pub struct AstCapabilityWithCodeIndex {
154    code_index: SharedIndex,
155}
156
157impl AstCapabilityWithCodeIndex {
158    /// Build an AST capability that can delegate dry-run rename previews
159    /// to the supplied code-index state.
160    pub fn new(code_index: SharedIndex) -> Self {
161        Self { code_index }
162    }
163}
164
165impl HostlibCapability for AstCapability {
166    fn module_name(&self) -> &'static str {
167        "ast"
168    }
169
170    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
171        register_ast_builtins(registry, None);
172    }
173}
174
175impl HostlibCapability for AstCapabilityWithCodeIndex {
176    fn module_name(&self) -> &'static str {
177        "ast"
178    }
179
180    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
181        register_ast_builtins(registry, Some(self.code_index.clone()));
182    }
183}
184
185fn register_ast_builtins(registry: &mut BuiltinRegistry, code_index: Option<SharedIndex>) {
186    register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
187    register(
188        registry,
189        "hostlib_ast_symbols",
190        "symbols",
191        symbols_call::run,
192    );
193    register(registry, "hostlib_ast_outline", "outline", outline::run);
194    register(
195        registry,
196        "hostlib_ast_parse_errors",
197        "parse_errors",
198        parse_errors::run,
199    );
200    register(
201        registry,
202        "hostlib_ast_undefined_names",
203        "undefined_names",
204        undefined_names::run,
205    );
206    register(
207        registry,
208        "hostlib_ast_function_body",
209        "function_body",
210        function_body::run_single,
211    );
212    register(
213        registry,
214        "hostlib_ast_function_bodies",
215        "function_bodies",
216        function_body::run_bulk,
217    );
218    register(
219        registry,
220        "hostlib_ast_extract_imports",
221        "extract_imports",
222        imports::run,
223    );
224    register(
225        registry,
226        "hostlib_ast_symbol_extract",
227        "symbol_extract",
228        mutation::run_extract,
229    );
230    register(
231        registry,
232        "hostlib_ast_symbol_delete",
233        "symbol_delete",
234        mutation::run_delete,
235    );
236    register(
237        registry,
238        "hostlib_ast_symbol_replace",
239        "symbol_replace",
240        mutation::run_replace,
241    );
242    register(
243        registry,
244        "hostlib_ast_bracket_balance",
245        "bracket_balance",
246        bracket_balance::run,
247    );
248    // These two write edited source back to disk, so they share the
249    // deterministic-tools gate with `tools::*` file I/O.
250    register_gated(
251        registry,
252        "hostlib_ast_apply_node",
253        "apply_node",
254        apply_node::run,
255    );
256    register_gated(
257        registry,
258        "hostlib_ast_insert_at_anchor",
259        "insert_at_anchor",
260        insert_at_anchor::run,
261    );
262    register_dry_run(registry, code_index);
263    register(
264        registry,
265        "hostlib_ast_structural_diff",
266        "structural_diff",
267        structural_diff::run,
268    );
269    register(
270        registry,
271        "hostlib_ast_capabilities",
272        "capabilities",
273        capabilities::run,
274    );
275}
276
277fn register(
278    registry: &mut BuiltinRegistry,
279    name: &'static str,
280    method: &'static str,
281    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
282) {
283    let handler: SyncHandler = Arc::new(runner);
284    registry.register(RegisteredBuiltin {
285        name,
286        module: "ast",
287        method,
288        handler,
289    });
290}
291
292fn register_dry_run(registry: &mut BuiltinRegistry, code_index: Option<SharedIndex>) {
293    match code_index {
294        Some(index) => {
295            let handler: SyncHandler =
296                Arc::new(move |args| dry_run::run_with_code_index(Some(&index), args));
297            registry.register(RegisteredBuiltin {
298                name: "hostlib_ast_dry_run",
299                module: "ast",
300                method: "dry_run",
301                handler,
302            });
303        }
304        None => register(registry, "hostlib_ast_dry_run", "dry_run", dry_run::run),
305    }
306}
307
308fn register_gated(
309    registry: &mut BuiltinRegistry,
310    name: &'static str,
311    method: &'static str,
312    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
313) {
314    registry.register(RegisteredBuiltin {
315        name,
316        module: "ast",
317        method,
318        handler: crate::tools::permissions::gated_handler(name, runner),
319    });
320}