Skip to main content

harn_hostlib/ast/
mod.rs

//! AST host capability.
//!
//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
//! The implementation is fully wired so AST builtins share one canonical
//! wire format.
//!
//! ## Wire format
//!
//! - Row/column coordinates are **0-based** across all three builtins,
//!   matching tree-sitter's native `Point` representation. `parse_file`,
//!   `symbols`, and `outline` share one convention.
//! - `parse_file` emits a flat node list with `parent_id` rather than
//!   nested children — keeps the wire JSON-serializable without inflating
//!   it with object copies.
//! - `symbols` and `outline` carry a `signature` string (e.g.
//!   `"fn foo(bar: i32)"`) on every entry.
//!
//! ## Languages
//!
//! [`language::Language`] covers the general-purpose languages
//! (TypeScript/TSX, JavaScript/JSX, Python, Go, Rust, Java, C, C++, C#,
//! Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig, Elixir, Lua, Haskell, R)
//! plus data/markup/config grammars (JSON, YAML, TOML, CSS, HTML, SQL,
//! Markdown). The latter support the query-driven edit primitives but
//! carry no symbol-graph projection — see
//! [`language::Language::edit_capabilities`] for the per-language matrix.
//! Adding/dropping languages requires coordinated schema, fixture, and
//! host-bridge updates.
//!
30
31use std::sync::Arc;
32
33use harn_vm::VmValue;
34
35use crate::error::HostlibError;
36use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
37
38mod apply_node;
39mod bracket_balance;
40mod capabilities;
41mod dry_run;
42mod edit_common;
43mod function_body;
44mod fuzzy;
45mod imports;
46mod insert_at_anchor;
47mod language;
48mod mutation;
49mod outline;
50mod parse;
51mod parse_errors;
52mod structural_diff;
53mod symbols;
54mod symbols_call;
55mod types;
56mod undefined_names;
57mod unified_diff;
58
59pub use language::{EditCapabilities, Language, TEXT_PATCH_FALLBACK};
60pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
61
62/// Programmatic entry point to the AST builtins. Embedders typically go
63/// through the registered builtins, but tests and tools that want
64/// strongly-typed access can use these helpers directly.
65pub mod api {
66    use std::path::Path;
67
68    use tree_sitter::Tree;
69
70    use crate::error::HostlibError;
71
72    use super::language::Language;
73    use super::outline::build_outline;
74    use super::parse::{parse_source, read_source};
75    use super::symbols::extract;
76    use super::types::{OutlineItem, Symbol};
77
78    /// Parse `path` (with optional language hint) and return its symbols.
79    pub fn symbols(
80        path: &Path,
81        language_hint: Option<&str>,
82    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
83        let language = detect(path, language_hint)?;
84        let source = read_source(&path.to_string_lossy(), 0)?;
85        let tree = parse_source(&source, language)?;
86        Ok((language, extract(&tree, &source, language)))
87    }
88
89    /// Parse `path` and return a hierarchical outline.
90    pub fn outline(
91        path: &Path,
92        language_hint: Option<&str>,
93    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
94        let (language, symbols) = symbols(path, language_hint)?;
95        Ok((language, build_outline(symbols)))
96    }
97
98    /// Parse a source `str` for `language` and return its symbols. Useful
99    /// for unit tests where the input lives in-memory rather than on disk.
100    pub fn symbols_from_source(
101        source: &str,
102        language: Language,
103    ) -> Result<Vec<Symbol>, HostlibError> {
104        let tree = parse_source(source, language)?;
105        Ok(extract(&tree, source, language))
106    }
107
108    /// Parse a source `str` for `language` and return the raw tree-sitter
109    /// tree. Used by the typed symbol graph in
110    /// [`crate::code_index::symbol_graph`] to sweep for call sites
111    /// without re-doing the work the AST symbol extractor already did.
112    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
113        parse_source(source, language)
114    }
115
116    /// Parse `source` once, then return the tree plus the symbol list
117    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
118    /// avoid paying the parse cost twice when it needs both products.
119    pub fn parse_with_symbols(
120        source: &str,
121        language: Language,
122    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
123        let tree = parse_source(source, language)?;
124        let symbols = extract(&tree, source, language);
125        Ok((tree, symbols))
126    }
127
128    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
129        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
130            builtin: "ast::api",
131            param: "language",
132            message: format!(
133                "could not infer a tree-sitter grammar for `{}` \
134                 (extension or `language` field unrecognized)",
135                path.display()
136            ),
137        })
138    }
139}
140
/// AST capability handle.
///
/// Stateless: tree-sitter parsers are constructed per-call (cheap relative
/// to grammar lookup), so the capability itself has nothing to own. As a
/// field-less unit struct it is zero-sized, which is why it can derive
/// `Copy` and `Clone`; `Debug` is derived so the handle can appear in
/// diagnostics and in `Debug` output of types that embed it.
#[derive(Debug, Clone, Copy, Default)]
pub struct AstCapability;
146
147impl HostlibCapability for AstCapability {
148    fn module_name(&self) -> &'static str {
149        "ast"
150    }
151
152    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
153        register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
154        register(
155            registry,
156            "hostlib_ast_symbols",
157            "symbols",
158            symbols_call::run,
159        );
160        register(registry, "hostlib_ast_outline", "outline", outline::run);
161        register(
162            registry,
163            "hostlib_ast_parse_errors",
164            "parse_errors",
165            parse_errors::run,
166        );
167        register(
168            registry,
169            "hostlib_ast_undefined_names",
170            "undefined_names",
171            undefined_names::run,
172        );
173        register(
174            registry,
175            "hostlib_ast_function_body",
176            "function_body",
177            function_body::run_single,
178        );
179        register(
180            registry,
181            "hostlib_ast_function_bodies",
182            "function_bodies",
183            function_body::run_bulk,
184        );
185        register(
186            registry,
187            "hostlib_ast_extract_imports",
188            "extract_imports",
189            imports::run,
190        );
191        register(
192            registry,
193            "hostlib_ast_symbol_extract",
194            "symbol_extract",
195            mutation::run_extract,
196        );
197        register(
198            registry,
199            "hostlib_ast_symbol_delete",
200            "symbol_delete",
201            mutation::run_delete,
202        );
203        register(
204            registry,
205            "hostlib_ast_symbol_replace",
206            "symbol_replace",
207            mutation::run_replace,
208        );
209        register(
210            registry,
211            "hostlib_ast_bracket_balance",
212            "bracket_balance",
213            bracket_balance::run,
214        );
215        // These two write edited source back to disk, so they share the
216        // deterministic-tools gate with `tools::*` file I/O.
217        register_gated(
218            registry,
219            "hostlib_ast_apply_node",
220            "apply_node",
221            apply_node::run,
222        );
223        register_gated(
224            registry,
225            "hostlib_ast_insert_at_anchor",
226            "insert_at_anchor",
227            insert_at_anchor::run,
228        );
229        register(registry, "hostlib_ast_dry_run", "dry_run", dry_run::run);
230        register(
231            registry,
232            "hostlib_ast_structural_diff",
233            "structural_diff",
234            structural_diff::run,
235        );
236        register(
237            registry,
238            "hostlib_ast_capabilities",
239            "capabilities",
240            capabilities::run,
241        );
242    }
243}
244
245fn register(
246    registry: &mut BuiltinRegistry,
247    name: &'static str,
248    method: &'static str,
249    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
250) {
251    let handler: SyncHandler = Arc::new(runner);
252    registry.register(RegisteredBuiltin {
253        name,
254        module: "ast",
255        method,
256        handler,
257    });
258}
259
260fn register_gated(
261    registry: &mut BuiltinRegistry,
262    name: &'static str,
263    method: &'static str,
264    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
265) {
266    registry.register(RegisteredBuiltin {
267        name,
268        module: "ast",
269        method,
270        handler: crate::tools::permissions::gated_handler(name, runner),
271    });
272}