Skip to main content

harn_hostlib/ast/
mod.rs

1//! AST host capability.
2//!
3//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
4//! The implementation is fully wired so AST builtins share one canonical
5//! wire format.
6//!
7//! ## Wire format
8//!
//! - Row/column coordinates are **0-based** in `parse_file`, `symbols`,
//!   and `outline`, matching tree-sitter's native `Point` representation,
//!   so these three builtins share one convention.
12//! - `parse_file` emits a flat node list with `parent_id` rather than
13//!   nested children — keeps the wire JSON-serializable without inflating
14//!   it with object copies.
15//! - `symbols` and `outline` carry a `signature` string (e.g.
16//!   `"fn foo(bar: i32)"`) on every entry.
17//!
18//! ## Languages
19//!
20//! [`language::Language`] covers the general-purpose languages
21//! (TypeScript/TSX, JavaScript/JSX, Python, Go, Rust, Java, C, C++, C#,
22//! Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig, Elixir, Lua, Haskell, R)
23//! plus data/markup/config grammars (JSON, YAML, TOML, CSS, HTML, SQL,
24//! Markdown). The latter support the query-driven edit primitives but
25//! carry no symbol-graph projection — see
26//! [`language::Language::edit_capabilities`] for the per-language matrix.
27//! Adding/dropping languages requires coordinated schema, fixture, and
28//! host-bridge updates.
29//!
30
31use std::sync::Arc;
32
33use harn_vm::VmValue;
34
35use crate::error::HostlibError;
36use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
37
38mod apply_node;
39mod bracket_balance;
40mod capabilities;
41mod dry_run;
42mod edit_common;
43mod function_body;
44mod fuzzy;
45mod imports;
46mod insert_at_anchor;
47mod language;
48mod mutation;
49mod outline;
50mod parse;
51mod parse_errors;
52mod symbols;
53mod symbols_call;
54mod types;
55mod undefined_names;
56mod unified_diff;
57
58pub use language::{EditCapabilities, Language, TEXT_PATCH_FALLBACK};
59pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
60
61/// Programmatic entry point to the AST builtins. Embedders typically go
62/// through the registered builtins, but tests and tools that want
63/// strongly-typed access can use these helpers directly.
64pub mod api {
65    use std::path::Path;
66
67    use tree_sitter::Tree;
68
69    use crate::error::HostlibError;
70
71    use super::language::Language;
72    use super::outline::build_outline;
73    use super::parse::{parse_source, read_source};
74    use super::symbols::extract;
75    use super::types::{OutlineItem, Symbol};
76
77    /// Parse `path` (with optional language hint) and return its symbols.
78    pub fn symbols(
79        path: &Path,
80        language_hint: Option<&str>,
81    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
82        let language = detect(path, language_hint)?;
83        let source = read_source(&path.to_string_lossy(), 0)?;
84        let tree = parse_source(&source, language)?;
85        Ok((language, extract(&tree, &source, language)))
86    }
87
88    /// Parse `path` and return a hierarchical outline.
89    pub fn outline(
90        path: &Path,
91        language_hint: Option<&str>,
92    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
93        let (language, symbols) = symbols(path, language_hint)?;
94        Ok((language, build_outline(symbols)))
95    }
96
97    /// Parse a source `str` for `language` and return its symbols. Useful
98    /// for unit tests where the input lives in-memory rather than on disk.
99    pub fn symbols_from_source(
100        source: &str,
101        language: Language,
102    ) -> Result<Vec<Symbol>, HostlibError> {
103        let tree = parse_source(source, language)?;
104        Ok(extract(&tree, source, language))
105    }
106
107    /// Parse a source `str` for `language` and return the raw tree-sitter
108    /// tree. Used by the typed symbol graph in
109    /// [`crate::code_index::symbol_graph`] to sweep for call sites
110    /// without re-doing the work the AST symbol extractor already did.
111    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
112        parse_source(source, language)
113    }
114
115    /// Parse `source` once, then return the tree plus the symbol list
116    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
117    /// avoid paying the parse cost twice when it needs both products.
118    pub fn parse_with_symbols(
119        source: &str,
120        language: Language,
121    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
122        let tree = parse_source(source, language)?;
123        let symbols = extract(&tree, source, language);
124        Ok((tree, symbols))
125    }
126
127    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
128        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
129            builtin: "ast::api",
130            param: "language",
131            message: format!(
132                "could not infer a tree-sitter grammar for `{}` \
133                 (extension or `language` field unrecognized)",
134                path.display()
135            ),
136        })
137    }
138}
139
/// AST capability handle.
///
/// Stateless: tree-sitter parsers are constructed per-call (cheap
/// relative to grammar lookup), so the capability itself has nothing to
/// own. Because it carries no data it is `Copy`, and it implements
/// `Debug` so it can appear in diagnostics and in `#[derive(Debug)]`
/// containers.
#[derive(Clone, Copy, Debug, Default)]
pub struct AstCapability;
145
146impl HostlibCapability for AstCapability {
147    fn module_name(&self) -> &'static str {
148        "ast"
149    }
150
151    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
152        register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
153        register(
154            registry,
155            "hostlib_ast_symbols",
156            "symbols",
157            symbols_call::run,
158        );
159        register(registry, "hostlib_ast_outline", "outline", outline::run);
160        register(
161            registry,
162            "hostlib_ast_parse_errors",
163            "parse_errors",
164            parse_errors::run,
165        );
166        register(
167            registry,
168            "hostlib_ast_undefined_names",
169            "undefined_names",
170            undefined_names::run,
171        );
172        register(
173            registry,
174            "hostlib_ast_function_body",
175            "function_body",
176            function_body::run_single,
177        );
178        register(
179            registry,
180            "hostlib_ast_function_bodies",
181            "function_bodies",
182            function_body::run_bulk,
183        );
184        register(
185            registry,
186            "hostlib_ast_extract_imports",
187            "extract_imports",
188            imports::run,
189        );
190        register(
191            registry,
192            "hostlib_ast_symbol_extract",
193            "symbol_extract",
194            mutation::run_extract,
195        );
196        register(
197            registry,
198            "hostlib_ast_symbol_delete",
199            "symbol_delete",
200            mutation::run_delete,
201        );
202        register(
203            registry,
204            "hostlib_ast_symbol_replace",
205            "symbol_replace",
206            mutation::run_replace,
207        );
208        register(
209            registry,
210            "hostlib_ast_bracket_balance",
211            "bracket_balance",
212            bracket_balance::run,
213        );
214        // These two write edited source back to disk, so they share the
215        // deterministic-tools gate with `tools::*` file I/O.
216        register_gated(
217            registry,
218            "hostlib_ast_apply_node",
219            "apply_node",
220            apply_node::run,
221        );
222        register_gated(
223            registry,
224            "hostlib_ast_insert_at_anchor",
225            "insert_at_anchor",
226            insert_at_anchor::run,
227        );
228        register(registry, "hostlib_ast_dry_run", "dry_run", dry_run::run);
229        register(
230            registry,
231            "hostlib_ast_capabilities",
232            "capabilities",
233            capabilities::run,
234        );
235    }
236}
237
238fn register(
239    registry: &mut BuiltinRegistry,
240    name: &'static str,
241    method: &'static str,
242    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
243) {
244    let handler: SyncHandler = Arc::new(runner);
245    registry.register(RegisteredBuiltin {
246        name,
247        module: "ast",
248        method,
249        handler,
250    });
251}
252
253fn register_gated(
254    registry: &mut BuiltinRegistry,
255    name: &'static str,
256    method: &'static str,
257    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
258) {
259    registry.register(RegisteredBuiltin {
260        name,
261        module: "ast",
262        method,
263        handler: crate::tools::permissions::gated_handler(name, runner),
264    });
265}