Skip to main content

harn_hostlib/ast/
mod.rs

1//! AST host capability.
2//!
3//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
4//! The implementation is fully wired so AST builtins share one canonical
5//! wire format.
6//!
7//! ## Wire format
8//!
9//! - Row/column coordinates are **0-based** across all three builtins,
10//!   matching tree-sitter's native `Point` representation. `parse_file`,
11//!   `symbols`, and `outline` share one convention.
12//! - `parse_file` emits a flat node list with `parent_id` rather than
13//!   nested children — keeps the wire JSON-serializable without inflating
14//!   it with object copies.
15//! - `symbols` and `outline` carry a `signature` string (e.g.
16//!   `"fn foo(bar: i32)"`) on every entry.
17//!
18//! ## Languages
19//!
20//! [`language::Language`] covers the general-purpose languages
21//! (Harn, TypeScript/TSX, JavaScript/JSX, Python, Go, Rust, Java, C, C++,
22//! C#, Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig, Elixir, Lua, Haskell, R)
23//! plus data/markup/config grammars (JSON, YAML, TOML, CSS, HTML, SQL,
24//! Markdown). The latter support the query-driven edit primitives but
25//! carry no symbol-graph projection — see
26//! [`language::Language::edit_capabilities`] for the per-language matrix.
27//! Adding/dropping languages requires coordinated schema, fixture, and
28//! host-bridge updates.
29//!
30
31use std::sync::Arc;
32
33use crate::code_index::SharedIndex;
34use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
35
36mod apply_node;
37mod batch_apply;
38mod bracket_balance;
39mod capabilities;
40mod dry_run;
41mod edit_common;
42mod function_body;
43mod fuzzy;
44mod imports;
45mod insert_at_anchor;
46mod language;
47mod mutation;
48mod outline;
49mod parse;
50mod parse_errors;
51mod search;
52mod structural_diff;
53mod symbols;
54mod symbols_call;
55mod types;
56mod undefined_names;
57mod unified_diff;
58
59pub use language::{EditCapabilities, Language, TEXT_PATCH_FALLBACK};
60pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
61
62/// Programmatic entry point to the AST builtins. Embedders typically go
63/// through the registered builtins, but tests and tools that want
64/// strongly-typed access can use these helpers directly.
65pub mod api {
66    use std::path::Path;
67
68    use tree_sitter::Tree;
69
70    use crate::error::HostlibError;
71
72    use super::language::Language;
73    use super::outline::build_outline;
74    use super::parse::{parse_source, read_source};
75    use super::symbols::extract;
76    use super::types::{OutlineItem, Symbol};
77
78    /// Parse `path` (with optional language hint) and return its symbols.
79    pub fn symbols(
80        path: &Path,
81        language_hint: Option<&str>,
82    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
83        let language = detect(path, language_hint)?;
84        let source = read_source(&path.to_string_lossy(), 0)?;
85        let tree = parse_source(&source, language)?;
86        Ok((language, extract(&tree, &source, language)))
87    }
88
89    /// Parse `path` and return a hierarchical outline.
90    pub fn outline(
91        path: &Path,
92        language_hint: Option<&str>,
93    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
94        let (language, symbols) = symbols(path, language_hint)?;
95        Ok((language, build_outline(symbols)))
96    }
97
98    /// Parse a source `str` for `language` and return its symbols. Useful
99    /// for unit tests where the input lives in-memory rather than on disk.
100    pub fn symbols_from_source(
101        source: &str,
102        language: Language,
103    ) -> Result<Vec<Symbol>, HostlibError> {
104        let tree = parse_source(source, language)?;
105        Ok(extract(&tree, source, language))
106    }
107
108    /// Parse a source `str` for `language` and return the raw tree-sitter
109    /// tree. Used by the typed symbol graph in
110    /// [`crate::code_index::symbol_graph`] to sweep for call sites
111    /// without re-doing the work the AST symbol extractor already did.
112    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
113        parse_source(source, language)
114    }
115
116    /// Parse `source` once, then return the tree plus the symbol list
117    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
118    /// avoid paying the parse cost twice when it needs both products.
119    pub fn parse_with_symbols(
120        source: &str,
121        language: Language,
122    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
123        let tree = parse_source(source, language)?;
124        let symbols = extract(&tree, source, language);
125        Ok((tree, symbols))
126    }
127
128    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
129        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
130            builtin: "ast::api",
131            param: "language",
132            message: format!(
133                "could not infer a tree-sitter grammar for `{}` \
134                 (extension or `language` field unrecognized)",
135                path.display()
136            ),
137        })
138    }
139}
140
/// AST capability handle. Stateless; tree-sitter parsers are constructed
/// per-call (cheap relative to grammar lookup) so the capability itself
/// has nothing to own.
///
/// Because the type carries no data it is trivially `Copy`, and it
/// implements `Debug` so it can appear in diagnostics and in the `Debug`
/// output of types that embed it.
#[derive(Debug, Clone, Copy, Default)]
pub struct AstCapability;
146
147/// AST capability registered with access to the shared code-index state.
148///
149/// Most AST builtins are stateless, but `ast.dry_run` can preview
150/// `rename_symbol` plan ops only when it can delegate to the typed
151/// symbol graph owned by `code_index`.
152pub struct AstCapabilityWithCodeIndex {
153    code_index: SharedIndex,
154}
155
156impl AstCapabilityWithCodeIndex {
157    /// Build an AST capability that can delegate dry-run rename previews
158    /// to the supplied code-index state.
159    pub fn new(code_index: SharedIndex) -> Self {
160        Self { code_index }
161    }
162}
163
164impl HostlibCapability for AstCapability {
165    fn module_name(&self) -> &'static str {
166        "ast"
167    }
168
169    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
170        register_ast_builtins(registry, None);
171    }
172}
173
174impl HostlibCapability for AstCapabilityWithCodeIndex {
175    fn module_name(&self) -> &'static str {
176        "ast"
177    }
178
179    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
180        register_ast_builtins(registry, Some(self.code_index.clone()));
181    }
182}
183
184fn register_ast_builtins(registry: &mut BuiltinRegistry, code_index: Option<SharedIndex>) {
185    registry.register_fn("ast", "hostlib_ast_parse_file", "parse_file", parse::run);
186    registry.register_fn("ast", "hostlib_ast_symbols", "symbols", symbols_call::run);
187    registry.register_fn("ast", "hostlib_ast_outline", "outline", outline::run);
188    registry.register_fn(
189        "ast",
190        "hostlib_ast_parse_errors",
191        "parse_errors",
192        parse_errors::run,
193    );
194    registry.register_fn(
195        "ast",
196        "hostlib_ast_undefined_names",
197        "undefined_names",
198        undefined_names::run,
199    );
200    registry.register_fn(
201        "ast",
202        "hostlib_ast_function_body",
203        "function_body",
204        function_body::run_single,
205    );
206    registry.register_fn(
207        "ast",
208        "hostlib_ast_function_bodies",
209        "function_bodies",
210        function_body::run_bulk,
211    );
212    registry.register_fn(
213        "ast",
214        "hostlib_ast_extract_imports",
215        "extract_imports",
216        imports::run,
217    );
218    registry.register_fn(
219        "ast",
220        "hostlib_ast_symbol_extract",
221        "symbol_extract",
222        mutation::run_extract,
223    );
224    registry.register_fn(
225        "ast",
226        "hostlib_ast_symbol_delete",
227        "symbol_delete",
228        mutation::run_delete,
229    );
230    registry.register_fn(
231        "ast",
232        "hostlib_ast_symbol_replace",
233        "symbol_replace",
234        mutation::run_replace,
235    );
236    registry.register_fn(
237        "ast",
238        "hostlib_ast_bracket_balance",
239        "bracket_balance",
240        bracket_balance::run,
241    );
242    // These two write edited source back to disk, so they share the
243    // deterministic-tools gate with `tools::*` file I/O.
244    registry.register_gated_fn(
245        "ast",
246        "hostlib_ast_apply_node",
247        "apply_node",
248        apply_node::run,
249    );
250    registry.register_gated_fn(
251        "ast",
252        "hostlib_ast_insert_at_anchor",
253        "insert_at_anchor",
254        insert_at_anchor::run,
255    );
256    // Multi-file codemod runner. Writes when `dry_run: false`, so it shares
257    // the deterministic-tools write gate with the other mutating builtins.
258    registry.register_gated_fn(
259        "ast",
260        "hostlib_ast_batch_apply",
261        "batch_apply",
262        batch_apply::run,
263    );
264    register_dry_run(registry, code_index);
265    // Read-only structural search: shares the query machinery with
266    // `apply_node` but never writes, so it carries no deterministic-tools
267    // gate.
268    registry.register_fn("ast", "hostlib_ast_search", "search", search::run);
269    registry.register_fn(
270        "ast",
271        "hostlib_ast_structural_diff",
272        "structural_diff",
273        structural_diff::run,
274    );
275    registry.register_fn(
276        "ast",
277        "hostlib_ast_capabilities",
278        "capabilities",
279        capabilities::run,
280    );
281}
282
283fn register_dry_run(registry: &mut BuiltinRegistry, code_index: Option<SharedIndex>) {
284    match code_index {
285        Some(index) => {
286            let handler: SyncHandler =
287                Arc::new(move |args| dry_run::run_with_code_index(Some(&index), args));
288            registry.register(RegisteredBuiltin {
289                name: "hostlib_ast_dry_run",
290                module: "ast",
291                method: "dry_run",
292                handler,
293            });
294        }
295        None => registry.register_fn("ast", "hostlib_ast_dry_run", "dry_run", dry_run::run),
296    }
297}