Skip to main content

harn_hostlib/ast/
mod.rs

1//! AST host capability.
2//!
3//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
4//! The implementation is fully wired so AST builtins share one canonical
5//! wire format.
6//!
7//! ## Wire format
8//!
//! - Row/column coordinates are **0-based**, matching tree-sitter's native
//!   `Point` representation. `parse_file`, `symbols`, and `outline` all
//!   share this one convention.
12//! - `parse_file` emits a flat node list with `parent_id` rather than
13//!   nested children — keeps the wire JSON-serializable without inflating
14//!   it with object copies.
15//! - `symbols` and `outline` carry a `signature` string (e.g.
16//!   `"fn foo(bar: i32)"`) on every entry.
17//!
18//! ## Languages
19//!
20//! [`language::Language`] covers TypeScript/TSX, JavaScript/JSX, Python,
21//! Go, Rust, Java, C, C++, C#, Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig,
22//! Elixir, Lua, Haskell, and R. Adding/dropping languages requires
23//! coordinated schema, fixture, and host-bridge updates.
24//!
25
26use std::sync::Arc;
27
28use harn_vm::VmValue;
29
30use crate::error::HostlibError;
31use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
32
33mod apply_node;
34mod bracket_balance;
35mod function_body;
36mod fuzzy;
37mod imports;
38mod language;
39mod mutation;
40mod outline;
41mod parse;
42mod parse_errors;
43mod symbols;
44mod symbols_call;
45mod types;
46mod undefined_names;
47
48pub use language::Language;
49pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
50
51/// Programmatic entry point to the AST builtins. Embedders typically go
52/// through the registered builtins, but tests and tools that want
53/// strongly-typed access can use these helpers directly.
54pub mod api {
55    use std::path::Path;
56
57    use tree_sitter::Tree;
58
59    use crate::error::HostlibError;
60
61    use super::language::Language;
62    use super::outline::build_outline;
63    use super::parse::{parse_source, read_source};
64    use super::symbols::extract;
65    use super::types::{OutlineItem, Symbol};
66
67    /// Parse `path` (with optional language hint) and return its symbols.
68    pub fn symbols(
69        path: &Path,
70        language_hint: Option<&str>,
71    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
72        let language = detect(path, language_hint)?;
73        let source = read_source(&path.to_string_lossy(), 0)?;
74        let tree = parse_source(&source, language)?;
75        Ok((language, extract(&tree, &source, language)))
76    }
77
78    /// Parse `path` and return a hierarchical outline.
79    pub fn outline(
80        path: &Path,
81        language_hint: Option<&str>,
82    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
83        let (language, symbols) = symbols(path, language_hint)?;
84        Ok((language, build_outline(symbols)))
85    }
86
87    /// Parse a source `str` for `language` and return its symbols. Useful
88    /// for unit tests where the input lives in-memory rather than on disk.
89    pub fn symbols_from_source(
90        source: &str,
91        language: Language,
92    ) -> Result<Vec<Symbol>, HostlibError> {
93        let tree = parse_source(source, language)?;
94        Ok(extract(&tree, source, language))
95    }
96
97    /// Parse a source `str` for `language` and return the raw tree-sitter
98    /// tree. Used by the typed symbol graph in
99    /// [`crate::code_index::symbol_graph`] to sweep for call sites
100    /// without re-doing the work the AST symbol extractor already did.
101    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
102        parse_source(source, language)
103    }
104
105    /// Parse `source` once, then return the tree plus the symbol list
106    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
107    /// avoid paying the parse cost twice when it needs both products.
108    pub fn parse_with_symbols(
109        source: &str,
110        language: Language,
111    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
112        let tree = parse_source(source, language)?;
113        let symbols = extract(&tree, source, language);
114        Ok((tree, symbols))
115    }
116
117    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
118        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
119            builtin: "ast::api",
120            param: "language",
121            message: format!(
122                "could not infer a tree-sitter grammar for `{}` \
123                 (extension or `language` field unrecognized)",
124                path.display()
125            ),
126        })
127    }
128}
129
/// AST capability handle.
///
/// A stateless unit struct: tree-sitter parsers are constructed per-call
/// (cheap relative to grammar lookup), so the capability itself has nothing
/// to own. Derives `Debug` so the handle can be logged or embedded in other
/// `Debug` types, and `Default` so it can be constructed generically.
#[derive(Debug, Default)]
pub struct AstCapability;
135
136impl HostlibCapability for AstCapability {
137    fn module_name(&self) -> &'static str {
138        "ast"
139    }
140
141    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
142        register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
143        register(
144            registry,
145            "hostlib_ast_symbols",
146            "symbols",
147            symbols_call::run,
148        );
149        register(registry, "hostlib_ast_outline", "outline", outline::run);
150        register(
151            registry,
152            "hostlib_ast_parse_errors",
153            "parse_errors",
154            parse_errors::run,
155        );
156        register(
157            registry,
158            "hostlib_ast_undefined_names",
159            "undefined_names",
160            undefined_names::run,
161        );
162        register(
163            registry,
164            "hostlib_ast_function_body",
165            "function_body",
166            function_body::run_single,
167        );
168        register(
169            registry,
170            "hostlib_ast_function_bodies",
171            "function_bodies",
172            function_body::run_bulk,
173        );
174        register(
175            registry,
176            "hostlib_ast_extract_imports",
177            "extract_imports",
178            imports::run,
179        );
180        register(
181            registry,
182            "hostlib_ast_symbol_extract",
183            "symbol_extract",
184            mutation::run_extract,
185        );
186        register(
187            registry,
188            "hostlib_ast_symbol_delete",
189            "symbol_delete",
190            mutation::run_delete,
191        );
192        register(
193            registry,
194            "hostlib_ast_symbol_replace",
195            "symbol_replace",
196            mutation::run_replace,
197        );
198        register(
199            registry,
200            "hostlib_ast_bracket_balance",
201            "bracket_balance",
202            bracket_balance::run,
203        );
204        register(
205            registry,
206            "hostlib_ast_apply_node",
207            "apply_node",
208            apply_node::run,
209        );
210    }
211}
212
213fn register(
214    registry: &mut BuiltinRegistry,
215    name: &'static str,
216    method: &'static str,
217    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
218) {
219    let handler: SyncHandler = Arc::new(runner);
220    registry.register(RegisteredBuiltin {
221        name,
222        module: "ast",
223        method,
224        handler,
225    });
226}