Skip to main content

harn_hostlib/ast/
mod.rs

1//! AST host capability.
2//!
3//! Wraps tree-sitter parsing, symbol extraction, and outline generation.
4//! The implementation is fully wired so AST builtins share one canonical
5//! wire format.
6//!
7//! ## Wire format
8//!
//! - Row/column coordinates are **0-based**, matching tree-sitter's
//!   native `Point` representation. `parse_file`, `symbols`, and
//!   `outline` all share this one convention.
12//! - `parse_file` emits a flat node list with `parent_id` rather than
13//!   nested children — keeps the wire JSON-serializable without inflating
14//!   it with object copies.
15//! - `symbols` and `outline` carry a `signature` string (e.g.
16//!   `"fn foo(bar: i32)"`) on every entry.
17//!
18//! ## Languages
19//!
20//! [`language::Language`] covers TypeScript/TSX, JavaScript/JSX, Python,
21//! Go, Rust, Java, C, C++, C#, Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig,
22//! Elixir, Lua, Haskell, and R. Adding/dropping languages requires
23//! coordinated schema, fixture, and host-bridge updates.
24//!
25
26use std::sync::Arc;
27
28use harn_vm::VmValue;
29
30use crate::error::HostlibError;
31use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
32
33mod bracket_balance;
34mod function_body;
35mod fuzzy;
36mod imports;
37mod language;
38mod mutation;
39mod outline;
40mod parse;
41mod parse_errors;
42mod symbols;
43mod symbols_call;
44mod types;
45mod undefined_names;
46
47pub use language::Language;
48pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
49
50/// Programmatic entry point to the AST builtins. Embedders typically go
51/// through the registered builtins, but tests and tools that want
52/// strongly-typed access can use these helpers directly.
53pub mod api {
54    use std::path::Path;
55
56    use tree_sitter::Tree;
57
58    use crate::error::HostlibError;
59
60    use super::language::Language;
61    use super::outline::build_outline;
62    use super::parse::{parse_source, read_source};
63    use super::symbols::extract;
64    use super::types::{OutlineItem, Symbol};
65
66    /// Parse `path` (with optional language hint) and return its symbols.
67    pub fn symbols(
68        path: &Path,
69        language_hint: Option<&str>,
70    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
71        let language = detect(path, language_hint)?;
72        let source = read_source(&path.to_string_lossy(), 0)?;
73        let tree = parse_source(&source, language)?;
74        Ok((language, extract(&tree, &source, language)))
75    }
76
77    /// Parse `path` and return a hierarchical outline.
78    pub fn outline(
79        path: &Path,
80        language_hint: Option<&str>,
81    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
82        let (language, symbols) = symbols(path, language_hint)?;
83        Ok((language, build_outline(symbols)))
84    }
85
86    /// Parse a source `str` for `language` and return its symbols. Useful
87    /// for unit tests where the input lives in-memory rather than on disk.
88    pub fn symbols_from_source(
89        source: &str,
90        language: Language,
91    ) -> Result<Vec<Symbol>, HostlibError> {
92        let tree = parse_source(source, language)?;
93        Ok(extract(&tree, source, language))
94    }
95
96    /// Parse a source `str` for `language` and return the raw tree-sitter
97    /// tree. Used by the typed symbol graph in
98    /// [`crate::code_index::symbol_graph`] to sweep for call sites
99    /// without re-doing the work the AST symbol extractor already did.
100    pub fn parse_tree(source: &str, language: Language) -> Result<Tree, HostlibError> {
101        parse_source(source, language)
102    }
103
104    /// Parse `source` once, then return the tree plus the symbol list
105    /// extracted from it. Lets a caller (e.g. the typed symbol graph)
106    /// avoid paying the parse cost twice when it needs both products.
107    pub fn parse_with_symbols(
108        source: &str,
109        language: Language,
110    ) -> Result<(Tree, Vec<Symbol>), HostlibError> {
111        let tree = parse_source(source, language)?;
112        let symbols = extract(&tree, source, language);
113        Ok((tree, symbols))
114    }
115
116    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
117        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
118            builtin: "ast::api",
119            param: "language",
120            message: format!(
121                "could not infer a tree-sitter grammar for `{}` \
122                 (extension or `language` field unrecognized)",
123                path.display()
124            ),
125        })
126    }
127}
128
129/// AST capability handle. Stateless; tree-sitter parsers are constructed
130/// per-call (cheap relative to grammar lookup) so the capability itself
131/// has nothing to own.
132#[derive(Default)]
133pub struct AstCapability;
134
135impl HostlibCapability for AstCapability {
136    fn module_name(&self) -> &'static str {
137        "ast"
138    }
139
140    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
141        register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
142        register(
143            registry,
144            "hostlib_ast_symbols",
145            "symbols",
146            symbols_call::run,
147        );
148        register(registry, "hostlib_ast_outline", "outline", outline::run);
149        register(
150            registry,
151            "hostlib_ast_parse_errors",
152            "parse_errors",
153            parse_errors::run,
154        );
155        register(
156            registry,
157            "hostlib_ast_undefined_names",
158            "undefined_names",
159            undefined_names::run,
160        );
161        register(
162            registry,
163            "hostlib_ast_function_body",
164            "function_body",
165            function_body::run_single,
166        );
167        register(
168            registry,
169            "hostlib_ast_function_bodies",
170            "function_bodies",
171            function_body::run_bulk,
172        );
173        register(
174            registry,
175            "hostlib_ast_extract_imports",
176            "extract_imports",
177            imports::run,
178        );
179        register(
180            registry,
181            "hostlib_ast_symbol_extract",
182            "symbol_extract",
183            mutation::run_extract,
184        );
185        register(
186            registry,
187            "hostlib_ast_symbol_delete",
188            "symbol_delete",
189            mutation::run_delete,
190        );
191        register(
192            registry,
193            "hostlib_ast_symbol_replace",
194            "symbol_replace",
195            mutation::run_replace,
196        );
197        register(
198            registry,
199            "hostlib_ast_bracket_balance",
200            "bracket_balance",
201            bracket_balance::run,
202        );
203    }
204}
205
206fn register(
207    registry: &mut BuiltinRegistry,
208    name: &'static str,
209    method: &'static str,
210    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
211) {
212    let handler: SyncHandler = Arc::new(runner);
213    registry.register(RegisteredBuiltin {
214        name,
215        module: "ast",
216        method,
217        handler,
218    });
219}