Skip to main content

harn_hostlib/ast/
mod.rs

1//! AST host capability.
2//!
3//! Wraps tree-sitter parsing, symbol extraction, and outline generation —
4//! the Swift `Sources/ASTEngine/` surface ported into Rust. The implementation
5//! is fully wired so AST builtins share one canonical wire format.
6//!
7//! ## Wire format
8//!
//! - Row/column coordinates are **0-based** in `parse_file`, `symbols`,
//!   and `outline`, matching tree-sitter's native `Point` representation.
//!   Swift's `ASTEngine` historically returned 1-based coordinates for
//!   symbols; we normalize on 0-based here so those three builtins share
//!   one convention.
14//! - `parse_file` emits a flat node list with `parent_id` rather than
15//!   nested children — keeps the wire JSON-serializable without inflating
16//!   it with object copies.
17//! - `symbols` and `outline` carry a `signature` string (e.g.
18//!   `"fn foo(bar: i32)"`) on every entry to match Swift's
19//!   `TreeSitterSymbol.signature`.
20//!
21//! ## Languages
22//!
23//! [`language::Language`] mirrors Swift's `TreeSitterLanguage` enum
24//! verbatim: TypeScript/TSX, JavaScript/JSX, Python, Go, Rust, Java,
25//! C, C++, C#, Ruby, Kotlin, PHP, Scala, Bash, Swift, Zig, Elixir, Lua,
26//! Haskell, R. Adding/dropping languages requires a coordinated change
27//! in both repos.
28//!
29
30use std::sync::Arc;
31
32use harn_vm::VmValue;
33
34use crate::error::HostlibError;
35use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
36
37mod language;
38mod outline;
39mod parse;
40mod parse_errors;
41mod symbols;
42mod symbols_call;
43mod types;
44mod undefined_names;
45
46pub use language::Language;
47pub use types::{OutlineItem, ParseError, ParsedNode, Symbol, SymbolKind, UndefinedName};
48
49/// Programmatic entry point to the AST builtins. Embedders typically go
50/// through the registered builtins, but tests and tools that want
51/// strongly-typed access can use these helpers directly.
52pub mod api {
53    use std::path::Path;
54
55    use crate::error::HostlibError;
56
57    use super::language::Language;
58    use super::outline::build_outline;
59    use super::parse::{parse_source, read_source};
60    use super::symbols::extract;
61    use super::types::{OutlineItem, Symbol};
62
63    /// Parse `path` (with optional language hint) and return its symbols.
64    pub fn symbols(
65        path: &Path,
66        language_hint: Option<&str>,
67    ) -> Result<(Language, Vec<Symbol>), HostlibError> {
68        let language = detect(path, language_hint)?;
69        let source = read_source(&path.to_string_lossy(), 0)?;
70        let tree = parse_source(&source, language)?;
71        Ok((language, extract(&tree, &source, language)))
72    }
73
74    /// Parse `path` and return a hierarchical outline.
75    pub fn outline(
76        path: &Path,
77        language_hint: Option<&str>,
78    ) -> Result<(Language, Vec<OutlineItem>), HostlibError> {
79        let (language, symbols) = symbols(path, language_hint)?;
80        Ok((language, build_outline(symbols)))
81    }
82
83    /// Parse a source `str` for `language` and return its symbols. Useful
84    /// for unit tests where the input lives in-memory rather than on disk.
85    pub fn symbols_from_source(
86        source: &str,
87        language: Language,
88    ) -> Result<Vec<Symbol>, HostlibError> {
89        let tree = parse_source(source, language)?;
90        Ok(extract(&tree, source, language))
91    }
92
93    fn detect(path: &Path, language_hint: Option<&str>) -> Result<Language, HostlibError> {
94        Language::detect(path, language_hint).ok_or_else(|| HostlibError::InvalidParameter {
95            builtin: "ast::api",
96            param: "language",
97            message: format!(
98                "could not infer a tree-sitter grammar for `{}` \
99                 (extension or `language` field unrecognized)",
100                path.display()
101            ),
102        })
103    }
104}
105
/// AST capability handle. Stateless; tree-sitter parsers are constructed
/// per-call (cheap relative to grammar lookup) so the capability itself
/// has nothing to own.
///
/// Being a field-less unit type, it derives `Debug` and `Clone` in addition
/// to `Default` so it can be logged and duplicated like any other public
/// handle.
#[derive(Debug, Clone, Default)]
pub struct AstCapability;
111
112impl HostlibCapability for AstCapability {
113    fn module_name(&self) -> &'static str {
114        "ast"
115    }
116
117    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
118        register(registry, "hostlib_ast_parse_file", "parse_file", parse::run);
119        register(
120            registry,
121            "hostlib_ast_symbols",
122            "symbols",
123            symbols_call::run,
124        );
125        register(registry, "hostlib_ast_outline", "outline", outline::run);
126        register(
127            registry,
128            "hostlib_ast_parse_errors",
129            "parse_errors",
130            parse_errors::run,
131        );
132        register(
133            registry,
134            "hostlib_ast_undefined_names",
135            "undefined_names",
136            undefined_names::run,
137        );
138    }
139}
140
141fn register(
142    registry: &mut BuiltinRegistry,
143    name: &'static str,
144    method: &'static str,
145    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
146) {
147    let handler: SyncHandler = Arc::new(runner);
148    registry.register(RegisteredBuiltin {
149        name,
150        module: "ast",
151        method,
152        handler,
153    });
154}