use std::path::PathBuf;
use std::rc::Rc;
use harn_vm::VmValue;
use tree_sitter::{Node, Parser, Tree};
use crate::error::HostlibError;
use crate::tools::args::{
build_dict, dict_arg, optional_int, optional_string, require_string, str_value,
};
use super::language::Language;
use super::types::ParsedNode;
/// Name of this builtin as passed to the argument helpers and reported in the
/// `builtin` field of the `HostlibError` values constructed in this file.
const BUILTIN: &str = "hostlib_ast_parse_file";
/// Entry point for the `hostlib_ast_parse_file` builtin.
///
/// Reads the argument dict from `args` and uses these keys:
/// - `path` (required string): file to parse.
/// - `language` (optional string): hint forwarded to [`Language::detect`]
///   together with the path.
/// - `max_bytes` (optional int, default `0`): forwarded to [`read_source`],
///   where `0` means "do not truncate".
///
/// On success returns a dict with `path`, `language`, `root_id`, `nodes`
/// (one entry per flattened syntax node) and `had_errors` (whether the root
/// node reports a syntax error).
///
/// # Errors
/// - `HostlibError::InvalidParameter` when `max_bytes` is negative or when no
///   grammar can be detected for the path / language hint.
/// - Any error propagated from the argument helpers, [`read_source`] or
///   [`parse_source`].
pub(super) fn run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
    let raw = dict_arg(BUILTIN, args)?;
    let dict = raw.as_ref();
    let path_str = require_string(BUILTIN, dict, "path")?;
    let language_hint = optional_string(BUILTIN, dict, "language")?;
    let max_bytes = optional_int(BUILTIN, dict, "max_bytes", 0)?;
    if max_bytes < 0 {
        return Err(HostlibError::InvalidParameter {
            builtin: BUILTIN,
            param: "max_bytes",
            message: "must be >= 0".into(),
        });
    }
    // Checked conversion instead of `as`: where `usize` is narrower than the
    // integer type, `as` would wrap a huge limit (possibly to 0, which means
    // "no limit"). Saturating keeps "huge limit" meaning "effectively no limit".
    let max_bytes = usize::try_from(max_bytes).unwrap_or(usize::MAX);

    let path = PathBuf::from(&path_str);
    let language = Language::detect(&path, language_hint.as_deref()).ok_or_else(|| {
        HostlibError::InvalidParameter {
            builtin: BUILTIN,
            param: "language",
            message: format!(
                "could not infer a tree-sitter grammar for `{path_str}` \
                 (extension or `language` field unrecognized)"
            ),
        }
    })?;

    let source = read_source(&path_str, max_bytes)?;
    let tree = parse_source(&source, language)?;
    let (root_id, nodes) = flatten(&tree);
    let nodes_list: Vec<VmValue> = nodes.iter().map(ParsedNode::to_vm_value).collect();

    Ok(build_dict([
        ("path", str_value(&path_str)),
        ("language", str_value(language.name())),
        ("root_id", VmValue::Int(root_id as i64)),
        ("nodes", VmValue::List(Rc::new(nodes_list))),
        ("had_errors", VmValue::Bool(tree.root_node().has_error())),
    ]))
}
/// Loads the file at `path` and returns its contents as a `String`.
///
/// `crate::fs::read` is consulted first; when it returns `None` the file is
/// read with `std::fs::read` instead. If `max_bytes` is non-zero, only the
/// first `max_bytes` bytes are kept. The bytes are decoded lossily, so any
/// invalid UTF-8 (including a multi-byte sequence cut by the limit) becomes
/// U+FFFD.
///
/// # Errors
/// Returns `HostlibError::Backend` when the read fails.
pub(super) fn read_source(path: &str, max_bytes: usize) -> Result<String, HostlibError> {
    let target = PathBuf::from(path);
    let loaded = crate::fs::read(&target, None).unwrap_or_else(|| std::fs::read(path));
    let mut contents = match loaded {
        Ok(data) => data,
        Err(err) => {
            return Err(HostlibError::Backend {
                builtin: BUILTIN,
                message: format!("read `{path}`: {err}"),
            });
        }
    };
    // A limit of zero means "keep everything"; `truncate` is a no-op when the
    // buffer is already within the limit.
    if max_bytes != 0 {
        contents.truncate(max_bytes);
    }
    Ok(String::from_utf8_lossy(&contents).into_owned())
}
/// Parses `source` with the tree-sitter grammar belonging to `language`.
///
/// A fresh `Parser` is created for every call.
///
/// # Errors
/// Returns `HostlibError::Backend` when the parser refuses the grammar or
/// when `Parser::parse` produces no tree.
pub(super) fn parse_source(source: &str, language: Language) -> Result<Tree, HostlibError> {
    let mut ts_parser = Parser::new();

    if let Err(err) = ts_parser.set_language(&language.ts_language()) {
        return Err(HostlibError::Backend {
            builtin: BUILTIN,
            message: format!("set tree-sitter language `{}`: {err}", language.name()),
        });
    }

    match ts_parser.parse(source, None) {
        Some(tree) => Ok(tree),
        None => Err(HostlibError::Backend {
            builtin: BUILTIN,
            message: format!(
                "tree-sitter parse failed for language `{}` (timeout or panic)",
                language.name()
            ),
        }),
    }
}
/// Flattens `tree` into a list of [`ParsedNode`] records in breadth-first order.
///
/// Each record's `id` is its index in the returned vector and `parent_id` is
/// the `id` of its parent (`None` for the root). The root is visited first, so
/// the returned root id is always `0`. Both named and anonymous children are
/// included.
///
/// Ids, byte offsets, rows and columns are narrowed to `u32` with `as`, which
/// truncates values above `u32::MAX`.
fn flatten(tree: &Tree) -> (u32, Vec<ParsedNode>) {
    let root = tree.root_node();
    let mut nodes: Vec<ParsedNode> = Vec::new();
    let mut queue: std::collections::VecDeque<(Node<'_>, Option<u32>)> =
        std::collections::VecDeque::new();
    // A single cursor is reused for every node: `Node::children` walks the
    // child list with it instead of looking each child up by index.
    let mut cursor = root.walk();
    queue.push_back((root, None));
    while let Some((node, parent_id)) = queue.pop_front() {
        let id = nodes.len() as u32;
        let s = node.start_position();
        let e = node.end_position();
        nodes.push(ParsedNode {
            id,
            parent_id,
            kind: node.kind().to_string(),
            is_named: node.is_named(),
            start_byte: node.start_byte() as u32,
            end_byte: node.end_byte() as u32,
            start_row: s.row as u32,
            start_col: s.column as u32,
            end_row: e.row as u32,
            end_col: e.column as u32,
        });
        // Enqueue children in document order, tagged with this node's id.
        queue.extend(node.children(&mut cursor).map(|child| (child, Some(id))));
    }
    (0, nodes)
}