use std::path::PathBuf;
use std::sync::Arc;
use harn_hostlib::{ast::AstCapability, BuiltinRegistry, HostlibCapability};
use harn_vm::VmValue;
/// One entry of the parser-agreement corpus: a fixture file plus the facts the
/// agreement test expects the AST builtins to report for it.
struct Case {
    /// File name of the fixture, passed to `fixture_source` to load its text.
    fixture: &'static str,
    /// Language identifier handed to the AST builtins alongside the source.
    language: &'static str,
    /// Exact text of every import statement expected back, in order.
    expected_imports: &'static [&'static str],
    /// When `true`, the test requires the parse-errors builtin to report nothing.
    expect_clean_parse: bool,
}
/// Builds the ground-truth table the agreement test iterates over.
///
/// Each entry names a fixture, the language identifier to parse it as, and the
/// exact import statement texts expected back, in order. Every entry currently
/// requires a clean parse.
fn corpus() -> Vec<Case> {
    // Shorthand for an entry whose fixture must parse without errors.
    fn clean(
        fixture: &'static str,
        language: &'static str,
        expected_imports: &'static [&'static str],
    ) -> Case {
        Case {
            fixture,
            language,
            expected_imports,
            expect_clean_parse: true,
        }
    }

    vec![
        // The zig fixture is expected to yield no import statements at all.
        clean("sample.zig", "zig", &[]),
        clean(
            "sample.py",
            "python",
            &["import os", "from typing import List, Optional"],
        ),
        clean("sample.rs", "rust", &["use std::collections::HashMap;"]),
        // A grouped Go import is expected back as one multi-line statement.
        clean("sample.go", "go", &["import (\n\t\"fmt\"\n\t\"os\"\n)"]),
        clean(
            "sample.ts",
            "typescript",
            &[
                "import { readFile } from 'fs';",
                "import path from \"path\";",
            ],
        ),
        clean(
            "sample.c",
            "c",
            &["#include <stdio.h>", "#include \"local.h\""],
        ),
    ]
}
/// Creates a fresh builtin registry and lets `AstCapability` register its
/// builtins into it, so the tests can look those builtins up by name.
fn ast_registry() -> BuiltinRegistry {
    let mut builtins = BuiltinRegistry::new();
    AstCapability.register_builtins(&mut builtins);
    builtins
}
/// Builds a dict `VmValue` from `(key, value)` pairs.
///
/// Each key is converted into the map's key type and each value is cloned, so
/// the caller keeps ownership of `pairs`.
fn dict(pairs: &[(&str, VmValue)]) -> VmValue {
    let mut entries: harn_vm::value::DictMap = Default::default();
    pairs.iter().for_each(|(key, value)| {
        entries.insert((*key).into(), value.clone());
    });
    VmValue::dict(entries)
}
/// Looks up the builtin called `name` and calls its handler with `payload` as
/// the single argument, returning the handler's value.
///
/// # Panics
///
/// Panics if no builtin with that name is registered, or if the handler
/// returns an error.
fn invoke(registry: &BuiltinRegistry, name: &str, payload: VmValue) -> VmValue {
    let entry = match registry.find(name) {
        Some(found) => found,
        None => panic!("builtin {name} not registered"),
    };
    let outcome = (entry.handler)(&[payload]);
    match outcome {
        Ok(value) => value,
        Err(err) => panic!("{name} failed: {err}"),
    }
}
/// Wraps a string slice in a `VmValue::String`, copying it into an `ArcStr`.
fn vstring(s: &str) -> VmValue {
    let text = arcstr::ArcStr::from(s);
    VmValue::String(text)
}
/// Returns a clone of the entry stored under `key` in a dict value.
///
/// # Panics
///
/// Panics if `value` is not a `VmValue::Dict`, or if the dict has no entry
/// for `key`.
fn dict_field(value: &VmValue, key: &str) -> VmValue {
    match value {
        VmValue::Dict(entries) => match entries.get(key) {
            Some(found) => found.clone(),
            None => panic!("missing field `{key}` on {value:?}"),
        },
        other => panic!("expected dict, got {other:?}"),
    }
}
/// Returns the shared element vector of a list value.
///
/// Only the `Arc` handle is cloned; the elements themselves are not copied.
///
/// # Panics
///
/// Panics if `value` is not a `VmValue::List`.
fn list_value(value: &VmValue) -> Arc<Vec<VmValue>> {
    if let VmValue::List(items) = value {
        return Arc::clone(items);
    }
    panic!("expected list, got {value:?}")
}
/// Returns the contents of a string value as an owned `String`.
///
/// # Panics
///
/// Panics if `value` is not a `VmValue::String`.
fn string_value(value: &VmValue) -> String {
    if let VmValue::String(text) = value {
        text.to_string()
    } else {
        panic!("expected string, got {value:?}")
    }
}
/// Returns the flag held by a bool value.
///
/// # Panics
///
/// Panics if `value` is not a `VmValue::Bool`.
fn bool_value(value: &VmValue) -> bool {
    if let VmValue::Bool(flag) = value {
        return *flag;
    }
    panic!("expected bool, got {value:?}")
}
/// Reads the fixture called `name` from `tests/fixtures/parser_agreement`
/// under the crate's manifest directory and returns its contents.
///
/// # Panics
///
/// Panics with the full path if the file cannot be read as a string.
fn fixture_source(name: &str) -> String {
    // CARGO_MANIFEST_DIR is resolved at compile time by `env!`.
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("tests/fixtures/parser_agreement");
    path.push(name);

    match std::fs::read_to_string(&path) {
        Ok(contents) => contents,
        Err(err) => panic!("read fixture {}: {err}", path.display()),
    }
}
/// Calls the `hostlib_ast_extract_imports` builtin on `source` and returns the
/// `text` of every entry in the result's `statements` list, in order.
///
/// # Panics
///
/// Panics if the result's `supported` flag is false, or if the result does not
/// have the dict/list/string shape read here.
fn extracted_imports(registry: &BuiltinRegistry, source: &str, language: &str) -> Vec<String> {
    let payload = dict(&[("source", vstring(source)), ("language", vstring(language))]);
    let response = invoke(registry, "hostlib_ast_extract_imports", payload);

    // Check `supported` first so an unsupported language fails with a clear message.
    let supported = bool_value(&dict_field(&response, "supported"));
    assert!(
        supported,
        "language `{language}` must be a supported tree-sitter grammar"
    );

    let statements = list_value(&dict_field(&response, "statements"));
    let mut texts = Vec::with_capacity(statements.len());
    for statement in statements.iter() {
        texts.push(string_value(&dict_field(statement, "text")));
    }
    texts
}
fn parse_error_messages(registry: &BuiltinRegistry, source: &str, language: &str) -> Vec<String> {
let result = invoke(
registry,
"hostlib_ast_parse_errors",
dict(&[
("content", vstring(source)),
("language", vstring(language)),
]),
);
assert!(
bool_value(&dict_field(&result, "supported")),
"language `{language}` must be a supported tree-sitter grammar"
);
list_value(&dict_field(&result, "errors"))
.iter()
.map(|e| string_value(&dict_field(e, "message")))
.collect()
}
/// Runs every corpus entry through the AST builtins and checks the results
/// against the hand-written ground truth in `corpus()`.
///
/// For each fixture, the extracted import statement texts must equal
/// `expected_imports` exactly (same order, same text). When the entry sets
/// `expect_clean_parse`, the parse-errors builtin must also report nothing.
#[test]
fn bundled_parser_facts_agree_with_ground_truth() {
    let registry = ast_registry();
    for case in corpus() {
        let source = fixture_source(case.fixture);

        let imports = extracted_imports(&registry, &source, case.language);
        // Convert to owned strings so both sides of the comparison are `Vec<String>`.
        let expected: Vec<String> = case
            .expected_imports
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert_eq!(
            imports, expected,
            "parser-agreement MISMATCH on `{}` ({}): extracted imports diverged from ground truth — \
             a bundled grammar bump is shipping phantom/dropped import facts to the model",
            case.fixture, case.language,
        );

        let errors = parse_error_messages(&registry, &source, case.language);
        if case.expect_clean_parse {
            assert!(
                errors.is_empty(),
                "parser-agreement MISMATCH on `{}` ({}): expected a CLEAN parse but the bundled \
                 grammar reported {} error(s): {:?} — a grammar regression is mis-lexing valid \
                 source into phantom parse errors",
                case.fixture,
                case.language,
                errors.len(),
                errors,
            );
        }
    }
}
/// Regression guard for the zig `\\` multiline-string case (the failure
/// message refers to it as the #3010 class).
///
/// First confirms the fixture still contains a `\\SELECT` multiline string, so
/// the test cannot pass vacuously. It then requires that the zig fixture
/// produces no parse errors and no import statements.
#[test]
fn zig_multiline_string_does_not_regress_to_phantom_facts() {
    let registry = ast_registry();
    let source = fixture_source("sample.zig");

    // Guard the fixture itself: without the multiline string the checks below prove nothing.
    assert!(
        source.contains("\\\\SELECT"),
        "the seed fixture must contain a `\\\\` multiline string"
    );

    let errors = parse_error_messages(&registry, &source, "zig");
    assert!(
        errors.is_empty(),
        "REGRESSION: the bundled tree-sitter-zig grammar mis-lexed the `\\\\` multiline string \
         into {} phantom parse error(s): {:?} (this is the #3010 class)",
        errors.len(),
        errors,
    );

    let imports = extracted_imports(&registry, &source, "zig");
    assert!(
        imports.is_empty(),
        "zig `@import` builtins are not import declarations; the grammar must surface none, got {imports:?}"
    );
}