use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::{Component, Path, PathBuf};
use std::process::{self, Stdio};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::commands::run::{
install_cli_llm_mock_mode, persist_cli_llm_mock_recording, CliLlmMockMode,
};
use crate::env_guard::ScopedEnvVar;
use crate::json_envelope::{self, JsonEnvelope, JsonError};
use crate::test_runner;
use crate::{execute_with_skill_dirs, execute_with_skill_dirs_and_harness};
/// Schema version stamped on every `JsonEnvelope` this module emits for
/// conformance test runs (both error envelopes and the final report).
pub(crate) const CONFORMANCE_TEST_SCHEMA_VERSION: u32 = 1;
/// Per-test outcome reported in the JSON conformance report.
///
/// Serialized in `snake_case` (e.g. `xfail_unexpected_pass`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
enum ConformanceJsonOutcome {
/// The test passed and carries no xfail marker.
Pass,
/// The test failed and carries no xfail marker.
Fail,
/// The test carries an xfail marker and failed, as expected.
XfailExpected,
/// The test carries an xfail marker but passed, so the marker is stale.
XfailUnexpectedPass,
}
/// One entry of the `results` array in the JSON conformance report.
#[derive(Debug, Clone, Serialize)]
struct ConformanceJsonResult {
/// Identifier of the test case (the runner fills this with the test's
/// path relative to the suite root).
name: String,
/// Classification of the run, taking any xfail marker into account.
outcome: ConformanceJsonOutcome,
/// Duration reported by the case evaluation, in milliseconds.
duration_ms: u64,
/// Human-readable failure or xfail explanation, when there is one.
message: Option<String>,
/// `HARN-…` diagnostic codes extracted from the failure message.
diagnostic_codes: Vec<String>,
}
/// Aggregate counters for the JSON conformance report.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Clone, Default, Serialize)]
struct ConformanceJsonSummary {
/// Number of `Pass` outcomes recorded.
pass: u64,
/// Number of `Fail` outcomes recorded.
fail: u64,
/// Number of `XfailExpected` outcomes recorded.
xfail_expected: u64,
/// Number of `XfailUnexpectedPass` outcomes recorded.
xfail_unexpected_pass: u64,
/// Number of skipped tests.
/// NOTE(review): `record` never increments this counter because no
/// outcome variant maps to it — confirm whether it is set elsewhere.
skipped: u64,
}
impl ConformanceJsonSummary {
    /// Adds one to the counter that corresponds to `outcome`.
    fn record(&mut self, outcome: ConformanceJsonOutcome) {
        let counter = match outcome {
            ConformanceJsonOutcome::Pass => &mut self.pass,
            ConformanceJsonOutcome::Fail => &mut self.fail,
            ConformanceJsonOutcome::XfailExpected => &mut self.xfail_expected,
            ConformanceJsonOutcome::XfailUnexpectedPass => &mut self.xfail_unexpected_pass,
        };
        *counter += 1;
    }

    /// Returns `true` when nothing failed and no xfail-marked test passed
    /// unexpectedly.
    fn is_success(&self) -> bool {
        let blocking_counts = [self.fail, self.xfail_unexpected_pass];
        blocking_counts.iter().all(|&count| count == 0)
    }
}
/// Payload (`data`) of the JSON envelope printed for a conformance run.
#[derive(Debug, Clone, Serialize)]
struct ConformanceJsonReport {
/// Hash produced by `conformance_snapshot_key` for the selected tests;
/// serialized under the camelCase key `snapshotKey`.
#[serde(rename = "snapshotKey")]
snapshot_key: String,
/// One entry per evaluated test case.
results: Vec<ConformanceJsonResult>,
/// Aggregate outcome counters.
summary: ConformanceJsonSummary,
}
fn normalize_expected_output(text: &str) -> String {
text.lines()
.map(normalize_output_line)
.collect::<Vec<_>>()
.join("\n")
}
fn normalize_actual_output(text: &str) -> String {
text.lines()
.map(normalize_output_line)
.collect::<Vec<_>>()
.join("\n")
}
/// Replaces the elapsed-time value of a `[timer] …: <n>ms` line with the
/// placeholder `<ms>` so timing noise does not break output comparisons.
///
/// A line is rewritten only when it ends in `ms`, contains a `": "`
/// separator before that suffix, and the text before the last separator
/// starts with `"[timer] "`. Every other line is returned unchanged.
fn normalize_output_line(line: &str) -> String {
    let timer_head = line
        .strip_suffix("ms")
        .and_then(|without_unit| without_unit.rsplit_once(": "))
        .map(|(head, _elapsed)| head)
        .filter(|head| head.starts_with("[timer] "));
    match timer_head {
        Some(head) => format!("{head}: <ms>"),
        None => line.to_owned(),
    }
}
/// Renders `path` as a platform-independent string that uses `/` as the
/// separator.
///
/// * `.` components are dropped.
/// * `..` components are kept verbatim.
/// * A root component contributes exactly one `/`, so `/a/b` renders as
///   `/a/b`. Joining the root like an ordinary segment would produce a
///   doubled separator (`//a/b`).
/// * A prefix component (e.g. a Windows drive) is emitted verbatim ahead
///   of the root.
///
/// Non-UTF-8 components are converted lossily.
fn logical_path(path: &Path) -> String {
    /// Appends `segment`, inserting a `/` unless `out` is empty or already
    /// ends with one (which is the case right after the root component).
    fn push_segment(out: &mut String, segment: &str) {
        if !out.is_empty() && !out.ends_with('/') {
            out.push('/');
        }
        out.push_str(segment);
    }

    let mut out = String::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::RootDir => out.push('/'),
            Component::Prefix(prefix) => out.push_str(&prefix.as_os_str().to_string_lossy()),
            Component::ParentDir => push_segment(&mut out, ".."),
            Component::Normal(part) => push_segment(&mut out, &part.to_string_lossy()),
        }
    }
    out
}
/// Produces a naive line-by-line diff of `expected` against `actual`.
///
/// Lines are compared position by position (no alignment is attempted).
/// Matching lines are echoed with a leading space; differing positions emit
/// a red `- expected` line followed by a green `+ actual` line, using ANSI
/// colour escapes. When one side has fewer lines, the missing lines are
/// treated as empty strings.
fn simple_diff(expected: &str, actual: &str) -> String {
    let mut want = expected.lines();
    let mut got = actual.lines();
    let mut rendered = String::new();
    loop {
        let (exp, act) = match (want.next(), got.next()) {
            (None, None) => break,
            (exp, act) => (exp.unwrap_or(""), act.unwrap_or("")),
        };
        if exp != act {
            rendered.push_str(&format!("\x1b[31m- {exp}\x1b[0m\n"));
            rendered.push_str(&format!("\x1b[32m+ {act}\x1b[0m\n"));
            continue;
        }
        rendered.push_str(&format!(" {exp}\n"));
    }
    rendered
}
/// Checks whether `actual_error` satisfies the expectation in
/// `expected_spec` (the contents of an `.error` file).
///
/// Each non-blank line of the spec is an alternative pattern, trimmed and
/// handed to `error_line_matches`; the error matches when ANY alternative
/// matches.
///
/// Blank lines between alternatives are ignored. Treating them as patterns
/// would turn each into an empty substring pattern, which matches every
/// error and makes the whole expectation vacuous.
///
/// A spec with no non-blank line at all keeps its previous meaning: the
/// empty pattern, which matches any error.
fn error_matches(actual_error: &str, expected_spec: &str) -> bool {
    let mut alternatives = expected_spec
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .peekable();
    if alternatives.peek().is_none() {
        // Entirely blank spec: preserve the "any error" behavior.
        return error_line_matches(actual_error, expected_spec.trim());
    }
    alternatives.any(|pattern| error_line_matches(actual_error, pattern))
}
/// Matches `actual_error` against a single expectation pattern.
///
/// * `re:<regex>` — the remainder (trimmed) is compiled as a regular
///   expression and searched for in `actual_error`. An invalid regex prints
///   a warning to stderr and counts as "no match".
/// * anything else — plain substring containment.
fn error_line_matches(actual_error: &str, pattern: &str) -> bool {
    let Some(re_pattern) = pattern.strip_prefix("re:") else {
        return actual_error.contains(pattern);
    };
    Regex::new(re_pattern.trim())
        .map(|re| re.is_match(actual_error))
        .unwrap_or_else(|_| {
            eprintln!(" warning: invalid regex in .error file: {re_pattern}");
            false
        })
}
/// Escapes the five characters with predefined XML entities so `s` can be
/// embedded in element content or in a double- or single-quoted attribute.
///
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`,
/// `'` → `&apos;`. All other characters are copied through unchanged.
///
/// The string is escaped in a single pass, so an ampersand introduced by
/// one replacement is never escaped a second time.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Writes `results` to `path` as a single-suite JUnit XML document.
///
/// Each result tuple is `(test name, passed, failure message, duration in
/// milliseconds)`. Durations are rendered in seconds with three decimals.
/// Failing tests get a `<failure>` child carrying the escaped message.
///
/// A write error is reported on stderr and is not propagated. When the write
/// succeeds and `announce` is set, a confirmation line is printed to stdout.
fn write_junit_xml(path: &str, results: &[(String, bool, String, u64)], announce: bool) {
    let total = results.len();
    let failures = results.iter().filter(|(_, ok, _, _)| !*ok).count();
    let total_time: f64 = results
        .iter()
        .map(|(_, _, _, millis)| *millis as f64 / 1000.0)
        .sum();

    let mut doc = String::from("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    doc.push_str(&format!(
        "<testsuite name=\"harn\" tests=\"{total}\" failures=\"{failures}\" time=\"{total_time:.3}\">\n"
    ));
    for (name, passed, error_msg, duration_ms) in results {
        let escaped_name = xml_escape(name);
        let time = *duration_ms as f64 / 1000.0;
        doc.push_str(&format!(
            " <testcase name=\"{escaped_name}\" time=\"{time:.3}\""
        ));
        if !*passed {
            let escaped = xml_escape(error_msg);
            doc.push_str(">\n");
            doc.push_str(&format!(
                " <failure message=\"test failed\">{escaped}</failure>\n"
            ));
            doc.push_str(" </testcase>\n");
            continue;
        }
        // Passing test cases are self-closing elements.
        doc.push_str(" />\n");
    }
    doc.push_str("</testsuite>\n");

    match fs::write(path, &doc) {
        Err(e) => eprintln!("Failed to write JUnit XML to {path}: {e}"),
        Ok(()) if announce => println!("JUnit XML written to {path}"),
        Ok(()) => {}
    }
}
/// Collects the `.harn` files found under `dir` by delegating to
/// `super::collect_harn_files` and returns them as a fresh vector.
///
/// NOTE(review): despite the name, no sorting happens in this function; the
/// ordering is whatever `super::collect_harn_files` produces — confirm that
/// helper yields a sorted list.
fn collect_harn_files_sorted(dir: &Path) -> Vec<PathBuf> {
let mut files = Vec::new();
super::collect_harn_files(dir, &mut files);
files
}
/// Chooses the LLM mock mode for a conformance test.
///
/// If a sibling fixture with the extension `llm-mock.jsonl` exists next to
/// `harn_file`, the test replays that fixture; otherwise mocking is off.
fn conformance_llm_mock_mode(harn_file: &Path) -> CliLlmMockMode {
    let fixture_path = harn_file.with_extension("llm-mock.jsonl");
    if !fixture_path.is_file() {
        return CliLlmMockMode::Off;
    }
    CliLlmMockMode::Replay { fixture_path }
}
struct TestbenchSidecarConfig {
process_tape: Option<PathBuf>,
fs_overlay: Option<PathBuf>,
expected_tape: Option<PathBuf>,
annotations: Option<PathBuf>,
harness: Option<PathBuf>,
}
impl TestbenchSidecarConfig {
fn is_empty(&self) -> bool {
self.process_tape.is_none()
&& self.fs_overlay.is_none()
&& self.expected_tape.is_none()
&& self.annotations.is_none()
}
}
/// Probes the filesystem for the sidecars that accompany `harn_file`.
///
/// Each sidecar path is derived by swapping the test file's extension. A
/// field is populated only when the path exists with the expected kind
/// (regular file, or directory for the filesystem overlay).
fn conformance_testbench_config(harn_file: &Path) -> TestbenchSidecarConfig {
    let existing_file = |extension: &str| {
        let candidate = harn_file.with_extension(extension);
        candidate.is_file().then_some(candidate)
    };
    let existing_dir = |extension: &str| {
        let candidate = harn_file.with_extension(extension);
        candidate.is_dir().then_some(candidate)
    };
    TestbenchSidecarConfig {
        process_tape: existing_file("process-tape.json"),
        fs_overlay: existing_dir("fs-overlay"),
        expected_tape: existing_file("testbench-tape"),
        annotations: existing_file("annotations.jsonl"),
        harness: existing_file("harness.json"),
    }
}
/// Deserialized contents of a `<test>.harness.json` sidecar.
///
/// Describes which harness to build for the test, the values it is seeded
/// with, and the expectations checked after the run. Unknown JSON fields
/// are rejected (`deny_unknown_fields`); every field except `mode` is
/// optional and defaults to empty/`None`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct HarnessSidecar {
/// Which kind of harness `build_harness` constructs.
mode: HarnessSidecarMode,
/// Value passed to the mock builder's `clock_at_unix_ms`, when set.
#[serde(default)]
clock_at_unix_ms: Option<i64>,
/// Key/value pairs passed to the mock builder's `env`.
#[serde(default)]
env: BTreeMap<String, String>,
/// Path → contents pairs passed to the mock builder's `fs_read`
/// (contents are passed as the string's UTF-8 bytes).
#[serde(default)]
fs_reads: BTreeMap<String, String>,
/// URL → body pairs passed to the mock builder's `net_get`.
#[serde(default)]
net_gets: BTreeMap<String, String>,
/// Values passed, in order, to the mock builder's `random_u64`.
#[serde(default)]
random_u64: Vec<u64>,
/// Expected harness calls; compared for exact equality (including order)
/// with the recorded calls. Skipped when empty.
#[serde(default)]
expect_calls: Vec<HarnessEventExpectation>,
/// Expected deny events; compared for exact equality with the recorded
/// deny events. Skipped when empty.
#[serde(default)]
expect_deny_events: Vec<HarnessEventExpectation>,
/// Expected value of the harness's captured stdio, when set.
#[serde(default)]
expect_stdio: Option<String>,
/// Expected value of the harness's captured stderr, when set.
#[serde(default)]
expect_stderr: Option<String>,
/// Lines passed, in order, to the mock builder's `stdin_line`.
#[serde(default)]
stdin_lines: Vec<String>,
}
/// Harness flavour selected by a harness sidecar (`"null"` or `"mock"` in
/// JSON, via `snake_case` renaming).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
enum HarnessSidecarMode {
/// Build `harn_vm::Harness::null()`; the sidecar's seed values are unused.
Null,
/// Build a harness from `harn_vm::Harness::mock()` seeded with the
/// sidecar's values.
Mock,
}
/// One expected (or observed) harness event, used for both calls and deny
/// events. Unknown JSON fields are rejected.
#[derive(Debug, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
struct HarnessEventExpectation {
/// Name of the harness sub-handle, as produced by `harness_kind_name`
/// (`"root"` when the kind has no field name).
sub_handle: String,
/// Name of the method that was invoked.
method: String,
/// Arguments of the event; defaults to an empty list when omitted.
#[serde(default)]
args: Vec<String>,
}
impl HarnessSidecar {
fn load(path: &Path) -> Result<Self, String> {
let raw = fs::read_to_string(path)
.map_err(|error| format!("read harness sidecar {}: {error}", path.display()))?;
serde_json::from_str(&raw)
.map_err(|error| format!("parse harness sidecar {}: {error}", path.display()))
}
fn build_harness(&self) -> harn_vm::Harness {
match self.mode {
HarnessSidecarMode::Null => harn_vm::Harness::null(),
HarnessSidecarMode::Mock => {
let mut builder = harn_vm::Harness::mock();
if let Some(unix_ms) = self.clock_at_unix_ms {
builder = builder.clock_at_unix_ms(unix_ms);
}
for (key, value) in &self.env {
builder = builder.env(key.as_str(), value.as_str());
}
for (path, value) in &self.fs_reads {
builder = builder.fs_read(path.as_str(), value.as_bytes().to_vec());
}
for (url, body) in &self.net_gets {
builder = builder.net_get(url.as_str(), body.as_str());
}
for value in &self.random_u64 {
builder = builder.random_u64(*value);
}
for line in &self.stdin_lines {
builder = builder.stdin_line(line.as_str());
}
builder.build()
}
}
}
fn validate(&self, harness: &harn_vm::Harness) -> Vec<String> {
let mut errors = Vec::new();
if !self.expect_calls.is_empty() {
let actual = harness
.calls()
.into_iter()
.map(event_from_call)
.collect::<Vec<_>>();
if actual != self.expect_calls {
errors.push(format!(
"harness calls differed: expected {:?}, actual {:?}",
self.expect_calls, actual
));
}
}
if !self.expect_deny_events.is_empty() {
let actual = harness
.deny_events()
.into_iter()
.map(event_from_deny)
.collect::<Vec<_>>();
if actual != self.expect_deny_events {
errors.push(format!(
"harness deny events differed: expected {:?}, actual {:?}",
self.expect_deny_events, actual
));
}
}
if let Some(expected) = &self.expect_stdio {
let actual = harness.captured_stdio();
if &actual != expected {
errors.push(format!(
"harness captured stdio differed: expected {:?}, actual {:?}",
expected, actual
));
}
}
if let Some(expected) = &self.expect_stderr {
let actual = harness.captured_stderr();
if &actual != expected {
errors.push(format!(
"harness captured stderr differed: expected {:?}, actual {:?}",
expected, actual
));
}
}
errors
}
}
/// Converts a recorded harness call into the comparable expectation shape.
fn event_from_call(call: harn_vm::HarnessCall) -> HarnessEventExpectation {
    let sub_handle = harness_kind_name(call.sub_handle).to_string();
    HarnessEventExpectation {
        sub_handle,
        method: call.method,
        args: call.args,
    }
}
/// Converts a recorded deny event into the comparable expectation shape.
fn event_from_deny(event: harn_vm::DenyEvent) -> HarnessEventExpectation {
    let sub_handle = harness_kind_name(event.sub_handle).to_string();
    HarnessEventExpectation {
        sub_handle,
        method: event.method,
        args: event.args,
    }
}
/// Returns the field name of `kind`, or `"root"` when it has none.
fn harness_kind_name(kind: harn_vm::HarnessKind) -> &'static str {
    match kind.field_name() {
        Some(name) => name,
        None => "root",
    }
}
/// How the execution of a conformance test ended.
enum ConformanceExecution {
/// The script finished within the timeout, yielding either its output
/// (`Ok`) or an error message (`Err`).
Completed(Result<String, String>),
/// The script did not finish before the timeout elapsed.
TimedOut,
}
/// Result of `execute_conformance_source` for one test.
struct ConformanceRun {
/// Outcome of running the script itself.
execution: ConformanceExecution,
/// Wall-clock time spent in the (timeout-wrapped) execution, in
/// milliseconds.
duration_ms: u64,
/// All sidecar validation problems (tape fidelity, annotations, harness
/// expectations) joined with `"; "`, or `None` when there were none.
sidecar_error: Option<String>,
}
/// Fixed starting time, in milliseconds, for the paused testbench clock used
/// by conformance runs. Read as Unix-epoch milliseconds this is
/// 2026-01-01T00:00:00Z — NOTE(review): confirm `ClockConfig::Paused`
/// interprets `starting_at_ms` as epoch milliseconds.
const CONFORMANCE_TESTBENCH_START_MS: i64 = 1_767_225_600_000;
/// Runs one conformance test's `source` and validates its sidecars.
///
/// Steps, in order:
/// 1. reset the VM's thread-local state and install `llm_mock_mode`;
/// 2. load the harness sidecar (if any) and build its harness;
/// 3. activate a testbench session when any testbench sidecar is present;
/// 4. execute the script under a `timeout_ms` timeout;
/// 5. clear the LLM mock mode and finalize the testbench session;
/// 6. check the emitted tape, annotations and harness expectations.
///
/// # Returns
/// `Ok(ConformanceRun)` with the execution outcome, the measured duration and
/// any sidecar validation problems (joined into `sidecar_error`).
///
/// # Errors
/// Returns `Err` for setup/teardown failures: installing the mock mode,
/// loading the harness sidecar, creating the tape temp dir, and activating or
/// finalizing the testbench. Sidecar *mismatches* are not errors here; they
/// are reported through `ConformanceRun::sidecar_error`.
///
/// NOTE(review): the early `?` returns after `install_cli_llm_mock_mode`
/// (harness load, tempdir, activate, finalize) skip or follow
/// `clear_cli_llm_mock_mode` inconsistently — confirm that the
/// `reset_thread_local_state` call at the start of the next run is enough to
/// clean up.
async fn execute_conformance_source(
source: &str,
harn_file: &Path,
timeout_ms: u64,
llm_mock_mode: &CliLlmMockMode,
testbench: &TestbenchSidecarConfig,
cli_skill_dirs: &[PathBuf],
) -> Result<ConformanceRun, String> {
use harn_vm::testbench::{
ClockConfig, FilesystemConfig, SubprocessConfig, TapeConfig, Testbench,
};
harn_vm::reset_thread_local_state();
install_cli_llm_mock_mode(llm_mock_mode)
.map_err(|error| format!("llm mock setup error: {error}"))?;
let harness_sidecar = match testbench.harness.as_ref() {
Some(path) => Some(HarnessSidecar::load(path)?),
None => None,
};
let harness = harness_sidecar.as_ref().map(HarnessSidecar::build_harness);
// `harness` is moved into the execution below, so keep a clone to inspect
// afterwards when validating the sidecar's expectations.
let harness_for_validation = harness.clone();
// A tape is only emitted when something will consume it: an expected tape
// to compare against, or annotations to validate.
let tape_temp_dir = if testbench.expected_tape.is_some() || testbench.annotations.is_some() {
Some(tempfile::tempdir().map_err(|e| format!("tempdir for tape: {e}"))?)
} else {
None
};
let tape_path = tape_temp_dir
.as_ref()
.map(|dir| dir.path().join("run.tape"));
// Activate a testbench session only when at least one testbench sidecar is
// present (the harness sidecar alone does not trigger this).
let bench = if !testbench.is_empty() {
let clock = ClockConfig::Paused {
starting_at_ms: CONFORMANCE_TESTBENCH_START_MS,
};
let subprocess = match &testbench.process_tape {
Some(tape) => SubprocessConfig::Replay { tape: tape.clone() },
None => SubprocessConfig::Real,
};
let filesystem = match &testbench.fs_overlay {
Some(root) => FilesystemConfig::Overlay {
worktree: root.clone(),
},
None => FilesystemConfig::Real,
};
let tape_cfg = match &tape_path {
Some(path) => TapeConfig::Emit {
path: path.clone(),
argv: Vec::new(),
script_path: Some(harn_file.to_string_lossy().into_owned()),
},
None => TapeConfig::Off,
};
Some(
Testbench {
clock,
llm: harn_vm::testbench::LlmConfig::Real,
filesystem,
subprocess,
network: harn_vm::testbench::NetworkConfig::Real,
tape: tape_cfg,
}
.activate()
.map_err(|e| format!("testbench activate: {e}"))?,
)
} else {
None
};
// Execute the script, bounded by `timeout_ms`; the harness-aware entry
// point is used only when a harness sidecar was loaded.
let start = std::time::Instant::now();
let result = tokio::time::timeout(std::time::Duration::from_millis(timeout_ms), async {
match harness {
Some(harness) => {
execute_with_skill_dirs_and_harness(
source,
Some(harn_file),
cli_skill_dirs,
harness,
)
.await
}
None => execute_with_skill_dirs(source, Some(harn_file), cli_skill_dirs).await,
}
})
.await;
let duration_ms = start.elapsed().as_millis() as u64;
harn_vm::llm::clear_cli_llm_mock_mode();
// Finalize the session before the emitted tape is loaded below.
if let Some(session) = bench {
session
.finalize()
.map_err(|e| format!("testbench finalize: {e}"))?;
}
// Sidecar problems are collected rather than returned immediately so that
// all of them are reported together.
let mut sidecar_errors: Vec<String> = Vec::new();
let actual_tape = match (&tape_path, &testbench.expected_tape) {
// Both an emitted and an expected tape: load both and require them to be
// byte-identical.
(Some(tape_path), Some(expected_path)) => {
use harn_vm::testbench::fidelity::{compare, FidelityMode};
use harn_vm::testbench::tape::EventTape;
match (EventTape::load(tape_path), EventTape::load(expected_path)) {
(Ok(actual), Ok(expected)) => {
let report = compare(&expected, &actual, FidelityMode::ByteIdentical);
if !report.is_byte_identical() {
sidecar_errors.push(format!(
"tape fidelity: {} divergence(s) vs {}",
report.divergences.len(),
expected_path.display()
));
}
Some(actual)
}
(Err(e), _) => {
sidecar_errors.push(format!("load emitted tape: {e}"));
None
}
(_, Err(e)) => {
sidecar_errors.push(format!(
"load expected tape {}: {e}",
expected_path.display()
));
None
}
}
}
// Emitted tape without an expected tape (annotations only): just load it.
(Some(tape_path), None) => {
use harn_vm::testbench::tape::EventTape;
match EventTape::load(tape_path) {
Ok(tape) => Some(tape),
Err(e) => {
sidecar_errors.push(format!("load emitted tape: {e}"));
None
}
}
}
_ => None,
};
// Annotations are validated only when the emitted tape loaded successfully.
if let (Some(annotations_path), Some(actual)) = (&testbench.annotations, actual_tape.as_ref()) {
use harn_vm::testbench::annotations::{validate_against_tape, AnnotationTape};
match AnnotationTape::load(annotations_path) {
Ok(annotations) => {
let report = validate_against_tape(&annotations, actual);
if !report.is_ok() {
sidecar_errors.push(format!(
"annotations: {} problem(s) in {}",
report.problems.len(),
annotations_path.display()
));
}
}
Err(e) => sidecar_errors.push(format!(
"load annotations {}: {e}",
annotations_path.display()
)),
}
}
// Check the harness sidecar's expectations against the cloned harness.
if let (Some(sidecar), Some(harness)) = (&harness_sidecar, &harness_for_validation) {
sidecar_errors.extend(sidecar.validate(harness));
}
let sidecar_error: Option<String> = if sidecar_errors.is_empty() {
None
} else {
Some(sidecar_errors.join("; "))
};
// An `Err` from `tokio::time::timeout` means the deadline elapsed.
let execution = match result {
Ok(inner_result) => ConformanceExecution::Completed(inner_result),
Err(_) => ConformanceExecution::TimedOut,
};
Ok(ConformanceRun {
execution,
duration_ms,
sidecar_error,
})
}
/// Re-runs a single conformance test in a child process with optimizations
/// disabled and reports whether it still passes.
///
/// The current executable is invoked as
/// `test conformance <harn_file> --timeout <timeout_ms> [--skill-dir <dir>]…`
/// with `harn_vm::HARN_DISABLE_OPTIMIZATIONS_ENV` set to `"1"`. The parent
/// waits up to `timeout_ms + 2000` ms; the child is killed if the handle is
/// dropped.
///
/// # Returns
/// The elapsed wall-clock time in milliseconds when the child exits
/// successfully.
///
/// # Errors
/// A message describing why the check failed: the executable could not be
/// resolved, the child could not be launched, it timed out, or it exited
/// unsuccessfully (in which case non-blank stdout/stderr are appended).
async fn verify_unoptimized_conformance_subprocess(
    harn_file: &Path,
    timeout_ms: u64,
    cli_skill_dirs: &[PathBuf],
) -> Result<u64, String> {
    let harn_exe = match std::env::current_exe() {
        Ok(path) => path,
        Err(error) => {
            return Err(format!(
                "failed to resolve current harn executable: {error}"
            ));
        }
    };
    let started = std::time::Instant::now();

    let mut child = tokio::process::Command::new(harn_exe);
    child
        .arg("test")
        .arg("conformance")
        .arg(harn_file)
        .arg("--timeout")
        .arg(timeout_ms.to_string());
    for dir in cli_skill_dirs {
        child.arg("--skill-dir").arg(dir);
    }
    child
        .env(harn_vm::HARN_DISABLE_OPTIMIZATIONS_ENV, "1")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .kill_on_drop(true);

    // Give the child a little slack beyond its own internal timeout.
    let deadline = std::time::Duration::from_millis(timeout_ms.saturating_add(2_000));
    let Ok(waited) = tokio::time::timeout(deadline, child.output()).await else {
        return Err(format!(
            "unoptimized subprocess timed out after {}ms",
            deadline.as_millis()
        ));
    };
    let output =
        waited.map_err(|error| format!("unoptimized subprocess launch failed: {error}"))?;
    let duration_ms = started.elapsed().as_millis() as u64;

    if !output.status.success() {
        let mut message = format!(
            "unoptimized subprocess exited with status {}",
            output.status
        );
        for (label, bytes) in [("stdout", &output.stdout), ("stderr", &output.stderr)] {
            let text = String::from_utf8_lossy(bytes);
            if text.trim().is_empty() {
                continue;
            }
            message.push_str(&format!("\n{label}:\n"));
            message.push_str(text.trim_end());
        }
        return Err(message);
    }
    Ok(duration_ms)
}
/// Canonicalizes `path`, turning any I/O failure into a readable message.
///
/// # Errors
/// Returns `"Failed to canonicalize <path>: <io error>"` when the path cannot
/// be canonicalized (for example because it does not exist).
fn canonicalize_or_err(path: &Path) -> Result<PathBuf, String> {
    match path.canonicalize() {
        Ok(resolved) => Ok(resolved),
        Err(error) => Err(format!(
            "Failed to canonicalize {}: {error}",
            path.display()
        )),
    }
}
/// Reads the file at `path` and extracts its `@xfail:` reason, if any.
///
/// Returns `None` when the file cannot be read as UTF-8 text or contains no
/// marker recognized by `parse_xfail_marker`.
fn read_xfail_marker(path: &Path) -> Option<String> {
    fs::read_to_string(path)
        .ok()
        .and_then(|source| parse_xfail_marker(&source))
}
/// Looks for an `@xfail: <reason>` marker in the first 50 lines of `source`.
///
/// Only lines that *start* (after leading whitespace) with a comment opener
/// are considered: `//`, `/**`, `/*`, or a leading `*` (block-comment
/// continuation). For the block-comment forms a trailing `*/` is stripped.
/// The comment text must then begin with `@xfail:`; the remainder, trimmed
/// and with any trailing `*/` removed, is the reason.
///
/// Markers with an empty reason are ignored and scanning continues. Returns
/// the first non-empty reason, or `None`.
fn parse_xfail_marker(source: &str) -> Option<String> {
    source.lines().take(50).find_map(|line| {
        let trimmed = line.trim_start();
        let comment_body = if let Some(rest) = trimmed.strip_prefix("//") {
            rest
        } else {
            // Block-comment forms; `/**` must be tried before `/*`.
            let rest = trimmed
                .strip_prefix("/**")
                .or_else(|| trimmed.strip_prefix("/*"))
                .or_else(|| trimmed.strip_prefix('*'))?;
            rest.strip_suffix("*/").unwrap_or(rest)
        };
        let reason = comment_body.trim().strip_prefix("@xfail:")?;
        let reason = reason.trim().trim_end_matches("*/").trim();
        (!reason.is_empty()).then(|| reason.to_string())
    })
}
/// Resolves the optional positional `selection` into the list of `.harn`
/// files to run.
///
/// * No selection: every file collected under the (canonicalized)
///   `suite_root`.
/// * Otherwise the selection is tried as given and, when relative, also as a
///   path under `suite_root`; the first candidate that exists wins. It must
///   canonicalize to a location inside the suite root and be either a single
///   `.harn` file or a directory containing at least one test.
///
/// # Errors
/// Returns a descriptive message when the suite root or candidate cannot be
/// canonicalized, the target does not exist, lies outside the suite root, is
/// a non-`.harn` file, or is a directory without any `.harn` files.
fn resolve_conformance_selection(
    suite_root: &Path,
    selection: Option<&str>,
) -> Result<Vec<PathBuf>, String> {
    let suite_root = canonicalize_or_err(suite_root)?;
    let selection = match selection {
        Some(selection) => selection,
        None => return Ok(collect_harn_files_sorted(&suite_root)),
    };

    let raw = PathBuf::from(selection);
    // Relative selections get a second chance relative to the suite root.
    let under_root =
        (!raw.is_absolute() && !raw.starts_with(&suite_root)).then(|| suite_root.join(&raw));
    let candidate = std::iter::once(raw)
        .chain(under_root)
        .find(|path| path.exists())
        .ok_or_else(|| {
            format!(
                "Conformance target not found: {selection}. Expected a file or directory under {}",
                suite_root.display()
            )
        })?;

    let canonical = canonicalize_or_err(&candidate)?;
    if !canonical.starts_with(&suite_root) {
        return Err(format!(
            "Conformance target must be inside {}: {}",
            suite_root.display(),
            candidate.display()
        ));
    }

    if canonical.is_file() {
        let is_harn = canonical.extension().is_some_and(|ext| ext == "harn");
        return if is_harn {
            Ok(vec![canonical])
        } else {
            Err(format!(
                "Conformance target must be a .harn file or directory: {}",
                candidate.display()
            ))
        };
    }

    let files = collect_harn_files_sorted(&canonical);
    if files.is_empty() {
        Err(format!(
            "No .harn conformance tests found under {}",
            candidate.display()
        ))
    } else {
        Ok(files)
    }
}
/// Decides whether `rel_path` is selected by the optional `filter`.
///
/// Filter forms, checked in this order:
/// * `None` — everything matches.
/// * `re:<regex>` — regex search; an invalid regex matches nothing.
/// * contains `|` — alternatives split on `|`, each trimmed and tested as a
///   substring (glob characters are not interpreted in this form).
/// * contains `*` or `?` — glob translated to an unanchored regex (`*` →
///   `.*`, `?` → `.`, everything else escaped).
/// * otherwise — plain substring containment.
fn conformance_filter_matches(rel_path: &str, filter: Option<&str>) -> bool {
    let pattern = match filter {
        Some(pattern) => pattern,
        None => return true,
    };

    if let Some(re_pat) = pattern.strip_prefix("re:") {
        return match Regex::new(re_pat) {
            Ok(re) => re.is_match(rel_path),
            Err(_) => false,
        };
    }

    if pattern.contains('|') {
        return pattern
            .split('|')
            .map(str::trim)
            .any(|alternative| rel_path.contains(alternative));
    }

    let has_glob = pattern.chars().any(|c| matches!(c, '*' | '?'));
    if !has_glob {
        return rel_path.contains(pattern);
    }

    let translated = regex::escape(pattern)
        .replace(r"\*", ".*")
        .replace(r"\?", ".");
    Regex::new(&translated)
        .map(|re| re.is_match(rel_path))
        .unwrap_or(false)
}
/// Verdict for a single conformance test case.
#[derive(Debug, Clone)]
struct ConformanceCaseEvaluation {
/// Whether the case met its expectation.
passed: bool,
/// Failure description; `None` for passing cases.
message: Option<String>,
/// `HARN-…` diagnostic codes found in the failure message (empty on pass).
diagnostic_codes: Vec<String>,
/// Duration attributed to the case, in milliseconds.
duration_ms: u64,
}
impl ConformanceCaseEvaluation {
fn pass(duration_ms: u64) -> Self {
Self {
passed: true,
message: None,
diagnostic_codes: Vec::new(),
duration_ms,
}
}
fn fail(message: impl Into<String>, duration_ms: u64) -> Self {
let message = message.into();
Self {
passed: false,
diagnostic_codes: extract_diagnostic_codes(&message),
message: Some(message),
duration_ms,
}
}
}
/// Extracts the distinct diagnostic codes mentioned in `message`.
///
/// A code looks like `HARN-<SEG>(-<SEG>)*-<3 digits>` where each segment is
/// uppercase alphanumeric. The result is de-duplicated and sorted.
fn extract_diagnostic_codes(message: &str) -> Vec<String> {
    let code_pattern = Regex::new(r"\bHARN-[A-Z0-9]+(?:-[A-Z0-9]+)*-[0-9]{3}\b")
        .expect("diagnostic code regex compiles");
    // Collecting through a `BTreeSet` removes duplicates and orders the codes.
    let unique: BTreeSet<String> = code_pattern
        .find_iter(message)
        .map(|found| found.as_str().to_string())
        .collect();
    unique.into_iter().collect()
}
/// Returns a target-triple-style label for the platform this binary was
/// compiled for, or `"unknown-target"` for OS/architecture combinations not
/// in the table.
fn target_triple_label() -> &'static str {
    let known_targets: [(bool, &'static str); 5] = [
        (
            cfg!(all(target_os = "macos", target_arch = "x86_64")),
            "x86_64-apple-darwin",
        ),
        (
            cfg!(all(target_os = "macos", target_arch = "aarch64")),
            "aarch64-apple-darwin",
        ),
        (
            cfg!(all(target_os = "linux", target_arch = "x86_64")),
            "x86_64-unknown-linux-gnu",
        ),
        (
            cfg!(all(target_os = "linux", target_arch = "aarch64")),
            "aarch64-unknown-linux-gnu",
        ),
        (
            cfg!(all(target_os = "windows", target_arch = "x86_64")),
            "x86_64-pc-windows-msvc",
        ),
    ];
    known_targets
        .iter()
        .find(|(is_current, _)| *is_current)
        .map_or("unknown-target", |&(_, label)| label)
}
/// Feeds one file into `hasher`, or does nothing when `path` is not a file.
///
/// The hashed record is `"file\0" + <logical relative path> + "\0" +
/// <contents> + "\0"`. The path is taken relative to `suite_root` when
/// possible. If the file cannot be read, the text `read-error:<error>` is
/// hashed in place of the contents.
fn hash_file_if_present(hasher: &mut blake3::Hasher, suite_root: &Path, path: &Path) {
    if path.is_file() {
        let rel = match path.strip_prefix(suite_root) {
            Ok(relative) => relative,
            Err(_) => path,
        };
        let contents = fs::read(path)
            .unwrap_or_else(|error| format!("read-error:{error}").into_bytes());
        hasher.update(b"file\0");
        hasher.update(logical_path(rel).as_bytes());
        hasher.update(b"\0");
        hasher.update(&contents);
        hasher.update(b"\0");
    }
}
/// Feeds every file under the directory `path` into `hasher`, in sorted path
/// order. Does nothing when `path` is not a directory.
fn hash_dir_if_present(hasher: &mut blake3::Hasher, suite_root: &Path, path: &Path) {
    if path.is_dir() {
        let mut members = Vec::new();
        collect_files_recursive(path, &mut members);
        members.sort();
        members
            .iter()
            .for_each(|member| hash_file_if_present(hasher, suite_root, member));
    }
}
/// Appends every regular file found under `dir` (recursively) to `out`.
///
/// Entries of each directory are visited in sorted path order, descending
/// into subdirectories as they are encountered. Directories that cannot be
/// read and entries that fail to enumerate are skipped silently.
fn collect_files_recursive(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(reader) = fs::read_dir(dir) else {
        return;
    };
    let mut children: Vec<PathBuf> = reader
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .collect();
    children.sort();
    for child in children {
        if child.is_dir() {
            collect_files_recursive(&child, out);
        } else if child.is_file() {
            out.push(child);
        }
    }
}
/// Computes a hex-encoded BLAKE3 key over the inputs of a conformance run.
///
/// The key covers, in order: the crate version, the target-triple label, the
/// provider catalog hash (or an error marker when it cannot be computed),
/// and then for each selected test its relative path, the test file itself,
/// each sidecar file that exists, and the files of its `fs-overlay`
/// directory. Fields are separated by NUL bytes.
fn conformance_snapshot_key(suite_root: &Path, selected_files: &[(PathBuf, String)]) -> String {
    // Sidecar file extensions, in the order they are hashed.
    const SIDECAR_FILE_EXTENSIONS: [&str; 7] = [
        "expected",
        "error",
        "llm-mock.jsonl",
        "process-tape.json",
        "testbench-tape",
        "annotations.jsonl",
        "harness.json",
    ];

    let mut hasher = blake3::Hasher::new();
    for header in [env!("CARGO_PKG_VERSION"), target_triple_label()] {
        hasher.update(header.as_bytes());
        hasher.update(b"\0");
    }
    match harn_vm::orchestration::current_provider_catalog_hash_blake3() {
        Ok(hash) => {
            hasher.update(hash.as_bytes());
        }
        Err(error) => {
            hasher.update(format!("provider-catalog-error:{error}").as_bytes());
        }
    }
    hasher.update(b"\0");

    for (harn_file, rel_path) in selected_files {
        hasher.update(b"test\0");
        hasher.update(rel_path.as_bytes());
        hasher.update(b"\0");
        hash_file_if_present(&mut hasher, suite_root, harn_file);
        for extension in SIDECAR_FILE_EXTENSIONS {
            let sidecar = harn_file.with_extension(extension);
            hash_file_if_present(&mut hasher, suite_root, &sidecar);
        }
        let overlay = harn_file.with_extension("fs-overlay");
        hash_dir_if_present(&mut hasher, suite_root, &overlay);
    }

    hasher.finalize().to_hex().to_string()
}
/// Flags that control how a conformance run executes and reports.
pub(crate) struct ConformanceRunOptions<'a> {
/// Include expected/actual output alongside the diff in failure messages;
/// also enables per-test timing output.
pub(crate) verbose: bool,
/// Print per-test and total timing information.
pub(crate) timing: bool,
/// After a case passes, re-run it in a subprocess with optimizations
/// disabled and require that run to succeed as well.
pub(crate) differential_optimizations: bool,
/// Emit a JSON envelope instead of human-readable output.
pub(crate) json: bool,
/// Skill directories forwarded to script execution (and to the
/// differential subprocess as `--skill-dir` arguments).
pub(crate) cli_skill_dirs: &'a [PathBuf],
}
/// Runs a single conformance test and decides whether it passed.
///
/// The expectation is chosen by which sidecar exists:
/// * `expected_file` present — the script must succeed and its normalized
///   output must equal the normalized file contents.
/// * otherwise `error_file` present — the script must fail with an error
///   that satisfies the spec in the file (see `error_matches`).
/// * neither — the case fails with "missing .expected or .error file".
///
/// In both modes any sidecar validation error fails the case, and when
/// `options.differential_optimizations` is set a passing case is additionally
/// re-run unoptimized in a subprocess.
///
/// Never returns an error: every problem (I/O, setup, timeout, mismatch) is
/// converted into a failing `ConformanceCaseEvaluation` whose message is
/// prefixed with `rel_path`.
async fn evaluate_conformance_case(
harn_file: &Path,
expected_file: &Path,
error_file: &Path,
rel_path: &str,
timeout_ms: u64,
options: &ConformanceRunOptions<'_>,
) -> ConformanceCaseEvaluation {
// --- Mode 1: compare successful output against the `.expected` file. ---
if expected_file.exists() {
let source = match fs::read_to_string(harn_file) {
Ok(s) => s,
Err(e) => {
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: IO error reading source: {e}"),
0,
);
}
};
// Trailing whitespace is dropped before normalizing.
let expected = match fs::read_to_string(expected_file) {
Ok(s) => normalize_expected_output(s.trim_end()),
Err(e) => {
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: IO error reading expected: {e}"),
0,
);
}
};
let llm_mock_mode = conformance_llm_mock_mode(harn_file);
let testbench_config = conformance_testbench_config(harn_file);
let run = match execute_conformance_source(
&source,
harn_file,
timeout_ms,
&llm_mock_mode,
&testbench_config,
options.cli_skill_dirs,
)
.await
{
Ok(run) => run,
Err(error) => {
return ConformanceCaseEvaluation::fail(format!("{rel_path}: {error}"), 0);
}
};
let duration_ms = run.duration_ms;
// Sidecar problems fail the case regardless of the script's own outcome.
if let Some(sidecar_error) = run.sidecar_error {
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: {sidecar_error}"),
duration_ms,
);
}
return match run.execution {
ConformanceExecution::Completed(Ok(output)) => {
let actual = normalize_actual_output(output.trim_end());
if actual == expected {
if options.differential_optimizations {
// The subprocess's own duration (the `Ok` value) is not used;
// the case keeps the duration of the in-process run.
if let Err(error) = verify_unoptimized_conformance_subprocess(
harn_file,
timeout_ms,
options.cli_skill_dirs,
)
.await
{
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: {error}"),
duration_ms,
);
}
}
ConformanceCaseEvaluation::pass(duration_ms)
} else {
let diff = simple_diff(&expected, &actual);
let msg = if options.verbose {
format!(
"{rel_path}:\n expected:\n {}\n actual:\n {}\n diff:\n{diff}",
expected.lines().collect::<Vec<_>>().join("\n "),
actual.lines().collect::<Vec<_>>().join("\n "),
)
} else {
format!("{rel_path}:\n{diff}")
};
ConformanceCaseEvaluation::fail(msg, duration_ms)
}
}
ConformanceExecution::Completed(Err(e)) => ConformanceCaseEvaluation::fail(
format!("{rel_path}: runtime error: {e}"),
duration_ms,
),
// A timeout is reported with the configured timeout as its duration.
ConformanceExecution::TimedOut => ConformanceCaseEvaluation::fail(
format!("{rel_path}: timed out after {timeout_ms}ms"),
timeout_ms,
),
};
}
// --- Mode 2: the script must fail with an error matching `.error`. ---
if error_file.exists() {
let source = match fs::read_to_string(harn_file) {
Ok(s) => s,
Err(e) => {
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: IO error reading source: {e}"),
0,
);
}
};
let expected_error = match fs::read_to_string(error_file) {
Ok(s) => s.trim_end().to_string(),
Err(e) => {
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: IO error reading expected error: {e}"),
0,
);
}
};
let llm_mock_mode = conformance_llm_mock_mode(harn_file);
let testbench_config = conformance_testbench_config(harn_file);
let run = match execute_conformance_source(
&source,
harn_file,
timeout_ms,
&llm_mock_mode,
&testbench_config,
options.cli_skill_dirs,
)
.await
{
Ok(run) => run,
Err(error) => {
return ConformanceCaseEvaluation::fail(format!("{rel_path}: {error}"), 0);
}
};
let duration_ms = run.duration_ms;
if let Some(sidecar_error) = run.sidecar_error {
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: {sidecar_error}"),
duration_ms,
);
}
return match run.execution {
// The script failed with an error that satisfies the spec: pass, subject
// to the optional differential check.
ConformanceExecution::Completed(Err(ref err)) if error_matches(err, &expected_error) => {
if options.differential_optimizations {
if let Err(error) = verify_unoptimized_conformance_subprocess(
harn_file,
timeout_ms,
options.cli_skill_dirs,
)
.await
{
return ConformanceCaseEvaluation::fail(
format!("{rel_path}: {error}"),
duration_ms,
);
}
}
ConformanceCaseEvaluation::pass(duration_ms)
}
// The script failed, but with an error that does not match the spec.
ConformanceExecution::Completed(Err(err)) => ConformanceCaseEvaluation::fail(
format!(
"{rel_path}:\n expected error containing: {expected_error}\n actual error: {err}"
),
duration_ms,
),
// The script succeeded although an error was expected.
ConformanceExecution::Completed(Ok(_)) => ConformanceCaseEvaluation::fail(
format!("{rel_path}: expected error containing '{expected_error}', but succeeded"),
duration_ms,
),
ConformanceExecution::TimedOut => ConformanceCaseEvaluation::fail(
format!("{rel_path}: timed out after {timeout_ms}ms"),
timeout_ms,
),
};
}
// --- Neither sidecar exists: there is nothing to check against. ---
ConformanceCaseEvaluation::fail(format!("{rel_path}: missing .expected or .error file"), 0)
}
pub(crate) async fn run_conformance_tests(
dir: &str,
selection: Option<&str>,
filter: Option<&str>,
junit_path: Option<&str>,
timeout_ms: u64,
options: ConformanceRunOptions<'_>,
) {
let show_timing = options.verbose || options.timing;
let _disable_llm_calls = ScopedEnvVar::set(harn_vm::llm::LLM_CALLS_DISABLED_ENV, "1");
let _force_optimized_parent = if options.differential_optimizations {
Some(ScopedEnvVar::unset(harn_vm::HARN_DISABLE_OPTIMIZATIONS_ENV))
} else {
None
};
let dir_path = PathBuf::from(dir);
if !dir_path.exists() {
if options.json {
let envelope: JsonEnvelope<ConformanceJsonReport> = JsonEnvelope::err(
CONFORMANCE_TEST_SCHEMA_VERSION,
"conformance_directory_not_found",
format!("Directory not found: {dir}"),
);
println!("{}", json_envelope::to_string_pretty(&envelope));
} else {
eprintln!("Directory not found: {dir}");
}
process::exit(1);
}
let suite_root = match canonicalize_or_err(&dir_path) {
Ok(path) => path,
Err(error) => {
if options.json {
let envelope: JsonEnvelope<ConformanceJsonReport> = JsonEnvelope::err(
CONFORMANCE_TEST_SCHEMA_VERSION,
"conformance_directory_error",
error,
);
println!("{}", json_envelope::to_string_pretty(&envelope));
} else {
eprintln!("{error}");
}
process::exit(1);
}
};
let suite_start = std::time::Instant::now();
let mut passed = 0;
let mut failed = 0;
let mut skipped = 0;
let mut skipped_summary: Vec<(String, String)> = Vec::new();
let mut errors: Vec<String> = Vec::new();
let mut json_results: Vec<ConformanceJsonResult> = Vec::new();
let mut json_summary = ConformanceJsonSummary::default();
let mut junit_results: Vec<(String, bool, String, u64)> = Vec::new();
let harn_files = match resolve_conformance_selection(&suite_root, selection) {
Ok(files) => files,
Err(error) => {
if options.json {
let envelope: JsonEnvelope<ConformanceJsonReport> = JsonEnvelope::err(
CONFORMANCE_TEST_SCHEMA_VERSION,
"conformance_selection_error",
error,
);
println!("{}", json_envelope::to_string_pretty(&envelope));
} else {
eprintln!("{error}");
}
process::exit(1);
}
};
let selected_harn_files: Vec<(PathBuf, String)> = harn_files
.into_iter()
.filter_map(|harn_file| {
let rel_path = harn_file.strip_prefix(&suite_root).unwrap_or(&harn_file);
let rel_path = logical_path(rel_path);
conformance_filter_matches(&rel_path, filter).then_some((harn_file, rel_path))
})
.collect();
for (harn_file, rel_path) in &selected_harn_files {
let expected_file = harn_file.with_extension("expected");
let error_file = harn_file.with_extension("error");
let xfail_reason = read_xfail_marker(harn_file);
if !options.json {
if let Some(reason) = xfail_reason.as_ref() {
println!(" \x1b[33mSKIP\x1b[0m {rel_path} ({reason})");
skipped_summary.push((rel_path.clone(), reason.clone()));
skipped += 1;
continue;
}
}
if !expected_file.exists() && !error_file.exists() {
continue;
}
let evaluation = evaluate_conformance_case(
harn_file,
&expected_file,
&error_file,
rel_path,
timeout_ms,
&options,
)
.await;
if options.json {
let outcome = match (&xfail_reason, evaluation.passed) {
(Some(_), true) => ConformanceJsonOutcome::XfailUnexpectedPass,
(Some(_), false) => ConformanceJsonOutcome::XfailExpected,
(None, true) => ConformanceJsonOutcome::Pass,
(None, false) => ConformanceJsonOutcome::Fail,
};
json_summary.record(outcome);
let message = match (
outcome,
xfail_reason.as_deref(),
evaluation.message.as_deref(),
) {
(ConformanceJsonOutcome::XfailUnexpectedPass, Some(reason), _) => {
Some(format!("xfail marker is stale: {reason}"))
}
(ConformanceJsonOutcome::XfailExpected, Some(reason), Some(message)) => {
Some(format!("expected failure ({reason}): {message}"))
}
(ConformanceJsonOutcome::XfailExpected, Some(reason), None) => {
Some(format!("expected failure ({reason})"))
}
(_, _, Some(message)) => Some(message.to_string()),
_ => None,
};
let junit_passed = matches!(
outcome,
ConformanceJsonOutcome::Pass | ConformanceJsonOutcome::XfailExpected
);
junit_results.push((
rel_path.clone(),
junit_passed,
if junit_passed {
String::new()
} else {
message.clone().unwrap_or_default()
},
evaluation.duration_ms,
));
json_results.push(ConformanceJsonResult {
name: rel_path.clone(),
outcome,
duration_ms: evaluation.duration_ms,
message,
diagnostic_codes: evaluation.diagnostic_codes,
});
continue;
}
if evaluation.passed {
if show_timing {
println!(
" \x1b[32mPASS\x1b[0m {rel_path} ({} ms)",
evaluation.duration_ms
);
} else {
println!(" \x1b[32mPASS\x1b[0m {rel_path}");
}
junit_results.push((
rel_path.clone(),
true,
String::new(),
evaluation.duration_ms,
));
passed += 1;
} else {
if show_timing {
println!(
" \x1b[31mFAIL\x1b[0m {rel_path} ({} ms)",
evaluation.duration_ms
);
} else {
println!(" \x1b[31mFAIL\x1b[0m {rel_path}");
}
let msg = evaluation
.message
.unwrap_or_else(|| format!("{rel_path}: failed without diagnostic message"));
errors.push(msg.clone());
junit_results.push((rel_path.clone(), false, msg, evaluation.duration_ms));
failed += 1;
}
}
let total_duration_ms = suite_start.elapsed().as_millis() as u64;
if options.json {
if let Some(path) = junit_path {
write_junit_xml(path, &junit_results, false);
}
let snapshot_key = conformance_snapshot_key(&suite_root, &selected_harn_files);
let ok = json_summary.is_success();
let error = (!ok).then(|| JsonError {
code: "conformance_failed".to_string(),
message: "one or more conformance tests failed or unexpectedly passed an xfail marker"
.to_string(),
details: serde_json::json!({
"fail": json_summary.fail,
"xfail_unexpected_pass": json_summary.xfail_unexpected_pass,
}),
});
let envelope = JsonEnvelope {
schema_version: CONFORMANCE_TEST_SCHEMA_VERSION,
ok,
data: Some(ConformanceJsonReport {
snapshot_key,
results: json_results,
summary: json_summary,
}),
error,
warnings: Vec::new(),
};
println!("{}", json_envelope::to_string_pretty(&envelope));
if !ok {
process::exit(1);
}
return;
}
println!();
let total = passed + failed + skipped;
if failed > 0 {
println!(
"\x1b[31m{passed} passed, {failed} failed, {skipped} skipped, {total} total\x1b[0m"
);
} else {
println!(
"\x1b[32m{passed} passed, {failed} failed, {skipped} skipped, {total} total\x1b[0m"
);
}
if !skipped_summary.is_empty() {
println!();
println!("Skipped (xfail):");
for (path, reason) in &skipped_summary {
println!(" {path} ({reason})");
}
}
if show_timing {
println!();
println!("Total time: {total_duration_ms} ms");
let mut durations: Vec<u64> = junit_results.iter().map(|r| r.3).collect();
durations.sort();
if !durations.is_empty() {
let n = durations.len();
let p50 = durations[n * 50 / 100];
let p95 = durations[n * 95 / 100];
let p99 = durations[(n * 99 / 100).min(n - 1)];
let avg = durations.iter().sum::<u64>() / n as u64;
println!("Per-test: avg={avg} ms p50={p50} ms p95={p95} ms p99={p99} ms");
}
let mut by_time: Vec<&(String, bool, String, u64)> = junit_results.iter().collect();
by_time.sort_by_key(|entry| std::cmp::Reverse(entry.3));
let top_n = by_time.len().min(10);
if top_n > 0 {
println!();
println!("Slowest {top_n} tests:");
for entry in &by_time[..top_n] {
println!(" {:>6} ms {}", entry.3, entry.0);
}
}
}
if let Some(path) = junit_path {
write_junit_xml(path, &junit_results, true);
}
if !errors.is_empty() {
println!();
println!("Failures:");
for err in &errors {
println!(" {err}");
}
process::exit(1);
}
}
/// Prints a human-readable report for a finished user test run.
///
/// The report consists of an optional "Running ..." banner (only when at least one test ran),
/// one PASS/FAIL line per result with the error text of each failure listed underneath, and a
/// final colored totals line.
fn print_test_results(summary: &test_runner::TestSummary) {
    // Count the distinct files that contributed at least one result.
    let mut seen_files = std::collections::HashSet::new();
    for result in &summary.results {
        seen_files.insert(result.file.as_str());
    }
    let file_count = seen_files.len();

    if summary.total > 0 {
        let test_suffix = if summary.total == 1 { "" } else { "s" };
        let file_suffix = if file_count == 1 { "" } else { "s" };
        println!(
            "Running {} test{} from {} file{}...\n",
            summary.total, test_suffix, file_count, file_suffix,
        );
    }

    for result in &summary.results {
        if !result.passed {
            println!(" \x1b[31mFAIL\x1b[0m {} [{}]", result.name, result.file);
            // A failure may carry a multi-line error; print it line by line.
            let error_lines = result.error.iter().flat_map(|err| err.lines());
            for line in error_lines {
                println!(" {line}");
            }
            continue;
        }
        println!(
            " \x1b[32mPASS\x1b[0m {} [{}] ({} ms)",
            result.name, result.file, result.duration_ms
        );
    }

    println!();
    let totals = if summary.failed > 0 {
        format!(
            "\x1b[31m{} passed, {} failed, {} total ({} ms)\x1b[0m",
            summary.passed, summary.failed, summary.total, summary.duration_ms
        )
    } else if summary.total == 0 {
        "No test pipelines found".to_string()
    } else {
        format!(
            "\x1b[32m{} passed, {} total ({} ms)\x1b[0m",
            summary.passed, summary.total, summary.duration_ms
        )
    };
    println!("{totals}");
}
/// Runs the user tests found at `path_str`, prints the report, and exits the process with
/// status 1 when the path does not exist or at least one test failed.
pub(crate) async fn run_user_tests(
    path_str: &str,
    filter: Option<&str>,
    timeout_ms: u64,
    parallel: bool,
    cli_skill_dirs: &[PathBuf],
) {
    let test_root = PathBuf::from(path_str);
    match test_root.exists() {
        true => {}
        false => {
            eprintln!("Path not found: {path_str}");
            process::exit(1);
        }
    }

    let summary =
        test_runner::run_tests(&test_root, filter, timeout_ms, parallel, cli_skill_dirs).await;
    print_test_results(&summary);

    let any_failed = summary.failed > 0;
    if any_failed {
        process::exit(1);
    }
}
/// Resolves `path_str` into the list of files to run.
///
/// A path to a single file is returned as-is; a directory is expanded through
/// `collect_harn_files_sorted`.
///
/// # Errors
///
/// Returns a message when the path does not exist or when a directory yields no `.harn` files.
fn collect_user_test_files(path_str: &str) -> Result<Vec<PathBuf>, String> {
    let target = PathBuf::from(path_str);
    if !target.exists() {
        Err(format!("Path not found: {path_str}"))
    } else if target.is_file() {
        Ok(vec![target])
    } else {
        let discovered = collect_harn_files_sorted(&target);
        if discovered.is_empty() {
            Err(format!("No .harn files found under {}", target.display()))
        } else {
            Ok(discovered)
        }
    }
}
/// Returns the path obtained by replacing `path`'s extension with `llm-mock.jsonl`, but only
/// when that path refers to an existing regular file.
fn sibling_llm_fixture(path: &Path) -> Option<PathBuf> {
    let candidate = path.with_extension("llm-mock.jsonl");
    if candidate.is_file() {
        Some(candidate)
    } else {
        None
    }
}
/// Loads every `*.json` run record directly inside `dir`, in sorted path order.
///
/// Directory entries that cannot be read are skipped.
///
/// # Errors
///
/// Returns a message when the directory itself cannot be read or when any selected file fails
/// to load as a run record.
fn load_run_records(dir: &Path) -> Result<Vec<harn_vm::orchestration::RunRecord>, String> {
    let entries =
        fs::read_dir(dir).map_err(|error| format!("failed to read {}: {error}", dir.display()))?;

    let mut record_paths = Vec::new();
    for entry in entries {
        let Ok(entry) = entry else {
            continue;
        };
        let candidate = entry.path();
        if candidate.extension().is_some_and(|ext| ext == "json") {
            record_paths.push(candidate);
        }
    }
    // Sort so both sides of a comparison see records in the same order.
    record_paths.sort();

    let mut records = Vec::with_capacity(record_paths.len());
    for record_path in &record_paths {
        let record = harn_vm::orchestration::load_run_record(record_path)
            .map_err(|error| format!("failed to load {}: {error}", record_path.display()))?;
        records.push(record);
    }
    Ok(records)
}
fn load_transcript_responses(dir: &Path) -> Result<Vec<Value>, String> {
let path = dir.join("llm_transcript.jsonl");
if !path.exists() {
return Ok(Vec::new());
}
let content = fs::read_to_string(&path)
.map_err(|error| format!("failed to read {}: {error}", path.display()))?;
content
.lines()
.filter(|line| !line.trim().is_empty())
.filter_map(|line| serde_json::from_str::<Value>(line).ok())
.filter(|event| event.get("type").and_then(Value::as_str) == Some("provider_call_response"))
.map(|event| {
Ok(serde_json::json!({
"provider": event.get("provider").cloned().unwrap_or(Value::Null),
"model": event.get("model").cloned().unwrap_or(Value::Null),
"text": event.get("text").cloned().unwrap_or(Value::Null),
"tool_calls": event.get("tool_calls").cloned().unwrap_or(Value::Null),
"input_tokens": event.get("input_tokens").cloned().unwrap_or(Value::Null),
"output_tokens": event.get("output_tokens").cloned().unwrap_or(Value::Null),
"thinking": event.get("thinking").cloned().unwrap_or(Value::Null),
}))
})
.collect()
}
/// Executes `source` once under the given LLM mock mode, with the run directory and the LLM
/// transcript directory redirected (via scoped environment variables) to the supplied temp dirs.
///
/// After the run, the mock recording is persisted, the mock mode is cleared, and the
/// environment guards are released, in that order, before any result is reported.
///
/// # Errors
///
/// Returns a message when installing the mock mode fails, when persisting the recording fails,
/// when execution itself returns an error, or when execution exceeds `timeout_ms`.
async fn execute_determinism_run(
    source: &str,
    path: &Path,
    timeout_ms: u64,
    llm_mock_mode: &CliLlmMockMode,
    run_dir: &tempfile::TempDir,
    transcript_dir: &tempfile::TempDir,
    cli_skill_dirs: &[PathBuf],
) -> Result<String, String> {
    harn_vm::reset_thread_local_state();
    install_cli_llm_mock_mode(llm_mock_mode)?;

    let run_dir_value = run_dir.path().to_string_lossy();
    let run_dir_guard =
        ScopedEnvVar::set(harn_vm::runtime_paths::HARN_RUN_DIR_ENV, &run_dir_value);
    let transcript_dir_value = transcript_dir.path().to_string_lossy();
    let transcript_dir_guard = ScopedEnvVar::set("HARN_LLM_TRANSCRIPT_DIR", &transcript_dir_value);

    let time_limit = std::time::Duration::from_millis(timeout_ms);
    let outcome = tokio::time::timeout(
        time_limit,
        execute_with_skill_dirs(source, Some(path), cli_skill_dirs),
    )
    .await;

    // Teardown runs unconditionally; the persist result is only inspected afterwards.
    let persisted = persist_cli_llm_mock_recording(llm_mock_mode);
    harn_vm::llm::clear_cli_llm_mock_mode();
    drop(transcript_dir_guard);
    drop(run_dir_guard);
    persisted?;

    // An elapsed timer becomes an error; otherwise the run's own result is passed through.
    outcome.unwrap_or_else(|_| Err(format!("timed out after {timeout_ms}ms")))
}
/// Checks that the artifacts of a first run and its replay agree.
///
/// The two run-record lists must have the same length, each pair of records must be reported
/// identical by `diff_run_records` and have equal `tool_recordings` and `hitl_questions`, and
/// the two provider response lists must be equal.
///
/// # Errors
///
/// Returns a message naming `path` and describing the first mismatch found.
fn compare_determinism_artifacts(
    path: &Path,
    left_runs: &[harn_vm::orchestration::RunRecord],
    right_runs: &[harn_vm::orchestration::RunRecord],
    left_responses: &[Value],
    right_responses: &[Value],
) -> Result<(), String> {
    let first_count = left_runs.len();
    let replay_count = right_runs.len();
    if first_count != replay_count {
        return Err(format!(
            "{} produced {} run record(s) on the first pass and {} on replay",
            path.display(),
            first_count,
            replay_count
        ));
    }

    let paired_runs = left_runs.iter().zip(right_runs.iter());
    for (idx, (left, right)) in paired_runs.enumerate() {
        let diff = harn_vm::orchestration::diff_run_records(left, right);
        let tool_recordings_equal = left.tool_recordings == right.tool_recordings;
        let hitl_questions_equal = left.hitl_questions == right.hitl_questions;
        if diff.identical && tool_recordings_equal && hitl_questions_equal {
            continue;
        }
        return Err(format!(
            "{} replay diverged for run #{idx}: identical={} tool_recordings_equal={} hitl_questions_equal={}",
            path.display(),
            diff.identical,
            tool_recordings_equal,
            hitl_questions_equal
        ));
    }

    if left_responses == right_responses {
        Ok(())
    } else {
        Err(format!(
            "{} replay changed provider_call_response output",
            path.display()
        ))
    }
}
/// Runs one script twice and verifies that the replay reproduces the first run.
///
/// When a sibling LLM fixture exists next to `path`, both runs replay it. Otherwise the first
/// run records a fixture into its temp directory and the second run replays that recording.
/// Stdout, run records, and provider responses of both runs are then compared.
///
/// # Errors
///
/// Returns a message when the script cannot be read, a temp directory cannot be created,
/// either run fails, or any compared artifact differs.
async fn run_determinism_case(
    path: &Path,
    timeout_ms: u64,
    cli_skill_dirs: &[PathBuf],
) -> Result<(), String> {
    let source = fs::read_to_string(path)
        .map_err(|error| format!("failed to read {}: {error}", path.display()))?;

    // All four scratch directories are created the same way apart from their prefix.
    let new_tempdir = |prefix: &str| tempfile::Builder::new().prefix(prefix).tempdir();
    let recording_dir = new_tempdir("harn-determinism-record-")
        .map_err(|error| format!("failed to create determinism tempdir: {error}"))?;
    let replay_dir = new_tempdir("harn-determinism-replay-")
        .map_err(|error| format!("failed to create determinism tempdir: {error}"))?;
    let record_transcript = new_tempdir("harn-determinism-record-llm-")
        .map_err(|error| format!("failed to create transcript tempdir: {error}"))?;
    let replay_transcript = new_tempdir("harn-determinism-replay-llm-")
        .map_err(|error| format!("failed to create transcript tempdir: {error}"))?;

    // Decide how the first pass obtains LLM responses and which fixture the replay reads.
    let (first_mode, replay_fixture) = match sibling_llm_fixture(path) {
        Some(existing) => (
            CliLlmMockMode::Replay {
                fixture_path: existing.clone(),
            },
            existing,
        ),
        None => {
            let recorded = recording_dir.path().join("fixture.jsonl");
            (
                CliLlmMockMode::Record {
                    fixture_path: recorded.clone(),
                },
                recorded,
            )
        }
    };
    let second_mode = CliLlmMockMode::Replay {
        fixture_path: replay_fixture,
    };

    let first_output = execute_determinism_run(
        &source,
        path,
        timeout_ms,
        &first_mode,
        &recording_dir,
        &record_transcript,
        cli_skill_dirs,
    )
    .await?;
    let second_output = execute_determinism_run(
        &source,
        path,
        timeout_ms,
        &second_mode,
        &replay_dir,
        &replay_transcript,
        cli_skill_dirs,
    )
    .await?;

    let stdout_unchanged = first_output == second_output;
    if !stdout_unchanged {
        return Err(format!(
            "{} replay changed stdout\nfirst:\n{}\nsecond:\n{}",
            path.display(),
            first_output,
            second_output
        ));
    }

    let first_runs = load_run_records(recording_dir.path())?;
    let second_runs = load_run_records(replay_dir.path())?;
    let first_responses = load_transcript_responses(record_transcript.path())?;
    let second_responses = load_transcript_responses(replay_transcript.path())?;
    compare_determinism_artifacts(
        path,
        &first_runs,
        &second_runs,
        &first_responses,
        &second_responses,
    )
}
pub(crate) async fn run_determinism_tests(
path_str: &str,
filter: Option<&str>,
timeout_ms: u64,
cli_skill_dirs: &[PathBuf],
) {
let files = collect_user_test_files(path_str).unwrap_or_else(|error| {
eprintln!("{error}");
process::exit(1);
});
let mut passed = 0usize;
let mut failed = 0usize;
let mut errors = Vec::new();
for path in files {
let rel_path = path.display().to_string();
if let Some(pattern) = filter {
let matched = if let Some(re_pat) = pattern.strip_prefix("re:") {
Regex::new(re_pat).is_ok_and(|re| re.is_match(&rel_path))
} else {
rel_path.contains(pattern)
};
if !matched {
continue;
}
}
match run_determinism_case(&path, timeout_ms, cli_skill_dirs).await {
Ok(()) => {
println!(" \x1b[32mPASS\x1b[0m {rel_path}");
passed += 1;
}
Err(error) => {
println!(" \x1b[31mFAIL\x1b[0m {rel_path}");
failed += 1;
errors.push(error);
}
}
}
println!();
if failed > 0 {
println!(
"\x1b[31m{passed} passed, {failed} failed, {} total\x1b[0m",
passed + failed
);
println!();
println!("Failures:");
for error in errors {
println!(" {error}");
}
process::exit(1);
}
println!(
"\x1b[32m{passed} passed, {failed} failed, {} total\x1b[0m",
passed + failed
);
}
pub(crate) async fn run_conformance_determinism_tests(
dir: &str,
selection: Option<&str>,
filter: Option<&str>,
timeout_ms: u64,
cli_skill_dirs: &[PathBuf],
) {
let dir_path = PathBuf::from(dir);
let suite_root = canonicalize_or_err(&dir_path).unwrap_or_else(|error| {
eprintln!("{error}");
process::exit(1);
});
let files = resolve_conformance_selection(&suite_root, selection).unwrap_or_else(|error| {
eprintln!("{error}");
process::exit(1);
});
let mut passed = 0usize;
let mut failed = 0usize;
let mut errors = Vec::new();
for path in files {
let rel_path = path.strip_prefix(&suite_root).unwrap_or(&path);
let rel_path = logical_path(rel_path);
if let Some(pattern) = filter {
let matched = if let Some(re_pat) = pattern.strip_prefix("re:") {
Regex::new(re_pat).is_ok_and(|re| re.is_match(&rel_path))
} else {
rel_path.contains(pattern)
};
if !matched {
continue;
}
}
match run_determinism_case(&path, timeout_ms, cli_skill_dirs).await {
Ok(()) => {
println!(" \x1b[32mPASS\x1b[0m {rel_path}");
passed += 1;
}
Err(error) => {
println!(" \x1b[31mFAIL\x1b[0m {rel_path}");
failed += 1;
errors.push(error);
}
}
}
println!();
if failed > 0 {
println!(
"\x1b[31m{passed} passed, {failed} failed, {} total\x1b[0m",
passed + failed
);
println!();
println!("Failures:");
for error in errors {
println!(" {error}");
}
process::exit(1);
}
println!(
"\x1b[32m{passed} passed, {failed} failed, {} total\x1b[0m",
passed + failed
);
}
/// Runs the user tests once, then re-runs them whenever a `.harn` file under `path_str`
/// changes.
///
/// The process exits with status 1 when the path does not exist or the file watcher cannot be
/// set up. The loop ends when the watcher's event channel is closed.
pub(crate) async fn run_watch_tests(
    path_str: &str,
    filter: Option<&str>,
    timeout_ms: u64,
    parallel: bool,
    cli_skill_dirs: &[PathBuf],
) {
    use notify::{Config, RecommendedWatcher, RecursiveMode, Watcher};
    use std::sync::mpsc;
    use std::time::Duration;

    let path = PathBuf::from(path_str);
    if !path.exists() {
        eprintln!("Path not found: {path_str}");
        process::exit(1);
    }
    println!("Watching {path_str} for changes... (Ctrl+C to stop)\n");

    let initial_summary =
        test_runner::run_tests(&path, filter, timeout_ms, parallel, cli_skill_dirs).await;
    print_test_results(&initial_summary);

    let (tx, rx) = mpsc::channel();
    let mut watcher = match RecommendedWatcher::new(tx, Config::default()) {
        Ok(watcher) => watcher,
        Err(e) => {
            eprintln!("Failed to create file watcher: {e}");
            process::exit(1);
        }
    };
    if let Err(e) = watcher.watch(&path, RecursiveMode::Recursive) {
        eprintln!("Failed to watch {path_str}: {e}");
        process::exit(1);
    }

    while let Ok(notification) = rx.recv() {
        let event = match notification {
            Ok(event) => event,
            Err(e) => {
                eprintln!("Watch error: {e}");
                continue;
            }
        };
        let touches_harn = event
            .paths
            .iter()
            .any(|p| p.extension().is_some_and(|e| e == "harn"));
        if !touches_harn {
            continue;
        }
        // Debounce: swallow follow-up events until the channel has been quiet for 100 ms.
        while rx.recv_timeout(Duration::from_millis(100)).is_ok() {}
        println!("\n\x1b[2m--- file changed, re-running tests ---\x1b[0m\n");
        let rerun_summary =
            test_runner::run_tests(&path, filter, timeout_ms, parallel, cli_skill_dirs).await;
        print_test_results(&rerun_summary);
    }
}
// Unit tests for file discovery, conformance selection, xfail marker parsing, and harness
// sidecar evaluation.
#[cfg(test)]
mod tests {
use super::{
collect_harn_files_sorted, evaluate_conformance_case, logical_path, parse_xfail_marker,
resolve_conformance_selection, ConformanceRunOptions,
};
use std::fs;
use std::path::Path;
// Temporary directory wrapper with helpers for writing fixture files relative to its root.
struct TempTestDir {
dir: tempfile::TempDir,
}
impl TempTestDir {
// Creates a fresh temp directory whose name starts with `harn-cli-test-`.
fn new() -> Self {
let dir = tempfile::Builder::new()
.prefix("harn-cli-test-")
.tempdir()
.unwrap();
Self { dir }
}
// Writes a placeholder file (content `// test`) at `relative`.
fn write(&self, relative: &str) {
self.write_content(relative, "// test");
}
// Writes `content` at `relative`, creating any missing parent directories first.
fn write_content(&self, relative: &str, content: &str) {
let path = self.dir.path().join(relative);
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).unwrap();
}
fs::write(path, content).unwrap();
}
// Root of the temporary directory.
fn path(&self) -> &Path {
self.dir.path()
}
}
// `.harn` files are found recursively and returned in sorted order; other files are ignored.
#[test]
fn collect_harn_files_sorted_descends_and_sorts() {
let temp = TempTestDir::new();
temp.write("suite/zeta.harn");
temp.write("suite/alpha.harn");
temp.write("suite/nested/beta.harn");
fs::write(temp.path().join("suite/ignore.txt"), "").unwrap();
let files = collect_harn_files_sorted(&temp.path().join("suite"));
let relative: Vec<String> = files
.iter()
.map(|path| logical_path(path.strip_prefix(temp.path()).unwrap()))
.collect();
assert_eq!(
relative,
vec![
"suite/alpha.harn",
"suite/nested/beta.harn",
"suite/zeta.harn"
]
);
}
// A path built from native components is rendered with forward slashes.
#[test]
fn logical_path_uses_slashes_for_native_test_paths() {
let path = Path::new("suite").join("nested").join("beta.harn");
assert_eq!(logical_path(&path), "suite/nested/beta.harn");
}
// A selection given relative to the suite root resolves to exactly that file.
#[test]
fn resolve_conformance_selection_accepts_suite_relative_file() {
let temp = TempTestDir::new();
temp.write("conformance/tests/sample.harn");
let files = resolve_conformance_selection(
&temp.path().join("conformance"),
Some("tests/sample.harn"),
)
.unwrap();
assert_eq!(files.len(), 1);
assert!(files[0].ends_with("conformance/tests/sample.harn"));
}
// A selection that escapes the suite root via `..` is rejected, even though the file exists.
#[test]
fn resolve_conformance_selection_rejects_paths_outside_suite_root() {
let temp = TempTestDir::new();
temp.write("conformance/tests/sample.harn");
temp.write("outside.harn");
let error = resolve_conformance_selection(
&temp.path().join("conformance"),
Some("../outside.harn"),
)
.unwrap_err();
assert!(error.contains("must be inside"));
}
// A `// @xfail: <reason>` line comment on the first line yields the reason.
#[test]
fn parse_xfail_marker_recognizes_top_of_file_marker() {
let src = "// @xfail: tracked in #1240\npipeline main(task) {}\n";
assert_eq!(parse_xfail_marker(src).as_deref(), Some("tracked in #1240"));
}
// Leading whitespace before the marker comment is tolerated.
#[test]
fn parse_xfail_marker_recognizes_indented_marker() {
let src = " // @xfail: skill matching #1240\n";
assert_eq!(
parse_xfail_marker(src).as_deref(),
Some("skill matching #1240")
);
}
// An ordinary comment is not mistaken for a marker.
#[test]
fn parse_xfail_marker_returns_none_when_absent() {
let src = "// regular comment\npipeline main(task) {}\n";
assert!(parse_xfail_marker(src).is_none());
}
// A marker placed after 60 filler lines is not recognized (the test name indicates the
// scan is limited to the first 50 lines).
#[test]
fn parse_xfail_marker_ignores_marker_past_first_50_lines() {
let mut src = String::new();
for _ in 0..60 {
src.push_str("// filler\n");
}
src.push_str("// @xfail: too late\n");
assert!(parse_xfail_marker(&src).is_none());
}
// A marker whose reason is only whitespace does not count.
#[test]
fn parse_xfail_marker_ignores_empty_reason() {
let src = "// @xfail: \n";
assert!(parse_xfail_marker(src).is_none());
}
// The marker is also recognized inside a single-line `/** ... */` comment.
#[test]
fn parse_xfail_marker_recognizes_one_line_doc_comment() {
let src = "/** @xfail: tracked in #1240 */\npipeline test() {}\n";
assert_eq!(parse_xfail_marker(src).as_deref(), Some("tracked in #1240"));
}
// The marker is also recognized on a ` * `-prefixed line of a multi-line doc comment.
#[test]
fn parse_xfail_marker_recognizes_multi_line_doc_comment() {
let src = "/**\n * @xfail: tracked in #1238\n */\nfn foo() {}\n";
assert_eq!(parse_xfail_marker(src).as_deref(), Some("tracked in #1238"));
}
// The marker is also recognized inside a plain `/* ... */` block comment.
#[test]
fn parse_xfail_marker_recognizes_block_comment() {
let src = "/* @xfail: tracked in #1239 */\nfn foo() {}\n";
assert_eq!(parse_xfail_marker(src).as_deref(), Some("tracked in #1239"));
}
// A case with an `.error` expectation must still fail when its `.harness.json` sidecar
// expectations are not met. Here the sidecar expects a deny event for method `wrong`, while
// the script calls `harness.env.get`.
#[tokio::test]
async fn conformance_harness_sidecar_error_fails_expected_error_fixture() {
let temp = TempTestDir::new();
temp.write_content(
"conformance/tests/harness_sidecar_error.harn",
r#"fn main(harness: Harness) {
harness.env.get("TOKEN")
}
"#,
);
temp.write_content(
"conformance/tests/harness_sidecar_error.error",
"NullHarness denied",
);
temp.write_content(
"conformance/tests/harness_sidecar_error.harness.json",
r#"{
"mode": "null",
"expect_deny_events": [
{
"sub_handle": "env",
"method": "wrong",
"args": ["TOKEN"]
}
]
}
"#,
);
let harn_file = temp
.path()
.join("conformance/tests/harness_sidecar_error.harn");
// Only the `.error` file was written above; the `.expected` path does not exist on disk.
let expected_file = harn_file.with_extension("expected");
let error_file = harn_file.with_extension("error");
let options = ConformanceRunOptions {
verbose: false,
timing: false,
differential_optimizations: false,
json: false,
cli_skill_dirs: &[],
};
let evaluation = evaluate_conformance_case(
&harn_file,
&expected_file,
&error_file,
"tests/harness_sidecar_error.harn",
2_000,
&options,
)
.await;
assert!(!evaluation.passed);
let message = evaluation.message.unwrap_or_default();
assert!(
message.contains("harness deny events differed"),
"unexpected message: {message}"
);
}
}