#![recursion_limit = "256"]
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Mutex};
use harn_vm::bridge::HostBridge;
use harn_vm::value::VmError;
/// Compiles `source`, executes it on a fresh VM with a host bridge installed, and returns
/// the VM's captured output as a `String`.
///
/// Thread-local VM state is reset before compiling. Execution runs inside a `LocalSet`
/// driven by a current-thread Tokio runtime. The host bridge is cleared right after
/// execution finishes and before any execution error is handed back, so it is removed on
/// both the success and the failure path.
///
/// # Errors
///
/// Returns the compile error, the runtime-build error rendered via `to_string`, or the
/// execution `VmError` rendered with `{:?}`.
fn run_with_bridge(source: &str) -> Result<String, String> {
    harn_vm::reset_thread_local_state();
    let chunk = harn_vm::compile_source(source)?;

    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .map_err(|err| err.to_string())?;

    runtime.block_on(async {
        let local_set = tokio::task::LocalSet::new();
        let session = async {
            let bridge = Arc::new(HostBridge::from_parts(
                Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())),
                Arc::new(AtomicBool::new(false)),
                Arc::new(Mutex::new(())),
                1,
            ));
            harn_vm::llm::install_current_host_bridge(Arc::clone(&bridge));

            let mut vm = harn_vm::Vm::new();
            harn_vm::register_vm_stdlib(&mut vm);
            let outcome = vm
                .execute(&chunk)
                .await
                .map_err(|err: VmError| format!("{err:?}"));

            // Clear the bridge before looking at the outcome so a failed run does not
            // leave it installed.
            harn_vm::llm::clear_current_host_bridge();

            match outcome {
                Ok(_) => Ok(vm.output().to_string()),
                Err(message) => Err(message),
            }
        };
        local_set.run_until(session).await
    })
}
/// Extracts the payload of every line in `raw` that starts with the `"[harn] "` prefix.
///
/// Lines without that exact prefix are skipped; the prefix itself is removed from the
/// lines that are kept. Order is preserved.
fn out_lines(raw: &str) -> Vec<String> {
    let mut payloads = Vec::new();
    for line in raw.lines() {
        if let Some(rest) = line.strip_prefix("[harn] ") {
            payloads.push(rest.to_owned());
        }
    }
    payloads
}
/// Builds the script source for a `pipeline main(task)` whose mock LLM never finishes on
/// its own during the normal loop.
///
/// The generated script:
/// - registers a `keep_exploring` tool whose handler returns `"explored"`;
/// - creates two `task_group`-scoped shared cells, keyed with `session_id`, that count
///   normal loop calls and wrap-up calls to the mock LLM;
/// - defines `mock_llm`, which bumps the wrap-up counter and returns a
///   `<user_response>…</user_response>` / `<done>FINISHED</done>` reply with no tool calls
///   when `_call?.opts?._final_wrapup == true`, and otherwise bumps the tool-call counter
///   and returns a single `keep_exploring` tool call with empty text;
/// - calls `agent_loop` with `max_iterations: 3`, `loop_until_done: true` and
///   `done_sentinel: "FINISHED"`;
/// - logs, in order: `result.status`, the tool-call counter, the wrap-up counter, and
///   whether `result.text` contains `"Refactored the parser"`.
///
/// `session_id` is interpolated into the shared-cell keys and the `session_id` option.
/// `final_wrapup_opt` is spliced verbatim as an extra entry at the end of the `agent_loop`
/// options map; pass `""` to add nothing.
///
/// Doubled braces (`{{` / `}}`) in the template are `format!` escapes for literal braces.
fn exhaustion_pipeline(session_id: &str, final_wrapup_opt: &str) -> String {
format!(
r#"
pipeline main(task) {{
clear_tool_hooks()
let registry = tool_registry()
let tools = tool_define(
registry,
"keep_exploring",
"Test stand-in for a tool the model keeps calling.",
{{parameters: {{}}, handler: {{ _args -> return "explored" }}}},
)
let tool_calls_counter = shared_cell(
{{scope: "task_group", key: "wrapup-tool-calls-{session_id}", initial: 0}},
)
let wrapup_counter = shared_cell(
{{scope: "task_group", key: "wrapup-final-calls-{session_id}", initial: 0}},
)
let mock_llm = {{ _call ->
if _call?.opts?._final_wrapup == true {{
let wsnap = shared_snapshot(wrapup_counter)
shared_cas(wrapup_counter, wsnap, wsnap.value + 1)
return {{
ok: true,
value: {{
text: "<user_response>Refactored the parser and ran the tests.</user_response>\n<done>FINISHED</done>",
tool_calls: [],
provider: "mock",
model: "mock",
}},
}}
}}
let snap = shared_snapshot(tool_calls_counter)
shared_cas(tool_calls_counter, snap, snap.value + 1)
return {{
ok: true,
value: {{
text: "",
tool_calls: [{{id: "call_explore", name: "keep_exploring", arguments: {{}}}}],
provider: "mock",
model: "mock",
}},
}}
}}
let result = agent_loop(
"do the work",
nil,
{{
provider: "mock",
tools: tools,
tool_format: "native",
max_iterations: 3,
loop_until_done: true,
done_sentinel: "FINISHED",
session_id: "{session_id}",
llm_caller: mock_llm,
{final_wrapup_opt}
}},
)
log(result.status)
log(shared_get(tool_calls_counter))
log(shared_get(wrapup_counter))
log(contains(result.text, "Refactored the parser"))
}}
"#
)
}
/// Builds the script source for a `pipeline main(task)` whose mock LLM always replies with
/// the done sentinel.
///
/// The generated script:
/// - creates one `task_group`-scoped shared cell, keyed with `session_id`, that counts
///   wrap-up calls to the mock LLM;
/// - defines `mock_llm`, which bumps that counter only when
///   `_call?.opts?._final_wrapup == true`, and in every case returns
///   `<user_response>All done.</user_response>` followed by `<done>FINISHED</done>` with
///   no tool calls;
/// - calls `agent_loop` with `max_iterations: 3`, `loop_until_done: true` and
///   `done_sentinel: "FINISHED"` (no tools are registered);
/// - logs, in order: `result.status`, the wrap-up counter, and whether `result.text`
///   contains `"All done."`.
///
/// `session_id` is interpolated into the shared-cell key and the `session_id` option.
/// Doubled braces (`{{` / `}}`) in the template are `format!` escapes for literal braces.
fn clean_done_pipeline(session_id: &str) -> String {
format!(
r#"
pipeline main(task) {{
clear_tool_hooks()
let wrapup_counter = shared_cell(
{{scope: "task_group", key: "clean-done-wrapup-{session_id}", initial: 0}},
)
let mock_llm = {{ _call ->
if _call?.opts?._final_wrapup == true {{
let wsnap = shared_snapshot(wrapup_counter)
shared_cas(wrapup_counter, wsnap, wsnap.value + 1)
}}
return {{
ok: true,
value: {{
text: "<user_response>All done.</user_response>\n<done>FINISHED</done>",
tool_calls: [],
provider: "mock",
model: "mock",
}},
}}
}}
let result = agent_loop(
"do the work",
nil,
{{
provider: "mock",
max_iterations: 3,
loop_until_done: true,
done_sentinel: "FINISHED",
session_id: "{session_id}",
llm_caller: mock_llm,
}},
)
log(result.status)
log(shared_get(wrapup_counter))
log(contains(result.text, "All done."))
}}
"#
)
}
/// Runs the exhaustion pipeline with no extra `agent_loop` option and checks its four
/// logged values: status `budget_exhausted`, three loop LLM calls, exactly one wrap-up
/// LLM call, and the wrap-up summary present in the surfaced text.
#[test]
fn exhaustion_mid_tool_use_fires_wrapup_and_surfaces_sentinel() {
    let raw =
        run_with_bridge(&exhaustion_pipeline("wrapup-exhaustion", "")).expect("script must run");
    let lines = out_lines(&raw);
    // Check the count before indexing: a short run would otherwise fail with a bare
    // out-of-bounds panic that hides what the script actually printed.
    assert!(
        lines.len() >= 4,
        "expected at least 4 logged lines, got {}; raw output: {raw:?}",
        lines.len()
    );
    assert_eq!(
        lines[0], "budget_exhausted",
        "expected budget_exhausted; lines: {lines:?}"
    );
    assert_eq!(
        lines[1], "3",
        "expected three loop LLM calls; lines: {lines:?}"
    );
    assert_eq!(
        lines[2], "1",
        "expected exactly one wrap-up LLM call; lines: {lines:?}"
    );
    assert_eq!(
        lines[3], "true",
        "expected surfaced text to contain the wrap-up summary; lines: {lines:?}"
    );
}
/// Runs the clean-done pipeline and checks its three logged values: status `done`, zero
/// wrap-up LLM calls, and the natural final answer present in the surfaced text.
#[test]
fn clean_done_exit_does_not_fire_wrapup() {
    let raw = run_with_bridge(&clean_done_pipeline("wrapup-clean-done")).expect("script must run");
    let lines = out_lines(&raw);
    // Check the count before indexing: a short run would otherwise fail with a bare
    // out-of-bounds panic that hides what the script actually printed.
    assert!(
        lines.len() >= 3,
        "expected at least 3 logged lines, got {}; raw output: {raw:?}",
        lines.len()
    );
    assert_eq!(lines[0], "done", "expected done; lines: {lines:?}");
    assert_eq!(
        lines[1], "0",
        "expected zero wrap-up calls on a clean done; lines: {lines:?}"
    );
    assert_eq!(
        lines[2], "true",
        "expected surfaced text to contain the natural final answer; lines: {lines:?}"
    );
}
/// Runs the exhaustion pipeline with `final_wrapup: false` spliced into the `agent_loop`
/// options and checks its four logged values: status `budget_exhausted`, three loop LLM
/// calls, zero wrap-up LLM calls, and no wrap-up summary in the surfaced text.
#[test]
fn final_wrapup_false_disables_the_wrapup_turn() {
    let raw = run_with_bridge(&exhaustion_pipeline(
        "wrapup-disabled",
        " final_wrapup: false,",
    ))
    .expect("script must run");
    let lines = out_lines(&raw);
    // Check the count before indexing: a short run would otherwise fail with a bare
    // out-of-bounds panic that hides what the script actually printed.
    assert!(
        lines.len() >= 4,
        "expected at least 4 logged lines, got {}; raw output: {raw:?}",
        lines.len()
    );
    assert_eq!(
        lines[0], "budget_exhausted",
        "expected budget_exhausted; lines: {lines:?}"
    );
    assert_eq!(
        lines[1], "3",
        "expected three loop LLM calls; lines: {lines:?}"
    );
    assert_eq!(
        lines[2], "0",
        "expected zero wrap-up calls when final_wrapup:false; lines: {lines:?}"
    );
    assert_eq!(
        lines[3], "false",
        "expected no wrap-up summary in surfaced text when disabled; lines: {lines:?}"
    );
}