import { __judge_apply_llm_overrides, __judge_classify_verdict } from "std/agent/judge_internals"
import { agent_typed_output_checkpoint } from "std/agent/primitives"
import { completion_judge_user_prompt } from "std/agent/prompts"
import {
agent_emit_event,
agent_session_inject_feedback,
agent_session_messages,
} from "std/agent/state"
// Conservative default ceiling on completion-judge invocations within one
// session. The limit is compared against the number of judge invocations
// already recorded for the session; once that count reaches the limit the
// judge is no longer called and no further feedback is injected on its behalf.
//
// Why a cap: each veto is a paid model call plus an injected feedback message,
// so a weak model that never satisfies the judge can otherwise burn calls up to
// `max_verify_attempts` (default 20) with no structured signal.
//
// Callers raise it via `verify_completion_judge.max_invocations` (or
// `max_feedback`, which is consulted only when `max_invocations` is unset), or
// disable the cap entirely with `0` (any value <= 0 disables it).
const __VERIFY_COMPLETION_JUDGE_DEFAULT_CAP = 5
/**
 * De-duplicate a list of names while keeping first-seen order.
 *
 * Empty-string entries are dropped. Returns a new list; the input is only
 * read.
 */
fn __unique_names(names) {
  var seen = []
  for candidate in names {
    if candidate != "" {
      // Only the first occurrence of a name is kept.
      if !contains(seen, candidate) {
        seen = seen.push(candidate)
      }
    }
  }
  return seen
}
/**
 * Collect the distinct tool names that appear in a session transcript.
 *
 * Only messages whose `role` is "tool" are considered. A tool message without
 * a `name` contributes an empty string, which `__unique_names` then discards.
 */
fn __session_tool_names(messages) {
  var collected = []
  for entry in messages {
    let role = entry?.role
    if role == "tool" {
      let tool_name = entry?.name ?? ""
      collected = collected.push(tool_name)
    }
  }
  return __unique_names(collected)
}
/**
 * Build the payload dict handed to completion verifiers and judges.
 *
 * The transcript is read via `agent_session_messages` and serialized with
 * `json_stringify`. `text`, `visible_text` and `last_text` all carry the same
 * `text` argument, and `all_tools_used` / `successful_tools_used` both carry
 * the same comma-separated list of distinct tool names from the transcript.
 * `opts` is accepted but not read here.
 */
fn __judge_payload(session, opts, stop_reason, text, iteration) {
  let history = agent_session_messages(session.session_id)
  let tools_csv = join(__session_tool_names(history), ", ")
  return {
    session_id: session.session_id,
    task: session?.task ?? "",
    stop_reason: stop_reason,
    text: text,
    visible_text: text,
    last_text: text,
    transcript: json_stringify(history),
    all_tools_used: tools_csv,
    successful_tools_used: tools_csv,
    iteration: iteration,
  }
}
/**
 * Run a caller-supplied `verify_completion` closure and normalize its result
 * into a verdict dict with a `vetoed` flag (plus `feedback` where available).
 *
 * Result mapping:
 *   - nil or ""      -> not vetoed
 *   - bool           -> vetoed when the bool is false
 *   - non-empty str  -> vetoed, the string becomes the feedback
 *   - dict           -> vetoed unless `confirm` is set; feedback is `message`,
 *                       falling back to `feedback`
 *   - anything else  -> not vetoed
 */
fn __judge_invoke_closure(verify_completion, payload) {
  let answer = verify_completion(payload)
  if answer == nil {
    return {vetoed: false}
  }
  if answer == "" {
    return {vetoed: false}
  }
  let kind = type_of(answer)
  if kind == "string" {
    return {vetoed: true, feedback: answer}
  }
  if kind == "bool" {
    return {vetoed: !answer}
  }
  if kind == "dict" {
    // A dict without `confirm` counts as a veto.
    let confirmed = answer?.confirm ?? false
    let note = answer?.message ?? answer?.feedback
    return {vetoed: !confirmed, feedback: note}
  }
  return {vetoed: false}
}
/**
 * Ask an LLM judge for a structured done/continue verdict on `payload`.
 *
 * The judge call goes through `agent_typed_output_checkpoint` under the name
 * "agent.completion_judge" and is timed with `harness.clock.monotonic_ms()`.
 * Every returned dict carries `judge_duration_ms` and `typed_checkpoint`.
 *
 * When the checkpoint reports failure (`ok` is falsy):
 *   - if `judge_cfg.fail_open_on_error` (or, when that is unset,
 *     `judge_cfg.fail_open`) is set, the completion is accepted with verdict
 *     "done" and the error as reasoning;
 *   - otherwise the completion is vetoed with verdict "continue" and the
 *     error used as feedback, reasoning and next step.
 *
 * On success the verdict string (default "continue") is classified by
 * `__judge_classify_verdict` against the accepted "done" spellings, and the
 * outcome is merged with `reasoning` and `next_step`.
 */
fn __judge_invoke_structured(harness: Harness, judge_cfg, opts, payload) {
  let user_prompt = completion_judge_user_prompt(payload)
  let verdict_schema = {
    type: "object",
    properties: {
      verdict: {type: "string", description: "Use `done` or `continue`."},
      reasoning: {type: "string"},
      next_step: {type: "string"},
    },
    required: ["verdict"],
  }
  // Judge-specific settings are layered over the caller's llm_options; on a
  // key clash the judge-specific entry wins.
  let inherited = opts?.llm_options ?? {}
  let judge_settings = {
    model: judge_cfg?.model ?? opts?.model,
    provider: judge_cfg?.provider ?? opts?.provider,
    output_schema: verdict_schema,
    session_id: payload.session_id,
    system: judge_cfg?.system ?? "",
  }
  let call_options = __judge_apply_llm_overrides(inherited + judge_settings, judge_cfg)

  let t0 = harness.clock.monotonic_ms()
  let checkpoint = agent_typed_output_checkpoint("agent.completion_judge", user_prompt, verdict_schema, call_options)
  let elapsed_ms = harness.clock.monotonic_ms() - t0
  // Fields shared by every return shape below.
  let telemetry = {judge_duration_ms: elapsed_ms, typed_checkpoint: checkpoint}

  if !checkpoint.ok {
    let failure = checkpoint.error
    let fail_open = judge_cfg?.fail_open_on_error ?? judge_cfg?.fail_open ?? false
    if fail_open {
      return {vetoed: false, verdict: "done", reasoning: failure, next_step: ""} + telemetry
    }
    return {
      vetoed: true,
      feedback: failure,
      verdict: "continue",
      reasoning: failure,
      next_step: failure,
    } + telemetry
  }

  let parsed = checkpoint.data
  let why = parsed?.reasoning ?? ""
  let follow_up = parsed?.next_step ?? ""
  let accepted_spellings = ["done", "pass", "passed", "safe", "true", "yes", "yield", "yield_to_user", "complete", "completed"]
  let classified = __judge_classify_verdict(
    parsed?.verdict ?? "continue",
    accepted_spellings,
    [follow_up, why],
    judge_cfg?.feedback_fallback ?? "",
  )
  return classified + {reasoning: why, next_step: follow_up} + telemetry
}
/**
 * Work out the per-session invocation ceiling for the completion judge.
 *
 * When `judge_cfg` is a dict, `max_invocations` is used, falling back to
 * `max_feedback`; when neither is set (or `judge_cfg` is not a dict) the
 * module default applies. Returns nil when the resulting value is <= 0,
 * meaning the cap is disabled; otherwise returns the value itself.
 */
fn __verify_completion_judge_cap(judge_cfg) {
  let has_settings = type_of(judge_cfg) == "dict"
  let requested = if has_settings {
    judge_cfg?.max_invocations ?? judge_cfg?.max_feedback
  } else {
    nil
  }
  let limit = requested ?? __VERIFY_COMPLETION_JUDGE_DEFAULT_CAP
  let disabled = limit <= 0
  if disabled {
    return nil
  }
  return limit
}
/**
 * agent_verify_completion_judge_cap.
 *
 * Public accessor for the completion-judge cap that applies to a given
 * `verify_completion_judge` config, e.g. so it can be surfaced in run records.
 * Delegates to `__verify_completion_judge_cap`; the result is nil when the cap
 * is disabled.
 *
 * @effects: []
 * @allocation: stack
 * @errors: []
 * @api_stability: experimental
 * @example: agent_verify_completion_judge_cap(opts?.verify_completion_judge)
 */
pub fn agent_verify_completion_judge_cap(judge_cfg) {
  let resolved_cap = __verify_completion_judge_cap(judge_cfg)
  return resolved_cap
}
/**
 * Emit a "judge_decision" event for the session describing one judge verdict.
 *
 * The event's `verdict` is the verdict's own `verdict` field when present;
 * otherwise it is derived from `vetoed` ("continue" when vetoed, "done" when
 * not). Missing `reasoning` / `next_step` default to "", a missing
 * `judge_duration_ms` to 0, and a missing `trigger` to nil.
 */
fn __emit_judge_decision(session_id, iteration, verdict) {
  var label = verdict?.verdict
  if label == nil {
    // No explicit verdict string: derive one from the veto flag.
    label = if verdict.vetoed {
      "continue"
    } else {
      "done"
    }
  }
  let event = {
    iteration: iteration,
    verdict: label,
    reasoning: verdict?.reasoning ?? "",
    next_step: verdict?.next_step ?? "",
    judge_duration_ms: verdict?.judge_duration_ms ?? 0,
    trigger: verdict?.trigger ?? nil,
  }
  agent_emit_event(session_id, "judge_decision", event)
}
/**
* agent_verify_or_continue.
*
* Decide whether a proposed completion stands or the agent must continue.
* Verifiers run in a fixed order and each later stage is skipped once an
* earlier one has vetoed:
*   1. `opts.verify_completion` - caller-supplied closure.
*   2. `opts.verify_completion_judge` - structured LLM judge, subject to the
*      per-session invocation cap.
*   3. `opts.done_judge` - structured LLM judge, only for stop reasons
*      "sentinel", "natural" or "stalled" and only while `opts._done_judge_due`
*      (default true) holds.
* A veto that carries non-empty feedback is injected into the session under
* the "verify_completion" label. Each structured judge call also emits a
* "judge_decision" event.
*
* Returns the final verdict dict merged with
* `verify_completion_judge_invoked` and `verify_completion_judge_cap_reached`.
*
* NOTE(review): `harness` is used below but is neither a parameter nor
* declared in the visible part of this file - presumably an ambient binding;
* confirm.
*
* @effects: []
* @allocation: heap
* @errors: []
* @api_stability: experimental
* @example: agent_verify_or_continue(session, opts, stop_reason, text, iteration)
*/
pub fn agent_verify_or_continue(session, opts, stop_reason, text, iteration = 0) {
let payload = __judge_payload(session, opts, stop_reason, text, iteration)
// Start from "accepted"; each stage below may replace the verdict.
var verdict = {vetoed: false}
// Stage 1: caller-supplied closure.
if opts?.verify_completion != nil {
verdict = __judge_invoke_closure(opts.verify_completion, payload)
}
var completion_judge_invoked = false
var completion_judge_cap_reached = false
// Count of judge invocations so far, as supplied by the caller in `opts`;
// this function reads it but does not increment it.
let prior_judge_invocations = opts?._verify_completion_judge_invocations ?? 0
// Stage 2: completion judge, skipped when stage 1 already vetoed.
if !verdict.vetoed && opts?.verify_completion_judge != nil {
// A nil cap means the cap is disabled and the judge always runs.
let cap = __verify_completion_judge_cap(opts.verify_completion_judge)
if cap != nil && prior_judge_invocations >= cap {
// Cap reached: stop firing the judge (and injecting its feedback). The
// loop surfaces this as stop_reason `completion_judge_cap_reached` plus a
// structured `completion_judge` block in the run record. The prior
// `judge_decision` events already record each veto for loop counting, so
// no extra event type is needed here.
completion_judge_cap_reached = true
} else {
verdict = __judge_invoke_structured(harness, opts.verify_completion_judge, opts, payload)
completion_judge_invoked = true
__emit_judge_decision(session.session_id, iteration, verdict)
}
}
// Stage 3: done-judge. It needs a configured `done_judge`, one of the three
// listed stop reasons, and `_done_judge_due` (treated as true when unset).
let done_judge_due = opts?._done_judge_due ?? true
let done_judge_applies = opts?.done_judge != nil
&& (stop_reason == "sentinel" || stop_reason == "natural" || stop_reason == "stalled")
&& done_judge_due
if !verdict.vetoed && done_judge_applies {
verdict = __judge_invoke_structured(harness, opts.done_judge, opts, payload)
// Tag the verdict so the emitted event and the caller can tell it came from
// the done-judge, and carry through the caller-provided trigger (if any).
verdict = verdict + {done_judge_invoked: true, trigger: opts?._done_judge_trigger ?? nil}
__emit_judge_decision(session.session_id, iteration, verdict)
}
// Only a veto with non-empty feedback is pushed back into the session.
if verdict.vetoed && verdict?.feedback != nil && verdict.feedback != "" {
agent_session_inject_feedback(session.session_id, "verify_completion", verdict.feedback)
}
return verdict
+ {
verify_completion_judge_invoked: completion_judge_invoked,
verify_completion_judge_cap_reached: completion_judge_cap_reached,
}
}