1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
//! Sentinel for the post-success phantom-tool-call exemption.
//!
//! Incident: the phantom detector treated text-only iterations the
//! same regardless of whether the turn had already produced
//! successful tool calls. After a clean commit+push the model would
//! emit a one-line "Pushed." or a short summary, the detector's
//! `has_past_tense_action_claim` would fire ("pushed" / "committed"
//! / "fixed" are all past-tense action verbs), `phantom_retries_used`
//! would tick up, and 5+ retries later self-heal would even swap
//! providers — all on completed work. User screenshots showed 8+
//! "Phantom tool calls detected" alerts and provider-swap warnings
//! after a successful commit, ~293s and 4683 tokens wasted finalising
//! nothing.
//!
//! Fix: a turn-scoped counter `tool_calls_completed_this_turn` ticks
//! every successful tool execution. The phantom detection block is
//! gated on `phantom_eligible = !is_cli_provider &&
//! tool_calls_completed_this_turn == 0`, so once the turn has real
//! work behind it the wrap-up text is treated as a completion ack,
//! not phantom intent.
//!
//! These tests are source-level sentinels:
//! 1. The counter is declared at turn scope (not iteration scope),
//! initialised to zero, and incremented in both success paths
//! (direct execution + post-approval execution).
//! 2. The four phantom-detection gates inside `tool_uses.is_empty()`
//! all check `phantom_eligible`, not the bare `!is_cli_provider`
//! that produced the false positive.
//! 3. The BRAIN_PREAMBLE "FINISHING A TURN" directive lands in the
//! system prompt every turn, telling the model what shape the
//! final acknowledgement should take. Without this the model
//! still narrates after a successful tool call; the safeguard
//! catches the false positive, but the model wastes tokens on
//! multi-paragraph wrap-ups before the loop ends.
// Full text of the tool-loop implementation, embedded at compile time.
// `include_str!` resolves the path relative to this file, so the tests
// below inspect the source as written, not any runtime behaviour.
const TOOL_LOOP_SRC: &str = include_str!("../brain/agent/service/tool_loop.rs");
// Full text of the prompt builder, embedded the same way; the tests below
// search it for the "FINISHING A TURN" directive phrases.
const PROMPT_BUILDER_SRC: &str = include_str!("../brain/prompt_builder.rs");
/// Sentinel: the success counter is declared, zero-initialised, and textually
/// precedes the `let mut iteration_text` marker in the tool-loop source.
///
/// Rationale (per the incident notes): a counter declared inside the
/// iteration body would be re-zeroed on every pass, so each text-only
/// iteration would look like "no tool calls yet" and the phantom detector
/// would fire again.
#[test]
fn counter_declared_at_turn_scope_not_iteration_scope() {
    // Step 1: the exact zero-initialised declaration has to be present.
    let declaration = "let mut tool_calls_completed_this_turn: usize = 0;";
    let has_zeroed_counter = TOOL_LOOP_SRC.contains(declaration);
    assert!(
        has_zeroed_counter,
        "tool_calls_completed_this_turn counter must be declared and zero-initialised"
    );

    // Step 2: ordering. `let mut iteration_text` is used as the marker for
    // the start of iteration-scoped state; the counter's first occurrence
    // must come earlier in the file than that marker.
    let declared_at = TOOL_LOOP_SRC
        .find("let mut tool_calls_completed_this_turn")
        .expect("counter must exist");
    let loop_body_marker_at = TOOL_LOOP_SRC
        .find("let mut iteration_text = String::new();")
        .expect("iteration_text marker must exist");
    assert!(
        declared_at < loop_body_marker_at,
        "counter must be declared OUTSIDE the iteration body (before `let mut iteration_text`) \
         or it resets every iteration and the exemption never works"
    );
}
/// Sentinel: the tool-loop source contains at least two
/// `tool_calls_completed_this_turn += 1;` statements.
///
/// The incident notes describe two success branches (direct execution and
/// post-approval execution); if either one stops ticking the counter, the
/// false-positive phantom detection can return through that branch.
#[test]
fn counter_increments_on_both_success_paths() {
    const TICK_STATEMENT: &str = "tool_calls_completed_this_turn += 1;";

    // Count non-overlapping occurrences of the increment statement.
    let increments = TOOL_LOOP_SRC.match_indices(TICK_STATEMENT).count();

    assert!(
        increments >= 2,
        "counter must be incremented in BOTH the direct and post-approval success branches; \
         found {increments} increments — missing one re-opens the regression for that path"
    );
}
/// Sentinel: the phantom-detection block defines and uses `phantom_eligible`.
///
/// The test carves a window out of the tool-loop source, starting at the
/// `// ── Phantom tool call detection` marker and ending a little past the
/// "Cap hit and the fast-escalate block" comment (or 4000 bytes after the
/// anchor if that comment is absent). Inside that window it checks that:
///
/// 1. the binding `let phantom_eligible = !is_cli_provider &&
///    tool_calls_completed_this_turn == 0;` exists (whitespace-insensitive);
/// 2. `phantom_eligible` occurs at least four times (the binding plus at
///    least three uses).
///
/// # Panics
///
/// Panics (fails the test) if the anchor marker is missing or either check
/// does not hold.
#[test]
fn phantom_eligible_gate_replaces_naked_is_cli_provider_check() {
    let anchor = "// ── Phantom tool call detection";
    let anchor_pos = TOOL_LOOP_SRC
        .find(anchor)
        .expect("phantom detection block marker must exist");

    // Window end: 500 bytes past the closing marker when it is found,
    // otherwise a fixed 4000-byte window, never past the end of the file.
    let mut block_end = TOOL_LOOP_SRC[anchor_pos..]
        .find("// Cap hit and the fast-escalate block above couldn't")
        .map(|p| anchor_pos + p + 500)
        .unwrap_or(anchor_pos + 4000)
        .min(TOOL_LOOP_SRC.len());

    // The padding above is a raw byte offset. The scanned file contains
    // multi-byte UTF-8 (the anchor itself uses `─`), so the offset may land
    // inside a character, and slicing there would panic with an unrelated
    // "not a char boundary" error. Walk back to the nearest boundary; this
    // cannot go below `anchor_pos`, which is itself a boundary.
    while !TOOL_LOOP_SRC.is_char_boundary(block_end) {
        block_end -= 1;
    }
    let window = &TOOL_LOOP_SRC[anchor_pos..block_end];

    // Whitespace-normalised match so rustfmt-driven line wrapping
    // doesn't false-fail (e.g. fmt may split the let-binding across
    // two lines — the chain shape is what matters).
    let normalized: String = window
        .split_ascii_whitespace()
        .collect::<Vec<_>>()
        .join(" ");
    let expected_chain =
        "let phantom_eligible = !is_cli_provider && tool_calls_completed_this_turn == 0;";
    assert!(
        normalized.contains(expected_chain),
        "phantom_eligible gate must be defined at the top of the phantom block — \
         expected substring: {expected_chain}"
    );

    // Counting "phantom_eligible" occurrences inside the window gives a
    // lower bound: the let-binding itself plus at least three condition uses.
    // NOTE(review): the file header speaks of four gates; if that is
    // accurate this bound could be raised to 5 — confirm against tool_loop.rs.
    let phantom_eligible_uses = window.matches("phantom_eligible").count();
    assert!(
        phantom_eligible_uses >= 4,
        "phantom_eligible must appear in the gate binding + the three condition checks; \
         found {phantom_eligible_uses} uses — a missing one leaves a phantom branch unguarded"
    );
}
/// Sentinel: the prompt-builder source still carries the key phrases of the
/// "FINISHING A TURN" directive.
///
/// Background from the incident notes: an earlier wording ("ONE short
/// acknowledgement line and stop", with bare examples like "Done.") was
/// read by the model as "emit nothing", producing `finish_reason: stop`
/// with an empty delta, which looked like a silent crash to the user. The
/// rewritten directive requires an acknowledgement, and the phrases pinned
/// here keep a refactor from quietly dropping that requirement.
#[test]
fn brain_preamble_directs_clear_acknowledgement_not_silent_close() {
    // (required phrase, failure message), checked in this order so the
    // first missing phrase is the one reported.
    const REQUIRED_PHRASES: &[(&str, &str)] = &[
        (
            "FINISHING A TURN",
            "BRAIN_PREAMBLE must carry the FINISHING A TURN directive header",
        ),
        (
            "never disappear silently",
            "directive header must forbid empty closes — the bug it was added to prevent",
        ),
        (
            "never end with empty content",
            "directive must explicitly forbid `finish_reason: stop` with no text",
        ),
        (
            "Empty completions",
            "directive must call out empty completions as a failure mode, not a valid close",
        ),
        (
            "Do NOT run \"verification\" tool calls",
            "directive must forbid verification re-runs — the secondary loop pattern",
        ),
    ];

    for &(phrase, failure_message) in REQUIRED_PHRASES.iter() {
        assert!(PROMPT_BUILDER_SRC.contains(phrase), "{}", failure_message);
    }
}
/// Sentinel: the tool-loop source keeps the labelled comment explaining the
/// post-success exemption.
///
/// The gate is easy to mistake for dead weight; pinning the label and the
/// phrase that names the text-only iteration is meant to stop a "cleanup"
/// refactor from stripping it and re-introducing the retry loop.
#[test]
fn comment_documents_the_post_success_exemption_rationale() {
    let source = TOOL_LOOP_SRC;
    let pinned_comment_fragments = [
        (
            "POST-SUCCESS EXEMPTION",
            "the gate must carry a labelled comment so its purpose survives refactors",
        ),
        (
            "completion acknowledgement",
            "comment must name what the text-only iteration actually is",
        ),
    ];

    // Checked in order: the label first, then the explanatory phrase.
    for (fragment, failure_message) in pinned_comment_fragments.iter() {
        assert!(source.contains(fragment), "{}", failure_message);
    }
}