localharness 0.46.0

Agents that own themselves: one Rust crate that's both an agent SDK (streaming, tools, hooks, policies, triggers, MCP) and a wallet-owning, self-sovereign agent that runs in the browser.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
//! Pure turn-outcome classification for the in-tab agent's continuous-execution
//! loop — native-testable, no DOM, no state, no async.
//!
//! These were inlined in `app::chat` (a 4k-line file), which only compiles
//! under `browser-app` + wasm32 — so their guard tests (including the
//! loop-termination invariant) were dead under a native `cargo test`. Hoisting
//! them to the crate root — alongside `encoding`, `raster`, and `compose` —
//! makes them real, native-run unit tests. Same pattern as `src/encoding.rs`;
//! behavior is unchanged.

/// Upper bound on automatic "continue toward the goal" turns per single
/// user message. A safety cap so a confused model can't loop forever
/// (and to bound credit spend). The user can always send again to extend.
pub const MAX_AUTO_CONTINUATIONS: u32 = 10;

/// How a single streamed turn ended — drives the continuous-execution loop.
#[derive(Debug, PartialEq, Eq)]
pub enum TurnOutcome {
    /// The model called `finish` — task explicitly complete. Stop.
    Finished,
    /// The turn ended on a final text answer with no tool activity this
    /// turn (plain conversation / a closing reply / a question). Stop —
    /// don't spam empty auto-continues on a chat reply.
    FinalAnswer,
    /// The turn performed tool actions and ended WITHOUT a completion
    /// signal — the model likely stopped mid-goal. Auto-continue.
    Incomplete,
    /// Nothing visible was produced, but the turn was TRUNCATED mid-answer
    /// (max-tokens / all reasoning, no final text). The model isn't done —
    /// retry with a "finish concisely" nudge, bounded like `Incomplete`.
    EmptyTruncated,
    /// Nothing visible was produced for a terminal reason (genuinely blank,
    /// safety-blocked, or a credits problem). Stop.
    Empty,
    /// The turn errored (already surfaced in the transcript). Stop.
    Error,
    /// The user hit stop mid-turn. Stop.
    Cancelled,
}

/// Why a turn produced no visible output — drives the message shown and
/// whether the turn is retryable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmptyKind {
    /// The model hit its output-token limit mid-answer (finish-reason
    /// MAX_TOKENS, or it streamed only reasoning and never reached a final
    /// text part). RETRYABLE — continue toward an answer.
    Truncated,
    /// A safety / content filter blocked the response. Terminal.
    Blocked,
    /// Genuinely nothing — no reasoning, no finish-reason note. Most often a
    /// credits/session problem (the proxy returned an empty body) or a stray
    /// blank from the model. Terminal.
    Blank,
}

/// Classify a no-visible-output turn from the model's terminal finish-reason
/// note (`ChatResponse::finish_note`) plus whether ANY reasoning streamed.
/// Pure → unit-testable without a browser.
///
/// - A MAX_TOKENS finish-reason → `Truncated` (ran out of room mid-answer).
/// - A safety/blocklist/prohibited finish-reason → `Blocked`.
/// - No note but the model DID reason (`saw_thinking`) → `Truncated`: it spent
///   the whole window thinking and never emitted a final text part (the exact
///   "(empty response) on hard tasks" bug; some paths drop the finish-reason).
/// - Otherwise → `Blank` (likely credits/session, or a stray empty reply).
pub fn classify_empty(finish_note: Option<&str>, saw_thinking: bool) -> EmptyKind {
    let note = finish_note.unwrap_or("").to_lowercase();
    if note.contains("max token") {
        EmptyKind::Truncated
    } else if note.contains("safety")
        || note.contains("blocklist")
        || note.contains("prohibited")
        || note.contains("refusal")
    {
        EmptyKind::Blocked
    } else if saw_thinking {
        // Reasoned but produced no answer: budget-starved mid-thought.
        EmptyKind::Truncated
    } else {
        EmptyKind::Blank
    }
}

/// User-facing message for a terminal empty turn (Truncated is retried, so it
/// has no message — see `stream_turn`). Names the likely cause + the remedy,
/// per the on-chain feedback asking for "more informative error logging".
pub fn empty_message(kind: EmptyKind) -> &'static str {
    match kind {
        // Shown only if a Truncated turn somehow reaches the cap without an
        // answer; the remedy is to decompose the task.
        EmptyKind::Truncated => {
            "(the request was too large to finish in one step — try breaking it \
             into smaller asks.)"
        }
        EmptyKind::Blocked => {
            "(the model stopped this response under its safety filter. Try \
             rephrasing the request.)"
        }
        EmptyKind::Blank => {
            "(empty response — the model returned no text. If you're on platform \
             credits, check your session/balance in the account tab.)"
        }
    }
}

/// Decide how a completed (non-cancelled) turn ended, for the
/// continuous-execution loop. Pure over the signals tracked while
/// streaming so it can be unit-tested without a browser:
/// - `saw_finish`: the model called `finish` → task complete, stop.
/// - `saw_question`: the model called `ask_question` → it's blocking on the
///   user, stop and wait (do NOT auto-continue — that would spam the model
///   and never let the user answer).
/// - `saw_tool_call`: a goal-step tool ran (NOT finish / ask_question).
/// - `any_visible`: anything (text or a tool block) was rendered.
/// - `retryable_empty`: the turn was empty BECAUSE it was truncated mid-answer
///   (max tokens / all-thinking) → `EmptyTruncated`, which the loop retries.
///
/// Precedence: `finish` wins over everything (the model can call other tools
/// then `finish` in the same turn — that's still "done"). A blocking question
/// stops next. Then empty turns — a TRUNCATED empty retries (`EmptyTruncated`),
/// any other empty stops (`Empty`). Then a goal-step-only turn auto-continues
/// (`Incomplete`). A pure text reply with no tool activity is a `FinalAnswer`.
pub fn classify_turn(
    saw_finish: bool,
    saw_question: bool,
    saw_tool_call: bool,
    any_visible: bool,
    retryable_empty: bool,
) -> TurnOutcome {
    if saw_finish {
        TurnOutcome::Finished
    } else if saw_question {
        // Blocking on the user — a conversational stop, like FinalAnswer.
        // Never auto-continue (the default ask_question returns "skipped",
        // so a continue would loop the model 10x without a real answer).
        TurnOutcome::FinalAnswer
    } else if !any_visible {
        // No output. If it was truncated mid-answer (model ran out of budget
        // while reasoning), retry toward an answer; otherwise it's a real
        // dead-end (blank / safety / credits).
        if retryable_empty {
            TurnOutcome::EmptyTruncated
        } else {
            TurnOutcome::Empty
        }
    } else if saw_tool_call {
        // Ended right after tool activity with no explicit completion —
        // the model probably has more to do. Auto-continue.
        TurnOutcome::Incomplete
    } else {
        // Pure text reply, no tool calls — a conversational answer or a
        // question. Don't auto-continue (would spam empty turns).
        TurnOutcome::FinalAnswer
    }
}

#[cfg(test)]
mod tests {
    use super::{classify_empty, classify_turn, EmptyKind, TurnOutcome, MAX_AUTO_CONTINUATIONS};

    // --- Turn classification (the continuous-execution loop's decision) -----
    //
    // `classify_turn(saw_finish, saw_question, saw_tool_call, any_visible,
    // retryable_empty)`. `retryable_empty` only matters when `any_visible` is
    // false; it's `false` for every visible-output case below.

    #[test]
    fn finish_wins_over_everything() {
        // finish + a goal-step tool in the same turn is still "done".
        assert_eq!(
            classify_turn(true, false, true, true, false),
            TurnOutcome::Finished
        );
        // finish alone.
        assert_eq!(
            classify_turn(true, false, false, true, false),
            TurnOutcome::Finished
        );
        // finish even alongside a question.
        assert_eq!(
            classify_turn(true, true, true, true, false),
            TurnOutcome::Finished
        );
    }

    #[test]
    fn ask_question_stops_the_loop_not_incomplete() {
        // REGRESSION: a blocking ask_question used to be read as a goal step
        // (saw_tool_call) → Incomplete → auto-continue, spamming the model and
        // never letting the user answer. It must stop like a FinalAnswer.
        assert_eq!(
            classify_turn(false, true, false, true, false),
            TurnOutcome::FinalAnswer
        );
        // A question accompanied by some other goal-step tool still stops:
        // the question is the blocking signal.
        assert_eq!(
            classify_turn(false, true, true, true, false),
            TurnOutcome::FinalAnswer
        );
    }

    #[test]
    fn goal_step_tool_only_auto_continues() {
        assert_eq!(
            classify_turn(false, false, true, true, false),
            TurnOutcome::Incomplete
        );
    }

    /// REGRESSION (on-chain feedback #100/#101): backends INTERCEPT `finish`
    /// and never emit it as a ToolCall chunk, so the in-tab loop reads it off
    /// `ChatResponse::finished()` and feeds it as `saw_finish`. These pin the
    /// loop-control invariant the app relies on now that the signal flows:
    /// a finished turn ALWAYS stops (no redundant auto-continue sign-off,
    /// #100), and a finish-with-no-text turn is `Finished` — NOT `Empty` —
    /// so it never paints an "(empty response)" bubble (#101).
    #[test]
    fn finish_stops_the_loop_in_every_shape() {
        // finish + text → stop (a normal closing turn).
        assert_eq!(
            classify_turn(true, false, false, true, false),
            TurnOutcome::Finished
        );
        // finish after a goal-step tool, no closing text → stop, NOT
        // Incomplete (the #100 root cause: an undetected finish here used to
        // auto-continue with the "continue toward the goal" nudge).
        assert_eq!(
            classify_turn(true, false, true, true, false),
            TurnOutcome::Finished
        );
        // BARE finish — no text, no other tool, nothing visible → Finished,
        // NOT Empty (the #101 root cause: a missed finish dead-ended as a
        // "(empty response)" bubble).
        assert_eq!(
            classify_turn(true, false, false, false, false),
            TurnOutcome::Finished
        );
    }

    /// The contrast case to `finish_stops_the_loop_in_every_shape`: a turn that
    /// ran a goal-step tool but did NOT call finish keeps going. This is the
    /// behavior that must be PRESERVED — the finish fix can't kill legitimate
    /// multi-step auto-continuation.
    #[test]
    fn pure_tool_without_finish_continues() {
        assert_eq!(
            classify_turn(false, false, true, true, false),
            TurnOutcome::Incomplete
        );
    }

    #[test]
    fn pure_text_reply_is_final_answer() {
        assert_eq!(
            classify_turn(false, false, false, true, false),
            TurnOutcome::FinalAnswer
        );
    }

    #[test]
    fn nothing_visible_is_empty() {
        assert_eq!(
            classify_turn(false, false, false, false, false),
            TurnOutcome::Empty
        );
        // No-visible takes precedence over a stray tool flag (can't have run a
        // tool with nothing rendered, but the ordering must be deterministic).
        assert_eq!(
            classify_turn(false, false, true, false, false),
            TurnOutcome::Empty
        );
    }

    /// A TRUNCATED empty turn (model ran out of output budget mid-answer) is
    /// `EmptyTruncated` — RETRYABLE — not a flat `Empty` dead-end. This is the
    /// core fix for "(empty response) on hard tasks".
    #[test]
    fn truncated_empty_is_retryable_not_dead_end() {
        assert_eq!(
            classify_turn(false, false, false, false, true),
            TurnOutcome::EmptyTruncated
        );
        // finish/question still win over a truncated-empty flag (defensive —
        // can't really co-occur, but precedence must be deterministic).
        assert_eq!(
            classify_turn(true, false, false, false, true),
            TurnOutcome::Finished
        );
    }

    // --- Empty-turn cause classification (drives message + retry) -----------

    #[test]
    fn classify_empty_max_tokens_note_is_truncated() {
        // The finish-reason note from either backend ("stopped at max tokens").
        assert_eq!(
            classify_empty(Some("stopped at max tokens"), false),
            EmptyKind::Truncated
        );
    }

    #[test]
    fn classify_empty_all_thinking_no_note_is_truncated() {
        // The exact bug: the model reasoned the whole budget away and emitted no
        // final text, and the finish-reason wasn't surfaced — `saw_thinking`
        // alone classifies it as truncated (so it's retried).
        assert_eq!(classify_empty(None, true), EmptyKind::Truncated);
    }

    #[test]
    fn classify_empty_safety_note_is_blocked() {
        assert_eq!(
            classify_empty(Some("stopped by safety policy"), true),
            EmptyKind::Blocked
        );
        assert_eq!(
            classify_empty(Some("stopped by blocklist"), false),
            EmptyKind::Blocked
        );
        assert_eq!(
            classify_empty(Some("stopped by refusal"), false),
            EmptyKind::Blocked
        );
    }

    #[test]
    fn classify_empty_nothing_at_all_is_blank() {
        // No note, no thinking → a genuine blank (likely credits/session).
        assert_eq!(classify_empty(None, false), EmptyKind::Blank);
        assert_eq!(classify_empty(Some(""), false), EmptyKind::Blank);
    }

    /// The outcomes that auto-continue are `Incomplete` and `EmptyTruncated`.
    /// This guards the loop-termination invariant: every OTHER classification
    /// breaks the continuous-execution loop, so the loop can only spin via these
    /// two — both hard-bounded by the SAME `MAX_AUTO_CONTINUATIONS` counter.
    #[test]
    fn only_incomplete_or_truncated_continues() {
        let continues =
            |o: TurnOutcome| o == TurnOutcome::Incomplete || o == TurnOutcome::EmptyTruncated;
        assert!(!continues(classify_turn(true, false, false, true, false))); // Finished
        assert!(!continues(classify_turn(false, true, false, true, false))); // FinalAnswer (question)
        assert!(!continues(classify_turn(false, false, false, true, false))); // FinalAnswer (text)
        assert!(!continues(classify_turn(false, false, false, false, false))); // Empty (blank)
        assert!(continues(classify_turn(false, false, true, true, false))); // Incomplete
        assert!(continues(classify_turn(false, false, false, false, true))); // EmptyTruncated
    }

    /// Mirrors the loop's increment/break: an always-`Incomplete` turn can fire
    /// at most `MAX_AUTO_CONTINUATIONS` auto-continuations, then the cap stops
    /// it. Proves no infinite loop when a confused model never finishes.
    #[test]
    fn auto_continuation_is_bounded() {
        let mut auto: u32 = 0;
        let mut iterations = 0u32;
        loop {
            iterations += 1;
            // Always Incomplete (the worst case for the loop).
            if matches!(
                classify_turn(false, false, true, true, false),
                TurnOutcome::Incomplete
            ) {
                if auto >= MAX_AUTO_CONTINUATIONS {
                    break;
                }
                auto += 1;
            } else {
                break;
            }
            // Safety net for the test itself.
            assert!(iterations < MAX_AUTO_CONTINUATIONS + 5, "loop did not terminate");
        }
        // First turn + MAX_AUTO_CONTINUATIONS continuations, then the cap break.
        assert_eq!(auto, MAX_AUTO_CONTINUATIONS);
        assert_eq!(iterations, MAX_AUTO_CONTINUATIONS + 1);
    }

    /// A repeatedly-truncated turn (`EmptyTruncated` every time — the model
    /// keeps running out of budget) must ALSO terminate, bounded by the same
    /// `MAX_AUTO_CONTINUATIONS` cap. Proves the retry path can't loop forever.
    #[test]
    fn truncated_retry_is_bounded() {
        let mut auto: u32 = 0;
        let mut iterations = 0u32;
        loop {
            iterations += 1;
            if matches!(
                classify_turn(false, false, false, false, true),
                TurnOutcome::EmptyTruncated
            ) {
                if auto >= MAX_AUTO_CONTINUATIONS {
                    break;
                }
                auto += 1;
            } else {
                break;
            }
            assert!(iterations < MAX_AUTO_CONTINUATIONS + 5, "truncated retry did not terminate");
        }
        assert_eq!(auto, MAX_AUTO_CONTINUATIONS);
        assert_eq!(iterations, MAX_AUTO_CONTINUATIONS + 1);
    }
}