localharness 0.54.0

Agents that own themselves: one Rust crate that's both an agent SDK (streaming, tools, hooks, policies, triggers, MCP) and a wallet-owning, self-sovereign agent that runs in the browser.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
//! Deterministic, offline mock backend for testing agents.
//!
//! [`MockConnection`](crate::backends::mock::MockConnection) is a scripted
//! [`ConnectionStrategy`](crate::ConnectionStrategy)
//! / [`Connection`](crate::Connection) that replays a fixed sequence of model
//! turns with **no network, no API key, and no LLM** — so SDK consumers (and
//! the crate's own tests) can unit-test an [`Agent`](crate::Agent)'s behavior
//! (the tool loop, hooks, policies, triggers) deterministically and offline.
//!
//! It is a faithful drop-in for a real backend: each scripted turn emits the
//! exact same [`Step`](crate::Step) shapes the live Gemini/Anthropic loops do —
//! streamed text-delta steps ([`StepStatus::Active`](crate::StepStatus),
//! `content_delta`), tool-call steps ([`StepType::ToolCall`](crate::StepType))
//! with inline tool dispatch through the injected [`ToolRunner`](crate::ToolRunner)
//! (running the same hooks + policies), and a turn-terminal step
//! ([`StepStatus::Done`](crate::StepStatus), `is_complete_response: true`). Each
//! `agent.chat(...)` / `Connection::send` consumes the next scripted turn.
//!
//! Always available (no feature flag): the mock pulls no dependencies the core
//! crate doesn't already use, and compiles on `wasm32` exactly like the live
//! backends — so both the crate's own tests and consumers' dev-deps benefit.
//! Use it from your crate's tests via `localharness::backends::mock`.
//!
//! # Example
//!
//! Script a tool-call flow and assert it runs offline against a real
//! [`Agent`](crate::Agent):
//!
//! ```rust,no_run
//! use localharness::{Agent, ClosureTool, policy};
//! use localharness::backends::mock::{MockAgentConfig, MockConnection};
//! use serde_json::json;
//!
//! # async fn run() -> localharness::Result<()> {
//! // A custom tool the script will call.
//! let greet = ClosureTool::new(
//!     "greet",
//!     "Greet someone",
//!     json!({"type": "object", "properties": {"name": {"type": "string"}}}),
//!     |args, _ctx| async move {
//!         let name = args["name"].as_str().unwrap_or("world");
//!         Ok(json!({"text": format!("hello {name}")}))
//!     },
//! );
//!
//! // Script ONE turn: call `greet`, then reply with text.
//! let backend = MockConnection::builder()
//!     .turn(|t| t.tool_call("greet", json!({"name": "ada"})).text("done"))
//!     .build();
//!
//! let agent = Agent::start_mock(
//!     MockAgentConfig::new(backend)
//!         .with_tool(greet)
//!         .with_policies(vec![policy::allow_all()]),
//! )
//! .await?;
//!
//! let reply = agent.chat("hi").await?.text().await?;
//! assert_eq!(reply, "done"); // the tool ran inline; the scripted text replied
//! agent.shutdown().await?;
//! # Ok(())
//! # }
//! ```

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};

use async_trait::async_trait;
use tokio::sync::{broadcast, Notify};

// Re-exported here so consumers can `use localharness::backends::mock::{
// MockAgentConfig, MockConnection}` in one line. The config itself lives in
// `agent.rs` next to the other per-backend agent configs.
pub use crate::agent::MockAgentConfig;

use crate::connections::{Connection, ConnectionStrategy, StepStream};
use crate::content::Content;
use crate::error::Result;
use crate::types::{Step, StepStatus, ToolCall, ToolResult, UsageMetadata};

const STEP_BROADCAST_CAPACITY: usize = 256;

// =============================================================================
// Script: a scripted turn + the actions it replays
// =============================================================================

/// One scripted action the mock replays within a turn, in order.
///
/// A [`ScriptedTurn`] is an ordered list of these; `run_turn` walks the list
/// and emits one step per action before the turn-terminal step.
#[derive(Debug, Clone)]
enum ScriptAction {
    /// Stream a conversational text delta (a `content_delta` step). The
    /// terminal step's `content` is the concatenation of every `Text`.
    Text(String),
    /// Request a tool call. When a [`ToolRunner`](crate::ToolRunner) is injected (the Agent path)
    /// the mock dispatches it inline through hooks + policies, exactly like the
    /// live backends; otherwise it only surfaces the call on the step stream.
    ToolCall {
        /// Name of the tool the scripted "model" asks for.
        name: String,
        /// JSON arguments handed to the tool.
        args: serde_json::Value,
    },
}

/// A single scripted model turn: an ordered list of text deltas and/or
/// tool-calls, optionally reporting token usage on its terminal step.
///
/// Build one with the closure passed to [`MockConnectionBuilder::turn`], or
/// construct directly and pass a `Vec<ScriptedTurn>` to
/// [`MockConnectionBuilder::turns`].
///
/// The `Default` value is an empty turn (no actions, no usage); the mock
/// replays exactly that when a send arrives past the end of the script.
#[derive(Debug, Clone, Default)]
pub struct ScriptedTurn {
    /// Actions replayed in insertion order.
    actions: Vec<ScriptAction>,
    /// Usage attached to the turn-terminal step, if any was scripted.
    usage: Option<UsageMetadata>,
}

impl ScriptedTurn {
    /// Create a turn with no actions and no usage. Chain [`Self::text`] and
    /// [`Self::tool_call`] to populate it.
    pub fn new() -> Self {
        Self {
            actions: Vec::new(),
            usage: None,
        }
    }

    /// Queue a streamed text delta. Every delta queued on the turn is joined,
    /// in order, to form the `content` of the turn-terminal step — the same
    /// way a streaming model's deltas add up to its final message.
    pub fn text(mut self, text: impl Into<String>) -> Self {
        let delta = ScriptAction::Text(text.into());
        self.actions.push(delta);
        self
    }

    /// Queue a tool call the scripted "model" requests at this point of the
    /// turn. `args` is the JSON handed to the tool. When a
    /// [`ToolRunner`](crate::ToolRunner) has been injected, the mock runs the
    /// call inline through the agent's hooks + policies.
    pub fn tool_call(mut self, name: impl Into<String>, args: serde_json::Value) -> Self {
        let name = name.into();
        self.actions.push(ScriptAction::ToolCall { name, args });
        self
    }

    /// Attach token usage to this turn's terminal step. Only the terminal
    /// step carries usage (matching the live backends), so
    /// `cumulative_usage` counts each turn exactly once.
    pub fn with_usage(self, usage: UsageMetadata) -> Self {
        Self {
            usage: Some(usage),
            ..self
        }
    }

    /// Join every scripted text delta, in order, into the terminal step's
    /// body. Tool-call actions contribute nothing.
    fn content(&self) -> String {
        self.actions
            .iter()
            .filter_map(|action| match action {
                ScriptAction::Text(delta) => Some(delta.as_str()),
                ScriptAction::ToolCall { .. } => None,
            })
            .collect()
    }
}

// =============================================================================
// Builder
// =============================================================================

/// Fluent builder for a scripted [`MockConnectionStrategy`].
///
/// Collects the ordered script of [`ScriptedTurn`]s and an optional fixed
/// conversation id, then produces the strategy via
/// [`MockConnectionBuilder::build`].
///
/// See the [module docs](self) for an end-to-end example.
// `Debug` is derived (both fields are `Debug`) so a builder can be printed in
// test diagnostics and embedded in `#[derive(Debug)]` fixtures, matching
// `ScriptedTurn`.
#[derive(Debug, Default)]
pub struct MockConnectionBuilder {
    /// Scripted turns, replayed in order (one per send).
    turns: Vec<ScriptedTurn>,
    /// Explicit conversation id; `build` falls back to `"mock-conversation"`.
    conversation_id: Option<String>,
}

impl MockConnectionBuilder {
    /// Create a builder with an empty script and no explicit conversation id.
    pub fn new() -> Self {
        Self {
            turns: Vec::new(),
            conversation_id: None,
        }
    }

    /// Add one scripted turn, described by the closure. The closure receives
    /// an empty [`ScriptedTurn`] and returns the populated one; the Nth
    /// `agent.chat(...)` replays the Nth turn added.
    ///
    /// ```rust
    /// use localharness::backends::mock::MockConnection;
    /// use serde_json::json;
    ///
    /// let backend = MockConnection::builder()
    ///     .turn(|t| t.text("first answer"))
    ///     .turn(|t| t.tool_call("search", json!({"q": "rust"})).text("found it"))
    ///     .build();
    /// # let _ = backend;
    /// ```
    pub fn turn(self, f: impl FnOnce(ScriptedTurn) -> ScriptedTurn) -> Self {
        self.push_turn(f(ScriptedTurn::new()))
    }

    /// Add an already-constructed [`ScriptedTurn`] to the end of the script
    /// (handy when turns are produced in a loop).
    pub fn push_turn(self, turn: ScriptedTurn) -> Self {
        let Self {
            mut turns,
            conversation_id,
        } = self;
        turns.push(turn);
        Self {
            turns,
            conversation_id,
        }
    }

    /// Discard whatever was scripted so far and use `turns` as the script.
    pub fn turns(self, turns: Vec<ScriptedTurn>) -> Self {
        Self { turns, ..self }
    }

    /// Pin the conversation id reported by the connection. Without this the
    /// id is `"mock-conversation"`.
    pub fn conversation_id(self, id: impl Into<String>) -> Self {
        Self {
            conversation_id: Some(id.into()),
            ..self
        }
    }

    /// Consume the builder and produce the strategy. The strategy starts with
    /// default (empty) runners; the Agent injects the real ones later via
    /// [`MockConnectionStrategy::with_runners`].
    pub fn build(self) -> MockConnectionStrategy {
        let Self {
            turns,
            conversation_id,
        } = self;
        let conversation_id = match conversation_id {
            Some(id) => id,
            None => String::from("mock-conversation"),
        };
        MockConnectionStrategy {
            turns: Arc::new(turns),
            conversation_id,
            runners: MockRunners::default(),
        }
    }
}

// =============================================================================
// Runners (injected by Agent::start_mock)
// =============================================================================

/// Runners the Agent injects so the mock can dispatch tool calls inline
/// through the same hooks + policies + [`ToolRunner`](crate::ToolRunner) the
/// live backends use — an alias of the shared
/// [`BackendRunners`](crate::backends::BackendRunners).
///
/// This module reads its `session_ctx`, `hook_runner`, and `tool_runner`
/// fields when replaying a turn.
pub type MockRunners = crate::backends::BackendRunners;

// =============================================================================
// Strategy
// =============================================================================

/// Factory that opens a scripted [`MockConnection`]. Build one via
/// [`MockConnection::builder`].
///
/// Every `connect()` call shares the same script but starts with fresh
/// turn/step counters, so each connection replays from the first turn.
pub struct MockConnectionStrategy {
    /// The script, shared (not copied) with every connection opened.
    turns: Arc<Vec<ScriptedTurn>>,
    /// Conversation id reported by connections this strategy opens.
    conversation_id: String,
    /// Hooks / policies / tool runner handed to each connection.
    runners: MockRunners,
}

impl MockConnectionStrategy {
    /// Replace this strategy's runners with the ones the Agent owns, so that
    /// scripted tool calls are dispatched inline through hooks + policies +
    /// the tool runner. Mirrors `GeminiConnectionStrategy::with_runners`.
    pub fn with_runners(self, runners: MockRunners) -> Self {
        Self { runners, ..self }
    }
}

#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
impl ConnectionStrategy for MockConnectionStrategy {
    /// Open a fresh scripted connection: it shares this strategy's script and
    /// runners, starts both counters at zero, and begins idle.
    async fn connect(&self) -> Result<Arc<dyn Connection>> {
        // Only the sender is kept; receivers are created on demand by
        // `subscribe_steps`.
        let (sender, _) = broadcast::channel::<Step>(STEP_BROADCAST_CAPACITY);

        let state = MockInner {
            turns: Arc::clone(&self.turns),
            next_turn: AtomicUsize::new(0),
            step_index: AtomicUsize::new(0),
            steps: sender,
            idle: AtomicBool::new(true),
            idle_notify: Notify::new(),
            conversation_id: Arc::from(self.conversation_id.as_str()),
            runners: self.runners.clone(),
        };

        let connection: Arc<dyn Connection> = Arc::new(MockConnection {
            inner: Arc::new(state),
        });
        Ok(connection)
    }
}

// =============================================================================
// Connection
// =============================================================================

/// A live, scripted session implementing [`Connection`]. Replays one
/// [`ScriptedTurn`] per [`Connection::send`] / `agent.chat(...)`.
///
/// Construct it via the [builder](MockConnection::builder); the
/// [`MockConnectionStrategy`] it produces is what [`Agent::start_mock`]
/// consumes.
///
/// [`Agent::start_mock`]: crate::Agent::start_mock
pub struct MockConnection {
    /// Shared turn-running state; cloned into each spawned turn task.
    inner: Arc<MockInner>,
}

/// Shared, cheaply-cloneable turn-running state. Held behind an `Arc` so
/// [`Connection::send`] can hand a clone to the spawned turn task (the live
/// backends clone an `Arc`-backed `deps_template` the same way).
struct MockInner {
    /// The script shared with the strategy that opened this connection.
    turns: Arc<Vec<ScriptedTurn>>,
    /// Index of the next scripted turn to replay; bumped once per turn that
    /// passes the pre-turn gate.
    next_turn: AtomicUsize,
    /// Monotonic counter handing out step indices for emitted steps.
    step_index: AtomicUsize,
    /// Broadcast sender every emitted step goes through.
    steps: broadcast::Sender<Step>,
    /// `true` while no turn is being replayed (initially `true`).
    idle: AtomicBool,
    /// Wakes `wait_for_idle` callers when `idle` flips back to `true`.
    idle_notify: Notify,
    /// Conversation id returned by `Connection::conversation_id`.
    conversation_id: Arc<str>,
    /// Hooks / policies / tool runner used for inline dispatch.
    runners: MockRunners,
}

impl MockConnection {
    /// Entry point for scripting a mock backend: returns an empty
    /// [`MockConnectionBuilder`].
    pub fn builder() -> MockConnectionBuilder {
        MockConnectionBuilder::default()
    }
}

impl MockInner {
    fn alloc_step_index(&self) -> u32 {
        self.step_index.fetch_add(1, Ordering::Relaxed) as u32
    }

    fn emit(&self, step: Step) {
        let _ = self.steps.send(step);
    }

    /// Replay one scripted turn: gate the prompt through the pre-turn hooks,
    /// stream the turn's text deltas, dispatch its tool calls inline through
    /// hooks + policies + the tool runner (when injected), then emit the
    /// turn-terminal step and fire the post-turn hooks. Faithful to the live
    /// `run_turn` shape: streamed `Active` deltas + a single `Done` terminal
    /// carrying the full `content` (and, if scripted, the turn's usage).
    async fn run_turn(&self, prompt: Content) {
        // ONE turn context shared by the pre-turn gate, the per-call tool
        // hooks, and the post-turn hooks of this turn.
        let turn_ctx = self
            .runners
            .session_ctx
            .as_ref()
            .map(|s| s.child())
            .unwrap_or_default();

        // Pre-turn gate — like the live backends, a denied prompt never runs
        // the "model": the next scripted turn is NOT consumed, and the deny
        // surfaces as a turn_error Step (→ stream `Err` for `chat()`/`text()`).
        if let Some(denied) = crate::backends::dispatch::gate_pre_turn(
            self.runners.hook_runner.as_ref(),
            &turn_ctx,
            &prompt,
        )
        .await
        {
            self.emit(Step::turn_error(self.alloc_step_index(), denied));
            return;
        }

        // Consume the next scripted turn AFTER the gate. Past the end of the
        // script, every send is an empty terminal turn (a model with nothing
        // left to say) — so an over-sending test terminates cleanly.
        let idx = self.next_turn.fetch_add(1, Ordering::Relaxed);
        let turn = self.turns.get(idx).cloned().unwrap_or_default();

        self.idle.store(false, Ordering::Release);
        let traj = uuid::Uuid::new_v4().to_string();

        for action in &turn.actions {
            match action {
                ScriptAction::Text(delta) => {
                    self.emit(Step::text_delta(&traj, self.alloc_step_index(), delta));
                }
                ScriptAction::ToolCall { name, args } => {
                    let tool_call = ToolCall {
                        name: name.clone(),
                        args: args.clone(),
                        id: None,
                        canonical_path: None,
                    };
                    // Surface the call on the stream (UIs flip the tool block to
                    // "running"), exactly like the live loop's tool-call step.
                    // `Done` (not `Active`) is deliberate: the mock ALREADY
                    // dispatches this tool call inline (matching the live
                    // backends), so the step exists only for OBSERVABILITY —
                    // `ChatResponse::tool_calls()` reads `tool_calls` regardless
                    // of status. The Agent's `spawn_tool_dispatcher` SKIPS
                    // `Done` steps, so this step does NOT trigger a redundant
                    // second dispatch. (It is non-terminal —
                    // `is_complete_response: Some(false)` and
                    // `target: Environment` — so it never ends the
                    // `ChatResponse`.)
                    self.emit(Step::tool_call(
                        self.alloc_step_index(),
                        tool_call.clone(),
                        StepStatus::Done,
                    ));

                    // Dispatch inline through hooks + policies + the runner when
                    // the Agent injected them. This is what makes a scripted
                    // tool-call flow actually RUN the tool offline — like the
                    // live backends, the result feeds the conversation rather
                    // than being re-broadcast as its own step.
                    if self.runners.tool_runner.is_some() {
                        let _result = self.dispatch_tool(&turn_ctx, &tool_call).await;
                    }
                }
            }
        }

        let content = turn.content();
        // A scripted `finish` tool call flags the terminal step as
        // `StepType::Finish` (mirrors the live backends' `saw_finish`).
        let finished_turn = turn.actions.iter().any(|a| {
            matches!(a, ScriptAction::ToolCall { name, .. }
                if name == crate::builtins::FINISH_TOOL_NAME)
        });
        self.emit(Step::turn_complete(
            traj,
            self.alloc_step_index(),
            StepStatus::Done,
            content.as_str(),
            "",
            finished_turn,
            None,
            turn.usage,
        ));

        // Post-turn hooks observe the completed turn's final text — fired
        // after the terminal step, never on denied turns (the gate above
        // returned early). Same placement as the live backends.
        crate::backends::dispatch::dispatch_post_turn(
            self.runners.hook_runner.as_ref(),
            &turn_ctx,
            &content,
        )
        .await;

        self.idle.store(true, Ordering::Release);
        self.idle_notify.notify_waiters();
    }

    /// Run a scripted tool call through the injected hooks + policies + tool
    /// runner — the SAME shared pipeline the live backends use
    /// ([`crate::backends::dispatch::dispatch_tool_call`]) — and return the
    /// typed result. A denied call (policy / pre-tool-call hook) yields an
    /// error result without executing the tool; an `Ok` value carrying
    /// `{"error": ...}` is lifted into `ToolResult::error` exactly like the
    /// live backends. `turn_ctx` is the turn's shared hook context, so
    /// per-call operation contexts hang off the turn like the live loops.
    async fn dispatch_tool(
        &self,
        turn_ctx: &crate::hooks::TurnContext,
        call: &ToolCall,
    ) -> ToolResult {
        crate::backends::dispatch::dispatch_tool_call(
            self.runners.tool_runner.as_ref(),
            self.runners.hook_runner.as_ref(),
            turn_ctx,
            call,
        )
        .await
    }
}

#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
impl Connection for MockConnection {
    /// `true` while no scripted turn is being replayed (and after
    /// [`Connection::shutdown`]).
    fn is_idle(&self) -> bool {
        self.inner.idle.load(Ordering::Acquire)
    }

    /// The conversation id fixed when the strategy was built.
    fn conversation_id(&self) -> &str {
        &self.inner.conversation_id
    }

    /// Kick off the next scripted turn for `content` and return immediately.
    ///
    /// Always returns `Ok(())`; a prompt denied by the pre-turn hooks is
    /// reported on the step stream, not through this result.
    async fn send(&self, content: Content) -> Result<()> {
        // Spawn the turn so `send` returns once dispatched (the live backends
        // do the same), letting streaming consumers subscribe before steps
        // land. `run_turn` gates the prompt through the pre-turn hooks and
        // only consumes the next scripted turn when the gate allows.
        //
        // NOTE(review): the idle flag is cleared inside the spawned task, so
        // `is_idle()` can still report `true` right after `send` returns —
        // confirm no caller relies on `wait_for_idle` immediately after it.
        let inner = self.inner.clone();
        crate::runtime::spawn(async move {
            inner.run_turn(content).await;
        });
        Ok(())
    }

    /// Deliver trigger text as an ordinary text prompt (consumes a scripted
    /// turn exactly like [`Connection::send`]).
    async fn send_trigger(&self, content: String) -> Result<()> {
        self.send(Content::text(content)).await
    }

    /// Accept and ignore out-of-band tool results.
    async fn send_tool_results(&self, _results: Vec<ToolResult>) -> Result<()> {
        // The mock dispatches scripted tool calls inline (like the Gemini
        // backend), so out-of-band results are a no-op.
        Ok(())
    }

    /// Subscribe to the steps emitted from now on.
    fn subscribe_steps(&self) -> StepStream {
        // Turn-failure Steps surface as stream `Err` (uniform across
        // backends) — see `backends::subscribe_step_stream`.
        crate::backends::subscribe_step_stream(self.inner.steps.subscribe(), "mock")
    }

    /// Resolve once the connection is idle (immediately if it already is).
    async fn wait_for_idle(&self) -> Result<()> {
        loop {
            // Create the `Notified` future BEFORE reading the flag. A
            // `Notified` future receives `notify_waiters()` wakeups from the
            // moment it is created, so a turn that finishes between the flag
            // check and the `.await` below can no longer be missed. (Checking
            // first and only then calling `notified()` leaves a window in
            // which the wakeup is lost and this call would hang.)
            let notified = self.inner.idle_notify.notified();
            if self.is_idle() {
                return Ok(());
            }
            notified.await;
        }
    }

    /// Mark the connection idle and release every `wait_for_idle` caller.
    /// Does not cancel a turn task that is already running.
    async fn shutdown(&self) -> Result<()> {
        self.inner.idle.store(true, Ordering::Release);
        self.inner.idle_notify.notify_waiters();
        Ok(())
    }
}

// =============================================================================
// Tests — these read like the example a consumer would write.
// =============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::agent::Agent;
    use crate::policy;
    use crate::tools::ClosureTool;
    use parking_lot::Mutex;
    use serde_json::json;
    use std::sync::atomic::{AtomicBool, AtomicUsize};

    /// THE demonstrating test: a consumer scripts a tool-call flow and asserts
    /// it runs deterministically OFFLINE — no network, no key, no LLM.
    ///
    /// Script ONE turn: `{ tool_call(record_fact) -> text("logged") }`.
    /// Assert: the tool actually RAN (its side effect fired, with the scripted
    /// args) AND the agent's final text is the scripted reply. This is the
    /// whole point — agent logic (the tool loop) tested with a mock model.
    #[tokio::test]
    async fn scripted_tool_call_flow_runs_offline() {
        // A tool whose side effect we can observe: it COUNTS each invocation
        // (so a double-dispatch would be caught) and records the args it saw.
        let count = Arc::new(AtomicUsize::new(0));
        let recorded: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
        // Clones moved into the tool closure; the originals stay with the
        // test for the assertions below.
        let count_c = count.clone();
        let recorded_c = recorded.clone();
        let record_fact = ClosureTool::new(
            "record_fact",
            "Persist a fact",
            json!({"type": "object", "properties": {"fact": {"type": "string"}}}),
            move |args, _ctx| {
                // Re-clone per invocation so the returned future owns its
                // handles.
                let count_c = count_c.clone();
                let recorded_c = recorded_c.clone();
                async move {
                    count_c.fetch_add(1, Ordering::SeqCst);
                    let fact = args["fact"].as_str().unwrap_or_default().to_string();
                    *recorded_c.lock() = Some(fact);
                    Ok(json!({"ok": true}))
                }
            },
        );

        // Script the model's behavior: call the tool, then answer.
        let backend = MockConnection::builder()
            .turn(|t| {
                t.tool_call("record_fact", json!({"fact": "the sky is blue"}))
                    .text("logged")
            })
            .build();

        let agent = Agent::start_mock(
            MockAgentConfig::new(backend)
                .with_tool(record_fact)
                .with_policies(vec![policy::allow_all()]),
        )
        .await
        .expect("mock agent starts");

        // Drive one chat turn and collect its final text.
        let reply = agent
            .chat("remember a fact")
            .await
            .expect("chat starts")
            .text()
            .await
            .expect("turn completes");

        // 1. The scripted tool executed EXACTLY ONCE, with the scripted args.
        //    Exactly-once proves the mock dispatches inline (like the live
        //    backends) without the Agent's step dispatcher double-firing it.
        assert_eq!(
            count.load(Ordering::SeqCst),
            1,
            "the scripted tool must run exactly once",
        );
        assert_eq!(
            recorded.lock().as_deref(),
            Some("the sky is blue"),
            "the tool received the scripted args",
        );
        // 2. The agent's final text is the scripted reply.
        assert_eq!(reply, "logged", "the scripted terminal text is returned");

        agent.shutdown().await.expect("clean shutdown");
    }

    /// A policy that denies the scripted tool must BLOCK it: the tool's side
    /// effect never fires, but the turn still completes (the model's text
    /// still streams). Proves the mock drives the real hooks/policy pipeline.
    #[tokio::test]
    async fn denied_tool_call_does_not_execute() {
        // Flag flipped only if the tool body actually runs.
        let ran = Arc::new(AtomicBool::new(false));
        let ran_c = ran.clone();
        let tool = ClosureTool::new(
            "danger",
            "A blocked tool",
            json!({"type": "object"}),
            move |_args, _ctx| {
                let ran_c = ran_c.clone();
                async move {
                    ran_c.store(true, Ordering::SeqCst);
                    Ok(json!({"ok": true}))
                }
            },
        );

        // One turn: attempt the tool, then reply with text.
        let backend = MockConnection::builder()
            .turn(|t| t.tool_call("danger", json!({})).text("attempted"))
            .build();

        // deny_all → the pre-tool-call policy hook blocks every call.
        let agent = Agent::start_mock(
            MockAgentConfig::new(backend)
                .with_tool(tool)
                .with_policies(vec![policy::deny_all()]),
        )
        .await
        .expect("mock agent starts");

        let reply = agent.chat("go").await.unwrap().text().await.unwrap();

        assert!(
            !ran.load(Ordering::SeqCst),
            "a denied tool must NOT execute its body",
        );
        // The denial blocks the tool only; the scripted text still arrives.
        assert_eq!(reply, "attempted", "the turn still completes");
        agent.shutdown().await.unwrap();
    }

    /// The scripted tool call is also OBSERVABLE on the response stream — a
    /// consumer can assert which tool the (mock) model dispatched via the
    /// public `ChatResponse::tool_calls()` cursor, with the scripted args.
    #[tokio::test]
    async fn scripted_tool_call_is_visible_on_the_stream() {
        // `StreamExt` provides `.next()` on the tool-call cursor.
        use futures_util::StreamExt;

        let tool = ClosureTool::new(
            "search",
            "Search",
            json!({"type": "object", "properties": {"q": {"type": "string"}}}),
            |_args, _ctx| async move { Ok(json!({"hits": 0})) },
        );
        let backend = MockConnection::builder()
            .turn(|t| t.tool_call("search", json!({"q": "rust"})).text("none found"))
            .build();
        let agent = Agent::start_mock(
            MockAgentConfig::new(backend)
                .with_tool(tool)
                .with_policies(vec![policy::allow_all()]),
        )
        .await
        .unwrap();

        // Read the first surfaced tool call and compare it with the script.
        let resp = agent.chat("find rust").await.unwrap();
        let mut calls = resp.tool_calls();
        let first = calls
            .next()
            .await
            .expect("a tool call is surfaced")
            .expect("ok");
        assert_eq!(first.name, "search");
        assert_eq!(first.args, json!({"q": "rust"}));
        agent.shutdown().await.unwrap();
    }

    /// Multi-turn determinism: the Nth `chat` replays the Nth scripted turn,
    /// and per-turn usage accumulates exactly like the live backends.
    #[tokio::test]
    async fn turns_replay_in_order_with_usage() {
        // Two text-only turns, each reporting its own total token count.
        let backend = MockConnection::builder()
            .turn(|t| {
                t.text("first").with_usage(UsageMetadata {
                    total_token_count: Some(10),
                    ..Default::default()
                })
            })
            .turn(|t| {
                t.text("second").with_usage(UsageMetadata {
                    total_token_count: Some(20),
                    ..Default::default()
                })
            })
            .build();

        // No tools or policies are needed for a text-only script.
        let agent = Agent::start_mock(MockAgentConfig::new(backend))
            .await
            .expect("mock agent starts");

        // Each chat consumes the next scripted turn, in order.
        let r1 = agent.chat("a").await.unwrap().text().await.unwrap();
        assert_eq!(r1, "first");
        let r2 = agent.chat("b").await.unwrap().text().await.unwrap();
        assert_eq!(r2, "second");

        // Usage summed across both turns, counted once each.
        assert_eq!(
            agent.cumulative_usage().total_token_count,
            Some(30),
            "10 + 20, each turn counted once",
        );
        agent.shutdown().await.unwrap();
    }
}