aidaemon 0.9.34

A personal AI agent that runs as a background daemon, accessible via Telegram, Slack, or Discord, with tool use, MCP integration, and persistent memory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
// ---------------------------------------------------------------------------
// Default+fallback routing tests — iteration 1 always has tools available
// ---------------------------------------------------------------------------

/// Question flow with distinct primary/smart models: iteration 1 already runs
/// with tools available (there is no separate text-only pre-pass).
///
/// The mock provider is scripted with a reply carrying an `[INTENT_GATE]`
/// payload followed by the plain answer. The test expects the second reply to
/// be returned verbatim, exactly two provider calls, and a non-empty tool list
/// on the first recorded call.
#[tokio::test]
async fn test_initial_routing_call_classifies_then_executor_answers_questions() {
    let final_answer = "Your website is deployed to Cloudflare Workers at your-site.workers.dev.";
    let scripted_replies = vec![
        MockProvider::text_response(
            "I can answer this from memory.\n[INTENT_GATE]\n\
             {\"complexity\": \"knowledge\", \"can_answer_now\": true, \"needs_tools\": false}",
        ),
        MockProvider::text_response(final_answer),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(scripted_replies),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    // Seed memory with the fact the question refers to.
    fixture
        .state
        .upsert_fact(
            "project",
            "my website",
            "deployed to cloudflare workers at your-site.workers.dev",
            "user",
            None,
            crate::types::FactPrivacy::Global,
        )
        .await
        .unwrap();

    // The user message is phrased as a question (it contains `?`).
    let question = "Can you tell me the deployment URL for my website?";
    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            question,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, final_answer);
    assert_eq!(fixture.provider.call_count().await, 2);

    // With default+fallback routing, every LLM call carries the tool list.
    let recorded = fixture.provider.call_log.lock().await;
    assert!(
        !recorded[0].tools.is_empty(),
        "First call should have tools (no separate tool-free text-only pre-pass)"
    );
}

/// Asking "What's my name?" after a `user`/`name` fact has been stored is
/// expected to be answered as "Your name is Test Owner." without any provider
/// call being recorded (the mock provider has no scripted replies).
#[tokio::test]
async fn test_critical_owner_name_query_is_deterministic() {
    let unscripted = MockProvider::new();
    let fixture = setup_test_agent_with_models(unscripted, "primary-model", "smart-model")
        .await
        .unwrap();

    // Store the owner's name so the query can be resolved from memory.
    fixture
        .state
        .upsert_fact(
            "user",
            "name",
            "Test Owner",
            "owner",
            None,
            crate::types::FactPrivacy::Global,
        )
        .await
        .unwrap();

    let channel = ChannelContext::private("test");
    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "What's my name?",
            None,
            UserRole::Owner,
            channel,
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Your name is Test Owner.");

    let llm_calls = fixture.provider.call_count().await;
    assert_eq!(
        llm_calls, 0,
        "Critical identity query should resolve without an LLM call"
    );
}

/// A personal-recall question ("What about pets?") must be served by the
/// primary model rather than a cheaper profile/model.
///
/// The orchestrator harness receives a single scripted text reply. The test
/// checks that the reply reaches the user, that at least one provider call was
/// recorded, and that the first recorded call targeted `"primary-model"`.
#[tokio::test]
async fn test_personal_recall_turn_routes_at_least_primary_model() {
    // With deterministic routing (no first-pass orchestration LLM call), the
    // execution loop handles the request directly with a single LLM call.
    let provider = MockProvider::with_responses(vec![
        // Execution loop — direct answer (no text-only pre-pass response needed)
        MockProvider::text_response("I don't have pet information saved."),
    ]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "What about pets?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("don't have pet information"),
        "Expected pet information response, got: {response}"
    );

    let calls = harness.provider.call_log.lock().await;
    // Guard the `calls[0]` index below with an explicit emptiness check.
    assert!(
        !calls.is_empty(),
        "Execution loop should make at least one LLM call"
    );
    assert_eq!(
        calls[0].model, "primary-model",
        "Personal recall should not route to the cheapest profile/model"
    );
}

/// An empty first reply must not end the turn: the agent is expected to retry
/// with tools enabled.
///
/// Scripted replies: empty text, a `system_info` tool call, then the final
/// answer. The test checks that the final answer is returned, that at least
/// two provider calls happened, and that the second call carried tools.
#[tokio::test]
async fn test_empty_answerable_routing_output_falls_through_to_tool_path() {
    // With deterministic orchestration (no first-pass LLM call), an
    // empty/low-signal first response should cause the agent to retry and
    // eventually use tools.
    let recovered_answer = "Recovered after memory/tool retry.";
    let script = vec![
        // 1) The first LLM call yields an empty response.
        MockProvider::text_response(""),
        // 2) On retry the agent issues a tool call.
        MockProvider::tool_call_response("system_info", "{}"),
        // 3) Final answer.
        MockProvider::text_response(recovered_answer),
    ];

    let fixture = setup_test_agent_orchestrator(MockProvider::with_responses(script))
        .await
        .unwrap();

    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "What timezone am I in?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, recovered_answer);

    let recorded = fixture.provider.call_log.lock().await;
    assert!(recorded.len() >= 2);
    let retry_call = &recorded[1];
    assert!(
        !retry_call.tools.is_empty(),
        "Empty first response should trigger tool-enabled retry path"
    );
}

/// Runs two turns in the same session and inspects the third recorded provider
/// call (the first call of the second turn).
///
/// Turn one triggers a `remember_fact` tool call storing the user's name. The
/// test then requires that the messages sent on the second turn still contain
/// a `tool` message named `remember_fact` whose content includes
/// `name = david` (compared after ASCII lowercasing).
#[tokio::test]
async fn test_identity_tool_result_survives_context_collapse() {
    let script = vec![
        MockProvider::tool_call_response(
            "remember_fact",
            r#"{"category":"user","key":"name","value":"David"}"#,
        ),
        MockProvider::text_response("Saved."),
        MockProvider::text_response("Continuing with your latest request."),
    ];
    let fixture = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();

    // Turn 1: the identity fact gets stored via the tool call.
    let _first_turn_reply = fixture
        .agent
        .handle_message(
            "test_session",
            "Remember that my name is David",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: an unrelated follow-up in the same session.
    let _second_turn_reply = fixture
        .agent
        .handle_message(
            "test_session",
            "What should we do next?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let recorded = fixture.provider.call_log.lock().await;
    assert!(
        recorded.len() >= 3,
        "Expected at least 3 model calls across both turns"
    );

    // Narrow the second-turn messages down to the remembered-fact tool result.
    let identity_preserved = recorded[2]
        .messages
        .iter()
        .filter(|msg| msg.get("role").and_then(|v| v.as_str()) == Some("tool"))
        .filter(|msg| msg.get("name").and_then(|v| v.as_str()) == Some("remember_fact"))
        .filter_map(|msg| msg.get("content").and_then(|v| v.as_str()))
        .any(|content| content.to_ascii_lowercase().contains("name = david"));
    assert!(
        identity_preserved,
        "Critical identity tool result should be preserved across context collapsing"
    );
}

/// Action requests (imperative, no `?`) go through the full agent loop, and
/// the very first LLM call already has tools available.
///
/// The test expects the last scripted reply to be returned, at least two
/// provider calls, and a non-empty tool list on the first recorded call.
#[tokio::test]
async fn test_initial_routing_call_continues_for_actions() {
    let final_answer = "Your system is running macOS.";
    // Scripted replies, in order:
    //   1. First call (with tools) -> text analysis of the action
    //   2. Agent loop -> tool call (system_info)
    //   3. Agent loop -> final text with tool result
    let script = vec![
        MockProvider::text_response("I'll check the system information for you."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(final_answer),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let action_request = "Show me the current system information and environment details";
    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            action_request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, final_answer);

    let llm_calls = fixture.provider.call_count().await;
    assert!(
        llm_calls >= 2,
        "Expected at least 2 LLM calls (intent classification + tool execution)"
    );

    // With default+fallback routing, every LLM call carries the tool list.
    let recorded = fixture.provider.call_log.lock().await;
    assert!(
        !recorded[0].tools.is_empty(),
        "First call should have tools (no separate tool-free text-only pre-pass)"
    );
}

/// Regression test: a reply that merely narrates a future action ("I'll do X",
/// "starting workflow") without any tool call must not be returned as the final
/// output; the agent has to keep iterating.
///
/// Four replies are scripted and the test requires all four to be consumed,
/// the last one to be the user-visible answer, and the first two recorded
/// calls to have tools available.
#[tokio::test]
async fn test_deferred_action_no_tool_calls_does_not_complete_task() {
    let final_answer = "Found it and sent it.";
    let script = vec![
        // 1) First call (with tools): INTENT_GATE classification.
        MockProvider::text_response(
            "I'll check and send it over.\n\
             [INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\
             \"needs_clarification\":false,\"clarifying_question\":\"\",\
             \"missing_info\":[],\"complexity\":\"simple\"}",
        ),
        // 2) Execution loop (bad): deferred-action text and no tool calls.
        MockProvider::text_response(
            "I'll find your resume and send it over right away.\n\
             Starting the send-resume workflow...",
        ),
        // 3) Execution loop (good): an actual tool execution.
        MockProvider::tool_call_response("system_info", "{}"),
        // 4) Final answer.
        MockProvider::text_response(final_answer),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "send me my resume",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, final_answer);
    assert_eq!(fixture.provider.call_count().await, 4);

    // Every call has tools available (no separate tool-free text-only pre-pass).
    let recorded = fixture.provider.call_log.lock().await;
    let first_call = &recorded[0];
    assert!(
        !first_call.tools.is_empty(),
        "First call should have tools available"
    );
    let second_call = &recorded[1];
    assert!(
        !second_call.tools.is_empty(),
        "Execution loop must have tools available"
    );
}

/// Regression: even after some successful tool calls, a deferred-action
/// narration ("I'll send it over") must not be treated as final completion.
///
/// Six replies are scripted; the test accepts anywhere between 4 and 7
/// provider calls, requires the "couldn't find" text as the final answer, and
/// checks that the first two recorded calls had tools available.
#[tokio::test]
async fn test_deferred_action_after_tool_progress_does_not_complete_task() {
    let not_found = "I couldn't find a matching SOW PDF in the project files.";
    let provider = MockProvider::with_responses(vec![
        // 1) First call: INTENT_GATE classification
        MockProvider::text_response(
            "I'll find it for you.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"complexity\":\"simple\"}",
        ),
        // 2) Tool prelude forces a tool call after text-only INTENT_GATE
        MockProvider::tool_call_response("system_info", "{}"),
        // 3) Execution loop (bad): deferred narration instead of results
        MockProvider::text_response(
            "I'll send it over once I locate the exact file. Give me a moment.",
        ),
        // 4-6) Extra responses for mutation-contract nudges and deferred-action retries.
        //       "send" triggers expects_mutation=true, causing up to 2 extra nudge
        //       iterations before the text response is accepted.
        MockProvider::text_response(not_found),
        MockProvider::text_response(not_found),
        MockProvider::text_response(not_found),
    ]);

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Send me the SOW PDF from the Lodestar project",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(response, not_found);

    // Call count varies due to tool prelude (forces tool use after text-only
    // INTENT_GATE) and mutation-contract nudges, so only a range is asserted.
    let call_count = harness.provider.call_count().await;
    assert!(
        (4..=7).contains(&call_count),
        "Expected 4-7 LLM calls, got {call_count}"
    );

    let calls = harness.provider.call_log.lock().await;
    // All calls should have tools available (no separate tool-free text-only pre-pass)
    assert!(
        !calls[0].tools.is_empty(),
        "First call should have tools available"
    );
    assert!(
        !calls[1].tools.is_empty(),
        "Execution loop must have tools available"
    );
}

/// With the default harness (uniform models, all "mock-model"), there is no
/// text-only pre-pass, so tools must be offered on the very first LLM call.
///
/// Scripted replies: a `system_info` tool call, then the final text. The test
/// expects exactly two provider calls and a non-empty tool list on the first.
#[tokio::test]
async fn test_uniform_models_have_tools_on_first_call() {
    let final_answer = "Here is your info.";
    let script = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(final_answer),
    ];

    // Uniform models -> router disabled -> no text-only pre-pass.
    let fixture = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();

    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "What's my system info?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, final_answer);
    assert_eq!(fixture.provider.call_count().await, 2);

    // The first recorded call must already carry the tool list.
    let recorded = fixture.provider.call_log.lock().await;
    let first_call = &recorded[0];
    assert!(
        !first_call.tools.is_empty(),
        "Without text-only pre-pass, first call should have tools"
    );
}

/// An empty first-pass response on iteration 1 should NOT
/// intercept — it falls through to the normal empty-response handling.
#[tokio::test]
async fn test_empty_first_pass_response_not_intercepted() {
    // First-pass response is empty -> should not be intercepted
    // Then execution model responds normally
    let provider = MockProvider::with_responses(vec![
        // Empty content response from the first routing call
        ProviderResponse {
            content: Some(String::new()),
            tool_calls: vec![],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 10,
                output_tokens: 0,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: None,
        },
        MockProvider::text_response("Fallback response."),
    ]);

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let _response = harness
        .agent
        .handle_message(
            "test_session",
            "Hello",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The empty first-pass response should NOT be intercepted (the !reply.is_empty()
    // check lets it fall through to normal empty-response handling).
    // The exact behavior depends on depth/iteration, but it should not panic.
}

/// Regression: when the execution loop keeps returning empty content with no
/// tool calls, the agent should attempt one recovery pass, then persist
/// the fallback response and emit task completion.
#[tokio::test]
async fn test_empty_execution_response_persists_fallback_message() {
    let provider = MockProvider::with_responses(vec![
        // Iteration 1: empty response -> retry nudge.
        ProviderResponse {
            content: Some(String::new()),
            tool_calls: vec![],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 10,
                output_tokens: 0,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: None,
        },
        // Iteration 2: still empty -> fallback.
        ProviderResponse {
            content: Some(String::new()),
            tool_calls: vec![],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 20,
                output_tokens: 0,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: None,
        },
    ]);

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Who is becquer?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let expected = "I wasn't able to process that request. Could you try rephrasing?";
    assert_eq!(response, expected);
    assert_eq!(harness.provider.call_count().await, 2);

    let history = harness.state.get_history("test_session", 10).await.unwrap();
    assert!(
        history
            .iter()
            .any(|m| m.role == "assistant" && m.content.as_deref() == Some(expected)),
        "Fallback response should be persisted in history. History: {:?}",
        history
    );
}

#[tokio::test]
async fn test_empty_execution_response_surfaces_provider_note() {
    let provider = MockProvider::with_responses(vec![
        // Iteration 1: empty response with provider note -> retry nudge.
        ProviderResponse {
            content: Some(String::new()),
            tool_calls: vec![],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 20,
                output_tokens: 0,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: Some(
                "finish reason: SAFETY; candidate safety categories: HARM_CATEGORY_HATE_SPEECH"
                    .to_string(),
            ),
        },
        // Iteration 2: still empty, no note -> fallback surfaces the previous note.
        ProviderResponse {
            content: Some(String::new()),
            tool_calls: vec![],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 20,
                output_tokens: 0,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: None,
        },
    ]);

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Find my resume and send it",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(response.starts_with("I wasn't able to process that request."));
    assert!(response.contains("The model returned no usable output (finish reason: SAFETY; candidate safety categories: HARM_CATEGORY_HATE_SPEECH)."));
    assert!(response.ends_with("Could you try rephrasing?"));
    assert_eq!(harness.provider.call_count().await, 2);

    let history = harness.state.get_history("test_session", 10).await.unwrap();
    assert!(
        history
            .iter()
            .any(|m| m.role == "assistant" && m.content.as_deref() == Some(response.as_str())),
        "Fallback response with provider note should be persisted in history. History: {:?}",
        history
    );
}

#[tokio::test]
async fn test_empty_execution_response_retry_recovers_with_text() {
    let provider = MockProvider::with_responses(vec![
        // Iteration 1: empty response -> retry nudge.
        ProviderResponse {
            content: Some(String::new()),
            tool_calls: vec![],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 10,
                output_tokens: 0,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: None,
        },
        // Iteration 2: recovery succeeds with text.
        MockProvider::text_response("Recovered response."),
    ]);

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Create a page",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(response, "Recovered response.");
    assert_eq!(harness.provider.call_count().await, 2);
    assert!(!response.contains("I wasn't able to process that request."));
}

/// When the first call on iteration 1 returns BOTH text and a tool call, the
/// tool call is considered hallucinated (this targets Gemini models that
/// invent function calls from system-prompt tool descriptions): it should be
/// dropped and only the text analysis kept.
///
/// A hallucinated tool call also forces `needs_tools = true`, because the
/// model signalled that it wants tools. The question therefore falls through
/// to the tool loop instead of being answered directly. The test expects the
/// final scripted text as the answer and at least three provider calls.
#[tokio::test]
async fn test_initial_routing_call_drops_hallucinated_tool_calls() {
    use crate::traits::{TokenUsage, ToolCall};

    let website_answer =
        "Your website is deployed at your-site.workers.dev on Cloudflare Workers.";

    // Iteration 1: confident text AND a hallucinated `terminal` tool call.
    let text_plus_hallucinated_call = ProviderResponse {
        content: Some(website_answer.to_string()),
        tool_calls: vec![ToolCall {
            id: "call_hallucinated".to_string(),
            name: "terminal".to_string(),
            arguments: r#"{"command":"find ~ -name wrangler.toml"}"#.to_string(),
            extra_content: None,
        }],
        usage: Some(TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note: None,
    };
    let script = vec![
        text_plus_hallucinated_call,
        // Iteration 2+: needs_tools=true is forced, so the executor must issue
        // a tool call (text-only responses are now blocked).
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(website_answer),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "Can you tell me the deployment URL for my website?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The user-visible answer comes from the execution loop.
    assert_eq!(reply, website_answer);

    // One initial routing call plus the execution-loop calls.
    let llm_calls = fixture.provider.call_count().await;
    assert!(
        llm_calls >= 3,
        "Expected at least 3 LLM calls — initial routing call + tool call + final response (got {})",
        llm_calls
    );
}

/// Regression test: when the first-pass analysis sanitizes to empty but the
/// intent gate reports acknowledgment + `needs_tools=true` (e.g. "Yes, do it."),
/// the agent must NOT return an empty direct reply; the turn has to fall
/// through to execution.
///
/// The test expects the third scripted reply as the answer and exactly three
/// provider calls.
#[tokio::test]
async fn test_acknowledgment_with_needs_tools_and_empty_analysis_falls_through() {
    let final_answer = "Proceeding with the requested changes.";
    let script = vec![
        MockProvider::text_response(
            "[tool_use: terminal]\n\
             cmd: read_file project/plan.md\n\
             args: {\"path\":\"project/plan.md\"}\n\
             \n\
             [INTENT_GATE]\n\
             {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"is_acknowledgment\":true}",
        ),
        // needs_tools=true blocks text-only responses, so a tool call comes next.
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(final_answer),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "Yes, do it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, final_answer);

    let llm_calls = fixture.provider.call_count().await;
    assert_eq!(
        llm_calls, 3,
        "Expected initial routing call + tool call + execution pass"
    );
}

/// With default+fallback routing (text-only pre-pass disabled), a reply that
/// contains nothing but `[INTENT_GATE]` metadata is treated as deferred-action
/// text (a structural marker is detected), so the agent loops for a real
/// response.
///
/// For the acknowledgment "Yes", the test expects the second scripted reply as
/// the answer and exactly two provider calls.
#[tokio::test]
async fn test_acknowledgment_with_empty_analysis_returns_default_reply() {
    let real_reply = "Got it, understood.";
    let script = vec![
        MockProvider::text_response(
            "[INTENT_GATE]\n\
             {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"is_acknowledgment\":true}",
        ),
        MockProvider::text_response(real_reply),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            "Yes",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The metadata-only first reply is not shown to the user; the loop
    // continues and the second scripted reply becomes the answer.
    assert_eq!(reply, real_reply);
    assert_eq!(fixture.provider.call_count().await, 2);
}

/// Same scenario as the acknowledgment case, but for a short user correction:
/// with default+fallback routing (text-only pre-pass disabled), a reply holding
/// only `[INTENT_GATE]` metadata is treated as deferred-action text and the
/// agent loops for a real response.
///
/// The test expects the second scripted reply as the answer and exactly two
/// provider calls.
#[tokio::test]
async fn test_short_correction_with_empty_analysis_returns_default_reply() {
    let real_reply = "You're right, my apologies for the confusion.";
    let script = vec![
        MockProvider::text_response(
            "[INTENT_GATE]\n\
             {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"is_acknowledgment\":false}",
        ),
        MockProvider::text_response(real_reply),
    ];

    let fixture = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let correction = "You did send me the file";
    let reply = fixture
        .agent
        .handle_message(
            "test_session",
            correction,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The metadata-only first reply triggers deferred-action detection, so the
    // loop continues and the second scripted reply is returned.
    assert_eq!(reply, real_reply);
    assert_eq!(fixture.provider.call_count().await, 2);
}

/// Checks that no `intent_gate` decision point carrying the
/// `acknowledgment_direct_reply` route reason is recorded for a direct reply.
///
/// Under default+fallback routing the text-only pre-pass is disabled. The
/// metadata-only first reply is treated as deferred action text, the agent
/// loops, and the second scripted reply is returned. The stored
/// `decision_point` events for the session are then inspected.
#[tokio::test]
async fn test_intent_gate_decision_metadata_includes_route_reason_for_direct_reply() {
    // First scripted reply: intent-gate metadata only, flagged as an acknowledgment.
    let gate_only_reply = "[INTENT_GATE]\n\
        {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"is_acknowledgment\":true}";
    let scripted = MockProvider::with_responses(vec![
        MockProvider::text_response(gate_only_reply),
        MockProvider::text_response("Got it, understood."),
    ]);

    let fixture = setup_test_agent_with_models(scripted, "primary-model", "smart-model")
        .await
        .unwrap();

    let pending_reply = fixture.agent.handle_message(
        "test_session",
        "Yes",
        None,
        UserRole::Owner,
        ChannelContext::private("test"),
        None,
    );
    let reply = pending_reply.await.unwrap();

    // The metadata-only reply triggers deferred action detection, so the
    // caller receives the second scripted reply after two provider calls.
    assert_eq!(reply, "Got it, understood.");
    assert_eq!(fixture.provider.call_count().await, 2);

    // Load every decision point stored for this session, newest first.
    let decision_point_sql = "SELECT data FROM events WHERE session_id = ? AND event_type = 'decision_point' ORDER BY id DESC";
    let decision_rows: Vec<String> = sqlx::query_scalar(decision_point_sql)
        .bind("test_session")
        .fetch_all(&fixture.state.pool())
        .await
        .unwrap();

    // Other `intent_gate` decision points are tolerated here; only one tagged
    // with the `acknowledgment_direct_reply` route reason counts as a failure.
    let direct_reply_recorded = decision_rows.iter().any(|raw| {
        let event: serde_json::Value = serde_json::from_str(raw).unwrap();
        let decision_type = event
            .get("decision_type")
            .and_then(serde_json::Value::as_str);
        let route_reason = event
            .get("metadata")
            .and_then(|meta| meta.get("route_reason"))
            .and_then(serde_json::Value::as_str);
        decision_type == Some("intent_gate") && route_reason == Some("acknowledgment_direct_reply")
    });

    assert!(
        !direct_reply_recorded,
        "With text-only pre-pass disabled, acknowledgment_direct_reply intent_gate decisions should not be emitted"
    );
}

/// Checks that no `intent_gate` decision point carrying the `tools_required`
/// route reason is recorded when the agent continues into tool execution.
///
/// Under default+fallback routing the text-only pre-pass is disabled. The
/// metadata-only first reply is treated as deferred action text, the agent
/// loops, issues the scripted `system_info` tool call, and returns the final
/// scripted text. The test pins the final reply, the number of provider calls
/// (one per scripted reply), and the absence of the `tools_required` decision.
#[tokio::test]
async fn test_intent_gate_decision_metadata_includes_route_reason_for_continue() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(
            "[INTENT_GATE]\n\
             {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"is_acknowledgment\":true}",
        ),
        // INTENT_GATE text triggers deferred action detection, agent loops
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Proceeding with the requested changes."),
    ]);

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Yes, do it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(response, "Proceeding with the requested changes.");
    // Pin the number of LLM round-trips as the sibling tests do, so an extra
    // or skipped provider call cannot slip through unnoticed.
    assert_eq!(
        harness.provider.call_count().await,
        3,
        "Expected INTENT_GATE-only reply + tool call + final text reply"
    );

    // Load every decision point stored for this session, newest first.
    let event_rows: Vec<String> = sqlx::query_scalar(
        "SELECT data FROM events WHERE session_id = ? AND event_type = 'decision_point' ORDER BY id DESC",
    )
    .bind("test_session")
    .fetch_all(&harness.state.pool())
    .await
    .unwrap();

    // With text-only pre-pass disabled, the specific tools_required route_reason
    // from the removed decision phase is not emitted.
    let tools_required_recorded = event_rows.iter().any(|raw| {
        let data: serde_json::Value = serde_json::from_str(raw).unwrap();
        let decision_type = data
            .get("decision_type")
            .and_then(serde_json::Value::as_str);
        let route_reason = data
            .get("metadata")
            .and_then(|m| m.get("route_reason"))
            .and_then(serde_json::Value::as_str);
        decision_type == Some("intent_gate") && route_reason == Some("tools_required")
    });

    assert!(
        !tools_required_recorded,
        "With text-only pre-pass disabled, tools_required intent_gate decisions from the removed decision phase should not be emitted"
    );
}