opencrabs 0.3.58

The autonomous, self-improving AI agent. Single Rust binary. Every channel. Install with: cargo install opencrabs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
//! Tests for the Qwen-style text-based tool-call extractor.
//!
//! Local GGUF/MLX backends (llama.cpp, MLX, LM Studio, Ollama) serving Qwen3
//! often emit tool calls as `<tool_call>{json}</tool_call>` or
//! `<function=name>...<parameter=key>val</parameter>...</function>` inside
//! `message.content` instead of the structured `tool_calls` field. The
//! extractor recovers them so they execute; these tests lock in the
//! contract — especially the edge cases (nested braces, open-ended tags,
//! prose mentions, field aliases).

use crate::brain::provider::custom_openai_compatible::{
    BareToolArrayMatch, classify_bare_tool_array, extract_balanced_json, extract_text_tool_calls,
};

/// A balanced object at the start of the input yields the number of bytes it
/// spans, whether or not other text follows it.
#[test]
fn balanced_json_simple() {
    let flat = extract_balanced_json(r#"{"a":1}"#);
    let nested_with_tail = extract_balanced_json(r#"{"a":{"b":2}} trailing"#);

    assert_eq!(flat, Some(7));
    assert_eq!(nested_with_tail, Some(13));
}

/// Braces that occur inside a JSON string value must not influence the
/// nesting depth tracked by the balanced extractor.
#[test]
fn balanced_json_strings_with_braces() {
    let input = r#"{"cmd":"echo { nested } end"} trailing"#;
    let end = extract_balanced_json(input).expect("balanced");
    // Only the object itself is covered; the trailing text is left out.
    let object = &input[..end];
    assert_eq!(object, r#"{"cmd":"echo { nested } end"}"#);
}

/// Escaped quotes inside a string value do not terminate the string, so the
/// whole input is reported as one balanced object.
#[test]
fn balanced_json_escaped_quotes() {
    let input = r#"{"msg":"he said \"hi\" then left"}"#;
    let end = extract_balanced_json(input).expect("balanced");
    assert_eq!(end, input.len());
}

/// Neither an unterminated object nor plain non-JSON text produces a match.
#[test]
fn balanced_json_unbalanced_returns_none() {
    for rejected in [r#"{"a":1"#, "not json"] {
        assert_eq!(extract_balanced_json(rejected), None);
    }
}

/// A properly closed `<tool_call>` block is parsed into one call and removed
/// from the text, while the prose on either side is kept.
#[test]
fn extract_tool_call_closed() {
    let input = r#"sure, running it. <tool_call>{"name":"bash","arguments":{"command":"ls"}}</tool_call> done."#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls");

    for kept in ["sure, running it", "done"] {
        assert!(rest.contains(kept));
    }
    assert!(!rest.contains("<tool_call>"));
}

/// A `<tool_call>` with no closing tag is still recovered, and nothing
/// visible is left behind.
#[test]
fn extract_tool_call_open_ended() {
    // Qwen often leaves the closing tag off entirely.
    let input = r#"<tool_call>{"name":"web_search","arguments":{"query":"rust traits"}}"#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "web_search");
    assert_eq!(call.1["query"], "rust traits");
    assert!(rest.trim().is_empty());
}

/// Nested JSON inside `arguments` must come through balanced-brace
/// extraction intact.
#[test]
fn extract_tool_call_nested_braces() {
    let input = r#"<tool_call>{"name":"set","arguments":{"obj":{"k":"v"},"n":1}}</tool_call>"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "set");
    assert_eq!(call.1["n"], 1);
    assert_eq!(call.1["obj"]["k"], "v");
}

/// Two `<tool_call>` blocks separated by prose are both recovered, in the
/// order they appear.
#[test]
fn extract_multiple_tool_calls() {
    let input = concat!(
        "first <tool_call>{\"name\":\"a\",\"arguments\":{}}</tool_call> ",
        "then <tool_call>{\"name\":\"b\",\"arguments\":{\"x\":2}}</tool_call>"
    );
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 2);
    let (first, second) = (&found[0], &found[1]);
    assert_eq!(first.0, "a");
    assert_eq!(second.0, "b");
    assert_eq!(second.1["x"], 2);
}

/// The MiniMax-style aliases `tool_name` and `args` are accepted in place of
/// `name` and `arguments`.
#[test]
fn extract_tool_call_with_field_aliases() {
    let input = r#"<tool_call>{"tool_name":"bash","args":{"command":"pwd"}}</tool_call>"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "pwd");
}

/// `arguments` supplied as a JSON-encoded string (as some runtimes emit it)
/// is decoded into a structured value.
#[test]
fn extract_tool_call_stringified_arguments() {
    let input = r#"<tool_call>{"name":"run","arguments":"{\"cmd\":\"go\"}"}</tool_call>"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "run");
    assert_eq!(call.1["cmd"], "go");
}

/// The `<function=name><parameter=key>val</parameter></function>` form is
/// parsed into a call and fully stripped from the text.
#[test]
fn extract_function_format() {
    let input = r#"<function=web_search><parameter=query>rust</parameter></function>"#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "web_search");
    assert_eq!(call.1["query"], "rust");
    assert!(rest.trim().is_empty());
}

/// A `<tool_call>` tag mentioned in prose with no JSON after it is neither
/// stripped nor turned into a call.
#[test]
fn skips_prose_mention_with_invalid_json() {
    let prose = "the <tool_call> tag is special";
    let (found, rest) = extract_text_tool_calls(prose);

    assert!(found.is_empty());
    assert_eq!(rest, prose);
}

/// Text without any tool-call markers is returned unchanged with no calls.
#[test]
fn noop_without_markers() {
    let prose = "just prose, no tool tags here";
    let (found, rest) = extract_text_tool_calls(prose);

    assert!(found.is_empty());
    assert_eq!(rest, prose);
}

/// A malformed `<tool_call>` payload lacking both `name` and `tool_name`
/// must not produce a call.
#[test]
fn ignores_tool_call_without_name() {
    let input = r#"<tool_call>{"arguments":{"x":1}}</tool_call>"#;
    let (found, _rest) = extract_text_tool_calls(input);
    assert!(found.is_empty());
}

/// A bare `tool_call:` prefix followed by an OpenAI-style envelope is
/// recovered, and the whole envelope is removed from the visible text.
#[test]
fn extract_bare_tool_call_openai_envelope() {
    // Shape Qwen3 leaks when the template isn't in reasoning mode; observed
    // in logs on 2026-04-17 02:41:12 after phantom retries failed.
    let text = r#"tool_call:{"id":"call_001","type":"function","function":{"name":"bash","arguments":{"command":"ls -la"}}}"#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    assert_eq!(calls.len(), 1, "must recover bare tool_call: prefix");
    let call = &calls[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls -la");

    let leftover_is_blank = cleaned.trim().is_empty();
    assert!(
        leftover_is_blank,
        "the whole envelope must be stripped, got: {cleaned:?}"
    );
}

/// A bare `tool_call:` envelope embedded in prose is extracted while the
/// prose before and after it is kept.
#[test]
fn extract_bare_tool_call_with_preceding_text() {
    let input = r#"I'll check that. tool_call:{"id":"c1","type":"function","function":{"name":"bash","arguments":{"command":"pwd"}}} done."#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].0, "bash");

    assert!(!rest.contains("tool_call:"));
    for kept in ["I'll check that", "done"] {
        assert!(rest.contains(kept));
    }
}

/// `tool_call:` embedded inside a longer identifier (`set_tool_call:`) is
/// not a tool-call emission: nothing is extracted and the text is untouched.
#[test]
fn bare_marker_rejects_non_boundary_prefix() {
    let input = r#"set_tool_call:{"x":1} and more"#;
    let (found, rest) = extract_text_tool_calls(input);

    assert!(found.is_empty());
    assert_eq!(rest, input);
}

/// A `{"tool_calls":[...]}` envelope yields one call per array entry and is
/// removed from the text.
#[test]
fn extract_tool_calls_array_envelope() {
    let input = r#"{"tool_calls":[{"id":"c1","type":"function","function":{"name":"bash","arguments":{"command":"ls"}}},{"id":"c2","type":"function","function":{"name":"web_search","arguments":{"query":"rust"}}}]}"#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 2, "both envelope entries must be recovered");

    let (first, second) = (&found[0], &found[1]);
    assert_eq!(first.0, "bash");
    assert_eq!(first.1["command"], "ls");
    assert_eq!(second.0, "web_search");
    assert_eq!(second.1["query"], "rust");

    assert!(rest.trim().is_empty());
}

/// A bare OpenAI envelope — no `tool_call:` prefix, no array wrapper, no
/// `call_` id — is recovered as a tool call from its inner
/// `function: {name, arguments}` object.
#[test]
fn openai_nested_function_name_recovered_via_bare_name_args_pass() {
    // History: the structured-tool_calls path can't see this shape and no
    // marker-based pass claims it. Until 2026-06-02 this test asserted
    // "no extraction" as a guard against over-matching prose. Production
    // logs then showed the model emitting exactly this shape as a
    // JSON-stringified copy of a call that had already dispatched via the
    // structured path, so it has to be stripped. The
    // bare_tool_call_extractor pass (Pass 1.9) now recovers it, and the
    // KNOWN_TOOL_NAMES gate keeps arbitrary prose JSON from becoming a
    // false positive.
    let input =
        r#"{"id":"c1","type":"function","function":{"name":"bash","arguments":{"command":"ls"}}}"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls");
}

/// A singular `{"tool_call":{...}}` envelope yields one call and is stripped
/// from the text.
#[test]
fn extract_singular_tool_call_envelope() {
    let input = r#"{"tool_call":{"name":"bash","arguments":{"command":"ls -la"}}}"#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls -la");
    assert!(rest.trim().is_empty());
}

/// A singular envelope whose keys are missing their colons is not valid JSON,
/// yet the tool name and primitive-valued arguments must still be recovered.
#[test]
fn extract_singular_envelope_with_malformed_json_missing_colons() {
    // Observed in logs 2026-04-17 03:07: Qwen dropped the colons after the
    // keys of its hallucinated envelope. Strict serde_json rejects that, so
    // the regex fallback has to recover enough for the tool to execute.
    let input = r#"{"tool_call" {"name" "bash" "arguments" {"command" "git status"}}}"#;
    let (found, rest) = extract_text_tool_calls(input);

    let recovered = found.len();
    assert_eq!(recovered, 1, "malformed singular envelope must be recovered");

    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "git status");
    assert!(rest.trim().is_empty());
}

/// Sanity check: a plural `"tool_calls"` envelope must be handled by its own
/// branch rather than the singular one, yielding exactly one call here.
#[test]
fn singular_envelope_rejects_plural_match() {
    let input = r#"{"tool_calls":[{"id":"c1","type":"function","function":{"name":"bash","arguments":{"command":"ls"}}}]}"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls");
}

/// A `<bash><command>...</command></bash>` block — where the outer tag is the
/// tool name itself — is extracted as a `bash` call and stripped, leaving the
/// preceding prose in place.
#[test]
fn extract_claude_style_bash_invocation() {
    // Regression for 2026-04-17 14:27: unsloth Qwen emitted this shape and
    // the parser missed it entirely, because the outer tag was the tool NAME
    // rather than one of the known `<tool_call>` / `<function=` markers.
    let input = "Let me search for the OpenCode repo.\n\n\
        <bash>\n\
        <command>\n\
        curl -s \"https://api.github.com/search/repositories?q=opencode+oauth\" | python3 -c \"import json,sys; print(json.load(sys.stdin))\"\n\
        </command>\n\
        </bash>";
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");

    // A missing or non-string `command` falls back to "" and fails the check.
    let command = call.1["command"].as_str().unwrap_or("");
    assert!(command.starts_with("curl"), "command arg must round-trip");

    assert!(rest.contains("Let me search"));
    for tag in ["<bash>", "</bash>"] {
        assert!(!rest.contains(tag));
    }
}

/// A tool-named tag with several child tags maps each child to an argument
/// of the same name.
#[test]
fn claude_style_multiple_params() {
    let input = "<edit_file>\n\
        <path>src/main.rs</path>\n\
        <old_string>foo</old_string>\n\
        <new_string>bar</new_string>\n\
        </edit_file>";
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "edit_file");

    let expected_args = [
        ("path", "src/main.rs"),
        ("old_string", "foo"),
        ("new_string", "bar"),
    ];
    for (key, value) in expected_args {
        assert_eq!(call.1[key], value);
    }

    assert!(rest.trim().is_empty());
}

/// HTML mentioned in a chat response must not be mistaken for a tool call:
/// per the original note, `<html>` and `<script>` are not in
/// KNOWN_TOOL_NAMES.
#[test]
fn claude_style_ignores_unknown_tag_names() {
    let prose = "The page has a <html><body>Hello</body></html> structure.";
    let (found, rest) = extract_text_tool_calls(prose);

    assert!(found.is_empty());
    assert_eq!(rest, prose);
}

/// A quoted `"tool_calls"` mention inside ordinary prose must not match; the
/// text is returned unchanged with no calls.
#[test]
fn tool_calls_inside_prose_is_ignored() {
    let prose = r#"The field called "tool_calls" is an array. Another sentence."#;
    let (found, rest) = extract_text_tool_calls(prose);

    assert!(found.is_empty());
    assert_eq!(rest, prose);
}

// --- Bare top-level array of OpenAI envelopes ---
// Seen 2026-05-16 with qwen-3.6-max-preview-thinking: the model double-emits,
// putting the full `[{"id":"call_...","type":"function",...}]` array into
// `delta.content` while the real call still flows via `delta.tool_calls`. The
// text copy used to bleed to the TUI as raw JSON. These tests pin the cleanup.

/// The balanced extractor also handles `[...]`, which the bare-array pass
/// relies on: flat arrays, arrays of objects with trailing text, and
/// rejection of an unterminated array.
#[test]
fn balanced_json_accepts_arrays() {
    let flat = extract_balanced_json(r#"[1,2,3]"#);
    let objects_with_tail = extract_balanced_json(r#"[{"a":1},{"b":2}] trailing"#);
    let unterminated = extract_balanced_json(r#"[1,2"#);

    assert_eq!(flat, Some(7));
    assert_eq!(objects_with_tail, Some(17));
    assert_eq!(unterminated, None);
}

/// A compact bare array holding one OpenAI envelope is extracted from the
/// middle of prose, which is otherwise preserved.
#[test]
fn extract_bare_array_single_call_compact() {
    let input = r#"Sure! [{"id":"call_1","type":"function","function":{"name":"bash","arguments":{"command":"ls"}}}] done."#;
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1, "got {:?}", found);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls");

    for kept in ["Sure!", "done"] {
        assert!(rest.contains(kept));
    }
    assert!(!rest.contains("call_1"));
}

/// A pretty-printed bare array following prose is extracted, and none of the
/// JSON leaks into the remaining text.
#[test]
fn extract_bare_array_pretty_printed_matches_log_shape() {
    // Text shape copied verbatim from ~/.opencrabs/logs/opencrabs.2026-05-16
    // (qwen-3.6-max-preview-thinking, ~20:16 UTC+1).
    let input = "Good idea. I'll add automatic sitemap discovery.\n\n[\n  {\n    \"id\": \"call_1\",\n    \"type\": \"function\",\n    \"function\": {\n      \"name\": \"edit_file\",\n      \"arguments\": {\n        \"path\": \"/x/scraper.rs\",\n        \"operation\": \"replace\",\n        \"old_text\": \"foo\",\n        \"new_text\": \"bar\"\n      }\n    }\n  }\n]";
    let (found, rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1, "got {:?}", found);
    let call = &found[0];
    assert_eq!(call.0, "edit_file");
    assert_eq!(call.1["operation"], "replace");

    assert!(rest.contains("Good idea"));
    for leaked in ["call_1", "edit_file"] {
        assert!(!rest.contains(leaked));
    }
}

/// A bare array with two envelopes yields two calls in array order.
#[test]
fn extract_bare_array_multiple_calls() {
    let input = r#"[{"id":"call_a","type":"function","function":{"name":"bash","arguments":{"command":"git status"}}},{"id":"call_b","type":"function","function":{"name":"read_file","arguments":{"path":"/x"}}}]"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 2, "got {:?}", found);
    let (first, second) = (&found[0], &found[1]);
    assert_eq!(first.0, "bash");
    assert_eq!(second.0, "read_file");
    assert_eq!(second.1["path"], "/x");
}

/// Inside a bare array, `arguments` serialized as a string (common in the
/// OpenAI envelope) is decoded rather than kept as a string.
#[test]
fn extract_bare_array_with_stringified_arguments() {
    let input = r#"[{"id":"call_1","type":"function","function":{"name":"glob","arguments":"{\"pattern\":\"**/*.rs\"}"}}]"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1, "got {:?}", found);
    let call = &found[0];
    assert_eq!(call.0, "glob");
    assert_eq!(call.1["pattern"], "**/*.rs");
}

/// An array whose `id` does not start with `call_` must not trigger the
/// bare-array pass; the text is left untouched.
#[test]
fn bare_array_anchor_requires_call_prefix() {
    // Matching `"id":1` (numeric) or `"id":"x_1"` (non-call) would over-match
    // arbitrary JSON content. The `"call_"` prefix is the OpenAI tool-call ID
    // convention and serves as the anchor for cheap pre-rejection.
    let input = r#"Here is JSON: [{"id":"banana_1","type":"function","function":{"name":"foo","arguments":{}}}] end."#;
    let (found, rest) = extract_text_tool_calls(input);

    assert!(found.is_empty());
    assert_eq!(rest, input);
}

/// Prose that mentions `"id":"call_xyz"` without a wrapping `[` shortly
/// before it must not match.
#[test]
fn bare_array_with_prose_id_call_mention_is_ignored() {
    let prose = r#"The field "id":"call_xyz" is what OpenAI returns."#;
    let (found, rest) = extract_text_tool_calls(prose);

    assert!(found.is_empty());
    assert_eq!(rest, prose);
}

/// Table-driven check of the three classifications returned by
/// `classify_bare_tool_array`: `Prefix`, `Full`, and `None`.
#[test]
fn classify_bare_tool_array_states() {
    // Still a viable prefix: empty or whitespace-only input (no information
    // yet), followed by every step along the recognition path.
    let prefixes = [
        "",
        "   ",
        "\n\n",
        "[",
        "[\n",
        "[ {",
        "[\n  {\n    \"id\"",
        "[{\"id\":",
        "[{\"id\":\"cal",
    ];
    for input in prefixes {
        assert_eq!(
            classify_bare_tool_array(input),
            BareToolArrayMatch::Prefix,
            "input: {input:?}"
        );
    }

    // Complete recognition.
    let full_matches = [
        "[{\"id\":\"call_",
        "[ {\"id\": \"call_1\"}]",
        "[\n  {\n    \"id\": \"call_abc\"\n  }\n]",
    ];
    for input in full_matches {
        assert_eq!(
            classify_bare_tool_array(input),
            BareToolArrayMatch::Full,
            "input: {input:?}"
        );
    }

    // Definite divergences.
    let divergences = [
        "Hello",
        "{not array",
        "[1,2,3]",
        "[{\"name\":\"x\"}]",
        "[{\"id\":42}]",
        "[{\"id\":\"banana_\"",
    ];
    for input in divergences {
        assert_eq!(
            classify_bare_tool_array(input),
            BareToolArrayMatch::None,
            "input: {input:?}"
        );
    }
}

// ── Dict-by-call-id shape (2026-05-24 qwen-3.7-max-preview regression) ──

/// A top-level object keyed by `call_<hex>` whose value is
/// `{name, arguments}` yields one call and is stripped from the text.
#[test]
fn extract_dict_by_call_id_single() {
    // Exact shape from @adolfousier's Telegram screenshots.
    let text = r#"{"call_5f8d9c7b4a3e2f1c8d6e5b9a": {"name": "read_file", "arguments": {"path": "/tmp/x"}}}"#;
    let (calls, remaining) = extract_text_tool_calls(text);

    let extracted = calls.len();
    assert_eq!(extracted, 1, "should extract one tool call from dict-by-id");

    let call = &calls[0];
    assert_eq!(call.0, "read_file");
    assert_eq!(call.1["path"], "/tmp/x");

    let nothing_left = remaining.trim().is_empty();
    assert!(
        nothing_left,
        "the matched object should be stripped from the remaining text, got: {remaining:?}",
    );
}

/// Several `call_*` keys crammed into one object each become their own call,
/// in key order.
#[test]
fn extract_dict_by_call_id_multiple_keys() {
    let input = r#"{"call_aaa": {"name":"bash","arguments":{"command":"ls"}},"call_bbb": {"name":"read_file","arguments":{"path":"/x"}}}"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 2);
    let (first, second) = (&found[0], &found[1]);
    assert_eq!(first.0, "bash");
    assert_eq!(second.0, "read_file");
}

/// A dict-by-call-id object wrapped in a ```json fence is still found; the
/// fence itself is not part of the JSON.
#[test]
fn extract_dict_by_call_id_inside_markdown_fence() {
    // Shape taken from Telegram screenshots.
    let input = "Pushed fix.\n\n```json\n{\"call_7b8f9e6d4c5a3b2e1d7c6a9f\": {\"name\": \"bash\", \"arguments\": {\"command\": \"git push\"}}}\n```\n\nMore prose after.";
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "git push");
}

/// Prose containing `"call_id"` must not trigger extraction: the `"call_`
/// substring is present but is not the first key of an object.
#[test]
fn dict_by_call_id_prose_mention_ignored() {
    let text = r#"The "call_id" field is set in your config."#;
    let (calls, remaining) = extract_text_tool_calls(text);

    assert_eq!(remaining, text, "prose must pass through unchanged");
    assert!(calls.is_empty(), "prose `call_id` must not be extracted");
}

/// Within the dict-by-id shape, a value nested under
/// `function: {name, arguments}` with stringified arguments is also parsed.
#[test]
fn dict_by_call_id_with_function_envelope_form() {
    // Per the original note, parse_tool_call_value already handles this.
    let input = r#"{"call_xyz": {"type":"function","function":{"name":"bash","arguments":"{\"command\":\"pwd\"}"}}}"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "pwd");
}

// ─── Pass 1.7 — bare-args orphans (qwen-3.7-max-thinking on dialagram) ──

/// A bare `{"command": "..."}` object is turned into a synthesized `bash`
/// call and removed from the visible text.
#[test]
fn extract_bare_command_args_single() {
    let text = r#"{"command": "cd ~/srv/rs/opencrabs && cat src/rtk/tracker.rs | head -150"}"#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    assert_eq!(calls.len(), 1, "must synthesize a bash call");
    let call = &calls[0];
    assert_eq!(call.0, "bash");
    assert_eq!(
        call.1["command"],
        "cd ~/srv/rs/opencrabs && cat src/rtk/tracker.rs | head -150"
    );

    let nothing_visible = cleaned.trim().is_empty();
    assert!(
        nothing_visible,
        "bare-args JSON must be stripped from visible text, got: {cleaned:?}"
    );
}

/// Five newline-separated bare `{"command": "..."}` blobs each become a
/// `bash` call, and nothing remains in the visible text.
#[test]
fn extract_bare_command_args_multiple_blobs_user_screenshot() {
    // Verbatim reproduction of the 2026-05-27 user screenshot: the model
    // emitted these blobs in delta.content after some structured tool calls
    // had succeeded. Before the fix every blob rendered as visible JSON in
    // the TUI and the calls never dispatched.
    let text = r#"{"command": "cd ~/srv/rs/opencrabs && cat src/rtk/tracker.rs | head -150"}
{"command": "cd ~/srv/rs/opencrabs && cat src/rtk/mod.rs | head -200"}
{"command": "cd ~/srv/rs/opencrabs && grep -r \"rtk::rewrite\\|rtk_rewrite\\|Rtk Result\" src/brain/tools/ --include=\"*.rs\" | head -20"}
{"command": "cd ~/srv/rs/opencrabs && git diff v0.3.24..v0.3.25 -- src/brain/tools/bash.rs | head -100"}
{"command": "cd ~/srv/rs/opencrabs && cat src/brain/tools/bash.rs | grep -A 30 -B 5 'rtk\\|spawn_blocking\\|block_on' | head -80"}"#;

    let (calls, cleaned) = extract_text_tool_calls(text);

    assert_eq!(calls.len(), 5, "all five blobs must become tool calls");
    assert!(
        cleaned.trim().is_empty(),
        "all bare-args blobs must be stripped, leftover: {cleaned:?}"
    );

    for call in &calls {
        let command = call.1["command"].as_str().unwrap();
        assert!(
            command.starts_with("cd ~/srv/rs/opencrabs"),
            "command must round-trip exactly: {call:?}"
        );
        assert_eq!(call.0, "bash");
    }
}

/// A bare-args object that pairs `command` with another bash-schema key
/// (`working_dir`, `timeout_secs`) is still treated as a `bash` call.
#[test]
fn extract_bare_command_args_with_optional_keys() {
    let input = r#"{"command": "cargo build", "timeout_secs": 600}"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["timeout_secs"], 600);
    assert_eq!(call.1["command"], "cargo build");
}

/// An object carrying `command` plus unknown extras (`note`, `id`, ...) is
/// not a tool call: no `bash` call is synthesized and the content stays.
#[test]
fn bare_command_args_ignored_when_extra_unknown_keys() {
    let text = r#"{"command": "ls", "note": "example", "id": "x"}"#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    let content_kept = cleaned.contains("ls");
    assert!(
        content_kept,
        "non-tool prose must pass through, got: {cleaned:?}"
    );
    assert!(
        calls.is_empty(),
        "object with non-bash keys must not synthesize"
    );
}

/// A `command` whose value is not a string must not synthesize a call.
#[test]
fn bare_command_args_ignored_when_command_not_string() {
    let input = r#"{"command": 42}"#;
    let (found, _rest) = extract_text_tool_calls(input);
    assert!(found.is_empty(), "non-string command must not synthesize");
}

/// When a bare-args object sits inside a sentence, the call is extracted and
/// the prose on both sides remains.
#[test]
fn bare_command_args_alongside_prose_strips_only_json() {
    let text = "Running the build now: {\"command\": \"cargo build\"} please wait.";
    let (calls, cleaned) = extract_text_tool_calls(text);

    assert_eq!(calls.len(), 1);
    let call = &calls[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "cargo build");

    let prose_intact = ["Running the build", "please wait"]
        .iter()
        .all(|fragment| cleaned.contains(fragment));
    assert!(
        prose_intact,
        "surrounding prose must remain, got: {cleaned:?}"
    );
}

// ─── Pass 1.8 — invoke-style Anthropic XML (qwen:tool_call wrapper) ──

/// A well-formed `<invoke>` block inside a `<qwen:tool_call>` wrapper yields
/// one call with numeric parameters as numbers, and the wrapper is stripped.
#[test]
fn invoke_style_single_clean_with_qwen_wrapper() {
    let text = r#"<qwen:tool_call>
<invoke name="read_file">
<parameter name="path">/tmp/x.dart</parameter>
<parameter name="start_line">10</parameter>
<parameter name="line_count">50</parameter>
</invoke>
</qwen:tool_call>"#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    assert_eq!(calls.len(), 1);
    let call = &calls[0];
    assert_eq!(call.0, "read_file");
    assert_eq!(call.1["path"], "/tmp/x.dart");
    for (key, expected) in [("start_line", 10), ("line_count", 50)] {
        assert_eq!(call.1[key], expected);
    }

    let nothing_left = cleaned.trim().is_empty();
    assert!(
        nothing_left,
        "wrapper + invoke must be stripped, leftover: {cleaned:?}"
    );
}

/// Two malformed invoke blocks inside a `<qwen:tool_call>` wrapper are both
/// recovered, and the wrapper and invoke markup are removed from the text.
#[test]
fn invoke_style_user_screenshot_2026_05_27_malformed() {
    // Verbatim reproduction of the 2026-05-27 Telegram screenshot, keeping
    // both malformations:
    //   (a) the first invoke lacks its leading `<`: `invoke name="read_file">`
    //   (b) the second block opens with `<parameter name="read_file">`
    //       instead of `<invoke name="read_file">`
    let text = r#"<qwen:tool_call>
invoke name="read_file">
<parameter name="path">~/srv/dart/myapp/lib/presentation/buyer_intent_screen/widgets/chat_widget.dart</parameter>
<parameter name="start_line">1888</parameter>
<parameter name="line_count">50</parameter>
</invoke>
<parameter name="read_file">
<parameter name="path">~/srv/dart/myapp/lib/presentation/propositions_screen/propositions_screen.dart</parameter>
<parameter name="start_line">360</parameter>
<parameter name="line_count">30</parameter>
</invoke>
</qwen:tool_call>"#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    // Both invokes must be recovered despite the malformations.
    assert_eq!(
        calls.len(),
        2,
        "both invokes (missing-< and parameter-not-invoke) must recover, got {} calls: {:?}",
        calls.len(),
        calls
    );

    // Expected (path suffix, start_line, line_count) per recovered call.
    let expected = [
        // First call: `invoke name="read_file">` missing the leading `<`.
        ("chat_widget.dart", 1888, 50),
        // Second call: the `<parameter name="read_file">` malformation.
        ("propositions_screen.dart", 360, 30),
    ];
    for (call, (path_suffix, start_line, line_count)) in calls.iter().zip(expected) {
        assert_eq!(call.0, "read_file");
        assert!(call.1["path"].as_str().unwrap().ends_with(path_suffix));
        assert_eq!(call.1["start_line"], start_line);
        assert_eq!(call.1["line_count"], line_count);
    }

    // The whole wrapper must be gone from the visible content.
    assert!(
        !cleaned.contains("qwen:tool_call"),
        "qwen wrapper must be gone from rendered content, got: {cleaned:?}"
    );
    let invoke_markup_left = cleaned.contains("<invoke") || cleaned.contains("invoke name=");
    assert!(
        !invoke_markup_left,
        "invoke blocks must be gone, got: {cleaned:?}"
    );
}

/// An `<invoke>` block inside a `<function_calls>` wrapper yields one call,
/// and the wrapper is stripped from the text.
#[test]
fn invoke_style_anthropic_function_calls_wrapper() {
    let text = r#"<function_calls>
<invoke name="bash">
<parameter name="command">ls -la</parameter>
</invoke>
</function_calls>"#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    assert_eq!(calls.len(), 1);
    let call = &calls[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "ls -la");

    let wrapper_left = cleaned.contains("function_calls");
    assert!(
        !wrapper_left,
        "function_calls wrapper must be stripped, got: {cleaned:?}"
    );
}

/// An `<invoke>` block with no surrounding wrapper is still recognized.
#[test]
fn invoke_style_standalone_no_wrapper() {
    let input = r#"<invoke name="bash">
<parameter name="command">pwd</parameter>
</invoke>"#;
    let (found, _rest) = extract_text_tool_calls(input);

    assert_eq!(found.len(), 1);
    let call = &found[0];
    assert_eq!(call.0, "bash");
    assert_eq!(call.1["command"], "pwd");
}

/// A prose mention of `<invoke name="...">` naming an unknown tool must not
/// dispatch a call, and the surrounding prose is still present afterwards.
#[test]
fn invoke_style_unknown_tool_name_ignored() {
    let input = r#"This is just prose with <invoke name="not_a_real_tool"> mentioned."#;
    let (found, rest) = extract_text_tool_calls(input);

    let nothing_dispatched = found.is_empty();
    assert!(
        nothing_dispatched,
        "unknown tool name must not dispatch a call"
    );
    // Per the original note: Pass 1.8 short-circuits here, and the overall
    // extractor returns the text intact when no other pass strips anything.
    assert!(rest.contains("just prose"));
}

#[test]
fn invoke_style_value_type_coercion() {
    // Parameter bodies arrive as text; this pins that a numeric body is
    // surfaced as an integer and `true` as a boolean, while a plain path
    // stays a string.
    let text = concat!(
        "<invoke name=\"read_file\">\n",
        "<parameter name=\"path\">/tmp/x</parameter>\n",
        "<parameter name=\"start_line\">42</parameter>\n",
        "<parameter name=\"hashline\">true</parameter>\n",
        "</invoke>",
    );
    let (calls, _) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let args = &calls[0].1;
    assert_eq!(args["path"], "/tmp/x");
    assert_eq!(
        args["start_line"], 42,
        "numeric string must coerce to integer"
    );
    assert_eq!(
        args["hashline"], true,
        "boolean string must coerce to bool"
    );
}

#[test]
fn invoke_style_single_quoted_name_attribute() {
    // Attribute values delimited with `'` rather than `"` must be accepted
    // on both the `<invoke>` and the `<parameter>` tags.
    let text = concat!(
        "<invoke name='bash'>\n",
        "<parameter name='command'>echo hi</parameter>\n",
        "</invoke>",
    );
    let (calls, _) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let only = &calls[0];
    assert_eq!(only.0, "bash");
    assert_eq!(only.1["command"], "echo hi");
}

// === Pass 1.7: bare `{"name": "<tool>", "arguments": {...}}` ===
//
// 2026-06-02 qwen-3.7-max-thinking via dialagram leaked tool calls
// as JSON-stringified objects inside delta.content while ALSO
// emitting the proper structured tool_calls delta. The structured
// path executed the call; the text copy made it to Telegram as raw
// JSON that looked like the user had pasted the tool args into the
// chat. These tests pin the bare-object detection and dedupe.

#[test]
fn bare_name_args_with_arguments_object_is_stripped_and_extracted() {
    // Mirrors the 2026-06-02 leak shape (argument values shortened, keys
    // kept): a bare JSON object sitting between two prose paragraphs with
    // no <tool_call> wrapper, no `tool_call:` marker and no envelope.
    let text = r#"I see the issue now. Let me fix it.

{"name": "edit_file", "arguments": {"path": "/tmp/x.dart", "operation": "replace", "old_text": "foo", "new_text": "bar"}}

Then we'll verify."#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1, "bare object must extract as one call");

    let extracted = &calls[0];
    assert_eq!(extracted.0, "edit_file");
    for (key, expected) in [("path", "/tmp/x.dart"), ("operation", "replace")] {
        assert_eq!(extracted.1[key], expected);
    }

    let json_leaked = cleaned.contains(r#""name": "edit_file""#);
    assert!(
        !json_leaked,
        "the bare JSON must be stripped from the visible text — \
         leak shape that hit Telegram on 2026-06-02 must not survive"
    );

    // The prose on either side of the object has to survive the strip.
    assert!(cleaned.contains("I see the issue now."));
    assert!(cleaned.contains("Then we'll verify."));
}

#[test]
fn bare_name_args_deduplicated_against_already_parsed_call() {
    // The model emits the SAME call twice in one assistant turn: once
    // bare in the prose and once via a structured tool_calls delta
    // (which is parsed upstream and shows up as `<tool_call>{json}</tool_call>`
    // when re-serialised into text for this extractor's input). The
    // bare copy must strip but NOT add a second call to the list.
    let text = r#"Let me fix it. <tool_call>{"name":"edit_file","arguments":{"path":"/tmp/x","operation":"append","new_text":"y"}}</tool_call>

{"name": "edit_file", "arguments": {"path": "/tmp/x", "operation": "append", "new_text": "y"}}"#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(
        calls.len(),
        1,
        "the bare-copy duplicate must NOT add a second tool call — \
         the structured-path call already executed the same args"
    );
    assert_eq!(calls[0].0, "edit_file");

    // The two copies are spelled differently in the input: the bare object
    // has a space after each `:`, the wrapped one is compact. A single
    // needle can only ever catch one of them, so probe for both spellings.
    for needle in [r#""name": "edit_file""#, r#""name":"edit_file""#] {
        assert!(
            !cleaned.contains(needle),
            "both the wrapped AND the bare JSON must be stripped from \
             the visible text — only the prose lead-in should remain \
             (found {needle:?} in {cleaned:?})"
        );
    }

    // Stripping the two copies must not take the surrounding prose with it.
    assert!(
        cleaned.contains("Let me fix it."),
        "the prose lead-in must survive the strip, got: {cleaned:?}"
    );
}

#[test]
fn bare_name_args_with_string_arguments_json_string_is_parsed() {
    // Double-encoded variant: `arguments` is a JSON *string* whose content
    // is itself a JSON object, rather than a nested object. The extractor
    // has to decode that inner string so the call ends up with real
    // key/value args (same normalisation as the structured path).
    let text = r#"{"name": "bash", "arguments": "{\"command\":\"pwd\"}"}"#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let only = &calls[0];
    assert_eq!(only.0, "bash");
    assert_eq!(only.1["command"], "pwd");

    // The input was nothing but the object, so nothing visible is left.
    assert!(cleaned.trim().is_empty());
}

#[test]
fn bare_name_args_requires_both_name_and_arguments_keys() {
    // Ordinary prose JSON that happens to have a `name` key but no
    // `arguments` key (here `{"name": "Adolfo", ...}`) is not a tool-call
    // envelope: nothing may be extracted and the object must stay visible.
    let text = r#"Here's an example: {"name": "Adolfo", "role": "user"}. That's it."#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    let nothing_extracted = calls.is_empty();
    assert!(
        nothing_extracted,
        "a `name`-only object without `arguments` is not a tool call \
         envelope and must not be stripped or executed"
    );

    let prose_json_kept = cleaned.contains(r#""name": "Adolfo""#);
    assert!(
        prose_json_kept,
        "the legit prose JSON must survive — only tool-call-shaped \
         objects get stripped"
    );
}

#[test]
fn bare_name_args_inside_nested_object_is_ignored() {
    // Here `"name"` is not the FIRST key of its object: it follows
    // `"version": 2,` and is therefore preceded by `,` rather than by `{`.
    // The pass anchors on `{` plus optional whitespace directly before
    // `"name"`, so this object must not be read as a tool-call envelope.
    let text =
        r#"The config is {"version": 2, "name": "edit_file", "arguments": {"path": "/tmp"}}."#;
    let (calls, cleaned) = extract_text_tool_calls(text);

    let dispatched_nothing = calls.is_empty();
    assert!(
        dispatched_nothing,
        "`name` mid-object (not the first key) is not a tool-call \
         envelope; nothing must dispatch"
    );
    assert!(cleaned.contains("The config is"));
}

#[test]
fn bare_name_args_multiple_calls_in_one_message_extract_in_order() {
    // A single assistant message carrying two bare tool-call objects:
    // each one has to be extracted (in source order) and each one has to
    // disappear from the visible text.
    let text = r#"First read, then edit.

{"name": "read_file", "arguments": {"path": "/tmp/a"}}

{"name": "edit_file", "arguments": {"path": "/tmp/a", "operation": "append", "new_text": "x"}}

Done."#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 2);

    // Order matters: the read comes before the edit.
    for (call, expected_name) in calls.iter().zip(["read_file", "edit_file"]) {
        assert_eq!(call.0, expected_name);
    }

    assert!(!cleaned.contains("\"name\": \"read_file\""));
    assert!(!cleaned.contains("\"name\": \"edit_file\""));
    assert!(cleaned.contains("First read, then edit."));
    assert!(cleaned.contains("Done."));
}

// ── `<tool_call_list>` wrapper (mimo-v2.5-pro leaks it into content) ──────

#[test]
fn tool_call_list_single_object() {
    // One object on its own line inside a `<tool_call_list>` block, using
    // the `tool_name` / `tool_input` key spelling.
    let text = r#"<tool_call_list>
{"tool_call_id": "rsi_read", "tool_name": "self_improve", "tool_input": {"action": "read", "target_file": "USER.md"}}
</tool_call_list>"#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let only = &calls[0];
    assert_eq!(only.0, "self_improve");
    for (key, expected) in [("action", "read"), ("target_file", "USER.md")] {
        assert_eq!(only.1[key], expected);
    }
    assert!(!cleaned.contains("tool_call_list"));
}

#[test]
fn tool_call_list_keeps_surrounding_prose() {
    // Prose ahead of the block must survive; only the block itself goes.
    let text = r#"Running checks now.

<tool_call_list>{"tool_name": "bash", "tool_input": {"command": "cargo test"}}</tool_call_list>"#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let only = &calls[0];
    assert_eq!(only.0, "bash");
    assert_eq!(only.1["command"], "cargo test");

    assert!(cleaned.contains("Running checks now."));
    assert!(!cleaned.contains("tool_call_list"));
}

#[test]
fn tool_call_list_multiple_objects() {
    // Two objects in one block, one per line; both are expected back in
    // source order.
    let text = r#"<tool_call_list>
{"tool_name": "read_file", "tool_input": {"path": "a.rs"}}
{"tool_name": "edit_file", "tool_input": {"path": "b.rs"}}
</tool_call_list>"#;
    let (calls, _cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 2);
    for (call, expected_name) in calls.iter().zip(["read_file", "edit_file"]) {
        assert_eq!(call.0, expected_name);
    }
}

#[test]
fn tool_call_list_json_array() {
    // The block body is a JSON array of call objects instead of
    // newline-separated objects; each element must become one call.
    let text = r#"<tool_call_list>[{"tool_name": "read_file", "tool_input": {"path": "a.rs"}}, {"tool_name": "bash", "tool_input": {"command": "ls"}}]</tool_call_list>"#;
    let (calls, _cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 2);
    for (call, expected_name) in calls.iter().zip(["read_file", "bash"]) {
        assert_eq!(call.0, expected_name);
    }
}

#[test]
fn tool_call_list_open_ended_no_closing_tag() {
    // The opening tag is present but `</tool_call_list>` never arrives;
    // the call must still be extracted and the tag must not be shown.
    let text = r#"<tool_call_list>{"tool_name": "bash", "tool_input": {"command": "echo hi"}}"#;
    let (calls, cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let only = &calls[0];
    assert_eq!(only.0, "bash");
    assert!(!cleaned.contains("tool_call_list"));
}

#[test]
fn tool_input_alias_maps_to_arguments() {
    // Inside a plain `<tool_call>` wrapper, the mimo-style `tool_input`
    // key has to be accepted as the source of the call's arguments.
    let text = r#"<tool_call>{"tool_name": "bash", "tool_input": {"command": "pwd"}}</tool_call>"#;
    let (calls, _cleaned) = extract_text_tool_calls(text);
    assert_eq!(calls.len(), 1);

    let only = &calls[0];
    assert_eq!(only.0, "bash");
    assert_eq!(only.1["command"], "pwd");
}