codescout 0.15.0

High-performance coding agent toolkit MCP server
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
//! Prompt construction helpers used by onboarding, workspace, and
//! prompt-refresh flows. Pure string-building — no tool side effects.

use std::path::Path;

/// Returns curated anti-patterns and correct patterns for a language.
/// Content sourced from docs/research/claude-language-patterns.md.
///
/// `lang` is matched exactly against the string keys below (`"rust"`,
/// `"python"`, `"typescript"`, `"javascript"` / `"jsx"`, `"go"`, `"java"`,
/// `"kotlin"`); any other value yields `None`. Note that `"javascript"` and
/// `"jsx"` share the same section text.
///
/// Each returned value is a static Markdown fragment that starts with a
/// `### <Language>` heading, followed by an "Anti-patterns (Don't → Do)" list
/// and a "Correct patterns" list of five entries each. The fragment has no
/// trailing newline.
///
/// The literals use `\` line continuations: the newline and the leading
/// whitespace of the following source line are not part of the string, so the
/// only line breaks in the output are the explicit `\n` escapes.
pub(crate) fn language_patterns(lang: &str) -> Option<&'static str> {
    match lang {
        "rust" => Some(
            "### Rust\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. Gratuitous `.clone()` to silence borrow checker → borrow: `&str` over `&String`, `&[T]` over `&Vec<T>`\n\
             2. `.unwrap()` everywhere → `?` with `.context()` from anyhow, `.expect(\"invariant: ...\")` only for proven invariants\n\
             3. `Rc<RefCell<T>>` / interior mutability overuse → restructure data flow and ownership\n\
             4. `String` params where `&str` suffices → `fn greet(name: &str)`, use `Cow<'_, str>` when ownership is conditional\n\
             5. Catch-all `_ => {}` in match → handle all variants explicitly, let compiler check exhaustiveness\n\
             \n\
             **Correct patterns:**\n\
             1. `thiserror` for library errors, `anyhow` for application errors — propagate with `?`\n\
             2. Iterator chains over explicit loops — `.iter().map(f).collect()`, avoid unnecessary `.collect()`\n\
             3. `Vec::with_capacity()` when size is known\n\
             4. Derive common traits: `#[derive(Debug, Clone, PartialEq)]`, `#[derive(Default)]` when sensible\n\
             5. `if let`/`while let` for single-pattern matching instead of full match",
        ),
        "python" => Some(
            "### Python\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. Mutable default arguments `def f(items=[])` → use `None` with `if items is None: items = []`\n\
             2. `typing.List`, `typing.Dict`, `typing.Optional` → built-in generics: `list[str]`, `str | None`\n\
             3. Bare/broad exception handling `except Exception: pass` → catch specific exceptions, log with context\n\
             4. `os.path.join()` → `pathlib.Path`: `Path(base) / \"data\" / \"file.csv\"`\n\
             5. `Any` type overuse → complete type annotations on all function signatures\n\
             \n\
             **Correct patterns:**\n\
             1. Modern type hints (3.10+): `list[int]`, `dict[str, Any]`, `str | None`\n\
             2. `uv` for packages, `ruff` for linting/formatting, `pyright` for types, `pytest` for testing\n\
             3. `pyproject.toml` over `setup.py`/`requirements.txt`\n\
             4. `dataclasses` for internal data, Pydantic for validation, TypedDict for dict shapes\n\
             5. `is` comparison for singletons: `if x is None:` not `if x == None:`",
        ),
        // In this arm, `\\` followed by a backtick emits a literal backslash
        // and a backtick in the rendered text (see correct pattern 4).
        "typescript" => Some(
            "### TypeScript\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. `any` type overuse → `unknown` when type is uncertain, Zod schemas for external data\n\
             2. Type assertion `as` abuse / `as unknown as T` → type guards, proper narrowing\n\
             3. Missing discriminated unions → model domain states with `'kind'`/`'type'` discriminant, `satisfies never` for exhaustiveness\n\
             4. Non-null assertion `!` abuse → handle null/undefined with narrowing, optional chaining, type guards\n\
             5. Enums → `as const` objects or string literal union types\n\
             \n\
             **Correct patterns:**\n\
             1. Strict tsconfig: `strict: true`, `noUncheckedIndexedAccess`, `exactOptionalPropertyTypes`\n\
             2. Explicit return types on exported functions\n\
             3. Zod schema validation for external data — derive types with `z.infer<typeof Schema>`\n\
             4. Discriminated unions with exhaustiveness: `default: throw new Error(\\`Unhandled: ${x satisfies never}\\`)`\n\
             5. `interface` for object shapes, `type` for unions/intersections/mapped types",
        ),
        // Both ids resolve to the same JavaScript section.
        "javascript" | "jsx" => Some(
            "### JavaScript\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. Missing Promise error handling → every `.then()` needs `.catch()`, every `async/await` needs try/catch\n\
             2. Stale closures in React hooks → ensure exhaustive dependency arrays in useEffect/useCallback/useMemo\n\
             3. Event listener / timer memory leaks → cleanup with `removeEventListener`, `clearInterval`, `AbortController`\n\
             4. `var` declarations → `const` by default, `let` only for reassignment\n\
             5. Loose equality `==` → always `===` and `!==`\n\
             \n\
             **Correct patterns:**\n\
             1. Proper useEffect async: define async inside effect, call it, return cleanup with AbortController\n\
             2. `const` by default, destructuring at function boundaries\n\
             3. Named exports over default exports — aids tree-shaking and refactoring\n\
             4. Template literals over string concatenation\n\
             5. `jsconfig.json` with `checkJs: true` for type safety in JS projects",
        ),
        "go" => Some(
            "### Go\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. `ioutil` package → `io.ReadAll`, `os.ReadFile`, `os.MkdirTemp` (deprecated since Go 1.16)\n\
             2. Pre-modern patterns → `slices.Contains()`, `min`/`max` builtins (1.21), `for range n` (1.22)\n\
             3. Java-style large interfaces at producer → accept interfaces at consumer, return structs, keep interfaces small (1-3 methods)\n\
             4. Error wrapping with `%v` → `fmt.Errorf(\"context: %w\", err)`, use `errors.Is`/`errors.As`\n\
             5. `context.Background()` deep in call chains → ctx as first param, pass through entire chain, never store in structs\n\
             \n\
             **Correct patterns:**\n\
             1. Table-driven tests with `t.Parallel()` and `t.Run()` subtests\n\
             2. `errgroup` for structured concurrency: `g, ctx := errgroup.WithContext(ctx)`\n\
             3. Functional options pattern: `WithPort(8080)`, `WithTimeout(30*time.Second)`\n\
             4. `slog` for structured logging (Go 1.21+), not `log.Println`\n\
             5. No name stuttering: `package kv; type Store` not `type KVStore`",
        ),
        "java" => Some(
            "### Java\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. `@Autowired` field injection → constructor injection with `final` fields (Spring 4.3+ auto-infers)\n\
             2. `Optional.get()` without check → `orElseThrow(() -> new NotFoundException(id))`, Optional for return types only\n\
             3. `throws Exception` / bare catches → declare and catch specific exceptions, log with context\n\
             4. `Date`/`Calendar`/`SimpleDateFormat` → `java.time`: `LocalDate`, `ZonedDateTime`, `DateTimeFormatter`\n\
             5. Raw types `List items` → `List<String> items = new ArrayList<>()`\n\
             \n\
             **Correct patterns:**\n\
             1. Records for data carriers (Java 16+): `public record UserDto(String name, String email) {}`\n\
             2. Sealed classes + pattern matching (Java 17+/21+) with switch expressions\n\
             3. Text blocks `\"\"\"` for multi-line strings (Java 15+)\n\
             4. Pattern matching instanceof (Java 16+): `if (obj instanceof String s) { s.length(); }`\n\
             5. Immutable collections: `List.of()`, `Map.of()`, `Set.of()`",
        ),
        "kotlin" => Some(
            "### Kotlin\n\
             \n\
             **Anti-patterns (Don't → Do):**\n\
             1. `!!` (not-null assertion) overuse → `?.let`, `?:`, `?.` chaining, or redesign to eliminate nullability\n\
             2. `GlobalScope.launch`/`async` → lifecycle-bound scopes: `viewModelScope`, `lifecycleScope`, injected `CoroutineScope`\n\
             3. `runBlocking` in production code → only for `main()` and tests, use suspend functions\n\
             4. Mutable `var` in data classes → `val` + `List` (not `MutableList`), immutability by default\n\
             5. `enum` when sealed class is needed → `sealed class`/`sealed interface` for state with per-variant data\n\
             \n\
             **Correct patterns:**\n\
             1. `val` over `var`, `List` over `MutableList` — expose read-only interfaces\n\
             2. Structured concurrency: `coroutineScope { launch { a() }; launch { b() } }`\n\
             3. Sealed class/interface for all state and result types\n\
             4. `Sequence` for large collections with chained operations\n\
             5. `require`/`check`/`error` for preconditions: `require(age >= 0) { \"Age must be non-negative\" }`",
        ),
        // Any language without a curated section.
        _ => None,
    }
}

/// Assembles a language-patterns memory from detected project languages.
///
/// Looks up each entry of `languages` with `language_patterns` and concatenates
/// the sections that exist, in first-occurrence order, under a
/// `# Language Patterns` header. Sections are separated by a `---` rule and
/// each one is followed by a newline.
///
/// A section is emitted at most once: several language ids can resolve to the
/// same text (e.g. `"javascript"` and `"jsx"`), and `languages` itself may
/// contain repeats, so identical sections are de-duplicated.
///
/// Returns `None` if no detected languages have pattern data.
pub(crate) fn build_language_patterns_memory(languages: &[String]) -> Option<String> {
    // Collect unique sections while preserving the order of first appearance.
    // The list is tiny (one entry per supported language), so a linear
    // `contains` check is cheaper and simpler than a set.
    let mut sections: Vec<&'static str> = Vec::new();
    for section in languages.iter().filter_map(|lang| language_patterns(lang)) {
        if !sections.contains(&section) {
            sections.push(section);
        }
    }

    if sections.is_empty() {
        return None;
    }

    let mut content = String::from(
        "# Language Patterns\n\n\
         Per-language anti-patterns and correct patterns for this project's languages.\n\
         Each section lists the top 5 mistakes LLMs make and the top 5 idiomatic patterns.\n\n",
    );

    for (i, section) in sections.iter().enumerate() {
        // Horizontal rule between sections, never before the first one.
        if i > 0 {
            content.push_str("\n---\n\n");
        }
        content.push_str(section);
        content.push('\n');
    }

    Some(content)
}

/// Builds the Markdown draft of the project system prompt.
///
/// The draft is assembled top to bottom from these sections:
///
/// 1. `# Project — Code Explorer Guidance` title
/// 2. `## Entry Points` — one bullet per entry in `entry_points`, or a generic
///    exploration hint when the slice is empty
/// 3. `## Key Abstractions` — a bracketed placeholder line to be filled in later
/// 4. `## Search Tips` — plus per-project `semantic_search` examples when more
///    than one project is supplied
/// 5. `## Language Patterns` — only when at least one entry of `languages` has
///    data in `language_patterns`
/// 6. `## Navigation Strategy` — a per-project variant when more than one
///    project is supplied, otherwise a single-project variant
/// 7. `## Retrieval Stack`, `## MCP Resources`, `## Project Rules` — always
/// 8. `## Workspace Projects` — table plus optional cross-project dependency
///    list, only when more than one project is supplied
/// 9. `## Registered Libraries` — only when `libraries` is non-empty
///
/// Parameters:
/// - `languages`: language ids for the project; joined with `/` in the search
///   tips and filtered through `language_patterns` for the patterns section.
/// - `entry_points`: paths listed under "Entry Points"; the first one is also
///   used in the single-project navigation steps.
/// - `project_root`: only used (in workspace mode) to locate and read the
///   workspace config for `depends_on` data. A missing or unparsable file is
///   silently skipped.
/// - `projects`: discovered workspace projects; `None` is treated as empty.
///   "Workspace mode" here means `projects` has more than one entry.
/// - `libraries`: registered libraries, each rendered with an index status.
///
/// Apart from the optional read of the workspace config file, the function
/// only builds and returns a `String`.
pub(crate) fn build_system_prompt_draft(
    languages: &[String],
    entry_points: &[String],
    project_root: Option<&Path>,
    projects: Option<&[crate::workspace::DiscoveredProject]>,
    libraries: &[crate::library::registry::LibraryEntry],
) -> String {
    let mut draft = String::new();
    draft.push_str("# Project — Code Explorer Guidance\n\n");

    // `None` and an empty slice are handled identically from here on.
    let projects_slice = projects.unwrap_or(&[]);

    // Entry points section
    draft.push_str("## Entry Points\n");
    if entry_points.is_empty() {
        draft.push_str("- Explore with `tree(\".\")` then `symbols` on key files\n");
    } else {
        for ep in entry_points {
            draft.push_str(&format!("- `{}` — start here\n", ep));
        }
    }
    draft.push('\n');

    // Key abstractions — placeholder for the LLM to fill
    draft.push_str("## Key Abstractions\n");
    draft.push_str("- [3-5 entries max. Each = one line: `TypeName` (`path/`) — one-line purpose only. No narrative.]\n\n");

    // Search tips
    draft.push_str("## Search Tips\n");
    if !languages.is_empty() {
        draft.push_str(&format!("- This is a {} project\n", languages.join("/")));
    }
    draft.push_str("- Use specific terms over generic ones (e.g., avoid 'data', 'utils')\n");
    draft.push_str("- For call relationships and impact analysis: `call_graph(symbol, path)` — traces callers/callees\n");
    if projects_slice.len() > 1 {
        draft.push_str(
            "- **Workspace mode:** always scope `semantic_search` with `project_id=\"<id>\"` — \
             broad terms match all projects and return mixed results\n",
        );
        for p in projects_slice {
            // Pick a placeholder query hint from the project's languages;
            // the first matching branch wins (rust, then ts/js, then python).
            let example_term = if p.languages.iter().any(|l| l == "rust") {
                "key type or trait name"
            } else if p
                .languages
                .iter()
                .any(|l| l == "typescript" || l == "javascript")
            {
                "handler or component name"
            } else if p.languages.iter().any(|l| l == "python") {
                "class or function name"
            } else {
                "concept specific to this project"
            };
            draft.push_str(&format!(
                "  - `{}`: `semantic_search(\"<{}>\", project_id=\"{}\")` \
                 — [fill in good query examples during onboarding]\n",
                p.id, example_term, p.id
            ));
        }
    }
    draft.push('\n');

    // Language patterns reference — only if at least one language has patterns
    let has_patterns = languages.iter().any(|l| language_patterns(l).is_some());
    if has_patterns {
        // Only languages that actually have pattern data are named in the text.
        let pattern_langs: Vec<&str> = languages
            .iter()
            .filter(|l| language_patterns(l).is_some())
            .map(|s| s.as_str())
            .collect();
        draft.push_str("## Language Patterns\n");
        draft.push_str(&format!(
            "This project uses {}. Read `memory(action=\"read\", topic=\"language-patterns\")` before writing, editing, or reviewing code.\n\n",
            pattern_langs.join(", ")
        ));
    }

    // Navigation strategy — per-project subsections for multi-project workspaces
    if projects_slice.len() > 1 {
        draft.push_str("## Navigation Strategy\n\n");
        draft.push_str("1. `memory(action=\"read\", topic=\"architecture\")` — orient yourself to the workspace\n");
        draft.push_str(
            "2. `semantic_search(\"your concept\")` — find relevant code across projects\n",
        );
        draft.push_str(
            "3. `memory(action=\"recall\", query=\"...\")` — search memories by meaning\n\n",
        );
        draft.push_str("**Per-project navigation:**\n\n");
        for p in projects_slice {
            // Heading suffix like " (rust, python)"; omitted when no languages are known.
            let langs = if p.languages.is_empty() {
                String::new()
            } else {
                format!(" ({})", p.languages.join(", "))
            };
            draft.push_str(&format!("### {}{}\n", p.id, langs));
            draft.push_str(&format!(
                "1. `symbols(\"{}\")` — [fill in entry point during onboarding]\n",
                p.relative_root.display()
            ));
            draft.push_str(&format!(
                "2. `semantic_search(\"your concept\", project_id=\"{}\")` — search within this project\n",
                p.id
            ));
            draft.push_str(&format!(
                "3. `memory(project_id=\"{}\", action=\"read\", topic=\"architecture\")` — project-specific knowledge\n",
                p.id
            ));
            draft.push_str(
                "3b. `symbol_at(path, line)` — hover + type sig when you have an exact location\n",
            );
            draft.push_str("3c. `references(symbol, path)` — all call sites before any edit\n");
            draft.push_str(
                "4. `call_graph(symbol=\"Name\", path=\"...\", direction=\"callers\")` — blast radius before any structural change; `direction=\"callees\"` for flow tracing\n\n",
            );
        }

        draft.push_str("**Cross-project navigation:**\n");
        draft.push_str("- **Quick lookups** (1–3 calls): pass `project_id=\"<id>\"` to scope the call — no state change.\n");
        draft.push_str("- **Sustained exploration** (reading memories, semantic search, many tool calls): \
                         use `workspace(action=\"activate\", path=\"<id>\")`, but **always `workspace(action=\"activate\")` back to your original \
                         project when done.** Forgetting to return leaves all subsequent tool calls operating \
                         against the wrong project.\n");
        draft.push_str("- **Subagents:** the MCP server state is shared with the parent conversation. \
                         You **MUST** `workspace(action=\"activate\")` back to the original project before completing your task.\n\n");
        draft.push_str(
            "**Markdown files** (memories, plans, docs): \
             `read_markdown(\"path\")` — returns heading map + `@file_ref` for large files. \
             **IRON LAW #6:** subsequent reads MUST use `@file_ref` (not the original path): \
             `read_markdown(\"@file_ref\", heading=\"## Section\")` or `start_line=/end_line=`.\n\n",
        );
    } else {
        // Single-project (or no-project) variant of the navigation steps.
        draft.push_str("## Navigation Strategy\n");
        draft.push_str("1. `memory(action=\"read\", topic=\"architecture\")` — orient yourself\n");
        if !entry_points.is_empty() {
            draft.push_str(&format!(
                "2. `symbols(\"{}\")` — see main structure\n",
                entry_points[0]
            ));
        } else {
            draft.push_str("2. `symbols(\"src/\")` — see main structure\n");
        }
        draft.push_str("3. `semantic_search(\"your concept\")` — find relevant code\n");
        draft.push_str("4. `symbols(name=\"Name\", include_body=true)` — read implementation\n");
        draft.push_str("   - regex-like patterns belong in `grep`, not `symbols`\n");
        draft.push_str("4b. `symbol_at(path, line)` — hover + type sig when you have an exact location from prior tool output; skip re-searching\n");
        draft.push_str("4c. `references(symbol, path)` — all call sites before any edit\n");
        draft.push_str(
            "5. `call_graph(symbol=\"Name\", direction=\"callers\")` — transitive blast radius; `direction=\"callees\"` for flow tracing\n",
        );
        // NOTE(review): step 6 ends with a blank line, so step 7 is emitted as
        // a separate paragraph rather than as part of the same list.
        draft.push_str(
            "6. `memory(action=\"recall\", query=\"...\")` — search memories by meaning\n\n",
        );
        draft.push_str(
            "7. `read_markdown(\"path/to/file.md\")` — returns heading map + `@file_ref` for large files. \
             **IRON LAW #6:** subsequent reads MUST use `@file_ref` (not the original path): \
             `read_markdown(\"@file_ref\", heading=\"## Section\")` or `start_line=/end_line=`.\n\n",
        );
    }

    // Retrieval stack — semantic_search routes through Qdrant
    draft.push_str("## Retrieval Stack\n");
    draft.push_str(
        "`semantic_search` runs through the Qdrant + TEI hybrid stack. \
         Start it once per machine with `./scripts/retrieval-stack.sh up`, then index this \
         project with `cargo run --release --bin sync_project -- <path> <project_id>`. \
         If a call returns `retrieval stack offline`, the stack isn't running.\n\n",
    );

    // MCP resource pointers — always included so agents know where to get extended docs
    draft.push_str("## MCP Resources\n");
    draft.push_str(
        "Extended docs and project context are available via MCP resources (`resources/read <uri>`):\n",
    );
    draft.push_str("- `doc://codescout-tool-guide` — long-form usage notes for every tool (examples, tradeoffs)\n");
    draft.push_str(
        "- `memory://<name>` — project memory files (architecture, conventions, gotchas)\n",
    );
    draft.push_str("- `project://summary` — active project + index + LSP snapshot\n\n");

    // Project rules — empty section for the LLM to fill from exploration
    draft.push_str("## Project Rules\n");
    draft.push_str("- [Fill from Phase 1 exploration: linting, formatting, commit conventions]\n");

    // Workspace projects table — only for multi-project repos
    if projects_slice.len() > 1 {
        draft.push_str("\n## Workspace Projects\n\n");
        draft.push_str("| Project | Root | Languages | Build |\n");
        draft.push_str("|---------|------|-----------|-------|\n");
        for p in projects_slice {
            // A project without a manifest shows "-" in the Build column.
            draft.push_str(&format!(
                "| {} | {} | {} | {} |\n",
                p.id,
                p.relative_root.display(),
                p.languages.join(", "),
                p.manifest.as_deref().unwrap_or("-"),
            ));
        }
        draft.push('\n');

        // Load depends_on from workspace.toml if available
        // Best-effort: a missing root, unreadable file, or parse error simply
        // omits the dependency list.
        if let Some(root) = project_root {
            let ws_path = crate::config::workspace::workspace_config_path(root);
            if let Ok(content) = std::fs::read_to_string(&ws_path) {
                if let Ok(ws) =
                    toml::from_str::<crate::config::workspace::WorkspaceConfig>(&content)
                {
                    // Only projects that declare at least one dependency are listed.
                    let deps: Vec<_> = ws
                        .projects
                        .iter()
                        .filter(|p| !p.depends_on.is_empty())
                        .collect();
                    if !deps.is_empty() {
                        draft.push_str("**Cross-project dependencies:**\n");
                        for p in deps {
                            draft.push_str(&format!(
                                "- {} depends on {}\n",
                                p.id,
                                p.depends_on.join(", "),
                            ));
                        }
                        draft.push('\n');
                    }
                }
            }
        }

        draft.push_str(
            "Use `project_id=\"name\"` parameter to scope search/navigation to a specific project.\n\n",
        );

        draft.push_str(
            "**Per-project details:** Use `memory(project_id=\"<id>\", topic=\"architecture\")` \
             or `memory(project_id=\"<id>\", topic=\"conventions\")` for project-specific knowledge.\n\n",
        );
    }

    // Registered libraries — only included when at least one is registered
    if !libraries.is_empty() {
        draft.push_str("\n## Registered Libraries\n\n");
        for lib in libraries {
            // "stale" requires both versions to be known and different; if
            // either is `None` an indexed library is reported as plain "indexed".
            let status = if lib.indexed {
                if lib.version.is_some()
                    && lib.version_indexed.is_some()
                    && lib.version != lib.version_indexed
                {
                    "indexed [stale]"
                } else {
                    "indexed"
                }
            } else {
                "not indexed"
            };
            draft.push_str(&format!(
                "- **{}** ({}) — {}\n",
                lib.name, lib.language, status
            ));
        }
        draft.push_str(
            "\nUse `scope=\"lib:<name>\"` with `symbols`, `grep`, \
             and `semantic_search` to navigate library code. \
             Run `index(action=\"build\", scope=\"lib:<name>\")` to enable semantic search for a library.\n",
        );
    }

    // Preferences auto-inject moved out of this sync builder — see
    // `append_preferences_section()` and its caller in onboarding.rs.
    // NOTE(review): `project_root` is already read in the workspace branch
    // above, so this binding has no effect at runtime.
    let _ = project_root;

    draft
}
/// Appends a `## User Preferences` section to `draft`, listing up to ten
/// memories from the `preferences` bucket, most recently updated first.
///
/// This lives outside `build_system_prompt_draft` because the memory store is
/// async (Qdrant) while the draft builder stays synchronous to keep its many
/// test call sites simple.
///
/// Best-effort: when there is no active project, the store cannot be
/// obtained, the listing fails, or nothing is returned, `draft` is left
/// untouched. Each entry is rendered as `- **<title>:** <content>`, with
/// content longer than 200 bytes cut at a char boundary and suffixed `...`.
pub(crate) async fn append_preferences_section(agent: &crate::agent::Agent, draft: &mut String) {
    // Keep the read guard inside this block so it is released before the
    // store is queried.
    let project_id = {
        let guard = agent.inner.read().await;
        let Some(project) = guard.active_project() else {
            return;
        };
        project.config.project.name.clone()
    };

    let Ok(store) = agent.semantic_memory_store().await else {
        return;
    };

    let filter = crate::memory::semantic_store::MemoryFilter {
        bucket: Some("preferences".into()),
        order_by: crate::memory::semantic_store::MemoryOrder::UpdatedAtDesc,
        limit: Some(10),
        ..Default::default()
    };
    let Ok(hits) = store.list(&project_id, filter).await else {
        return;
    };
    if hits.is_empty() {
        return;
    }

    draft.push_str("\n## User Preferences\n\n");
    for hit in &hits {
        let memory = &hit.memory;
        let body = &memory.content;
        let summary = if body.len() <= 200 {
            body.clone()
        } else {
            // Truncate on a char boundary so slicing cannot split a code point.
            let cut = crate::tools::floor_char_boundary(body, 200);
            format!("{}...", &body[..cut])
        };
        draft.push_str(&format!("- **{}:** {}\n", memory.title, summary));
    }
}

/// Returns the preamble placed in front of the onboarding prompt handed to
/// the subagent.
///
/// The text introduces the subagent's role, tells it to activate the project
/// as its first action, and ends with a `---` separator followed by a blank
/// line.
pub(crate) fn build_subagent_preamble() -> String {
    // Fragments are concatenated verbatim; every space and newline is explicit.
    [
        "You are an onboarding subagent for codescout. ",
        "Your job is to thoroughly explore this codebase and write project memories ",
        "that will be used by every future session.\n\n",
        "FIRST ACTION: Call workspace(action=\"activate\", path=\".\", read_only=false) to initialize the ",
        "project context. All subsequent tool calls depend on this.\n\n",
        "Then follow the exploration and memory-writing instructions below exactly.\n\n",
        "---\n\n",
    ]
    .concat()
}

/// Returns the epilogue placed after the onboarding prompt handed to the
/// subagent.
///
/// It spells out the return contract — the structured summary (exploration
/// summary, memories written, warnings) the subagent must end its response
/// with — and closes with the instruction to re-activate the project before
/// returning. The text starts with a `---` separator and has no trailing
/// newline.
pub(crate) fn build_subagent_epilogue() -> String {
    // Fragments are concatenated verbatim; every space and newline is explicit.
    const FRAGMENTS: &[&str] = &[
        "\n---\n\n",
        "## Return Contract\n\n",
        "When you have completed ALL exploration steps and written ALL memories, end your ",
        "response with this structured summary:\n\n",
        "**Exploration Summary:**\n",
        "- What this system does (your own words, not the README's)\n",
        "- The 5 most important types/modules (name, file, role)\n",
        "- How a typical operation flows (concrete function names)\n",
        "- What surprised you (things docs didn't mention)\n\n",
        "**Memories Written:**\n",
        "- List each memory topic you wrote (e.g., \"architecture\", \"conventions\", etc.)\n\n",
        "**Warnings:**\n",
        "- Any issues encountered (index not built, LSP failures, files that couldn't be read)\n",
        "- Steps you couldn't fully complete and why\n\n",
        "This summary is returned to the main agent and shown to the user. Make it ",
        "informative but concise — aim for 300-500 tokens total.\n\n",
        "LAST ACTION: Call workspace(action=\"activate\", path=\".\") before returning to ensure the parent's ",
        "project state is unchanged.",
    ];
    FRAGMENTS.iter().copied().collect()
}

/// Lists the level-2 (`## `) headings of a Markdown document together with
/// the size of each section.
///
/// Every line that starts with `"## "` is a heading. Each one is rendered as
/// `"<n>. <heading line> (<k> lines)"`, where `<n>` is its 1-based position
/// among the headings and `<k>` counts the lines from the heading itself up to
/// (not including) the next level-2 heading, or to the end of the text for the
/// last one. Lines before the first heading are not reported.
pub(crate) fn build_heading_map(prompt: &str) -> Vec<String> {
    let total_lines = prompt.lines().count();

    // (line index, heading text) for every level-2 heading, in document order.
    let marks: Vec<(usize, &str)> = prompt
        .lines()
        .enumerate()
        .filter(|(_, text)| text.starts_with("## "))
        .collect();

    let mut entries = Vec::with_capacity(marks.len());
    for (pos, &(begin, title)) in marks.iter().enumerate() {
        // A section runs until the next heading, or to the end of the text.
        let finish = match marks.get(pos + 1) {
            Some(&(next_begin, _)) => next_begin,
            None => total_lines,
        };
        entries.push(format!("{}. {} ({} lines)", pos + 1, title, finish - begin));
    }
    entries
}

/// Produces the dispatch text shown when a project has not been onboarded yet.
///
/// The text embeds a phase-by-phase checklist of `read_markdown` calls against `prompt_path`.
/// When `subagent_capable` is true the checklist is wrapped in instructions to delegate the
/// work to a subagent; otherwise the reader is told to follow the checklist directly.
pub(crate) fn build_buffered_onboarding_instructions(
    prompt_path: &str,
    subagent_capable: bool,
) -> String {
    // Renders one `read_markdown` call; `selector` is the already-quoted
    // `heading=...` / `headings=[...]` argument.
    let read = |selector: &str| format!("read_markdown(\"{prompt_path}\", {selector})");

    // One array element per output line, joined with newlines (no trailing newline).
    let checklist = [
        String::from("Read these phases in order — complete each before moving to the next:"),
        String::new(),
        format!("  1. {}", read("heading=\"## THE IRON LAW\"")),
        format!(
            "  2. {}",
            read("headings=[\"## Phase 0: Embedding Model Selection\", \"## Phase 1: Semantic Index Check\"]")
        ),
        format!("  3. {}", read("heading=\"## Phase 2: Explore the Code\"")),
        String::from("     → Execute ALL exploration steps before continuing"),
        format!(
            "  4. {}",
            read("headings=[\"## Red Flags — STOP and Return to Phase 2\", \"## Common Rationalizations\"]")
        ),
        format!(
            "  5. {}",
            read("heading=\"## Phase 3: Write the Memories (Single-Project Mode)\"")
        ),
        String::from("     → Write all memories before continuing"),
        format!(
            "  6. {}",
            read("headings=[\"## After Everything Is Created\", \"## Gathered Project Data\", \"## Return Contract\"]")
        ),
        String::new(),
        String::from("For workspace mode, read these after step 2 (before step 3):"),
        format!(
            "  {}",
            read("headings=[\"## Workspace Survey — Breadth-First\", \"## Workspace Deep Dives — Subagent Per-Project\"]")
        ),
        String::from("And after step 4 (before step 5):"),
        format!("  {}", read("heading=\"## Workspace Memory Synthesis\"")),
    ]
    .join("\n");

    // Clients that cannot spawn subagents get the bare checklist.
    if !subagent_capable {
        return format!(
            "Onboarding required — this project has not been explored yet.\n\n\
             {checklist}\n\n\
             Follow each phase's instructions to explore the codebase and write project memories."
        );
    }

    format!(
        "Onboarding required — this project has not been explored yet.\n\n\
         Spawn a general-purpose subagent with model=sonnet to perform the exploration and \
         memory writing. The subagent must read the onboarding prompt by phase — \
         copy-paste these exact commands:\n\n\
         {checklist}\n\n\
         Do NOT read the onboarding prompt yourself — let the subagent handle it.\n\n\
         When the subagent completes, report its summary to the user. Then read whichever \
         memories are relevant to the user's current task via memory(action=\"read\", topic=...).\n\n\
         Wait for the subagent to complete before continuing — onboarding is a prerequisite \
         for all subsequent work.\n\n\
         If you cannot spawn subagents, follow the checklist yourself."
    )
}

/// Produces the dispatch text shown when the stored system prompt version is outdated.
///
/// `stored` is the version recorded earlier (`None` is rendered as `pre-versioning`) and
/// `current` is the version being refreshed to. When `subagent_capable` is true the text
/// asks for a subagent to read `prompt_path`; otherwise it asks the reader to do so directly.
pub(crate) fn build_buffered_refresh_instructions(
    prompt_path: &str,
    stored: Option<u32>,
    current: u32,
    subagent_capable: bool,
) -> String {
    let previous = match stored {
        Some(version) => format!("v{version}"),
        None => String::from("pre-versioning"),
    };

    // Both variants open with the same headline and embed the same indented call.
    let mut text = format!(
        "System prompt outdated ({previous} → v{current}) — a lightweight refresh is needed.\n\n"
    );
    let read_call = format!("  read_markdown(\"{prompt_path}\")");

    if subagent_capable {
        text.push_str(
            "Spawn a general-purpose subagent with model=sonnet to regenerate the system prompt. \
             The subagent must:\n",
        );
        text.push_str(&read_call);
        text.push_str(
            "  — read the full refresh prompt (it's short)\n\n\
             The subagent will re-read memories and regenerate system-prompt.md without \
             re-exploring the codebase.\n\n\
             When the subagent completes, continue with the user's original task.",
        );
    } else {
        text.push_str("Read the refresh prompt:\n");
        text.push_str(&read_call);
        text.push_str("\n\nFollow it to re-read memories and regenerate system-prompt.md.");
    }
    text
}

/// Assembles the task prompt handed to a subagent that refreshes `system-prompt.md`.
///
/// Every entry of `memory_topics` except `"system-prompt"` becomes one
/// `memory(action="read", ...)` bullet under step 2. The rest of the prompt is fixed text:
/// the regeneration rules, the template, and the instruction not to re-explore the codebase.
pub(crate) fn build_prompt_refresh_subagent_prompt(memory_topics: &[String]) -> String {
    // Fixed text up to (and including) the line that introduces the memory bullets.
    const STEPS_BEFORE_READS: &str = "\
System prompt refresh — the stored onboarding version is behind the current codescout version.\n\n\
Steps:\n\
1. workspace(action=\"activate\", path=\".\", read_only=false) — enable writes\n\
2. Read each project memory that contributes to the system prompt:\n";

    // Fixed text that follows the memory bullets (starts at step 3).
    const STEPS_AFTER_READS: &str = "\
3. Read the current system-prompt.md (if it exists) for Entry Points structure only — do NOT copy tool navigation examples from it, as those may be stale
4. Regenerate system-prompt.md following the canonical template spec:

   **What to include:**
   - Entry points: specific file paths + symbol names to start exploring
   - Key abstractions: **3-5 entries max**. Each = one line: `TypeName` (`path/`) — one-line purpose only. NO architecture narrative, NO state machine descriptions, NO config details — those go in the `architecture` memory.
   - Search tips: concrete query examples that work well for THIS codebase; terms to avoid
   - Navigation strategy: recommended tool call sequence for a new task. Every step must name a codescout tool. Include `call_graph(symbol, path, direction=\"callers\")` for blast-radius checking before edits, and `direction=\"callees\"` for tracing data/control flow.
   - Project rules: conventions the AI must follow that linters don't catch

   **What NOT to include:**
   - How codescout tools work (static tool guidance covers this)
   - Full architecture details (architecture memory covers this)
   - Command lists, glossary, detailed conventions (other memories cover these)
   - More than ~30 lines total (injected every session — keep it dense)
   - Native host tool names (Read, Grep, Glob, Edit, Bash) — blocked in codescout sessions

   **Template:**
   ```
   # [Project Name] — Code Explorer Guidance

   ## Entry Points
   [Specific files + symbols, not module descriptions]

   ## Key Abstractions
   [3-5 lines: `TypeName` (`path/`) — one-line purpose]

   ## Search Tips
   [Concrete queries + terms to avoid]

   ## Navigation Strategy
   [Numbered steps, each naming a codescout tool. Include call_graph step.]

   ## Project Rules
   [Conventions not caught by linters]
   ```

5. Write the updated content to .codescout/system-prompt.md
6. Do NOT re-explore the codebase — the memories already contain the relevant knowledge
7. workspace(action=\"activate\", path=\".\") — restore normal state

When done, report: \"System prompt refreshed (vN → vM).\"";

    // The system prompt itself is the thing being regenerated, so it is not listed as input.
    let mut read_calls: Vec<String> = Vec::with_capacity(memory_topics.len());
    for topic in memory_topics {
        if topic.as_str() == "system-prompt" {
            continue;
        }
        read_calls.push(format!("  - memory(action=\"read\", topic=\"{topic}\")"));
    }

    let mut prompt = String::from(STEPS_BEFORE_READS);
    prompt.push_str(&read_calls.join("\n"));
    prompt.push('\n');
    prompt.push_str(STEPS_AFTER_READS);
    prompt
}

/// Build a self-contained onboarding prompt for one project in a workspace.
///
/// Each per-project subagent gets this prompt. It includes, in order:
/// - the Iron Law (no memory writes before exploration is complete)
/// - project-specific context: id, root, languages, optional manifest, and sibling projects
/// - the seven exploration steps plus the Phase 2 gate checklist and red flags
/// - per-project memory writing instructions, scoped with this project's `project_id`
/// - the return contract
///
/// `siblings` holds `(id, languages)` pairs for the other projects; they are listed only
/// for context and the prompt tells the subagent not to explore them.
#[allow(dead_code)]
pub(crate) fn build_per_project_prompt(
    project: &crate::workspace::DiscoveredProject,
    siblings: &[(String, Vec<String>)],
) -> String {
    let mut prompt = String::new();

    // Iron Law (from shared template)
    prompt.push_str("## THE IRON LAW\n\n");
    prompt.push_str(
        "```\nNO MEMORIES WRITTEN WITHOUT COMPLETING ALL EXPLORATION STEPS FIRST\n```\n\n",
    );
    prompt.push_str("You may only call `memory(action: \"write\", ...)` after you have:\n");
    prompt.push_str("1. Completed ALL exploration steps below\n");
    prompt.push_str("2. Verified EVERY item in the Phase 2 Gate Checklist\n\n");
    prompt.push_str("---\n\n");

    // Project context
    prompt.push_str("## Your Project\n\n");
    prompt.push_str(&format!("- **ID:** {}\n", project.id));
    prompt.push_str(&format!(
        "- **Root:** {}\n",
        project.relative_root.display()
    ));
    prompt.push_str(&format!(
        "- **Languages:** {}\n",
        project.languages.join(", ")
    ));
    // The manifest line is emitted only when the project has one.
    if let Some(ref manifest) = project.manifest {
        prompt.push_str(&format!("- **Manifest:** {}\n", manifest));
    }

    if !siblings.is_empty() {
        prompt.push_str("\n**Sibling projects** (for context — Do NOT deep-dive these):\n");
        for (id, langs) in siblings {
            prompt.push_str(&format!("- {} ({})\n", id, langs.join(", ")));
        }
    }
    prompt.push_str("\n---\n\n");

    // Phase 2: Explore (scoped to this project)
    prompt.push_str("## Phase 2: Explore the Code\n\n");
    prompt.push_str("Explore ONLY your project root. Do NOT explore sibling projects.\n\n");
    prompt.push_str(&format!(
        "### Step 1: Map the Codebase Structure\n\n\
         - `tree(\"{root}\")` — top-level structure\n\
         - `tree` on each subdirectory\n\
         - `read_file` on the build config\n\
         - `read_markdown(\"README.md\")` if present\n\n",
        root = project.relative_root.display()
    ));
    prompt.push_str(
        "### Step 2: Full Symbol Survey\n\n\
         - Run `symbols` on the main source directory\n\
         - Run `symbols` on EACH subdirectory individually\n\
         - Survey at least 5 distinct source files\n\n",
    );
    prompt.push_str(
        "### Step 3: Read Core Implementations\n\n\
         - Identify 5+ central types/functions from Step 2\n\
         - `symbols(name=..., include_body=true)` for each\n\
         - Read the FULL body, not just signatures\n\n",
    );
    prompt.push_str(
        "### Step 4: Read Architecture Documentation\n\n\
         - `read_markdown` on any docs found in the project\n\
         - Read completely — do not skim\n\n",
    );
    prompt.push_str(
        "### Step 5: Trace Two Data Flows\n\n\
         - Trace the most representative operation end-to-end\n\
         - Trace a second distinct path (error, write vs read, etc.)\n\n",
    );
    prompt.push_str(
        "### Step 6: Concept-Level Search (5+ queries)\n\n\
         - Error handling, data flow, testing, config, domain concept\n\
         - Use `semantic_search` or `grep` as fallback\n\n",
    );
    prompt.push_str(
        "### Step 7: Examine Tests\n\n\
         - `symbols` on test directory\n\
         - Read 2-3 test files for patterns\n\n",
    );

    // Phase 2 Gate Checklist
    prompt.push_str(
        "### Phase 2 Gate Checklist\n\n\
         Before writing ANY memory, verify ALL true:\n\
         - [ ] Listed structure AND ran tree on major subdirectories\n\
         - [ ] Symbol survey on 5+ source files\n\
         - [ ] Read full body of 5+ core implementations\n\
         - [ ] Read all architecture docs\n\
         - [ ] Traced two data flows\n\
         - [ ] Ran 5+ concept queries\n\
         - [ ] Read 2-3 test files\n\n\
         ---\n\n",
    );

    // Red Flags
    prompt.push_str(
        "## Red Flags — STOP and Return to Phase 2\n\n\
         If you notice any of these, STOP and go back:\n\
         - \"I have a good enough picture\" — No, read the code.\n\
         - \"The README covers this\" — READMEs lie. Verify in code.\n\
         - \"This is similar to...\" — Explore anyway. Differences matter.\n\n\
         ---\n\n",
    );

    // Phase 3: Write per-project memories
    prompt.push_str("## Phase 3: Write the Memories\n\n");
    prompt.push_str(&format!(
        "Write these memories using `memory(action=\"write\", project_id=\"{id}\", topic=\"...\", content=\"...\")`.\n\n",
        id = project.id
    ));
    // This block must go through `format!`: it mentions `project_id="{id}"`, and a plain
    // string literal would hand the subagent the unexpanded text `{id}` instead of the
    // real project id.
    prompt.push_str(&format!(
        "### 1. `project-overview`\n\
         Purpose, tech stack, key dependencies, runtime requirements. 15-30 lines.\n\n\
         ### 2. `architecture`\n\
         Module structure, key abstractions, data flow, design patterns. 20-40 lines.\n\
         Include 3-5 good `semantic_search(query, project_id=\"{id}\")` examples.\n\n\
         ### 3. `conventions`\n\
         Language/framework patterns, naming, testing approach. 15-30 lines.\n\n",
        id = project.id
    ));

    // Return contract
    prompt.push_str("---\n\n");
    prompt.push_str(
        "## Return Contract\n\n\
         Return a summary with:\n\
         - What this project does (your own words)\n\
         - 3-5 most important types/modules\n\
         - How a typical operation flows\n\
         - Memories written (list topics)\n\
         - Any issues encountered\n",
    );

    prompt
}

/// Assembles the synthesis prompt used once every per-project exploration has finished.
///
/// The prompt walks its reader through four tasks and a return contract:
/// 1. Read back the `project-overview`, `architecture` and `conventions` memories of each
///    entry in `projects` (only the id of each pair is used; the language list is ignored)
/// 2. Write 5 workspace-level memories
/// 3. Generate the system prompt file
/// 4. Offer to refresh CLAUDE.md with memory references
#[allow(dead_code)]
pub(crate) fn build_synthesis_prompt(projects: &[(String, Vec<String>)]) -> String {
    // Topics every per-project subagent is asked to write, in read-back order.
    const PER_PROJECT_TOPICS: [&str; 3] = ["project-overview", "architecture", "conventions"];

    // Static sections that follow the read-back list, in output order. The `{id}` inside
    // the `conventions` text is a literal placeholder and is emitted as-is.
    const STATIC_SECTIONS: [&str; 4] = [
        // Workspace-level memories
        "\n---\n\n\
         ## Write Workspace Memories\n\n\
         Write these 5 workspace-level memories (no `project_id:` parameter = workspace-level):\n\n\
         ### 1. `architecture`\n\
         Workspace-level architecture:\n\
         - Project map: each project's purpose (1 sentence each)\n\
         - Cross-project dependencies (which imports from which)\n\
         - Shared infrastructure (CI, deployment, tooling)\n\
         15-30 lines.\n\n\
         ### 2. `conventions`\n\
         Shared patterns across projects: commit style, PR process, CI rules.\n\
         Per-project: reference `memory(project_id=\"{id}\", topic=\"conventions\")`.\n\
         15-30 lines.\n\n\
         ### 3. `development-commands`\n\
         Workspace-level build/test/lint commands. Per-project commands go in per-project memories.\n\
         10-20 lines.\n\n\
         ### 4. `domain-glossary`\n\
         Terms used across multiple projects. Project-specific terms go in per-project memories.\n\
         10-20 lines.\n\n\
         ### 5. `gotchas`\n\
         Cross-project pitfalls, version mismatches, integration gotchas.\n\
         10-20 lines.\n\n",
        // System prompt
        "---\n\n\
         ## Generate System Prompt\n\n\
         Write the system prompt directly to `.codescout/system-prompt.md` with `create_file` (NOT `memory(action=\"write\", topic=\"system-prompt\")` — it is the always-on root file injected into every session, not a memory topic).\n\
         Include: entry points per project, key abstractions, search tips scoped by project,\n\
         navigation strategy for the workspace.\n\n",
        // CLAUDE.md refresh
        "---\n\n\
         ## Refresh CLAUDE.md\n\n\
         Read `read_markdown(\"CLAUDE.md\")` to see its heading structure.\n\n\
         Compare each section with the memories you just wrote. For sections that\n\
         overlap with memory content, offer to replace the body with a memory reference:\n\
         `See codescout memory 'architecture' (Key Patterns section).`\n\n\
         **preserve user-specific content:** personal preferences, code style rules,\n\
         iron rules, git workflow specifics, private notes — anything not derivable\n\
         from the codebase. Do NOT touch sections the user wrote for their own use.\n\n\
         **Add memory discovery hints** if CLAUDE.md doesn't already list available memories.\n\n\
         Present a summary of proposed changes and ask for approval before modifying.\n\n",
        // Return contract
        "---\n\n\
         ## Return Contract\n\n\
         Return a summary with:\n\
         - Workspace-level memories written (list topics)\n\
         - Cross-project patterns discovered\n\
         - CLAUDE.md changes proposed/applied\n\
         - Any issues or gaps\n",
    ];

    let mut prompt = String::from(
        "## Read Per-Project Memories\n\n\
         Read these memories to understand what each subagent discovered:\n\n",
    );

    // One read-back bullet per (project, topic) pair.
    for (project_id, _) in projects {
        for topic in PER_PROJECT_TOPICS.iter() {
            prompt.push_str(&format!(
                "- `memory(action=\"read\", project_id=\"{project_id}\", topic=\"{topic}\")`\n"
            ));
        }
    }

    for section in STATIC_SECTIONS.iter() {
        prompt.push_str(section);
    }
    prompt
}

/// Build dispatch instructions for workspace onboarding.
///
/// * `main_prompt_path` — prompt file whose Phase 0 / Phase 1 headings are the prerequisites.
/// * `project_prompts` — `(project id, prompt path)` pairs; one bullet is emitted per pair.
/// * `synthesis_path` — prompt file to read once all projects have been explored.
/// * `subagent_capable` — when true, the text asks for one parallel subagent per project and
///   adds an explicit wait step; otherwise projects are explored one at a time.
///
/// Every `read_markdown(...)` call line in the output is indented by two spaces. The
/// indented lines are built as separate values and interpolated, because a `\` line
/// continuation inside a string literal strips the leading whitespace of the next source
/// line and would silently drop the indent.
#[allow(dead_code)]
pub(crate) fn build_workspace_instructions(
    main_prompt_path: &str,
    project_prompts: &[(String, String)],
    synthesis_path: &str,
    subagent_capable: bool,
) -> String {
    let project_count = project_prompts.len();

    // Pre-rendered, indented call lines shared by both variants.
    let prerequisites_call = format!(
        "  read_markdown(\"{main_prompt_path}\", headings=[\"## Phase 0: Embedding Model Selection\", \"## Phase 1: Semantic Index Check\"])"
    );
    let synthesis_call = format!("  read_markdown(\"{synthesis_path}\")");

    // One bullet per project; each bullet carries its own leading newline so the list
    // attaches directly to the "Step 2" line (and vanishes when there are no projects).
    let project_steps: String = project_prompts
        .iter()
        .map(|(id, path)| {
            format!("\n  - {id}: read_markdown(\"{path}\") and follow all instructions")
        })
        .collect();

    if subagent_capable {
        format!(
            "Onboarding required — this is a workspace with {project_count} projects.\n\n\
             Step 1: Read prerequisites from the main prompt:\n\
             {prerequisites_call}\n\n\
             Step 2: Spawn {project_count} subagents IN PARALLEL — one per project:\
             {project_steps}\n\n\
             Step 3: Wait for ALL subagents to complete.\n\n\
             Step 4: Read the synthesis prompt and write workspace memories:\n\
             {synthesis_call}\n\n\
             Follow the synthesis instructions to read back per-project memories,\n\
             write workspace-level memories, generate the system prompt, and\n\
             offer to refresh CLAUDE.md."
        )
    } else {
        format!(
            "Onboarding required — this is a workspace with {project_count} projects.\n\n\
             Step 1: Read prerequisites:\n\
             {prerequisites_call}\n\n\
             Step 2: Explore each project one at a time:\
             {project_steps}\n\n\
             Step 3: Read the synthesis prompt and write workspace memories:\n\
             {synthesis_call}"
        )
    }
}