use std::path::Path;
/// Returns the built-in Markdown pattern guide for `lang`, if one exists.
///
/// `lang` is compared exactly (case-sensitive) against the lowercase keys
/// `"rust"`, `"python"`, `"typescript"`, `"javascript"` / `"jsx"`, `"go"`,
/// `"java"` and `"kotlin"`. Each guide is a `### <Language>` section that
/// lists five anti-patterns ("Don't → Do") followed by five correct patterns.
///
/// Returns `None` for any other key.
pub(crate) fn language_patterns(lang: &str) -> Option<&'static str> {
match lang {
"rust" => Some(
"### Rust\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. Gratuitous `.clone()` to silence borrow checker → borrow: `&str` over `&String`, `&[T]` over `&Vec<T>`\n\
2. `.unwrap()` everywhere → `?` with `.context()` from anyhow, `.expect(\"invariant: ...\")` only for proven invariants\n\
3. `Rc<RefCell<T>>` / interior mutability overuse → restructure data flow and ownership\n\
4. `String` params where `&str` suffices → `fn greet(name: &str)`, use `Cow<'_, str>` when ownership is conditional\n\
5. Catch-all `_ => {}` in match → handle all variants explicitly, let compiler check exhaustiveness\n\
\n\
**Correct patterns:**\n\
1. `thiserror` for library errors, `anyhow` for application errors — propagate with `?`\n\
2. Iterator chains over explicit loops — `.iter().map(f).collect()`, avoid unnecessary `.collect()`\n\
3. `Vec::with_capacity()` when size is known\n\
4. Derive common traits: `#[derive(Debug, Clone, PartialEq)]`, `#[derive(Default)]` when sensible\n\
5. `if let`/`while let` for single-pattern matching instead of full match",
),
"python" => Some(
"### Python\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. Mutable default arguments `def f(items=[])` → use `None` with `if items is None: items = []`\n\
2. `typing.List`, `typing.Dict`, `typing.Optional` → built-in generics: `list[str]`, `str | None`\n\
3. Bare/broad exception handling `except Exception: pass` → catch specific exceptions, log with context\n\
4. `os.path.join()` → `pathlib.Path`: `Path(base) / \"data\" / \"file.csv\"`\n\
5. `Any` type overuse → complete type annotations on all function signatures\n\
\n\
**Correct patterns:**\n\
1. Modern type hints (3.10+): `list[int]`, `dict[str, Any]`, `str | None`\n\
2. `uv` for packages, `ruff` for linting/formatting, `pyright` for types, `pytest` for testing\n\
3. `pyproject.toml` over `setup.py`/`requirements.txt`\n\
4. `dataclasses` for internal data, Pydantic for validation, TypedDict for dict shapes\n\
5. `is` comparison for singletons: `if x is None:` not `if x == None:`",
),
"typescript" => Some(
"### TypeScript\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. `any` type overuse → `unknown` when type is uncertain, Zod schemas for external data\n\
2. Type assertion `as` abuse / `as unknown as T` → type guards, proper narrowing\n\
3. Missing discriminated unions → model domain states with `'kind'`/`'type'` discriminant, `satisfies never` for exhaustiveness\n\
4. Non-null assertion `!` abuse → handle null/undefined with narrowing, optional chaining, type guards\n\
5. Enums → `as const` objects or string literal union types\n\
\n\
**Correct patterns:**\n\
1. Strict tsconfig: `strict: true`, `noUncheckedIndexedAccess`, `exactOptionalPropertyTypes`\n\
2. Explicit return types on exported functions\n\
3. Zod schema validation for external data — derive types with `z.infer<typeof Schema>`\n\
4. Discriminated unions with exhaustiveness: `default: throw new Error(\\`Unhandled: ${x satisfies never}\\`)`\n\
5. `interface` for object shapes, `type` for unions/intersections/mapped types",
),
// Both keys share the single JavaScript section.
"javascript" | "jsx" => Some(
"### JavaScript\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. Missing Promise error handling → every `.then()` needs `.catch()`, every `async/await` needs try/catch\n\
2. Stale closures in React hooks → ensure exhaustive dependency arrays in useEffect/useCallback/useMemo\n\
3. Event listener / timer memory leaks → cleanup with `removeEventListener`, `clearInterval`, `AbortController`\n\
4. `var` declarations → `const` by default, `let` only for reassignment\n\
5. Loose equality `==` → always `===` and `!==`\n\
\n\
**Correct patterns:**\n\
1. Proper useEffect async: define async inside effect, call it, return cleanup with AbortController\n\
2. `const` by default, destructuring at function boundaries\n\
3. Named exports over default exports — aids tree-shaking and refactoring\n\
4. Template literals over string concatenation\n\
5. `jsconfig.json` with `checkJs: true` for type safety in JS projects",
),
"go" => Some(
"### Go\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. `ioutil` package → `io.ReadAll`, `os.ReadFile`, `os.MkdirTemp` (deprecated since Go 1.16)\n\
2. Pre-modern patterns → `slices.Contains()`, `min`/`max` builtins (1.21), `for range n` (1.22)\n\
3. Java-style large interfaces at producer → accept interfaces at consumer, return structs, keep interfaces small (1-3 methods)\n\
4. Error wrapping with `%v` → `fmt.Errorf(\"context: %w\", err)`, use `errors.Is`/`errors.As`\n\
5. `context.Background()` deep in call chains → ctx as first param, pass through entire chain, never store in structs\n\
\n\
**Correct patterns:**\n\
1. Table-driven tests with `t.Parallel()` and `t.Run()` subtests\n\
2. `errgroup` for structured concurrency: `g, ctx := errgroup.WithContext(ctx)`\n\
3. Functional options pattern: `WithPort(8080)`, `WithTimeout(30*time.Second)`\n\
4. `slog` for structured logging (Go 1.21+), not `log.Println`\n\
5. No name stuttering: `package kv; type Store` not `type KVStore`",
),
"java" => Some(
"### Java\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. `@Autowired` field injection → constructor injection with `final` fields (Spring 4.3+ auto-infers)\n\
2. `Optional.get()` without check → `orElseThrow(() -> new NotFoundException(id))`, Optional for return types only\n\
3. `throws Exception` / bare catches → declare and catch specific exceptions, log with context\n\
4. `Date`/`Calendar`/`SimpleDateFormat` → `java.time`: `LocalDate`, `ZonedDateTime`, `DateTimeFormatter`\n\
5. Raw types `List items` → `List<String> items = new ArrayList<>()`\n\
\n\
**Correct patterns:**\n\
1. Records for data carriers (Java 16+): `public record UserDto(String name, String email) {}`\n\
2. Sealed classes + pattern matching (Java 17+/21+) with switch expressions\n\
3. Text blocks `\"\"\"` for multi-line strings (Java 15+)\n\
4. Pattern matching instanceof (Java 16+): `if (obj instanceof String s) { s.length(); }`\n\
5. Immutable collections: `List.of()`, `Map.of()`, `Set.of()`",
),
"kotlin" => Some(
"### Kotlin\n\
\n\
**Anti-patterns (Don't → Do):**\n\
1. `!!` (not-null assertion) overuse → `?.let`, `?:`, `?.` chaining, or redesign to eliminate nullability\n\
2. `GlobalScope.launch`/`async` → lifecycle-bound scopes: `viewModelScope`, `lifecycleScope`, injected `CoroutineScope`\n\
3. `runBlocking` in production code → only for `main()` and tests, use suspend functions\n\
4. Mutable `var` in data classes → `val` + `List` (not `MutableList`), immutability by default\n\
5. `enum` when sealed class is needed → `sealed class`/`sealed interface` for state with per-variant data\n\
\n\
**Correct patterns:**\n\
1. `val` over `var`, `List` over `MutableList` — expose read-only interfaces\n\
2. Structured concurrency: `coroutineScope { launch { a() }; launch { b() } }`\n\
3. Sealed class/interface for all state and result types\n\
4. `Sequence` for large collections with chained operations\n\
5. `require`/`check`/`error` for preconditions: `require(age >= 0) { \"Age must be non-negative\" }`",
),
// No built-in guide for this language key.
_ => None,
}
}
/// Builds one combined Markdown document of language pattern guides.
///
/// Looks up every entry of `languages` with [`language_patterns`], keeps the
/// sections that exist in first-seen order, and joins them under a common
/// `# Language Patterns` header with `---` separators between sections.
///
/// Sections are de-duplicated: several keys map to the same guide
/// (`"javascript"` and `"jsx"`), and `languages` itself may contain repeats,
/// so without this the same guide would appear more than once.
///
/// Returns `None` when none of the languages has a guide.
pub(crate) fn build_language_patterns_memory(languages: &[String]) -> Option<String> {
    let mut sections: Vec<&'static str> = Vec::new();
    for section in languages.iter().filter_map(|lang| language_patterns(lang)) {
        // The list is tiny (one entry per supported language), so a linear
        // `contains` keeps insertion order without needing a set.
        if !sections.contains(&section) {
            sections.push(section);
        }
    }
    if sections.is_empty() {
        return None;
    }

    let mut content = String::from(
        "# Language Patterns\n\n\
         Per-language anti-patterns and correct patterns for this project's languages.\n\
         Each section lists the top 5 mistakes LLMs make and the top 5 idiomatic patterns.\n\n",
    );
    for (i, section) in sections.iter().enumerate() {
        if i > 0 {
            content.push_str("\n---\n\n");
        }
        content.push_str(section);
        content.push('\n');
    }
    Some(content)
}
/// Builds the first draft of a project's system prompt as Markdown.
///
/// Sections are emitted in a fixed order: Entry Points, Key Abstractions,
/// Search Tips, Language Patterns (only when at least one language has a
/// built-in guide), Navigation Strategy, Retrieval Stack, MCP Resources,
/// Project Rules, Workspace Projects (workspace mode only) and Registered
/// Libraries (only when `libraries` is non-empty).
///
/// "Workspace mode" means `projects` holds more than one project; it switches
/// the navigation section to a per-project layout and adds the project table.
///
/// * `languages` – language keys of the project; used for the search tip and
///   for the pointer to the `language-patterns` memory.
/// * `entry_points` – paths listed under Entry Points; the first one seeds the
///   `symbols(...)` example in single-project navigation.
/// * `project_root` – when given in workspace mode, the workspace config under
///   this root is read to list cross-project dependencies. Read or parse
///   failures are ignored and simply omit that list.
/// * `projects` – discovered workspace projects; `None` is treated as empty.
/// * `libraries` – registered libraries, listed with their index status.
pub(crate) fn build_system_prompt_draft(
    languages: &[String],
    entry_points: &[String],
    project_root: Option<&Path>,
    projects: Option<&[crate::workspace::DiscoveredProject]>,
    libraries: &[crate::library::registry::LibraryEntry],
) -> String {
    let projects_slice = projects.unwrap_or(&[]);
    // Workspace mode: more than one project was discovered.
    let is_workspace = projects_slice.len() > 1;

    let mut draft = String::new();
    draft.push_str("# Project — Code Explorer Guidance\n\n");

    // --- Entry Points ---
    draft.push_str("## Entry Points\n");
    if entry_points.is_empty() {
        draft.push_str("- Explore with `tree(\".\")` then `symbols` on key files\n");
    } else {
        for ep in entry_points {
            draft.push_str(&format!("- `{}` — start here\n", ep));
        }
    }
    draft.push('\n');

    // --- Key Abstractions (placeholder filled in during onboarding) ---
    draft.push_str("## Key Abstractions\n");
    draft.push_str("- [3-5 entries max. Each = one line: `TypeName` (`path/`) — one-line purpose only. No narrative.]\n\n");

    // --- Search Tips ---
    draft.push_str("## Search Tips\n");
    if !languages.is_empty() {
        draft.push_str(&format!("- This is a {} project\n", languages.join("/")));
    }
    draft.push_str("- Use specific terms over generic ones (e.g., avoid 'data', 'utils')\n");
    draft.push_str("- For call relationships and impact analysis: `call_graph(symbol, path)` — traces callers/callees\n");
    if is_workspace {
        draft.push_str(
            "- **Workspace mode:** always scope `semantic_search` with `project_id=\"<id>\"` — \
             broad terms match all projects and return mixed results\n",
        );
        for p in projects_slice {
            // Pick a query hint that fits the project's primary language family.
            let example_term = if p.languages.iter().any(|l| l == "rust") {
                "key type or trait name"
            } else if p
                .languages
                .iter()
                .any(|l| l == "typescript" || l == "javascript")
            {
                "handler or component name"
            } else if p.languages.iter().any(|l| l == "python") {
                "class or function name"
            } else {
                "concept specific to this project"
            };
            draft.push_str(&format!(
                "  - `{}`: `semantic_search(\"<{}>\", project_id=\"{}\")` \
                 — [fill in good query examples during onboarding]\n",
                p.id, example_term, p.id
            ));
        }
    }
    draft.push('\n');

    // --- Language Patterns ---
    // Only languages with a built-in guide are named; the section is skipped
    // entirely when there are none.
    let pattern_langs: Vec<&str> = languages
        .iter()
        .map(|s| s.as_str())
        .filter(|l| language_patterns(l).is_some())
        .collect();
    if !pattern_langs.is_empty() {
        draft.push_str("## Language Patterns\n");
        draft.push_str(&format!(
            "This project uses {}. Read `memory(action=\"read\", topic=\"language-patterns\")` before writing, editing, or reviewing code.\n\n",
            pattern_langs.join(", ")
        ));
    }

    // --- Navigation Strategy ---
    if is_workspace {
        draft.push_str("## Navigation Strategy\n\n");
        draft.push_str("1. `memory(action=\"read\", topic=\"architecture\")` — orient yourself to the workspace\n");
        draft.push_str(
            "2. `semantic_search(\"your concept\")` — find relevant code across projects\n",
        );
        draft.push_str(
            "3. `memory(action=\"recall\", query=\"...\")` — search memories by meaning\n\n",
        );
        draft.push_str("**Per-project navigation:**\n\n");
        for p in projects_slice {
            let langs = if p.languages.is_empty() {
                String::new()
            } else {
                format!(" ({})", p.languages.join(", "))
            };
            draft.push_str(&format!("### {}{}\n", p.id, langs));
            draft.push_str(&format!(
                "1. `symbols(\"{}\")` — [fill in entry point during onboarding]\n",
                p.relative_root.display()
            ));
            draft.push_str(&format!(
                "2. `semantic_search(\"your concept\", project_id=\"{}\")` — search within this project\n",
                p.id
            ));
            draft.push_str(&format!(
                "3. `memory(project_id=\"{}\", action=\"read\", topic=\"architecture\")` — project-specific knowledge\n",
                p.id
            ));
            draft.push_str(
                "3b. `symbol_at(path, line)` — hover + type sig when you have an exact location\n",
            );
            draft.push_str("3c. `references(symbol, path)` — all call sites before any edit\n");
            draft.push_str(
                "4. `call_graph(symbol=\"Name\", path=\"...\", direction=\"callers\")` — blast radius before any structural change; `direction=\"callees\"` for flow tracing\n\n",
            );
        }
        draft.push_str("**Cross-project navigation:**\n");
        draft.push_str("- **Quick lookups** (1–3 calls): pass `project_id=\"<id>\"` to scope the call — no state change.\n");
        draft.push_str("- **Sustained exploration** (reading memories, semantic search, many tool calls): \
            use `workspace(action=\"activate\", path=\"<id>\")`, but **always `workspace(action=\"activate\")` back to your original \
            project when done.** Forgetting to return leaves all subsequent tool calls operating \
            against the wrong project.\n");
        draft.push_str("- **Subagents:** the MCP server state is shared with the parent conversation. \
            You **MUST** `workspace(action=\"activate\")` back to the original project before completing your task.\n\n");
        draft.push_str(
            "**Markdown files** (memories, plans, docs): \
             `read_markdown(\"path\")` — returns heading map + `@file_ref` for large files. \
             **IRON LAW #6:** subsequent reads MUST use `@file_ref` (not the original path): \
             `read_markdown(\"@file_ref\", heading=\"## Section\")` or `start_line=/end_line=`.\n\n",
        );
    } else {
        draft.push_str("## Navigation Strategy\n");
        draft.push_str("1. `memory(action=\"read\", topic=\"architecture\")` — orient yourself\n");
        // Use the first entry point as the concrete example, else a generic path.
        match entry_points.first() {
            Some(first) => draft.push_str(&format!(
                "2. `symbols(\"{}\")` — see main structure\n",
                first
            )),
            None => draft.push_str("2. `symbols(\"src/\")` — see main structure\n"),
        }
        draft.push_str("3. `semantic_search(\"your concept\")` — find relevant code\n");
        draft.push_str("4. `symbols(name=\"Name\", include_body=true)` — read implementation\n");
        draft.push_str("   - regex-like patterns belong in `grep`, not `symbols`\n");
        draft.push_str("4b. `symbol_at(path, line)` — hover + type sig when you have an exact location from prior tool output; skip re-searching\n");
        draft.push_str("4c. `references(symbol, path)` — all call sites before any edit\n");
        draft.push_str(
            "5. `call_graph(symbol=\"Name\", direction=\"callers\")` — transitive blast radius; `direction=\"callees\"` for flow tracing\n",
        );
        draft.push_str(
            "6. `memory(action=\"recall\", query=\"...\")` — search memories by meaning\n\n",
        );
        draft.push_str(
            "7. `read_markdown(\"path/to/file.md\")` — returns heading map + `@file_ref` for large files. \
             **IRON LAW #6:** subsequent reads MUST use `@file_ref` (not the original path): \
             `read_markdown(\"@file_ref\", heading=\"## Section\")` or `start_line=/end_line=`.\n\n",
        );
    }

    // --- Retrieval Stack ---
    draft.push_str("## Retrieval Stack\n");
    draft.push_str(
        "`semantic_search` runs through the Qdrant + TEI hybrid stack. \
         Start it once per machine with `./scripts/retrieval-stack.sh up`, then index this \
         project with `cargo run --release --bin sync_project -- <path> <project_id>`. \
         If a call returns `retrieval stack offline`, the stack isn't running.\n\n",
    );

    // --- MCP Resources ---
    draft.push_str("## MCP Resources\n");
    draft.push_str(
        "Extended docs and project context are available via MCP resources (`resources/read <uri>`):\n",
    );
    draft.push_str("- `doc://codescout-tool-guide` — long-form usage notes for every tool (examples, tradeoffs)\n");
    draft.push_str(
        "- `memory://<name>` — project memory files (architecture, conventions, gotchas)\n",
    );
    draft.push_str("- `project://summary` — active project + index + LSP snapshot\n\n");

    // --- Project Rules (placeholder filled in during onboarding) ---
    draft.push_str("## Project Rules\n");
    draft.push_str("- [Fill from Phase 1 exploration: linting, formatting, commit conventions]\n");

    // --- Workspace Projects (workspace mode only) ---
    if is_workspace {
        draft.push_str("\n## Workspace Projects\n\n");
        draft.push_str("| Project | Root | Languages | Build |\n");
        draft.push_str("|---------|------|-----------|-------|\n");
        for p in projects_slice {
            draft.push_str(&format!(
                "| {} | {} | {} | {} |\n",
                p.id,
                p.relative_root.display(),
                p.languages.join(", "),
                p.manifest.as_deref().unwrap_or("-"),
            ));
        }
        draft.push('\n');

        // Dependencies come from the workspace config file; a missing or
        // unparsable file just means no dependency list is printed.
        if let Some(root) = project_root {
            let ws_path = crate::config::workspace::workspace_config_path(root);
            if let Ok(content) = std::fs::read_to_string(&ws_path) {
                if let Ok(ws) =
                    toml::from_str::<crate::config::workspace::WorkspaceConfig>(&content)
                {
                    let deps: Vec<_> = ws
                        .projects
                        .iter()
                        .filter(|p| !p.depends_on.is_empty())
                        .collect();
                    if !deps.is_empty() {
                        draft.push_str("**Cross-project dependencies:**\n");
                        for p in deps {
                            draft.push_str(&format!(
                                "- {} depends on {}\n",
                                p.id,
                                p.depends_on.join(", "),
                            ));
                        }
                        draft.push('\n');
                    }
                }
            }
        }
        draft.push_str(
            "Use `project_id=\"name\"` parameter to scope search/navigation to a specific project.\n\n",
        );
        draft.push_str(
            "**Per-project details:** Use `memory(project_id=\"<id>\", topic=\"architecture\")` \
             or `memory(project_id=\"<id>\", topic=\"conventions\")` for project-specific knowledge.\n\n",
        );
    }

    // --- Registered Libraries ---
    if !libraries.is_empty() {
        draft.push_str("\n## Registered Libraries\n\n");
        for lib in libraries {
            let status = if lib.indexed {
                // "Stale" only when both versions are known and they differ.
                if lib.version.is_some()
                    && lib.version_indexed.is_some()
                    && lib.version != lib.version_indexed
                {
                    "indexed [stale]"
                } else {
                    "indexed"
                }
            } else {
                "not indexed"
            };
            draft.push_str(&format!(
                "- **{}** ({}) — {}\n",
                lib.name, lib.language, status
            ));
        }
        draft.push_str(
            "\nUse `scope=\"lib:<name>\"` with `symbols`, `grep`, \
             and `semantic_search` to navigate library code. \
             Run `index(action=\"build\", scope=\"lib:<name>\")` to enable semantic search for a library.\n",
        );
    }

    draft
}
/// Appends a `## User Preferences` section to `draft` from stored memories.
///
/// Lists up to 10 entries from the `"preferences"` bucket of the active
/// project, most recently updated first, one bullet per memory. Content longer
/// than 200 bytes is cut at a character boundary and suffixed with `...`.
///
/// This is best effort: `draft` is left untouched when there is no active
/// project, the memory store cannot be obtained, listing fails, or the list
/// is empty.
pub(crate) async fn append_preferences_section(agent: &crate::agent::Agent, draft: &mut String) {
    // Scoped so `state` (the value returned by `inner.read()`) is dropped
    // before the awaits that follow.
    let project_id = {
        let state = agent.inner.read().await;
        if let Some(active) = state.active_project() {
            active.config.project.name.clone()
        } else {
            return;
        }
    };

    let store = if let Ok(store) = agent.semantic_memory_store().await {
        store
    } else {
        return;
    };

    let filter = crate::memory::semantic_store::MemoryFilter {
        bucket: Some("preferences".into()),
        order_by: crate::memory::semantic_store::MemoryOrder::UpdatedAtDesc,
        limit: Some(10),
        ..Default::default()
    };
    let hits = if let Ok(found) = store.list(&project_id, filter).await {
        found
    } else {
        return;
    };
    if hits.is_empty() {
        return;
    }

    draft.push_str("\n## User Preferences\n\n");
    for hit in hits.iter() {
        let memory = &hit.memory;
        let summary = if memory.content.len() <= 200 {
            memory.content.clone()
        } else {
            // Never slice inside a multi-byte character.
            let cut = crate::tools::floor_char_boundary(&memory.content, 200);
            format!("{}...", &memory.content[..cut])
        };
        draft.push_str(&format!("- **{}:** {}\n", memory.title, summary));
    }
}
/// Returns the fixed text placed in front of the onboarding subagent prompt.
///
/// It states the subagent's role, tells it to activate the workspace in
/// writable mode as its first action, and ends with a `---` separator.
pub(crate) fn build_subagent_preamble() -> String {
    [
        // Role.
        "You are an onboarding subagent for codescout. ",
        "Your job is to thoroughly explore this codebase and write project memories ",
        "that will be used by every future session.\n\n",
        // Mandatory first tool call.
        "FIRST ACTION: Call workspace(action=\"activate\", path=\".\", read_only=false) to initialize the ",
        "project context. All subsequent tool calls depend on this.\n\n",
        "Then follow the exploration and memory-writing instructions below exactly.\n\n",
        "---\n\n",
    ]
    .concat()
}
/// Returns the fixed text placed after the onboarding subagent prompt.
///
/// It opens with a `---` separator and a `## Return Contract` heading,
/// describes the structured summary the subagent must end with, and closes
/// with the instruction to re-activate the workspace as its last action.
pub(crate) fn build_subagent_epilogue() -> String {
    const RETURN_CONTRACT: &str = "\n---\n\n\
        ## Return Contract\n\n\
        When you have completed ALL exploration steps and written ALL memories, end your \
        response with this structured summary:\n\n\
        **Exploration Summary:**\n\
        - What this system does (your own words, not the README's)\n\
        - The 5 most important types/modules (name, file, role)\n\
        - How a typical operation flows (concrete function names)\n\
        - What surprised you (things docs didn't mention)\n\n\
        **Memories Written:**\n\
        - List each memory topic you wrote (e.g., \"architecture\", \"conventions\", etc.)\n\n\
        **Warnings:**\n\
        - Any issues encountered (index not built, LSP failures, files that couldn't be read)\n\
        - Steps you couldn't fully complete and why\n\n\
        This summary is returned to the main agent and shown to the user. Make it \
        informative but concise — aim for 300-500 tokens total.\n\n\
        LAST ACTION: Call workspace(action=\"activate\", path=\".\") before returning to ensure the parent's \
        project state is unchanged.";
    RETURN_CONTRACT.to_owned()
}
/// Lists the level-2 (`## `) headings of `prompt` with their section sizes.
///
/// Each entry has the form `"<n>. <heading line> (<k> lines)"`, where `n` is
/// the 1-based position of the heading and `k` counts the heading line itself
/// plus every line up to (not including) the next `## ` heading, or the end of
/// the text for the last one. Lines before the first heading are not counted.
pub(crate) fn build_heading_map(prompt: &str) -> Vec<String> {
    let mut line_count = 0usize;
    let mut sections: Vec<(usize, &str)> = Vec::new();
    for (line_no, text) in prompt.lines().enumerate() {
        line_count = line_no + 1;
        if text.starts_with("## ") {
            sections.push((line_no, text));
        }
    }

    // A section ends where the next one starts; the last runs to the end.
    let section_ends = sections
        .iter()
        .skip(1)
        .map(|&(next_start, _)| next_start)
        .chain(std::iter::once(line_count));

    sections
        .iter()
        .zip(section_ends)
        .enumerate()
        .map(|(n, (&(start, title), end))| {
            format!("{}. {} ({} lines)", n + 1, title, end - start)
        })
        .collect()
}
/// Builds the instructions that tell an agent how to run onboarding from a
/// prompt file stored at `prompt_path`.
///
/// The core is a checklist of `read_markdown` calls, one per phase of the
/// onboarding prompt. With `subagent_capable` set, the checklist is wrapped in
/// instructions to delegate the work to a subagent; otherwise the agent is
/// told to follow the checklist itself.
pub(crate) fn build_buffered_onboarding_instructions(
    prompt_path: &str,
    subagent_capable: bool,
) -> String {
    let checklist = format!(
        "Read these phases in order — complete each before moving to the next:\n\
         1. read_markdown(\"{prompt_path}\", heading=\"## THE IRON LAW\")\n\
         2. read_markdown(\"{prompt_path}\", headings=[\"## Phase 0: Embedding Model Selection\", \"## Phase 1: Semantic Index Check\"])\n\
         3. read_markdown(\"{prompt_path}\", heading=\"## Phase 2: Explore the Code\")\n\
         → Execute ALL exploration steps before continuing\n\
         4. read_markdown(\"{prompt_path}\", headings=[\"## Red Flags — STOP and Return to Phase 2\", \"## Common Rationalizations\"])\n\
         5. read_markdown(\"{prompt_path}\", heading=\"## Phase 3: Write the Memories (Single-Project Mode)\")\n\
         → Write all memories before continuing\n\
         6. read_markdown(\"{prompt_path}\", headings=[\"## After Everything Is Created\", \"## Gathered Project Data\", \"## Return Contract\"])\n\
         For workspace mode, read these after step 2 (before step 3):\n\
         read_markdown(\"{prompt_path}\", headings=[\"## Workspace Survey — Breadth-First\", \"## Workspace Deep Dives — Subagent Per-Project\"])\n\
         And after step 4 (before step 5):\n\
         read_markdown(\"{prompt_path}\", heading=\"## Workspace Memory Synthesis\")"
    );
    let headline = "Onboarding required — this project has not been explored yet.";

    // Without subagents the agent simply walks the checklist itself.
    if !subagent_capable {
        return format!(
            "{headline}\n\
             {checklist}\n\
             Follow each phase's instructions to explore the codebase and write project memories."
        );
    }

    format!(
        "{headline}\n\
         Spawn a general-purpose subagent with model=sonnet to perform the exploration and \
         memory writing. The subagent must read the onboarding prompt by phase — \
         copy-paste these exact commands:\n\
         {checklist}\n\
         Do NOT read the onboarding prompt yourself — let the subagent handle it.\n\
         When the subagent completes, report its summary to the user. Then read whichever \
         memories are relevant to the user's current task via memory(action=\"read\", topic=...).\n\
         Wait for the subagent to complete before continuing — onboarding is a prerequisite \
         for all subsequent work.\n\
         If you cannot spawn subagents, follow the checklist yourself."
    )
}
/// Builds the instructions for refreshing an outdated system prompt.
///
/// The first line reports the version jump from `stored` to `current`; a
/// missing stored version is shown as `pre-versioning`. With
/// `subagent_capable` set, the agent is told to delegate the refresh to a
/// subagent; otherwise it is told to read the prompt at `prompt_path` and
/// follow it itself.
pub(crate) fn build_buffered_refresh_instructions(
    prompt_path: &str,
    stored: Option<u32>,
    current: u32,
    subagent_capable: bool,
) -> String {
    let previous = match stored {
        Some(version) => format!("v{version}"),
        None => String::from("pre-versioning"),
    };

    // Both variants share the same headline.
    let mut text = format!(
        "System prompt outdated ({previous} → v{current}) — a lightweight refresh is needed.\n"
    );
    let steps = if subagent_capable {
        format!(
            "Spawn a general-purpose subagent with model=sonnet to regenerate the system prompt. \
             The subagent must:\n\
             read_markdown(\"{prompt_path}\") — read the full refresh prompt (it's short)\n\
             The subagent will re-read memories and regenerate system-prompt.md without \
             re-exploring the codebase.\n\
             When the subagent completes, continue with the user's original task."
        )
    } else {
        format!(
            "Read the refresh prompt:\n\
             read_markdown(\"{prompt_path}\")\n\
             Follow it to re-read memories and regenerate system-prompt.md."
        )
    };
    text.push_str(&steps);
    text
}
/// Builds the prompt for a subagent that regenerates `system-prompt.md`.
///
/// Step 2 of the prompt lists one `memory(action="read", ...)` call per entry
/// of `memory_topics`, skipping the `system-prompt` topic itself. The rest of
/// the prompt is fixed text: what the regenerated file should and should not
/// contain, a template, and the closing steps.
pub(crate) fn build_prompt_refresh_subagent_prompt(memory_topics: &[String]) -> String {
    const BEFORE_READS: &str = "System prompt refresh — the stored onboarding version is behind the current codescout version.\n\
        Steps:\n\
        1. workspace(action=\"activate\", path=\".\", read_only=false) — enable writes\n\
        2. Read each project memory that contributes to the system prompt:\n";
    const AFTER_READS: &str = "\n\
        3. Read the current system-prompt.md (if it exists) for Entry Points structure only — do NOT copy tool navigation examples from it, as those may be stale\n\
        4. Regenerate system-prompt.md following the canonical template spec:\n\
        **What to include:**\n\
        - Entry points: specific file paths + symbol names to start exploring\n\
        - Key abstractions: **3-5 entries max**. Each = one line: `TypeName` (`path/`) — one-line purpose only. NO architecture narrative, NO state machine descriptions, NO config details — those go in the `architecture` memory.\n\
        - Search tips: concrete query examples that work well for THIS codebase; terms to avoid\n\
        - Navigation strategy: recommended tool call sequence for a new task. Every step must name a codescout tool. Include `call_graph(symbol, path, direction=\"callers\")` for blast-radius checking before edits, and `direction=\"callees\"` for tracing data/control flow.\n\
        - Project rules: conventions the AI must follow that linters don't catch\n\
        **What NOT to include:**\n\
        - How codescout tools work (static tool guidance covers this)\n\
        - Full architecture details (architecture memory covers this)\n\
        - Command lists, glossary, detailed conventions (other memories cover these)\n\
        - More than ~30 lines total (injected every session — keep it dense)\n\
        - Native host tool names (Read, Grep, Glob, Edit, Bash) — blocked in codescout sessions\n\
        **Template:**\n\
        ```\n\
        # [Project Name] — Code Explorer Guidance\n\
        ## Entry Points\n\
        [Specific files + symbols, not module descriptions]\n\
        ## Key Abstractions\n\
        [3-5 lines: `TypeName` (`path/`) — one-line purpose]\n\
        ## Search Tips\n\
        [Concrete queries + terms to avoid]\n\
        ## Navigation Strategy\n\
        [Numbered steps, each naming a codescout tool. Include call_graph step.]\n\
        ## Project Rules\n\
        [Conventions not caught by linters]\n\
        ```\n\
        5. Write the updated content to .codescout/system-prompt.md\n\
        6. Do NOT re-explore the codebase — the memories already contain the relevant knowledge\n\
        7. workspace(action=\"activate\", path=\".\") — restore normal state\n\
        When done, report: \"System prompt refreshed (vN → vM).\"";

    // One read call per topic; the system prompt itself is the output, not an input.
    let mut reads: Vec<String> = Vec::new();
    for topic in memory_topics {
        if topic.as_str() == "system-prompt" {
            continue;
        }
        reads.push(format!("   - memory(action=\"read\", topic=\"{topic}\")"));
    }
    let memory_reads = reads.join("\n");

    [BEFORE_READS, memory_reads.as_str(), AFTER_READS].concat()
}
/// Builds the onboarding prompt for exploring a single workspace project.
///
/// The prompt contains, in order: the "iron law" (no memories before
/// exploration is complete), a description of `project` (id, root, languages
/// and, when present, manifest), the list of `siblings` for context, the
/// Phase 2 exploration steps with their gate checklist, red flags, the
/// Phase 3 memory specifications, and a return contract.
///
/// * `project` – the project the prompt is written for; its id is substituted
///   into every `project_id="..."` example.
/// * `siblings` – `(id, languages)` pairs of the other projects; listed only
///   so the reader knows not to explore them.
#[allow(dead_code)]
pub(crate) fn build_per_project_prompt(
    project: &crate::workspace::DiscoveredProject,
    siblings: &[(String, Vec<String>)],
) -> String {
    let mut prompt = String::new();

    // --- Iron law ---
    prompt.push_str("## THE IRON LAW\n\n");
    prompt.push_str(
        "```\nNO MEMORIES WRITTEN WITHOUT COMPLETING ALL EXPLORATION STEPS FIRST\n```\n\n",
    );
    prompt.push_str("You may only call `memory(action: \"write\", ...)` after you have:\n");
    prompt.push_str("1. Completed ALL exploration steps below\n");
    prompt.push_str("2. Verified EVERY item in the Phase 2 Gate Checklist\n\n");
    prompt.push_str("---\n\n");

    // --- Project description ---
    prompt.push_str("## Your Project\n\n");
    prompt.push_str(&format!("- **ID:** {}\n", project.id));
    prompt.push_str(&format!(
        "- **Root:** {}\n",
        project.relative_root.display()
    ));
    prompt.push_str(&format!(
        "- **Languages:** {}\n",
        project.languages.join(", ")
    ));
    if let Some(ref manifest) = project.manifest {
        prompt.push_str(&format!("- **Manifest:** {}\n", manifest));
    }
    if !siblings.is_empty() {
        prompt.push_str("\n**Sibling projects** (for context — Do NOT deep-dive these):\n");
        for (id, langs) in siblings {
            prompt.push_str(&format!("- {} ({})\n", id, langs.join(", ")));
        }
    }
    prompt.push_str("\n---\n\n");

    // --- Phase 2: exploration steps ---
    prompt.push_str("## Phase 2: Explore the Code\n\n");
    prompt.push_str("Explore ONLY your project root. Do NOT explore sibling projects.\n\n");
    prompt.push_str(&format!(
        "### Step 1: Map the Codebase Structure\n\n\
         - `tree(\"{root}\")` — top-level structure\n\
         - `tree` on each subdirectory\n\
         - `read_file` on the build config\n\
         - `read_markdown(\"README.md\")` if present\n\n",
        root = project.relative_root.display()
    ));
    prompt.push_str(
        "### Step 2: Full Symbol Survey\n\n\
         - Run `symbols` on the main source directory\n\
         - Run `symbols` on EACH subdirectory individually\n\
         - Survey at least 5 distinct source files\n\n",
    );
    prompt.push_str(
        "### Step 3: Read Core Implementations\n\n\
         - Identify 5+ central types/functions from Step 2\n\
         - `symbols(name=..., include_body=true)` for each\n\
         - Read the FULL body, not just signatures\n\n",
    );
    prompt.push_str(
        "### Step 4: Read Architecture Documentation\n\n\
         - `read_markdown` on any docs found in the project\n\
         - Read completely — do not skim\n\n",
    );
    prompt.push_str(
        "### Step 5: Trace Two Data Flows\n\n\
         - Trace the most representative operation end-to-end\n\
         - Trace a second distinct path (error, write vs read, etc.)\n\n",
    );
    prompt.push_str(
        "### Step 6: Concept-Level Search (5+ queries)\n\n\
         - Error handling, data flow, testing, config, domain concept\n\
         - Use `semantic_search` or `grep` as fallback\n\n",
    );
    prompt.push_str(
        "### Step 7: Examine Tests\n\n\
         - `symbols` on test directory\n\
         - Read 2-3 test files for patterns\n\n",
    );
    prompt.push_str(
        "### Phase 2 Gate Checklist\n\n\
         Before writing ANY memory, verify ALL true:\n\
         - [ ] Listed structure AND ran tree on major subdirectories\n\
         - [ ] Symbol survey on 5+ source files\n\
         - [ ] Read full body of 5+ core implementations\n\
         - [ ] Read all architecture docs\n\
         - [ ] Traced two data flows\n\
         - [ ] Ran 5+ concept queries\n\
         - [ ] Read 2-3 test files\n\n\
         ---\n\n",
    );
    prompt.push_str(
        "## Red Flags — STOP and Return to Phase 2\n\n\
         If you notice any of these, STOP and go back:\n\
         - \"I have a good enough picture\" — No, read the code.\n\
         - \"The README covers this\" — READMEs lie. Verify in code.\n\
         - \"This is similar to...\" — Explore anyway. Differences matter.\n\n\
         ---\n\n",
    );

    // --- Phase 3: memory specifications ---
    prompt.push_str("## Phase 3: Write the Memories\n\n");
    prompt.push_str(&format!(
        "Write these memories using `memory(action=\"write\", project_id=\"{id}\", topic=\"...\", content=\"...\")`.\n\n",
        id = project.id
    ));
    // Must go through `format!`: the `semantic_search` example names this
    // project's id, and a plain `push_str` would emit the text `{id}` verbatim.
    prompt.push_str(&format!(
        "### 1. `project-overview`\n\
         Purpose, tech stack, key dependencies, runtime requirements. 15-30 lines.\n\n\
         ### 2. `architecture`\n\
         Module structure, key abstractions, data flow, design patterns. 20-40 lines.\n\
         Include 3-5 good `semantic_search(query, project_id=\"{id}\")` examples.\n\n\
         ### 3. `conventions`\n\
         Language/framework patterns, naming, testing approach. 15-30 lines.\n\n",
        id = project.id
    ));
    prompt.push_str("---\n\n");

    // --- Return contract ---
    prompt.push_str(
        "## Return Contract\n\n\
         Return a summary with:\n\
         - What this project does (your own words)\n\
         - 3-5 most important types/modules\n\
         - How a typical operation flows\n\
         - Memories written (list topics)\n\
         - Any issues encountered\n",
    );
    prompt
}
/// Builds the prompt for the workspace synthesis step that follows the
/// per-project explorations.
///
/// The prompt first lists, for every entry of `projects`, the three
/// per-project memories to read back (`project-overview`, `architecture`,
/// `conventions`). The remainder is fixed text: the five workspace-level
/// memories to write, how to generate the system prompt, how to refresh
/// `CLAUDE.md`, and the return contract. Only the project ids are used; the
/// language lists in `projects` are ignored.
#[allow(dead_code)]
pub(crate) fn build_synthesis_prompt(projects: &[(String, Vec<String>)]) -> String {
    const READ_BACK_HEADER: &str = "## Read Per-Project Memories\n\n\
        Read these memories to understand what each subagent discovered:\n\n";
    // `{id}` below is a literal placeholder for the reader, not a format argument.
    const WRITE_MEMORIES: &str = "\n---\n\n\
        ## Write Workspace Memories\n\n\
        Write these 5 workspace-level memories (no `project_id:` parameter = workspace-level):\n\n\
        ### 1. `architecture`\n\
        Workspace-level architecture:\n\
        - Project map: each project's purpose (1 sentence each)\n\
        - Cross-project dependencies (which imports from which)\n\
        - Shared infrastructure (CI, deployment, tooling)\n\
        15-30 lines.\n\n\
        ### 2. `conventions`\n\
        Shared patterns across projects: commit style, PR process, CI rules.\n\
        Per-project: reference `memory(project_id=\"{id}\", topic=\"conventions\")`.\n\
        15-30 lines.\n\n\
        ### 3. `development-commands`\n\
        Workspace-level build/test/lint commands. Per-project commands go in per-project memories.\n\
        10-20 lines.\n\n\
        ### 4. `domain-glossary`\n\
        Terms used across multiple projects. Project-specific terms go in per-project memories.\n\
        10-20 lines.\n\n\
        ### 5. `gotchas`\n\
        Cross-project pitfalls, version mismatches, integration gotchas.\n\
        10-20 lines.\n\n";
    const GENERATE_SYSTEM_PROMPT: &str = "---\n\n## Generate System Prompt\n\n\
        Write the system prompt directly to `.codescout/system-prompt.md` with `create_file` (NOT `memory(action=\"write\", topic=\"system-prompt\")` — it is the always-on root file injected into every session, not a memory topic).\n\
        Include: entry points per project, key abstractions, search tips scoped by project,\n\
        navigation strategy for the workspace.\n\n";
    const REFRESH_CLAUDE_MD: &str = "---\n\n## Refresh CLAUDE.md\n\n\
        Read `read_markdown(\"CLAUDE.md\")` to see its heading structure.\n\n\
        Compare each section with the memories you just wrote. For sections that\n\
        overlap with memory content, offer to replace the body with a memory reference:\n\
        `See codescout memory 'architecture' (Key Patterns section).`\n\n\
        **preserve user-specific content:** personal preferences, code style rules,\n\
        iron rules, git workflow specifics, private notes — anything not derivable\n\
        from the codebase. Do NOT touch sections the user wrote for their own use.\n\n\
        **Add memory discovery hints** if CLAUDE.md doesn't already list available memories.\n\n\
        Present a summary of proposed changes and ask for approval before modifying.\n\n";
    const RETURN_CONTRACT: &str = "---\n\n## Return Contract\n\n\
        Return a summary with:\n\
        - Workspace-level memories written (list topics)\n\
        - Cross-project patterns discovered\n\
        - CLAUDE.md changes proposed/applied\n\
        - Any issues or gaps\n";

    let mut prompt = String::from(READ_BACK_HEADER);
    for (id, _) in projects {
        for topic in ["project-overview", "architecture", "conventions"] {
            prompt.push_str(&format!(
                "- `memory(action=\"read\", project_id=\"{id}\", topic=\"{topic}\")`\n"
            ));
        }
    }
    for section in [
        WRITE_MEMORIES,
        GENERATE_SYSTEM_PROMPT,
        REFRESH_CLAUDE_MD,
        RETURN_CONTRACT,
    ] {
        prompt.push_str(section);
    }
    prompt
}
/// Builds the top-level onboarding instructions for a multi-project workspace.
///
/// The instructions point at the prerequisite phases in `main_prompt_path`,
/// list one `read_markdown` call per `(id, path)` pair in `project_prompts`,
/// and finish with the synthesis prompt at `synthesis_path`. With
/// `subagent_capable` set, the per-project work is described as parallel
/// subagents followed by a wait step; otherwise the projects are explored one
/// at a time.
#[allow(dead_code)]
pub(crate) fn build_workspace_instructions(
    main_prompt_path: &str,
    project_prompts: &[(String, String)],
    synthesis_path: &str,
    subagent_capable: bool,
) -> String {
    let count = project_prompts.len();

    // Shared by both variants: the prerequisite read and the per-project list.
    let prerequisites = format!(
        "read_markdown(\"{main_prompt_path}\", headings=[\"## Phase 0: Embedding Model Selection\", \"## Phase 1: Semantic Index Check\"])"
    );
    let mut project_list = String::new();
    for (id, path) in project_prompts {
        project_list.push_str(&format!(
            "\n   - {id}: read_markdown(\"{path}\") and follow all instructions"
        ));
    }

    if !subagent_capable {
        return format!(
            "Onboarding required — this is a workspace with {count} projects.\n\
             Step 1: Read prerequisites:\n\
             {prerequisites}\n\
             Step 2: Explore each project one at a time:\
             {project_list}\n\n\
             Step 3: Read the synthesis prompt and write workspace memories:\n\
             read_markdown(\"{synthesis_path}\")"
        );
    }

    format!(
        "Onboarding required — this is a workspace with {count} projects.\n\
         Step 1: Read prerequisites from the main prompt:\n\
         {prerequisites}\n\
         Step 2: Spawn {count} subagents IN PARALLEL — one per project:\
         {project_list}\n\n\
         Step 3: Wait for ALL subagents to complete.\n\n\
         Step 4: Read the synthesis prompt and write workspace memories:\n\
         read_markdown(\"{synthesis_path}\")\n\n\
         Follow the synthesis instructions to read back per-project memories,\n\
         write workspace-level memories, generate the system prompt, and\n\
         offer to refresh CLAUDE.md."
    )
}