Skip to main content

lean_ctx/
instructions.rs

1use crate::tools::CrpMode;
2
/// Token budget for the full instruction text assembled by
/// `build_full_instructions`. The limit is expressed in tokens, not bytes, and
/// is measured with `count_tokens` so truncation does not depend on the
/// character mix (ASCII, CJK, emoji).
///
/// Claude Code is handled separately: it truncates MCP server instructions at
/// 2048 characters, so it receives a compact instruction text while the full
/// instructions are installed as `$CLAUDE_CONFIG_DIR/rules/lean-ctx.md`
/// (defaulting to `~/.claude/rules/lean-ctx.md`) instead.
/// Session state is dynamically appended to the MCP instructions for continuity.
11const INSTRUCTION_CAP_TOKENS: usize = 1200;
12
13pub fn build_instructions(crp_mode: CrpMode) -> String {
14    build_instructions_with_client(crp_mode, "")
15}
16
17pub fn build_instructions_with_client(crp_mode: CrpMode, client_name: &str) -> String {
18    if is_claude_code_client(client_name) {
19        return build_claude_code_instructions();
20    }
21    build_full_instructions(crp_mode, client_name)
22}
23
24pub fn build_instructions_for_test(crp_mode: CrpMode) -> String {
25    // Avoid loading dynamic on-disk session/knowledge/gotcha blocks in tests, which can
26    // vary across machines and between concurrent test runs.
27    build_full_instructions_for_test(crp_mode, "")
28}
29
30pub fn build_instructions_with_client_for_test(crp_mode: CrpMode, client_name: &str) -> String {
31    if is_claude_code_client(client_name) {
32        return build_claude_code_instructions();
33    }
34    build_full_instructions_for_test(crp_mode, client_name)
35}
36
37/// Deterministic instruction builder for the Instruction Compiler.
38///
39/// MUST NOT depend on process-global env toggles or on-disk mutable config, because the compiler
40/// output is intended to be stable and diffable across runs and in CI.
41pub fn build_instructions_with_client_for_compiler(
42    crp_mode: CrpMode,
43    client_name: &str,
44    unified_tool_mode: bool,
45) -> String {
46    if is_claude_code_client(client_name) {
47        return build_claude_code_instructions();
48    }
49    build_full_instructions_for_compiler(crp_mode, client_name, unified_tool_mode)
50}
51
/// Returns `true` when `client_name` identifies a Claude Code client.
///
/// The match is case-insensitive: the name must contain "claude" and must not
/// contain "cursor".
fn is_claude_code_client(client_name: &str) -> bool {
    let normalized = client_name.to_lowercase();
    if normalized.contains("cursor") {
        return false;
    }
    normalized.contains("claude")
}
56
57pub fn claude_config_dir_display() -> String {
58    match std::env::var("CLAUDE_CONFIG_DIR") {
59        Ok(dir) if !dir.trim().is_empty() => {
60            let dir = dir.trim().to_string();
61            if dir.starts_with('~') {
62                dir
63            } else if let Some(home) = dirs::home_dir() {
64                let home_str = home.to_string_lossy();
65                if let Some(rest) = dir.strip_prefix(home_str.as_ref()) {
66                    format!("~{rest}")
67                } else {
68                    dir
69                }
70            } else {
71                dir
72            }
73        }
74        _ => "~/.claude".to_string(),
75    }
76}
77
78fn build_claude_code_instructions() -> String {
79    let shell_hint = build_shell_hint();
80    let config_dir = claude_config_dir_display();
81
82    // Load session state for continuity (compact version for Claude Code's char limit)
83    let session_block = match crate::core::session::SessionState::load_latest() {
84        Some(session) => {
85            let mut parts = Vec::new();
86            if let Some(ref task) = session.task {
87                let pct = task
88                    .progress_pct
89                    .map_or(String::new(), |p| format!(" [{p}%]"));
90                parts.push(format!("Task: {}{pct}", task.description));
91            }
92            if !session.decisions.is_empty() {
93                let items: Vec<&str> = session
94                    .decisions
95                    .iter()
96                    .rev()
97                    .take(3)
98                    .map(|d| d.summary.as_str())
99                    .collect();
100                parts.push(format!("Decisions: {}", items.join("; ")));
101            }
102            if !session.files_touched.is_empty() {
103                let modified: Vec<&str> = session
104                    .files_touched
105                    .iter()
106                    .filter(|f| f.modified)
107                    .take(5)
108                    .map(|f| f.path.as_str())
109                    .collect();
110                if !modified.is_empty() {
111                    parts.push(format!("Modified: {}", modified.join(", ")));
112                }
113            }
114            if !session.findings.is_empty() {
115                let recent: Vec<&str> = session
116                    .findings
117                    .iter()
118                    .rev()
119                    .take(3)
120                    .map(|f| f.summary.as_str())
121                    .collect();
122                parts.push(format!("Recent: {}", recent.join("; ")));
123            }
124            if parts.is_empty() {
125                String::new()
126            } else {
127                format!("\n\n--- SESSION ---\n{}\n---", parts.join("\n"))
128            }
129        }
130        None => String::new(),
131    };
132
133    let cfg = crate::core::config::Config::load();
134    let shadow_preamble = if cfg.shadow_mode {
135        "SHADOW MODE ACTIVE: ALL reads/searches/shell MUST use ctx_* tools. Native equivalents are intercepted.\n\n"
136    } else {
137        ""
138    };
139
140    let instr = format!("\
141{shadow_preamble}\
142ALWAYS use lean-ctx MCP tools instead of native equivalents.
143
144Tool mapping (MANDATORY):
145• Read/cat/head/tail -> ctx_read(path, mode)
146• Shell/bash -> ctx_shell(command)
147• Grep/rg -> ctx_search(pattern, path)
148• ls/find -> ctx_tree(path, depth)
149• Edit/StrReplace -> native (lean-ctx=READ only). If Edit needs Read and Read is unavailable, use ctx_edit.
150• Write, Delete, Glob -> normal. NEVER loop on Edit failures — use ctx_edit.
151
152ctx_read modes: full|map|signatures|diff|task|reference|aggressive|entropy|lines:N-M
153Auto-selects mode. Re-reads ~13 tok. File refs F1,F2.. persist.
154
155Workflow: Orient(ctx_overview) → Locate(ctx_search) → Read(ctx_read) → Edit(ctx_edit/native) → Verify(ctx_read diff + ctx_shell test) → Record(ctx_knowledge).
156
157CEP: 1.ACT FIRST 2.DELTA ONLY 3.STRUCTURED(+/-/~) 4.ONE LINE 5.QUALITY
158{shell_hint}\
159Prefer: ctx_read>Read | ctx_shell>Shell | ctx_search>Grep | ctx_tree>ls
160Edit: native Edit/StrReplace preferred, ctx_edit if Edit unavailable.
161Never echo tool output. Never narrate. Show only changed code.
162Full instructions at {config_dir}/CLAUDE.md (imports rules/lean-ctx.md){session_block}");
163
164    instr
165}
166
167fn build_full_instructions(crp_mode: CrpMode, client_name: &str) -> String {
168    let cfg = crate::core::config::Config::load();
169    let minimal = cfg.minimal_overhead_effective_for_client(client_name);
170
171    let profile = crate::core::litm::LitmProfile::from_client_name(client_name);
172    let loaded_session = if minimal {
173        None
174    } else {
175        crate::core::session::SessionState::load_latest()
176    };
177
178    let (session_block, litm_end_block) = match loaded_session {
179        Some(ref session) => {
180            let positioned = crate::core::litm::position_optimize(session);
181            let begin = format!(
182                "\n\n--- ACTIVE SESSION (LITM P1: begin position, profile: {}) ---\n{}\n---\n",
183                profile.name, positioned.begin_block
184            );
185            let end = if positioned.end_block.is_empty() {
186                String::new()
187            } else {
188                format!(
189                    "\n--- SESSION RESUME (post-compaction) ---\n{}\n---\n",
190                    positioned.end_block
191                )
192            };
193            (begin, end)
194        }
195        None => (String::new(), String::new()),
196    };
197
198    let project_root_for_blocks = if minimal {
199        None
200    } else {
201        loaded_session
202            .as_ref()
203            .and_then(|s| s.project_root.clone())
204            .or_else(|| {
205                std::env::current_dir()
206                    .ok()
207                    .map(|p| p.to_string_lossy().to_string())
208            })
209    };
210
211    let knowledge_block = match &project_root_for_blocks {
212        Some(root) => {
213            let knowledge = crate::core::knowledge::ProjectKnowledge::load(root);
214            match knowledge {
215                Some(k) if !k.facts.is_empty() || !k.patterns.is_empty() => {
216                    let aaak = k.format_aaak();
217                    if aaak.is_empty() {
218                        String::new()
219                    } else {
220                        format!("\n--- PROJECT MEMORY (AAAK) ---\n{}\n---\n", aaak.trim())
221                    }
222                }
223                _ => String::new(),
224            }
225        }
226        None => String::new(),
227    };
228
229    let gotcha_block = match &project_root_for_blocks {
230        Some(root) => {
231            let store = crate::core::gotcha_tracker::GotchaStore::load(root);
232            let files: Vec<String> = loaded_session
233                .as_ref()
234                .map(|s| s.files_touched.iter().map(|ft| ft.path.clone()).collect())
235                .unwrap_or_default();
236            let block = store.format_injection_block(&files);
237            if block.is_empty() {
238                String::new()
239            } else {
240                format!("\n{block}\n")
241            }
242        }
243        None => String::new(),
244    };
245
246    let shell_hint = build_shell_hint();
247
248    use crate::core::rules_canonical as rc;
249    let tool_bullets = rc::tool_mapping_bullets(rc::Mode::Mcp);
250    let compat = rc::compatibility_block();
251    let read_modes = rc::ctx_read_modes_block();
252    let auto_block = rc::automation_block();
253    let cep = rc::cep_block();
254    let litm_pref = rc::litm_end_block(rc::Mode::Mcp);
255
256    let shadow_preamble = if cfg.shadow_mode {
257        "SHADOW MODE ACTIVE: ALL file reads, searches, and shell commands MUST go through ctx_* tools. \
258         Native Read/Grep/Shell are intercepted and redirected — using ctx_* directly is faster and more reliable.\n\n"
259    } else {
260        ""
261    };
262
263    let mut base = format!(
264        "\
265{shadow_preamble}\
266CRITICAL: ALWAYS use lean-ctx MCP tools instead of native equivalents for token savings.\n\
267\n\
268{tool_bullets}\n\
269\n\
270{compat}\n\
271{shell_hint}\
272\n\
273{read_modes}\n\
274\n\
275{auto_block}\n\
276\n\
277{cep}\n\
278\n\
279{decoder_block}\n\
280\n\
281{session_block}\
282{knowledge_block}\
283{gotcha_block}\
284\n\
285--- ORIGIN ---\n\
286{origin}\n\
287\n\
288{litm_pref}\
289{litm_end_block}",
290        decoder_block = crate::core::protocol::instruction_decoder_block(),
291        origin = crate::core::integrity::origin_line(),
292        litm_end_block = &litm_end_block
293    );
294
295    if should_use_unified(client_name) {
296        base.push_str("\n\n");
297        base.push_str(rc::unified_tool_mode_block());
298        base.push('\n');
299    }
300
301    let intelligence_block = build_intelligence_block();
302    let terse_block = build_terse_agent_block_for_client(&crp_mode, client_name);
303
304    // The guidance suffix (CRP-mode rules + compression/output-style + the
305    // intelligence block) is the operational contract for the agent and must
306    // survive the token cap. The variable session/knowledge/gotcha blocks live
307    // inside `base` and are the right thing to shed under pressure (H3). So we
308    // protect the suffix and truncate only `base` to fit the budget.
309    let guidance_suffix = match crp_mode {
310        CrpMode::Off => format!("{terse_block}{intelligence_block}"),
311        CrpMode::Compact => format!(
312            "CRP MODE: compact\n\
313Omit filler. Abbreviate: fn,cfg,impl,deps,req,res,ctx,err,ret,arg,val,ty,mod.\n\
314Diff lines (+/-) only. TARGET: <=200 tok. Trust tool outputs.\n\n\
315{terse_block}{intelligence_block}"
316        ),
317        CrpMode::Tdd => format!(
318            "CRP MODE: tdd\n\
319Max density. Every token carries meaning. Fn refs only, diff lines (+/-) only.\n\
320Abbreviate: fn,cfg,impl,deps,req,res,ctx,err,ret,arg,val,ty,mod.\n\
321+F1:42 param(timeout:Duration) | -F1:10-15 | ~F1:42 old->new\n\
322BUDGET: <=150 tok. ZERO NARRATION. Trust tool outputs.\n\n\
323{terse_block}{intelligence_block}"
324        ),
325    };
326
327    assemble_within_cap(&base, &guidance_suffix, INSTRUCTION_CAP_TOKENS)
328}
329
330/// Join `base` and a protected `suffix` so the result fits `cap_tokens`,
331/// truncating only `base` if needed. The suffix is the agent's operational
332/// contract (compression/output-style guidance) and is preserved verbatim as
333/// long as it fits on its own; otherwise we fall back to capping the whole.
334fn assemble_within_cap(base: &str, suffix: &str, cap_tokens: usize) -> String {
335    use crate::core::tokens::count_tokens;
336    let suffix = suffix.trim_end_matches('\n');
337    if suffix.is_empty() {
338        let full = base.to_string();
339        return if count_tokens(&full) > cap_tokens {
340            truncate_to_token_cap(&full, cap_tokens)
341        } else {
342            full
343        };
344    }
345
346    let full = format!("{base}\n\n{suffix}");
347    if count_tokens(&full) <= cap_tokens {
348        return full;
349    }
350
351    let suffix_tokens = count_tokens(suffix);
352    // Reserve room for the suffix plus the "\n\n" join. If the suffix alone is
353    // already at/over budget, degrade to a plain tail-cap of the whole text.
354    let Some(base_budget) = cap_tokens.checked_sub(suffix_tokens + 1) else {
355        return truncate_to_token_cap(&full, cap_tokens);
356    };
357    let trimmed_base = truncate_to_token_cap(base, base_budget);
358    format!("{trimmed_base}\n\n{suffix}")
359}
360
361fn truncate_to_token_cap(s: &str, cap_tokens: usize) -> String {
362    use crate::core::tokens::count_tokens;
363    if count_tokens(s) <= cap_tokens {
364        return s.to_string();
365    }
366    // Keep whole lines: candidate cut points are the byte offsets of each
367    // newline. Token count is monotonic in prefix length, so binary-search for
368    // the longest whole-line prefix within the cap. This costs O(log lines)
369    // tokenizations instead of O(lines) — the per-line loop was pathologically
370    // slow on large session blocks (and timed out under coverage's ptrace
371    // instrumentation).
372    let cuts: Vec<usize> = s.match_indices('\n').map(|(i, _)| i).collect();
373    let (mut lo, mut hi) = (0usize, cuts.len());
374    let mut best: Option<usize> = None;
375    while lo < hi {
376        let mid = lo + (hi - lo) / 2;
377        let end = cuts[mid];
378        if end > 0 && count_tokens(&s[..end]) <= cap_tokens {
379            best = Some(end);
380            lo = mid + 1;
381        } else {
382            hi = mid;
383        }
384    }
385    if let Some(end) = best {
386        return s[..end].to_string();
387    }
388    // No line boundary fits — fall back to a char-boundary byte approximation.
389    let byte_approx = cap_tokens * 4;
390    let safe = s.floor_char_boundary(byte_approx.min(s.len()));
391    s[..safe].to_string()
392}
393
394fn build_full_instructions_for_test(crp_mode: CrpMode, client_name: &str) -> String {
395    use crate::core::rules_canonical as rc;
396    let shell_hint = build_shell_hint();
397    let session_block = String::new();
398    let knowledge_block = String::new();
399    let gotcha_block = String::new();
400    let litm_end_block = String::new();
401
402    let tool_bullets = rc::tool_mapping_bullets(rc::Mode::Mcp);
403    let compat = rc::compatibility_block();
404    let read_modes = rc::ctx_read_modes_block();
405    let auto_block = rc::automation_block();
406    let cep = rc::cep_block();
407    let litm_pref = rc::litm_end_block(rc::Mode::Mcp);
408
409    let mut base = format!(
410        "\
411CRITICAL: ALWAYS use lean-ctx MCP tools instead of native equivalents for token savings.\n\
412\n\
413{tool_bullets}\n\
414\n\
415{compat}\n\
416{shell_hint}\
417\n\
418{read_modes}\n\
419\n\
420{auto_block}\n\
421\n\
422{cep}\n\
423\n\
424{decoder_block}\n\
425\n\
426{session_block}\
427{knowledge_block}\
428{gotcha_block}\
429\n\
430--- ORIGIN ---\n\
431{origin}\n\
432\n\
433{litm_pref}\
434{litm_end_block}",
435        decoder_block = crate::core::protocol::instruction_decoder_block(),
436        origin = crate::core::integrity::origin_line(),
437        litm_end_block = &litm_end_block
438    );
439
440    if should_use_unified(client_name) {
441        base.push_str("\n\n");
442        base.push_str(rc::unified_tool_mode_block());
443        base.push('\n');
444    }
445
446    let intelligence_block = build_intelligence_block();
447    let terse_block = build_terse_agent_block_for_client(&crp_mode, client_name);
448
449    match crp_mode {
450        CrpMode::Off => format!("{base}\n\n{terse_block}{intelligence_block}"),
451        CrpMode::Compact => {
452            format!(
453                "{base}\n\n\
454CRP MODE: compact\n\
455Omit filler. Abbreviate: fn,cfg,impl,deps,req,res,ctx,err,ret,arg,val,ty,mod.\n\
456Diff lines (+/-) only. TARGET: <=200 tok. Trust tool outputs.\n\n\
457{terse_block}{intelligence_block}"
458            )
459        }
460        CrpMode::Tdd => {
461            format!(
462                "{base}\n\n\
463CRP MODE: tdd\n\
464Max density. Every token carries meaning. Fn refs only, diff lines (+/-) only.\n\
465Abbreviate: fn,cfg,impl,deps,req,res,ctx,err,ret,arg,val,ty,mod.\n\
466+F1:42 param(timeout:Duration) | -F1:10-15 | ~F1:42 old->new\n\
467BUDGET: <=150 tok. ZERO NARRATION. Trust tool outputs.\n\n\
468{terse_block}{intelligence_block}"
469            )
470        }
471    }
472}
473
474fn build_full_instructions_for_compiler(
475    crp_mode: CrpMode,
476    client_name: &str,
477    unified_tool_mode: bool,
478) -> String {
479    let shell_hint = build_shell_hint();
480    let session_block = String::new();
481    let knowledge_block = String::new();
482    let gotcha_block = String::new();
483    let litm_end_block = String::new();
484
485    use crate::core::rules_canonical as rc;
486    let tool_bullets = rc::tool_mapping_bullets(rc::Mode::Mcp);
487    let compat = rc::compatibility_block();
488    let read_modes = rc::ctx_read_modes_block();
489    let auto_blk = rc::automation_block();
490    let cep = rc::cep_block();
491    let litm_pref = rc::litm_end_block(rc::Mode::Mcp);
492
493    let mut base = format!(
494        "\
495CRITICAL: ALWAYS use lean-ctx MCP tools instead of native equivalents for token savings.\n\
496\n\
497{tool_bullets}\n\
498\n\
499{compat}\n\
500{shell_hint}\
501\n\
502{read_modes}\n\
503\n\
504{auto_blk}\n\
505\n\
506{cep}\n\
507\n\
508{decoder_block}\n\
509\n\
510{session_block}\
511{knowledge_block}\
512{gotcha_block}\
513\n\
514--- ORIGIN ---\n\
515{origin}\n\
516\n\
517{litm_pref}\
518{litm_end_block}",
519        decoder_block = crate::core::protocol::instruction_decoder_block(),
520        origin = crate::core::integrity::origin_line(),
521        litm_end_block = &litm_end_block
522    );
523
524    if unified_tool_mode {
525        base.push_str("\n\n");
526        base.push_str(rc::unified_tool_mode_block());
527        base.push('\n');
528    }
529
530    let _ = client_name; // keep signature aligned with other builders
531    let intelligence_block = build_intelligence_block();
532
533    match crp_mode {
534        CrpMode::Off => format!("{base}\n\n{intelligence_block}"),
535        CrpMode::Compact => {
536            format!(
537                "{base}\n\n\
538CRP MODE: compact\n\
539Omit filler. Abbreviate: fn,cfg,impl,deps,req,res,ctx,err,ret,arg,val,ty,mod.\n\
540Diff lines (+/-) only. TARGET: <=200 tok. Trust tool outputs.\n\n\
541{intelligence_block}"
542            )
543        }
544        CrpMode::Tdd => {
545            format!(
546                "{base}\n\n\
547CRP MODE: tdd\n\
548Max density. Every token carries meaning. Fn refs only, diff lines (+/-) only.\n\
549Abbreviate: fn,cfg,impl,deps,req,res,ctx,err,ret,arg,val,ty,mod.\n\
550+F1:42 param(timeout:Duration) | -F1:10-15 | ~F1:42 old->new\n\
551BUDGET: <=150 tok. ZERO NARRATION. Trust tool outputs.\n\n\
552{intelligence_block}"
553            )
554        }
555    }
556}
557
558pub fn claude_code_instructions() -> String {
559    build_claude_code_instructions()
560}
561
562fn build_terse_agent_block_for_client(_crp_mode: &CrpMode, client_name: &str) -> String {
563    use crate::core::config::{CompressionLevel, Config};
564    let cfg = Config::load();
565    let compression = CompressionLevel::effective(&cfg);
566    if compression.is_active() {
567        return crate::core::terse::agent_prompts::build_prompt_block_for_client(
568            &compression,
569            client_name,
570        );
571    }
572    String::new()
573}
574
/// Returns the fixed "OUTPUT EFFICIENCY" guidance block: a heading followed by
/// two bullet lines, with no trailing newline.
fn build_intelligence_block() -> String {
    [
        "OUTPUT EFFICIENCY:",
        "• Never echo tool output code. Never add narration comments. Show only changed code.",
        "• [TASK:type] and SCOPE hints included. Architecture=thorough, generate=code.",
    ]
    .join("\n")
}
582
583fn build_shell_hint() -> String {
584    if !cfg!(windows) {
585        return String::new();
586    }
587    let name = crate::shell::shell_name();
588    let is_posix = matches!(name.as_str(), "bash" | "sh" | "zsh" | "fish");
589    if is_posix {
590        format!(
591            "\nSHELL: {name} (POSIX). Use POSIX commands (cat, head, grep, find, ls). \
592             Do NOT use PowerShell cmdlets (Get-Content, Select-Object, Get-ChildItem).\n"
593        )
594    } else if name.contains("powershell") || name.contains("pwsh") {
595        format!("\nSHELL: {name}. Use PowerShell cmdlets.\n")
596    } else {
597        format!("\nSHELL: {name}.\n")
598    }
599}
600
/// Decides whether the unified tool mode block should be appended.
///
/// Driven purely by environment variables: `LEAN_CTX_FULL_TOOLS` (if set to
/// any valid Unicode value) forces `false` and takes precedence; otherwise
/// `LEAN_CTX_UNIFIED` (if set) yields `true`. `client_name` is currently not
/// consulted.
fn should_use_unified(client_name: &str) -> bool {
    let _ = client_name;
    let full_tools_forced = std::env::var("LEAN_CTX_FULL_TOOLS").is_ok();
    let unified_requested = std::env::var("LEAN_CTX_UNIFIED").is_ok();
    !full_tools_forced && unified_requested
}
611
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::tokens::count_tokens;

    /// A base far larger than the cap is trimmed while the suffix is kept.
    #[test]
    fn guidance_suffix_survives_oversized_base() {
        // Stand-in for a bloated session/knowledge `base` that exceeds the cap
        // on its own.
        let oversized_base = "SESSION LINE\n".repeat(4000);
        let guidance = "OUTPUT STYLE: expert-terse\nFn refs only, diff lines only.";

        let assembled = assemble_within_cap(&oversized_base, guidance, INSTRUCTION_CAP_TOKENS);

        assert!(
            assembled.contains("OUTPUT STYLE: expert-terse"),
            "protected guidance suffix must survive truncation"
        );
        assert!(
            count_tokens(&assembled) <= INSTRUCTION_CAP_TOKENS,
            "assembled output must respect the token cap"
        );
        assert!(
            assembled.len() < oversized_base.len(),
            "oversized base must have been truncated"
        );
    }

    /// Inputs that fit the budget come back with both parts intact.
    #[test]
    fn under_cap_keeps_everything() {
        let (small_base, guidance) = ("tool mapping block", "OUTPUT STYLE: dense");
        let assembled = assemble_within_cap(small_base, guidance, INSTRUCTION_CAP_TOKENS);
        assert!(assembled.contains(small_base));
        assert!(assembled.contains(guidance));
    }

    /// With no suffix, the base alone is capped.
    #[test]
    fn empty_suffix_caps_base_only() {
        let oversized_base = "x\n".repeat(4000);
        let assembled = assemble_within_cap(&oversized_base, "", INSTRUCTION_CAP_TOKENS);
        assert!(count_tokens(&assembled) <= INSTRUCTION_CAP_TOKENS);
    }
}