Skip to main content

lean_ctx/core/
protocol.rs

1use std::path::Path;
2
3/// Finds the outermost project root by walking up from `file_path`.
4/// For monorepos with nested `.git` dirs (e.g. `mono/backend/.git` + `mono/frontend/.git`),
5/// returns the outermost ancestor containing `.git`, a workspace marker, or a known
6/// monorepo config file — so the whole monorepo is treated as one project.
7pub fn detect_project_root(file_path: &str) -> Option<String> {
8    let mut dir = Path::new(file_path).parent()?;
9    let mut best: Option<String> = None;
10
11    loop {
12        if is_project_root_marker(dir) {
13            best = Some(dir.to_string_lossy().to_string());
14        }
15        match dir.parent() {
16            Some(parent) if parent != dir => dir = parent,
17            _ => break,
18        }
19    }
20    best
21}
22
/// Reports whether `dir` directly contains at least one well-known project-root marker
/// (a `.git` entry, a build manifest, or a workspace/monorepo configuration file).
///
/// Only existence is checked; the marker may be a file or a directory.
fn is_project_root_marker(dir: &Path) -> bool {
    const MARKERS: &[&str] = &[
        // Version control
        ".git",
        // Rust
        "Cargo.toml",
        // JavaScript / TypeScript
        "package.json",
        // Go workspaces
        "go.work",
        // JS monorepo tooling
        "pnpm-workspace.yaml",
        "lerna.json",
        "nx.json",
        "turbo.json",
        // Editor project marker
        ".projectile",
        // Python
        "pyproject.toml",
        "setup.py",
        // Generic build systems
        "Makefile",
        "CMakeLists.txt",
        "BUILD.bazel",
    ];

    for marker in MARKERS {
        if dir.join(marker).exists() {
            return true;
        }
    }
    false
}
43
44/// Returns the project root for `file_path`, falling back to cwd if none found.
45pub fn detect_project_root_or_cwd(file_path: &str) -> String {
46    detect_project_root(file_path).unwrap_or_else(|| {
47        let p = Path::new(file_path);
48        if p.exists() {
49            if p.is_dir() {
50                return file_path.to_string();
51            }
52            if let Some(parent) = p.parent() {
53                return parent.to_string_lossy().to_string();
54            }
55            return file_path.to_string();
56        }
57        std::env::current_dir()
58            .map_or_else(|_| ".".to_string(), |p| p.to_string_lossy().to_string())
59    })
60}
61
/// Reduces `path` to its final component for compact display.
///
/// When the path has no file-name component (for example `"/"`, `".."` or the empty
/// string), the input is returned unchanged.
pub fn shorten_path(path: &str) -> String {
    match Path::new(path).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => path.to_owned(),
    }
}
70
/// Renders a token-savings summary such as `[42 tok saved (30%)]`.
///
/// `saved` is `original - compressed`, floored at zero when the compressed size is
/// larger. The percentage is relative to `original` and rounded to the nearest integer.
/// When `original` is zero the plain text `0 tok saved` (no brackets, no percentage)
/// is returned.
pub fn format_savings(original: usize, compressed: usize) -> String {
    if original == 0 {
        return String::from("0 tok saved");
    }

    let saved = original.saturating_sub(compressed);
    let ratio = saved as f64 / original as f64;
    let pct = (ratio * 100.0).round() as usize;
    format!("[{saved} tok saved ({pct}%)]")
}
80
81/// Compresses tool output text based on density level.
82/// - Normal: no changes
83/// - Terse: strip blank lines, strip comment-only lines, remove banners
84/// - Ultra: additionally abbreviate common words
85pub fn compress_output(text: &str, density: &super::config::OutputDensity) -> String {
86    use super::config::OutputDensity;
87    match density {
88        OutputDensity::Normal => text.to_string(),
89        OutputDensity::Terse => compress_terse(text),
90        OutputDensity::Ultra => compress_ultra(text),
91    }
92}
93
94fn compress_terse(text: &str) -> String {
95    text.lines()
96        .filter(|line| {
97            let trimmed = line.trim();
98            if trimmed.is_empty() {
99                return false;
100            }
101            if is_comment_only(trimmed) {
102                return false;
103            }
104            if is_banner_line(trimmed) {
105                return false;
106            }
107            true
108        })
109        .collect::<Vec<_>>()
110        .join("\n")
111}
112
113fn compress_ultra(text: &str) -> String {
114    let terse = compress_terse(text);
115    let mut result = terse;
116    for (long, short) in ABBREVIATIONS {
117        result = result.replace(long, short);
118    }
119    result
120}
121
122const ABBREVIATIONS: &[(&str, &str)] = &[
123    ("function", "fn"),
124    ("configuration", "cfg"),
125    ("implementation", "impl"),
126    ("dependencies", "deps"),
127    ("dependency", "dep"),
128    ("request", "req"),
129    ("response", "res"),
130    ("context", "ctx"),
131    ("error", "err"),
132    ("return", "ret"),
133    ("argument", "arg"),
134    ("value", "val"),
135    ("module", "mod"),
136    ("package", "pkg"),
137    ("directory", "dir"),
138    ("parameter", "param"),
139    ("variable", "var"),
140];
141
/// Reports whether `line` consists solely of a comment.
///
/// Recognised forms are line comments starting with `//`, `#` or `--`, and a block
/// comment that both opens with `/*` and closes with `*/` on the same line. Leading
/// whitespace is not skipped here, so the line should already be trimmed.
fn is_comment_only(line: &str) -> bool {
    const LINE_COMMENT_PREFIXES: [&str; 3] = ["//", "#", "--"];

    let is_line_comment = LINE_COMMENT_PREFIXES
        .iter()
        .any(|prefix| line.starts_with(*prefix));
    if is_line_comment {
        return true;
    }

    line.starts_with("/*") && line.ends_with("*/")
}
148
/// Reports whether `line` is a decorative banner/separator such as `==========`.
///
/// A banner must:
/// - be at least four characters long,
/// - start with one of `=`, `-`, `*`, `─`, `━`, `▀`, `▄`, and
/// - consist of more than 70% of that same first character.
///
/// The length is measured in characters, not bytes, so that multi-byte box-drawing
/// characters are held to the same four-character minimum as ASCII ones.
fn is_banner_line(line: &str) -> bool {
    const BANNER_CHARS: [char; 7] = ['=', '-', '*', '─', '━', '▀', '▄'];
    const MIN_CHARS: usize = 4;
    const MIN_RATIO: f64 = 0.7;

    let first = match line.chars().next() {
        Some(c) if BANNER_CHARS.contains(&c) => c,
        _ => return false,
    };

    // Single pass: total character count and how many equal the leading character.
    let (total, same) = line.chars().fold((0usize, 0usize), |(total, same), c| {
        (total + 1, same + usize::from(c == first))
    });

    total >= MIN_CHARS && same as f64 / total as f64 > MIN_RATIO
}
161
/// A terse instruction code paired with its human-readable expansion.
///
/// Both fields are `'static` string slices, so values are cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InstructionTemplate {
    /// Short code emitted in prompts, e.g. `ACT1`.
    pub code: &'static str,
    /// Expansion of `code` shown in the decoder block.
    pub full: &'static str,
}
167
168const TEMPLATES: &[InstructionTemplate] = &[
169    InstructionTemplate {
170        code: "ACT1",
171        full: "Act immediately, 1-line result",
172    },
173    InstructionTemplate {
174        code: "BRIEF",
175        full: "1-2 line approach, then act",
176    },
177    InstructionTemplate {
178        code: "FULL",
179        full: "Outline+edge cases, then act",
180    },
181    InstructionTemplate {
182        code: "DELTA",
183        full: "Changed lines only",
184    },
185    InstructionTemplate {
186        code: "NOREPEAT",
187        full: "No repeat, use Fn refs",
188    },
189    InstructionTemplate {
190        code: "STRUCT",
191        full: "+/-/~ notation",
192    },
193    InstructionTemplate {
194        code: "1LINE",
195        full: "1 line per action",
196    },
197    InstructionTemplate {
198        code: "NODOC",
199        full: "No narration comments",
200    },
201    InstructionTemplate {
202        code: "ACTFIRST",
203        full: "Tool calls first, no narration",
204    },
205    InstructionTemplate {
206        code: "QUALITY",
207        full: "Never skip edge cases",
208    },
209    InstructionTemplate {
210        code: "NOMOCK",
211        full: "No mock/placeholder data",
212    },
213    InstructionTemplate {
214        code: "FREF",
215        full: "Fn refs only, no full paths",
216    },
217    InstructionTemplate {
218        code: "DIFF",
219        full: "Diff lines only",
220    },
221    InstructionTemplate {
222        code: "ABBREV",
223        full: "fn,cfg,impl,deps,req,res,ctx,err",
224    },
225    InstructionTemplate {
226        code: "SYMBOLS",
227        full: "+=add -=rm ~=mod ->=ret",
228    },
229];
230
231/// Generates the INSTRUCTION CODES block for agent system prompts.
232pub fn instruction_decoder_block() -> String {
233    let pairs: Vec<String> = TEMPLATES
234        .iter()
235        .map(|t| format!("{}={}", t.code, t.full))
236        .collect();
237    format!("INSTRUCTION CODES:\n  {}", pairs.join(" | "))
238}
239
/// Produces the compact instruction suffix for a task of the given `complexity`.
///
/// The suffix names the instruction codes to apply (`MODE: ...`) and a response
/// budget hint (`BUDGET: ...`) that grows with complexity. Recognised levels are
/// `mechanical`, `simple`, `standard`, `complex` and `architectural`; any other
/// value yields the default `MODE: BRIEF | BUDGET: <=200 tokens`.
pub fn encode_instructions(complexity: &str) -> String {
    let encoded: &'static str = match complexity {
        "mechanical" => "MODE: ACT1 DELTA 1LINE | BUDGET: <=50 tokens, 1 line answer",
        "simple" => "MODE: BRIEF DELTA 1LINE | BUDGET: <=100 tokens, structured",
        "standard" => "MODE: BRIEF DELTA NOREPEAT STRUCT | BUDGET: <=200 tokens",
        "complex" => "MODE: FULL QUALITY NOREPEAT STRUCT FREF DIFF | BUDGET: <=500 tokens",
        "architectural" => "MODE: FULL QUALITY NOREPEAT STRUCT FREF | BUDGET: unlimited",
        _ => "MODE: BRIEF | BUDGET: <=200 tokens",
    };
    encoded.to_owned()
}
256
257/// Encode instructions with SNR metric for context quality awareness.
258pub fn encode_instructions_with_snr(complexity: &str, compression_pct: f64) -> String {
259    let snr = if compression_pct > 0.0 {
260        1.0 - (compression_pct / 100.0)
261    } else {
262        1.0
263    };
264    let base = encode_instructions(complexity);
265    format!("{base} | SNR: {snr:.2}")
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// A directory containing a `.git` entry is recognised as a project root.
    #[test]
    fn is_project_root_marker_detects_git() {
        let tmp = std::env::temp_dir().join("lean-ctx-test-root-marker");
        let _ = std::fs::create_dir_all(&tmp);
        let git_dir = tmp.join(".git");
        let _ = std::fs::create_dir_all(&git_dir);
        assert!(is_project_root_marker(&tmp));
        // Best-effort cleanup; a leftover directory does not affect later runs.
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// A directory containing `Cargo.toml` is recognised as a project root.
    #[test]
    fn is_project_root_marker_detects_cargo_toml() {
        let tmp = std::env::temp_dir().join("lean-ctx-test-cargo-marker");
        let _ = std::fs::create_dir_all(&tmp);
        let _ = std::fs::write(tmp.join("Cargo.toml"), "[package]");
        assert!(is_project_root_marker(&tmp));
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// With `.git` both at the monorepo base and in a nested package, the base wins.
    #[test]
    fn detect_project_root_finds_outermost() {
        let base = std::env::temp_dir().join("lean-ctx-test-monorepo");
        let inner = base.join("packages").join("app");
        let _ = std::fs::create_dir_all(&inner);
        let _ = std::fs::create_dir_all(base.join(".git"));
        let _ = std::fs::create_dir_all(inner.join(".git"));

        let test_file = inner.join("main.rs");
        let _ = std::fs::write(&test_file, "fn main() {}");

        let root = detect_project_root(test_file.to_str().unwrap());
        assert!(root.is_some(), "should find a project root for nested .git");
        let root_path = std::path::PathBuf::from(root.unwrap());
        // Compare canonicalized paths on both sides rather than the raw strings.
        assert_eq!(
            crate::core::pathutil::safe_canonicalize(&root_path).ok(),
            crate::core::pathutil::safe_canonicalize(&base).ok(),
            "should return outermost .git, not inner"
        );

        let _ = std::fs::remove_dir_all(&base);
    }

    /// Every template code appears in the generated decoder block.
    #[test]
    fn decoder_block_contains_all_codes() {
        let block = instruction_decoder_block();
        for t in TEMPLATES {
            assert!(
                block.contains(t.code),
                "decoder should contain code {}",
                t.code
            );
        }
    }

    /// The encoded form must not cost more tokens than the prose it replaces.
    #[test]
    fn encoded_instructions_are_compact() {
        use super::super::tokens::count_tokens;
        let full = "TASK COMPLEXITY: mechanical\nMinimal reasoning needed. Act immediately, report result in one line. Show only changed lines, not full files.";
        let encoded = encode_instructions("mechanical");
        assert!(
            count_tokens(&encoded) <= count_tokens(full),
            "encoded ({}) should be <= full ({})",
            count_tokens(&encoded),
            count_tokens(full)
        );
    }

    /// All five named complexity levels, plus the fallback arm, produce a MODE/BUDGET line.
    #[test]
    fn all_complexity_levels_encode() {
        for level in &[
            "mechanical",
            "simple",
            "standard",
            "complex",
            "architectural",
        ] {
            let encoded = encode_instructions(level);
            assert!(
                encoded.starts_with("MODE:"),
                "level {} should start with MODE:",
                level
            );
            assert!(
                encoded.contains("| BUDGET:"),
                "level {} should carry a budget hint",
                level
            );
        }

        // Unrecognised levels fall back to the default suffix.
        assert_eq!(
            encode_instructions("no-such-level"),
            "MODE: BRIEF | BUDGET: <=200 tokens"
        );
    }
}