Skip to main content

lean_ctx/core/
protocol.rs

1use std::path::Path;
2
3/// Finds the outermost project root by walking up from `file_path`.
4/// For monorepos with nested `.git` dirs (e.g. `mono/backend/.git` + `mono/frontend/.git`),
5/// returns the outermost ancestor containing `.git`, a workspace marker, or a known
6/// monorepo config file — so the whole monorepo is treated as one project.
7pub fn detect_project_root(file_path: &str) -> Option<String> {
8    let mut dir = Path::new(file_path).parent()?;
9    let mut best: Option<String> = None;
10
11    loop {
12        if is_project_root_marker(dir) {
13            best = Some(
14                crate::core::pathutil::safe_canonicalize_or_self(dir)
15                    .to_string_lossy()
16                    .to_string(),
17            );
18        }
19        match dir.parent() {
20            Some(parent) if parent != dir => dir = parent,
21            _ => break,
22        }
23    }
24    best
25}
26
/// Reports whether `dir` directly contains an entry that usually marks a project
/// root: a `.git` entry, a build manifest, or a workspace/monorepo config file.
///
/// Only `dir` itself is inspected; ancestors and descendants are not searched.
fn is_project_root_marker(dir: &Path) -> bool {
    const ROOT_MARKERS: &[&str] = &[
        // Version control / editor project markers
        ".git",
        ".projectile",
        // Language manifests and build files
        "Cargo.toml",
        "package.json",
        "pyproject.toml",
        "setup.py",
        "Makefile",
        "CMakeLists.txt",
        "BUILD.bazel",
        // Workspace / monorepo configuration
        "go.work",
        "pnpm-workspace.yaml",
        "lerna.json",
        "nx.json",
        "turbo.json",
    ];

    for marker in ROOT_MARKERS.iter() {
        let candidate = dir.join(marker);
        if candidate.exists() {
            return true;
        }
    }
    false
}
47
48pub fn detect_project_root_or_cwd(file_path: &str) -> String {
49    detect_project_root(file_path).unwrap_or_else(|| {
50        let p = Path::new(file_path);
51        if p.exists() {
52            if p.is_dir() {
53                return file_path.to_string();
54            }
55            if let Some(parent) = p.parent() {
56                return parent.to_string_lossy().to_string();
57            }
58            return file_path.to_string();
59        }
60        std::env::current_dir()
61            .map(|p| p.to_string_lossy().to_string())
62            .unwrap_or_else(|_| ".".to_string())
63    })
64}
65
/// Reduces `path` to its final component for compact display.
///
/// When the path has no final component (for example `/`, the empty string, or
/// a path ending in `..`), `path` is returned unchanged.
pub fn shorten_path(path: &str) -> String {
    match Path::new(path).file_name() {
        Some(last_component) => last_component.to_string_lossy().into_owned(),
        None => path.to_owned(),
    }
}
73
/// Renders a short token-savings summary.
///
/// - `original == 0` yields the plain text `0 tok saved` (no brackets, no percentage).
/// - Otherwise yields `[<saved> tok saved (<pct>%)]`, where `saved` is
///   `original - compressed` clamped at zero and `pct` is `saved / original`
///   as a percentage rounded to the nearest integer.
pub fn format_savings(original: usize, compressed: usize) -> String {
    // Guard first: the percentage below divides by `original`.
    if original == 0 {
        return String::from("0 tok saved");
    }

    let saved = original.saturating_sub(compressed);
    let ratio = saved as f64 / original as f64;
    let pct = (ratio * 100.0).round() as usize;
    format!("[{saved} tok saved ({pct}%)]")
}
82
83/// Compresses tool output text based on density level.
84/// - Normal: no changes
85/// - Terse: strip blank lines, strip comment-only lines, remove banners
86/// - Ultra: additionally abbreviate common words
87pub fn compress_output(text: &str, density: &super::config::OutputDensity) -> String {
88    use super::config::OutputDensity;
89    match density {
90        OutputDensity::Normal => text.to_string(),
91        OutputDensity::Terse => compress_terse(text),
92        OutputDensity::Ultra => compress_ultra(text),
93    }
94}
95
96fn compress_terse(text: &str) -> String {
97    text.lines()
98        .filter(|line| {
99            let trimmed = line.trim();
100            if trimmed.is_empty() {
101                return false;
102            }
103            if is_comment_only(trimmed) {
104                return false;
105            }
106            if is_banner_line(trimmed) {
107                return false;
108            }
109            true
110        })
111        .collect::<Vec<_>>()
112        .join("\n")
113}
114
115fn compress_ultra(text: &str) -> String {
116    let terse = compress_terse(text);
117    let mut result = terse;
118    for (long, short) in ABBREVIATIONS {
119        result = result.replace(long, short);
120    }
121    result
122}
123
/// Replacement table used by `compress_ultra`.
///
/// Each entry is `(long form, short form)`. Entries are applied in the order
/// listed, as case-sensitive substring replacements.
const ABBREVIATIONS: &[(&str, &str)] = &[
    ("function", "fn"),
    ("configuration", "cfg"),
    ("implementation", "impl"),
    ("dependencies", "deps"),
    ("dependency", "dep"),
    ("request", "req"),
    ("response", "res"),
    ("context", "ctx"),
    ("error", "err"),
    ("return", "ret"),
    ("argument", "arg"),
    ("value", "val"),
    ("module", "mod"),
    ("package", "pkg"),
    ("directory", "dir"),
    ("parameter", "param"),
    ("variable", "var"),
];
143
/// Reports whether `line` consists solely of a comment.
///
/// A line qualifies when it begins with a line-comment prefix (`//`, `#` or
/// `--`) or is a block comment that opens with `/*` and closes with `*/` on
/// the same line. Callers are expected to pass an already-trimmed line.
fn is_comment_only(line: &str) -> bool {
    const LINE_COMMENT_PREFIXES: [&str; 3] = ["//", "#", "--"];

    let is_line_comment = LINE_COMMENT_PREFIXES
        .iter()
        .any(|prefix| line.starts_with(*prefix));
    let is_closed_block_comment = line.starts_with("/*") && line.ends_with("*/");

    is_line_comment || is_closed_block_comment
}
150
/// Reports whether `line` is a decorative banner/separator such as `=====` or
/// `────────`.
///
/// A line is a banner when all of the following hold:
/// - it is at least four characters long,
/// - its first character is one of `=`, `-`, `*`, `─`, `━`, `▀`, `▄`,
/// - more than 70% of its characters equal that first character.
///
/// Lengths are measured in characters rather than bytes, so the minimum-length
/// rule treats multi-byte box-drawing characters the same as ASCII ones.
fn is_banner_line(line: &str) -> bool {
    const MIN_CHARS: usize = 4;
    const MIN_FILL_RATIO: f64 = 0.7;

    let fill = match line.chars().next() {
        Some(c) if matches!(c, '=' | '-' | '*' | '─' | '━' | '▀' | '▄') => c,
        _ => return false,
    };

    // Single pass: total character count and how many match the fill character.
    let (total, matching) = line.chars().fold((0usize, 0usize), |(total, matching), c| {
        (total + 1, matching + usize::from(c == fill))
    });

    total >= MIN_CHARS && matching as f64 / total as f64 > MIN_FILL_RATIO
}
163
/// A short instruction code paired with the text it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InstructionTemplate {
    /// Compact code emitted in encoded instructions (e.g. `ACT1`).
    pub code: &'static str,
    /// Expansion of `code`, as listed in the decoder block.
    pub full: &'static str,
}
168
169const TEMPLATES: &[InstructionTemplate] = &[
170    InstructionTemplate {
171        code: "ACT1",
172        full: "Act immediately, 1-line result",
173    },
174    InstructionTemplate {
175        code: "BRIEF",
176        full: "1-2 line approach, then act",
177    },
178    InstructionTemplate {
179        code: "FULL",
180        full: "Outline+edge cases, then act",
181    },
182    InstructionTemplate {
183        code: "DELTA",
184        full: "Changed lines only",
185    },
186    InstructionTemplate {
187        code: "NOREPEAT",
188        full: "No repeat, use Fn refs",
189    },
190    InstructionTemplate {
191        code: "STRUCT",
192        full: "+/-/~ notation",
193    },
194    InstructionTemplate {
195        code: "1LINE",
196        full: "1 line per action",
197    },
198    InstructionTemplate {
199        code: "NODOC",
200        full: "No narration comments",
201    },
202    InstructionTemplate {
203        code: "ACTFIRST",
204        full: "Tool calls first, no narration",
205    },
206    InstructionTemplate {
207        code: "QUALITY",
208        full: "Never skip edge cases",
209    },
210    InstructionTemplate {
211        code: "NOMOCK",
212        full: "No mock/placeholder data",
213    },
214    InstructionTemplate {
215        code: "FREF",
216        full: "Fn refs only, no full paths",
217    },
218    InstructionTemplate {
219        code: "DIFF",
220        full: "Diff lines only",
221    },
222    InstructionTemplate {
223        code: "ABBREV",
224        full: "fn,cfg,impl,deps,req,res,ctx,err",
225    },
226    InstructionTemplate {
227        code: "SYMBOLS",
228        full: "+=add -=rm ~=mod ->=ret",
229    },
230];
231
232pub fn instruction_decoder_block() -> String {
233    let pairs: Vec<String> = TEMPLATES
234        .iter()
235        .map(|t| format!("{}={}", t.code, t.full))
236        .collect();
237    format!("INSTRUCTION CODES:\n  {}", pairs.join(" | "))
238}
239
/// Produces the compact instruction suffix for a task complexity level.
///
/// The suffix has the form `MODE: <codes> | BUDGET: <hint>`; the budget hint
/// grows with complexity to steer the length of the LLM's response.
/// Recognized levels are `mechanical`, `simple`, `standard`, `complex` and
/// `architectural`; any other value gets `BRIEF` with a 200-token budget.
pub fn encode_instructions(complexity: &str) -> String {
    let suffix: &'static str = match complexity {
        "mechanical" => "MODE: ACT1 DELTA 1LINE | BUDGET: <=50 tokens, 1 line answer",
        "simple" => "MODE: BRIEF DELTA 1LINE | BUDGET: <=100 tokens, structured",
        "standard" => "MODE: BRIEF DELTA NOREPEAT STRUCT | BUDGET: <=200 tokens",
        "complex" => "MODE: FULL QUALITY NOREPEAT STRUCT FREF DIFF | BUDGET: <=500 tokens",
        "architectural" => "MODE: FULL QUALITY NOREPEAT STRUCT FREF | BUDGET: unlimited",
        _ => "MODE: BRIEF | BUDGET: <=200 tokens",
    };
    suffix.to_owned()
}
256
257/// Encode instructions with SNR metric for context quality awareness.
258pub fn encode_instructions_with_snr(complexity: &str, compression_pct: f64) -> String {
259    let snr = if compression_pct > 0.0 {
260        1.0 - (compression_pct / 100.0)
261    } else {
262        1.0
263    };
264    let base = encode_instructions(complexity);
265    format!("{base} | SNR: {snr:.2}")
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// A directory containing a `.git` entry is recognized as a project root.
    #[test]
    fn is_project_root_marker_detects_git() {
        let tmp = std::env::temp_dir().join("nebu-ctx-test-root-marker");
        let _ = std::fs::create_dir_all(&tmp);
        let git_dir = tmp.join(".git");
        let _ = std::fs::create_dir_all(&git_dir);
        assert!(is_project_root_marker(&tmp));
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// A directory containing a `Cargo.toml` file is recognized as a project root.
    #[test]
    fn is_project_root_marker_detects_cargo_toml() {
        let tmp = std::env::temp_dir().join("nebu-ctx-test-cargo-marker");
        let _ = std::fs::create_dir_all(&tmp);
        let _ = std::fs::write(tmp.join("Cargo.toml"), "[package]");
        assert!(is_project_root_marker(&tmp));
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// With nested `.git` directories, the outer one is reported as the root.
    #[test]
    fn detect_project_root_finds_outermost() {
        let base = std::env::temp_dir().join("nebu-ctx-test-monorepo");
        let inner = base.join("packages").join("app");
        let _ = std::fs::create_dir_all(&inner);
        let _ = std::fs::create_dir_all(base.join(".git"));
        let _ = std::fs::create_dir_all(inner.join(".git"));

        let test_file = inner.join("main.rs");
        let _ = std::fs::write(&test_file, "fn main() {}");

        let root = detect_project_root(test_file.to_str().unwrap());
        assert!(root.is_some(), "should find a project root for nested .git");
        let root_path = std::path::PathBuf::from(root.unwrap());
        assert_eq!(
            crate::core::pathutil::safe_canonicalize(&root_path).ok(),
            crate::core::pathutil::safe_canonicalize(&base).ok(),
            "should return outermost .git, not inner"
        );

        let _ = std::fs::remove_dir_all(&base);
    }

    /// A file reached through a symlinked directory resolves to the real root.
    #[cfg(unix)]
    #[test]
    fn detect_project_root_canonicalizes_symlink_aliases() {
        use std::os::unix::fs::symlink;

        let tmp = tempfile::tempdir().unwrap();
        let real_root = tmp.path().join("real-repo");
        let alias_parent = tmp.path().join("alias-parent");
        let alias_root = alias_parent.join("repo");
        let nested = real_root.join("src");

        std::fs::create_dir_all(real_root.join(".git")).unwrap();
        std::fs::create_dir_all(&nested).unwrap();
        std::fs::create_dir_all(&alias_parent).unwrap();
        symlink(&real_root, &alias_root).unwrap();

        let detected = detect_project_root(alias_root.join("src/main.rs").to_str().unwrap());

        // The temp directory itself may sit behind a symlink (e.g. `/var` ->
        // `/private/var` on macOS), so the expectation must be canonicalized the
        // same way the detected root is.
        let expected = crate::core::pathutil::safe_canonicalize_or_self(real_root.as_path())
            .to_string_lossy()
            .to_string();

        assert_eq!(detected.as_deref(), Some(expected.as_str()));
    }

    /// Every template code appears in the rendered decoder block.
    #[test]
    fn decoder_block_contains_all_codes() {
        let block = instruction_decoder_block();
        for t in TEMPLATES {
            assert!(
                block.contains(t.code),
                "decoder should contain code {}",
                t.code
            );
        }
    }

    /// The encoded form costs no more tokens than the prose it replaces.
    #[test]
    fn encoded_instructions_are_compact() {
        use super::super::tokens::count_tokens;
        let full = "TASK COMPLEXITY: mechanical\nMinimal reasoning needed. Act immediately, report result in one line. Show only changed lines, not full files.";
        let encoded = encode_instructions("mechanical");
        assert!(
            count_tokens(&encoded) <= count_tokens(full),
            "encoded ({}) should be <= full ({})",
            count_tokens(&encoded),
            count_tokens(full)
        );
    }

    /// Every recognized level, and the fallback for unknown input, encodes to a
    /// `MODE: ... | BUDGET: ...` suffix.
    #[test]
    fn all_complexity_levels_encode() {
        let levels = [
            "mechanical",
            "simple",
            "standard",
            "complex",
            "architectural",
            "unknown-level",
        ];
        for level in &levels {
            let encoded = encode_instructions(level);
            assert!(
                encoded.starts_with("MODE:"),
                "{} should start with MODE:",
                level
            );
            assert!(
                encoded.contains("| BUDGET:"),
                "{} should carry a BUDGET hint",
                level
            );
        }
    }
}