1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus metadata about how it was produced.
///
/// Derives `Debug` and `Clone` so callers can log, compare and copy results.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text returned to the caller.
    pub content: String,
    /// Mode that was actually used (e.g. after `"auto"` was resolved); the handler sets
    /// `"error"` when the file could not be read.
    pub resolved_mode: String,
    /// Token count reported for this output.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed outputs, telling the reader that `mode="full"`
/// returns the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Resolved modes whose rendered output is stored in, and served from, the per-file
/// compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];
26
27fn is_cacheable_mode(mode: &str) -> bool {
28 CACHEABLE_MODES.contains(&mode)
29}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!("{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
47 if crate::core::binary_detect::is_binary_file(path) {
48 let msg = crate::core::binary_detect::binary_file_message(path);
49 return Err(std::io::Error::other(msg));
50 }
51
52 if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
53 if let Ok(cwd) = std::env::current_dir() {
54 let root = crate::core::pathjail::canonicalize_or_self(&cwd);
55 if !canonical.starts_with(&root) {
56 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
57 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
58 .ok()
59 .is_some_and(|d| canonical.starts_with(d));
60 let tmp_ok = canonical.starts_with(std::env::temp_dir());
61 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
62 tracing::warn!(
63 "defense-in-depth: path may escape project root: {}",
64 canonical.display()
65 );
66 }
67 }
68 }
69 }
70
71 let cap = crate::core::limits::max_read_bytes();
72 let meta = std::fs::metadata(path).map_err(|e| {
73 std::io::Error::other(format!("cannot stat file (refusing unbounded read): {e}"))
74 })?;
75 if meta.len() > cap as u64 {
76 return Err(std::io::Error::other(format!(
77 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
78 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
79 meta.len(),
80 cap
81 )));
82 }
83
84 let bytes = std::fs::read(path)?;
85 match String::from_utf8(bytes) {
86 Ok(s) => Ok(s),
87 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
88 }
89}
90
91pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
93 handle_with_options(cache, path, mode, false, crp_mode, None)
94}
95
96pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
98 handle_with_options(cache, path, mode, true, crp_mode, None)
99}
100
101pub fn handle_with_task(
103 cache: &mut SessionCache,
104 path: &str,
105 mode: &str,
106 crp_mode: CrpMode,
107 task: Option<&str>,
108) -> String {
109 handle_with_options(cache, path, mode, false, crp_mode, task)
110}
111
112pub fn handle_with_task_resolved(
114 cache: &mut SessionCache,
115 path: &str,
116 mode: &str,
117 crp_mode: CrpMode,
118 task: Option<&str>,
119) -> ReadOutput {
120 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
121}
122
123pub fn handle_fresh_with_task(
125 cache: &mut SessionCache,
126 path: &str,
127 mode: &str,
128 crp_mode: CrpMode,
129 task: Option<&str>,
130) -> String {
131 handle_with_options(cache, path, mode, true, crp_mode, task)
132}
133
134pub fn handle_fresh_with_task_resolved(
136 cache: &mut SessionCache,
137 path: &str,
138 mode: &str,
139 crp_mode: CrpMode,
140 task: Option<&str>,
141) -> ReadOutput {
142 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
143}
144
145fn handle_with_options(
146 cache: &mut SessionCache,
147 path: &str,
148 mode: &str,
149 fresh: bool,
150 crp_mode: CrpMode,
151 task: Option<&str>,
152) -> String {
153 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
154}
155
/// Core read pipeline shared by all `handle*` entry points.
///
/// Flow, in order:
/// 1. `fresh` invalidates any cached entry for `path`.
/// 2. `mode == "diff"` is delegated to `handle_diff` and returned immediately.
/// 3. For every mode except `"full"`, a cached entry whose mtime is stale is invalidated.
/// 4. Cache hit: `"full"` goes through `handle_full_with_auto_delta`; other modes are
///    rendered from the cached content (using the compressed cache for cacheable modes).
/// 5. Cache miss: the file is read from disk, stored, rendered, and graph/semantic hints
///    are appended.
///
/// Returns the rendered text, the mode that was actually used (`"error"` when the file
/// could not be read) and a token count for the output.
fn handle_with_options_resolved(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        cache.invalidate(path);
    }

    // NOTE(review): the diff branch returns without calling `redact_text_if_enabled`,
    // unlike the branches below — confirm this is intended.
    if mode == "diff" {
        let (out, sent) = handle_diff(cache, path, &file_ref);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // "full" is excluded here because its cache-hit path re-reads the file from disk
    // itself (see `handle_full_with_auto_delta`).
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // ── Cache hit ──────────────────────────────────────────────────────────────
    if let Some(existing) = cache.get(path) {
        if mode == "full" {
            let (out, sent) =
                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }
        // Copy what we need out of the entry so the cache can be mutated below.
        let content = existing.content.clone();
        let original_tokens = existing.original_tokens;
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };
        // Serve a previously rendered output for cacheable modes when one exists.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
                let sent = count_tokens(cached_output);
                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }
        let (out, sent) = process_mode(
            &content,
            &resolved_mode,
            &file_ref,
            &short,
            ext,
            original_tokens,
            crp_mode,
            path,
            task,
        );
        // The unredacted rendering is what gets cached; redaction is applied on the way out.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            cache.set_compressed(path, &cache_key, out.clone());
        }
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        return ReadOutput {
            content: out,
            resolved_mode,
            output_tokens: sent,
        };
    }

    // ── Cache miss: read from disk ─────────────────────────────────────────────
    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    // Hints are only computed on this first-read path.
    let similar_hint = find_similar_and_update_semantic_index(path, &content);
    let graph_hint = build_graph_related_hint(path);

    let store_result = cache.store(path, content.clone());

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, sent) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        // `sent` was computed before the hints were appended.
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    // Cache the rendering before hints are appended, so cached output never contains them.
    if is_cacheable_mode(&resolved_mode) {
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    // On this path the token count is recomputed over the final (hinted, redacted) text.
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
324
/// Returns `true` when `path` looks like an agent-instruction file (skills, rules,
/// `AGENTS.md`, ...).
///
/// Matching is case-insensitive. A path qualifies when its file name is one of the known
/// instruction file names, or when it contains one of the known instruction directories
/// (`/skills/`, `/.cursor/rules/`, `/.claude/rules/`) or `/agents.md`.
///
/// Backslash separators are treated like `/`, so Windows-style paths such as
/// `C:\repo\.cursor\rules\style.mdc` are recognised as well.
pub fn is_instruction_file(path: &str) -> bool {
    // Normalise case and separators once; every check below is written with `/`.
    let lower = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&lower)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    matches!(
        filename,
        "skill.md"
            | "agents.md"
            | "rules.md"
            | ".cursorrules"
            | ".clinerules"
            | "lean-ctx.md"
            | "lean-ctx.mdc"
    ) || lower.contains("/skills/")
        || lower.contains("/.cursor/rules/")
        || lower.contains("/.claude/rules/")
        || lower.contains("/agents.md")
}
346
/// Resolves `mode == "auto"` to a concrete read mode for `file_path`.
///
/// Decision order:
/// 1. Instruction files are always `"full"`.
/// 2. The intent router's `effective_read_mode` wins if it is neither `"auto"` nor
///    `"reference"`.
/// 3. Otherwise the mode predictor proposes a mode (`"full"` if it has none or says `"auto"`).
/// 4. A bandit arm with a low budget ratio can downgrade a predicted `"full"` to
///    `"aggressive"` for files above 2000 tokens.
/// 5. The adaptive policy picks the final mode, but for files above 2000 tokens it may not
///    widen a compressed prediction (see the guard at the end).
fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
    if is_instruction_file(file_path) {
        return "full".to_string();
    }

    // With no task, the router is queried with the literal text "read".
    let intent_query = task.unwrap_or("read");
    let route = crate::core::intent_router::route_v1(intent_query);
    let intent_mode = &route.decision.effective_read_mode;
    if intent_mode != "auto" && intent_mode != "reference" {
        return intent_mode.clone();
    }

    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
    let predictor = crate::core::mode_predictor::ModePredictor::new();
    let mut predicted = predictor
        .predict_best_mode(&sig)
        .unwrap_or_else(|| "full".to_string());
    // Never hand "auto" back to the caller from the predictor.
    if predicted == "auto" {
        predicted = "full".to_string();
    }

    if let Some(project_root) =
        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
    {
        let ext = std::path::Path::new(file_path)
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("");
        // Size bucket by token count; combined with the extension to key the bandit.
        let bucket = match original_tokens {
            0..=2000 => "sm",
            2001..=10000 => "md",
            10001..=50000 => "lg",
            _ => "xl",
        };
        let bandit_key = format!("{ext}_{bucket}");
        // NOTE(review): `store` is loaded and queried here but not saved by this function.
        let mut store = crate::core::bandit::BanditStore::load(&project_root);
        let bandit = store.get_or_create(&bandit_key);
        let arm = bandit.select_arm();
        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
            predicted = "aggressive".to_string();
        }
    }

    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
    let chosen = policy.choose_auto_mode(task, &predicted);

    // For larger files, keep the prediction when the policy would move away from it:
    // - predicted map/signatures but the policy chose something other than those two, or
    // - the policy chose "full" although the prediction was a non-full mode.
    if original_tokens > 2000 {
        if predicted == "map" || predicted == "signatures" {
            if chosen != "map" && chosen != "signatures" {
                return predicted;
            }
        } else if chosen == "full" && predicted != "full" {
            return predicted;
        }
    }

    chosen
}
410
411fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
412 let cfg = crate::core::config::Config::load();
413 let profile = crate::core::config::MemoryProfile::effective(&cfg);
414 if !profile.semantic_cache_enabled() {
415 return None;
416 }
417
418 let project_root = detect_project_root(path);
419 let session_id = format!("{}", std::process::id());
420 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
421
422 let similar = index.find_similar(content, 0.7);
423 let relevant: Vec<_> = similar
424 .into_iter()
425 .filter(|(p, _)| p != path)
426 .take(3)
427 .collect();
428
429 index.add_file(path, content, &session_id);
430 let _ = index.save(&project_root);
431
432 if relevant.is_empty() {
433 return None;
434 }
435
436 let hints: Vec<String> = relevant
437 .iter()
438 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
439 .collect();
440
441 Some(format!(
442 "[semantic: {} similar file(s) in cache]\n{}",
443 relevant.len(),
444 hints.join("\n")
445 ))
446}
447
448fn detect_project_root(path: &str) -> String {
449 crate::core::protocol::detect_project_root_or_cwd(path)
450}
451
452fn build_graph_related_hint(path: &str) -> Option<String> {
453 let project_root = detect_project_root(path);
454 crate::core::graph_context::build_related_hint(path, &project_root, 5)
455}
456
/// Auto-delta cut-off: when a cached file has changed, the diff is sent instead of the
/// full content only if the diff's token count is below this fraction of the full token count.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;
458
/// Handles a `"full"` read of a file that already has a cache entry.
///
/// Re-reads the file from disk and then returns one of:
/// - a "using cached version" notice when the disk read fails,
/// - a short "unchanged" notice when the store reports a hit and full content was already
///   delivered,
/// - the full formatted content when the store reports a hit but full content was not yet
///   delivered (it is then marked as delivered),
/// - an `[auto-delta]` diff against the previously cached content when the diff is small
///   enough (see `AUTO_DELTA_THRESHOLD`),
/// - otherwise the full formatted content.
///
/// Returns `(output, tokens_sent)`.
fn handle_full_with_auto_delta(
    cache: &mut SessionCache,
    path: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    task: Option<&str>,
) -> (String, usize) {
    let Ok(disk_content) = read_file_lossy(path) else {
        // Disk read failed: report the cached entry's stats rather than an error.
        cache.record_cache_hit(path);
        let out = if let Some(existing) = cache.get(path) {
            format!(
                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
                existing.read_count, existing.line_count
            )
        } else {
            format!("[file read failed and no cached version available] {file_ref}={short}")
        };
        let sent = count_tokens(&out);
        return (out, sent);
    };

    // Snapshot the previous content before `store` replaces it; needed for the diff below.
    let old_content = cache
        .get(path)
        .map(|e| e.content.clone())
        .unwrap_or_default();
    let store_result = cache.store(path, disk_content.clone());

    // `was_hit`: presumably the stored content matched what was already cached (the
    // message below speaks of "same hash") — confirm against `SessionCache::store`.
    if store_result.was_hit {
        if store_result.full_content_delivered {
            let out = format!(
                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
                store_result.read_count, store_result.line_count
            );
            let sent = count_tokens(&out);
            return (out, sent);
        }
        cache.mark_full_delivered(path);
        return format_full_output(
            file_ref,
            short,
            ext,
            &disk_content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
    }

    // Content changed: prefer a diff when it is sufficiently smaller than the full file.
    let diff = compressor::diff_content(&old_content, &disk_content);
    let diff_tokens = count_tokens(&diff);
    let full_tokens = store_result.original_tokens;

    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
        let savings = protocol::format_savings(full_tokens, diff_tokens);
        let out = format!(
            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
            disk_content.lines().count()
        );
        // The reported count covers the diff only, not the header or savings line.
        return (out, diff_tokens);
    }

    // NOTE(review): unlike the hit path above, neither changed-content path calls
    // `mark_full_delivered` — confirm whether `store` already handles that.
    format_full_output(
        file_ref,
        short,
        ext,
        &disk_content,
        store_result.original_tokens,
        store_result.line_count,
        task,
    )
}
532
/// Renders a file in `"full"` mode.
///
/// Steps:
/// 1. Build the header (with deps/exports).
/// 2. If the active profile's layout is enabled, the file has enough lines and a non-empty
///    `task` is given, optionally reorder the content for attention based on task keywords.
/// 3. Try symbol-map compression; it is used only when at least 3 symbols are registered
///    and the net token saving is at least 5%.
/// 4. Append a savings line comparing `original_tokens` with the tokens sent.
///
/// Returns `(output, tokens_sent)`, where `tokens_sent` is counted before the savings line
/// is appended.
fn format_full_output(
    file_ref: &str,
    short: &str,
    ext: &str,
    content: &str,
    original_tokens: usize,
    line_count: usize,
    task: Option<&str>,
) -> (String, usize) {
    let tokens = original_tokens;
    // The header is always derived from the original (un-reordered) content.
    let metadata = build_header(file_ref, short, ext, content, line_count, true);

    let mut reordered: Option<String> = None;
    {
        let profile = crate::core::profiles::active_profile();
        let cfg = profile.layout;
        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
            let task_str = task.unwrap_or("");
            if !task_str.is_empty() {
                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
                    content, &keywords, &cfg,
                );
                // Only adopt the reordered text when the driver actually changed something.
                if !r.skipped && r.changed {
                    reordered = Some(r.output);
                }
            }
        }
    }

    let content_for_output = reordered.as_deref().unwrap_or(content);

    let mut sym = SymbolMap::new();
    let idents = symbol_map::extract_identifiers(content_for_output, ext);
    for ident in &idents {
        sym.register(ident);
    }

    if sym.len() >= 3 {
        let sym_table = sym.format_table();
        let compressed = sym.apply(content_for_output);
        let original_tok = count_tokens(content_for_output);
        // The symbol table is part of the cost of the compressed form.
        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
        let net_saving = original_tok.saturating_sub(compressed_tok);
        // Integer percentage; require at least 5% net saving.
        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
            let output = format!("{metadata}\n{compressed}{sym_table}");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(tokens, sent);
            return (format!("{output}\n{savings}"), sent);
        }
    }

    // Fallback: plain content (possibly reordered) under the header.
    let output = format!("{metadata}\n{content_for_output}");
    let sent = count_tokens(&output);
    let savings = protocol::format_savings(tokens, sent);
    (format!("{output}\n{savings}"), sent)
}
590
591fn build_header(
592 file_ref: &str,
593 short: &str,
594 ext: &str,
595 content: &str,
596 line_count: usize,
597 include_deps: bool,
598) -> String {
599 let mut header = format!("{file_ref}={short} {line_count}L");
600
601 if include_deps {
602 let dep_info = deps::extract_deps(content, ext);
603 if !dep_info.imports.is_empty() {
604 let imports_str: Vec<&str> = dep_info
605 .imports
606 .iter()
607 .take(8)
608 .map(std::string::String::as_str)
609 .collect();
610 header.push_str(&format!("\n deps {}", imports_str.join(",")));
611 }
612 if !dep_info.exports.is_empty() {
613 let exports_str: Vec<&str> = dep_info
614 .exports
615 .iter()
616 .take(8)
617 .map(std::string::String::as_str)
618 .collect();
619 header.push_str(&format!("\n exports {}", exports_str.join(",")));
620 }
621 }
622
623 header
624}
625
/// Renders `content` according to `mode` and returns `(output, tokens_sent)`.
///
/// Supported modes: `"auto"` (resolved, then re-dispatched), `"full"`, `"signatures"`,
/// `"map"`, `"aggressive"`, `"entropy"`, `"task"`, `"reference"` and `"lines:<ranges>"`.
/// Any other mode falls back to header + full content with a warning line.
///
/// `original_tokens` is the token count of the unprocessed file and is used for the
/// savings line. What `tokens_sent` covers differs per branch (see the comments below).
/// The compressed modes (signatures, map, aggressive, entropy, and task when filtering
/// happened) append the "use mode=full" hint.
// Flat parameter list is kept because every call site passes all nine values.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        "auto" => {
            // NOTE(review): this recurses with the resolved mode; it would recurse again
            // if `resolve_auto_mode` ever returned "auto" — confirm that cannot happen.
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        "signatures" => {
            // Header + up to eight imports + one line per extracted signature.
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            // Tokens are counted before the savings line and hint are appended.
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "map" => {
            // PHP has a dedicated map compressor; fall through to the generic map if it
            // yields nothing.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
                    let sent = count_tokens(&output);
                    let savings = protocol::format_savings(original_tokens, sent);
                    output.push('\n');
                    output.push_str(&savings);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");

            // Unlike the header/signatures format, map lists all imports and exports.
            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Only exported or top-level (indent 0) signatures make it into the API section.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "aggressive" => {
            // AST pruning is only available with the `tree-sitter` feature.
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // Use task-aware compression when the latest session has a structured intent.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            // Same symbol-map gate as full mode: at least 3 symbols and >= 5% net saving.
            // Token counts in this branch exclude the header.
            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                // Symbol map did not pay off: emit the compressed text as-is (same output
                // as the fall-through below).
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Side effect: reports whether this read achieved more than 15% compression.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "task" => {
            // Without a usable task (missing, empty, or no keywords) return the full
            // content with an explanatory trailer and no compressed hint.
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = format!(
                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
            );
            let project_root = detect_project_root(file_path);
            // Graph context is optional; an empty string is used when none is available.
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            // Sum of the parts rather than a count over the assembled output.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        "reference" => {
            // One-line summary: line count, token count and extension; no file content.
            let tok = count_tokens(content);
            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        mode if mode.starts_with("lines:") => {
            // Safe to slice at byte 6: the guard guarantees the ASCII prefix "lines:".
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
            // Tokens are counted over the extracted lines only, excluding the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
888
/// Extracts the lines of `content` selected by `range_str` and renders each as
/// `"{n:>4}| {line}"`, joined with newlines.
///
/// `range_str` is a comma-separated list of 1-based selectors: a single line number (`7`)
/// or an inclusive range (`3-9`). In a range, a missing or unparsable start defaults to
/// line 1 and a missing or unparsable end defaults to the last line; both ends are clamped
/// to the file. Selectors that match nothing are skipped. Lines appear in selector order
/// and may repeat.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    // Callers only pass numbers in `1..=line_total`.
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .unwrap_or(line_total)
                    .min(line_total);
                // `first >= 1` and `last <= line_total`, so every number is in bounds;
                // a reversed range is simply empty.
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(number) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&number) {
                        picked.push(render(number));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
917
918fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
919 let short = protocol::shorten_path(path);
920 let old_content = cache.get(path).map(|e| e.content.clone());
921
922 let new_content = match read_file_lossy(path) {
923 Ok(c) => c,
924 Err(e) => {
925 let msg = format!("ERROR: {e}");
926 let tokens = count_tokens(&msg);
927 return (msg, tokens);
928 }
929 };
930
931 let original_tokens = count_tokens(&new_content);
932
933 let diff_output = if let Some(old) = &old_content {
934 compressor::diff_content(old, &new_content)
935 } else {
936 format!("[first read]\n{new_content}")
937 };
938
939 cache.store(path, new_content);
940
941 let sent = count_tokens(&diff_output);
942 let savings = protocol::format_savings(original_tokens, sent);
943 (
944 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
945 sent,
946 )
947}
948
949#[cfg(test)]
950mod tests {
951 use super::*;
952 use std::time::Duration;
953
954 #[test]
955 fn test_header_toon_format_no_brackets() {
956 let content = "use std::io;\nfn main() {}\n";
957 let header = build_header("F1", "main.rs", "rs", content, 2, false);
958 assert!(!header.contains('['));
959 assert!(!header.contains(']'));
960 assert!(header.contains("F1=main.rs 2L"));
961 }
962
963 #[test]
964 fn test_header_toon_deps_indented() {
965 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
966 let header = build_header("F1", "main.rs", "rs", content, 3, true);
967 if header.contains("deps") {
968 assert!(
969 header.contains("\n deps "),
970 "deps should use indented TOON format"
971 );
972 assert!(
973 !header.contains("deps:["),
974 "deps should not use bracket format"
975 );
976 }
977 }
978
979 #[test]
980 fn test_header_toon_saves_tokens() {
981 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
982 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
983 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
984 let old_tokens = count_tokens(&old_header);
985 let new_tokens = count_tokens(&new_header);
986 assert!(
987 new_tokens <= old_tokens,
988 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
989 );
990 }
991
992 #[test]
993 fn test_tdd_symbols_are_compact() {
994 let symbols = [
995 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
996 ];
997 for sym in &symbols {
998 let tok = count_tokens(sym);
999 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
1000 }
1001 }
1002
1003 #[test]
1004 fn test_task_mode_filters_content() {
1005 let content = (0..200)
1006 .map(|i| {
1007 if i % 20 == 0 {
1008 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
1009 } else {
1010 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
1011 }
1012 })
1013 .collect::<Vec<_>>()
1014 .join("\n");
1015 let full_tokens = count_tokens(&content);
1016 let task = Some("fix bug in validate_token");
1017 let (result, result_tokens) = process_mode(
1018 &content,
1019 "task",
1020 "F1",
1021 "test.rs",
1022 "rs",
1023 full_tokens,
1024 CrpMode::Off,
1025 "test.rs",
1026 task,
1027 );
1028 assert!(
1029 result_tokens < full_tokens,
1030 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1031 );
1032 assert!(
1033 result.contains("task-filtered"),
1034 "output should contain task-filtered marker"
1035 );
1036 }
1037
1038 #[test]
1039 fn test_task_mode_without_task_returns_full() {
1040 let content = "fn main() {}\nfn helper() {}\n";
1041 let tokens = count_tokens(content);
1042 let (result, _sent) = process_mode(
1043 content,
1044 "task",
1045 "F1",
1046 "test.rs",
1047 "rs",
1048 tokens,
1049 CrpMode::Off,
1050 "test.rs",
1051 None,
1052 );
1053 assert!(
1054 result.contains("no task set"),
1055 "should indicate no task: {result}"
1056 );
1057 }
1058
1059 #[test]
1060 fn test_reference_mode_one_line() {
1061 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1062 let tokens = count_tokens(content);
1063 let (result, _sent) = process_mode(
1064 content,
1065 "reference",
1066 "F1",
1067 "test.rs",
1068 "rs",
1069 tokens,
1070 CrpMode::Off,
1071 "test.rs",
1072 None,
1073 );
1074 let lines: Vec<&str> = result.lines().collect();
1075 assert!(
1076 lines.len() <= 3,
1077 "reference mode should be very compact, got {} lines",
1078 lines.len()
1079 );
1080 assert!(result.contains("lines"), "should contain line count");
1081 assert!(result.contains("tok"), "should contain token count");
1082 }
1083
#[test]
/// Reading `lines:1-1` twice through the same cache must reflect a rewrite of
/// the file that happened between the two reads.
fn cached_lines_mode_invalidates_on_mtime_change() {
    /// Extracts the text of the second rendered row: the row is trimmed, and
    /// if it contains a `|`, only the trimmed part after the first `|` is kept.
    /// A missing second row yields an empty string.
    // NOTE(review): presumably row 0 is a header and `|` separates a line
    // number gutter from the source text — confirm against the lines renderer.
    fn second_row_text(rendered: &str) -> &str {
        let row = rendered.lines().nth(1).unwrap_or_default().trim();
        match row.split_once('|') {
            Some((_, text)) => text.trim(),
            None => row,
        }
    }

    let dir = tempfile::tempdir().unwrap();
    let file = dir.path().join("file.txt");
    let file_arg = file.to_string_lossy().to_string();

    std::fs::write(&file, "one\nsecond\n").unwrap();
    let mut cache = SessionCache::new();

    let before = handle_with_task_resolved(&mut cache, &file_arg, "lines:1-1", CrpMode::Off, None);
    assert_eq!(second_row_text(&before.content), "one");

    // Presumably waits so the rewrite gets a distinguishable mtime even on
    // filesystems with coarse timestamp resolution — TODO confirm.
    std::thread::sleep(Duration::from_secs(1));
    std::fs::write(&file, "two\nsecond\n").unwrap();

    let after = handle_with_task_resolved(&mut cache, &file_arg, "lines:1-1", CrpMode::Off, None);
    assert_eq!(second_row_text(&after.content), "two");
}
1108
#[test]
#[cfg_attr(tarpaulin, ignore)]
/// Runs the same synthetic file through the `full`, `task`, `signatures` and
/// `reference` modes, prints a savings table to stderr, and asserts the
/// expected ordering of the counts returned by `process_mode`.
fn benchmark_task_conditioned_compression() {
    let content = generate_benchmark_code(200);
    let full_tokens = count_tokens(&content);
    let task = Some("fix authentication in validate_token");

    // Every mode sees identical inputs; only the mode name varies. The closure
    // returns the second element of `process_mode`'s result, which this test
    // treats as the token count for that mode.
    let tokens_for = |mode: &str| {
        let (_output, tok) = process_mode(
            &content,
            mode,
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        tok
    };

    // Call order is kept as full -> task -> signatures -> reference.
    let full_tok = tokens_for("full");
    let task_tok = tokens_for("task");
    let sig_tok = tokens_for("signatures");
    let ref_tok = tokens_for("reference");

    // Percentage saved relative to the `full` baseline.
    let baseline = full_tok as f64;
    let savings_pct = |tok: f64| (1.0 - tok / baseline) * 100.0;

    eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
    eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
    eprintln!(" full: {full_tok:>6} tokens (baseline)");
    eprintln!(
        " task: {task_tok:>6} tokens ({:.0}% savings)",
        savings_pct(task_tok as f64)
    );
    eprintln!(
        " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
        savings_pct(sig_tok as f64)
    );
    eprintln!(
        " reference: {ref_tok:>6} tokens ({:.0}% savings)",
        savings_pct(ref_tok as f64)
    );
    eprintln!("================================================\n");

    assert!(task_tok < full_tok, "task mode should save tokens");
    assert!(sig_tok < full_tok, "signatures should save tokens");
    assert!(ref_tok < sig_tok, "reference should be most compact");
}
1183
/// Builds a synthetic Rust-like source text used as benchmark input.
///
/// The output always starts with a fixed 17-row prelude (imports, a `Server`
/// struct, and a `validate_token` method inside `impl Server`). It is followed
/// by `lines - 17` filler rows (none when `lines <= 17`) and one final `}`
/// row, so the result has `max(lines, 17) + 1` rows joined by `\n` with no
/// trailing newline.
///
/// Filler rows cycle with period 30: the first row of a cycle opens a
/// `handler_{i}` method, the 30th closes it, and the rows in between are
/// `let val_{i} = ...` statements. A cycle cut short by the row budget is
/// left without its closing row.
fn generate_benchmark_code(lines: usize) -> String {
    const PRELUDE: [&str; 17] = [
        "use std::collections::HashMap;",
        "use crate::core::auth;",
        "",
        "pub struct Server {",
        " config: Config,",
        " cache: HashMap<String, String>,",
        "}",
        "",
        "impl Server {",
        " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
        " let decoded = auth::decode_jwt(token)?;",
        " if decoded.exp < chrono::Utc::now().timestamp() {",
        " return Err(AuthError::Expired);",
        " }",
        " Ok(decoded.claims)",
        " }",
        "",
    ];

    // Whatever is left of the requested budget after the prelude becomes filler.
    let filler_rows = lines.saturating_sub(PRELUDE.len());
    let filler = (0..filler_rows).map(|i| match i % 30 {
        0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
        29 => " }".to_string(),
        _ => format!(
            " let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"
        ),
    });

    PRELUDE
        .iter()
        .map(|row| (*row).to_string())
        .chain(filler)
        .chain(std::iter::once("}".to_string()))
        .collect::<Vec<String>>()
        .join("\n")
}
1222
#[test]
/// Checks `is_instruction_file` against paths it must accept and paths it
/// must reject.
fn instruction_file_detection() {
    // (path, expected result), checked in the listed order.
    let cases = [
        (
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            true,
        ),
        ("/workspace/.cursor/rules/lean-ctx.mdc", true),
        ("/project/AGENTS.md", true),
        ("/project/.cursorrules", true),
        ("/home/user/.claude/rules/my-rule.md", true),
        ("/skills/some-skill/README.md", true),
        ("/project/src/main.rs", false),
        ("/project/config.json", false),
        ("/project/data/report.csv", false),
    ];

    for &(path, expected) in &cases {
        assert_eq!(
            is_instruction_file(path),
            expected,
            "unexpected instruction-file classification for {path}"
        );
    }
}
1238
#[test]
/// `resolve_auto_mode` must pick `"full"` for instruction files, for each of
/// the token counts and task arguments listed below.
fn resolve_auto_mode_returns_full_for_instruction_files() {
    // (path, token count, task argument, failure message)
    let cases = [
        (
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
            "SKILL.md must always be read in full",
        ),
        (
            "/workspace/AGENTS.md",
            3000,
            Some("read"),
            "AGENTS.md must always be read in full",
        ),
        (
            "/workspace/.cursorrules",
            2000,
            None,
            ".cursorrules must always be read in full",
        ),
    ];

    for &(path, tokens, task, why) in &cases {
        let mode = resolve_auto_mode(path, tokens, task);
        assert_eq!(mode, "full", "{why}");
    }
}
1254}