1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a single read request: the rendered text plus bookkeeping about how it was produced.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text returned to the caller (header, body and any trailing hints).
    pub content: String,
    /// Mode that was actually applied. For `"auto"` requests this is the mode it resolved to;
    /// it is `"error"` when the file could not be read.
    pub resolved_mode: String,
    /// Token count reported for this response.
    pub output_tokens: usize,
}
22
/// Notice appended after compressed output (see `append_compressed_hint`) telling the reader
/// that `mode="full"` returns the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Read modes whose rendered output is stored in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|&candidate| candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!("{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45 let cap = crate::core::limits::max_read_bytes();
46 if let Ok(meta) = std::fs::metadata(path) {
47 if meta.len() > cap as u64 {
48 return Err(std::io::Error::other(format!(
49 "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
50 meta.len(),
51 cap
52 )));
53 }
54 }
55 let bytes = std::fs::read(path)?;
56 match String::from_utf8(bytes) {
57 Ok(s) => Ok(s),
58 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
59 }
60}
61
62pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
64 handle_with_options(cache, path, mode, false, crp_mode, None)
65}
66
67pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
69 handle_with_options(cache, path, mode, true, crp_mode, None)
70}
71
72pub fn handle_with_task(
74 cache: &mut SessionCache,
75 path: &str,
76 mode: &str,
77 crp_mode: CrpMode,
78 task: Option<&str>,
79) -> String {
80 handle_with_options(cache, path, mode, false, crp_mode, task)
81}
82
83pub fn handle_with_task_resolved(
85 cache: &mut SessionCache,
86 path: &str,
87 mode: &str,
88 crp_mode: CrpMode,
89 task: Option<&str>,
90) -> ReadOutput {
91 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
92}
93
94pub fn handle_fresh_with_task(
96 cache: &mut SessionCache,
97 path: &str,
98 mode: &str,
99 crp_mode: CrpMode,
100 task: Option<&str>,
101) -> String {
102 handle_with_options(cache, path, mode, true, crp_mode, task)
103}
104
105pub fn handle_fresh_with_task_resolved(
107 cache: &mut SessionCache,
108 path: &str,
109 mode: &str,
110 crp_mode: CrpMode,
111 task: Option<&str>,
112) -> ReadOutput {
113 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
114}
115
116fn handle_with_options(
117 cache: &mut SessionCache,
118 path: &str,
119 mode: &str,
120 fresh: bool,
121 crp_mode: CrpMode,
122 task: Option<&str>,
123) -> String {
124 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
125}
126
127fn handle_with_options_resolved(
128 cache: &mut SessionCache,
129 path: &str,
130 mode: &str,
131 fresh: bool,
132 crp_mode: CrpMode,
133 task: Option<&str>,
134) -> ReadOutput {
135 let file_ref = cache.get_file_ref(path);
136 let short = protocol::shorten_path(path);
137 let ext = Path::new(path)
138 .extension()
139 .and_then(|e| e.to_str())
140 .unwrap_or("");
141
142 if fresh {
143 cache.invalidate(path);
144 }
145
146 if mode == "diff" {
147 let (out, sent) = handle_diff(cache, path, &file_ref);
148 return ReadOutput {
149 content: out,
150 resolved_mode: "diff".into(),
151 output_tokens: sent,
152 };
153 }
154
155 if mode != "full" {
156 if let Some(existing) = cache.get(path) {
157 let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
158 if stale {
159 cache.invalidate(path);
160 }
161 }
162 }
163
164 if let Some(existing) = cache.get(path) {
165 if mode == "full" {
166 let (out, sent) =
167 handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
168 let out = crate::core::redaction::redact_text_if_enabled(&out);
169 return ReadOutput {
170 content: out,
171 resolved_mode: "full".into(),
172 output_tokens: sent,
173 };
174 }
175 let content = existing.content.clone();
176 let original_tokens = existing.original_tokens;
177 let resolved_mode = if mode == "auto" {
178 resolve_auto_mode(path, original_tokens, task)
179 } else {
180 mode.to_string()
181 };
182 if is_cacheable_mode(&resolved_mode) {
183 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
184 if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
185 let sent = count_tokens(cached_output);
186 let out = crate::core::redaction::redact_text_if_enabled(cached_output);
187 return ReadOutput {
188 content: out,
189 resolved_mode,
190 output_tokens: sent,
191 };
192 }
193 }
194 let (out, sent) = process_mode(
195 &content,
196 &resolved_mode,
197 &file_ref,
198 &short,
199 ext,
200 original_tokens,
201 crp_mode,
202 path,
203 task,
204 );
205 if is_cacheable_mode(&resolved_mode) {
206 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
207 cache.set_compressed(path, &cache_key, out.clone());
208 }
209 let out = crate::core::redaction::redact_text_if_enabled(&out);
210 return ReadOutput {
211 content: out,
212 resolved_mode,
213 output_tokens: sent,
214 };
215 }
216
217 let content = match read_file_lossy(path) {
218 Ok(c) => c,
219 Err(e) => {
220 let msg = format!("ERROR: {e}");
221 let tokens = count_tokens(&msg);
222 return ReadOutput {
223 content: msg,
224 resolved_mode: "error".into(),
225 output_tokens: tokens,
226 };
227 }
228 };
229
230 let similar_hint = find_similar_and_update_semantic_index(path, &content);
231 let graph_hint = build_graph_related_hint(path);
232
233 let store_result = cache.store(path, content.clone());
234
235 if mode == "full" {
236 cache.mark_full_delivered(path);
237 let (mut output, sent) = format_full_output(
238 &file_ref,
239 &short,
240 ext,
241 &content,
242 store_result.original_tokens,
243 store_result.line_count,
244 task,
245 );
246 if let Some(hint) = &graph_hint {
247 output.push_str(&format!("\n{hint}"));
248 }
249 if let Some(hint) = similar_hint {
250 output.push_str(&format!("\n{hint}"));
251 }
252 let output = crate::core::redaction::redact_text_if_enabled(&output);
253 return ReadOutput {
254 content: output,
255 resolved_mode: "full".into(),
256 output_tokens: sent,
257 };
258 }
259
260 let resolved_mode = if mode == "auto" {
261 resolve_auto_mode(path, store_result.original_tokens, task)
262 } else {
263 mode.to_string()
264 };
265
266 let (mut output, _sent) = process_mode(
267 &content,
268 &resolved_mode,
269 &file_ref,
270 &short,
271 ext,
272 store_result.original_tokens,
273 crp_mode,
274 path,
275 task,
276 );
277 if is_cacheable_mode(&resolved_mode) {
278 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
279 cache.set_compressed(path, &cache_key, output.clone());
280 }
281 if let Some(hint) = &graph_hint {
282 output.push_str(&format!("\n{hint}"));
283 }
284 if let Some(hint) = similar_hint {
285 output.push_str(&format!("\n{hint}"));
286 }
287 let output = crate::core::redaction::redact_text_if_enabled(&output);
288 let final_tokens = count_tokens(&output);
289 ReadOutput {
290 content: output,
291 resolved_mode,
292 output_tokens: final_tokens,
293 }
294}
295
296fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
297 let intent_query = task.unwrap_or("read");
300 let route = crate::core::intent_router::route_v1(intent_query);
301 let intent_mode = &route.decision.effective_read_mode;
302 if intent_mode != "auto" && intent_mode != "reference" {
303 return intent_mode.clone();
304 }
305
306 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
308 let predictor = crate::core::mode_predictor::ModePredictor::new();
309 let mut predicted = predictor
310 .predict_best_mode(&sig)
311 .unwrap_or_else(|| "full".to_string());
312 if predicted == "auto" {
313 predicted = "full".to_string();
314 }
315
316 if let Some(project_root) =
318 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
319 {
320 let ext = std::path::Path::new(file_path)
321 .extension()
322 .and_then(|e| e.to_str())
323 .unwrap_or("");
324 let bucket = match original_tokens {
325 0..=2000 => "sm",
326 2001..=10000 => "md",
327 10001..=50000 => "lg",
328 _ => "xl",
329 };
330 let bandit_key = format!("{ext}_{bucket}");
331 let mut store = crate::core::bandit::BanditStore::load(&project_root);
332 let bandit = store.get_or_create(&bandit_key);
333 let arm = bandit.select_arm();
334 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
335 predicted = "aggressive".to_string();
336 }
337 }
338
339 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
341 let chosen = policy.choose_auto_mode(task, &predicted);
342
343 if original_tokens > 2000 {
344 if predicted == "map" || predicted == "signatures" {
345 if chosen != "map" && chosen != "signatures" {
346 return predicted;
347 }
348 } else if chosen == "full" && predicted != "full" {
349 return predicted;
350 }
351 }
352
353 chosen
354}
355
356fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
357 let cfg = crate::core::config::Config::load();
358 let profile = crate::core::config::MemoryProfile::effective(&cfg);
359 if !profile.semantic_cache_enabled() {
360 return None;
361 }
362
363 let project_root = detect_project_root(path);
364 let session_id = format!("{}", std::process::id());
365 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
366
367 let similar = index.find_similar(content, 0.7);
368 let relevant: Vec<_> = similar
369 .into_iter()
370 .filter(|(p, _)| p != path)
371 .take(3)
372 .collect();
373
374 index.add_file(path, content, &session_id);
375 let _ = index.save(&project_root);
376
377 if relevant.is_empty() {
378 return None;
379 }
380
381 let hints: Vec<String> = relevant
382 .iter()
383 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
384 .collect();
385
386 Some(format!(
387 "[semantic: {} similar file(s) in cache]\n{}",
388 relevant.len(),
389 hints.join("\n")
390 ))
391}
392
393fn detect_project_root(path: &str) -> String {
394 crate::core::protocol::detect_project_root_or_cwd(path)
395}
396
397fn build_graph_related_hint(path: &str) -> Option<String> {
398 let project_root = detect_project_root(path);
399 crate::core::graph_context::build_related_hint(path, &project_root, 5)
400}
401
/// A changed file is sent as a diff instead of in full only when the diff costs fewer than
/// this fraction of the full file's tokens (see `handle_full_with_auto_delta`).
const AUTO_DELTA_THRESHOLD: f64 = 0.6;
403
404fn handle_full_with_auto_delta(
406 cache: &mut SessionCache,
407 path: &str,
408 file_ref: &str,
409 short: &str,
410 ext: &str,
411 task: Option<&str>,
412) -> (String, usize) {
413 let Ok(disk_content) = read_file_lossy(path) else {
414 cache.record_cache_hit(path);
415 let out = if let Some(existing) = cache.get(path) {
416 format!(
417 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
418 existing.read_count, existing.line_count
419 )
420 } else {
421 format!("[file read failed and no cached version available] {file_ref}={short}")
422 };
423 let sent = count_tokens(&out);
424 return (out, sent);
425 };
426
427 let old_content = cache
428 .get(path)
429 .map(|e| e.content.clone())
430 .unwrap_or_default();
431 let store_result = cache.store(path, disk_content.clone());
432
433 if store_result.was_hit {
434 if store_result.full_content_delivered {
435 let out = format!(
436 "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
437 store_result.read_count, store_result.line_count
438 );
439 let sent = count_tokens(&out);
440 return (out, sent);
441 }
442 cache.mark_full_delivered(path);
443 return format_full_output(
444 file_ref,
445 short,
446 ext,
447 &disk_content,
448 store_result.original_tokens,
449 store_result.line_count,
450 task,
451 );
452 }
453
454 let diff = compressor::diff_content(&old_content, &disk_content);
455 let diff_tokens = count_tokens(&diff);
456 let full_tokens = store_result.original_tokens;
457
458 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
459 let savings = protocol::format_savings(full_tokens, diff_tokens);
460 let out = format!(
461 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
462 disk_content.lines().count()
463 );
464 return (out, diff_tokens);
465 }
466
467 format_full_output(
468 file_ref,
469 short,
470 ext,
471 &disk_content,
472 store_result.original_tokens,
473 store_result.line_count,
474 task,
475 )
476}
477
478fn format_full_output(
479 file_ref: &str,
480 short: &str,
481 ext: &str,
482 content: &str,
483 original_tokens: usize,
484 line_count: usize,
485 task: Option<&str>,
486) -> (String, usize) {
487 let tokens = original_tokens;
488 let metadata = build_header(file_ref, short, ext, content, line_count, true);
489
490 let mut reordered: Option<String> = None;
491 {
492 let profile = crate::core::profiles::active_profile();
493 let cfg = profile.layout;
494 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
495 let task_str = task.unwrap_or("");
496 if !task_str.is_empty() {
497 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
498 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
499 content, &keywords, &cfg,
500 );
501 if !r.skipped && r.changed {
502 reordered = Some(r.output);
503 }
504 }
505 }
506 }
507
508 let content_for_output = reordered.as_deref().unwrap_or(content);
509
510 let mut sym = SymbolMap::new();
511 let idents = symbol_map::extract_identifiers(content_for_output, ext);
512 for ident in &idents {
513 sym.register(ident);
514 }
515
516 if sym.len() >= 3 {
517 let sym_table = sym.format_table();
518 let compressed = sym.apply(content_for_output);
519 let original_tok = count_tokens(content_for_output);
520 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
521 let net_saving = original_tok.saturating_sub(compressed_tok);
522 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
523 let output = format!("{metadata}\n{compressed}{sym_table}");
524 let sent = count_tokens(&output);
525 let savings = protocol::format_savings(tokens, sent);
526 return (format!("{output}\n{savings}"), sent);
527 }
528 }
529
530 let output = format!("{metadata}\n{content_for_output}");
531 let sent = count_tokens(&output);
532 let savings = protocol::format_savings(tokens, sent);
533 (format!("{output}\n{savings}"), sent)
534}
535
536fn build_header(
537 file_ref: &str,
538 short: &str,
539 ext: &str,
540 content: &str,
541 line_count: usize,
542 include_deps: bool,
543) -> String {
544 let mut header = format!("{file_ref}={short} {line_count}L");
545
546 if include_deps {
547 let dep_info = deps::extract_deps(content, ext);
548 if !dep_info.imports.is_empty() {
549 let imports_str: Vec<&str> = dep_info
550 .imports
551 .iter()
552 .take(8)
553 .map(std::string::String::as_str)
554 .collect();
555 header.push_str(&format!("\n deps {}", imports_str.join(",")));
556 }
557 if !dep_info.exports.is_empty() {
558 let exports_str: Vec<&str> = dep_info
559 .exports
560 .iter()
561 .take(8)
562 .map(std::string::String::as_str)
563 .collect();
564 header.push_str(&format!("\n exports {}", exports_str.join(",")));
565 }
566 }
567
568 header
569}
570
/// Renders `content` according to `mode` and returns `(text, sent)`, where `sent` is the token
/// count this mode reports for its output.
///
/// Supported modes: `"auto"`, `"full"`, `"signatures"`, `"map"`, `"aggressive"`, `"entropy"`,
/// `"task"`, `"reference"` and `"lines:<ranges>"`. Any other value falls back to the full
/// content prefixed with a warning. Most reduced modes end with the compressed-output hint
/// from `append_compressed_hint`; `"reference"`, `"lines:"` and the fallbacks do not.
///
/// * `file_ref` / `short` — reference label and shortened path used in the header.
/// * `ext` — file extension, passed to the language-aware extractors.
/// * `original_tokens` — token count of the unprocessed file, used for the savings line.
/// * `crp_mode` — selects TDD vs compact signature rendering.
/// * `file_path` — full path, used in hints and for project-root detection.
/// * `task` — optional task description; required for `"task"` mode to filter anything.
// The argument list mirrors everything the individual modes need; see the parameter list above.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve to a concrete mode, then render with it.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header, up to eight imports, then one line per extracted signature.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            // `sent` is measured before the savings line and hint are appended.
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Outline: all imports and exports plus the exported / top-level signatures.
        "map" => {
            // PHP has a dedicated map compressor; use it when it produces a result.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
                    let sent = count_tokens(&output);
                    let savings = protocol::format_savings(original_tokens, sent);
                    output.push('\n');
                    output.push_str(&savings);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");

            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Keep only signatures that are exported or sit at indent level 0.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Compressed body: optional AST pruning, then task-aware or generic compression.
        "aggressive" => {
            // AST pruning is only available when built with the `tree-sitter` feature.
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // Use the task-aware compressor when the latest session has a structured intent.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            // NOTE(review): presumably guards against over-compression relative to the
            // original content — confirm against `compressor::safeguard_ratio`.
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Emit the symbol-mapped form only when it saves at least 5% of the tokens.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            // In this mode `sent` counts the compressed body only, not the header.
            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        // Entropy-based compression; the header carries the average entropy and techniques.
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Feed back whether this run saved more than 15% of the original tokens.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Keep only content relevant to the task's keywords, plus graph context.
        "task" => {
            let task_str = task.unwrap_or("");
            // Without a task there is nothing to filter by: return the whole file.
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            // Same fallback when the task yields no keywords.
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            // NOTE(review): 0.3 is passed straight to the filter; its exact meaning is
            // defined in `task_relevance::information_bottleneck_filter` — confirm there.
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = format!(
                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
            );
            let project_root = detect_project_root(file_path);
            // Graph context is optional; an empty string is used when none is available.
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            // `sent` is the sum of the parts' token counts, not a count of the joined text.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary: line count, token count and extension.
        "reference" => {
            let tok = count_tokens(content);
            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        // "lines:<ranges>" — everything after the 6-byte "lines:" prefix is the range spec.
        mode if mode.starts_with("lines:") => {
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
            // `sent` counts the extracted lines only, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        // Unrecognised mode: warn and return the unmodified content.
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
833
/// Extracts the 1-based lines named by `range_str` from `content`, each prefixed with its
/// right-aligned line number.
///
/// `range_str` is a comma-separated list of single numbers (`7`) and inclusive ranges (`3-9`).
/// In a range, a missing or unparsable start means line 1 and a missing or unparsable end
/// means the last line; both ends are clamped to the file. Single numbers outside the file and
/// unparsable entries are skipped. Entries are emitted in the order given, so overlapping
/// entries repeat lines.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let numbered = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().map_or(1, |v| v.max(1));
                let last = hi.trim().parse::<usize>().map_or(total, |v| v.min(total));
                // `first >= 1` and `last <= total`, so every index in the range is valid;
                // a reversed or out-of-file range is simply empty.
                picked.extend((first..=last).map(&numbered));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        picked.push(numbered(n));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
862
863fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
864 let short = protocol::shorten_path(path);
865 let old_content = cache.get(path).map(|e| e.content.clone());
866
867 let new_content = match read_file_lossy(path) {
868 Ok(c) => c,
869 Err(e) => {
870 let msg = format!("ERROR: {e}");
871 let tokens = count_tokens(&msg);
872 return (msg, tokens);
873 }
874 };
875
876 let original_tokens = count_tokens(&new_content);
877
878 let diff_output = if let Some(old) = &old_content {
879 compressor::diff_content(old, &new_content)
880 } else {
881 format!("[first read]\n{new_content}")
882 };
883
884 cache.store(path, new_content);
885
886 let sent = count_tokens(&diff_output);
887 let savings = protocol::format_savings(original_tokens, sent);
888 (
889 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
890 sent,
891 )
892}
893
// Unit tests for header formatting, the individual read modes and cache invalidation.
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    // The header must not use square brackets and must start with `<ref>=<path> <n>L`.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    // When deps are present they appear on their own indented line, not as `deps:[...]`.
    #[test]
    fn test_header_toon_deps_indented() {
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        // Only checked when the extractor actually found deps.
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
    }

    // The current header must not cost more tokens than the older bracketed format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    // Each of these symbols must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in &symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    // Task mode with a matching task must shrink the output and mark it as task-filtered.
    #[test]
    fn test_task_mode_filters_content() {
        // 200 lines, one in every 20 mentioning `validate_token`.
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let task = Some("fix bug in validate_token");
        let (result, result_tokens) = process_mode(
            &content,
            "task",
            "F1",
            "test.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "test.rs",
            task,
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    // Task mode without a task falls back to the full content and says so.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "task",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    // Reference mode yields at most three lines and mentions line and token counts.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "reference",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        let lines: Vec<&str> = result.lines().collect();
        assert!(
            lines.len() <= 3,
            "reference mode should be very compact, got {} lines",
            lines.len()
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    // A cached `lines:` read must pick up a rewritten file once its mtime changes.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        // Output line 0 is the header; line 1 is the first extracted line (`<n>| <text>`).
        let l1: Vec<&str> = r1.content.lines().collect();
        let got1 = l1.get(1).copied().unwrap_or_default().trim();
        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
        assert_eq!(got1, "one");

        // Wait so the rewrite gets a different mtime (presumably for filesystems with
        // one-second timestamp resolution).
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l2: Vec<&str> = r2.content.lines().collect();
        let got2 = l2.get(1).copied().unwrap_or_default().trim();
        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
        assert_eq!(got2, "two");
    }

    // Compares token counts of full / task / signatures / reference on a synthetic file and
    // prints the results; skipped under tarpaulin.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let (_full_output, full_tok) = process_mode(
            &content,
            "full",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_task_output, task_tok) = process_mode(
            &content,
            "task",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_sig_output, sig_tok) = process_mode(
            &content,
            "signatures",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_ref_output, ref_tok) = process_mode(
            &content,
            "reference",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    // Builds a synthetic Rust source: a fixed preamble with a `validate_token` method,
    // padded with generated handler lines up to roughly `lines` lines, plus a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        let mut code = Vec::with_capacity(lines);
        code.push("use std::collections::HashMap;".to_string());
        code.push("use crate::core::auth;".to_string());
        code.push(String::new());
        code.push("pub struct Server {".to_string());
        code.push(" config: Config,".to_string());
        code.push(" cache: HashMap<String, String>,".to_string());
        code.push("}".to_string());
        code.push(String::new());
        code.push("impl Server {".to_string());
        code.push(
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
                .to_string(),
        );
        code.push(" let decoded = auth::decode_jwt(token)?;".to_string());
        code.push(" if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
        code.push(" return Err(AuthError::Expired);".to_string());
        code.push(" }".to_string());
        code.push(" Ok(decoded.claims)".to_string());
        code.push(" }".to_string());
        code.push(String::new());

        // Fill the rest: every 30th line opens a handler, the line before the next one
        // closes it.
        let remaining = lines.saturating_sub(code.len());
        for i in 0..remaining {
            if i % 30 == 0 {
                code.push(format!(
                    " pub fn handler_{i}(&self, req: Request) -> Response {{"
                ));
            } else if i % 30 == 29 {
                code.push(" }".to_string());
            } else {
                code.push(format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
            }
        }
        code.push("}".to_string());
        code.join("\n")
    }
}