1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus bookkeeping about how it
/// was produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Text handed back to the caller.
    pub content: String,
    /// Mode that was actually applied (for example `"full"`, `"diff"`,
    /// `"error"`, or the concrete mode selected when `"auto"` was requested).
    pub resolved_mode: String,
    /// Token count reported for the produced output.
    pub output_tokens: usize,
}
22
/// Marker line added to compressed outputs, telling the reader that the
/// complete source is available via `mode="full"`.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Read modes treated as cacheable by [`is_cacheable_mode`].
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`].
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!("{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45 let cap = crate::core::limits::max_read_bytes();
46 if let Ok(meta) = std::fs::metadata(path) {
47 if meta.len() > cap as u64 {
48 return Err(std::io::Error::other(format!(
49 "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
50 meta.len(),
51 cap
52 )));
53 }
54 }
55 let bytes = std::fs::read(path)?;
56 match String::from_utf8(bytes) {
57 Ok(s) => Ok(s),
58 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
59 }
60}
61
62pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
64 handle_with_options(cache, path, mode, false, crp_mode, None)
65}
66
67pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
69 handle_with_options(cache, path, mode, true, crp_mode, None)
70}
71
72pub fn handle_with_task(
74 cache: &mut SessionCache,
75 path: &str,
76 mode: &str,
77 crp_mode: CrpMode,
78 task: Option<&str>,
79) -> String {
80 handle_with_options(cache, path, mode, false, crp_mode, task)
81}
82
83pub fn handle_with_task_resolved(
85 cache: &mut SessionCache,
86 path: &str,
87 mode: &str,
88 crp_mode: CrpMode,
89 task: Option<&str>,
90) -> ReadOutput {
91 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
92}
93
94pub fn handle_fresh_with_task(
96 cache: &mut SessionCache,
97 path: &str,
98 mode: &str,
99 crp_mode: CrpMode,
100 task: Option<&str>,
101) -> String {
102 handle_with_options(cache, path, mode, true, crp_mode, task)
103}
104
105pub fn handle_fresh_with_task_resolved(
107 cache: &mut SessionCache,
108 path: &str,
109 mode: &str,
110 crp_mode: CrpMode,
111 task: Option<&str>,
112) -> ReadOutput {
113 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
114}
115
116fn handle_with_options(
117 cache: &mut SessionCache,
118 path: &str,
119 mode: &str,
120 fresh: bool,
121 crp_mode: CrpMode,
122 task: Option<&str>,
123) -> String {
124 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
125}
126
127fn handle_with_options_resolved(
128 cache: &mut SessionCache,
129 path: &str,
130 mode: &str,
131 fresh: bool,
132 crp_mode: CrpMode,
133 task: Option<&str>,
134) -> ReadOutput {
135 let file_ref = cache.get_file_ref(path);
136 let short = protocol::shorten_path(path);
137 let ext = Path::new(path)
138 .extension()
139 .and_then(|e| e.to_str())
140 .unwrap_or("");
141
142 if fresh {
143 cache.invalidate(path);
144 }
145
146 if mode == "diff" {
147 let (out, sent) = handle_diff(cache, path, &file_ref);
148 return ReadOutput {
149 content: out,
150 resolved_mode: "diff".into(),
151 output_tokens: sent,
152 };
153 }
154
155 if mode != "full" {
156 if let Some(existing) = cache.get(path) {
157 let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
158 if stale {
159 cache.invalidate(path);
160 }
161 }
162 }
163
164 if let Some(existing) = cache.get(path) {
165 if mode == "full" {
166 let (out, sent) =
167 handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
168 let out = crate::core::redaction::redact_text_if_enabled(&out);
169 return ReadOutput {
170 content: out,
171 resolved_mode: "full".into(),
172 output_tokens: sent,
173 };
174 }
175 let content = existing.content.clone();
176 let original_tokens = existing.original_tokens;
177 let resolved_mode = if mode == "auto" {
178 resolve_auto_mode(path, original_tokens, task)
179 } else {
180 mode.to_string()
181 };
182 if is_cacheable_mode(&resolved_mode) {
183 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
184 if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
185 let sent = count_tokens(cached_output);
186 let out = crate::core::redaction::redact_text_if_enabled(cached_output);
187 return ReadOutput {
188 content: out,
189 resolved_mode,
190 output_tokens: sent,
191 };
192 }
193 }
194 let (out, sent) = process_mode(
195 &content,
196 &resolved_mode,
197 &file_ref,
198 &short,
199 ext,
200 original_tokens,
201 crp_mode,
202 path,
203 task,
204 );
205 if is_cacheable_mode(&resolved_mode) {
206 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
207 cache.set_compressed(path, &cache_key, out.clone());
208 }
209 let out = crate::core::redaction::redact_text_if_enabled(&out);
210 return ReadOutput {
211 content: out,
212 resolved_mode,
213 output_tokens: sent,
214 };
215 }
216
217 let content = match read_file_lossy(path) {
218 Ok(c) => c,
219 Err(e) => {
220 let msg = format!("ERROR: {e}");
221 let tokens = count_tokens(&msg);
222 return ReadOutput {
223 content: msg,
224 resolved_mode: "error".into(),
225 output_tokens: tokens,
226 };
227 }
228 };
229
230 let similar_hint = find_semantic_similar(path, &content);
231 let graph_hint = build_graph_related_hint(path);
232
233 let store_result = cache.store(path, content.clone());
234
235 update_semantic_index(path, &content);
236
237 if mode == "full" {
238 let (mut output, sent) = format_full_output(
239 &file_ref,
240 &short,
241 ext,
242 &content,
243 store_result.original_tokens,
244 store_result.line_count,
245 task,
246 );
247 if let Some(hint) = &graph_hint {
248 output.push_str(&format!("\n{hint}"));
249 }
250 if let Some(hint) = similar_hint {
251 output.push_str(&format!("\n{hint}"));
252 }
253 let output = crate::core::redaction::redact_text_if_enabled(&output);
254 return ReadOutput {
255 content: output,
256 resolved_mode: "full".into(),
257 output_tokens: sent,
258 };
259 }
260
261 let resolved_mode = if mode == "auto" {
262 resolve_auto_mode(path, store_result.original_tokens, task)
263 } else {
264 mode.to_string()
265 };
266
267 let (mut output, _sent) = process_mode(
268 &content,
269 &resolved_mode,
270 &file_ref,
271 &short,
272 ext,
273 store_result.original_tokens,
274 crp_mode,
275 path,
276 task,
277 );
278 if is_cacheable_mode(&resolved_mode) {
279 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
280 cache.set_compressed(path, &cache_key, output.clone());
281 }
282 if let Some(hint) = &graph_hint {
283 output.push_str(&format!("\n{hint}"));
284 }
285 if let Some(hint) = similar_hint {
286 output.push_str(&format!("\n{hint}"));
287 }
288 let output = crate::core::redaction::redact_text_if_enabled(&output);
289 let final_tokens = count_tokens(&output);
290 ReadOutput {
291 content: output,
292 resolved_mode,
293 output_tokens: final_tokens,
294 }
295}
296
297fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
298 let intent_query = task.unwrap_or("read");
301 let route = crate::core::intent_router::route_v1(intent_query);
302 let intent_mode = &route.decision.effective_read_mode;
303 if intent_mode != "auto" && intent_mode != "reference" {
304 return intent_mode.clone();
305 }
306
307 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
309 let predictor = crate::core::mode_predictor::ModePredictor::new();
310 let mut predicted = predictor
311 .predict_best_mode(&sig)
312 .unwrap_or_else(|| "full".to_string());
313 if predicted == "auto" {
314 predicted = "full".to_string();
315 }
316
317 if let Some(project_root) =
319 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
320 {
321 let ext = std::path::Path::new(file_path)
322 .extension()
323 .and_then(|e| e.to_str())
324 .unwrap_or("");
325 let bucket = match original_tokens {
326 0..=2000 => "sm",
327 2001..=10000 => "md",
328 10001..=50000 => "lg",
329 _ => "xl",
330 };
331 let bandit_key = format!("{ext}_{bucket}");
332 let mut store = crate::core::bandit::BanditStore::load(&project_root);
333 let bandit = store.get_or_create(&bandit_key);
334 let arm = bandit.select_arm();
335 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
336 predicted = "aggressive".to_string();
337 }
338 }
339
340 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
342 let chosen = policy.choose_auto_mode(task, &predicted);
343
344 if original_tokens > 2000 {
345 if predicted == "map" || predicted == "signatures" {
346 if chosen != "map" && chosen != "signatures" {
347 return predicted;
348 }
349 } else if chosen == "full" && predicted != "full" {
350 return predicted;
351 }
352 }
353
354 chosen
355}
356
357fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
358 let project_root = detect_project_root(path);
359 let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
360
361 let similar = index.find_similar(content, 0.7);
362 let relevant: Vec<_> = similar
363 .into_iter()
364 .filter(|(p, _)| p != path)
365 .take(3)
366 .collect();
367
368 if relevant.is_empty() {
369 return None;
370 }
371
372 let hints: Vec<String> = relevant
373 .iter()
374 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
375 .collect();
376
377 Some(format!(
378 "[semantic: {} similar file(s) in cache]\n{}",
379 relevant.len(),
380 hints.join("\n")
381 ))
382}
383
384fn update_semantic_index(path: &str, content: &str) {
385 let project_root = detect_project_root(path);
386 let session_id = format!("{}", std::process::id());
387 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
388 index.add_file(path, content, &session_id);
389 let _ = index.save(&project_root);
390}
391
392fn detect_project_root(path: &str) -> String {
393 crate::core::protocol::detect_project_root_or_cwd(path)
394}
395
396fn build_graph_related_hint(path: &str) -> Option<String> {
397 let project_root = detect_project_root(path);
398 crate::core::graph_context::build_related_hint(path, &project_root, 5)
399}
400
401const AUTO_DELTA_THRESHOLD: f64 = 0.6;
402
403fn handle_full_with_auto_delta(
405 cache: &mut SessionCache,
406 path: &str,
407 file_ref: &str,
408 short: &str,
409 ext: &str,
410 task: Option<&str>,
411) -> (String, usize) {
412 let Ok(disk_content) = read_file_lossy(path) else {
413 cache.record_cache_hit(path);
414 let out = if let Some(existing) = cache.get(path) {
415 format!(
416 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
417 existing.read_count, existing.line_count
418 )
419 } else {
420 format!("[file read failed and no cached version available] {file_ref}={short}")
421 };
422 let sent = count_tokens(&out);
423 return (out, sent);
424 };
425
426 let old_content = cache
427 .get(path)
428 .map(|e| e.content.clone())
429 .unwrap_or_default();
430 let store_result = cache.store(path, disk_content.clone());
431
432 if store_result.was_hit {
433 let out = format!(
434 "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
435 store_result.read_count, store_result.line_count
436 );
437 let sent = count_tokens(&out);
438 return (out, sent);
439 }
440
441 let diff = compressor::diff_content(&old_content, &disk_content);
442 let diff_tokens = count_tokens(&diff);
443 let full_tokens = store_result.original_tokens;
444
445 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
446 let savings = protocol::format_savings(full_tokens, diff_tokens);
447 let out = format!(
448 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
449 disk_content.lines().count()
450 );
451 return (out, diff_tokens);
452 }
453
454 format_full_output(
455 file_ref,
456 short,
457 ext,
458 &disk_content,
459 store_result.original_tokens,
460 store_result.line_count,
461 task,
462 )
463}
464
465fn format_full_output(
466 file_ref: &str,
467 short: &str,
468 ext: &str,
469 content: &str,
470 original_tokens: usize,
471 line_count: usize,
472 task: Option<&str>,
473) -> (String, usize) {
474 let tokens = original_tokens;
475 let metadata = build_header(file_ref, short, ext, content, line_count, true);
476
477 let mut reordered: Option<String> = None;
478 {
479 let profile = crate::core::profiles::active_profile();
480 let cfg = profile.layout;
481 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
482 let task_str = task.unwrap_or("");
483 if !task_str.is_empty() {
484 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
485 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
486 content, &keywords, &cfg,
487 );
488 if !r.skipped && r.changed {
489 reordered = Some(r.output);
490 }
491 }
492 }
493 }
494
495 let content_for_output = reordered.as_deref().unwrap_or(content);
496
497 let mut sym = SymbolMap::new();
498 let idents = symbol_map::extract_identifiers(content_for_output, ext);
499 for ident in &idents {
500 sym.register(ident);
501 }
502
503 if sym.len() >= 3 {
504 let sym_table = sym.format_table();
505 let compressed = sym.apply(content_for_output);
506 let original_tok = count_tokens(content_for_output);
507 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
508 let net_saving = original_tok.saturating_sub(compressed_tok);
509 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
510 let output = format!("{metadata}\n{compressed}{sym_table}");
511 let sent = count_tokens(&output);
512 let savings = protocol::format_savings(tokens, sent);
513 return (format!("{output}\n{savings}"), sent);
514 }
515 }
516
517 let output = format!("{metadata}\n{content_for_output}");
518 let sent = count_tokens(&output);
519 let savings = protocol::format_savings(tokens, sent);
520 (format!("{output}\n{savings}"), sent)
521}
522
523fn build_header(
524 file_ref: &str,
525 short: &str,
526 ext: &str,
527 content: &str,
528 line_count: usize,
529 include_deps: bool,
530) -> String {
531 let mut header = format!("{file_ref}={short} {line_count}L");
532
533 if include_deps {
534 let dep_info = deps::extract_deps(content, ext);
535 if !dep_info.imports.is_empty() {
536 let imports_str: Vec<&str> = dep_info
537 .imports
538 .iter()
539 .take(8)
540 .map(std::string::String::as_str)
541 .collect();
542 header.push_str(&format!("\n deps {}", imports_str.join(",")));
543 }
544 if !dep_info.exports.is_empty() {
545 let exports_str: Vec<&str> = dep_info
546 .exports
547 .iter()
548 .take(8)
549 .map(std::string::String::as_str)
550 .collect();
551 header.push_str(&format!("\n exports {}", exports_str.join(",")));
552 }
553 }
554
555 header
556}
557
558#[allow(clippy::too_many_arguments)]
559fn process_mode(
560 content: &str,
561 mode: &str,
562 file_ref: &str,
563 short: &str,
564 ext: &str,
565 original_tokens: usize,
566 crp_mode: CrpMode,
567 file_path: &str,
568 task: Option<&str>,
569) -> (String, usize) {
570 let line_count = content.lines().count();
571
572 match mode {
573 "auto" => {
574 let chosen = resolve_auto_mode(file_path, original_tokens, task);
575 process_mode(
576 content,
577 &chosen,
578 file_ref,
579 short,
580 ext,
581 original_tokens,
582 crp_mode,
583 file_path,
584 task,
585 )
586 }
587 "full" => format_full_output(
588 file_ref,
589 short,
590 ext,
591 content,
592 original_tokens,
593 line_count,
594 task,
595 ),
596 "signatures" => {
597 let sigs = signatures::extract_signatures(content, ext);
598 let dep_info = deps::extract_deps(content, ext);
599
600 let mut output = format!("{file_ref}={short} {line_count}L");
601 if !dep_info.imports.is_empty() {
602 let imports_str: Vec<&str> = dep_info
603 .imports
604 .iter()
605 .take(8)
606 .map(std::string::String::as_str)
607 .collect();
608 output.push_str(&format!("\n deps {}", imports_str.join(",")));
609 }
610 for sig in &sigs {
611 output.push('\n');
612 if crp_mode.is_tdd() {
613 output.push_str(&sig.to_tdd());
614 } else {
615 output.push_str(&sig.to_compact());
616 }
617 }
618 let sent = count_tokens(&output);
619 let savings = protocol::format_savings(original_tokens, sent);
620 (
621 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
622 sent,
623 )
624 }
625 "map" => {
626 if ext == "php" {
627 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
628 {
629 let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
630 let sent = count_tokens(&output);
631 let savings = protocol::format_savings(original_tokens, sent);
632 output.push('\n');
633 output.push_str(&savings);
634 return (append_compressed_hint(&output, file_path), sent);
635 }
636 }
637
638 let sigs = signatures::extract_signatures(content, ext);
639 let dep_info = deps::extract_deps(content, ext);
640
641 let mut output = format!("{file_ref}={short} {line_count}L");
642
643 if !dep_info.imports.is_empty() {
644 output.push_str("\n deps: ");
645 output.push_str(&dep_info.imports.join(", "));
646 }
647
648 if !dep_info.exports.is_empty() {
649 output.push_str("\n exports: ");
650 output.push_str(&dep_info.exports.join(", "));
651 }
652
653 let key_sigs: Vec<&signatures::Signature> = sigs
654 .iter()
655 .filter(|s| s.is_exported || s.indent == 0)
656 .collect();
657
658 if !key_sigs.is_empty() {
659 output.push_str("\n API:");
660 for sig in &key_sigs {
661 output.push_str("\n ");
662 if crp_mode.is_tdd() {
663 output.push_str(&sig.to_tdd());
664 } else {
665 output.push_str(&sig.to_compact());
666 }
667 }
668 }
669
670 let sent = count_tokens(&output);
671 let savings = protocol::format_savings(original_tokens, sent);
672 (
673 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
674 sent,
675 )
676 }
677 "aggressive" => {
678 #[cfg(feature = "tree-sitter")]
679 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
680 #[cfg(not(feature = "tree-sitter"))]
681 let ast_pruned: Option<String> = None;
682
683 let base = ast_pruned.as_deref().unwrap_or(content);
684
685 let session_intent = crate::core::session::SessionState::load_latest()
686 .and_then(|s| s.active_structured_intent);
687 let raw = if let Some(ref intent) = session_intent {
688 compressor::task_aware_compress(base, Some(ext), intent)
689 } else {
690 compressor::aggressive_compress(base, Some(ext))
691 };
692 let compressed = compressor::safeguard_ratio(content, &raw);
693 let header = build_header(file_ref, short, ext, content, line_count, true);
694
695 let mut sym = SymbolMap::new();
696 let idents = symbol_map::extract_identifiers(&compressed, ext);
697 for ident in &idents {
698 sym.register(ident);
699 }
700
701 if sym.len() >= 3 {
702 let sym_table = sym.format_table();
703 let sym_applied = sym.apply(&compressed);
704 let orig_tok = count_tokens(&compressed);
705 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
706 let net = orig_tok.saturating_sub(comp_tok);
707 if orig_tok > 0 && net * 100 / orig_tok >= 5 {
708 let savings = protocol::format_savings(original_tokens, comp_tok);
709 return (
710 append_compressed_hint(
711 &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
712 file_path,
713 ),
714 comp_tok,
715 );
716 }
717 let savings = protocol::format_savings(original_tokens, orig_tok);
718 return (
719 append_compressed_hint(
720 &format!("{header}\n{compressed}\n{savings}"),
721 file_path,
722 ),
723 orig_tok,
724 );
725 }
726
727 let sent = count_tokens(&compressed);
728 let savings = protocol::format_savings(original_tokens, sent);
729 (
730 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
731 sent,
732 )
733 }
734 "entropy" => {
735 let result = entropy::entropy_compress_adaptive(content, file_path);
736 let avg_h = entropy::analyze_entropy(content).avg_entropy;
737 let header = build_header(file_ref, short, ext, content, line_count, false);
738 let techs = result.techniques.join(", ");
739 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
740 let sent = count_tokens(&output);
741 let savings = protocol::format_savings(original_tokens, sent);
742 let compression_ratio = if original_tokens > 0 {
743 1.0 - (sent as f64 / original_tokens as f64)
744 } else {
745 0.0
746 };
747 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
748 (
749 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
750 sent,
751 )
752 }
753 "task" => {
754 let task_str = task.unwrap_or("");
755 if task_str.is_empty() {
756 let header = build_header(file_ref, short, ext, content, line_count, true);
757 let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
758 let sent = count_tokens(&out);
759 return (out, sent);
760 }
761 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
762 if keywords.is_empty() {
763 let header = build_header(file_ref, short, ext, content, line_count, true);
764 let out = format!(
765 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
766 );
767 let sent = count_tokens(&out);
768 return (out, sent);
769 }
770 let filtered =
771 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
772 let filtered_lines = filtered.lines().count();
773 let header = format!(
774 "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
775 );
776 let project_root = detect_project_root(file_path);
777 let graph_ctx = crate::core::graph_context::build_graph_context(
778 file_path,
779 &project_root,
780 Some(crate::core::graph_context::GraphContextOptions::default()),
781 )
782 .map(|c| crate::core::graph_context::format_graph_context(&c))
783 .unwrap_or_default();
784
785 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
786 let savings = protocol::format_savings(original_tokens, sent);
787 (
788 append_compressed_hint(
789 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
790 file_path,
791 ),
792 sent,
793 )
794 }
795 "reference" => {
796 let tok = count_tokens(content);
797 let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
798 let sent = count_tokens(&output);
799 let savings = protocol::format_savings(original_tokens, sent);
800 (format!("{output}\n{savings}"), sent)
801 }
802 mode if mode.starts_with("lines:") => {
803 let range_str = &mode[6..];
804 let extracted = extract_line_range(content, range_str);
805 let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
806 let sent = count_tokens(&extracted);
807 let savings = protocol::format_savings(original_tokens, sent);
808 (format!("{header}\n{extracted}\n{savings}"), sent)
809 }
810 unknown => {
811 let header = build_header(file_ref, short, ext, content, line_count, true);
812 let out = format!(
813 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
814 );
815 let sent = count_tokens(&out);
816 (out, sent)
817 }
818 }
819}
820
/// Extracts the lines named by `range_str` from `content`, each prefixed with
/// its right-aligned 1-based line number and `| `.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and
/// inclusive ranges (`3-9`). In a range, an unparsable start falls back to
/// line 1 and an unparsable end to the last line; both ends are clamped to
/// the file. Out-of-range or unparsable single numbers are skipped.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let picked: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|segment| -> Vec<String> {
            match segment.split_once('-') {
                Some((lo, hi)) => {
                    // Clamped to 1..=line_total, so every index below is valid.
                    let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                    let last = hi
                        .trim()
                        .parse::<usize>()
                        .unwrap_or(line_total)
                        .min(line_total);
                    (first..=last).map(&render).collect()
                }
                None => match segment.parse::<usize>() {
                    Ok(n) if (1..=line_total).contains(&n) => vec![render(n)],
                    _ => Vec::new(),
                },
            }
        })
        .collect();

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
849
850fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
851 let short = protocol::shorten_path(path);
852 let old_content = cache.get(path).map(|e| e.content.clone());
853
854 let new_content = match read_file_lossy(path) {
855 Ok(c) => c,
856 Err(e) => {
857 let msg = format!("ERROR: {e}");
858 let tokens = count_tokens(&msg);
859 return (msg, tokens);
860 }
861 };
862
863 let original_tokens = count_tokens(&new_content);
864
865 let diff_output = if let Some(old) = &old_content {
866 compressor::diff_content(old, &new_content)
867 } else {
868 format!("[first read]\n{new_content}")
869 };
870
871 cache.store(path, new_content);
872
873 let sent = count_tokens(&diff_output);
874 let savings = protocol::format_savings(original_tokens, sent);
875 (
876 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
877 sent,
878 )
879}
880
881#[cfg(test)]
882mod tests {
883 use super::*;
884 use std::time::Duration;
885
886 #[test]
887 fn test_header_toon_format_no_brackets() {
888 let content = "use std::io;\nfn main() {}\n";
889 let header = build_header("F1", "main.rs", "rs", content, 2, false);
890 assert!(!header.contains('['));
891 assert!(!header.contains(']'));
892 assert!(header.contains("F1=main.rs 2L"));
893 }
894
895 #[test]
896 fn test_header_toon_deps_indented() {
897 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
898 let header = build_header("F1", "main.rs", "rs", content, 3, true);
899 if header.contains("deps") {
900 assert!(
901 header.contains("\n deps "),
902 "deps should use indented TOON format"
903 );
904 assert!(
905 !header.contains("deps:["),
906 "deps should not use bracket format"
907 );
908 }
909 }
910
911 #[test]
912 fn test_header_toon_saves_tokens() {
913 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
914 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
915 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
916 let old_tokens = count_tokens(&old_header);
917 let new_tokens = count_tokens(&new_header);
918 assert!(
919 new_tokens <= old_tokens,
920 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
921 );
922 }
923
924 #[test]
925 fn test_tdd_symbols_are_compact() {
926 let symbols = [
927 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
928 ];
929 for sym in &symbols {
930 let tok = count_tokens(sym);
931 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
932 }
933 }
934
935 #[test]
936 fn test_task_mode_filters_content() {
937 let content = (0..200)
938 .map(|i| {
939 if i % 20 == 0 {
940 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
941 } else {
942 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
943 }
944 })
945 .collect::<Vec<_>>()
946 .join("\n");
947 let full_tokens = count_tokens(&content);
948 let task = Some("fix bug in validate_token");
949 let (result, result_tokens) = process_mode(
950 &content,
951 "task",
952 "F1",
953 "test.rs",
954 "rs",
955 full_tokens,
956 CrpMode::Off,
957 "test.rs",
958 task,
959 );
960 assert!(
961 result_tokens < full_tokens,
962 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
963 );
964 assert!(
965 result.contains("task-filtered"),
966 "output should contain task-filtered marker"
967 );
968 }
969
970 #[test]
971 fn test_task_mode_without_task_returns_full() {
972 let content = "fn main() {}\nfn helper() {}\n";
973 let tokens = count_tokens(content);
974 let (result, _sent) = process_mode(
975 content,
976 "task",
977 "F1",
978 "test.rs",
979 "rs",
980 tokens,
981 CrpMode::Off,
982 "test.rs",
983 None,
984 );
985 assert!(
986 result.contains("no task set"),
987 "should indicate no task: {result}"
988 );
989 }
990
991 #[test]
992 fn test_reference_mode_one_line() {
993 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
994 let tokens = count_tokens(content);
995 let (result, _sent) = process_mode(
996 content,
997 "reference",
998 "F1",
999 "test.rs",
1000 "rs",
1001 tokens,
1002 CrpMode::Off,
1003 "test.rs",
1004 None,
1005 );
1006 let lines: Vec<&str> = result.lines().collect();
1007 assert!(
1008 lines.len() <= 3,
1009 "reference mode should be very compact, got {} lines",
1010 lines.len()
1011 );
1012 assert!(result.contains("lines"), "should contain line count");
1013 assert!(result.contains("tok"), "should contain token count");
1014 }
1015
1016 #[test]
1017 fn cached_lines_mode_invalidates_on_mtime_change() {
1018 let dir = tempfile::tempdir().unwrap();
1019 let path = dir.path().join("file.txt");
1020 let p = path.to_string_lossy().to_string();
1021
1022 std::fs::write(&path, "one\nsecond\n").unwrap();
1023 let mut cache = SessionCache::new();
1024
1025 let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1026 let l1: Vec<&str> = r1.content.lines().collect();
1027 let got1 = l1.get(1).copied().unwrap_or_default().trim();
1028 let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1029 assert_eq!(got1, "one");
1030
1031 std::thread::sleep(Duration::from_secs(1));
1032 std::fs::write(&path, "two\nsecond\n").unwrap();
1033
1034 let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1035 let l2: Vec<&str> = r2.content.lines().collect();
1036 let got2 = l2.get(1).copied().unwrap_or_default().trim();
1037 let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1038 assert_eq!(got2, "two");
1039 }
1040
1041 #[test]
1042 #[cfg_attr(tarpaulin, ignore)]
1043 fn benchmark_task_conditioned_compression() {
1044 let content = generate_benchmark_code(200);
1046 let full_tokens = count_tokens(&content);
1047 let task = Some("fix authentication in validate_token");
1048
1049 let (_full_output, full_tok) = process_mode(
1050 &content,
1051 "full",
1052 "F1",
1053 "server.rs",
1054 "rs",
1055 full_tokens,
1056 CrpMode::Off,
1057 "server.rs",
1058 task,
1059 );
1060 let (_task_output, task_tok) = process_mode(
1061 &content,
1062 "task",
1063 "F1",
1064 "server.rs",
1065 "rs",
1066 full_tokens,
1067 CrpMode::Off,
1068 "server.rs",
1069 task,
1070 );
1071 let (_sig_output, sig_tok) = process_mode(
1072 &content,
1073 "signatures",
1074 "F1",
1075 "server.rs",
1076 "rs",
1077 full_tokens,
1078 CrpMode::Off,
1079 "server.rs",
1080 task,
1081 );
1082 let (_ref_output, ref_tok) = process_mode(
1083 &content,
1084 "reference",
1085 "F1",
1086 "server.rs",
1087 "rs",
1088 full_tokens,
1089 CrpMode::Off,
1090 "server.rs",
1091 task,
1092 );
1093
1094 eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1095 eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1096 eprintln!(" full: {full_tok:>6} tokens (baseline)");
1097 eprintln!(
1098 " task: {task_tok:>6} tokens ({:.0}% savings)",
1099 (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1100 );
1101 eprintln!(
1102 " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1103 (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1104 );
1105 eprintln!(
1106 " reference: {ref_tok:>6} tokens ({:.0}% savings)",
1107 (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1108 );
1109 eprintln!("================================================\n");
1110
1111 assert!(task_tok < full_tok, "task mode should save tokens");
1112 assert!(sig_tok < full_tok, "signatures should save tokens");
1113 assert!(ref_tok < sig_tok, "reference should be most compact");
1114 }
1115
1116 fn generate_benchmark_code(lines: usize) -> String {
1117 let mut code = Vec::with_capacity(lines);
1118 code.push("use std::collections::HashMap;".to_string());
1119 code.push("use crate::core::auth;".to_string());
1120 code.push(String::new());
1121 code.push("pub struct Server {".to_string());
1122 code.push(" config: Config,".to_string());
1123 code.push(" cache: HashMap<String, String>,".to_string());
1124 code.push("}".to_string());
1125 code.push(String::new());
1126 code.push("impl Server {".to_string());
1127 code.push(
1128 " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1129 .to_string(),
1130 );
1131 code.push(" let decoded = auth::decode_jwt(token)?;".to_string());
1132 code.push(" if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1133 code.push(" return Err(AuthError::Expired);".to_string());
1134 code.push(" }".to_string());
1135 code.push(" Ok(decoded.claims)".to_string());
1136 code.push(" }".to_string());
1137 code.push(String::new());
1138
1139 let remaining = lines.saturating_sub(code.len());
1140 for i in 0..remaining {
1141 if i % 30 == 0 {
1142 code.push(format!(
1143 " pub fn handler_{i}(&self, req: Request) -> Response {{"
1144 ));
1145 } else if i % 30 == 29 {
1146 code.push(" }".to_string());
1147 } else {
1148 code.push(format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1149 }
1150 }
1151 code.push("}".to_string());
1152 code.join("\n")
1153 }
1154}