1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Marker line added to compressed outputs so the reader knows the view is lossy.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Returns `output` followed by [`COMPRESSED_HINT`] and a ready-to-use
/// `ctx_read(...)` call that requests `file_path` with `mode="full"`.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    let mut hinted = String::with_capacity(
        output.len() + COMPRESSED_HINT.len() + file_path.len() + 32,
    );
    hinted.push_str(output);
    hinted.push('\n');
    hinted.push_str(COMPRESSED_HINT);
    hinted.push_str("\n ctx_read(\"");
    hinted.push_str(file_path);
    hinted.push_str("\", mode=\"full\")");
    hinted
}
18
19pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
21 let cap = crate::core::limits::max_read_bytes();
22 if let Ok(meta) = std::fs::metadata(path) {
23 if meta.len() > cap as u64 {
24 return Err(std::io::Error::other(format!(
25 "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
26 meta.len(),
27 cap
28 )));
29 }
30 }
31 let bytes = std::fs::read(path)?;
32 match String::from_utf8(bytes) {
33 Ok(s) => Ok(s),
34 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
35 }
36}
37
38pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
40 handle_with_options(cache, path, mode, false, crp_mode, None)
41}
42
43pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
45 handle_with_options(cache, path, mode, true, crp_mode, None)
46}
47
48pub fn handle_with_task(
50 cache: &mut SessionCache,
51 path: &str,
52 mode: &str,
53 crp_mode: CrpMode,
54 task: Option<&str>,
55) -> String {
56 handle_with_options(cache, path, mode, false, crp_mode, task)
57}
58
59pub fn handle_with_task_resolved(
61 cache: &mut SessionCache,
62 path: &str,
63 mode: &str,
64 crp_mode: CrpMode,
65 task: Option<&str>,
66) -> (String, String) {
67 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
68}
69
70pub fn handle_fresh_with_task(
72 cache: &mut SessionCache,
73 path: &str,
74 mode: &str,
75 crp_mode: CrpMode,
76 task: Option<&str>,
77) -> String {
78 handle_with_options(cache, path, mode, true, crp_mode, task)
79}
80
81pub fn handle_fresh_with_task_resolved(
83 cache: &mut SessionCache,
84 path: &str,
85 mode: &str,
86 crp_mode: CrpMode,
87 task: Option<&str>,
88) -> (String, String) {
89 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
90}
91
92fn handle_with_options(
93 cache: &mut SessionCache,
94 path: &str,
95 mode: &str,
96 fresh: bool,
97 crp_mode: CrpMode,
98 task: Option<&str>,
99) -> String {
100 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).0
101}
102
103fn handle_with_options_resolved(
104 cache: &mut SessionCache,
105 path: &str,
106 mode: &str,
107 fresh: bool,
108 crp_mode: CrpMode,
109 task: Option<&str>,
110) -> (String, String) {
111 let file_ref = cache.get_file_ref(path);
112 let short = protocol::shorten_path(path);
113 let ext = Path::new(path)
114 .extension()
115 .and_then(|e| e.to_str())
116 .unwrap_or("");
117
118 if fresh {
119 cache.invalidate(path);
120 }
121
122 if mode == "diff" {
123 return (handle_diff(cache, path, &file_ref), "diff".to_string());
124 }
125
126 if mode != "full" {
127 if let Some(existing) = cache.get(path) {
128 let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
129 if stale {
130 cache.invalidate(path);
131 }
132 }
133 }
134
135 if let Some(existing) = cache.get(path) {
136 if mode == "full" {
137 return (
138 handle_full_with_auto_delta(cache, path, &file_ref, &short, ext),
139 "full".to_string(),
140 );
141 }
142 let content = existing.content.clone();
143 let original_tokens = existing.original_tokens;
144 let resolved_mode = if mode == "auto" {
145 resolve_auto_mode(path, original_tokens, task)
146 } else {
147 mode.to_string()
148 };
149 let out = process_mode(
150 &content,
151 &resolved_mode,
152 &file_ref,
153 &short,
154 ext,
155 original_tokens,
156 crp_mode,
157 path,
158 task,
159 );
160 return (out, resolved_mode);
161 }
162
163 let content = match read_file_lossy(path) {
164 Ok(c) => c,
165 Err(e) => return (format!("ERROR: {e}"), "error".to_string()),
166 };
167
168 let similar_hint = find_semantic_similar(path, &content);
169
170 let store_result = cache.store(path, content.clone());
171
172 update_semantic_index(path, &content);
173
174 if mode == "full" {
175 let mut output = format_full_output(
176 &file_ref,
177 &short,
178 ext,
179 &content,
180 store_result.original_tokens,
181 store_result.line_count,
182 );
183 if let Some(hint) = similar_hint {
184 output.push_str(&format!("\n{hint}"));
185 }
186 return (output, "full".to_string());
187 }
188
189 let resolved_mode = if mode == "auto" {
190 resolve_auto_mode(path, store_result.original_tokens, task)
191 } else {
192 mode.to_string()
193 };
194
195 let mut output = process_mode(
196 &content,
197 &resolved_mode,
198 &file_ref,
199 &short,
200 ext,
201 store_result.original_tokens,
202 crp_mode,
203 path,
204 task,
205 );
206 if let Some(hint) = similar_hint {
207 output.push_str(&format!("\n{hint}"));
208 }
209 (output, resolved_mode)
210}
211
212fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
213 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
214 let predictor = crate::core::mode_predictor::ModePredictor::new();
215 let mut predicted = predictor
216 .predict_best_mode(&sig)
217 .unwrap_or_else(|| "full".to_string());
218 if predicted == "auto" {
219 predicted = "full".to_string();
220 }
221
222 if let Some(project_root) =
223 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
224 {
225 let ext = std::path::Path::new(file_path)
226 .extension()
227 .and_then(|e| e.to_str())
228 .unwrap_or("");
229 let bucket = match original_tokens {
230 0..=2000 => "sm",
231 2001..=10000 => "md",
232 10001..=50000 => "lg",
233 _ => "xl",
234 };
235 let bandit_key = format!("{ext}_{bucket}");
236 let mut store = crate::core::bandit::BanditStore::load(&project_root);
237 let bandit = store.get_or_create(&bandit_key);
238 let arm = bandit.select_arm();
239 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
240 predicted = "aggressive".to_string();
241 }
242 }
243
244 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
245 policy.choose_auto_mode(task, &predicted)
246}
247
248fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
249 let project_root = detect_project_root(path);
250 let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
251
252 let similar = index.find_similar(content, 0.7);
253 let relevant: Vec<_> = similar
254 .into_iter()
255 .filter(|(p, _)| p != path)
256 .take(3)
257 .collect();
258
259 if relevant.is_empty() {
260 return None;
261 }
262
263 let hints: Vec<String> = relevant
264 .iter()
265 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
266 .collect();
267
268 Some(format!(
269 "[semantic: {} similar file(s) in cache]\n{}",
270 relevant.len(),
271 hints.join("\n")
272 ))
273}
274
275fn update_semantic_index(path: &str, content: &str) {
276 let project_root = detect_project_root(path);
277 let session_id = format!("{}", std::process::id());
278 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
279 index.add_file(path, content, &session_id);
280 let _ = index.save(&project_root);
281}
282
283fn detect_project_root(path: &str) -> String {
284 crate::core::protocol::detect_project_root_or_cwd(path)
285}
286
287const AUTO_DELTA_THRESHOLD: f64 = 0.6;
288
289fn handle_full_with_auto_delta(
291 cache: &mut SessionCache,
292 path: &str,
293 file_ref: &str,
294 short: &str,
295 ext: &str,
296) -> String {
297 let Ok(disk_content) = read_file_lossy(path) else {
298 cache.record_cache_hit(path);
299 return if let Some(existing) = cache.get(path) {
300 format!(
301 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
302 existing.read_count, existing.line_count
303 )
304 } else {
305 format!("[file read failed and no cached version available] {file_ref}={short}")
306 };
307 };
308
309 let old_content = cache
310 .get(path)
311 .map(|e| e.content.clone())
312 .unwrap_or_default();
313 let store_result = cache.store(path, disk_content.clone());
314
315 if store_result.was_hit {
316 return format!(
317 "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
318 store_result.read_count, store_result.line_count
319 );
320 }
321
322 let diff = compressor::diff_content(&old_content, &disk_content);
323 let diff_tokens = count_tokens(&diff);
324 let full_tokens = store_result.original_tokens;
325
326 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
327 let savings = protocol::format_savings(full_tokens, diff_tokens);
328 return format!(
329 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
330 disk_content.lines().count()
331 );
332 }
333
334 format_full_output(
335 file_ref,
336 short,
337 ext,
338 &disk_content,
339 store_result.original_tokens,
340 store_result.line_count,
341 )
342}
343
344fn format_full_output(
345 file_ref: &str,
346 short: &str,
347 ext: &str,
348 content: &str,
349 original_tokens: usize,
350 line_count: usize,
351) -> String {
352 let tokens = original_tokens;
353 let metadata = build_header(file_ref, short, ext, content, line_count, true);
354
355 let mut sym = SymbolMap::new();
356 let idents = symbol_map::extract_identifiers(content, ext);
357 for ident in &idents {
358 sym.register(ident);
359 }
360
361 let sym_beneficial = if sym.len() >= 3 {
362 let sym_table = sym.format_table();
363 let compressed = sym.apply(content);
364 let original_tok = count_tokens(content);
365 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
366 let net_saving = original_tok.saturating_sub(compressed_tok);
367 original_tok > 0 && net_saving * 100 / original_tok >= 5
368 } else {
369 false
370 };
371
372 if sym_beneficial {
373 let compressed_content = sym.apply(content);
374 let sym_table = sym.format_table();
375 let output = format!("{compressed_content}{sym_table}\n{metadata}");
376 let sent = count_tokens(&output);
377 let savings = protocol::format_savings(tokens, sent);
378 return format!("{output}\n{savings}");
379 }
380
381 let output = format!("{content}\n{metadata}");
382 let sent = count_tokens(&output);
383 let savings = protocol::format_savings(tokens, sent);
384 format!("{output}\n{savings}")
385}
386
387fn build_header(
388 file_ref: &str,
389 short: &str,
390 ext: &str,
391 content: &str,
392 line_count: usize,
393 include_deps: bool,
394) -> String {
395 let mut header = format!("{file_ref}={short} {line_count}L");
396
397 if include_deps {
398 let dep_info = deps::extract_deps(content, ext);
399 if !dep_info.imports.is_empty() {
400 let imports_str: Vec<&str> = dep_info
401 .imports
402 .iter()
403 .take(8)
404 .map(std::string::String::as_str)
405 .collect();
406 header.push_str(&format!("\n deps {}", imports_str.join(",")));
407 }
408 if !dep_info.exports.is_empty() {
409 let exports_str: Vec<&str> = dep_info
410 .exports
411 .iter()
412 .take(8)
413 .map(std::string::String::as_str)
414 .collect();
415 header.push_str(&format!("\n exports {}", exports_str.join(",")));
416 }
417 }
418
419 header
420}
421
422#[allow(clippy::too_many_arguments)]
423fn process_mode(
424 content: &str,
425 mode: &str,
426 file_ref: &str,
427 short: &str,
428 ext: &str,
429 original_tokens: usize,
430 crp_mode: CrpMode,
431 file_path: &str,
432 task: Option<&str>,
433) -> String {
434 let line_count = content.lines().count();
435
436 match mode {
437 "auto" => {
438 let chosen = resolve_auto_mode(file_path, original_tokens, task);
439 process_mode(
440 content,
441 &chosen,
442 file_ref,
443 short,
444 ext,
445 original_tokens,
446 crp_mode,
447 file_path,
448 task,
449 )
450 }
451 "signatures" => {
452 let sigs = signatures::extract_signatures(content, ext);
453 let dep_info = deps::extract_deps(content, ext);
454
455 let mut output = format!("{file_ref}={short} {line_count}L");
456 if !dep_info.imports.is_empty() {
457 let imports_str: Vec<&str> = dep_info
458 .imports
459 .iter()
460 .take(8)
461 .map(std::string::String::as_str)
462 .collect();
463 output.push_str(&format!("\n deps {}", imports_str.join(",")));
464 }
465 for sig in &sigs {
466 output.push('\n');
467 if crp_mode.is_tdd() {
468 output.push_str(&sig.to_tdd());
469 } else {
470 output.push_str(&sig.to_compact());
471 }
472 }
473 let sent = count_tokens(&output);
474 let savings = protocol::format_savings(original_tokens, sent);
475 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
476 }
477 "map" => {
478 if ext == "php" {
479 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
480 {
481 let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
482 let sent = count_tokens(&output);
483 let savings = protocol::format_savings(original_tokens, sent);
484 output.push('\n');
485 output.push_str(&savings);
486 return append_compressed_hint(&output, file_path);
487 }
488 }
489
490 let sigs = signatures::extract_signatures(content, ext);
491 let dep_info = deps::extract_deps(content, ext);
492
493 let mut output = format!("{file_ref}={short} {line_count}L");
494
495 if !dep_info.imports.is_empty() {
496 output.push_str("\n deps: ");
497 output.push_str(&dep_info.imports.join(", "));
498 }
499
500 if !dep_info.exports.is_empty() {
501 output.push_str("\n exports: ");
502 output.push_str(&dep_info.exports.join(", "));
503 }
504
505 let key_sigs: Vec<&signatures::Signature> = sigs
506 .iter()
507 .filter(|s| s.is_exported || s.indent == 0)
508 .collect();
509
510 if !key_sigs.is_empty() {
511 output.push_str("\n API:");
512 for sig in &key_sigs {
513 output.push_str("\n ");
514 if crp_mode.is_tdd() {
515 output.push_str(&sig.to_tdd());
516 } else {
517 output.push_str(&sig.to_compact());
518 }
519 }
520 }
521
522 let sent = count_tokens(&output);
523 let savings = protocol::format_savings(original_tokens, sent);
524 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
525 }
526 "aggressive" => {
527 #[cfg(feature = "tree-sitter")]
528 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
529 #[cfg(not(feature = "tree-sitter"))]
530 let ast_pruned: Option<String> = None;
531
532 let base = ast_pruned.as_deref().unwrap_or(content);
533
534 let session_intent = crate::core::session::SessionState::load_latest()
535 .and_then(|s| s.active_structured_intent);
536 let raw = if let Some(ref intent) = session_intent {
537 compressor::task_aware_compress(base, Some(ext), intent)
538 } else {
539 compressor::aggressive_compress(base, Some(ext))
540 };
541 let compressed = compressor::safeguard_ratio(content, &raw);
542 let header = build_header(file_ref, short, ext, content, line_count, true);
543
544 let mut sym = SymbolMap::new();
545 let idents = symbol_map::extract_identifiers(&compressed, ext);
546 for ident in &idents {
547 sym.register(ident);
548 }
549
550 let sym_beneficial = if sym.len() >= 3 {
551 let sym_table = sym.format_table();
552 let sym_applied = sym.apply(&compressed);
553 let orig_tok = count_tokens(&compressed);
554 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
555 let net = orig_tok.saturating_sub(comp_tok);
556 orig_tok > 0 && net * 100 / orig_tok >= 5
557 } else {
558 false
559 };
560
561 if sym_beneficial {
562 let sym_output = sym.apply(&compressed);
563 let sym_table = sym.format_table();
564 let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
565 let savings = protocol::format_savings(original_tokens, sent);
566 return append_compressed_hint(
567 &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
568 file_path,
569 );
570 }
571
572 let sent = count_tokens(&compressed);
573 let savings = protocol::format_savings(original_tokens, sent);
574 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
575 }
576 "entropy" => {
577 let result = entropy::entropy_compress_adaptive(content, file_path);
578 let avg_h = entropy::analyze_entropy(content).avg_entropy;
579 let header = build_header(file_ref, short, ext, content, line_count, false);
580 let techs = result.techniques.join(", ");
581 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
582 let sent = count_tokens(&output);
583 let savings = protocol::format_savings(original_tokens, sent);
584 let compression_ratio = if original_tokens > 0 {
585 1.0 - (sent as f64 / original_tokens as f64)
586 } else {
587 0.0
588 };
589 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
590 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
591 }
592 "task" => {
593 let task_str = task.unwrap_or("");
594 if task_str.is_empty() {
595 let header = build_header(file_ref, short, ext, content, line_count, true);
596 return format!("{header}\n{content}\n[task mode: no task set — returned full]");
597 }
598 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
599 if keywords.is_empty() {
600 let header = build_header(file_ref, short, ext, content, line_count, true);
601 return format!(
602 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
603 );
604 }
605 let filtered =
606 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
607 let filtered_lines = filtered.lines().count();
608 let header = format!(
609 "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
610 );
611 let project_root = detect_project_root(file_path);
612 let graph_ctx = crate::core::graph_context::build_graph_context(
613 file_path,
614 &project_root,
615 Some(crate::core::graph_context::GraphContextOptions::default()),
616 )
617 .map(|c| crate::core::graph_context::format_graph_context(&c))
618 .unwrap_or_default();
619
620 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
621 let savings = protocol::format_savings(original_tokens, sent);
622 append_compressed_hint(
623 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
624 file_path,
625 )
626 }
627 "reference" => {
628 let tok = count_tokens(content);
629 let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
630 let sent = count_tokens(&output);
631 let savings = protocol::format_savings(original_tokens, sent);
632 format!("{output}\n{savings}")
633 }
634 mode if mode.starts_with("lines:") => {
635 let range_str = &mode[6..];
636 let extracted = extract_line_range(content, range_str);
637 let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
638 let sent = count_tokens(&extracted);
639 let savings = protocol::format_savings(original_tokens, sent);
640 format!("{header}\n{extracted}\n{savings}")
641 }
642 unknown => {
643 let header = build_header(file_ref, short, ext, content, line_count, true);
644 format!(
645 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
646 )
647 }
648 }
649}
650
/// Extracts the lines of `content` selected by `range_str` and prefixes each with
/// its 1-based line number (right-aligned to width 4, followed by `| `).
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and inclusive
/// ranges (`3-9`). A range bound that does not parse defaults to the first line
/// (start) or the last line (end); bounds are clamped to the file. Parts that select
/// nothing are skipped. When nothing at all is selected, the fixed message
/// `No lines matched the range.` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let render = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // After clamping, every index in `first..=last` lies within 1..=total.
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi.trim().parse::<usize>().unwrap_or(total).min(total);
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        picked.push(render(n));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
679
680fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
681 let short = protocol::shorten_path(path);
682 let old_content = cache.get(path).map(|e| e.content.clone());
683
684 let new_content = match read_file_lossy(path) {
685 Ok(c) => c,
686 Err(e) => return format!("ERROR: {e}"),
687 };
688
689 let original_tokens = count_tokens(&new_content);
690
691 let diff_output = if let Some(old) = &old_content {
692 compressor::diff_content(old, &new_content)
693 } else {
694 format!("[first read]\n{new_content}")
695 };
696
697 cache.store(path, new_content);
698
699 let sent = count_tokens(&diff_output);
700 let savings = protocol::format_savings(original_tokens, sent);
701 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
702}
703
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Runs `process_mode` for a synthetic Rust file referenced as `F1`, using `name`
    /// as both the short name and the file path, with CRP off.
    fn render(content: &str, mode: &str, name: &str, tokens: usize, task: Option<&str>) -> String {
        process_mode(
            content,
            mode,
            "F1",
            name,
            "rs",
            tokens,
            CrpMode::Off,
            name,
            task,
        )
    }

    /// Returns the second output line, trimmed, with any `N|` line-number prefix removed.
    fn second_line_payload(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        match line.split_once('|') {
            Some((_, rest)) => rest.trim(),
            None => line,
        }
    }

    #[test]
    fn test_header_toon_format_no_brackets() {
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        for bracket in ['[', ']'] {
            assert!(!header.contains(bracket));
        }
        assert!(header.contains("F1=main.rs 2L"));
    }

    #[test]
    fn test_header_toon_deps_indented() {
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        // The format is only checked when a deps line was emitted at all.
        if !header.contains("deps") {
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    #[test]
    fn test_header_toon_saves_tokens() {
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]";
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    #[test]
    fn test_task_mode_filters_content() {
        // Every 20th line mentions the task keyword; the rest is unrelated filler.
        let mut source_lines: Vec<String> = Vec::with_capacity(200);
        for i in 0..200 {
            let line = if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            };
            source_lines.push(line);
        }
        let content = source_lines.join("\n");
        let full_tokens = count_tokens(&content);

        let result = render(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );

        let result_tokens = count_tokens(&result);
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let result = render(content, "task", "test.rs", count_tokens(content), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let result = render(content, "reference", "test.rs", count_tokens(content), None);
        let emitted_lines = result.lines().count();
        assert!(
            emitted_lines <= 3,
            "reference mode should be very compact, got {} lines",
            emitted_lines
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();
        let mut cache = SessionCache::new();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let (first_read, _mode1) =
            handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(second_line_payload(&first_read), "one");

        // Wait so the rewrite below lands with a later modification time.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let (second_read, _mode2) =
            handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(second_line_payload(&second_read), "two");
    }

    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let tokens_for =
            |mode: &str| count_tokens(&render(&content, mode, "server.rs", full_tokens, task));
        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates a synthetic Rust source: a fixed `Server` prelude containing
    /// `validate_token`, filler handler lines up to `lines`, and a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PRELUDE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        let mut code: Vec<String> = PRELUDE.iter().map(|line| (*line).to_string()).collect();

        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));

        code.push("}".to_string());
        code.join("\n")
    }
}