1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Marker line telling the reader that the output is compressed and how to
/// request the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Returns `output` followed by the compressed-output marker and a ready-made
/// `ctx_read` call for `file_path` in `full` mode.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    let mut hinted =
        String::with_capacity(output.len() + COMPRESSED_HINT.len() + file_path.len() + 32);
    hinted.push_str(output);
    hinted.push('\n');
    hinted.push_str(COMPRESSED_HINT);
    hinted.push_str("\n ctx_read(\"");
    hinted.push_str(file_path);
    hinted.push_str("\", mode=\"full\")");
    hinted
}
18
19pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
20 let cap = crate::core::limits::max_read_bytes();
21 if let Ok(meta) = std::fs::metadata(path) {
22 if meta.len() > cap as u64 {
23 return Err(std::io::Error::other(format!(
24 "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
25 meta.len(),
26 cap
27 )));
28 }
29 }
30 let bytes = std::fs::read(path)?;
31 match String::from_utf8(bytes) {
32 Ok(s) => Ok(s),
33 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
34 }
35}
36
37pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
38 handle_with_options(cache, path, mode, false, crp_mode, None)
39}
40
41pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
42 handle_with_options(cache, path, mode, true, crp_mode, None)
43}
44
45pub fn handle_with_task(
46 cache: &mut SessionCache,
47 path: &str,
48 mode: &str,
49 crp_mode: CrpMode,
50 task: Option<&str>,
51) -> String {
52 handle_with_options(cache, path, mode, false, crp_mode, task)
53}
54
55pub fn handle_with_task_resolved(
56 cache: &mut SessionCache,
57 path: &str,
58 mode: &str,
59 crp_mode: CrpMode,
60 task: Option<&str>,
61) -> (String, String) {
62 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
63}
64
65pub fn handle_fresh_with_task(
66 cache: &mut SessionCache,
67 path: &str,
68 mode: &str,
69 crp_mode: CrpMode,
70 task: Option<&str>,
71) -> String {
72 handle_with_options(cache, path, mode, true, crp_mode, task)
73}
74
75pub fn handle_fresh_with_task_resolved(
76 cache: &mut SessionCache,
77 path: &str,
78 mode: &str,
79 crp_mode: CrpMode,
80 task: Option<&str>,
81) -> (String, String) {
82 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
83}
84
85fn handle_with_options(
86 cache: &mut SessionCache,
87 path: &str,
88 mode: &str,
89 fresh: bool,
90 crp_mode: CrpMode,
91 task: Option<&str>,
92) -> String {
93 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).0
94}
95
96fn handle_with_options_resolved(
97 cache: &mut SessionCache,
98 path: &str,
99 mode: &str,
100 fresh: bool,
101 crp_mode: CrpMode,
102 task: Option<&str>,
103) -> (String, String) {
104 let file_ref = cache.get_file_ref(path);
105 let short = protocol::shorten_path(path);
106 let ext = Path::new(path)
107 .extension()
108 .and_then(|e| e.to_str())
109 .unwrap_or("");
110
111 if fresh {
112 cache.invalidate(path);
113 }
114
115 if mode == "diff" {
116 return (handle_diff(cache, path, &file_ref), "diff".to_string());
117 }
118
119 if let Some(existing) = cache.get(path) {
120 if mode == "full" {
121 return (
122 handle_full_with_auto_delta(cache, path, &file_ref, &short, ext),
123 "full".to_string(),
124 );
125 }
126 let content = existing.content.clone();
127 let original_tokens = existing.original_tokens;
128 let resolved_mode = if mode == "auto" {
129 resolve_auto_mode(path, original_tokens, task)
130 } else {
131 mode.to_string()
132 };
133 let out = process_mode(
134 &content,
135 &resolved_mode,
136 &file_ref,
137 &short,
138 ext,
139 original_tokens,
140 crp_mode,
141 path,
142 task,
143 );
144 return (out, resolved_mode);
145 }
146
147 let content = match read_file_lossy(path) {
148 Ok(c) => c,
149 Err(e) => return (format!("ERROR: {e}"), "error".to_string()),
150 };
151
152 let similar_hint = find_semantic_similar(path, &content);
153
154 let store_result = cache.store(path, content.clone());
155
156 update_semantic_index(path, &content);
157
158 if mode == "full" {
159 let mut output = format_full_output(
160 &file_ref,
161 &short,
162 ext,
163 &content,
164 store_result.original_tokens,
165 store_result.line_count,
166 );
167 if let Some(hint) = similar_hint {
168 output.push_str(&format!("\n{hint}"));
169 }
170 return (output, "full".to_string());
171 }
172
173 let resolved_mode = if mode == "auto" {
174 resolve_auto_mode(path, store_result.original_tokens, task)
175 } else {
176 mode.to_string()
177 };
178
179 let mut output = process_mode(
180 &content,
181 &resolved_mode,
182 &file_ref,
183 &short,
184 ext,
185 store_result.original_tokens,
186 crp_mode,
187 path,
188 task,
189 );
190 if let Some(hint) = similar_hint {
191 output.push_str(&format!("\n{hint}"));
192 }
193 (output, resolved_mode)
194}
195
196fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
197 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
198 let predictor = crate::core::mode_predictor::ModePredictor::new();
199 let mut predicted = predictor
200 .predict_best_mode(&sig)
201 .unwrap_or_else(|| "full".to_string());
202 if predicted == "auto" {
203 predicted = "full".to_string();
204 }
205
206 if let Some(project_root) =
207 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
208 {
209 let ext = std::path::Path::new(file_path)
210 .extension()
211 .and_then(|e| e.to_str())
212 .unwrap_or("");
213 let bucket = match original_tokens {
214 0..=2000 => "sm",
215 2001..=10000 => "md",
216 10001..=50000 => "lg",
217 _ => "xl",
218 };
219 let bandit_key = format!("{ext}_{bucket}");
220 let mut store = crate::core::bandit::BanditStore::load(&project_root);
221 let bandit = store.get_or_create(&bandit_key);
222 let arm = bandit.select_arm();
223 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
224 predicted = "aggressive".to_string();
225 }
226 }
227
228 if let Some(session) = crate::core::session::SessionState::load_latest() {
229 if let Some(task_type) = session.active_task_type() {
230 predicted = refine_mode_by_task_type(&predicted, task_type, original_tokens);
231 }
232 }
233
234 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
235 policy.choose_auto_mode(task, &predicted)
236}
237
238fn refine_mode_by_task_type(
239 current: &str,
240 task_type: crate::core::intent_engine::TaskType,
241 token_count: usize,
242) -> String {
243 use crate::core::intent_engine::TaskType;
244
245 match task_type {
246 TaskType::FixBug | TaskType::Debug => {
247 if token_count > 5000 && current == "full" {
248 return "task".to_string();
249 }
250 current.to_string()
251 }
252 TaskType::Refactor | TaskType::Review => {
253 if token_count > 3000 && current == "full" {
254 return "signatures".to_string();
255 }
256 current.to_string()
257 }
258 TaskType::Generate => {
259 if token_count > 8000 && current == "full" {
260 return "signatures".to_string();
261 }
262 current.to_string()
263 }
264 TaskType::Explore => {
265 if token_count > 5000 && current == "full" {
266 return "map".to_string();
267 }
268 current.to_string()
269 }
270 TaskType::Test => {
271 if token_count > 10000 && current == "full" {
272 return "aggressive".to_string();
273 }
274 current.to_string()
275 }
276 TaskType::Config | TaskType::Deploy => current.to_string(),
277 }
278}
279
280fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
281 let project_root = detect_project_root(path);
282 let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
283
284 let similar = index.find_similar(content, 0.7);
285 let relevant: Vec<_> = similar
286 .into_iter()
287 .filter(|(p, _)| p != path)
288 .take(3)
289 .collect();
290
291 if relevant.is_empty() {
292 return None;
293 }
294
295 let hints: Vec<String> = relevant
296 .iter()
297 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
298 .collect();
299
300 Some(format!(
301 "[semantic: {} similar file(s) in cache]\n{}",
302 relevant.len(),
303 hints.join("\n")
304 ))
305}
306
307fn update_semantic_index(path: &str, content: &str) {
308 let project_root = detect_project_root(path);
309 let session_id = format!("{}", std::process::id());
310 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
311 index.add_file(path, content, &session_id);
312 let _ = index.save(&project_root);
313}
314
315fn detect_project_root(path: &str) -> String {
316 crate::core::protocol::detect_project_root_or_cwd(path)
317}
318
319const AUTO_DELTA_THRESHOLD: f64 = 0.6;
320
321fn handle_full_with_auto_delta(
323 cache: &mut SessionCache,
324 path: &str,
325 file_ref: &str,
326 short: &str,
327 ext: &str,
328) -> String {
329 let disk_content = match read_file_lossy(path) {
330 Ok(c) => c,
331 Err(_) => {
332 cache.record_cache_hit(path);
333 let existing = cache.get(path).unwrap();
334 return format!(
335 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
336 existing.read_count, existing.line_count
337 );
338 }
339 };
340
341 let old_content = cache.get(path).unwrap().content.clone();
342 let store_result = cache.store(path, disk_content.clone());
343
344 if store_result.was_hit {
345 return format!(
346 "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
347 store_result.read_count, store_result.line_count
348 );
349 }
350
351 let diff = compressor::diff_content(&old_content, &disk_content);
352 let diff_tokens = count_tokens(&diff);
353 let full_tokens = store_result.original_tokens;
354
355 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
356 let savings = protocol::format_savings(full_tokens, diff_tokens);
357 return format!(
358 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
359 disk_content.lines().count()
360 );
361 }
362
363 format_full_output(
364 file_ref,
365 short,
366 ext,
367 &disk_content,
368 store_result.original_tokens,
369 store_result.line_count,
370 )
371}
372
373fn format_full_output(
374 file_ref: &str,
375 short: &str,
376 ext: &str,
377 content: &str,
378 original_tokens: usize,
379 line_count: usize,
380) -> String {
381 let tokens = original_tokens;
382 let metadata = build_header(file_ref, short, ext, content, line_count, true);
383
384 let mut sym = SymbolMap::new();
385 let idents = symbol_map::extract_identifiers(content, ext);
386 for ident in &idents {
387 sym.register(ident);
388 }
389
390 let sym_beneficial = if sym.len() >= 3 {
391 let sym_table = sym.format_table();
392 let compressed = sym.apply(content);
393 let original_tok = count_tokens(content);
394 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
395 let net_saving = original_tok.saturating_sub(compressed_tok);
396 original_tok > 0 && net_saving * 100 / original_tok >= 5
397 } else {
398 false
399 };
400
401 if sym_beneficial {
402 let compressed_content = sym.apply(content);
403 let sym_table = sym.format_table();
404 let output = format!("{compressed_content}{sym_table}\n{metadata}");
405 let sent = count_tokens(&output);
406 let savings = protocol::format_savings(tokens, sent);
407 return format!("{output}\n{savings}");
408 }
409
410 let output = format!("{content}\n{metadata}");
411 let sent = count_tokens(&output);
412 let savings = protocol::format_savings(tokens, sent);
413 format!("{output}\n{savings}")
414}
415
416fn build_header(
417 file_ref: &str,
418 short: &str,
419 ext: &str,
420 content: &str,
421 line_count: usize,
422 include_deps: bool,
423) -> String {
424 let mut header = format!("{file_ref}={short} {line_count}L");
425
426 if include_deps {
427 let dep_info = deps::extract_deps(content, ext);
428 if !dep_info.imports.is_empty() {
429 let imports_str: Vec<&str> = dep_info
430 .imports
431 .iter()
432 .take(8)
433 .map(|s| s.as_str())
434 .collect();
435 header.push_str(&format!("\n deps {}", imports_str.join(",")));
436 }
437 if !dep_info.exports.is_empty() {
438 let exports_str: Vec<&str> = dep_info
439 .exports
440 .iter()
441 .take(8)
442 .map(|s| s.as_str())
443 .collect();
444 header.push_str(&format!("\n exports {}", exports_str.join(",")));
445 }
446 }
447
448 header
449}
450
451#[allow(clippy::too_many_arguments)]
452fn process_mode(
453 content: &str,
454 mode: &str,
455 file_ref: &str,
456 short: &str,
457 ext: &str,
458 original_tokens: usize,
459 crp_mode: CrpMode,
460 file_path: &str,
461 task: Option<&str>,
462) -> String {
463 let line_count = content.lines().count();
464
465 match mode {
466 "auto" => {
467 let chosen = resolve_auto_mode(file_path, original_tokens, task);
468 process_mode(
469 content,
470 &chosen,
471 file_ref,
472 short,
473 ext,
474 original_tokens,
475 crp_mode,
476 file_path,
477 task,
478 )
479 }
480 "signatures" => {
481 let sigs = signatures::extract_signatures(content, ext);
482 let dep_info = deps::extract_deps(content, ext);
483
484 let mut output = format!("{file_ref}={short} {line_count}L");
485 if !dep_info.imports.is_empty() {
486 let imports_str: Vec<&str> = dep_info
487 .imports
488 .iter()
489 .take(8)
490 .map(|s| s.as_str())
491 .collect();
492 output.push_str(&format!("\n deps {}", imports_str.join(",")));
493 }
494 for sig in &sigs {
495 output.push('\n');
496 if crp_mode.is_tdd() {
497 output.push_str(&sig.to_tdd());
498 } else {
499 output.push_str(&sig.to_compact());
500 }
501 }
502 let sent = count_tokens(&output);
503 let savings = protocol::format_savings(original_tokens, sent);
504 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
505 }
506 "map" => {
507 if ext == "php" {
508 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
509 {
510 let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
511 let sent = count_tokens(&output);
512 let savings = protocol::format_savings(original_tokens, sent);
513 output.push('\n');
514 output.push_str(&savings);
515 return append_compressed_hint(&output, file_path);
516 }
517 }
518
519 let sigs = signatures::extract_signatures(content, ext);
520 let dep_info = deps::extract_deps(content, ext);
521
522 let mut output = format!("{file_ref}={short} {line_count}L");
523
524 if !dep_info.imports.is_empty() {
525 output.push_str("\n deps: ");
526 output.push_str(&dep_info.imports.join(", "));
527 }
528
529 if !dep_info.exports.is_empty() {
530 output.push_str("\n exports: ");
531 output.push_str(&dep_info.exports.join(", "));
532 }
533
534 let key_sigs: Vec<&signatures::Signature> = sigs
535 .iter()
536 .filter(|s| s.is_exported || s.indent == 0)
537 .collect();
538
539 if !key_sigs.is_empty() {
540 output.push_str("\n API:");
541 for sig in &key_sigs {
542 output.push_str("\n ");
543 if crp_mode.is_tdd() {
544 output.push_str(&sig.to_tdd());
545 } else {
546 output.push_str(&sig.to_compact());
547 }
548 }
549 }
550
551 let sent = count_tokens(&output);
552 let savings = protocol::format_savings(original_tokens, sent);
553 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
554 }
555 "aggressive" => {
556 #[cfg(feature = "tree-sitter")]
557 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
558 #[cfg(not(feature = "tree-sitter"))]
559 let ast_pruned: Option<String> = None;
560
561 let base = ast_pruned.as_deref().unwrap_or(content);
562
563 let session_intent = crate::core::session::SessionState::load_latest()
564 .and_then(|s| s.active_structured_intent);
565 let raw = if let Some(ref intent) = session_intent {
566 compressor::task_aware_compress(base, Some(ext), intent)
567 } else {
568 compressor::aggressive_compress(base, Some(ext))
569 };
570 let compressed = compressor::safeguard_ratio(content, &raw);
571 let header = build_header(file_ref, short, ext, content, line_count, true);
572
573 let mut sym = SymbolMap::new();
574 let idents = symbol_map::extract_identifiers(&compressed, ext);
575 for ident in &idents {
576 sym.register(ident);
577 }
578
579 let sym_beneficial = if sym.len() >= 3 {
580 let sym_table = sym.format_table();
581 let sym_applied = sym.apply(&compressed);
582 let orig_tok = count_tokens(&compressed);
583 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
584 let net = orig_tok.saturating_sub(comp_tok);
585 orig_tok > 0 && net * 100 / orig_tok >= 5
586 } else {
587 false
588 };
589
590 if sym_beneficial {
591 let sym_output = sym.apply(&compressed);
592 let sym_table = sym.format_table();
593 let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
594 let savings = protocol::format_savings(original_tokens, sent);
595 return append_compressed_hint(
596 &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
597 file_path,
598 );
599 }
600
601 let sent = count_tokens(&compressed);
602 let savings = protocol::format_savings(original_tokens, sent);
603 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
604 }
605 "entropy" => {
606 let result = entropy::entropy_compress_adaptive(content, file_path);
607 let avg_h = entropy::analyze_entropy(content).avg_entropy;
608 let header = build_header(file_ref, short, ext, content, line_count, false);
609 let techs = result.techniques.join(", ");
610 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
611 let sent = count_tokens(&output);
612 let savings = protocol::format_savings(original_tokens, sent);
613 let compression_ratio = if original_tokens > 0 {
614 1.0 - (sent as f64 / original_tokens as f64)
615 } else {
616 0.0
617 };
618 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
619 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
620 }
621 "task" => {
622 let task_str = task.unwrap_or("");
623 if task_str.is_empty() {
624 let header = build_header(file_ref, short, ext, content, line_count, true);
625 return format!("{header}\n{content}\n[task mode: no task set — returned full]");
626 }
627 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
628 if keywords.is_empty() {
629 let header = build_header(file_ref, short, ext, content, line_count, true);
630 return format!(
631 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
632 );
633 }
634 let classified_type = crate::core::intent_engine::classify(task_str).task_type;
635 let filtered = crate::core::task_relevance::information_bottleneck_filter_typed(
636 content,
637 &keywords,
638 0.3,
639 Some(classified_type),
640 );
641 let filtered_lines = filtered.lines().count();
642 let header = format!(
643 "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
644 );
645 let project_root = detect_project_root(file_path);
646 let graph_ctx = crate::core::graph_context::build_graph_context(
647 file_path,
648 &project_root,
649 Some(crate::core::graph_context::GraphContextOptions::default()),
650 )
651 .map(|c| crate::core::graph_context::format_graph_context(&c))
652 .unwrap_or_default();
653
654 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
655 let savings = protocol::format_savings(original_tokens, sent);
656 append_compressed_hint(
657 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
658 file_path,
659 )
660 }
661 "reference" => {
662 let tok = count_tokens(content);
663 let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
664 let sent = count_tokens(&output);
665 let savings = protocol::format_savings(original_tokens, sent);
666 format!("{output}\n{savings}")
667 }
668 mode if mode.starts_with("lines:") => {
669 let range_str = &mode[6..];
670 let extracted = extract_line_range(content, range_str);
671 let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
672 let sent = count_tokens(&extracted);
673 let savings = protocol::format_savings(original_tokens, sent);
674 format!("{header}\n{extracted}\n{savings}")
675 }
676 unknown => {
677 let header = build_header(file_ref, short, ext, content, line_count, true);
678 format!(
679 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
680 )
681 }
682 }
683}
684
/// Extracts the lines of `content` named by `range_str` and prefixes each with
/// its right-aligned 1-based line number (`{n:>4}| `).
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and
/// inclusive ranges (`3-9`). In a range, an unparsable start means line 1 and
/// an unparsable end means the last line; bounds are clamped to the file.
/// Parts that select nothing are skipped. Lines are emitted in the order the
/// parts are written, and a fixed message is returned when nothing matched.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    // Callers below only pass numbers within 1..=total.
    let render = |number: usize| format!("{number:>4}| {}", lines[number - 1]);

    let mut selected: Vec<String> = Vec::new();
    for part in range_str.split(',').map(str::trim) {
        match part.split_once('-') {
            Some((start_s, end_s)) => {
                let start = start_s.trim().parse::<usize>().map_or(1, |n| n.max(1));
                let end = end_s
                    .trim()
                    .parse::<usize>()
                    .map_or(total, |n| n.min(total));
                // `start >= 1` and `end <= total`, so every yielded number is a valid line.
                selected.extend((start..=end).map(&render));
            }
            None => {
                if let Ok(n) = part.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        selected.push(render(n));
                    }
                }
            }
        }
    }

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
713
714fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
715 let short = protocol::shorten_path(path);
716 let old_content = cache.get(path).map(|e| e.content.clone());
717
718 let new_content = match read_file_lossy(path) {
719 Ok(c) => c,
720 Err(e) => return format!("ERROR: {e}"),
721 };
722
723 let original_tokens = count_tokens(&new_content);
724
725 let diff_output = if let Some(old) = &old_content {
726 compressor::diff_content(old, &new_content)
727 } else {
728 format!("[first read]\n{new_content}")
729 };
730
731 cache.store(path, new_content);
732
733 let sent = count_tokens(&diff_output);
734 let savings = protocol::format_savings(original_tokens, sent);
735 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
736}
737
#[cfg(test)]
mod tests {
    use super::*;

    /// Calls `process_mode` with the fixed file reference `F1`, extension `rs`
    /// and CRP off; `file` serves as both the short name and the file path.
    fn run_mode(
        content: &str,
        mode: &str,
        file: &str,
        tokens: usize,
        task: Option<&str>,
    ) -> String {
        process_mode(
            content,
            mode,
            "F1",
            file,
            "rs",
            tokens,
            CrpMode::Off,
            file,
            task,
        )
    }

    /// A header without deps contains no brackets and starts with `ref=name NL`.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let source = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 2, false);
        for bracket in ['[', ']'] {
            assert!(!header.contains(bracket));
        }
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When deps are present they appear on an indented line, not in brackets.
    #[test]
    fn test_header_toon_deps_indented() {
        let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 3, true);
        if !header.contains("deps") {
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    /// The current header must not cost more tokens than the old bracket format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", source, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Each TDD symbol must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols.iter() {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode shrinks a file in which only every 20th line matches the task.
    #[test]
    fn test_task_mode_filters_content() {
        let mut source_lines: Vec<String> = Vec::with_capacity(200);
        for i in 0..200 {
            let line = if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            };
            source_lines.push(line);
        }
        let content = source_lines.join("\n");
        let full_tokens = count_tokens(&content);

        let result = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );
        let result_tokens = count_tokens(&result);
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode without a task says so in its output.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let result = run_mode(content, "task", "test.rs", count_tokens(content), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode produces at most three lines with line and token counts.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let result = run_mode(content, "reference", "test.rs", count_tokens(content), None);
        let produced = result.lines().count();
        assert!(
            produced <= 3,
            "reference mode should be very compact, got {} lines",
            produced
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// Prints and checks the relative token cost of several modes on a generated file.
    #[test]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(500);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");
        let tokens_for =
            |mode: &str| count_tokens(&run_mode(&content, mode, "server.rs", full_tokens, task));

        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 500-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates roughly `lines` lines of Rust-like text: a fixed preamble with
    /// a `validate_token` method, filler handler methods, and a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().map(|line| line.to_string()));

        let remaining = lines.saturating_sub(code.len());
        // Every 30 filler lines form one handler: opening line, 28 body lines, closing line.
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));

        code.push("}".to_string());
        code.join("\n")
    }
}