1use std::collections::HashMap;
6use std::ops::Range;
7use std::path::Path;
8
9use rusqlite::{params, Connection};
10use serde::{Deserialize, Serialize};
11
12use crate::ast_parser::AstParser;
13use crate::error::{Result, SqzError};
14
/// How a file should be rendered when read back under a token budget.
///
/// Serialized with lowercase variant names (e.g. "full", "map"); see
/// `FileReader::read` for the dispatch to each mode's implementation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FileReadMode {
    /// Verbatim file content, no compression.
    Full,
    /// Ultra-compact one-screen overview (name, line count, symbol summary).
    Map,
    /// AST-extracted signatures only (falls back to full content when the
    /// language is unsupported).
    Signatures,
    /// Positional line diff against a previously cached copy.
    Diff,
    /// Signatures plus high-entropy blocks.
    Aggressive,
    /// Only blocks whose Shannon entropy clears the configured percentile.
    Entropy,
    /// FTS5-ranked chunks matching a task intent string.
    Task,
    /// An explicit 0-based line range, rendered with context lines.
    Lines(Range<usize>),
}
38
/// Outcome of a single `FileReader::read` call.
#[derive(Debug, Clone)]
pub struct ReadResult {
    /// The rendered (possibly compressed) file text.
    pub content: String,
    /// Name of the mode that produced the content (e.g. "full", "map").
    pub mode: String,
    /// Approximate token count of the original source (len/4 heuristic).
    pub tokens_original: u32,
    /// Approximate token count of `content`.
    pub tokens_result: u32,
}
49
/// A contiguous run of non-blank lines together with its Shannon entropy.
#[derive(Debug, Clone)]
pub struct BlockEntropy {
    /// 0-based index of the block's first line.
    pub start_line: usize,
    /// 0-based exclusive end: index one past the block's last line.
    pub end_line: usize,
    /// Character-level Shannon entropy of `text`, in bits.
    pub entropy: f64,
    /// The block's text, newline-joined.
    pub text: String,
}
60
/// Character-level Shannon entropy of `text`, in bits per character.
///
/// Returns 0.0 for empty input and for text made of a single repeated
/// character.
fn shannon_entropy(text: &str) -> f64 {
    if text.is_empty() {
        return 0.0;
    }
    // Count character frequencies and the total number of characters.
    // The total must be the *char* count, not `text.len()` (bytes): using
    // byte length skews the probabilities for multi-byte UTF-8 text so they
    // no longer sum to 1.
    let mut freq: HashMap<char, usize> = HashMap::new();
    let mut total = 0usize;
    for ch in text.chars() {
        *freq.entry(ch).or_insert(0) += 1;
        total += 1;
    }
    let total = total as f64;
    let mut entropy = 0.0;
    for &count in freq.values() {
        let p = count as f64 / total;
        if p > 0.0 {
            entropy -= p * p.log2();
        }
    }
    entropy
}
80
81fn compute_block_entropies(source: &str) -> Vec<BlockEntropy> {
84 let lines: Vec<&str> = source.lines().collect();
85 let mut blocks = Vec::new();
86 let mut block_start = 0;
87 let mut current_block = String::new();
88
89 for (i, line) in lines.iter().enumerate() {
90 if line.trim().is_empty() {
91 if !current_block.trim().is_empty() {
92 blocks.push(BlockEntropy {
93 start_line: block_start,
94 end_line: i,
95 entropy: shannon_entropy(¤t_block),
96 text: current_block.clone(),
97 });
98 }
99 current_block.clear();
100 block_start = i + 1;
101 } else {
102 if current_block.is_empty() {
103 block_start = i;
104 }
105 if !current_block.is_empty() {
106 current_block.push('\n');
107 }
108 current_block.push_str(line);
109 }
110 }
111 if !current_block.trim().is_empty() {
113 blocks.push(BlockEntropy {
114 start_line: block_start,
115 end_line: lines.len(),
116 entropy: shannon_entropy(¤t_block),
117 text: current_block,
118 });
119 }
120 blocks
121}
122
123fn filter_high_entropy(blocks: &[BlockEntropy], percentile: f64) -> Vec<&BlockEntropy> {
125 if blocks.is_empty() {
126 return Vec::new();
127 }
128 let mut entropies: Vec<f64> = blocks.iter().map(|b| b.entropy).collect();
129 entropies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
130 let idx = ((percentile / 100.0) * (entropies.len() as f64 - 1.0)).round() as usize;
131 let threshold = entropies[idx.min(entropies.len() - 1)];
132 blocks.iter().filter(|b| b.entropy >= threshold).collect()
133}
134
135fn fts5_task_filter(source: &str, intent: &str) -> Result<String> {
140 let chunks = chunk_by_blocks(source);
141 if chunks.is_empty() {
142 return Ok(String::new());
143 }
144
145 let conn = Connection::open_in_memory()
146 .map_err(|e| SqzError::Other(format!("FTS5 in-memory open failed: {e}")))?;
147
148 conn.execute_batch(
149 r#"
150 CREATE VIRTUAL TABLE IF NOT EXISTS file_fts USING fts5(
151 chunk_id,
152 body,
153 tokenize='porter ascii'
154 );
155 "#,
156 )
157 .map_err(|e| SqzError::Other(format!("FTS5 schema creation failed: {e}")))?;
158
159 for (i, chunk) in chunks.iter().enumerate() {
160 conn.execute(
161 "INSERT INTO file_fts(chunk_id, body) VALUES (?1, ?2)",
162 params![i.to_string(), chunk],
163 )
164 .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
165 }
166
167 let sanitized: String = intent
169 .chars()
170 .map(|c| if c.is_alphanumeric() || c.is_whitespace() { c } else { ' ' })
171 .collect();
172 let terms: Vec<&str> = sanitized.split_whitespace().collect();
173 if terms.is_empty() {
174 return Ok(source.to_string());
176 }
177
178 let fts_query = terms.join(" OR ");
179
180 let mut stmt = conn
181 .prepare(
182 r#"SELECT body FROM file_fts
183 WHERE file_fts MATCH ?1
184 ORDER BY rank
185 LIMIT 20"#,
186 )
187 .map_err(|e| SqzError::Other(format!("FTS5 query prepare failed: {e}")))?;
188
189 let rows = stmt
190 .query_map(params![fts_query], |row| row.get::<_, String>(0))
191 .map_err(|e| SqzError::Other(format!("FTS5 query failed: {e}")))?;
192
193 let mut results = Vec::new();
194 for row in rows {
195 results.push(row.map_err(|e| SqzError::Other(format!("FTS5 row read failed: {e}")))?);
196 }
197
198 if results.is_empty() {
199 return Ok(source.to_string());
201 }
202
203 Ok(results.join("\n\n"))
204}
205
/// Split `text` into blank-line-delimited paragraphs, further breaking any
/// paragraph larger than 512 bytes at line boundaries. A single line longer
/// than the cap is kept whole. Falls back to the trimmed text as one chunk
/// when no paragraph survives trimming but the text is non-empty.
fn chunk_by_blocks(text: &str) -> Vec<String> {
    const MAX_CHUNK_BYTES: usize = 512;
    let mut chunks: Vec<String> = Vec::new();

    for para in text.split("\n\n") {
        let para = para.trim();
        if para.is_empty() {
            continue;
        }
        if para.len() <= MAX_CHUNK_BYTES {
            chunks.push(para.to_owned());
            continue;
        }

        // Oversized paragraph: re-assemble line by line, flushing whenever
        // appending the next line (plus its joining '\n') would overflow.
        let mut acc = String::new();
        for line in para.lines() {
            if !acc.is_empty() && acc.len() + line.len() + 1 > MAX_CHUNK_BYTES {
                chunks.push(std::mem::take(&mut acc));
            }
            if acc.is_empty() {
                acc.push_str(line);
            } else {
                acc.push('\n');
                acc.push_str(line);
            }
        }
        if !acc.is_empty() {
            chunks.push(acc);
        }
    }

    if chunks.is_empty() && !text.trim().is_empty() {
        chunks.push(text.trim().to_owned());
    }
    chunks
}
241
/// Rough token estimate: ~4 bytes per token, rounded up so any non-empty
/// text costs at least one token.
fn approx_tokens(s: &str) -> u32 {
    let bytes = s.len() as f64;
    (bytes / 4.0).ceil() as u32
}
247
/// Renders files in a variety of compressed representations for
/// token-budgeted consumption (see `FileReadMode`).
pub struct FileReader {
    /// AST-based signature extractor used by map/signatures/aggressive modes.
    ast_parser: AstParser,
    /// Percentile threshold used when selecting high-entropy blocks.
    entropy_percentile: f64,
    /// Number of surrounding lines shown by diff and lines modes.
    context_lines: usize,
}
257
258impl FileReader {
259 pub fn new() -> Self {
264 Self {
265 ast_parser: AstParser::new(),
266 entropy_percentile: 60.0,
267 context_lines: 3,
268 }
269 }
270
271 pub fn with_config(entropy_percentile: f64, context_lines: usize) -> Self {
273 Self {
274 ast_parser: AstParser::new(),
275 entropy_percentile,
276 context_lines,
277 }
278 }
279
280 pub fn read(
288 &self,
289 path: &Path,
290 source: &str,
291 mode: &FileReadMode,
292 intent: Option<&str>,
293 cached_content: Option<&str>,
294 ) -> Result<ReadResult> {
295 let tokens_original = approx_tokens(source);
296
297 match mode {
298 FileReadMode::Full => self.read_full(source, tokens_original),
299 FileReadMode::Map => self.read_map(path, source, tokens_original),
300 FileReadMode::Signatures => self.read_signatures(path, source, tokens_original),
301 FileReadMode::Diff => self.read_diff(source, cached_content, tokens_original),
302 FileReadMode::Aggressive => self.read_aggressive(path, source, tokens_original),
303 FileReadMode::Entropy => self.read_entropy(source, tokens_original),
304 FileReadMode::Task => self.read_task(source, intent, tokens_original),
305 FileReadMode::Lines(range) => {
306 self.read_lines(source, range.clone(), tokens_original)
307 }
308 }
309 }
310
311 fn read_full(&self, source: &str, tokens_original: u32) -> Result<ReadResult> {
313 Ok(ReadResult {
314 content: source.to_string(),
315 mode: "full".to_string(),
316 tokens_original,
317 tokens_result: tokens_original,
318 })
319 }
320
    /// Build an ultra-compact overview of the file: a header with filename
    /// and line count, plus import/type/struct/function summaries from the
    /// AST parser, trimmed from the end until the result fits a ~50-token
    /// budget.
    fn read_map(&self, path: &Path, source: &str, tokens_original: u32) -> Result<ReadResult> {
        let lang = detect_language(path);
        let mut parts: Vec<String> = Vec::new();

        // Header line: "# filename (N lines)".
        let line_count = source.lines().count();
        parts.push(format!(
            "# {} ({} lines)",
            path.file_name()
                .map(|n| n.to_string_lossy().to_string())
                .unwrap_or_default(),
            line_count
        ));

        if let Some(lang) = &lang {
            if self.ast_parser.is_supported(lang) {
                if let Ok(summary) = self.ast_parser.extract_signatures(source, lang) {
                    // Imports are summarized by count only.
                    if !summary.imports.is_empty() {
                        let count = summary.imports.len();
                        parts.push(format!("imports: {count}"));
                    }
                    // Sorted, deduplicated name lists for types/structs/fns.
                    if !summary.types.is_empty() {
                        let mut names: Vec<&str> =
                            summary.types.iter().map(|t| t.name.as_str()).collect();
                        names.sort_unstable();
                        names.dedup();
                        parts.push(format!("types({}): {}", names.len(), names.join(", ")));
                    }
                    if !summary.classes.is_empty() {
                        let mut names: Vec<&str> =
                            summary.classes.iter().map(|c| c.name.as_str()).collect();
                        names.sort_unstable();
                        names.dedup();
                        parts.push(format!("structs({}): {}", names.len(), names.join(", ")));
                    }
                    if !summary.functions.is_empty() {
                        let mut names: Vec<&str> =
                            summary.functions.iter().map(|f| f.name.as_str()).collect();
                        names.sort_unstable();
                        names.dedup();
                        parts.push(format!("fns({}): {}", names.len(), names.join(", ")));
                    }
                }
            }
        }

        // Drop summary lines from the end until the map fits the budget;
        // the header line is always kept.
        const MAP_TOKEN_BUDGET: u32 = 50;
        loop {
            let content = parts.join("\n");
            if approx_tokens(&content) <= MAP_TOKEN_BUDGET || parts.len() <= 1 {
                break;
            }
            parts.pop();
        }

        // Fallback when only the header is left (no AST data, or everything
        // was trimmed): count declaration-looking lines with a cheap textual
        // scan so the map still says something about the file's shape.
        if parts.len() <= 1 {
            let mut section_count = 0u32;
            for line in source.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("fn ")
                    || trimmed.starts_with("pub fn ")
                    || trimmed.starts_with("def ")
                    || trimmed.starts_with("class ")
                    || trimmed.starts_with("function ")
                    || trimmed.starts_with("struct ")
                    || trimmed.starts_with("impl ")
                    || trimmed.starts_with("trait ")
                {
                    section_count += 1;
                }
            }
            if section_count > 0 {
                parts.push(format!("sections: {section_count}"));
            }
        }

        let content = parts.join("\n");
        let tokens_result = approx_tokens(&content);

        Ok(ReadResult {
            content,
            mode: "map".to_string(),
            tokens_original,
            tokens_result,
        })
    }
418
419 fn read_signatures(
421 &self,
422 path: &Path,
423 source: &str,
424 tokens_original: u32,
425 ) -> Result<ReadResult> {
426 let lang = detect_language(path);
427 if let Some(lang) = &lang {
428 if self.ast_parser.is_supported(lang) {
429 let summary = self.ast_parser.extract_signatures(source, lang)?;
430 let content = summary.to_text();
431 let tokens_result = approx_tokens(&content);
432 return Ok(ReadResult {
433 content,
434 mode: "signatures".to_string(),
435 tokens_original,
436 tokens_result,
437 });
438 }
439 }
440 Ok(ReadResult {
442 content: source.to_string(),
443 mode: "signatures".to_string(),
444 tokens_original,
445 tokens_result: tokens_original,
446 })
447 }
448
449 fn read_diff(
452 &self,
453 source: &str,
454 cached_content: Option<&str>,
455 tokens_original: u32,
456 ) -> Result<ReadResult> {
457 let cached = match cached_content {
458 Some(c) => c,
459 None => {
460 return Ok(ReadResult {
462 content: source.to_string(),
463 mode: "diff".to_string(),
464 tokens_original,
465 tokens_result: tokens_original,
466 });
467 }
468 };
469
470 if source == cached {
471 let content = "(no changes)".to_string();
472 return Ok(ReadResult {
473 content,
474 mode: "diff".to_string(),
475 tokens_original,
476 tokens_result: approx_tokens("(no changes)"),
477 });
478 }
479
480 let new_lines: Vec<&str> = source.lines().collect();
481 let old_lines: Vec<&str> = cached.lines().collect();
482
483 let mut changed_lines: Vec<usize> = Vec::new();
485 let max_len = new_lines.len().max(old_lines.len());
486 for i in 0..max_len {
487 let new_line = new_lines.get(i).copied().unwrap_or("");
488 let old_line = old_lines.get(i).copied().unwrap_or("");
489 if new_line != old_line {
490 changed_lines.push(i);
491 }
492 }
493
494 if changed_lines.is_empty() {
495 let content = "(no changes)".to_string();
496 return Ok(ReadResult {
497 content,
498 mode: "diff".to_string(),
499 tokens_original,
500 tokens_result: approx_tokens("(no changes)"),
501 });
502 }
503
504 let ctx = self.context_lines;
506 let mut included: Vec<bool> = vec![false; new_lines.len()];
507 for &line_idx in &changed_lines {
508 let start = line_idx.saturating_sub(ctx);
509 let end = (line_idx + ctx + 1).min(new_lines.len());
510 for j in start..end {
511 included[j] = true;
512 }
513 }
514
515 let mut output = Vec::new();
516 let mut in_range = false;
517 for (i, line) in new_lines.iter().enumerate() {
518 if included[i] {
519 if !in_range {
520 output.push(format!("@@ line {} @@", i + 1));
521 in_range = true;
522 }
523 let marker = if changed_lines.contains(&i) {
524 ">"
525 } else {
526 " "
527 };
528 output.push(format!("{marker} {line}"));
529 } else {
530 in_range = false;
531 }
532 }
533
534 let content = output.join("\n");
535 let tokens_result = approx_tokens(&content);
536
537 Ok(ReadResult {
538 content,
539 mode: "diff".to_string(),
540 tokens_original,
541 tokens_result,
542 })
543 }
544
545 fn read_aggressive(
548 &self,
549 path: &Path,
550 source: &str,
551 tokens_original: u32,
552 ) -> Result<ReadResult> {
553 let lang = detect_language(path);
555 let sig_content = if let Some(lang) = &lang {
556 if self.ast_parser.is_supported(lang) {
557 self.ast_parser
558 .extract_signatures(source, lang)
559 .ok()
560 .map(|s| s.to_text())
561 } else {
562 None
563 }
564 } else {
565 None
566 };
567
568 let blocks = compute_block_entropies(source);
570 let high = filter_high_entropy(&blocks, self.entropy_percentile);
571 let entropy_content: String = high.iter().map(|b| b.text.as_str()).collect::<Vec<_>>().join("\n\n");
572
573 let content = match sig_content {
576 Some(sigs) if !sigs.is_empty() => {
577 if entropy_content.is_empty() {
578 sigs
579 } else {
580 format!("{sigs}\n\n// --- high-entropy blocks ---\n{entropy_content}")
581 }
582 }
583 _ => {
584 if entropy_content.is_empty() {
585 source.to_string()
586 } else {
587 entropy_content
588 }
589 }
590 };
591
592 let tokens_result = approx_tokens(&content).min(tokens_original);
593 Ok(ReadResult {
594 content,
595 mode: "aggressive".to_string(),
596 tokens_original,
597 tokens_result,
598 })
599 }
600
601 fn read_entropy(&self, source: &str, tokens_original: u32) -> Result<ReadResult> {
604 let blocks = compute_block_entropies(source);
605 let high = filter_high_entropy(&blocks, self.entropy_percentile);
606
607 if high.is_empty() {
608 return Ok(ReadResult {
609 content: source.to_string(),
610 mode: "entropy".to_string(),
611 tokens_original,
612 tokens_result: tokens_original,
613 });
614 }
615
616 let content: String = high
617 .iter()
618 .map(|b| format!("// lines {}-{}\n{}", b.start_line + 1, b.end_line, b.text))
619 .collect::<Vec<_>>()
620 .join("\n\n");
621
622 let tokens_result = approx_tokens(&content);
623
624 if tokens_result > tokens_original {
627 let plain: String = high
628 .iter()
629 .map(|b| b.text.as_str())
630 .collect::<Vec<_>>()
631 .join("\n\n");
632 let plain_tokens = approx_tokens(&plain).min(tokens_original);
633 return Ok(ReadResult {
634 content: plain,
635 mode: "entropy".to_string(),
636 tokens_original,
637 tokens_result: plain_tokens,
638 });
639 }
640
641 Ok(ReadResult {
642 content,
643 mode: "entropy".to_string(),
644 tokens_original,
645 tokens_result,
646 })
647 }
648
649 fn read_task(
651 &self,
652 source: &str,
653 intent: Option<&str>,
654 tokens_original: u32,
655 ) -> Result<ReadResult> {
656 let intent = match intent {
657 Some(i) if !i.trim().is_empty() => i,
658 _ => {
659 return Ok(ReadResult {
661 content: source.to_string(),
662 mode: "task".to_string(),
663 tokens_original,
664 tokens_result: tokens_original,
665 });
666 }
667 };
668
669 let content = fts5_task_filter(source, intent)?;
670 let tokens_result = approx_tokens(&content);
671
672 Ok(ReadResult {
673 content,
674 mode: "task".to_string(),
675 tokens_original,
676 tokens_result,
677 })
678 }
679
680 fn read_lines(
682 &self,
683 source: &str,
684 range: Range<usize>,
685 tokens_original: u32,
686 ) -> Result<ReadResult> {
687 let lines: Vec<&str> = source.lines().collect();
688 let total = lines.len();
689
690 let start = range.start.min(total);
692 let end = range.end.min(total);
693
694 if start >= end {
695 return Ok(ReadResult {
696 content: String::new(),
697 mode: "lines".to_string(),
698 tokens_original,
699 tokens_result: 0,
700 });
701 }
702
703 let ctx_start = start.saturating_sub(self.context_lines);
705 let ctx_end = (end + self.context_lines).min(total);
706
707 let mut output = Vec::new();
708 output.push(format!("// lines {}-{} (of {})", start + 1, end, total));
709 for i in ctx_start..ctx_end {
710 let marker = if i >= start && i < end { ">" } else { " " };
711 output.push(format!("{marker} {:4} | {}", i + 1, lines[i]));
712 }
713
714 let content = output.join("\n");
715 let tokens_result = approx_tokens(&content);
716
717 Ok(ReadResult {
718 content,
719 mode: "lines".to_string(),
720 tokens_original,
721 tokens_result,
722 })
723 }
724
    /// Borrow the underlying AST parser.
    pub fn ast_parser(&self) -> &AstParser {
        &self.ast_parser
    }
729
    /// The percentile threshold used for entropy-based block selection.
    pub fn entropy_percentile(&self) -> f64 {
        self.entropy_percentile
    }
734}
735
impl Default for FileReader {
    /// Equivalent to [`FileReader::new`].
    fn default() -> Self {
        Self::new()
    }
}
741
/// Map a file extension to the language identifier used by the AST parser.
/// Returns `None` for unknown extensions and extension-less paths.
fn detect_language(path: &Path) -> Option<String> {
    let ext = path.extension()?.to_str()?;
    let lang: Option<&str> = match ext {
        "rs" => Some("rust"),
        "py" => Some("python"),
        "js" | "mjs" | "cjs" => Some("javascript"),
        "ts" | "tsx" => Some("typescript"),
        "go" => Some("go"),
        "java" => Some("java"),
        "c" | "h" => Some("c"),
        "cpp" | "cc" | "cxx" | "hpp" => Some("cpp"),
        "rb" => Some("ruby"),
        "sh" | "bash" => Some("bash"),
        "json" => Some("json"),
        "html" | "htm" => Some("html"),
        "css" => Some("css"),
        "cs" => Some("csharp"),
        "kt" | "kts" => Some("kotlin"),
        "swift" => Some("swift"),
        "toml" => Some("toml"),
        "yml" | "yaml" => Some("yaml"),
        _ => None,
    };
    lang.map(String::from)
}
770
/// Public wrapper around the character-level Shannon entropy computation.
pub fn compute_entropy(text: &str) -> f64 {
    shannon_entropy(text)
}
777
/// Public wrapper: split `source` into blank-line-delimited blocks, each
/// scored with its Shannon entropy.
pub fn analyze_block_entropies(source: &str) -> Vec<BlockEntropy> {
    compute_block_entropies(source)
}
782
// Unit tests for every read mode plus property-based checks (proptest) on
// the token budgets of the compressing modes.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // Fixture: a small but representative Rust file — imports, a struct, an
    // impl with two methods, a free function, a type alias, and a private
    // helper — used by the signature/aggressive-mode tests.
    fn sample_rust_source() -> &'static str {
        r#"use std::collections::HashMap;
use std::path::Path;

/// A configuration struct.
pub struct Config {
    pub name: String,
    pub value: i32,
}

impl Config {
    pub fn new(name: &str, value: i32) -> Self {
        Self {
            name: name.to_string(),
            value,
        }
    }

    pub fn validate(&self) -> bool {
        !self.name.is_empty() && self.value > 0
    }
}

pub fn process(config: &Config) -> String {
    let mut result = String::new();
    for i in 0..config.value {
        result.push_str(&format!("item {}: {}\n", i, config.name));
    }
    result
}

pub type ConfigMap = HashMap<String, Config>;

fn internal_helper() -> i32 {
    42
}
"#
    }

    // Fixture: a synthetic file of roughly `lines` lines (short preamble
    // followed by repetitive comment lines) for exercising map mode.
    fn large_source(lines: usize) -> String {
        let mut src = String::new();
        src.push_str("use std::collections::HashMap;\n\n");
        src.push_str("pub struct MyStruct {\n field: i32,\n}\n\n");
        for i in 0..lines.saturating_sub(6) {
            src.push_str(&format!("// line {i}: some content here\n"));
        }
        src
    }

    #[test]
    fn test_full_mode_returns_unchanged() {
        let reader = FileReader::new();
        let source = "hello world\nline two\n";
        let result = reader
            .read(Path::new("test.txt"), source, &FileReadMode::Full, None, None)
            .unwrap();
        assert_eq!(result.content, source);
        assert_eq!(result.mode, "full");
        assert_eq!(result.tokens_original, result.tokens_result);
    }

    #[test]
    fn test_map_mode_compact_output() {
        let reader = FileReader::new();
        let source = &large_source(300);
        let result = reader
            .read(
                Path::new("test.rs"),
                source,
                &FileReadMode::Map,
                None,
                None,
            )
            .unwrap();
        assert_eq!(result.mode, "map");
        // Map mode advertises a ~50-token budget.
        assert!(
            result.tokens_result <= 50,
            "map mode produced {} tokens, expected ≤50",
            result.tokens_result
        );
    }

    #[test]
    fn test_signatures_mode_extracts_signatures() {
        let reader = FileReader::new();
        let source = sample_rust_source();
        let result = reader
            .read(
                Path::new("test.rs"),
                source,
                &FileReadMode::Signatures,
                None,
                None,
            )
            .unwrap();
        assert_eq!(result.mode, "signatures");
        assert!(result.content.contains("use std::collections::HashMap"));
        assert!(result.tokens_result < result.tokens_original);
    }

    #[test]
    fn test_signatures_mode_unsupported_language_fallback() {
        let reader = FileReader::new();
        let source = "some content";
        let result = reader
            .read(
                Path::new("test.xyz"),
                source,
                &FileReadMode::Signatures,
                None,
                None,
            )
            .unwrap();
        // Unknown extension: mode falls back to the full content.
        assert_eq!(result.content, source);
    }

    #[test]
    fn test_diff_mode_no_cached() {
        let reader = FileReader::new();
        let source = "line 1\nline 2\n";
        let result = reader
            .read(
                Path::new("test.txt"),
                source,
                &FileReadMode::Diff,
                None,
                None,
            )
            .unwrap();
        // No cached copy means nothing to diff against.
        assert_eq!(result.content, source);
    }

    #[test]
    fn test_diff_mode_no_changes() {
        let reader = FileReader::new();
        let source = "line 1\nline 2\n";
        let result = reader
            .read(
                Path::new("test.txt"),
                source,
                &FileReadMode::Diff,
                None,
                Some(source),
            )
            .unwrap();
        assert_eq!(result.content, "(no changes)");
    }

    #[test]
    fn test_diff_mode_with_changes() {
        let reader = FileReader::new();
        let old = "line 1\nline 2\nline 3\nline 4\nline 5\n";
        let new = "line 1\nline 2 modified\nline 3\nline 4\nline 5\n";
        let result = reader
            .read(
                Path::new("test.txt"),
                new,
                &FileReadMode::Diff,
                None,
                Some(old),
            )
            .unwrap();
        assert!(result.content.contains("line 2 modified"));
        assert!(result.content.contains("@@"));
        // Output is diff-formatted, not the raw new content.
        assert_ne!(result.content, new);
    }

    #[test]
    fn test_entropy_mode_filters_blocks() {
        let reader = FileReader::new();
        let source = r#"
fn complex_algorithm(data: &[u8]) -> Vec<u8> {
    let mut result = Vec::new();
    for (i, &byte) in data.iter().enumerate() {
        let transformed = byte ^ (i as u8).wrapping_mul(0x5A);
        result.push(transformed.rotate_left(3));
    }
    result
}

// boilerplate
// boilerplate
// boilerplate
// boilerplate
// boilerplate

pub fn another_complex_fn(x: f64, y: f64) -> f64 {
    let theta = x.atan2(y);
    let r = (x * x + y * y).sqrt();
    r * theta.sin() + theta.cos() * r.ln()
}
"#;
        let result = reader
            .read(
                Path::new("test.rs"),
                source,
                &FileReadMode::Entropy,
                None,
                None,
            )
            .unwrap();
        assert_eq!(result.mode, "entropy");
    }

    #[test]
    fn test_task_mode_no_intent_returns_full() {
        let reader = FileReader::new();
        let source = "some content\n";
        let result = reader
            .read(
                Path::new("test.txt"),
                source,
                &FileReadMode::Task,
                None,
                None,
            )
            .unwrap();
        assert_eq!(result.content, source);
    }

    #[test]
    fn test_task_mode_with_intent() {
        let reader = FileReader::new();
        let source = r#"
fn authentication_handler(req: Request) -> Response {
    let token = req.header("Authorization");
    validate_token(token)
}

fn database_query(sql: &str) -> Vec<Row> {
    let conn = get_connection();
    conn.execute(sql)
}

fn logging_middleware(req: Request) -> Request {
    println!("Request: {}", req.path());
    req
}
"#;
        let result = reader
            .read(
                Path::new("test.rs"),
                source,
                &FileReadMode::Task,
                Some("authentication token validation"),
                None,
            )
            .unwrap();
        assert_eq!(result.mode, "task");
        // The FTS5-selected chunks should include the auth-related code.
        assert!(result.content.contains("authentication") || result.content.contains("token"));
    }

    #[test]
    fn test_lines_mode_extracts_range() {
        let reader = FileReader::new();
        let source = "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\nline 10\n";
        let result = reader
            .read(
                Path::new("test.txt"),
                source,
                &FileReadMode::Lines(3..6),
                None,
                None,
            )
            .unwrap();
        assert_eq!(result.mode, "lines");
        // 0-based range 3..6 selects 1-based lines 4, 5 and 6.
        assert!(result.content.contains("line 4"));
        assert!(result.content.contains("line 5"));
        assert!(result.content.contains("line 6"));
    }

    #[test]
    fn test_lines_mode_empty_range() {
        let reader = FileReader::new();
        let source = "line 1\nline 2\n";
        let result = reader
            .read(
                Path::new("test.txt"),
                source,
                &FileReadMode::Lines(5..5),
                None,
                None,
            )
            .unwrap();
        assert!(result.content.is_empty());
    }

    #[test]
    fn test_aggressive_mode_compresses() {
        let reader = FileReader::new();
        let source = sample_rust_source();
        let result = reader
            .read(
                Path::new("test.rs"),
                source,
                &FileReadMode::Aggressive,
                None,
                None,
            )
            .unwrap();
        assert_eq!(result.mode, "aggressive");
        assert!(
            result.tokens_result <= result.tokens_original,
            "aggressive mode should compress: {} vs {}",
            result.tokens_result,
            result.tokens_original
        );
    }

    #[test]
    fn test_shannon_entropy_empty() {
        assert_eq!(shannon_entropy(""), 0.0);
    }

    #[test]
    fn test_shannon_entropy_single_char() {
        assert_eq!(shannon_entropy("aaaa"), 0.0);
    }

    #[test]
    fn test_shannon_entropy_varied() {
        // 10 distinct characters -> log2(10) ≈ 3.32 bits.
        let e = shannon_entropy("abcdefghij");
        assert!(e > 3.0, "entropy of varied text should be high: {e}");
    }

    #[test]
    fn test_detect_language() {
        assert_eq!(detect_language(Path::new("foo.rs")), Some("rust".into()));
        assert_eq!(detect_language(Path::new("bar.py")), Some("python".into()));
        assert_eq!(detect_language(Path::new("baz.js")), Some("javascript".into()));
        assert_eq!(detect_language(Path::new("qux.ts")), Some("typescript".into()));
        assert_eq!(detect_language(Path::new("no_ext")), None);
    }

    #[test]
    fn test_file_read_mode_enum_variants() {
        let modes: Vec<FileReadMode> = vec![
            FileReadMode::Full,
            FileReadMode::Map,
            FileReadMode::Signatures,
            FileReadMode::Diff,
            FileReadMode::Aggressive,
            FileReadMode::Entropy,
            FileReadMode::Task,
            FileReadMode::Lines(0..10),
        ];
        assert_eq!(modes.len(), 8);
    }

    #[test]
    fn test_block_entropies_computation() {
        let source = "fn foo() {\n let x = 1;\n}\n\nfn bar() {\n let y = 2;\n}\n";
        let blocks = compute_block_entropies(source);
        assert_eq!(blocks.len(), 2);
        assert!(blocks[0].entropy > 0.0);
        assert!(blocks[1].entropy > 0.0);
    }

    #[test]
    fn test_default_creates_reader() {
        let reader = FileReader::default();
        assert_eq!(reader.entropy_percentile(), 60.0);
    }

    #[test]
    fn test_with_config() {
        let reader = FileReader::with_config(75.0, 5);
        assert_eq!(reader.entropy_percentile(), 75.0);
    }

    use proptest::prelude::*;

    // Strategy: build a pseudo-Rust file by concatenating randomly chosen
    // snippets (some multi-line), then joining them into one string.
    fn arb_source_code(min_lines: usize, max_lines: usize) -> impl Strategy<Value = String> {
        proptest::collection::vec(
            prop_oneof![
                Just("use std::collections::HashMap;\n".to_string()),
                Just("pub struct Foo {\n field: i32,\n}\n".to_string()),
                Just("pub fn bar(x: i32) -> i32 {\n x + 1\n}\n".to_string()),
                Just("// a comment line\n".to_string()),
                Just("\n".to_string()),
                Just("fn helper() -> bool { true }\n".to_string()),
                Just("let val = compute(a, b, c);\n".to_string()),
                Just("impl Foo {\n pub fn new() -> Self { Self { field: 0 } }\n}\n".to_string()),
            ],
            min_lines..=max_lines,
        )
        .prop_map(|chunks| chunks.join(""))
    }

    proptest! {
        // Property: map mode stays within its ~50-token budget on any
        // sufficiently large generated file.
        #[test]
        fn prop36_map_mode_token_limit(
            source in arb_source_code(40, 80),
        ) {
            let line_count = source.lines().count();
            prop_assume!(line_count >= 100);

            let reader = FileReader::new();
            let path = Path::new("test.rs");

            let map_result = reader
                .read(path, &source, &FileReadMode::Map, None, None)
                .unwrap();

            prop_assert!(
                map_result.tokens_result <= 50,
                "map mode produced {} tokens for a {}-line file, expected ≤50",
                map_result.tokens_result,
                line_count
            );
        }

        // Property: every compressing mode yields at most as many tokens as
        // full mode on the same input.
        #[test]
        fn prop36_non_full_modes_compress(
            source in arb_source_code(30, 80),
        ) {
            let line_count = source.lines().count();
            prop_assume!(line_count >= 100);

            let reader = FileReader::new();
            let path = Path::new("test.rs");

            let full_result = reader
                .read(path, &source, &FileReadMode::Full, None, None)
                .unwrap();
            let full_tokens = full_result.tokens_result;

            let map_result = reader
                .read(path, &source, &FileReadMode::Map, None, None)
                .unwrap();
            prop_assert!(
                map_result.tokens_result <= full_tokens,
                "map ({}) should be ≤ full ({})",
                map_result.tokens_result, full_tokens
            );

            let sig_result = reader
                .read(path, &source, &FileReadMode::Signatures, None, None)
                .unwrap();
            prop_assert!(
                sig_result.tokens_result <= full_tokens,
                "signatures ({}) should be ≤ full ({})",
                sig_result.tokens_result, full_tokens
            );

            let ent_result = reader
                .read(path, &source, &FileReadMode::Entropy, None, None)
                .unwrap();
            prop_assert!(
                ent_result.tokens_result <= full_tokens,
                "entropy ({}) should be ≤ full ({})",
                ent_result.tokens_result, full_tokens
            );

            let agg_result = reader
                .read(path, &source, &FileReadMode::Aggressive, None, None)
                .unwrap();
            prop_assert!(
                agg_result.tokens_result <= full_tokens,
                "aggressive ({}) should be ≤ full ({})",
                agg_result.tokens_result, full_tokens
            );
        }
    }
}