infiniloom_engine/scanner/
mod.rs1mod common;
30mod io;
31mod parallel;
32mod pipelined;
33mod process;
34mod walk;
35
36pub use common::{is_binary_content, is_binary_extension, BINARY_EXTENSIONS};
37pub use io::{smart_read_file, smart_read_file_with_options, MMAP_THRESHOLD};
38pub use parallel::{scan_repository, UnifiedScanner};
39pub use pipelined::scan_files_pipelined;
40pub use process::{
41 count_tokens, count_tokens_accurate, estimate_lines, estimate_tokens, parse_with_thread_local,
42 process_file_content_only, process_file_with_content, process_file_without_content,
43};
44pub use walk::{collect_file_infos, collect_file_paths};
45
46use std::path::PathBuf;
47
/// Settings for a scan.
///
/// This type only carries the settings; the code that interprets them is outside this
/// section. The per-field notes therefore state the value produced by [`Default`] and hedge
/// on anything that depends on the consumer.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly (for example
/// `assert_eq!(ScannerConfig::fast(), ScannerConfig::default())`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannerConfig {
    /// Whether hidden files are included. Defaults to `false`.
    pub include_hidden: bool,
    /// Whether `.gitignore` rules are respected. Defaults to `true`.
    pub respect_gitignore: bool,
    /// Whether file contents are read. Defaults to `true`.
    pub read_contents: bool,
    /// Maximum file size. Defaults to `50 * 1024 * 1024` (50 MiB if the unit is bytes).
    ///
    /// NOTE(review): presumably in bytes, with larger files being skipped — confirm where
    /// this limit is enforced.
    pub max_file_size: u64,
    /// Whether symbols are skipped. Defaults to `false`.
    ///
    /// NOTE(review): presumably disables symbol extraction during processing — confirm.
    pub skip_symbols: bool,

    /// Whether memory-mapped reads are enabled. Defaults to `true`.
    ///
    /// NOTE(review): presumably only applies above `MMAP_THRESHOLD` — confirm in `io`.
    pub use_mmap: bool,
    /// Whether accurate token counting is requested. Defaults to `false`;
    /// [`ScannerConfig::accurate`] turns it on.
    pub accurate_tokens: bool,
    /// Whether pipelining is enabled. Defaults to `true`.
    pub use_pipelining: bool,
    /// Batch size. Defaults to [`DEFAULT_BATCH_SIZE`].
    pub batch_size: usize,
}

/// Threshold related to pipelined scanning.
///
/// NOTE(review): not referenced in this section; presumably the file count from which the
/// pipelined scanner is preferred — confirm against the caller.
pub const PIPELINE_THRESHOLD: usize = 100;

/// Value used for [`ScannerConfig::batch_size`] by [`ScannerConfig::default`].
pub const DEFAULT_BATCH_SIZE: usize = 5000;

impl Default for ScannerConfig {
    /// Returns the baseline configuration; see each field for its default value.
    fn default() -> Self {
        Self {
            include_hidden: false,
            respect_gitignore: true,
            read_contents: true,
            max_file_size: 50 * 1024 * 1024,
            skip_symbols: false,
            use_mmap: true,
            accurate_tokens: false,
            use_pipelining: true,
            batch_size: DEFAULT_BATCH_SIZE,
        }
    }
}

impl ScannerConfig {
    /// Preset that is currently identical to [`ScannerConfig::default`].
    #[must_use]
    pub fn fast() -> Self {
        Self::default()
    }

    /// Preset equal to the default configuration with `accurate_tokens` set to `true`.
    #[must_use]
    pub fn accurate() -> Self {
        Self { accurate_tokens: true, ..Self::default() }
    }
}
130
/// Record describing one file: where it is, plus optional size and language.
///
/// `PartialEq`/`Eq` are derived so records can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileInfo {
    /// Path of the file.
    pub path: PathBuf,
    /// Relative path of the file, as a string.
    ///
    /// NOTE(review): presumably relative to the scanned root — confirm where these records
    /// are constructed.
    pub relative_path: String,
    /// Size in bytes, if recorded. `None` when built with [`FileInfo::new`].
    pub size_bytes: Option<u64>,
    /// Language, if recorded. `None` until set through [`FileInfo::with_language`].
    pub language: Option<String>,
}

impl FileInfo {
    /// Creates a record with no size and no language.
    #[must_use]
    pub fn new(path: PathBuf, relative_path: String) -> Self {
        Self { path, relative_path, size_bytes: None, language: None }
    }

    /// Creates a record with `size_bytes` set to `Some(size_bytes)` and no language.
    #[must_use]
    pub fn with_size(path: PathBuf, relative_path: String, size_bytes: u64) -> Self {
        // Delegate to `new` so the "unset" defaults are defined in exactly one place.
        Self { size_bytes: Some(size_bytes), ..Self::new(path, relative_path) }
    }

    /// Consumes the record and returns it with `language` replaced by the given value
    /// (passing `None` clears it).
    ///
    /// Marked `#[must_use]` because the receiver is moved: discarding the return value
    /// throws the whole record away.
    #[must_use]
    pub fn with_language(mut self, language: Option<String>) -> Self {
        self.language = language;
        self
    }
}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `FileInfo` fixture shared by the `FileInfo` tests below.
    fn sample_info() -> FileInfo {
        FileInfo::new(PathBuf::from("/path/to/file.rs"), "file.rs".to_string())
    }

    /// Pins every field of the default configuration. The exhaustive destructuring makes
    /// this test stop compiling when a field is added, so the new default gets pinned too.
    #[test]
    fn test_scanner_config_default() {
        let ScannerConfig {
            include_hidden,
            respect_gitignore,
            read_contents,
            max_file_size,
            skip_symbols,
            use_mmap,
            accurate_tokens,
            use_pipelining,
            batch_size,
        } = ScannerConfig::default();

        assert!(!include_hidden);
        assert!(respect_gitignore);
        assert!(read_contents);
        assert_eq!(max_file_size, 50 * 1024 * 1024);
        assert!(!skip_symbols);
        assert!(use_mmap);
        assert!(!accurate_tokens);
        assert!(use_pipelining);
        assert_eq!(batch_size, DEFAULT_BATCH_SIZE);
    }

    /// The `fast` preset keeps estimation, mmap and pipelining as in the defaults.
    #[test]
    fn test_scanner_config_fast() {
        let ScannerConfig { accurate_tokens, use_mmap, use_pipelining, .. } =
            ScannerConfig::fast();
        assert!(!accurate_tokens);
        assert!(use_mmap);
        assert!(use_pipelining);
    }

    /// The `accurate` preset flips `accurate_tokens` and leaves mmap and pipelining on.
    #[test]
    fn test_scanner_config_accurate() {
        let ScannerConfig { accurate_tokens, use_mmap, use_pipelining, .. } =
            ScannerConfig::accurate();
        assert!(accurate_tokens);
        assert!(use_mmap);
        assert!(use_pipelining);
    }

    /// `new` stores the relative path and leaves the optional fields unset.
    #[test]
    fn test_file_info_new() {
        let FileInfo { relative_path, size_bytes, language, .. } = sample_info();
        assert_eq!(relative_path, "file.rs");
        assert!(size_bytes.is_none());
        assert!(language.is_none());
    }

    /// `with_size` records the given size.
    #[test]
    fn test_file_info_with_size() {
        let path = PathBuf::from("/path/to/file.rs");
        let relative = "file.rs".to_string();
        let info = FileInfo::with_size(path, relative, 1024);
        assert_eq!(info.size_bytes, Some(1024));
    }

    /// `with_language` stores the given language on the record.
    #[test]
    fn test_file_info_with_language() {
        let rust = Some("Rust".to_string());
        let info = sample_info().with_language(rust);
        assert_eq!(info.language, Some("Rust".to_string()));
    }
}