1use anyhow::Result;
18use std::collections::BTreeSet;
19use std::fs;
20use std::path::{Component, Path, PathBuf};
21use tokei::{Config, Languages};
22
23use crate::path::ValidatedRoot;
24use tokmd_settings::ScanOptions;
25use tokmd_types::ConfigMode;
26
/// A file described entirely by values held in memory: a path plus its raw contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InMemoryFile {
    /// Path associated with the contents.
    pub path: PathBuf,
    /// Raw contents of the file.
    pub bytes: Vec<u8>,
}

impl InMemoryFile {
    /// Creates an [`InMemoryFile`] from anything convertible into a `PathBuf` and a
    /// `Vec<u8>` (for example `&str` for both arguments).
    #[must_use]
    pub fn new(path: impl Into<PathBuf>, bytes: impl Into<Vec<u8>>) -> Self {
        let path: PathBuf = path.into();
        let bytes: Vec<u8> = bytes.into();
        Self { path, bytes }
    }
}
43
44#[derive(Debug)]
54pub struct MaterializedScan {
55 languages: Languages,
56 logical_paths: Vec<PathBuf>,
57 root: tempfile::TempDir,
58}
59
60impl MaterializedScan {
61 #[must_use]
62 pub fn languages(&self) -> &Languages {
63 &self.languages
64 }
65
66 #[must_use]
67 pub fn logical_paths(&self) -> &[PathBuf] {
68 &self.logical_paths
69 }
70
71 #[must_use]
72 pub fn strip_prefix(&self) -> &Path {
73 self.root.path()
74 }
75}
76
77pub fn scan(paths: &[PathBuf], args: &ScanOptions) -> Result<Languages> {
107 let cfg = config_from_scan_options(args);
108 let ignores = ignored_patterns(args);
109 let roots: Vec<ValidatedRoot> = paths
110 .iter()
111 .map(ValidatedRoot::new)
112 .collect::<std::result::Result<_, _>>()?;
113 let scan_paths: Vec<PathBuf> = roots
114 .iter()
115 .map(|root| root.input().to_path_buf())
116 .collect();
117
118 let mut languages = Languages::new();
119 languages.get_statistics(&scan_paths, &ignores, &cfg);
120
121 Ok(languages)
122}
123
124#[must_use]
126pub fn config_from_scan_options(args: &ScanOptions) -> Config {
127 build_config(args)
128}
129
130pub fn normalize_in_memory_paths(inputs: &[InMemoryFile]) -> Result<Vec<PathBuf>> {
135 normalize_logical_paths(inputs, true)
136}
137
138pub fn scan_in_memory(inputs: &[InMemoryFile], args: &ScanOptions) -> Result<MaterializedScan> {
139 let root = tempfile::tempdir()?;
140 let logical_paths = normalize_in_memory_paths(inputs)?;
141
142 for (logical_path, input) in logical_paths.iter().zip(inputs) {
143 let full_path = root.path().join(logical_path);
144 if let Some(parent) = full_path.parent() {
145 fs::create_dir_all(parent)?;
146 }
147 fs::write(full_path, &input.bytes)?;
148 }
149
150 let scan_root = vec![root.path().to_path_buf()];
151 let languages = scan(&scan_root, args)?;
152
153 Ok(MaterializedScan {
154 languages,
155 logical_paths,
156 root,
157 })
158}
159
160fn build_config(args: &ScanOptions) -> Config {
161 let mut cfg = match args.config {
162 ConfigMode::Auto => Config::from_config_files(),
163 ConfigMode::None => Config::default(),
164 };
165
166 if args.hidden {
168 cfg.hidden = Some(true);
169 }
170 if args.no_ignore {
171 cfg.no_ignore = Some(true);
172 cfg.no_ignore_dot = Some(true);
173 cfg.no_ignore_parent = Some(true);
174 cfg.no_ignore_vcs = Some(true);
175 }
176 if args.no_ignore_dot {
177 cfg.no_ignore_dot = Some(true);
178 }
179 if args.no_ignore_parent {
180 cfg.no_ignore_parent = Some(true);
181 }
182 if args.no_ignore_vcs {
183 cfg.no_ignore_vcs = Some(true);
184 }
185 if args.treat_doc_strings_as_comments {
186 cfg.treat_doc_strings_as_comments = Some(true);
187 }
188
189 cfg
190}
191
192fn ignored_patterns(args: &ScanOptions) -> Vec<&str> {
193 args.excluded.iter().map(|s| s.as_str()).collect()
194}
195
196fn normalize_logical_paths(
197 inputs: &[InMemoryFile],
198 case_insensitive: bool,
199) -> Result<Vec<PathBuf>> {
200 let mut seen = BTreeSet::new();
201 let mut normalized = Vec::with_capacity(inputs.len());
202
203 for input in inputs {
204 let logical_path = normalize_logical_path(&input.path)?;
205 if !seen.insert(logical_path_key(&logical_path, case_insensitive)) {
206 anyhow::bail!("Duplicate in-memory path: {}", logical_path.display());
207 }
208 normalized.push(logical_path);
209 }
210
211 Ok(normalized)
212}
213
/// Renders `path` as the string key used for duplicate detection.
///
/// The path is converted lossily to text; when `case_insensitive` is set the
/// text is additionally lowercased so that case-only differences collapse to
/// the same key.
fn logical_path_key(path: &Path, case_insensitive: bool) -> String {
    let text = path.to_string_lossy().into_owned();
    if !case_insensitive {
        return text;
    }
    text.to_lowercase()
}
222
223fn normalize_logical_path(path: &Path) -> Result<PathBuf> {
224 if path.as_os_str().is_empty() {
225 anyhow::bail!("In-memory path must not be empty");
226 }
227
228 let mut normalized = PathBuf::new();
229 for component in path.components() {
230 match component {
231 Component::Normal(segment) => normalized.push(segment),
232 Component::CurDir => {}
233 Component::ParentDir => {
234 anyhow::bail!(
235 "In-memory path must not contain parent traversal: {}",
236 path.display()
237 );
238 }
239 Component::RootDir | Component::Prefix(_) => {
240 anyhow::bail!("In-memory path must be relative: {}", path.display());
241 }
242 }
243 }
244
245 if normalized.as_os_str().is_empty() {
246 anyhow::bail!("In-memory path must resolve to a file: {}", path.display());
247 }
248
249 Ok(normalized)
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline options: automatic config lookup, no exclusions, every flag off.
    fn default_scan_options() -> ScanOptions {
        ScanOptions {
            excluded: Vec::new(),
            config: ConfigMode::Auto,
            hidden: false,
            no_ignore: false,
            no_ignore_parent: false,
            no_ignore_dot: false,
            no_ignore_vcs: false,
            treat_doc_strings_as_comments: false,
        }
    }

    /// The `src` directory of this crate, used as a scan target.
    fn test_path() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("src")
    }

    /// Scans this crate's `src` directory with `args`.
    fn scan_sources(args: &ScanOptions) -> Result<Languages> {
        scan(&[test_path()], args)
    }

    #[test]
    fn scan_finds_rust_files() -> Result<()> {
        let languages = scan_sources(&default_scan_options())?;
        assert!(!languages.is_empty());
        assert!(languages.contains_key(&tokei::LanguageType::Rust));
        Ok(())
    }

    #[test]
    fn scan_with_nonexistent_path_returns_error() -> Result<()> {
        let dir = tempfile::tempdir()?;
        let missing = dir.path().join("definitely-not-created");

        let outcome = scan(&[missing], &default_scan_options());
        assert!(outcome.is_err());

        let message = outcome.expect_err("should have failed").to_string();
        assert!(message.contains("Path not found"));
        Ok(())
    }

    #[test]
    fn scan_with_hidden_flag() -> Result<()> {
        let args = ScanOptions {
            hidden: true,
            ..default_scan_options()
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_no_ignore_flag() -> Result<()> {
        let args = ScanOptions {
            no_ignore: true,
            ..default_scan_options()
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_individual_no_ignore_flags() -> Result<()> {
        let args = ScanOptions {
            no_ignore_parent: true,
            no_ignore_dot: true,
            no_ignore_vcs: true,
            ..default_scan_options()
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_treat_doc_strings_as_comments() -> Result<()> {
        let args = ScanOptions {
            treat_doc_strings_as_comments: true,
            ..default_scan_options()
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_config_mode_none() -> Result<()> {
        let args = ScanOptions {
            config: ConfigMode::None,
            ..default_scan_options()
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_excluded_patterns() -> Result<()> {
        let args = ScanOptions {
            excluded: vec!["target".to_string(), "*.min.js".to_string()],
            ..default_scan_options()
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_all_flags_combined() -> Result<()> {
        let args = ScanOptions {
            excluded: vec!["node_modules".to_string()],
            config: ConfigMode::None,
            hidden: true,
            no_ignore: true,
            no_ignore_parent: true,
            no_ignore_dot: true,
            no_ignore_vcs: true,
            treat_doc_strings_as_comments: true,
        };
        assert!(scan_sources(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_returns_code_stats() -> Result<()> {
        let languages = scan_sources(&default_scan_options())?;

        let rust = languages
            .get(&tokei::LanguageType::Rust)
            .expect("should find rust in src/lib.rs");
        assert!(rust.code > 0);
        assert!(rust.lines() > 0);
        Ok(())
    }

    #[test]
    fn normalize_logical_path_strips_dot_segments() -> Result<()> {
        let expected = PathBuf::from("src/lib.rs");
        let actual = normalize_logical_path(Path::new("./src/./lib.rs"))?;
        assert_eq!(actual, expected);
        Ok(())
    }

    #[test]
    fn normalize_logical_path_rejects_absolute_paths() {
        let message = normalize_logical_path(Path::new("/src/lib.rs"))
            .unwrap_err()
            .to_string();
        assert!(message.contains("must be relative"));
    }

    #[test]
    fn normalize_logical_path_rejects_parent_traversal() {
        let message = normalize_logical_path(Path::new("../src/lib.rs"))
            .unwrap_err()
            .to_string();
        assert!(message.contains("parent traversal"));
    }

    #[test]
    fn normalize_logical_paths_rejects_duplicate_after_normalization() {
        let inputs = [
            InMemoryFile::new("./src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
        ];

        let message = normalize_logical_paths(&inputs, false)
            .unwrap_err()
            .to_string();
        assert!(message.contains("Duplicate in-memory path"));
    }

    #[test]
    fn normalize_logical_paths_rejects_case_only_collision_on_case_insensitive_fs() {
        let inputs = [
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("SRC/LIB.rs", "fn main() {}\n"),
        ];

        let message = normalize_logical_paths(&inputs, true)
            .unwrap_err()
            .to_string();
        assert!(message.contains("Duplicate in-memory path"));
    }
}
449
450pub mod exclude;
451pub mod math;
452pub mod path;
453pub mod tokeignore;
454pub mod walk;
455
456pub use exclude::{add_exclude_pattern, has_exclude_pattern, normalize_exclude_pattern};
457pub use math::{gini_coefficient, percentile, round_f64, safe_ratio};
458pub use path::{normalize_rel_path, normalize_slashes};
459pub use tokeignore::{InitArgs, InitProfile, init_tokeignore};