1use anyhow::Result;
18use std::collections::BTreeSet;
19use std::fs;
20use std::path::{Component, Path, PathBuf};
21use tokei::{Config, Languages};
22
23use tokmd_settings::ScanOptions;
24use tokmd_types::ConfigMode;
25
/// A file held entirely in memory: a path plus the raw bytes stored under it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InMemoryFile {
    /// Path associated with the contents.
    pub path: PathBuf,
    /// Raw file contents.
    pub bytes: Vec<u8>,
}

impl InMemoryFile {
    /// Builds an [`InMemoryFile`] from anything convertible into a path and a byte buffer.
    #[must_use]
    pub fn new(path: impl Into<PathBuf>, bytes: impl Into<Vec<u8>>) -> Self {
        let path: PathBuf = path.into();
        let bytes: Vec<u8> = bytes.into();
        Self { path, bytes }
    }
}
42
43#[derive(Debug)]
53pub struct MaterializedScan {
54 languages: Languages,
55 logical_paths: Vec<PathBuf>,
56 root: tempfile::TempDir,
57}
58
59impl MaterializedScan {
60 #[must_use]
61 pub fn languages(&self) -> &Languages {
62 &self.languages
63 }
64
65 #[must_use]
66 pub fn logical_paths(&self) -> &[PathBuf] {
67 &self.logical_paths
68 }
69
70 #[must_use]
71 pub fn strip_prefix(&self) -> &Path {
72 self.root.path()
73 }
74}
75
76pub fn scan(paths: &[PathBuf], args: &ScanOptions) -> Result<Languages> {
106 let cfg = config_from_scan_options(args);
107 let ignores = ignored_patterns(args);
108 for path in paths {
109 if !path.exists() {
110 anyhow::bail!("Path not found: {}", path.display());
111 }
112 }
113
114 let mut languages = Languages::new();
115 languages.get_statistics(paths, &ignores, &cfg);
116
117 Ok(languages)
118}
119
120#[must_use]
122pub fn config_from_scan_options(args: &ScanOptions) -> Config {
123 build_config(args)
124}
125
126pub fn normalize_in_memory_paths(inputs: &[InMemoryFile]) -> Result<Vec<PathBuf>> {
131 normalize_logical_paths(inputs, true)
132}
133
134pub fn scan_in_memory(inputs: &[InMemoryFile], args: &ScanOptions) -> Result<MaterializedScan> {
135 let root = tempfile::tempdir()?;
136 let logical_paths = normalize_in_memory_paths(inputs)?;
137
138 for (logical_path, input) in logical_paths.iter().zip(inputs) {
139 let full_path = root.path().join(logical_path);
140 if let Some(parent) = full_path.parent() {
141 fs::create_dir_all(parent)?;
142 }
143 fs::write(full_path, &input.bytes)?;
144 }
145
146 let scan_root = vec![root.path().to_path_buf()];
147 let languages = scan(&scan_root, args)?;
148
149 Ok(MaterializedScan {
150 languages,
151 logical_paths,
152 root,
153 })
154}
155
156fn build_config(args: &ScanOptions) -> Config {
157 let mut cfg = match args.config {
158 ConfigMode::Auto => Config::from_config_files(),
159 ConfigMode::None => Config::default(),
160 };
161
162 if args.hidden {
164 cfg.hidden = Some(true);
165 }
166 if args.no_ignore {
167 cfg.no_ignore = Some(true);
168 cfg.no_ignore_dot = Some(true);
169 cfg.no_ignore_parent = Some(true);
170 cfg.no_ignore_vcs = Some(true);
171 }
172 if args.no_ignore_dot {
173 cfg.no_ignore_dot = Some(true);
174 }
175 if args.no_ignore_parent {
176 cfg.no_ignore_parent = Some(true);
177 }
178 if args.no_ignore_vcs {
179 cfg.no_ignore_vcs = Some(true);
180 }
181 if args.treat_doc_strings_as_comments {
182 cfg.treat_doc_strings_as_comments = Some(true);
183 }
184
185 cfg
186}
187
188fn ignored_patterns(args: &ScanOptions) -> Vec<&str> {
189 args.excluded.iter().map(|s| s.as_str()).collect()
190}
191
192fn normalize_logical_paths(
193 inputs: &[InMemoryFile],
194 case_insensitive: bool,
195) -> Result<Vec<PathBuf>> {
196 let mut seen = BTreeSet::new();
197 let mut normalized = Vec::with_capacity(inputs.len());
198
199 for input in inputs {
200 let logical_path = normalize_logical_path(&input.path)?;
201 if !seen.insert(logical_path_key(&logical_path, case_insensitive)) {
202 anyhow::bail!("Duplicate in-memory path: {}", logical_path.display());
203 }
204 normalized.push(logical_path);
205 }
206
207 Ok(normalized)
208}
209
/// Renders `path` as the string key used for duplicate detection.
///
/// The path is converted lossily to text; when `case_insensitive` is set the
/// text is additionally lowercased.
fn logical_path_key(path: &Path, case_insensitive: bool) -> String {
    match (path.to_string_lossy(), case_insensitive) {
        (text, true) => text.to_lowercase(),
        (text, false) => text.into_owned(),
    }
}
218
219fn normalize_logical_path(path: &Path) -> Result<PathBuf> {
220 if path.as_os_str().is_empty() {
221 anyhow::bail!("In-memory path must not be empty");
222 }
223
224 let mut normalized = PathBuf::new();
225 for component in path.components() {
226 match component {
227 Component::Normal(segment) => normalized.push(segment),
228 Component::CurDir => {}
229 Component::ParentDir => {
230 anyhow::bail!(
231 "In-memory path must not contain parent traversal: {}",
232 path.display()
233 );
234 }
235 Component::RootDir | Component::Prefix(_) => {
236 anyhow::bail!("In-memory path must be relative: {}", path.display());
237 }
238 }
239 }
240
241 if normalized.as_os_str().is_empty() {
242 anyhow::bail!("In-memory path must resolve to a file: {}", path.display());
243 }
244
245 Ok(normalized)
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline options: nothing excluded, `ConfigMode::Auto`, every flag off.
    fn default_scan_options() -> ScanOptions {
        ScanOptions {
            excluded: Vec::new(),
            config: ConfigMode::Auto,
            hidden: false,
            no_ignore: false,
            no_ignore_parent: false,
            no_ignore_dot: false,
            no_ignore_vcs: false,
            treat_doc_strings_as_comments: false,
        }
    }

    /// The crate's own `src` directory, used as a known-good scan target.
    fn test_path() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("src")
    }

    /// Scans [`test_path`] with the given options.
    fn scan_src(args: &ScanOptions) -> Result<Languages> {
        scan(&[test_path()], args)
    }

    #[test]
    fn scan_finds_rust_files() -> Result<()> {
        let languages = scan_src(&default_scan_options())?;
        assert!(!languages.is_empty());
        assert!(languages.get(&tokei::LanguageType::Rust).is_some());
        Ok(())
    }

    #[test]
    fn scan_with_nonexistent_path_returns_error() -> Result<()> {
        let scratch = tempfile::tempdir()?;
        let missing = scratch.path().join("definitely-not-created");

        let outcome = scan(&[missing], &default_scan_options());
        assert!(outcome.is_err());

        let message = outcome.expect_err("should have failed").to_string();
        assert!(message.contains("Path not found"));
        Ok(())
    }

    #[test]
    fn scan_with_hidden_flag() -> Result<()> {
        let args = ScanOptions {
            hidden: true,
            ..default_scan_options()
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_no_ignore_flag() -> Result<()> {
        let args = ScanOptions {
            no_ignore: true,
            ..default_scan_options()
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_individual_no_ignore_flags() -> Result<()> {
        let args = ScanOptions {
            no_ignore_parent: true,
            no_ignore_dot: true,
            no_ignore_vcs: true,
            ..default_scan_options()
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_treat_doc_strings_as_comments() -> Result<()> {
        let args = ScanOptions {
            treat_doc_strings_as_comments: true,
            ..default_scan_options()
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_config_mode_none() -> Result<()> {
        let args = ScanOptions {
            config: ConfigMode::None,
            ..default_scan_options()
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_excluded_patterns() -> Result<()> {
        let args = ScanOptions {
            excluded: vec![String::from("target"), String::from("*.min.js")],
            ..default_scan_options()
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_with_all_flags_combined() -> Result<()> {
        let args = ScanOptions {
            excluded: vec![String::from("node_modules")],
            config: ConfigMode::None,
            hidden: true,
            no_ignore: true,
            no_ignore_parent: true,
            no_ignore_dot: true,
            no_ignore_vcs: true,
            treat_doc_strings_as_comments: true,
        };
        assert!(scan_src(&args).is_ok());
        Ok(())
    }

    #[test]
    fn scan_returns_code_stats() -> Result<()> {
        let languages = scan_src(&default_scan_options())?;

        let rust = languages
            .get(&tokei::LanguageType::Rust)
            .expect("should find rust in src/lib.rs");
        assert!(rust.code > 0);
        assert!(rust.lines() > 0);
        Ok(())
    }

    #[test]
    fn normalize_logical_path_strips_dot_segments() -> Result<()> {
        let cleaned = normalize_logical_path(Path::new("./src/./lib.rs"))?;
        assert_eq!(cleaned, PathBuf::from("src/lib.rs"));
        Ok(())
    }

    #[test]
    fn normalize_logical_path_rejects_absolute_paths() {
        let message = normalize_logical_path(Path::new("/src/lib.rs"))
            .unwrap_err()
            .to_string();
        assert!(message.contains("must be relative"));
    }

    #[test]
    fn normalize_logical_path_rejects_parent_traversal() {
        let message = normalize_logical_path(Path::new("../src/lib.rs"))
            .unwrap_err()
            .to_string();
        assert!(message.contains("parent traversal"));
    }

    #[test]
    fn normalize_logical_paths_rejects_duplicate_after_normalization() {
        let inputs = [
            InMemoryFile::new("./src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
        ];

        let message = normalize_logical_paths(&inputs, false)
            .unwrap_err()
            .to_string();
        assert!(message.contains("Duplicate in-memory path"));
    }

    #[test]
    fn normalize_logical_paths_rejects_case_only_collision_on_case_insensitive_fs() {
        let inputs = [
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("SRC/LIB.rs", "fn main() {}\n"),
        ];

        let message = normalize_logical_paths(&inputs, true)
            .unwrap_err()
            .to_string();
        assert!(message.contains("Duplicate in-memory path"));
    }
}