1use std::borrow::Cow;
20use std::collections::{BTreeMap, BTreeSet};
21use std::fs;
22use std::path::{Path, PathBuf};
23
24pub mod module_key;
25
26use crate::module_key::module_key_from_normalized;
27use tokei::{CodeStats, Config, LanguageType, Languages};
28use tokmd_types::{
29 ChildIncludeMode, ChildrenMode, ExportData, FileKind, FileRow, LangReport, LangRow,
30 ModuleReport, ModuleRow, Totals,
31};
32
/// Heuristic divisor for estimating token counts from byte counts
/// (roughly four bytes of source text per token).
const CHARS_PER_TOKEN: usize = 4;
35
/// Running per-file aggregate of line counts and size metrics, merged
/// across repeated `(path, lang, kind)` keys before rows are emitted.
#[derive(Default, Clone, Copy)]
struct Agg {
    code: usize,
    comments: usize,
    blanks: usize,
    bytes: usize,
    tokens: usize,
}
44
/// Aggregation-map key identifying one output row: the normalized path,
/// the language name, and whether the row is a parent file or an
/// embedded-language child. Ord derivation gives deterministic row order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct Key<'a> {
    path: String,
    lang: &'a str,
    kind: FileKind,
}
51
/// One in-memory file to analyze: a logical path (used only for language
/// detection and module-key derivation, never read from disk) plus the
/// file's raw bytes.
pub struct InMemoryRowInput<'a> {
    pub logical_path: &'a Path,
    pub bytes: &'a [u8],
}
57
58impl<'a> InMemoryRowInput<'a> {
59 #[must_use]
60 pub fn new(logical_path: &'a Path, bytes: &'a [u8]) -> Self {
61 Self {
62 logical_path,
63 bytes,
64 }
65 }
66}
67
68fn get_file_metrics(path: &Path) -> (usize, usize) {
69 let bytes = fs::metadata(path).map(|m| m.len() as usize).unwrap_or(0);
73 metrics_from_byte_len(bytes)
74}
75
/// Derives the `(bytes, tokens)` pair for an in-memory buffer.
fn metrics_from_bytes(bytes: &[u8]) -> (usize, usize) {
    metrics_from_byte_len(bytes.len())
}
79
80fn metrics_from_byte_len(bytes: usize) -> (usize, usize) {
81 let tokens = bytes / CHARS_PER_TOKEN;
82 (bytes, tokens)
83}
84
/// Builds a synthetic path under a fixed fake directory so extension-based
/// language detection can run on an in-memory file without touching disk.
/// Falls back to the whole path when it has no final component.
fn synthetic_detection_path(logical_path: &Path) -> PathBuf {
    let leaf = logical_path
        .file_name()
        .unwrap_or_else(|| logical_path.as_os_str());
    Path::new("__tokmd_in_memory_detection__").join(leaf)
}
90
91fn language_from_in_memory_shebang(bytes: &[u8]) -> Option<LanguageType> {
92 const READ_LIMIT: usize = 128;
93
94 let first_line = bytes[..bytes.len().min(READ_LIMIT)]
95 .split(|b| *b == b'\n')
96 .next()?;
97 let first_line = std::str::from_utf8(first_line).ok()?;
98
99 let direct = LanguageType::list()
100 .iter()
101 .map(|(lang, _)| *lang)
102 .find(|lang| lang.shebangs().contains(&first_line));
103 if direct.is_some() {
104 return direct;
105 }
106
107 let mut words = first_line.split_whitespace();
108 if words.next() == Some("#!/usr/bin/env") {
109 let interpreter = env_interpreter_token(words)?;
110 return language_from_env_interpreter(interpreter);
111 }
112
113 None
114}
115
116fn env_interpreter_token<'a>(words: impl Iterator<Item = &'a str>) -> Option<&'a str> {
117 let mut skip_next = false;
118
119 for word in words {
120 if skip_next {
121 skip_next = false;
122 continue;
123 }
124
125 if word.is_empty() {
126 continue;
127 }
128
129 if looks_like_env_assignment(word) {
130 continue;
131 }
132
133 match word {
134 "-S" | "--split-string" | "-i" | "--ignore-environment" => continue,
135 "-u" | "--unset" | "-C" | "--chdir" | "-P" | "--default-path" | "-a" | "--argv0"
136 | "--default-signal" | "--ignore-signal" | "--block-signal" => {
137 skip_next = true;
138 continue;
139 }
140 _ if word.starts_with("--unset=")
141 || word.starts_with("--chdir=")
142 || word.starts_with("--default-path=")
143 || word.starts_with("--argv0=")
144 || word.starts_with("--default-signal=")
145 || word.starts_with("--ignore-signal=")
146 || word.starts_with("--block-signal=") =>
147 {
148 continue;
149 }
150 _ if word.starts_with('-') => continue,
151 _ => return Some(word),
152 }
153 }
154
155 None
156}
157
/// Returns true when `word` looks like a `NAME=value` environment
/// assignment: the name is non-empty, starts with `_` or an ASCII letter,
/// and continues with only `_` or ASCII alphanumerics.
fn looks_like_env_assignment(word: &str) -> bool {
    let name = match word.split_once('=') {
        Some((name, _)) => name,
        None => return false,
    };

    let mut chars = name.chars();
    match chars.next() {
        Some(first) if first == '_' || first.is_ascii_alphabetic() => {
            chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric())
        }
        // Empty name or invalid leading character.
        _ => false,
    }
}
178
179fn language_from_env_interpreter(interpreter: &str) -> Option<LanguageType> {
180 let token = interpreter
181 .rsplit('/')
182 .next()
183 .unwrap_or(interpreter)
184 .trim_start_matches('-');
187
188 if token.starts_with("python") {
189 return LanguageType::from_file_extension("py");
190 }
191
192 match token {
193 "bash" | "sh" | "zsh" | "ksh" | "fish" => LanguageType::from_name("Bash"),
194 "node" | "nodejs" => LanguageType::from_name("JavaScript"),
195 "ruby" => LanguageType::from_name("Ruby"),
196 "perl" | "perl5" => LanguageType::from_name("Perl"),
197 "php" => LanguageType::from_name("PHP"),
198 "pwsh" | "powershell" => LanguageType::from_name("PowerShell"),
199 _ => None,
200 }
201}
202
203fn detect_in_memory_language(
204 logical_path: &Path,
205 bytes: &[u8],
206 config: &Config,
207) -> Option<LanguageType> {
208 let detection_path = synthetic_detection_path(logical_path);
209 LanguageType::from_path(&detection_path, config)
210 .or_else(|| language_from_in_memory_shebang(bytes))
211}
212
213fn insert_row<'a>(
214 map: &mut BTreeMap<Key<'a>, (String, Agg)>,
215 key: Key<'a>,
216 module: String,
217 stats: &CodeStats,
218 bytes: usize,
219 tokens: usize,
220) {
221 let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
222 entry.1.code += stats.code;
223 entry.1.comments += stats.comments;
224 entry.1.blanks += stats.blanks;
225 entry.1.bytes += bytes;
226 entry.1.tokens += tokens;
227}
228
229fn rows_from_map<'a>(map: BTreeMap<Key<'a>, (String, Agg)>) -> Vec<FileRow> {
230 map.into_iter()
231 .map(|(key, (module, agg))| {
232 let lines = agg.code + agg.comments + agg.blanks;
233 FileRow {
234 path: key.path,
235 module,
236 lang: key.lang.to_string(),
237 kind: key.kind,
238 code: agg.code,
239 comments: agg.comments,
240 blanks: agg.blanks,
241 lines,
242 bytes: agg.bytes,
243 tokens: agg.tokens,
244 }
245 })
246 .collect()
247}
248
249pub fn collect_in_memory_file_rows(
254 inputs: &[InMemoryRowInput<'_>],
255 module_roots: &[String],
256 module_depth: usize,
257 children: ChildIncludeMode,
258 config: &Config,
259) -> Vec<FileRow> {
260 let mut map = BTreeMap::new();
261
262 for input in inputs {
263 let Some(lang_type) = detect_in_memory_language(input.logical_path, input.bytes, config)
264 else {
265 continue;
266 };
267
268 let path = normalize_path(input.logical_path, None);
269 let module = module_key_from_normalized(&path, module_roots, module_depth);
270 let stats = lang_type.parse_from_slice(input.bytes, config);
271 let summary = stats.summarise();
272 let (bytes, tokens) = metrics_from_bytes(input.bytes);
273
274 if children == ChildIncludeMode::Separate {
275 for (child_type, child_stats) in &stats.blobs {
276 let child_summary = child_stats.summarise();
277 insert_row(
278 &mut map,
279 Key {
280 path: path.clone(),
281 lang: child_type.name(),
282 kind: FileKind::Child,
283 },
284 module.clone(),
285 &child_summary,
286 0,
287 0,
288 );
289 }
290 }
291
292 insert_row(
293 &mut map,
294 Key {
295 path,
296 lang: lang_type.name(),
297 kind: FileKind::Parent,
298 },
299 module,
300 &summary,
301 bytes,
302 tokens,
303 );
304 }
305
306 rows_from_map(map)
307}
308
/// Builds a per-language report directly from tokei results.
///
/// File rows are always collected with children kept separate so the
/// embedded/parent split can be computed; `children` then controls how
/// the report presents them.
pub fn create_lang_report(
    languages: &Languages,
    top: usize,
    with_files: bool,
    children: ChildrenMode,
) -> LangReport {
    let rows = collect_file_rows(languages, &[], 1, ChildIncludeMode::Separate, None);
    create_lang_report_from_rows(&rows, top, with_files, children)
}
318
/// Aggregates pre-collected file rows into a per-language report.
///
/// `children` controls how embedded-language (child) rows are treated:
/// - `Collapse`: child counts stay inside the parent language; child rows
///   whose parent is absent from `file_rows` are folded in on their own.
/// - `Separate`: child counts are subtracted from the parent language and
///   reported as their own "<lang> (embedded)" rows.
///
/// Languages whose aggregated code count is zero are dropped. When `top`
/// is non-zero, rows past the first `top` are folded into an "Other" row;
/// totals are computed before that folding.
pub fn create_lang_report_from_rows(
    file_rows: &[FileRow],
    top: usize,
    with_files: bool,
    children: ChildrenMode,
) -> LangReport {
    #[derive(Default)]
    struct LangAgg {
        code: usize,
        lines: usize,
        bytes: usize,
        tokens: usize,
    }

    // Parent language per path, used to detect orphaned child rows.
    let parent_lang_by_path: BTreeMap<&str, &str> = file_rows
        .iter()
        .filter(|row| row.kind == FileKind::Parent)
        .map(|row| (row.path.as_str(), row.lang.as_str()))
        .collect();
    // Summed (code, lines) of child rows per path; subtracted from parents
    // in Separate mode so embedded code is not counted twice.
    let mut child_totals_by_path: BTreeMap<&str, (usize, usize)> = BTreeMap::new();
    for row in file_rows.iter().filter(|row| row.kind == FileKind::Child) {
        let entry = child_totals_by_path.entry(row.path.as_str()).or_default();
        entry.0 += row.code;
        entry.1 += row.lines;
    }

    // Keyed by (language, is_embedded); the bool splits "X" from "X (embedded)".
    let mut by_lang: BTreeMap<(&str, bool), (LangAgg, BTreeSet<&str>)> = BTreeMap::new();

    for row in file_rows {
        match (children, row.kind) {
            (ChildrenMode::Collapse, FileKind::Parent) => {
                let entry = by_lang
                    .entry((row.lang.as_str(), false))
                    .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                entry.0.code += row.code;
                entry.0.lines += row.lines;
                entry.0.bytes += row.bytes;
                entry.0.tokens += row.tokens;
                entry.1.insert(row.path.as_str());
            }
            (ChildrenMode::Collapse, FileKind::Child) => {
                // Orphaned child (no parent row for this path): count it so
                // its code is not lost. Child rows carry no bytes/tokens.
                if !parent_lang_by_path.contains_key(row.path.as_str()) {
                    let entry = by_lang
                        .entry((row.lang.as_str(), false))
                        .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                    entry.0.code += row.code;
                    entry.0.lines += row.lines;
                    entry.1.insert(row.path.as_str());
                }
            }
            (ChildrenMode::Separate, FileKind::Parent) => {
                let (child_code, child_lines) = child_totals_by_path
                    .get(row.path.as_str())
                    .copied()
                    .unwrap_or((0, 0));

                let entry = by_lang
                    .entry((row.lang.as_str(), false))
                    .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                // Subtract embedded counts so they only show under "(embedded)".
                entry.0.code += row.code.saturating_sub(child_code);
                entry.0.lines += row.lines.saturating_sub(child_lines);
                entry.0.bytes += row.bytes;
                entry.0.tokens += row.tokens;
                entry.1.insert(row.path.as_str());
            }
            (ChildrenMode::Separate, FileKind::Child) => {
                let entry = by_lang
                    .entry((row.lang.as_str(), true))
                    .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                entry.0.code += row.code;
                entry.0.lines += row.lines;
                entry.1.insert(row.path.as_str());
            }
        }
    }

    let mut rows: Vec<LangRow> = Vec::with_capacity(by_lang.len());
    for ((lang, is_embedded), (agg, files_set)) in by_lang {
        // Languages with no code are dropped entirely.
        // NOTE(review): their lines/bytes/tokens also stay out of the
        // totals computed below — confirm this exclusion is intended.
        if agg.code == 0 {
            continue;
        }
        let files = files_set.len();
        rows.push(LangRow {
            lang: if is_embedded {
                format!("{} (embedded)", lang)
            } else {
                lang.to_string()
            },
            code: agg.code,
            lines: agg.lines,
            files,
            bytes: agg.bytes,
            tokens: agg.tokens,
            avg_lines: avg(agg.lines, files),
        });
    }

    // Descending by code, language name as a deterministic tie-breaker.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.lang.cmp(&b.lang)));

    // Totals cover all surviving rows, before top-N folding.
    let total_code: usize = rows.iter().map(|r| r.code).sum();
    let total_lines: usize = rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();
    let total_files = unique_parent_file_count_from_rows(file_rows);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    // Fold everything past `top` into a single "Other" row (0 = keep all).
    if top > 0 && rows.len() > top {
        let other = fold_other_lang(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    LangReport {
        rows,
        total,
        with_files,
        children,
        top,
    }
}
447
448fn fold_other_lang(rows: &[LangRow]) -> LangRow {
449 let mut code = 0usize;
450 let mut lines = 0usize;
451 let mut files = 0usize;
452 let mut bytes = 0usize;
453 let mut tokens = 0usize;
454
455 for r in rows {
456 code += r.code;
457 lines += r.lines;
458 files += r.files;
459 bytes += r.bytes;
460 tokens += r.tokens;
461 }
462
463 LangRow {
464 lang: "Other".to_string(),
465 code,
466 lines,
467 files,
468 bytes,
469 tokens,
470 avg_lines: avg(lines, files),
471 }
472}
473
/// Builds a per-module report directly from tokei results.
pub fn create_module_report(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    top: usize,
) -> ModuleReport {
    let file_rows = collect_file_rows(languages, module_roots, module_depth, children, None);
    create_module_report_from_rows(&file_rows, module_roots, module_depth, children, top)
}
484
/// Aggregates pre-collected file rows into a per-module report.
///
/// All rows (parent and child alike) contribute to module counters, but
/// only parent rows contribute to per-module and total file counts. When
/// `top` is non-zero, rows past the first `top` are folded into an
/// "Other" row; totals are accumulated over all input rows regardless.
pub fn create_module_report_from_rows(
    file_rows: &[FileRow],
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    top: usize,
) -> ModuleReport {
    #[derive(Default)]
    struct Agg {
        code: usize,
        lines: usize,
        bytes: usize,
        tokens: usize,
    }

    let mut by_module: BTreeMap<&str, (Agg, BTreeSet<&str>)> = BTreeMap::new();
    let mut total_code = 0;
    let mut total_lines = 0;
    let mut total_bytes = 0;
    let mut total_tokens = 0;

    for r in file_rows {
        total_code += r.code;
        total_lines += r.lines;
        total_bytes += r.bytes;
        total_tokens += r.tokens;

        let entry = by_module
            .entry(r.module.as_str())
            .or_insert_with(|| (Agg::default(), BTreeSet::new()));
        entry.0.code += r.code;
        entry.0.lines += r.lines;
        entry.0.bytes += r.bytes;
        entry.0.tokens += r.tokens;

        // Only parent rows count as files; child rows share the same path.
        if r.kind == FileKind::Parent {
            entry.1.insert(r.path.as_str());
        }
    }

    let mut rows: Vec<ModuleRow> = Vec::with_capacity(by_module.len());
    for (module, (agg, files_set)) in by_module {
        let files = files_set.len();
        rows.push(ModuleRow {
            module: module.to_string(),
            code: agg.code,
            lines: agg.lines,
            files,
            bytes: agg.bytes,
            tokens: agg.tokens,
            avg_lines: avg(agg.lines, files),
        });
    }

    // Descending by code, module name as a deterministic tie-breaker.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.module.cmp(&b.module)));

    // Fold everything past `top` into a single "Other" row (0 = keep all).
    if top > 0 && rows.len() > top {
        let other = fold_other_module(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    let total_files = unique_parent_file_count_from_rows(file_rows);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    ModuleReport {
        rows,
        total,
        module_roots: module_roots.to_vec(),
        module_depth,
        children,
        top,
    }
}
568
569fn fold_other_module(rows: &[ModuleRow]) -> ModuleRow {
570 let mut code = 0usize;
571 let mut lines = 0usize;
572 let mut files = 0usize;
573 let mut bytes = 0usize;
574 let mut tokens = 0usize;
575
576 for r in rows {
577 code += r.code;
578 lines += r.lines;
579 files += r.files;
580 bytes += r.bytes;
581 tokens += r.tokens;
582 }
583
584 ModuleRow {
585 module: "Other".to_string(),
586 code,
587 lines,
588 files,
589 bytes,
590 tokens,
591 avg_lines: avg(lines, files),
592 }
593}
594
/// Builds export data (filtered, sorted file rows) directly from tokei
/// results. `strip_prefix` removes a leading directory from every path
/// during collection.
pub fn create_export_data(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    strip_prefix: Option<&Path>,
    min_code: usize,
    max_rows: usize,
) -> ExportData {
    let rows = collect_file_rows(
        languages,
        module_roots,
        module_depth,
        children,
        strip_prefix,
    );
    create_export_data_from_rows(
        rows,
        module_roots,
        module_depth,
        children,
        min_code,
        max_rows,
    )
}
620
621pub fn create_export_data_from_rows(
622 mut rows: Vec<FileRow>,
623 module_roots: &[String],
624 module_depth: usize,
625 children: ChildIncludeMode,
626 min_code: usize,
627 max_rows: usize,
628) -> ExportData {
629 if min_code > 0 {
631 rows.retain(|r| r.code >= min_code);
632 }
633 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.path.cmp(&b.path)));
634
635 if max_rows > 0 && rows.len() > max_rows {
636 rows.truncate(max_rows);
637 }
638
639 ExportData {
640 rows,
641 module_roots: module_roots.to_vec(),
642 module_depth,
643 children,
644 }
645}
646
/// Collects per-file rows from tokei results, merged by
/// `(path, language, parent/child kind)`.
///
/// Parent rows read byte counts from the filesystem, so `report.name`
/// must be a real on-disk path (missing files count as zero bytes).
/// Child rows carry zero bytes/tokens and are only produced when
/// `children` is `Separate`.
pub fn collect_file_rows(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    strip_prefix: Option<&Path>,
) -> Vec<FileRow> {
    let mut map = BTreeMap::new();

    for (lang_type, lang) in languages.iter() {
        for report in &lang.reports {
            let path = normalize_path(&report.name, strip_prefix);
            let module = module_key_from_normalized(&path, module_roots, module_depth);
            let st = report.stats.summarise();
            let (bytes, tokens) = get_file_metrics(&report.name);
            insert_row(
                &mut map,
                Key {
                    path,
                    lang: lang_type.name(),
                    kind: FileKind::Parent,
                },
                module,
                &st,
                bytes,
                tokens,
            );
        }
    }

    if children == ChildIncludeMode::Separate {
        // Second pass: emit embedded-language blocks as Child rows under
        // the embedded language's own name.
        for (_lang_type, lang) in languages.iter() {
            for (child_type, reports) in &lang.children {
                for report in reports {
                    let path = normalize_path(&report.name, strip_prefix);
                    let module = module_key_from_normalized(&path, module_roots, module_depth);
                    let st = report.stats.summarise();
                    insert_row(
                        &mut map,
                        Key {
                            path,
                            lang: child_type.name(),
                            kind: FileKind::Child,
                        },
                        module,
                        &st,
                        0,
                        0,
                    );
                }
            }
        }
    }

    rows_from_map(map)
}
708
/// Counts distinct parent files in a tokei result set.
pub fn unique_parent_file_count(languages: &Languages) -> usize {
    let rows = collect_file_rows(languages, &[], 1, ChildIncludeMode::ParentsOnly, None);
    unique_parent_file_count_from_rows(&rows)
}
713
714pub fn unique_parent_file_count_from_rows(file_rows: &[FileRow]) -> usize {
715 file_rows
716 .iter()
717 .filter(|row| row.kind == FileKind::Parent)
718 .map(|row| row.path.as_str())
719 .collect::<BTreeSet<_>>()
720 .len()
721}
722
/// Rounded (half-up) integer average of `lines` over `files`; zero files
/// yields zero instead of dividing by zero.
pub fn avg(lines: usize, files: usize) -> usize {
    match files {
        0 => 0,
        n => (lines + n / 2) / n,
    }
}
745
/// Normalizes a path to forward-slash form and optionally strips a
/// leading directory prefix.
///
/// Backslashes become `/`, any leading `./` and `/` runs are removed,
/// and when `strip_prefix` matches the start of the path it is removed
/// along with its separator. Allocation is avoided when nothing changes
/// beyond the initial lossy conversion.
pub fn normalize_path(path: &Path, strip_prefix: Option<&Path>) -> String {
    let s_cow = path.to_string_lossy();
    // Convert Windows separators once, up front, so all later matching
    // works on forward slashes only.
    let s: Cow<str> = if s_cow.contains('\\') {
        Cow::Owned(s_cow.replace('\\', "/"))
    } else {
        s_cow
    };

    // `slice` only ever shrinks from the front of `s`; that invariant is
    // what makes the final length comparison valid.
    let mut slice: &str = &s;

    if let Some(stripped) = slice.strip_prefix("./") {
        slice = stripped;
    }

    if let Some(prefix) = strip_prefix {
        let p_cow = prefix.to_string_lossy();
        // The prefix gets the same `./` treatment as the path.
        let p_cow_stripped: Cow<str> = if let Some(stripped) = p_cow.strip_prefix("./") {
            Cow::Borrowed(stripped)
        } else {
            p_cow
        };

        let needs_replace = p_cow_stripped.contains('\\');
        let needs_slash = !p_cow_stripped.ends_with('/');

        if !needs_replace && !needs_slash {
            // Fast path: prefix is already slash-normalized and ends with
            // '/', so it can be compared without allocating.
            if slice.starts_with(p_cow_stripped.as_ref()) {
                slice = &slice[p_cow_stripped.len()..];
            }
        } else {
            // Slow path: build a normalized, slash-terminated prefix.
            // Requiring the trailing '/' prevents "src" from matching
            // "srcfoo/...".
            let mut pfx = if needs_replace {
                p_cow_stripped.replace('\\', "/")
            } else {
                p_cow_stripped.into_owned()
            };
            if needs_slash {
                pfx.push('/');
            }
            if slice.starts_with(&pfx) {
                slice = &slice[pfx.len()..];
            }
        }
    }

    // Clean up any separators exposed by the prefix strip.
    slice = slice.trim_start_matches('/');

    if let Some(stripped) = slice.strip_prefix("./") {
        slice = stripped;
    }
    slice = slice.trim_start_matches('/');

    // Equal lengths mean nothing was stripped from the front, so the
    // already-owned string can be returned without another allocation.
    if slice.len() == s.len() {
        s.into_owned()
    } else {
        slice.to_string()
    }
}
829
/// Computes the module key for a path. Thin forwarding wrapper kept for
/// backward compatibility; see [`module_key::module_key`].
pub fn module_key(path: &str, module_roots: &[String], module_depth: usize) -> String {
    module_key::module_key(path, module_roots, module_depth)
}
850
851#[cfg(test)]
852mod tests {
853 use super::*;
854 use std::path::PathBuf;
855
    // Files sitting at the repository root map to the "(root)" module,
    // with or without a leading "./".
    #[test]
    fn module_key_root_level_file() {
        assert_eq!(module_key("Cargo.toml", &["crates".into()], 2), "(root)");
        assert_eq!(module_key("./Cargo.toml", &["crates".into()], 2), "(root)");
    }
861
    // Paths under a configured root keep `root/<name>` at depth 2.
    #[test]
    fn module_key_crates_depth_2() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
        assert_eq!(
            module_key("packages/bar/src/main.rs", &roots, 2),
            "packages/bar"
        );
    }
871
    // Depth 1 collapses everything under a root to the root itself.
    #[test]
    fn module_key_crates_depth_1() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 1), "crates");
    }
877
    // Paths outside every configured root fall back to their first
    // directory component.
    #[test]
    fn module_key_non_root() {
        let roots = vec!["crates".into()];
        assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
        assert_eq!(module_key("tools/gen.rs", &roots, 2), "tools");
    }
884
    // A depth larger than the available directory count must never pull
    // the file name itself into the module key.
    #[test]
    fn module_key_depth_overflow_does_not_include_filename() {
        let roots = vec!["crates".into()];
        assert_eq!(module_key("crates/foo.rs", &roots, 2), "crates");
        assert_eq!(
            module_key("crates/foo/src/lib.rs", &roots, 10),
            "crates/foo/src"
        );
    }
896
    // A matching prefix (even without a trailing slash) is removed along
    // with its separator.
    #[test]
    fn normalize_path_strips_prefix() {
        let p = PathBuf::from("C:/Code/Repo/src/main.rs");
        let prefix = PathBuf::from("C:/Code/Repo");
        let got = normalize_path(&p, Some(&prefix));
        assert_eq!(got, "src/main.rs");
    }
904
    // Backslashes are normalized to forward slashes.
    #[test]
    fn normalize_path_normalization_slashes() {
        let p = PathBuf::from(r"C:\Code\Repo\src\main.rs");
        let got = normalize_path(&p, None);
        assert_eq!(got, "C:/Code/Repo/src/main.rs");
    }
911
    // Property-based tests for `normalize_path` / `module_key` invariants.
    mod normalize_properties {
        use super::*;
        use proptest::prelude::*;

        // Strategy: one path component without separators.
        fn arb_path_component() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9_.-]+"
        }

        // Strategy: a slash-joined relative path of 1..=max_depth components.
        fn arb_path(max_depth: usize) -> impl Strategy<Value = String> {
            prop::collection::vec(arb_path_component(), 1..=max_depth)
                .prop_map(|comps| comps.join("/"))
        }

        proptest! {
            // Normalizing an already-normalized path is a no-op.
            #[test]
            fn normalize_path_is_idempotent(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm1 = normalize_path(&p, None);
                let p2 = PathBuf::from(&norm1);
                let norm2 = normalize_path(&p2, None);
                prop_assert_eq!(norm1, norm2);
            }

            // Windows and Unix separators normalize to the same string.
            #[test]
            fn normalize_path_handles_windows_separators(path in arb_path(5)) {
                let win_path = path.replace('/', "\\");
                let p_win = PathBuf::from(&win_path);
                let p_unix = PathBuf::from(&path);

                let norm_win = normalize_path(&p_win, None);
                let norm_unix = normalize_path(&p_unix, None);

                prop_assert_eq!(norm_win, norm_unix);
            }

            // Normalized output never starts with '/'.
            #[test]
            fn normalize_path_no_leading_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with('/'));
            }

            // Normalized output never starts with "./".
            #[test]
            fn normalize_path_no_leading_dot_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with("./"));
            }

            // module_key is a pure function of its inputs.
            #[test]
            fn module_key_deterministic(
                path in arb_path(5),
                roots in prop::collection::vec(arb_path_component(), 1..3),
                depth in 1usize..5
            ) {
                let k1 = module_key(&path, &roots, depth);
                let k2 = module_key(&path, &roots, depth);
                prop_assert_eq!(k1, k2);
            }
        }
    }
973
    // Property-based tests for the "Other"-row folding helpers.
    mod fold_properties {
        use super::*;
        use proptest::prelude::*;

        // Strategy: an arbitrary LangRow whose avg_lines is consistent
        // with its lines/files counters.
        fn arb_lang_row() -> impl Strategy<Value = LangRow> {
            (
                "[a-zA-Z]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(lang, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    LangRow {
                        lang,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        // Strategy: an arbitrary ModuleRow, same construction as above.
        fn arb_module_row() -> impl Strategy<Value = ModuleRow> {
            (
                "[a-zA-Z0-9_/]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(module, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    ModuleRow {
                        module,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        proptest! {
            // Folding must preserve every column-wise sum.
            #[test]
            fn fold_lang_preserves_totals(rows in prop::collection::vec(arb_lang_row(), 0..10)) {
                let folded = fold_other_lang(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            // Folding nothing yields an all-zero "Other" row.
            #[test]
            fn fold_lang_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_lang(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.lang, "Other");
            }

            // Same totals-preservation property for module rows.
            #[test]
            fn fold_module_preserves_totals(rows in prop::collection::vec(arb_module_row(), 0..10)) {
                let folded = fold_other_module(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            // Folding nothing yields an all-zero "Other" module row.
            #[test]
            fn fold_module_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_module(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.module, "Other");
            }

            // Folding two halves then folding the results matches folding
            // everything at once.
            #[test]
            fn fold_associative_lang(
                rows1 in prop::collection::vec(arb_lang_row(), 0..5),
                rows2 in prop::collection::vec(arb_lang_row(), 0..5)
            ) {
                let all: Vec<_> = rows1.iter().chain(rows2.iter()).cloned().collect();
                let fold_all = fold_other_lang(&all);

                let fold1 = fold_other_lang(&rows1);
                let fold2 = fold_other_lang(&rows2);
                let combined = fold_other_lang(&[fold1, fold2]);

                prop_assert_eq!(fold_all.code, combined.code);
                prop_assert_eq!(fold_all.lines, combined.lines);
                prop_assert_eq!(fold_all.files, combined.files);
                prop_assert_eq!(fold_all.bytes, combined.bytes);
                prop_assert_eq!(fold_all.tokens, combined.tokens);
            }
        }
    }
1103
    // Accept/reject cases for NAME=value shebang assignment detection.
    #[test]
    fn test_looks_like_env_assignment() {
        assert!(looks_like_env_assignment("FOO=bar"));
        assert!(looks_like_env_assignment("_FOO=bar"));
        assert!(looks_like_env_assignment("A_B_C=123"));

        assert!(!looks_like_env_assignment("="));
        assert!(!looks_like_env_assignment("=bar"));
        assert!(!looks_like_env_assignment("1FOO=bar"));
        assert!(!looks_like_env_assignment("FOO-BAR=baz"));
    }
1115
    // env_interpreter_token must skip env's flags, their values, and
    // NAME=value assignments, returning the first real interpreter word.
    #[test]
    fn test_env_interpreter_token() {
        // Bare interpreter.
        assert_eq!(
            env_interpreter_token(vec!["python"].into_iter()),
            Some("python")
        );

        // Assignments are skipped.
        assert_eq!(
            env_interpreter_token(vec!["FOO=bar", "python"].into_iter()),
            Some("python")
        );

        // Value-less flags are skipped.
        assert_eq!(
            env_interpreter_token(vec!["-S", "-i", "python"].into_iter()),
            Some("python")
        );

        // Flags taking a separate value consume the following word.
        assert_eq!(
            env_interpreter_token(vec!["-u", "FOO", "-C", "/tmp", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--unset", "FOO", "python"].into_iter()),
            Some("python")
        );

        // `--flag=value` forms are skipped as a single word.
        assert_eq!(
            env_interpreter_token(vec!["--unset=FOO", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--chdir=/tmp", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--default-path=/bin", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--argv0=sh", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--default-signal=SIGINT", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--ignore-signal=SIGINT", "python"].into_iter()),
            Some("python")
        );
        assert_eq!(
            env_interpreter_token(vec!["--block-signal=SIGINT", "python"].into_iter()),
            Some("python")
        );

        // Unknown flags are ignored rather than treated as interpreters.
        assert_eq!(
            env_interpreter_token(vec!["--unknown-flag", "python"].into_iter()),
            Some("python")
        );

        // Empty words are skipped.
        assert_eq!(
            env_interpreter_token(vec!["", "python"].into_iter()),
            Some("python")
        );

        // Nothing but an assignment yields no interpreter.
        assert_eq!(env_interpreter_token(vec!["FOO=bar"].into_iter()), None);
    }
1191}