1use std::borrow::Cow;
20use std::collections::{BTreeMap, BTreeSet};
21use std::fs;
22use std::path::{Path, PathBuf};
23
24use tokei::{CodeStats, Config, LanguageType, Languages};
25use tokmd_module_key::module_key_from_normalized;
26use tokmd_types::{
27 ChildIncludeMode, ChildrenMode, ExportData, FileKind, FileRow, LangReport, LangRow,
28 ModuleReport, ModuleRow, Totals,
29};
30
/// Rough bytes-per-token heuristic used to estimate token counts from raw size.
const CHARS_PER_TOKEN: usize = 4;
33
/// Running totals accumulated per (path, lang, kind) key while building rows.
#[derive(Default, Clone, Copy)]
struct Agg {
    code: usize,     // lines of code
    comments: usize, // comment lines
    blanks: usize,   // blank lines
    bytes: usize,    // raw size in bytes (0 for child/embedded rows)
    tokens: usize,   // estimated tokens (bytes / CHARS_PER_TOKEN)
}
42
/// Grouping key for aggregation: one output row per unique (path, lang, kind).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct Key {
    path: String,   // normalized, forward-slash path (see `normalize_path`)
    lang: String,   // language display name (from `LanguageType::name`)
    kind: FileKind, // Parent (the file itself) or Child (embedded language)
}
49
/// A single in-memory file to analyze: a logical path (used for language
/// detection and module grouping) plus the raw file contents.
pub struct InMemoryRowInput<'a> {
    pub logical_path: &'a Path,
    pub bytes: &'a [u8],
}
55
56impl<'a> InMemoryRowInput<'a> {
57 #[must_use]
58 pub fn new(logical_path: &'a Path, bytes: &'a [u8]) -> Self {
59 Self {
60 logical_path,
61 bytes,
62 }
63 }
64}
65
66fn get_file_metrics(path: &Path) -> (usize, usize) {
67 let bytes = fs::metadata(path).map(|m| m.len() as usize).unwrap_or(0);
71 metrics_from_byte_len(bytes)
72}
73
74fn metrics_from_bytes(bytes: &[u8]) -> (usize, usize) {
75 metrics_from_byte_len(bytes.len())
76}
77
78fn metrics_from_byte_len(bytes: usize) -> (usize, usize) {
79 let tokens = bytes / CHARS_PER_TOKEN;
80 (bytes, tokens)
81}
82
/// Builds a fake path under a sentinel directory so extension-based language
/// detection can run on an in-memory file without touching the filesystem.
/// Falls back to the whole path when there is no final component.
fn synthetic_detection_path(logical_path: &Path) -> PathBuf {
    let leaf = logical_path
        .file_name()
        .unwrap_or_else(|| logical_path.as_os_str());
    Path::new("__tokmd_in_memory_detection__").join(leaf)
}
88
89fn language_from_in_memory_shebang(bytes: &[u8]) -> Option<LanguageType> {
90 const READ_LIMIT: usize = 128;
91
92 let first_line = bytes[..bytes.len().min(READ_LIMIT)]
93 .split(|b| *b == b'\n')
94 .next()?;
95 let first_line = std::str::from_utf8(first_line).ok()?;
96
97 let direct = LanguageType::list()
98 .iter()
99 .map(|(lang, _)| *lang)
100 .find(|lang| lang.shebangs().contains(&first_line));
101 if direct.is_some() {
102 return direct;
103 }
104
105 let mut words = first_line.split_whitespace();
106 if words.next() == Some("#!/usr/bin/env") {
107 let interpreter = env_interpreter_token(words)?;
108 return language_from_env_interpreter(interpreter);
109 }
110
111 None
112}
113
114fn env_interpreter_token<'a>(words: impl Iterator<Item = &'a str>) -> Option<&'a str> {
115 let mut skip_next = false;
116
117 for word in words {
118 if skip_next {
119 skip_next = false;
120 continue;
121 }
122
123 if word.is_empty() {
124 continue;
125 }
126
127 if looks_like_env_assignment(word) {
128 continue;
129 }
130
131 match word {
132 "-S" | "--split-string" | "-i" | "--ignore-environment" => continue,
133 "-u" | "--unset" | "-C" | "--chdir" | "-P" | "--default-path" | "-a" | "--argv0"
134 | "--default-signal" | "--ignore-signal" | "--block-signal" => {
135 skip_next = true;
136 continue;
137 }
138 _ if word.starts_with("--unset=")
139 || word.starts_with("--chdir=")
140 || word.starts_with("--default-path=")
141 || word.starts_with("--argv0=")
142 || word.starts_with("--default-signal=")
143 || word.starts_with("--ignore-signal=")
144 || word.starts_with("--block-signal=") =>
145 {
146 continue;
147 }
148 _ if word.starts_with('-') => continue,
149 _ => return Some(word),
150 }
151 }
152
153 None
154}
155
/// Returns true when `word` looks like a `NAME=value` environment assignment,
/// where NAME is a shell identifier: `[A-Za-z_][A-Za-z0-9_]*`.
fn looks_like_env_assignment(word: &str) -> bool {
    let name = match word.split_once('=') {
        Some((name, _)) => name,
        None => return false,
    };

    let mut chars = name.chars();
    match chars.next() {
        // First char must be a letter or underscore; the rest alphanumeric or
        // underscore. An empty name falls through to `false`.
        Some(first) if first == '_' || first.is_ascii_alphabetic() => {
            chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
176
177fn language_from_env_interpreter(interpreter: &str) -> Option<LanguageType> {
178 let token = interpreter
179 .rsplit('/')
180 .next()
181 .unwrap_or(interpreter)
182 .trim_start_matches('-');
185
186 if token.starts_with("python") {
187 return LanguageType::from_file_extension("py");
188 }
189
190 match token {
191 "bash" | "sh" | "zsh" | "ksh" | "fish" => LanguageType::from_name("Bash"),
192 "node" | "nodejs" => LanguageType::from_name("JavaScript"),
193 "ruby" => LanguageType::from_name("Ruby"),
194 "perl" | "perl5" => LanguageType::from_name("Perl"),
195 "php" => LanguageType::from_name("PHP"),
196 "pwsh" | "powershell" => LanguageType::from_name("PowerShell"),
197 _ => None,
198 }
199}
200
201fn detect_in_memory_language(
202 logical_path: &Path,
203 bytes: &[u8],
204 config: &Config,
205) -> Option<LanguageType> {
206 let detection_path = synthetic_detection_path(logical_path);
207 LanguageType::from_path(&detection_path, config)
208 .or_else(|| language_from_in_memory_shebang(bytes))
209}
210
211fn insert_row(
212 map: &mut BTreeMap<Key, (String, Agg)>,
213 key: Key,
214 module: String,
215 stats: &CodeStats,
216 bytes: usize,
217 tokens: usize,
218) {
219 let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
220 entry.1.code += stats.code;
221 entry.1.comments += stats.comments;
222 entry.1.blanks += stats.blanks;
223 entry.1.bytes += bytes;
224 entry.1.tokens += tokens;
225}
226
227fn rows_from_map(map: BTreeMap<Key, (String, Agg)>) -> Vec<FileRow> {
228 map.into_iter()
229 .map(|(key, (module, agg))| {
230 let lines = agg.code + agg.comments + agg.blanks;
231 FileRow {
232 path: key.path,
233 module,
234 lang: key.lang,
235 kind: key.kind,
236 code: agg.code,
237 comments: agg.comments,
238 blanks: agg.blanks,
239 lines,
240 bytes: agg.bytes,
241 tokens: agg.tokens,
242 }
243 })
244 .collect()
245}
246
247pub fn collect_in_memory_file_rows(
252 inputs: &[InMemoryRowInput<'_>],
253 module_roots: &[String],
254 module_depth: usize,
255 children: ChildIncludeMode,
256 config: &Config,
257) -> Vec<FileRow> {
258 let mut map: BTreeMap<Key, (String, Agg)> = BTreeMap::new();
259
260 for input in inputs {
261 let Some(lang_type) = detect_in_memory_language(input.logical_path, input.bytes, config)
262 else {
263 continue;
264 };
265
266 let path = normalize_path(input.logical_path, None);
267 let module = module_key_from_normalized(&path, module_roots, module_depth);
268 let stats = lang_type.parse_from_slice(input.bytes, config);
269 let summary = stats.summarise();
270 let (bytes, tokens) = metrics_from_bytes(input.bytes);
271
272 insert_row(
273 &mut map,
274 Key {
275 path: path.clone(),
276 lang: lang_type.name().to_string(),
277 kind: FileKind::Parent,
278 },
279 module.clone(),
280 &summary,
281 bytes,
282 tokens,
283 );
284
285 if children == ChildIncludeMode::Separate {
286 for (child_type, child_stats) in &stats.blobs {
287 let child_summary = child_stats.summarise();
288 insert_row(
289 &mut map,
290 Key {
291 path: path.clone(),
292 lang: child_type.name().to_string(),
293 kind: FileKind::Child,
294 },
295 module.clone(),
296 &child_summary,
297 0,
298 0,
299 );
300 }
301 }
302 }
303
304 rows_from_map(map)
305}
306
307pub fn create_lang_report(
308 languages: &Languages,
309 top: usize,
310 with_files: bool,
311 children: ChildrenMode,
312) -> LangReport {
313 let rows = collect_file_rows(languages, &[], 1, ChildIncludeMode::Separate, None);
314 create_lang_report_from_rows(&rows, top, with_files, children)
315}
316
/// Aggregates file rows into a per-language report.
///
/// `children` controls how embedded-language (`Child`) rows are handled:
/// * `Collapse` — parent rows are counted whole; a child row is only counted
///   when no parent row exists for the same path (so orphan lines are kept).
/// * `Separate` — each parent's code/lines are reduced by its children's
///   totals (which implies parent counts include embedded code — inferred
///   from the saturating subtraction below), and children are reported under
///   a "<lang> (embedded)" label.
///
/// Zero-code languages are dropped, rows sort by code descending (name as
/// tie-break), and rows beyond `top` (when > 0) are folded into "Other".
pub fn create_lang_report_from_rows(
    file_rows: &[FileRow],
    top: usize,
    with_files: bool,
    children: ChildrenMode,
) -> LangReport {
    // Per-language running totals; the file count comes from the path set
    // stored next to it in `by_lang`.
    #[derive(Default)]
    struct LangAgg {
        code: usize,
        lines: usize,
        bytes: usize,
        tokens: usize,
    }

    // Language owning each path, parent rows only — used in Collapse mode to
    // detect orphan child rows.
    let parent_lang_by_path: BTreeMap<&str, &str> = file_rows
        .iter()
        .filter(|row| row.kind == FileKind::Parent)
        .map(|row| (row.path.as_str(), row.lang.as_str()))
        .collect();
    // Summed (code, lines) of all child rows per path — used in Separate mode
    // to carve embedded counts out of the parent row.
    let mut child_totals_by_path: BTreeMap<&str, (usize, usize)> = BTreeMap::new();
    for row in file_rows.iter().filter(|row| row.kind == FileKind::Child) {
        let entry = child_totals_by_path.entry(row.path.as_str()).or_default();
        entry.0 += row.code;
        entry.1 += row.lines;
    }

    let mut by_lang: BTreeMap<String, (LangAgg, BTreeSet<&str>)> = BTreeMap::new();

    for row in file_rows {
        match (children, row.kind) {
            (ChildrenMode::Collapse, FileKind::Parent) => {
                // Parent counts taken whole.
                let entry = by_lang
                    .entry(row.lang.clone())
                    .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                entry.0.code += row.code;
                entry.0.lines += row.lines;
                entry.0.bytes += row.bytes;
                entry.0.tokens += row.tokens;
                entry.1.insert(row.path.as_str());
            }
            (ChildrenMode::Collapse, FileKind::Child) => {
                // Orphan child (no parent row for this path): count it so its
                // lines are not silently dropped. Only code/lines are added —
                // child rows are built elsewhere in this file with zero
                // bytes/tokens.
                if !parent_lang_by_path.contains_key(row.path.as_str()) {
                    let entry = by_lang
                        .entry(row.lang.clone())
                        .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                    entry.0.code += row.code;
                    entry.0.lines += row.lines;
                    entry.1.insert(row.path.as_str());
                }
            }
            (ChildrenMode::Separate, FileKind::Parent) => {
                // Remove embedded counts from the parent language; saturating
                // in case children report more than the parent's totals.
                let (child_code, child_lines) = child_totals_by_path
                    .get(row.path.as_str())
                    .copied()
                    .unwrap_or((0, 0));

                let entry = by_lang
                    .entry(row.lang.clone())
                    .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                entry.0.code += row.code.saturating_sub(child_code);
                entry.0.lines += row.lines.saturating_sub(child_lines);
                entry.0.bytes += row.bytes;
                entry.0.tokens += row.tokens;
                entry.1.insert(row.path.as_str());
            }
            (ChildrenMode::Separate, FileKind::Child) => {
                // Embedded code gets its own pseudo-language bucket.
                let entry = by_lang
                    .entry(format!("{} (embedded)", row.lang))
                    .or_insert_with(|| (LangAgg::default(), BTreeSet::new()));
                entry.0.code += row.code;
                entry.0.lines += row.lines;
                entry.1.insert(row.path.as_str());
            }
        }
    }

    let mut rows: Vec<LangRow> = Vec::with_capacity(by_lang.len());
    for (lang, (agg, files_set)) in by_lang {
        // Languages that contributed no code are omitted entirely.
        if agg.code == 0 {
            continue;
        }
        let files = files_set.len();
        rows.push(LangRow {
            lang: lang.to_string(),
            code: agg.code,
            lines: agg.lines,
            files,
            bytes: agg.bytes,
            tokens: agg.tokens,
            avg_lines: avg(agg.lines, files),
        });
    }

    // Largest code count first; language name breaks ties deterministically.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.lang.cmp(&b.lang)));

    // Totals are computed before the top-N fold so "Other" cannot skew them.
    let total_code: usize = rows.iter().map(|r| r.code).sum();
    let total_lines: usize = rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();
    let total_files = unique_parent_file_count_from_rows(file_rows);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    // Keep the first `top` rows and fold the remainder into an "Other" row.
    if top > 0 && rows.len() > top {
        let other = fold_other_lang(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    LangReport {
        rows,
        total,
        with_files,
        children,
        top,
    }
}
441
442fn fold_other_lang(rows: &[LangRow]) -> LangRow {
443 let mut code = 0usize;
444 let mut lines = 0usize;
445 let mut files = 0usize;
446 let mut bytes = 0usize;
447 let mut tokens = 0usize;
448
449 for r in rows {
450 code += r.code;
451 lines += r.lines;
452 files += r.files;
453 bytes += r.bytes;
454 tokens += r.tokens;
455 }
456
457 LangRow {
458 lang: "Other".to_string(),
459 code,
460 lines,
461 files,
462 bytes,
463 tokens,
464 avg_lines: avg(lines, files),
465 }
466}
467
468pub fn create_module_report(
469 languages: &Languages,
470 module_roots: &[String],
471 module_depth: usize,
472 children: ChildIncludeMode,
473 top: usize,
474) -> ModuleReport {
475 let file_rows = collect_file_rows(languages, module_roots, module_depth, children, None);
476 create_module_report_from_rows(&file_rows, module_roots, module_depth, children, top)
477}
478
/// Aggregates file rows into a per-module report.
///
/// Every row (parent and child alike) contributes to its module's counts and
/// to the grand totals; only parent rows contribute to file counts. Rows sort
/// by code descending (module name as tie-break) and rows beyond `top` (when
/// > 0) are folded into "Other".
///
/// NOTE(review): with `children == Separate`, child rows are added on top of
/// parent rows, and the lang report's saturating subtraction suggests parent
/// counts already include embedded code — confirm the apparent double count
/// here is intended.
pub fn create_module_report_from_rows(
    file_rows: &[FileRow],
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    top: usize,
) -> ModuleReport {
    // Per-module running totals; file count comes from the adjacent path set.
    #[derive(Default)]
    struct Agg {
        code: usize,
        lines: usize,
        bytes: usize,
        tokens: usize,
    }

    let mut by_module: BTreeMap<&str, (Agg, BTreeSet<&str>)> = BTreeMap::new();
    let mut total_code = 0;
    let mut total_lines = 0;
    let mut total_bytes = 0;
    let mut total_tokens = 0;

    for r in file_rows {
        total_code += r.code;
        total_lines += r.lines;
        total_bytes += r.bytes;
        total_tokens += r.tokens;

        let entry = by_module
            .entry(r.module.as_str())
            .or_insert_with(|| (Agg::default(), BTreeSet::new()));
        entry.0.code += r.code;
        entry.0.lines += r.lines;
        entry.0.bytes += r.bytes;
        entry.0.tokens += r.tokens;

        // Child rows share a path with their parent and must not inflate the
        // file count; only parent rows represent actual files.
        if r.kind == FileKind::Parent {
            entry.1.insert(r.path.as_str());
        }
    }

    let mut rows: Vec<ModuleRow> = Vec::with_capacity(by_module.len());
    for (module, (agg, files_set)) in by_module {
        let files = files_set.len();
        rows.push(ModuleRow {
            module: module.to_string(),
            code: agg.code,
            lines: agg.lines,
            files,
            bytes: agg.bytes,
            tokens: agg.tokens,
            avg_lines: avg(agg.lines, files),
        });
    }

    // Largest code count first; module name breaks ties deterministically.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.module.cmp(&b.module)));

    // Keep the first `top` rows and fold the remainder into an "Other" row.
    if top > 0 && rows.len() > top {
        let other = fold_other_module(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    let total_files = unique_parent_file_count_from_rows(file_rows);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    ModuleReport {
        rows,
        total,
        module_roots: module_roots.to_vec(),
        module_depth,
        children,
        top,
    }
}
562
563fn fold_other_module(rows: &[ModuleRow]) -> ModuleRow {
564 let mut code = 0usize;
565 let mut lines = 0usize;
566 let mut files = 0usize;
567 let mut bytes = 0usize;
568 let mut tokens = 0usize;
569
570 for r in rows {
571 code += r.code;
572 lines += r.lines;
573 files += r.files;
574 bytes += r.bytes;
575 tokens += r.tokens;
576 }
577
578 ModuleRow {
579 module: "Other".to_string(),
580 code,
581 lines,
582 files,
583 bytes,
584 tokens,
585 avg_lines: avg(lines, files),
586 }
587}
588
589pub fn create_export_data(
590 languages: &Languages,
591 module_roots: &[String],
592 module_depth: usize,
593 children: ChildIncludeMode,
594 strip_prefix: Option<&Path>,
595 min_code: usize,
596 max_rows: usize,
597) -> ExportData {
598 let rows = collect_file_rows(
599 languages,
600 module_roots,
601 module_depth,
602 children,
603 strip_prefix,
604 );
605 create_export_data_from_rows(
606 rows,
607 module_roots,
608 module_depth,
609 children,
610 min_code,
611 max_rows,
612 )
613}
614
615pub fn create_export_data_from_rows(
616 mut rows: Vec<FileRow>,
617 module_roots: &[String],
618 module_depth: usize,
619 children: ChildIncludeMode,
620 min_code: usize,
621 max_rows: usize,
622) -> ExportData {
623 if min_code > 0 {
625 rows.retain(|r| r.code >= min_code);
626 }
627 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.path.cmp(&b.path)));
628
629 if max_rows > 0 && rows.len() > max_rows {
630 rows.truncate(max_rows);
631 }
632
633 ExportData {
634 rows,
635 module_roots: module_roots.to_vec(),
636 module_depth,
637 children,
638 }
639}
640
641pub fn collect_file_rows(
646 languages: &Languages,
647 module_roots: &[String],
648 module_depth: usize,
649 children: ChildIncludeMode,
650 strip_prefix: Option<&Path>,
651) -> Vec<FileRow> {
652 let mut map: BTreeMap<Key, (String , Agg)> = BTreeMap::new();
653
654 for (lang_type, lang) in languages.iter() {
656 for report in &lang.reports {
657 let path = normalize_path(&report.name, strip_prefix);
658 let module = module_key_from_normalized(&path, module_roots, module_depth);
659 let st = report.stats.summarise();
660 let (bytes, tokens) = get_file_metrics(&report.name);
661 insert_row(
662 &mut map,
663 Key {
664 path,
665 lang: lang_type.name().to_string(),
666 kind: FileKind::Parent,
667 },
668 module,
669 &st,
670 bytes,
671 tokens,
672 );
673 }
674 }
675
676 if children == ChildIncludeMode::Separate {
677 for (_lang_type, lang) in languages.iter() {
678 for (child_type, reports) in &lang.children {
679 for report in reports {
680 let path = normalize_path(&report.name, strip_prefix);
681 let module = module_key_from_normalized(&path, module_roots, module_depth);
682 let st = report.stats.summarise();
683 insert_row(
684 &mut map,
685 Key {
686 path,
687 lang: child_type.name().to_string(),
688 kind: FileKind::Child,
689 },
690 module,
691 &st,
692 0,
693 0,
694 );
695 }
696 }
697 }
698 }
699
700 rows_from_map(map)
701}
702
703pub fn unique_parent_file_count(languages: &Languages) -> usize {
704 let rows = collect_file_rows(languages, &[], 1, ChildIncludeMode::ParentsOnly, None);
705 unique_parent_file_count_from_rows(&rows)
706}
707
708pub fn unique_parent_file_count_from_rows(file_rows: &[FileRow]) -> usize {
709 file_rows
710 .iter()
711 .filter(|row| row.kind == FileKind::Parent)
712 .map(|row| row.path.as_str())
713 .collect::<BTreeSet<_>>()
714 .len()
715}
716
/// Rounded-to-nearest integer average of `lines` over `files`; 0 when there
/// are no files.
pub fn avg(lines: usize, files: usize) -> usize {
    match files {
        0 => 0,
        f => (lines + f / 2) / f,
    }
}
739
/// Normalizes a path to a forward-slash, relative-looking string: backslashes
/// become '/', leading "./" and '/' are stripped, and an optional
/// `strip_prefix` (normalized the same way) is removed from the front.
pub fn normalize_path(path: &Path, strip_prefix: Option<&Path>) -> String {
    let normalized = {
        let raw = path.to_string_lossy();
        if raw.contains('\\') {
            raw.replace('\\', "/")
        } else {
            raw.into_owned()
        }
    };

    let mut rest = normalized.strip_prefix("./").unwrap_or(&normalized);

    if let Some(prefix) = strip_prefix {
        let p = prefix.to_string_lossy();
        let p = p.strip_prefix("./").unwrap_or(&p);
        // Normalize the prefix identically and guarantee a trailing '/' so
        // only whole leading components are stripped.
        let mut pfx = p.replace('\\', "/");
        if !pfx.ends_with('/') {
            pfx.push('/');
        }
        if let Some(stripped) = rest.strip_prefix(pfx.as_str()) {
            rest = stripped;
        }
    }

    // Second pass: prefix removal can expose a fresh "/" or "./" head.
    rest = rest.trim_start_matches('/');
    rest = rest.strip_prefix("./").unwrap_or(rest);
    rest.trim_start_matches('/').to_string()
}
823
/// Computes the module bucket for a path by delegating to the shared
/// `tokmd_module_key` implementation (kept as a thin public re-export shim).
pub fn module_key(path: &str, module_roots: &[String], module_depth: usize) -> String {
    tokmd_module_key::module_key(path, module_roots, module_depth)
}
844
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // --- module_key: example-based cases ---

    #[test]
    fn module_key_root_level_file() {
        // Files at the repo root map to the "(root)" bucket, with or without
        // a leading "./".
        assert_eq!(module_key("Cargo.toml", &["crates".into()], 2), "(root)");
        assert_eq!(module_key("./Cargo.toml", &["crates".into()], 2), "(root)");
    }

    #[test]
    fn module_key_crates_depth_2() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
        assert_eq!(
            module_key("packages/bar/src/main.rs", &roots, 2),
            "packages/bar"
        );
    }

    #[test]
    fn module_key_crates_depth_1() {
        // Depth 1 keeps only the root component.
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 1), "crates");
    }

    #[test]
    fn module_key_non_root() {
        // Paths outside every configured root bucket by first component.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
        assert_eq!(module_key("tools/gen.rs", &roots, 2), "tools");
    }

    #[test]
    fn module_key_depth_overflow_does_not_include_filename() {
        // The filename itself never becomes part of the module key, even when
        // the requested depth exceeds the number of directory components.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("crates/foo.rs", &roots, 2), "crates");
        assert_eq!(
            module_key("crates/foo/src/lib.rs", &roots, 10),
            "crates/foo/src"
        );
    }

    // --- normalize_path: example-based cases ---

    #[test]
    fn normalize_path_strips_prefix() {
        let p = PathBuf::from("C:/Code/Repo/src/main.rs");
        let prefix = PathBuf::from("C:/Code/Repo");
        let got = normalize_path(&p, Some(&prefix));
        assert_eq!(got, "src/main.rs");
    }

    #[test]
    fn normalize_path_normalization_slashes() {
        // Windows separators are rewritten to forward slashes.
        let p = PathBuf::from(r"C:\Code\Repo\src\main.rs");
        let got = normalize_path(&p, None);
        assert_eq!(got, "C:/Code/Repo/src/main.rs");
    }

    // Property-based coverage (proptest) for normalize_path / module_key.
    mod normalize_properties {
        use super::*;
        use proptest::prelude::*;

        // A single safe path component: no separators, no empty strings.
        fn arb_path_component() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9_.-]+"
        }

        // A relative path of 1..=max_depth components joined with '/'.
        fn arb_path(max_depth: usize) -> impl Strategy<Value = String> {
            prop::collection::vec(arb_path_component(), 1..=max_depth)
                .prop_map(|comps| comps.join("/"))
        }

        proptest! {
            #[test]
            fn normalize_path_is_idempotent(path in arb_path(5)) {
                // Normalizing an already-normalized path is a no-op.
                let p = PathBuf::from(&path);
                let norm1 = normalize_path(&p, None);
                let p2 = PathBuf::from(&norm1);
                let norm2 = normalize_path(&p2, None);
                prop_assert_eq!(norm1, norm2);
            }

            #[test]
            fn normalize_path_handles_windows_separators(path in arb_path(5)) {
                // Backslash and forward-slash spellings normalize identically.
                let win_path = path.replace('/', "\\");
                let p_win = PathBuf::from(&win_path);
                let p_unix = PathBuf::from(&path);

                let norm_win = normalize_path(&p_win, None);
                let norm_unix = normalize_path(&p_unix, None);

                prop_assert_eq!(norm_win, norm_unix);
            }

            #[test]
            fn normalize_path_no_leading_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with('/'));
            }

            #[test]
            fn normalize_path_no_leading_dot_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with("./"));
            }

            #[test]
            fn module_key_deterministic(
                path in arb_path(5),
                roots in prop::collection::vec(arb_path_component(), 1..3),
                depth in 1usize..5
            ) {
                // Same inputs must always produce the same bucket.
                let k1 = module_key(&path, &roots, depth);
                let k2 = module_key(&path, &roots, depth);
                prop_assert_eq!(k1, k2);
            }
        }
    }

    // Property-based coverage for the "Other" fold helpers.
    mod fold_properties {
        use super::*;
        use proptest::prelude::*;

        fn arb_lang_row() -> impl Strategy<Value = LangRow> {
            (
                "[a-zA-Z]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(lang, code, lines, files, bytes, tokens)| {
                    // Mirrors avg(): rounded mean, 0 when files == 0.
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    LangRow {
                        lang,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        fn arb_module_row() -> impl Strategy<Value = ModuleRow> {
            (
                "[a-zA-Z0-9_/]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(module, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    ModuleRow {
                        module,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        proptest! {
            #[test]
            fn fold_lang_preserves_totals(rows in prop::collection::vec(arb_lang_row(), 0..10)) {
                // The "Other" row carries exactly the sum of the folded rows.
                let folded = fold_other_lang(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_lang_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_lang(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.lang, "Other");
            }

            #[test]
            fn fold_module_preserves_totals(rows in prop::collection::vec(arb_module_row(), 0..10)) {
                let folded = fold_other_module(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_module_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_module(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.module, "Other");
            }

            #[test]
            fn fold_associative_lang(
                rows1 in prop::collection::vec(arb_lang_row(), 0..5),
                rows2 in prop::collection::vec(arb_lang_row(), 0..5)
            ) {
                // Folding everything at once equals folding two halves and
                // then folding the two partial results.
                let all: Vec<_> = rows1.iter().chain(rows2.iter()).cloned().collect();
                let fold_all = fold_other_lang(&all);

                let fold1 = fold_other_lang(&rows1);
                let fold2 = fold_other_lang(&rows2);
                let combined = fold_other_lang(&[fold1, fold2]);

                prop_assert_eq!(fold_all.code, combined.code);
                prop_assert_eq!(fold_all.lines, combined.lines);
                prop_assert_eq!(fold_all.files, combined.files);
                prop_assert_eq!(fold_all.bytes, combined.bytes);
                prop_assert_eq!(fold_all.tokens, combined.tokens);
            }
        }
    }
}