1use std::borrow::Cow;
20use std::collections::{BTreeMap, BTreeSet};
21use std::fs;
22use std::path::Path;
23
24use tokei::{LanguageType, Languages};
25use tokmd_module_key::module_key_from_normalized;
26use tokmd_types::{
27 ChildIncludeMode, ChildrenMode, ExportData, FileKind, FileRow, LangReport, LangRow,
28 ModuleReport, ModuleRow, Totals,
29};
30
/// Rough chars-per-token divisor used to turn byte counts into a token estimate.
const CHARS_PER_TOKEN: usize = 4;

/// Returns `(bytes, estimated_tokens)` for the file at `path`.
///
/// Best-effort: any metadata failure (missing file, permissions) is treated
/// as a zero-byte file rather than propagated. Tokens are a heuristic
/// estimate of `bytes / CHARS_PER_TOKEN`.
fn get_file_metrics(path: &Path) -> (usize, usize) {
    let bytes = match fs::metadata(path) {
        Ok(meta) => meta.len() as usize,
        Err(_) => 0,
    };
    (bytes, bytes / CHARS_PER_TOKEN)
}
42
43pub fn create_lang_report(
44 languages: &Languages,
45 top: usize,
46 with_files: bool,
47 children: ChildrenMode,
48) -> LangReport {
49 let mut rows: Vec<LangRow> = Vec::new();
62
63 #[derive(Default)]
65 struct LangAgg {
66 code: usize,
67 lines: usize,
68 files: usize,
69 }
70
71 match children {
72 ChildrenMode::Collapse => {
73 for (lang_type, lang) in languages.iter() {
77 let sum = lang.summarise();
78 if sum.code == 0 {
79 continue;
80 }
81
82 let mut bytes_sum = 0;
84 let mut tokens_sum = 0;
85 for report in &lang.reports {
86 let (b, t) = get_file_metrics(&report.name);
87 bytes_sum += b;
88 tokens_sum += t;
89 }
90
91 let lines = sum.code + sum.comments + sum.blanks;
92 let files = lang.reports.len();
93 let avg_lines = avg(lines, files);
94
95 rows.push(LangRow {
96 lang: lang_type.name().to_string(),
97 code: sum.code,
98 lines,
99 files,
100 bytes: bytes_sum,
101 tokens: tokens_sum,
102 avg_lines,
103 });
104 }
105 }
106 ChildrenMode::Separate => {
107 let mut embedded: BTreeMap<LanguageType, LangAgg> = BTreeMap::new();
112
113 for (lang_type, lang) in languages.iter() {
114 if lang.code > 0 {
115 let lines = lang.code + lang.comments + lang.blanks;
116 let files = lang.reports.len();
117
118 let mut bytes_sum = 0;
120 let mut tokens_sum = 0;
121 for report in &lang.reports {
122 let (b, t) = get_file_metrics(&report.name);
123 bytes_sum += b;
124 tokens_sum += t;
125 }
126
127 rows.push(LangRow {
128 lang: lang_type.name().to_string(),
129 code: lang.code,
130 lines,
131 files,
132 bytes: bytes_sum,
133 tokens: tokens_sum,
134 avg_lines: avg(lines, files),
135 });
136 }
137
138 for (child_type, reports) in &lang.children {
139 let entry = embedded.entry(*child_type).or_default();
140 entry.files += reports.len();
141 for r in reports {
142 let st = r.stats.summarise();
143 entry.code += st.code;
144 entry.lines += st.code + st.comments + st.blanks;
145 }
147 }
148 }
149
150 for (child_type, agg) in embedded {
151 if agg.code == 0 {
152 continue;
153 }
154 let avg_lines = avg(agg.lines, agg.files);
155 rows.push(LangRow {
156 lang: format!("{} (embedded)", child_type.name()),
157 code: agg.code,
158 lines: agg.lines,
159 files: agg.files,
160 bytes: 0, tokens: 0, avg_lines,
163 });
164 }
165 }
166 }
167
168 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.lang.cmp(&b.lang)));
170
171 let total_code: usize = rows.iter().map(|r| r.code).sum();
173 let total_lines: usize = rows.iter().map(|r| r.lines).sum();
174 let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
175 let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();
176 let total_files = unique_parent_file_count(languages);
177
178 let total = Totals {
179 code: total_code,
180 lines: total_lines,
181 files: total_files,
182 bytes: total_bytes,
183 tokens: total_tokens,
184 avg_lines: avg(total_lines, total_files),
185 };
186
187 if top > 0 && rows.len() > top {
188 let other = fold_other_lang(&rows[top..]);
189 rows.truncate(top);
190 rows.push(other);
191 }
192
193 LangReport {
194 rows,
195 total,
196 with_files,
197 children,
198 top,
199 }
200}
201
202fn fold_other_lang(rows: &[LangRow]) -> LangRow {
203 let mut code = 0usize;
204 let mut lines = 0usize;
205 let mut files = 0usize;
206 let mut bytes = 0usize;
207 let mut tokens = 0usize;
208
209 for r in rows {
210 code += r.code;
211 lines += r.lines;
212 files += r.files;
213 bytes += r.bytes;
214 tokens += r.tokens;
215 }
216
217 LangRow {
218 lang: "Other".to_string(),
219 code,
220 lines,
221 files,
222 bytes,
223 tokens,
224 avg_lines: avg(lines, files),
225 }
226}
227
228pub fn create_module_report(
229 languages: &Languages,
230 module_roots: &[String],
231 module_depth: usize,
232 children: ChildIncludeMode,
233 top: usize,
234) -> ModuleReport {
235 let file_rows = collect_file_rows(languages, module_roots, module_depth, children, None);
237
238 #[derive(Default)]
239 struct Agg {
240 code: usize,
241 lines: usize,
242 bytes: usize,
243 tokens: usize,
244 }
245
246 let mut by_module: BTreeMap<String, Agg> = BTreeMap::new();
247 let mut total_code = 0;
248 let mut total_lines = 0;
249 let mut total_bytes = 0;
250 let mut total_tokens = 0;
251
252 for r in file_rows {
253 total_code += r.code;
254 total_lines += r.lines;
255 total_bytes += r.bytes;
256 total_tokens += r.tokens;
257
258 let entry = by_module.entry(r.module).or_default();
259 entry.code += r.code;
260 entry.lines += r.lines;
261 entry.bytes += r.bytes;
262 entry.tokens += r.tokens;
263 }
264
265 let mut module_files: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
267 for (lang_type, lang) in languages.iter() {
268 let _ = lang_type; for report in &lang.reports {
270 let path = normalize_path(&report.name, None);
271 let module = module_key_from_normalized(&path, module_roots, module_depth);
272 module_files.entry(module).or_default().insert(path);
273 }
274 }
275
276 let mut rows: Vec<ModuleRow> = Vec::new();
277 for (module, agg) in by_module {
278 let files = module_files.get(&module).map(|s| s.len()).unwrap_or(0);
279 rows.push(ModuleRow {
280 module,
281 code: agg.code,
282 lines: agg.lines,
283 files,
284 bytes: agg.bytes,
285 tokens: agg.tokens,
286 avg_lines: avg(agg.lines, files),
287 });
288 }
289
290 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.module.cmp(&b.module)));
292
293 if top > 0 && rows.len() > top {
294 let other = fold_other_module(&rows[top..]);
295 rows.truncate(top);
296 rows.push(other);
297 }
298
299 let total_files = unique_parent_file_count(languages);
300
301 let total = Totals {
302 code: total_code,
303 lines: total_lines,
304 files: total_files,
305 bytes: total_bytes,
306 tokens: total_tokens,
307 avg_lines: avg(total_lines, total_files),
308 };
309
310 ModuleReport {
311 rows,
312 total,
313 module_roots: module_roots.to_vec(),
314 module_depth,
315 children,
316 top,
317 }
318}
319
320fn fold_other_module(rows: &[ModuleRow]) -> ModuleRow {
321 let mut code = 0usize;
322 let mut lines = 0usize;
323 let mut files = 0usize;
324 let mut bytes = 0usize;
325 let mut tokens = 0usize;
326
327 for r in rows {
328 code += r.code;
329 lines += r.lines;
330 files += r.files;
331 bytes += r.bytes;
332 tokens += r.tokens;
333 }
334
335 ModuleRow {
336 module: "Other".to_string(),
337 code,
338 lines,
339 files,
340 bytes,
341 tokens,
342 avg_lines: avg(lines, files),
343 }
344}
345
346pub fn create_export_data(
347 languages: &Languages,
348 module_roots: &[String],
349 module_depth: usize,
350 children: ChildIncludeMode,
351 strip_prefix: Option<&Path>,
352 min_code: usize,
353 max_rows: usize,
354) -> ExportData {
355 let mut rows = collect_file_rows(
356 languages,
357 module_roots,
358 module_depth,
359 children,
360 strip_prefix,
361 );
362
363 if min_code > 0 {
365 rows.retain(|r| r.code >= min_code);
366 }
367 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.path.cmp(&b.path)));
368
369 if max_rows > 0 && rows.len() > max_rows {
370 rows.truncate(max_rows);
371 }
372
373 ExportData {
374 rows,
375 module_roots: module_roots.to_vec(),
376 module_depth,
377 children,
378 }
379}
380
381pub fn collect_file_rows(
386 languages: &Languages,
387 module_roots: &[String],
388 module_depth: usize,
389 children: ChildIncludeMode,
390 strip_prefix: Option<&Path>,
391) -> Vec<FileRow> {
392 #[derive(Default, Clone, Copy)]
393 struct Agg {
394 code: usize,
395 comments: usize,
396 blanks: usize,
397 bytes: usize,
398 tokens: usize,
399 }
400
401 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
403 struct Key {
404 path: String,
405 lang: String,
406 kind: FileKind,
407 }
408
409 let mut map: BTreeMap<Key, (String , Agg)> = BTreeMap::new();
410
411 for (lang_type, lang) in languages.iter() {
413 for report in &lang.reports {
414 let path = normalize_path(&report.name, strip_prefix);
415 let module = module_key_from_normalized(&path, module_roots, module_depth);
416 let st = report.stats.summarise();
417 let (bytes, tokens) = get_file_metrics(&report.name);
418
419 let key = Key {
420 path,
421 lang: lang_type.name().to_string(),
422 kind: FileKind::Parent,
423 };
424 let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
425 entry.1.code += st.code;
426 entry.1.comments += st.comments;
427 entry.1.blanks += st.blanks;
428 entry.1.bytes += bytes;
429 entry.1.tokens += tokens;
430 }
431 }
432
433 if children == ChildIncludeMode::Separate {
434 for (_lang_type, lang) in languages.iter() {
435 for (child_type, reports) in &lang.children {
436 for report in reports {
437 let path = normalize_path(&report.name, strip_prefix);
438 let module = module_key_from_normalized(&path, module_roots, module_depth);
439 let st = report.stats.summarise();
440 let key = Key {
443 path,
444 lang: child_type.name().to_string(),
445 kind: FileKind::Child,
446 };
447 let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
448 entry.1.code += st.code;
449 entry.1.comments += st.comments;
450 entry.1.blanks += st.blanks;
451 }
454 }
455 }
456 }
457
458 map.into_iter()
459 .map(|(key, (module, agg))| {
460 let lines = agg.code + agg.comments + agg.blanks;
461 FileRow {
462 path: key.path,
463 module,
464 lang: key.lang,
465 kind: key.kind,
466 code: agg.code,
467 comments: agg.comments,
468 blanks: agg.blanks,
469 lines,
470 bytes: agg.bytes,
471 tokens: agg.tokens,
472 }
473 })
474 .collect()
475}
476
477pub fn unique_parent_file_count(languages: &Languages) -> usize {
478 let mut seen: BTreeSet<String> = BTreeSet::new();
479 for (_lang_type, lang) in languages.iter() {
480 for report in &lang.reports {
481 let path = normalize_path(&report.name, None);
482 seen.insert(path);
483 }
484 }
485 seen.len()
486}
487
/// Integer average of `lines` over `files`, rounded to nearest.
///
/// Adds half the divisor before dividing so truncation rounds to the
/// nearest integer instead of always down; zero files yields zero rather
/// than panicking on division by zero.
pub fn avg(lines: usize, files: usize) -> usize {
    match files {
        0 => 0,
        d => (lines + d / 2) / d,
    }
}
510
/// Normalizes a path to a forward-slash, relative string form.
///
/// Steps: backslashes become `/`; a leading `./` is dropped; if
/// `strip_prefix` is given it is normalized the same way (slashes, `./`,
/// trailing `/` ensured) and removed from the front when it matches;
/// finally any remaining leading `/` or `./` is trimmed. Returns an owned
/// `String`; allocation of the full string is reused when nothing was
/// trimmed.
pub fn normalize_path(path: &Path, strip_prefix: Option<&Path>) -> String {
    let raw = path.to_string_lossy();
    // Canonicalize separators up front so all later matching is '/'-based.
    let normalized: Cow<str> = if raw.contains('\\') {
        Cow::Owned(raw.replace('\\', "/"))
    } else {
        raw
    };

    let mut rest: &str = &normalized;

    // Drop a leading "./" before prefix matching.
    if let Some(tail) = rest.strip_prefix("./") {
        rest = tail;
    }

    if let Some(prefix) = strip_prefix {
        let pref_raw = prefix.to_string_lossy();
        // The prefix may itself carry a "./"; treat it the same way.
        let pref: Cow<str> = if let Some(tail) = pref_raw.strip_prefix("./") {
            Cow::Borrowed(tail)
        } else {
            pref_raw
        };

        let has_backslash = pref.contains('\\');
        let ends_in_slash = pref.ends_with('/');

        if !has_backslash && ends_in_slash {
            // Fast path: the prefix is already in canonical form.
            if let Some(tail) = rest.strip_prefix(pref.as_ref()) {
                rest = tail;
            }
        } else {
            // Slow path: canonicalize the prefix, ensure it ends in '/',
            // then match.
            let mut owned = if has_backslash {
                pref.replace('\\', "/")
            } else {
                pref.into_owned()
            };
            if !ends_in_slash {
                owned.push('/');
            }
            if let Some(tail) = rest.strip_prefix(owned.as_str()) {
                rest = tail;
            }
        }
    }

    // Clean any leftover leading separators / "./" after stripping.
    rest = rest.trim_start_matches('/');
    if let Some(tail) = rest.strip_prefix("./") {
        rest = tail;
    }
    rest = rest.trim_start_matches('/');

    // If nothing was trimmed, hand back the full (possibly borrowed) string
    // without re-allocating.
    if rest.len() == normalized.len() {
        normalized.into_owned()
    } else {
        rest.to_string()
    }
}
594
/// Maps a normalized, forward-slash path to its module key
/// (e.g. `"crates/foo"`, or `"(root)"` for a root-level file — see the
/// tests below for the observable contract).
///
/// Thin delegation to [`tokmd_module_key::module_key`]; presumably kept so
/// callers in this crate have a local path to the helper — confirm before
/// removing.
pub fn module_key(path: &str, module_roots: &[String], module_depth: usize) -> String {
    tokmd_module_key::module_key(path, module_roots, module_depth)
}
615
// Unit tests: example-based cases for `module_key`/`normalize_path`, plus
// proptest property suites for path normalization and "Other"-row folding.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn module_key_root_level_file() {
        // Files directly at the repo root map to the "(root)" sentinel,
        // with or without a leading "./".
        assert_eq!(module_key("Cargo.toml", &["crates".into()], 2), "(root)");
        assert_eq!(module_key("./Cargo.toml", &["crates".into()], 2), "(root)");
    }

    #[test]
    fn module_key_crates_depth_2() {
        // Under a configured root, depth 2 keeps root + one child segment.
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
        assert_eq!(
            module_key("packages/bar/src/main.rs", &roots, 2),
            "packages/bar"
        );
    }

    #[test]
    fn module_key_crates_depth_1() {
        // Depth 1 keeps only the root segment itself.
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 1), "crates");
    }

    #[test]
    fn module_key_non_root() {
        // Paths outside any configured root key on their first segment.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
        assert_eq!(module_key("tools/gen.rs", &roots, 2), "tools");
    }

    #[test]
    fn module_key_depth_overflow_does_not_include_filename() {
        // Keys are built from directories only: the filename never appears,
        // even when the depth limit exceeds the directory depth.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("crates/foo.rs", &roots, 2), "crates");
        assert_eq!(
            module_key("crates/foo/src/lib.rs", &roots, 10),
            "crates/foo/src"
        );
    }

    #[test]
    fn normalize_path_strips_prefix() {
        let p = PathBuf::from("C:/Code/Repo/src/main.rs");
        let prefix = PathBuf::from("C:/Code/Repo");
        let got = normalize_path(&p, Some(&prefix));
        assert_eq!(got, "src/main.rs");
    }

    #[test]
    fn normalize_path_normalization_slashes() {
        // Backslashes become forward slashes; nothing else changes.
        let p = PathBuf::from(r"C:\Code\Repo\src\main.rs");
        let got = normalize_path(&p, None);
        assert_eq!(got, "C:/Code/Repo/src/main.rs");
    }

    // Property suites over randomly generated paths (proptest).
    mod normalize_properties {
        use super::*;
        use proptest::prelude::*;

        // One path segment: alphanumerics plus '_', '.', '-'.
        fn arb_path_component() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9_.-]+"
        }

        // A '/'-joined path of 1..=max_depth segments.
        fn arb_path(max_depth: usize) -> impl Strategy<Value = String> {
            prop::collection::vec(arb_path_component(), 1..=max_depth)
                .prop_map(|comps| comps.join("/"))
        }

        proptest! {
            // Normalizing an already-normalized path is a no-op.
            #[test]
            fn normalize_path_is_idempotent(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm1 = normalize_path(&p, None);
                let p2 = PathBuf::from(&norm1);
                let norm2 = normalize_path(&p2, None);
                prop_assert_eq!(norm1, norm2);
            }

            // The backslash-separated spelling normalizes to the same
            // string as the forward-slash spelling.
            #[test]
            fn normalize_path_handles_windows_separators(path in arb_path(5)) {
                let win_path = path.replace('/', "\\");
                let p_win = PathBuf::from(&win_path);
                let p_unix = PathBuf::from(&path);

                let norm_win = normalize_path(&p_win, None);
                let norm_unix = normalize_path(&p_unix, None);

                prop_assert_eq!(norm_win, norm_unix);
            }

            // Output never starts with '/'.
            #[test]
            fn normalize_path_no_leading_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with('/'));
            }

            // Output never starts with "./".
            #[test]
            fn normalize_path_no_leading_dot_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with("./"));
            }

            // Same inputs always yield the same key.
            #[test]
            fn module_key_deterministic(
                path in arb_path(5),
                roots in prop::collection::vec(arb_path_component(), 1..3),
                depth in 1usize..5
            ) {
                let k1 = module_key(&path, &roots, depth);
                let k2 = module_key(&path, &roots, depth);
                prop_assert_eq!(k1, k2);
            }
        }
    }

    // Property suites for the "Other"-row folding helpers.
    mod fold_properties {
        use super::*;
        use proptest::prelude::*;

        // Arbitrary LangRow; avg_lines mirrors the production rounding
        // formula `(lines + files/2) / files` (0 when files == 0).
        fn arb_lang_row() -> impl Strategy<Value = LangRow> {
            (
                "[a-zA-Z]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(lang, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    LangRow {
                        lang,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        // Arbitrary ModuleRow; same avg_lines construction as above.
        fn arb_module_row() -> impl Strategy<Value = ModuleRow> {
            (
                "[a-zA-Z0-9_/]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(module, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    ModuleRow {
                        module,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        proptest! {
            // Folding preserves every column-wise sum.
            #[test]
            fn fold_lang_preserves_totals(rows in prop::collection::vec(arb_lang_row(), 0..10)) {
                let folded = fold_other_lang(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            // Folding nothing yields an all-zero "Other" row.
            #[test]
            fn fold_lang_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_lang(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.lang, "Other");
            }

            #[test]
            fn fold_module_preserves_totals(rows in prop::collection::vec(arb_module_row(), 0..10)) {
                let folded = fold_other_module(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_module_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_module(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.module, "Other");
            }

            // Folding everything at once equals folding two halves and then
            // folding the two partial results.
            #[test]
            fn fold_associative_lang(
                rows1 in prop::collection::vec(arb_lang_row(), 0..5),
                rows2 in prop::collection::vec(arb_lang_row(), 0..5)
            ) {
                let all: Vec<_> = rows1.iter().chain(rows2.iter()).cloned().collect();
                let fold_all = fold_other_lang(&all);

                let fold1 = fold_other_lang(&rows1);
                let fold2 = fold_other_lang(&rows2);
                let combined = fold_other_lang(&[fold1, fold2]);

                prop_assert_eq!(fold_all.code, combined.code);
                prop_assert_eq!(fold_all.lines, combined.lines);
                prop_assert_eq!(fold_all.files, combined.files);
                prop_assert_eq!(fold_all.bytes, combined.bytes);
                prop_assert_eq!(fold_all.tokens, combined.tokens);
            }
        }
    }
}