1use std::borrow::Cow;
20use std::collections::{BTreeMap, BTreeSet};
21use std::fs;
22use std::path::Path;
23
24use tokei::{LanguageType, Languages};
25use tokmd_types::{
26 ChildIncludeMode, ChildrenMode, ExportData, FileKind, FileRow, LangReport, LangRow,
27 ModuleReport, ModuleRow, Totals,
28};
29
/// Rough heuristic for LLM token estimates: ~4 bytes of source per token.
const CHARS_PER_TOKEN: usize = 4;
32
33fn get_file_metrics(path: &Path) -> (usize, usize) {
34 let bytes = fs::metadata(path).map(|m| m.len() as usize).unwrap_or(0);
38 let tokens = bytes / CHARS_PER_TOKEN;
39 (bytes, tokens)
40}
41
/// Builds the per-language report from tokei's scan results.
///
/// `children` selects how embedded languages (e.g. scripts inside HTML)
/// appear: merged into their parent (`Collapse`) or as separate
/// "<name> (embedded)" rows (`Separate`).
///
/// Rows are sorted by code lines descending (name breaks ties). When
/// `top > 0`, rows past the first `top` are folded into a single "Other"
/// row; totals are computed over ALL rows before folding, so folding never
/// changes the totals. `with_files` is carried through unchanged for
/// downstream rendering.
pub fn create_lang_report(
    languages: &Languages,
    top: usize,
    with_files: bool,
    children: ChildrenMode,
) -> LangReport {
    let mut rows: Vec<LangRow> = Vec::new();

    // Per-child-language accumulator, used only in `Separate` mode.
    #[derive(Default)]
    struct LangAgg {
        code: usize,
        lines: usize,
        files: usize,
    }

    match children {
        ChildrenMode::Collapse => {
            for (lang_type, lang) in languages.iter() {
                // NOTE(review): assumes `summarise()` folds embedded child
                // reports into the parent totals (tokei semantics) — confirm.
                let sum = lang.summarise();
                if sum.code == 0 {
                    continue;
                }

                // Bytes/tokens are measured from the parent files on disk.
                let mut bytes_sum = 0;
                let mut tokens_sum = 0;
                for report in &lang.reports {
                    let (b, t) = get_file_metrics(&report.name);
                    bytes_sum += b;
                    tokens_sum += t;
                }

                let lines = sum.code + sum.comments + sum.blanks;
                let files = lang.reports.len();
                let avg_lines = avg(lines, files);

                rows.push(LangRow {
                    lang: lang_type.name().to_string(),
                    code: sum.code,
                    lines,
                    files,
                    bytes: bytes_sum,
                    tokens: tokens_sum,
                    avg_lines,
                });
            }
        }
        ChildrenMode::Separate => {
            // child language type -> stats aggregated across every parent.
            let mut embedded: BTreeMap<LanguageType, LangAgg> = BTreeMap::new();

            for (lang_type, lang) in languages.iter() {
                // NOTE(review): assumes `lang.code` here counts parent-only
                // lines (children excluded) — confirm against tokei docs.
                if lang.code > 0 {
                    let lines = lang.code + lang.comments + lang.blanks;
                    let files = lang.reports.len();

                    let mut bytes_sum = 0;
                    let mut tokens_sum = 0;
                    for report in &lang.reports {
                        let (b, t) = get_file_metrics(&report.name);
                        bytes_sum += b;
                        tokens_sum += t;
                    }

                    rows.push(LangRow {
                        lang: lang_type.name().to_string(),
                        code: lang.code,
                        lines,
                        files,
                        bytes: bytes_sum,
                        tokens: tokens_sum,
                        avg_lines: avg(lines, files),
                    });
                }

                // Children are collected even when the parent itself has no
                // code lines of its own.
                for (child_type, reports) in &lang.children {
                    let entry = embedded.entry(*child_type).or_default();
                    entry.files += reports.len();
                    for r in reports {
                        let st = r.stats.summarise();
                        entry.code += st.code;
                        entry.lines += st.code + st.comments + st.blanks;
                    }
                }
            }

            for (child_type, agg) in embedded {
                if agg.code == 0 {
                    continue;
                }
                let avg_lines = avg(agg.lines, agg.files);
                rows.push(LangRow {
                    lang: format!("{} (embedded)", child_type.name()),
                    code: agg.code,
                    lines: agg.lines,
                    files: agg.files,
                    // Embedded rows have no on-disk files of their own, so
                    // no byte/token figures are attributed to them.
                    bytes: 0,
                    tokens: 0,
                    avg_lines,
                });
            }
        }
    }

    // Biggest languages first; name breaks ties for deterministic output.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.lang.cmp(&b.lang)));

    // Totals taken over ALL rows, before any folding into "Other".
    let total_code: usize = rows.iter().map(|r| r.code).sum();
    let total_lines: usize = rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();
    // File total deduplicates paths that appear under several languages.
    let total_files = unique_parent_file_count(languages);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    if top > 0 && rows.len() > top {
        let other = fold_other_lang(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    LangReport {
        rows,
        total,
        with_files,
        children,
        top,
    }
}
200
201fn fold_other_lang(rows: &[LangRow]) -> LangRow {
202 let mut code = 0usize;
203 let mut lines = 0usize;
204 let mut files = 0usize;
205 let mut bytes = 0usize;
206 let mut tokens = 0usize;
207
208 for r in rows {
209 code += r.code;
210 lines += r.lines;
211 files += r.files;
212 bytes += r.bytes;
213 tokens += r.tokens;
214 }
215
216 LangRow {
217 lang: "Other".to_string(),
218 code,
219 lines,
220 files,
221 bytes,
222 tokens,
223 avg_lines: avg(lines, files),
224 }
225}
226
/// Builds the per-module report: file rows grouped by module key.
///
/// Code/line/byte/token figures per module are summed from
/// `collect_file_rows`; per-module file counts are derived independently
/// from parent reports only, so child (embedded) rows never inflate them.
/// When `top > 0` the tail is folded into an "Other" row; totals come from
/// the unfolded file rows and are unaffected by folding.
pub fn create_module_report(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    top: usize,
) -> ModuleReport {
    // No prefix stripping here — module keys are derived from the raw
    // normalized paths.
    let file_rows = collect_file_rows(languages, module_roots, module_depth, children, None);

    // Per-module counter bundle.
    #[derive(Default)]
    struct Agg {
        code: usize,
        lines: usize,
        bytes: usize,
        tokens: usize,
    }

    let mut by_module: BTreeMap<String, Agg> = BTreeMap::new();
    for r in &file_rows {
        let entry = by_module.entry(r.module.clone()).or_default();
        entry.code += r.code;
        entry.lines += r.lines;
        entry.bytes += r.bytes;
        entry.tokens += r.tokens;
    }

    // module key -> set of unique parent file paths (dedupes files that
    // appear under more than one language).
    let mut module_files: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for (lang_type, lang) in languages.iter() {
        let _ = lang_type;
        for report in &lang.reports {
            let path = normalize_path(&report.name, None);
            let module = module_key_from_normalized(&path, module_roots, module_depth);
            module_files.entry(module).or_default().insert(path);
        }
    }

    let mut rows: Vec<ModuleRow> = Vec::new();
    for (module, agg) in by_module {
        let files = module_files.get(&module).map(|s| s.len()).unwrap_or(0);
        rows.push(ModuleRow {
            module,
            code: agg.code,
            lines: agg.lines,
            files,
            bytes: agg.bytes,
            tokens: agg.tokens,
            avg_lines: avg(agg.lines, files),
        });
    }

    // Largest modules first; module name breaks ties deterministically.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.module.cmp(&b.module)));

    if top > 0 && rows.len() > top {
        let other = fold_other_module(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    // Totals computed from the unfolded file rows, not from `rows`.
    let total_files = unique_parent_file_count(languages);
    let total_code: usize = file_rows.iter().map(|r| r.code).sum();
    let total_lines: usize = file_rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = file_rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = file_rows.iter().map(|r| r.tokens).sum();

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    ModuleReport {
        rows,
        total,
        module_roots: module_roots.to_vec(),
        module_depth,
        children,
        top,
    }
}
312
313fn fold_other_module(rows: &[ModuleRow]) -> ModuleRow {
314 let mut code = 0usize;
315 let mut lines = 0usize;
316 let mut files = 0usize;
317 let mut bytes = 0usize;
318 let mut tokens = 0usize;
319
320 for r in rows {
321 code += r.code;
322 lines += r.lines;
323 files += r.files;
324 bytes += r.bytes;
325 tokens += r.tokens;
326 }
327
328 ModuleRow {
329 module: "Other".to_string(),
330 code,
331 lines,
332 files,
333 bytes,
334 tokens,
335 avg_lines: avg(lines, files),
336 }
337}
338
339pub fn create_export_data(
340 languages: &Languages,
341 module_roots: &[String],
342 module_depth: usize,
343 children: ChildIncludeMode,
344 strip_prefix: Option<&Path>,
345 min_code: usize,
346 max_rows: usize,
347) -> ExportData {
348 let mut rows = collect_file_rows(
349 languages,
350 module_roots,
351 module_depth,
352 children,
353 strip_prefix,
354 );
355
356 if min_code > 0 {
358 rows.retain(|r| r.code >= min_code);
359 }
360 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.path.cmp(&b.path)));
361
362 if max_rows > 0 && rows.len() > max_rows {
363 rows.truncate(max_rows);
364 }
365
366 ExportData {
367 rows,
368 module_roots: module_roots.to_vec(),
369 module_depth,
370 children,
371 }
372}
373
/// Flattens tokei results into one `FileRow` per (path, language, kind).
///
/// Parent reports always contribute. Child (embedded) reports become extra
/// `FileKind::Child` rows only when `children == Separate`; those rows carry
/// no byte/token figures because the parent row for the same file already
/// accounts for the on-disk size.
///
/// `strip_prefix` is forwarded to `normalize_path` for display-friendly
/// paths. Output order is deterministic (BTreeMap key order).
pub fn collect_file_rows(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    strip_prefix: Option<&Path>,
) -> Vec<FileRow> {
    // Per-key counter bundle; bytes/tokens stay zero for child rows.
    #[derive(Default, Clone, Copy)]
    struct Agg {
        code: usize,
        comments: usize,
        blanks: usize,
        bytes: usize,
        tokens: usize,
    }

    // Aggregation key; `Ord` gives deterministic iteration via BTreeMap.
    #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
    struct Key {
        path: String,
        lang: String,
        kind: FileKind,
    }

    // key -> (module key, accumulated stats)
    let mut map: BTreeMap<Key, (String, Agg)> = BTreeMap::new();

    for (lang_type, lang) in languages.iter() {
        for report in &lang.reports {
            let path = normalize_path(&report.name, strip_prefix);
            let module = module_key_from_normalized(&path, module_roots, module_depth);
            let st = report.stats.summarise();
            // Metrics are read from the original (unstripped) path on disk.
            let (bytes, tokens) = get_file_metrics(&report.name);

            let key = Key {
                path: path.clone(),
                lang: lang_type.name().to_string(),
                kind: FileKind::Parent,
            };
            let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
            entry.1.code += st.code;
            entry.1.comments += st.comments;
            entry.1.blanks += st.blanks;
            entry.1.bytes += bytes;
            entry.1.tokens += tokens;
        }
    }

    if children == ChildIncludeMode::Separate {
        for (_lang_type, lang) in languages.iter() {
            for (child_type, reports) in &lang.children {
                for report in reports {
                    let path = normalize_path(&report.name, strip_prefix);
                    let module = module_key_from_normalized(&path, module_roots, module_depth);
                    let st = report.stats.summarise();
                    let key = Key {
                        path: path.clone(),
                        lang: child_type.name().to_string(),
                        kind: FileKind::Child,
                    };
                    let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
                    // Intentionally no bytes/tokens here — the parent row for
                    // this file already accounts for the on-disk size.
                    entry.1.code += st.code;
                    entry.1.comments += st.comments;
                    entry.1.blanks += st.blanks;
                }
            }
        }
    }

    map.into_iter()
        .map(|(key, (module, agg))| {
            let lines = agg.code + agg.comments + agg.blanks;
            FileRow {
                path: key.path,
                module,
                lang: key.lang,
                kind: key.kind,
                code: agg.code,
                comments: agg.comments,
                blanks: agg.blanks,
                lines,
                bytes: agg.bytes,
                tokens: agg.tokens,
            }
        })
        .collect()
}
469
470pub fn unique_parent_file_count(languages: &Languages) -> usize {
471 let mut seen: BTreeSet<String> = BTreeSet::new();
472 for (_lang_type, lang) in languages.iter() {
473 for report in &lang.reports {
474 let path = normalize_path(&report.name, None);
475 seen.insert(path);
476 }
477 }
478 seen.len()
479}
480
/// Average lines per file, rounded to the nearest integer
/// (half-up via the `+ files / 2` bias). Zero files yields zero.
pub fn avg(lines: usize, files: usize) -> usize {
    match files {
        0 => 0,
        n => (lines + n / 2) / n,
    }
}
488
/// Normalizes a path to forward-slash form for display and keying.
///
/// Steps: backslashes become `/`; a leading `./` is dropped; if
/// `strip_prefix` is given (itself normalized the same way and forced to end
/// with `/`) and matches the front of the path, it is removed; any remaining
/// leading `/` and a leftover leading `./` are trimmed.
pub fn normalize_path(path: &Path, strip_prefix: Option<&Path>) -> String {
    let normalized = path.to_string_lossy().replace('\\', "/");
    let mut rest: &str = normalized.strip_prefix("./").unwrap_or(&normalized);

    if let Some(prefix) = strip_prefix {
        // Normalize the prefix identically so the comparison is apples-to-apples.
        let p = prefix.to_string_lossy().replace('\\', "/");
        let p = p.strip_prefix("./").unwrap_or(&p);

        let mut pfx = p.to_string();
        if !pfx.ends_with('/') {
            pfx.push('/');
        }
        if let Some(stripped) = rest.strip_prefix(pfx.as_str()) {
            rest = stripped;
        }
    }

    rest = rest.trim_start_matches('/');
    rest = rest.strip_prefix("./").unwrap_or(rest);

    rest.to_string()
}
555
556pub fn module_key(path: &str, module_roots: &[String], module_depth: usize) -> String {
563 let mut p = path.replace('\\', "/");
565 if let Some(stripped) = p.strip_prefix("./") {
566 p = stripped.to_string();
567 }
568 p = p.trim_start_matches('/').to_string();
569
570 module_key_from_normalized(&p, module_roots, module_depth)
571}
572
/// Derives a module key from an already-normalized (forward-slash) path.
///
/// Files with no directory component map to `"(root)"`. If the first
/// directory is not one of `module_roots`, that directory alone is the key.
/// Otherwise the key is the first `max(module_depth, 1)` directory segments
/// joined with `/` — never including the file name itself.
fn module_key_from_normalized(path: &str, module_roots: &[String], module_depth: usize) -> String {
    let dir_part = match path.rsplit_once('/') {
        Some((dir, _file)) => dir,
        None => return "(root)".to_string(),
    };

    let mut segments = dir_part.split('/').filter(|s| !s.is_empty());
    let Some(head) = segments.next() else {
        return "(root)".to_string();
    };

    if !module_roots.iter().any(|root| root == head) {
        return head.to_string();
    }

    // Keep up to depth-1 further segments (head already counts as one).
    segments
        .take(module_depth.max(1) - 1)
        .fold(head.to_string(), |mut key, seg| {
            key.push('/');
            key.push_str(seg);
            key
        })
}
612
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn module_key_root_level_file() {
        assert_eq!(module_key("Cargo.toml", &["crates".into()], 2), "(root)");
        assert_eq!(module_key("./Cargo.toml", &["crates".into()], 2), "(root)");
    }

    #[test]
    fn module_key_crates_depth_2() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
        assert_eq!(
            module_key("packages/bar/src/main.rs", &roots, 2),
            "packages/bar"
        );
    }

    #[test]
    fn module_key_crates_depth_1() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 1), "crates");
    }

    #[test]
    fn module_key_non_root() {
        let roots = vec!["crates".into()];
        assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
        assert_eq!(module_key("tools/gen.rs", &roots, 2), "tools");
    }

    #[test]
    fn module_key_depth_overflow_does_not_include_filename() {
        let roots = vec!["crates".into()];
        assert_eq!(module_key("crates/foo.rs", &roots, 2), "crates");
        assert_eq!(
            module_key("crates/foo/src/lib.rs", &roots, 10),
            "crates/foo/src"
        );
    }

    #[test]
    fn normalize_path_strips_prefix() {
        let p = PathBuf::from("C:/Code/Repo/src/main.rs");
        let prefix = PathBuf::from("C:/Code/Repo");
        let got = normalize_path(&p, Some(&prefix));
        assert_eq!(got, "src/main.rs");
    }

    #[test]
    fn normalize_path_normalization_slashes() {
        let p = PathBuf::from(r"C:\Code\Repo\src\main.rs");
        let got = normalize_path(&p, None);
        assert_eq!(got, "C:/Code/Repo/src/main.rs");
    }

    mod fold_properties {
        use super::*;
        use proptest::prelude::*;

        /// Generates arbitrary `LangRow`s. `avg_lines` is computed with the
        /// production `avg` helper instead of a local re-implementation of
        /// the rounding formula, so the generators stay consistent with the
        /// code under test.
        fn arb_lang_row() -> impl Strategy<Value = LangRow> {
            (
                "[a-zA-Z]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(lang, code, lines, files, bytes, tokens)| LangRow {
                    lang,
                    code,
                    lines,
                    files,
                    bytes,
                    tokens,
                    avg_lines: avg(lines, files),
                })
        }

        /// Generates arbitrary `ModuleRow`s; see `arb_lang_row` for why
        /// `avg_lines` delegates to the production helper.
        fn arb_module_row() -> impl Strategy<Value = ModuleRow> {
            (
                "[a-zA-Z0-9_/]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(module, code, lines, files, bytes, tokens)| ModuleRow {
                    module,
                    code,
                    lines,
                    files,
                    bytes,
                    tokens,
                    avg_lines: avg(lines, files),
                })
        }

        proptest! {
            #[test]
            fn fold_lang_preserves_totals(rows in prop::collection::vec(arb_lang_row(), 0..10)) {
                let folded = fold_other_lang(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_lang_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_lang(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.lang, "Other");
            }

            #[test]
            fn fold_module_preserves_totals(rows in prop::collection::vec(arb_module_row(), 0..10)) {
                let folded = fold_other_module(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_module_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_module(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.module, "Other");
            }

            #[test]
            fn fold_associative_lang(
                rows1 in prop::collection::vec(arb_lang_row(), 0..5),
                rows2 in prop::collection::vec(arb_lang_row(), 0..5)
            ) {
                // Folding everything at once must agree with folding the two
                // halves and then folding the partial results.
                let all: Vec<_> = rows1.iter().chain(rows2.iter()).cloned().collect();
                let fold_all = fold_other_lang(&all);

                let fold1 = fold_other_lang(&rows1);
                let fold2 = fold_other_lang(&rows2);
                let combined = fold_other_lang(&[fold1, fold2]);

                prop_assert_eq!(fold_all.code, combined.code);
                prop_assert_eq!(fold_all.lines, combined.lines);
                prop_assert_eq!(fold_all.files, combined.files);
                prop_assert_eq!(fold_all.bytes, combined.bytes);
                prop_assert_eq!(fold_all.tokens, combined.tokens);
            }
        }
    }
}