1use std::borrow::Cow;
20use std::collections::{BTreeMap, BTreeSet};
21use std::fs;
22use std::path::Path;
23
24use tokei::{LanguageType, Languages};
25use tokmd_module_key::module_key_from_normalized;
26use tokmd_types::{
27 ChildIncludeMode, ChildrenMode, ExportData, FileKind, FileRow, LangReport, LangRow,
28 ModuleReport, ModuleRow, Totals,
29};
30
/// Rough bytes-per-token heuristic used to estimate LLM token counts.
const CHARS_PER_TOKEN: usize = 4;

/// Returns `(bytes, estimated_tokens)` for the file at `path`.
///
/// Size comes from filesystem metadata; unreadable/missing files count as 0.
/// Tokens are estimated as `bytes / CHARS_PER_TOKEN` (integer division).
fn get_file_metrics(path: &Path) -> (usize, usize) {
    let bytes = match fs::metadata(path) {
        Ok(meta) => meta.len() as usize,
        // Best-effort: a stat failure simply contributes nothing.
        Err(_) => 0,
    };
    (bytes, bytes / CHARS_PER_TOKEN)
}
42
/// Builds the per-language report from tokei's scan results.
///
/// * `top` — keep at most this many rows, folding the rest into one "Other"
///   row (0 = unlimited). Totals are computed BEFORE folding, so they cover
///   every language.
/// * `with_files` — carried through to the report unchanged (display hint).
/// * `children` — `Collapse` rolls embedded child languages into their parent
///   via `summarise()`; `Separate` emits extra "<lang> (embedded)" rows.
pub fn create_lang_report(
    languages: &Languages,
    top: usize,
    with_files: bool,
    children: ChildrenMode,
) -> LangReport {
    let mut rows: Vec<LangRow> = Vec::new();

    // Accumulator for embedded (child) language stats in `Separate` mode.
    #[derive(Default)]
    struct LangAgg {
        code: usize,
        lines: usize,
        files: usize,
    }

    match children {
        ChildrenMode::Collapse => {
            for (lang_type, lang) in languages.iter() {
                // summarise() folds child-language stats into the parent.
                let sum = lang.summarise();
                if sum.code == 0 {
                    continue;
                }

                // Bytes/tokens come from on-disk sizes of the parent report
                // files; embedded snippets share their host file's bytes.
                let mut bytes_sum = 0;
                let mut tokens_sum = 0;
                for report in &lang.reports {
                    let (b, t) = get_file_metrics(&report.name);
                    bytes_sum += b;
                    tokens_sum += t;
                }

                let lines = sum.code + sum.comments + sum.blanks;
                let files = lang.reports.len();
                let avg_lines = avg(lines, files);

                rows.push(LangRow {
                    lang: lang_type.name().to_string(),
                    code: sum.code,
                    lines,
                    files,
                    bytes: bytes_sum,
                    tokens: tokens_sum,
                    avg_lines,
                });
            }
        }
        ChildrenMode::Separate => {
            // Child-language stats keyed by language type, aggregated across
            // all host languages before being emitted as "(embedded)" rows.
            let mut embedded: BTreeMap<LanguageType, LangAgg> = BTreeMap::new();

            for (lang_type, lang) in languages.iter() {
                // NOTE(review): `lang.code` here reads the parent-only field
                // rather than `summarise()` — presumed intentional so embedded
                // code is not counted twice; confirm against tokei's docs.
                if lang.code > 0 {
                    let lines = lang.code + lang.comments + lang.blanks;
                    let files = lang.reports.len();

                    let mut bytes_sum = 0;
                    let mut tokens_sum = 0;
                    for report in &lang.reports {
                        let (b, t) = get_file_metrics(&report.name);
                        bytes_sum += b;
                        tokens_sum += t;
                    }

                    rows.push(LangRow {
                        lang: lang_type.name().to_string(),
                        code: lang.code,
                        lines,
                        files,
                        bytes: bytes_sum,
                        tokens: tokens_sum,
                        avg_lines: avg(lines, files),
                    });
                }

                // Collect this language's embedded children regardless of
                // whether the parent itself produced a row above.
                for (child_type, reports) in &lang.children {
                    let entry = embedded.entry(*child_type).or_default();
                    entry.files += reports.len();
                    for r in reports {
                        let st = r.stats.summarise();
                        entry.code += st.code;
                        entry.lines += st.code + st.comments + st.blanks;
                    }
                }
            }

            for (child_type, agg) in embedded {
                if agg.code == 0 {
                    continue;
                }
                let avg_lines = avg(agg.lines, agg.files);
                rows.push(LangRow {
                    lang: format!("{} (embedded)", child_type.name()),
                    code: agg.code,
                    lines: agg.lines,
                    files: agg.files,
                    // Embedded snippets live inside host files, so no separate
                    // byte/token counts are attributed to them.
                    bytes: 0, tokens: 0, avg_lines,
                });
            }
        }
    }

    // Largest code count first; ties broken alphabetically by language name.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.lang.cmp(&b.lang)));

    let total_code: usize = rows.iter().map(|r| r.code).sum();
    let total_lines: usize = rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();
    // File total counts unique parent file paths, not the per-row sum (the
    // same file may contribute to more than one row).
    let total_files = unique_parent_file_count(languages);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    // Fold everything past the `top` cutoff into a single "Other" row.
    if top > 0 && rows.len() > top {
        let other = fold_other_lang(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    LangReport {
        rows,
        total,
        with_files,
        children,
        top,
    }
}
201
202fn fold_other_lang(rows: &[LangRow]) -> LangRow {
203 let mut code = 0usize;
204 let mut lines = 0usize;
205 let mut files = 0usize;
206 let mut bytes = 0usize;
207 let mut tokens = 0usize;
208
209 for r in rows {
210 code += r.code;
211 lines += r.lines;
212 files += r.files;
213 bytes += r.bytes;
214 tokens += r.tokens;
215 }
216
217 LangRow {
218 lang: "Other".to_string(),
219 code,
220 lines,
221 files,
222 bytes,
223 tokens,
224 avg_lines: avg(lines, files),
225 }
226}
227
/// Builds the per-module report by bucketing per-file rows into module keys.
///
/// `module_roots` / `module_depth` control how a normalized path maps to its
/// module key (see `module_key_from_normalized`); `children` controls whether
/// embedded child languages contribute rows; `top` folds the tail into an
/// "Other" row (0 = keep all rows).
pub fn create_module_report(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    top: usize,
) -> ModuleReport {
    // Per-file rows with normalized (but not prefix-stripped) paths.
    let file_rows = collect_file_rows(languages, module_roots, module_depth, children, None);

    // Numeric accumulator per module key.
    #[derive(Default)]
    struct Agg {
        code: usize,
        lines: usize,
        bytes: usize,
        tokens: usize,
    }

    let mut by_module: BTreeMap<String, Agg> = BTreeMap::new();
    for r in &file_rows {
        let entry = by_module.entry(r.module.clone()).or_default();
        entry.code += r.code;
        entry.lines += r.lines;
        entry.bytes += r.bytes;
        entry.tokens += r.tokens;
    }

    // File counts per module come from unique PARENT report paths only, so a
    // file that yields both a parent row and child rows is counted once.
    let mut module_files: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for (lang_type, lang) in languages.iter() {
        let _ = lang_type; // language key unused; only the reports matter here
        for report in &lang.reports {
            let path = normalize_path(&report.name, None);
            let module = module_key_from_normalized(&path, module_roots, module_depth);
            module_files.entry(module).or_default().insert(path);
        }
    }

    let mut rows: Vec<ModuleRow> = Vec::new();
    for (module, agg) in by_module {
        // A module populated only by child rows may have no parent files.
        let files = module_files.get(&module).map(|s| s.len()).unwrap_or(0);
        rows.push(ModuleRow {
            module,
            code: agg.code,
            lines: agg.lines,
            files,
            bytes: agg.bytes,
            tokens: agg.tokens,
            avg_lines: avg(agg.lines, files),
        });
    }

    // Largest code count first; ties broken alphabetically by module key.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.module.cmp(&b.module)));

    if top > 0 && rows.len() > top {
        let other = fold_other_module(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    // Totals are taken from the unfolded per-file rows, so the `top`
    // truncation above does not affect them.
    let total_files = unique_parent_file_count(languages);
    let total_code: usize = file_rows.iter().map(|r| r.code).sum();
    let total_lines: usize = file_rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = file_rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = file_rows.iter().map(|r| r.tokens).sum();

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    ModuleReport {
        rows,
        total,
        module_roots: module_roots.to_vec(),
        module_depth,
        children,
        top,
    }
}
313
314fn fold_other_module(rows: &[ModuleRow]) -> ModuleRow {
315 let mut code = 0usize;
316 let mut lines = 0usize;
317 let mut files = 0usize;
318 let mut bytes = 0usize;
319 let mut tokens = 0usize;
320
321 for r in rows {
322 code += r.code;
323 lines += r.lines;
324 files += r.files;
325 bytes += r.bytes;
326 tokens += r.tokens;
327 }
328
329 ModuleRow {
330 module: "Other".to_string(),
331 code,
332 lines,
333 files,
334 bytes,
335 tokens,
336 avg_lines: avg(lines, files),
337 }
338}
339
340pub fn create_export_data(
341 languages: &Languages,
342 module_roots: &[String],
343 module_depth: usize,
344 children: ChildIncludeMode,
345 strip_prefix: Option<&Path>,
346 min_code: usize,
347 max_rows: usize,
348) -> ExportData {
349 let mut rows = collect_file_rows(
350 languages,
351 module_roots,
352 module_depth,
353 children,
354 strip_prefix,
355 );
356
357 if min_code > 0 {
359 rows.retain(|r| r.code >= min_code);
360 }
361 rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.path.cmp(&b.path)));
362
363 if max_rows > 0 && rows.len() > max_rows {
364 rows.truncate(max_rows);
365 }
366
367 ExportData {
368 rows,
369 module_roots: module_roots.to_vec(),
370 module_depth,
371 children,
372 }
373}
374
/// Flattens tokei's scan into one `FileRow` per unique (path, lang, kind).
///
/// Parent reports always produce rows; embedded child reports are added as
/// `FileKind::Child` rows only when `children == ChildIncludeMode::Separate`.
/// `strip_prefix`, when given, is removed from the front of each path after
/// normalization.
pub fn collect_file_rows(
    languages: &Languages,
    module_roots: &[String],
    module_depth: usize,
    children: ChildIncludeMode,
    strip_prefix: Option<&Path>,
) -> Vec<FileRow> {
    // Per-key numeric accumulator.
    #[derive(Default, Clone, Copy)]
    struct Agg {
        code: usize,
        comments: usize,
        blanks: usize,
        bytes: usize,
        tokens: usize,
    }

    // Ord derive + BTreeMap give deterministic output ordered by path,
    // then language name, then kind.
    #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
    struct Key {
        path: String,
        lang: String,
        kind: FileKind,
    }

    // Value is (module key, aggregate); the module key is computed once at
    // first insertion for a given key.
    let mut map: BTreeMap<Key, (String , Agg)> = BTreeMap::new();

    for (lang_type, lang) in languages.iter() {
        for report in &lang.reports {
            let path = normalize_path(&report.name, strip_prefix);
            let module = module_key_from_normalized(&path, module_roots, module_depth);
            let st = report.stats.summarise();
            let (bytes, tokens) = get_file_metrics(&report.name);

            let key = Key {
                path: path.clone(),
                lang: lang_type.name().to_string(),
                kind: FileKind::Parent,
            };
            // Merge in case the same path/lang appears in multiple reports.
            let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
            entry.1.code += st.code;
            entry.1.comments += st.comments;
            entry.1.blanks += st.blanks;
            entry.1.bytes += bytes;
            entry.1.tokens += tokens;
        }
    }

    if children == ChildIncludeMode::Separate {
        for (_lang_type, lang) in languages.iter() {
            for (child_type, reports) in &lang.children {
                for report in reports {
                    let path = normalize_path(&report.name, strip_prefix);
                    let module = module_key_from_normalized(&path, module_roots, module_depth);
                    let st = report.stats.summarise();
                    let key = Key {
                        path: path.clone(),
                        lang: child_type.name().to_string(),
                        kind: FileKind::Child,
                    };
                    let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
                    // Child rows carry no bytes/tokens: the host file's parent
                    // row already accounts for the on-disk size.
                    entry.1.code += st.code;
                    entry.1.comments += st.comments;
                    entry.1.blanks += st.blanks;
                }
            }
        }
    }

    map.into_iter()
        .map(|(key, (module, agg))| {
            let lines = agg.code + agg.comments + agg.blanks;
            FileRow {
                path: key.path,
                module,
                lang: key.lang,
                kind: key.kind,
                code: agg.code,
                comments: agg.comments,
                blanks: agg.blanks,
                lines,
                bytes: agg.bytes,
                tokens: agg.tokens,
            }
        })
        .collect()
}
470
471pub fn unique_parent_file_count(languages: &Languages) -> usize {
472 let mut seen: BTreeSet<String> = BTreeSet::new();
473 for (_lang_type, lang) in languages.iter() {
474 for report in &lang.reports {
475 let path = normalize_path(&report.name, None);
476 seen.insert(path);
477 }
478 }
479 seen.len()
480}
481
/// Rounded-to-nearest integer average: `lines / files` with half-up rounding.
/// Returns 0 when `files` is 0 (no division-by-zero panic).
pub fn avg(lines: usize, files: usize) -> usize {
    match files {
        0 => 0,
        n => (lines + n / 2) / n,
    }
}
489
/// Normalizes `path` to a forward-slash string, optionally removing
/// `strip_prefix` from the front.
///
/// Steps, in order:
/// 1. backslashes become forward slashes;
/// 2. a leading `./` is removed;
/// 3. `strip_prefix` (itself with a leading `./` removed first, then
///    backslashes normalized, then guaranteed a trailing `/`) is stripped if
///    it matches the front;
/// 4. any remaining leading slashes and a leading `./` are removed.
pub fn normalize_path(path: &Path, strip_prefix: Option<&Path>) -> String {
    let raw = path.to_string_lossy();
    let normalized = raw.replace('\\', "/");
    let mut rest: &str = normalized.strip_prefix("./").unwrap_or(&normalized);

    if let Some(prefix) = strip_prefix {
        let p_raw = prefix.to_string_lossy();
        // Leading "./" is dropped BEFORE slash normalization, mirroring the
        // order applied to `path` components elsewhere in this crate.
        let p_trimmed = p_raw.strip_prefix("./").unwrap_or(&p_raw);
        let mut pfx = p_trimmed.replace('\\', "/");
        if !pfx.ends_with('/') {
            // Require a directory boundary so "src2/..." never matches "src".
            pfx.push('/');
        }
        if let Some(stripped) = rest.strip_prefix(pfx.as_str()) {
            rest = stripped;
        }
    }

    rest = rest.trim_start_matches('/');
    rest = rest.strip_prefix("./").unwrap_or(rest);
    rest.to_string()
}
556
/// Maps a slash-separated path to its module bucket.
///
/// Thin delegation to `tokmd_module_key::module_key`, kept so existing
/// callers of this crate retain a stable entry point.
pub fn module_key(path: &str, module_roots: &[String], module_depth: usize) -> String {
    tokmd_module_key::module_key(path, module_roots, module_depth)
}
566
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn module_key_root_level_file() {
        // Files directly at the repo root bucket into "(root)",
        // with or without a leading "./".
        assert_eq!(module_key("Cargo.toml", &["crates".into()], 2), "(root)");
        assert_eq!(module_key("./Cargo.toml", &["crates".into()], 2), "(root)");
    }

    #[test]
    fn module_key_crates_depth_2() {
        // Under a configured root, depth 2 keeps root + crate name.
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
        assert_eq!(
            module_key("packages/bar/src/main.rs", &roots, 2),
            "packages/bar"
        );
    }

    #[test]
    fn module_key_crates_depth_1() {
        // Depth 1 collapses everything under a root to the root itself.
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 1), "crates");
    }

    #[test]
    fn module_key_non_root() {
        // Paths outside the configured roots bucket by first component.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
        assert_eq!(module_key("tools/gen.rs", &roots, 2), "tools");
    }

    #[test]
    fn module_key_depth_overflow_does_not_include_filename() {
        // The filename itself is never part of the module key, even when the
        // requested depth exceeds the number of directory components.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("crates/foo.rs", &roots, 2), "crates");
        assert_eq!(
            module_key("crates/foo/src/lib.rs", &roots, 10),
            "crates/foo/src"
        );
    }

    #[test]
    fn normalize_path_strips_prefix() {
        let p = PathBuf::from("C:/Code/Repo/src/main.rs");
        let prefix = PathBuf::from("C:/Code/Repo");
        let got = normalize_path(&p, Some(&prefix));
        assert_eq!(got, "src/main.rs");
    }

    #[test]
    fn normalize_path_normalization_slashes() {
        // Backslashes are converted to forward slashes even with no prefix.
        let p = PathBuf::from(r"C:\Code\Repo\src\main.rs");
        let got = normalize_path(&p, None);
        assert_eq!(got, "C:/Code/Repo/src/main.rs");
    }

    // Property-based checks that the "Other" fold rows preserve column totals.
    mod fold_properties {
        use super::*;
        use proptest::prelude::*;

        // Arbitrary LangRow with avg_lines derived the same way `avg` does it.
        fn arb_lang_row() -> impl Strategy<Value = LangRow> {
            (
                "[a-zA-Z]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(lang, code, lines, files, bytes, tokens)| {
                    let avg_lines = if files == 0 {
                        0
                    } else {
                        (lines + (files / 2)) / files
                    };
                    LangRow {
                        lang,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        // Arbitrary ModuleRow mirroring arb_lang_row.
        fn arb_module_row() -> impl Strategy<Value = ModuleRow> {
            (
                "[a-zA-Z0-9_/]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(module, code, lines, files, bytes, tokens)| {
                    let avg_lines = if files == 0 {
                        0
                    } else {
                        (lines + (files / 2)) / files
                    };
                    ModuleRow {
                        module,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        proptest! {
            #[test]
            fn fold_lang_preserves_totals(rows in prop::collection::vec(arb_lang_row(), 0..10)) {
                let folded = fold_other_lang(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_lang_empty_is_zero(_dummy in 0..1u8) {
                // Folding no rows yields an all-zero "Other" row.
                let folded = fold_other_lang(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.lang, "Other");
            }

            #[test]
            fn fold_module_preserves_totals(rows in prop::collection::vec(arb_module_row(), 0..10)) {
                let folded = fold_other_module(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_module_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_module(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.module, "Other");
            }

            #[test]
            fn fold_associative_lang(
                rows1 in prop::collection::vec(arb_lang_row(), 0..5),
                rows2 in prop::collection::vec(arb_lang_row(), 0..5)
            ) {
                // Folding all rows at once must agree with folding halves and
                // then folding the two partial results.
                let all: Vec<_> = rows1.iter().chain(rows2.iter()).cloned().collect();
                let fold_all = fold_other_lang(&all);

                let fold1 = fold_other_lang(&rows1);
                let fold2 = fold_other_lang(&rows2);
                let combined = fold_other_lang(&[fold1, fold2]);

                prop_assert_eq!(fold_all.code, combined.code);
                prop_assert_eq!(fold_all.lines, combined.lines);
                prop_assert_eq!(fold_all.files, combined.files);
                prop_assert_eq!(fold_all.bytes, combined.bytes);
                prop_assert_eq!(fold_all.tokens, combined.tokens);
            }
        }
    }
}