Skip to main content

tokmd_model/
lib.rs

1//! # tokmd-model
2//!
3//! **Tier 1 (Logic)**
4//!
5//! This crate contains the core business logic for aggregating and transforming code statistics.
6//! It handles the conversion from raw Tokei scan results into `tokmd` receipts.
7//!
8//! ## What belongs here
9//! * Aggregation logic (rolling up stats to modules/languages)
10//! * Deterministic sorting and filtering
11//! * Path normalization rules
12//! * Receipt generation logic
13//!
14//! ## What does NOT belong here
15//! * CLI argument parsing
16//! * Output formatting (printing to stdout/file)
17//! * Tokei interaction (use tokmd-scan)
18
19use std::borrow::Cow;
20use std::collections::{BTreeMap, BTreeSet};
21use std::fs;
22use std::path::Path;
23
24use tokei::{LanguageType, Languages};
25use tokmd_module_key::module_key_from_normalized;
26use tokmd_types::{
27    ChildIncludeMode, ChildrenMode, ExportData, FileKind, FileRow, LangReport, LangRow,
28    ModuleReport, ModuleRow, Totals,
29};
30
/// Rough size heuristic: one token per 4 bytes (chars) of file content.
const CHARS_PER_TOKEN: usize = 4;

/// Return `(bytes, estimated_tokens)` for a file on disk.
///
/// Best-effort: if the file was deleted or became unreadable between the scan
/// and this post-processing step, both values are 0 instead of an error.
fn get_file_metrics(path: &Path) -> (usize, usize) {
    let bytes = match fs::metadata(path) {
        Ok(meta) => meta.len() as usize,
        Err(_) => 0,
    };
    (bytes, bytes / CHARS_PER_TOKEN)
}
42
/// Build the per-language report from a tokei scan.
///
/// * `top` — keep at most this many rows, folding the rest into an "Other"
///   row (0 means unlimited). Totals are computed over ALL rows before folding.
/// * `with_files` — carried through to the report verbatim.
/// * `children` — whether embedded languages are collapsed into the parent
///   file's language or reported as separate "(embedded)" rows.
pub fn create_lang_report(
    languages: &Languages,
    top: usize,
    with_files: bool,
    children: ChildrenMode,
) -> LangReport {
    // Byte/token metrics come from the filesystem, and embedded (child)
    // segments live inside their parent file, so bytes are only ever
    // attributed to the language of the parent file — never to embedded
    // rows — to avoid double counting. The attribution rules differ per
    // mode, which is why this does not reuse `collect_file_rows` (that
    // helper flattens per file instead of grouping per language).

    let mut rows: Vec<LangRow> = Vec::new();

    // Accumulator for embedded-language stats (used in Separate mode only).
    #[derive(Default)]
    struct LangAgg {
        code: usize,
        lines: usize,
        files: usize,
    }

    match children {
        ChildrenMode::Collapse => {
            // Collapse embedded languages into the parent row.
            // Bytes are attributed to the parent file's language.

            for (lang_type, lang) in languages.iter() {
                let sum = lang.summarise();
                if sum.code == 0 {
                    continue;
                }

                // Whole-file byte/token size, summed over this language's files.
                let mut bytes_sum = 0;
                let mut tokens_sum = 0;
                for report in &lang.reports {
                    let (b, t) = get_file_metrics(&report.name);
                    bytes_sum += b;
                    tokens_sum += t;
                }

                let lines = sum.code + sum.comments + sum.blanks;
                let files = lang.reports.len();
                let avg_lines = avg(lines, files);

                rows.push(LangRow {
                    lang: lang_type.name().to_string(),
                    code: sum.code,
                    lines,
                    files,
                    bytes: bytes_sum,
                    tokens: tokens_sum,
                    avg_lines,
                });
            }
        }
        ChildrenMode::Separate => {
            // Report embedded languages as their own "(embedded)" rows.
            // Bytes/tokens are counted for the PARENT file only; embedded
            // segments get zero to avoid double counting.

            let mut embedded: BTreeMap<LanguageType, LangAgg> = BTreeMap::new();

            for (lang_type, lang) in languages.iter() {
                if lang.code > 0 {
                    let lines = lang.code + lang.comments + lang.blanks;
                    let files = lang.reports.len();

                    // Parent files get the bytes.
                    let mut bytes_sum = 0;
                    let mut tokens_sum = 0;
                    for report in &lang.reports {
                        let (b, t) = get_file_metrics(&report.name);
                        bytes_sum += b;
                        tokens_sum += t;
                    }

                    rows.push(LangRow {
                        lang: lang_type.name().to_string(),
                        code: lang.code,
                        lines,
                        files,
                        bytes: bytes_sum,
                        tokens: tokens_sum,
                        avg_lines: avg(lines, files),
                    });
                }

                // Accumulate embedded segments, grouped by child language.
                for (child_type, reports) in &lang.children {
                    let entry = embedded.entry(*child_type).or_default();
                    entry.files += reports.len();
                    for r in reports {
                        let st = r.stats.summarise();
                        entry.code += st.code;
                        entry.lines += st.code + st.comments + st.blanks;
                        // Embedded languages don't own the file, so 0 bytes/tokens.
                    }
                }
            }

            for (child_type, agg) in embedded {
                if agg.code == 0 {
                    continue;
                }
                let avg_lines = avg(agg.lines, agg.files);
                rows.push(LangRow {
                    lang: format!("{} (embedded)", child_type.name()),
                    code: agg.code,
                    lines: agg.lines,
                    files: agg.files,
                    bytes: 0,  // No bytes for embedded
                    tokens: 0, // No tokens for embedded
                    avg_lines,
                });
            }
        }
    }

    // Sort descending by code, then by language name for determinism.
    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.lang.cmp(&b.lang)));

    // Totals are computed over ALL rows, before any top-N folding. The file
    // total counts unique parent files, so a file with embedded segments
    // still counts once.
    let total_code: usize = rows.iter().map(|r| r.code).sum();
    let total_lines: usize = rows.iter().map(|r| r.lines).sum();
    let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
    let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();
    let total_files = unique_parent_file_count(languages);

    let total = Totals {
        code: total_code,
        lines: total_lines,
        files: total_files,
        bytes: total_bytes,
        tokens: total_tokens,
        avg_lines: avg(total_lines, total_files),
    };

    // Fold everything past the top-N cutoff into a single "Other" row.
    if top > 0 && rows.len() > top {
        let other = fold_other_lang(&rows[top..]);
        rows.truncate(top);
        rows.push(other);
    }

    LangReport {
        rows,
        total,
        with_files,
        children,
        top,
    }
}
201
202fn fold_other_lang(rows: &[LangRow]) -> LangRow {
203    let mut code = 0usize;
204    let mut lines = 0usize;
205    let mut files = 0usize;
206    let mut bytes = 0usize;
207    let mut tokens = 0usize;
208
209    for r in rows {
210        code += r.code;
211        lines += r.lines;
212        files += r.files;
213        bytes += r.bytes;
214        tokens += r.tokens;
215    }
216
217    LangRow {
218        lang: "Other".to_string(),
219        code,
220        lines,
221        files,
222        bytes,
223        tokens,
224        avg_lines: avg(lines, files),
225    }
226}
227
228pub fn create_module_report(
229    languages: &Languages,
230    module_roots: &[String],
231    module_depth: usize,
232    children: ChildIncludeMode,
233    top: usize,
234) -> ModuleReport {
235    // Aggregate stats per module, but count files uniquely (parent files only).
236    let file_rows = collect_file_rows(languages, module_roots, module_depth, children, None);
237
238    #[derive(Default)]
239    struct Agg {
240        code: usize,
241        lines: usize,
242        bytes: usize,
243        tokens: usize,
244    }
245
246    let mut by_module: BTreeMap<String, Agg> = BTreeMap::new();
247    let mut total_code = 0;
248    let mut total_lines = 0;
249    let mut total_bytes = 0;
250    let mut total_tokens = 0;
251
252    for r in file_rows {
253        total_code += r.code;
254        total_lines += r.lines;
255        total_bytes += r.bytes;
256        total_tokens += r.tokens;
257
258        let entry = by_module.entry(r.module).or_default();
259        entry.code += r.code;
260        entry.lines += r.lines;
261        entry.bytes += r.bytes;
262        entry.tokens += r.tokens;
263    }
264
265    // Unique parent files per module.
266    let mut module_files: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
267    for (lang_type, lang) in languages.iter() {
268        let _ = lang_type; // keep the pattern explicit; we only need reports
269        for report in &lang.reports {
270            let path = normalize_path(&report.name, None);
271            let module = module_key_from_normalized(&path, module_roots, module_depth);
272            module_files.entry(module).or_default().insert(path);
273        }
274    }
275
276    let mut rows: Vec<ModuleRow> = Vec::new();
277    for (module, agg) in by_module {
278        let files = module_files.get(&module).map(|s| s.len()).unwrap_or(0);
279        rows.push(ModuleRow {
280            module,
281            code: agg.code,
282            lines: agg.lines,
283            files,
284            bytes: agg.bytes,
285            tokens: agg.tokens,
286            avg_lines: avg(agg.lines, files),
287        });
288    }
289
290    // Sort descending by code, then by module name for determinism.
291    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.module.cmp(&b.module)));
292
293    if top > 0 && rows.len() > top {
294        let other = fold_other_module(&rows[top..]);
295        rows.truncate(top);
296        rows.push(other);
297    }
298
299    let total_files = unique_parent_file_count(languages);
300
301    let total = Totals {
302        code: total_code,
303        lines: total_lines,
304        files: total_files,
305        bytes: total_bytes,
306        tokens: total_tokens,
307        avg_lines: avg(total_lines, total_files),
308    };
309
310    ModuleReport {
311        rows,
312        total,
313        module_roots: module_roots.to_vec(),
314        module_depth,
315        children,
316        top,
317    }
318}
319
320fn fold_other_module(rows: &[ModuleRow]) -> ModuleRow {
321    let mut code = 0usize;
322    let mut lines = 0usize;
323    let mut files = 0usize;
324    let mut bytes = 0usize;
325    let mut tokens = 0usize;
326
327    for r in rows {
328        code += r.code;
329        lines += r.lines;
330        files += r.files;
331        bytes += r.bytes;
332        tokens += r.tokens;
333    }
334
335    ModuleRow {
336        module: "Other".to_string(),
337        code,
338        lines,
339        files,
340        bytes,
341        tokens,
342        avg_lines: avg(lines, files),
343    }
344}
345
346pub fn create_export_data(
347    languages: &Languages,
348    module_roots: &[String],
349    module_depth: usize,
350    children: ChildIncludeMode,
351    strip_prefix: Option<&Path>,
352    min_code: usize,
353    max_rows: usize,
354) -> ExportData {
355    let mut rows = collect_file_rows(
356        languages,
357        module_roots,
358        module_depth,
359        children,
360        strip_prefix,
361    );
362
363    // Filter and sort for determinism.
364    if min_code > 0 {
365        rows.retain(|r| r.code >= min_code);
366    }
367    rows.sort_by(|a, b| b.code.cmp(&a.code).then_with(|| a.path.cmp(&b.path)));
368
369    if max_rows > 0 && rows.len() > max_rows {
370        rows.truncate(max_rows);
371    }
372
373    ExportData {
374        rows,
375        module_roots: module_roots.to_vec(),
376        module_depth,
377        children,
378    }
379}
380
381/// Collect per-file contributions, optionally including embedded language reports.
382///
383/// This returns one row per (path, lang, kind), aggregated if tokei produced multiple
384/// reports for the same tuple.
385pub fn collect_file_rows(
386    languages: &Languages,
387    module_roots: &[String],
388    module_depth: usize,
389    children: ChildIncludeMode,
390    strip_prefix: Option<&Path>,
391) -> Vec<FileRow> {
392    #[derive(Default, Clone, Copy)]
393    struct Agg {
394        code: usize,
395        comments: usize,
396        blanks: usize,
397        bytes: usize,
398        tokens: usize,
399    }
400
401    // Deterministic map: key ordering is stable.
402    #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
403    struct Key {
404        path: String,
405        lang: String,
406        kind: FileKind,
407    }
408
409    let mut map: BTreeMap<Key, (String /*module*/, Agg)> = BTreeMap::new();
410
411    // Parent reports
412    for (lang_type, lang) in languages.iter() {
413        for report in &lang.reports {
414            let path = normalize_path(&report.name, strip_prefix);
415            let module = module_key_from_normalized(&path, module_roots, module_depth);
416            let st = report.stats.summarise();
417            let (bytes, tokens) = get_file_metrics(&report.name);
418
419            let key = Key {
420                path,
421                lang: lang_type.name().to_string(),
422                kind: FileKind::Parent,
423            };
424            let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
425            entry.1.code += st.code;
426            entry.1.comments += st.comments;
427            entry.1.blanks += st.blanks;
428            entry.1.bytes += bytes;
429            entry.1.tokens += tokens;
430        }
431    }
432
433    if children == ChildIncludeMode::Separate {
434        for (_lang_type, lang) in languages.iter() {
435            for (child_type, reports) in &lang.children {
436                for report in reports {
437                    let path = normalize_path(&report.name, strip_prefix);
438                    let module = module_key_from_normalized(&path, module_roots, module_depth);
439                    let st = report.stats.summarise();
440                    // Embedded children do not have bytes/tokens (they are inside the parent)
441
442                    let key = Key {
443                        path,
444                        lang: child_type.name().to_string(),
445                        kind: FileKind::Child,
446                    };
447                    let entry = map.entry(key).or_insert_with(|| (module, Agg::default()));
448                    entry.1.code += st.code;
449                    entry.1.comments += st.comments;
450                    entry.1.blanks += st.blanks;
451                    // entry.1.bytes += 0;
452                    // entry.1.tokens += 0;
453                }
454            }
455        }
456    }
457
458    map.into_iter()
459        .map(|(key, (module, agg))| {
460            let lines = agg.code + agg.comments + agg.blanks;
461            FileRow {
462                path: key.path,
463                module,
464                lang: key.lang,
465                kind: key.kind,
466                code: agg.code,
467                comments: agg.comments,
468                blanks: agg.blanks,
469                lines,
470                bytes: agg.bytes,
471                tokens: agg.tokens,
472            }
473        })
474        .collect()
475}
476
477pub fn unique_parent_file_count(languages: &Languages) -> usize {
478    let mut seen: BTreeSet<String> = BTreeSet::new();
479    for (_lang_type, lang) in languages.iter() {
480        for report in &lang.reports {
481            let path = normalize_path(&report.name, None);
482            seen.insert(path);
483        }
484    }
485    seen.len()
486}
487
/// Average `lines` per file, rounded to the nearest whole number.
///
/// Returns 0 when `files` is zero (no division by zero).
///
/// # Examples
///
/// ```
/// use tokmd_model::avg;
///
/// assert_eq!(avg(300, 3), 100);
/// assert_eq!(avg(0, 5), 0);
/// assert_eq!(avg(100, 0), 0);
/// // Rounds to nearest: 7 / 2 = 3.5 → 4
/// assert_eq!(avg(7, 2), 4);
/// ```
pub fn avg(lines: usize, files: usize) -> usize {
    match files {
        // No files: define the average as zero rather than dividing by zero.
        0 => 0,
        // Adding half the divisor before integer division rounds to nearest.
        n => (lines + n / 2) / n,
    }
}
510
/// Normalize a path for portable output.
///
/// - Converts `\` separators to `/`
/// - Strips a leading `./`
/// - Optionally strips a caller-supplied prefix (matched after normalization)
/// - Strips leading slashes left over after prefix removal
///
/// # Examples
///
/// ```
/// use std::path::Path;
/// use tokmd_model::normalize_path;
///
/// // Normalizes backslashes to forward slashes
/// let p = Path::new("src\\main.rs");
/// assert_eq!(normalize_path(p, None), "src/main.rs");
///
/// // Strips a prefix
/// let p = Path::new("project/src/lib.rs");
/// let prefix = Path::new("project");
/// assert_eq!(normalize_path(&p, Some(&prefix)), "src/lib.rs");
/// ```
pub fn normalize_path(path: &Path, strip_prefix: Option<&Path>) -> String {
    let normalized = path.to_string_lossy().replace('\\', "/");

    // Strip leading "./" first so a prefix like "src/" can match against
    // "src/…" rather than "./src/…".
    let mut rest: &str = normalized.strip_prefix("./").unwrap_or(&normalized);

    if let Some(prefix) = strip_prefix {
        // Normalize the prefix the same way: drop a leading "./" (before the
        // separator fix, mirroring the path handling above), convert `\` to
        // `/`, and force a trailing slash so "src" matches "src/…" but never
        // "srcfoo/…".
        let raw = prefix.to_string_lossy();
        let trimmed = raw.strip_prefix("./").unwrap_or(&raw);
        let mut pfx = trimmed.replace('\\', "/");
        if !pfx.ends_with('/') {
            pfx.push('/');
        }
        if let Some(stripped) = rest.strip_prefix(pfx.as_str()) {
            rest = stripped;
        }
    }

    // Clean up anything the prefix strip may have exposed: leading slashes
    // and a leading "./" (e.g. from an original "/./" sequence).
    rest = rest.trim_start_matches('/');
    rest = rest.strip_prefix("./").unwrap_or(rest);
    rest.trim_start_matches('/').to_string()
}
594
/// Compute a "module key" from an input path.
///
/// Thin wrapper delegating to [`tokmd_module_key::module_key`] — presumably
/// re-exposed here so callers of this crate need no direct dependency on
/// `tokmd_module_key` (verify against downstream users).
///
/// Rules:
/// - Root-level files become "(root)".
/// - If the first directory segment is in `module_roots`, join `module_depth` *directory* segments.
/// - Otherwise, module key is the top-level directory.
///
/// # Examples
///
/// ```
/// use tokmd_model::module_key;
///
/// let roots = vec!["crates".to_string()];
/// assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
/// assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
/// assert_eq!(module_key("Cargo.toml", &roots, 2), "(root)");
/// ```
pub fn module_key(path: &str, module_roots: &[String], module_depth: usize) -> String {
    tokmd_module_key::module_key(path, module_roots, module_depth)
}
615
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn module_key_root_level_file() {
        // Files with no directory component map to the "(root)" bucket.
        assert_eq!(module_key("Cargo.toml", &["crates".into()], 2), "(root)");
        assert_eq!(module_key("./Cargo.toml", &["crates".into()], 2), "(root)");
    }

    #[test]
    fn module_key_crates_depth_2() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 2), "crates/foo");
        assert_eq!(
            module_key("packages/bar/src/main.rs", &roots, 2),
            "packages/bar"
        );
    }

    #[test]
    fn module_key_crates_depth_1() {
        let roots = vec!["crates".into(), "packages".into()];
        assert_eq!(module_key("crates/foo/src/lib.rs", &roots, 1), "crates");
    }

    #[test]
    fn module_key_non_root() {
        // Paths outside the configured roots collapse to their top directory.
        let roots = vec!["crates".into()];
        assert_eq!(module_key("src/lib.rs", &roots, 2), "src");
        assert_eq!(module_key("tools/gen.rs", &roots, 2), "tools");
    }

    #[test]
    fn module_key_depth_overflow_does_not_include_filename() {
        let roots = vec!["crates".into()];
        // File directly under a root: depth=2 should NOT include the filename
        assert_eq!(module_key("crates/foo.rs", &roots, 2), "crates");
        // Depth exceeds available directories: should stop at deepest directory
        assert_eq!(
            module_key("crates/foo/src/lib.rs", &roots, 10),
            "crates/foo/src"
        );
    }

    #[test]
    fn normalize_path_strips_prefix() {
        let p = PathBuf::from("C:/Code/Repo/src/main.rs");
        let prefix = PathBuf::from("C:/Code/Repo");
        let got = normalize_path(&p, Some(&prefix));
        assert_eq!(got, "src/main.rs");
    }

    #[test]
    fn normalize_path_normalization_slashes() {
        let p = PathBuf::from(r"C:\Code\Repo\src\main.rs");
        let got = normalize_path(&p, None);
        assert_eq!(got, "C:/Code/Repo/src/main.rs");
    }

    // Property-based tests for normalize_path / module_key invariants.
    mod normalize_properties {
        use super::*;
        use proptest::prelude::*;

        // A single path segment: alphanumerics plus `_ . -` (no separators).
        fn arb_path_component() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9_.-]+"
        }

        // A relative, slash-joined path of 1..=max_depth components.
        fn arb_path(max_depth: usize) -> impl Strategy<Value = String> {
            prop::collection::vec(arb_path_component(), 1..=max_depth)
                .prop_map(|comps| comps.join("/"))
        }

        proptest! {
            #[test]
            fn normalize_path_is_idempotent(path in arb_path(5)) {
                // Normalizing an already-normalized path must be a no-op.
                let p = PathBuf::from(&path);
                let norm1 = normalize_path(&p, None);
                let p2 = PathBuf::from(&norm1);
                let norm2 = normalize_path(&p2, None);
                prop_assert_eq!(norm1, norm2);
            }

            #[test]
            fn normalize_path_handles_windows_separators(path in arb_path(5)) {
                // Backslash and forward-slash spellings normalize identically.
                let win_path = path.replace('/', "\\");
                let p_win = PathBuf::from(&win_path);
                let p_unix = PathBuf::from(&path);

                let norm_win = normalize_path(&p_win, None);
                let norm_unix = normalize_path(&p_unix, None);

                prop_assert_eq!(norm_win, norm_unix);
            }

            #[test]
            fn normalize_path_no_leading_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with('/'));
            }

            #[test]
            fn normalize_path_no_leading_dot_slash(path in arb_path(5)) {
                let p = PathBuf::from(&path);
                let norm = normalize_path(&p, None);
                prop_assert!(!norm.starts_with("./"));
            }

            #[test]
            fn module_key_deterministic(
                path in arb_path(5),
                roots in prop::collection::vec(arb_path_component(), 1..3),
                depth in 1usize..5
            ) {
                // Same inputs must always yield the same module key.
                let k1 = module_key(&path, &roots, depth);
                let k2 = module_key(&path, &roots, depth);
                prop_assert_eq!(k1, k2);
            }
        }
    }

    // Property-based tests for fold_other_* functions
    mod fold_properties {
        use super::*;
        use proptest::prelude::*;

        // An arbitrary LangRow with independently drawn counter fields.
        fn arb_lang_row() -> impl Strategy<Value = LangRow> {
            (
                "[a-zA-Z]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(lang, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    LangRow {
                        lang,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        // An arbitrary ModuleRow with independently drawn counter fields.
        fn arb_module_row() -> impl Strategy<Value = ModuleRow> {
            (
                "[a-zA-Z0-9_/]+",
                0usize..10000,
                0usize..20000,
                0usize..1000,
                0usize..1000000,
                0usize..100000,
            )
                .prop_map(|(module, code, lines, files, bytes, tokens)| {
                    let avg_lines = (lines + (files / 2)).checked_div(files).unwrap_or(0);
                    ModuleRow {
                        module,
                        code,
                        lines,
                        files,
                        bytes,
                        tokens,
                        avg_lines,
                    }
                })
        }

        proptest! {
            #[test]
            fn fold_lang_preserves_totals(rows in prop::collection::vec(arb_lang_row(), 0..10)) {
                // Folding must not lose or invent any counted quantity.
                let folded = fold_other_lang(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_lang_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_lang(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.lang, "Other");
            }

            #[test]
            fn fold_module_preserves_totals(rows in prop::collection::vec(arb_module_row(), 0..10)) {
                let folded = fold_other_module(&rows);

                let total_code: usize = rows.iter().map(|r| r.code).sum();
                let total_lines: usize = rows.iter().map(|r| r.lines).sum();
                let total_files: usize = rows.iter().map(|r| r.files).sum();
                let total_bytes: usize = rows.iter().map(|r| r.bytes).sum();
                let total_tokens: usize = rows.iter().map(|r| r.tokens).sum();

                prop_assert_eq!(folded.code, total_code, "Code mismatch");
                prop_assert_eq!(folded.lines, total_lines, "Lines mismatch");
                prop_assert_eq!(folded.files, total_files, "Files mismatch");
                prop_assert_eq!(folded.bytes, total_bytes, "Bytes mismatch");
                prop_assert_eq!(folded.tokens, total_tokens, "Tokens mismatch");
            }

            #[test]
            fn fold_module_empty_is_zero(_dummy in 0..1u8) {
                let folded = fold_other_module(&[]);
                prop_assert_eq!(folded.code, 0);
                prop_assert_eq!(folded.lines, 0);
                prop_assert_eq!(folded.files, 0);
                prop_assert_eq!(folded.bytes, 0);
                prop_assert_eq!(folded.tokens, 0);
                prop_assert_eq!(folded.module, "Other");
            }

            #[test]
            fn fold_associative_lang(
                rows1 in prop::collection::vec(arb_lang_row(), 0..5),
                rows2 in prop::collection::vec(arb_lang_row(), 0..5)
            ) {
                // Folding all at once should equal folding parts and combining
                let all: Vec<_> = rows1.iter().chain(rows2.iter()).cloned().collect();
                let fold_all = fold_other_lang(&all);

                let fold1 = fold_other_lang(&rows1);
                let fold2 = fold_other_lang(&rows2);
                let combined = fold_other_lang(&[fold1, fold2]);

                prop_assert_eq!(fold_all.code, combined.code);
                prop_assert_eq!(fold_all.lines, combined.lines);
                prop_assert_eq!(fold_all.files, combined.files);
                prop_assert_eq!(fold_all.bytes, combined.bytes);
                prop_assert_eq!(fold_all.tokens, combined.tokens);
            }
        }
    }
}