Skip to main content

tokmd_analysis_derived/
lib.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use tokmd_analysis_types::{
4    BoilerplateReport, CocomoReport, ContextWindowReport, DerivedReport, DerivedTotals,
5    DistributionReport, FileStatRow, HistogramBucket, IntegrityReport, LangPurityReport,
6    LangPurityRow, MaxFileReport, MaxFileRow, NestingReport, NestingRow, PolyglotReport,
7    RateReport, RateRow, RatioReport, RatioRow, ReadingTimeReport, TestDensityReport, TopOffenders,
8};
9use tokmd_analysis_util::{empty_file_row, is_infra_lang, is_test_path, path_depth};
10use tokmd_export_tree::render_analysis_tree;
11use tokmd_math::{gini_coefficient, percentile, round_f64, safe_ratio};
12use tokmd_types::{ExportData, FileKind, FileRow};
13
14const LINES_PER_MINUTE: usize = 20;
15const TOP_N: usize = 10;
16const MIN_DOC_LINES: usize = 50;
17const MIN_DENSE_LINES: usize = 10;
18
19pub fn derive_report(export: &ExportData, window_tokens: Option<usize>) -> DerivedReport {
20    let parents: Vec<&FileRow> = export
21        .rows
22        .iter()
23        .filter(|r| r.kind == FileKind::Parent)
24        .collect();
25
26    let mut totals = DerivedTotals {
27        files: parents.len(),
28        code: 0,
29        comments: 0,
30        blanks: 0,
31        lines: 0,
32        bytes: 0,
33        tokens: 0,
34    };
35
36    for row in &parents {
37        totals.code += row.code;
38        totals.comments += row.comments;
39        totals.blanks += row.blanks;
40        totals.lines += row.lines;
41        totals.bytes += row.bytes;
42        totals.tokens += row.tokens;
43    }
44
45    let doc_density = build_ratio_report(
46        "total",
47        totals.comments,
48        totals.code + totals.comments,
49        group_ratio(&parents, |r| r.lang.as_str(), |r| (r.comments, r.code)),
50        group_ratio(&parents, |r| r.module.as_str(), |r| (r.comments, r.code)),
51    );
52
53    let whitespace = build_ratio_report(
54        "total",
55        totals.blanks,
56        totals.code + totals.comments,
57        group_ratio(
58            &parents,
59            |r| r.lang.as_str(),
60            |r| (r.blanks, r.code + r.comments),
61        ),
62        group_ratio(
63            &parents,
64            |r| r.module.as_str(),
65            |r| (r.blanks, r.code + r.comments),
66        ),
67    );
68
69    let verbosity = build_rate_report(
70        "total",
71        totals.bytes,
72        totals.lines,
73        group_rate(&parents, |r| r.lang.as_str(), |r| (r.bytes, r.lines)),
74        group_rate(&parents, |r| r.module.as_str(), |r| (r.bytes, r.lines)),
75    );
76
77    let file_stats = build_file_stats(&parents);
78
79    let max_file = build_max_file_report(&file_stats);
80
81    let lang_purity = build_lang_purity_report(&parents);
82
83    let nesting = build_nesting_report(&file_stats);
84
85    let test_density = build_test_density_report(&parents);
86
87    let boilerplate = build_boilerplate_report(&parents);
88
89    let polyglot = build_polyglot_report(&parents);
90
91    let distribution = build_distribution_report(&parents);
92
93    let histogram = build_histogram(&parents);
94
95    let top = build_top_offenders(&file_stats);
96
97    let reading_time = ReadingTimeReport {
98        minutes: round_f64(totals.code as f64 / LINES_PER_MINUTE as f64, 2),
99        lines_per_minute: LINES_PER_MINUTE,
100        basis_lines: totals.code,
101    };
102
103    let context_window = window_tokens.map(|window| {
104        let pct = if window == 0 {
105            0.0
106        } else {
107            round_f64(totals.tokens as f64 / window as f64, 4)
108        };
109        ContextWindowReport {
110            window_tokens: window,
111            total_tokens: totals.tokens,
112            pct,
113            fits: totals.tokens <= window,
114        }
115    });
116
117    let cocomo = if totals.code == 0 {
118        None
119    } else {
120        let kloc = totals.code as f64 / 1000.0;
121        let (a, b, c, d) = (2.4, 1.05, 2.5, 0.38);
122        let effort = a * kloc.powf(b);
123        let duration = c * effort.powf(d);
124        let staff = if duration == 0.0 {
125            0.0
126        } else {
127            effort / duration
128        };
129        Some(CocomoReport {
130            mode: "organic".to_string(),
131            kloc: round_f64(kloc, 4),
132            effort_pm: round_f64(effort, 2),
133            duration_months: round_f64(duration, 2),
134            staff: round_f64(staff, 2),
135            a,
136            b,
137            c,
138            d,
139        })
140    };
141
142    let integrity = build_integrity_report(&parents);
143
144    DerivedReport {
145        totals,
146        doc_density,
147        whitespace,
148        verbosity,
149        max_file,
150        lang_purity,
151        nesting,
152        test_density,
153        boilerplate,
154        polyglot,
155        distribution,
156        histogram,
157        top,
158        tree: None,
159        reading_time,
160        context_window,
161        cocomo,
162        todo: None,
163        integrity,
164    }
165}
166
167fn build_ratio_report(
168    total_key: &str,
169    total_numer: usize,
170    total_denom: usize,
171    by_lang: BTreeMap<String, (usize, usize)>,
172    by_module: BTreeMap<String, (usize, usize)>,
173) -> RatioReport {
174    RatioReport {
175        total: RatioRow {
176            key: total_key.to_string(),
177            numerator: total_numer,
178            denominator: total_denom,
179            ratio: safe_ratio(total_numer, total_denom),
180        },
181        by_lang: build_ratio_rows(by_lang),
182        by_module: build_ratio_rows(by_module),
183    }
184}
185
186fn build_rate_report(
187    total_key: &str,
188    total_numer: usize,
189    total_denom: usize,
190    by_lang: BTreeMap<String, (usize, usize)>,
191    by_module: BTreeMap<String, (usize, usize)>,
192) -> RateReport {
193    RateReport {
194        total: RateRow {
195            key: total_key.to_string(),
196            numerator: total_numer,
197            denominator: total_denom,
198            rate: safe_ratio(total_numer, total_denom),
199        },
200        by_lang: build_rate_rows(by_lang),
201        by_module: build_rate_rows(by_module),
202    }
203}
204
205fn build_ratio_rows(map: BTreeMap<String, (usize, usize)>) -> Vec<RatioRow> {
206    let mut rows: Vec<RatioRow> = map
207        .into_iter()
208        .map(|(key, (numer, denom))| RatioRow {
209            key,
210            numerator: numer,
211            denominator: denom,
212            ratio: safe_ratio(numer, denom),
213        })
214        .collect();
215
216    rows.sort_by(|a, b| {
217        b.ratio
218            .partial_cmp(&a.ratio)
219            .unwrap_or(std::cmp::Ordering::Equal)
220            .then_with(|| a.key.cmp(&b.key))
221    });
222    rows
223}
224
225fn build_rate_rows(map: BTreeMap<String, (usize, usize)>) -> Vec<RateRow> {
226    let mut rows: Vec<RateRow> = map
227        .into_iter()
228        .map(|(key, (numer, denom))| RateRow {
229            key,
230            numerator: numer,
231            denominator: denom,
232            rate: safe_ratio(numer, denom),
233        })
234        .collect();
235
236    rows.sort_by(|a, b| {
237        b.rate
238            .partial_cmp(&a.rate)
239            .unwrap_or(std::cmp::Ordering::Equal)
240            .then_with(|| a.key.cmp(&b.key))
241    });
242    rows
243}
244
245fn group_ratio<FKey, FVals>(
246    rows: &[&FileRow],
247    key_fn: FKey,
248    vals_fn: FVals,
249) -> BTreeMap<String, (usize, usize)>
250where
251    FKey: for<'a> Fn(&'a FileRow) -> &'a str,
252    FVals: Fn(&FileRow) -> (usize, usize),
253{
254    let mut map: BTreeMap<String, (usize, usize)> = BTreeMap::new();
255    for row in rows {
256        let (numer, denom_part) = vals_fn(row);
257        let key = key_fn(row);
258        if let Some(entry) = map.get_mut(key) {
259            entry.0 += numer;
260            entry.1 += denom_part;
261        } else {
262            map.insert(key.to_owned(), (numer, denom_part));
263        }
264    }
265    map
266}
267
268fn group_rate<FKey, FVals>(
269    rows: &[&FileRow],
270    key_fn: FKey,
271    vals_fn: FVals,
272) -> BTreeMap<String, (usize, usize)>
273where
274    FKey: for<'a> Fn(&'a FileRow) -> &'a str,
275    FVals: Fn(&FileRow) -> (usize, usize),
276{
277    let mut map: BTreeMap<String, (usize, usize)> = BTreeMap::new();
278    for row in rows {
279        let (numer, denom) = vals_fn(row);
280        let key = key_fn(row);
281        if let Some(entry) = map.get_mut(key) {
282            entry.0 += numer;
283            entry.1 += denom;
284        } else {
285            map.insert(key.to_owned(), (numer, denom));
286        }
287    }
288    map
289}
290
291fn build_file_stats(rows: &[&FileRow]) -> Vec<FileStatRow> {
292    rows.iter()
293        .map(|r| FileStatRow {
294            path: r.path.clone(),
295            module: r.module.clone(),
296            lang: r.lang.clone(),
297            code: r.code,
298            comments: r.comments,
299            blanks: r.blanks,
300            lines: r.lines,
301            bytes: r.bytes,
302            tokens: r.tokens,
303            doc_pct: if r.code + r.comments == 0 {
304                None
305            } else {
306                Some(safe_ratio(r.comments, r.code + r.comments))
307            },
308            bytes_per_line: if r.lines == 0 {
309                None
310            } else {
311                Some(safe_ratio(r.bytes, r.lines))
312            },
313            depth: path_depth(&r.path),
314        })
315        .collect()
316}
317
318fn build_max_file_report(rows: &[FileStatRow]) -> MaxFileReport {
319    let mut overall = rows
320        .iter()
321        .max_by(|a, b| a.lines.cmp(&b.lines).then_with(|| a.path.cmp(&b.path)))
322        .cloned()
323        .unwrap_or_else(empty_file_row);
324
325    if rows.is_empty() {
326        overall = empty_file_row();
327    }
328
329    let mut by_lang: BTreeMap<String, FileStatRow> = BTreeMap::new();
330    let mut by_module: BTreeMap<String, FileStatRow> = BTreeMap::new();
331
332    for row in rows {
333        by_lang
334            .entry(row.lang.clone())
335            .and_modify(|existing| {
336                if row.lines > existing.lines
337                    || (row.lines == existing.lines && row.path < existing.path)
338                {
339                    *existing = row.clone();
340                }
341            })
342            .or_insert_with(|| row.clone());
343
344        by_module
345            .entry(row.module.clone())
346            .and_modify(|existing| {
347                if row.lines > existing.lines
348                    || (row.lines == existing.lines && row.path < existing.path)
349                {
350                    *existing = row.clone();
351                }
352            })
353            .or_insert_with(|| row.clone());
354    }
355
356    MaxFileReport {
357        overall,
358        by_lang: by_lang
359            .into_iter()
360            .map(|(key, file)| MaxFileRow { key, file })
361            .collect(),
362        by_module: by_module
363            .into_iter()
364            .map(|(key, file)| MaxFileRow { key, file })
365            .collect(),
366    }
367}
368
369fn build_lang_purity_report(rows: &[&FileRow]) -> LangPurityReport {
370    let mut by_module: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
371
372    for row in rows {
373        if let Some(entry) = by_module.get_mut(row.module.as_str()) {
374            if let Some(lines) = entry.get_mut(row.lang.as_str()) {
375                *lines += row.lines;
376            } else {
377                entry.insert(row.lang.clone(), row.lines);
378            }
379        } else {
380            let mut entry = BTreeMap::new();
381            entry.insert(row.lang.clone(), row.lines);
382            by_module.insert(row.module.clone(), entry);
383        }
384    }
385
386    let mut out = Vec::new();
387    for (module, langs) in by_module {
388        let mut total = 0usize;
389        let mut dominant_lang: Option<&str> = None;
390        let mut dominant_lines = 0usize;
391        for (lang, lines) in &langs {
392            total += *lines;
393            if *lines > dominant_lines
394                || (*lines == dominant_lines && dominant_lang.is_some_and(|d| lang.as_str() < d))
395            {
396                dominant_lines = *lines;
397                dominant_lang = Some(lang.as_str());
398            }
399        }
400        let pct = if total == 0 {
401            0.0
402        } else {
403            safe_ratio(dominant_lines, total)
404        };
405        out.push(LangPurityRow {
406            module,
407            lang_count: langs.len(),
408            dominant_lang: dominant_lang.unwrap_or_default().to_string(),
409            dominant_lines,
410            dominant_pct: pct,
411        });
412    }
413
414    out.sort_by(|a, b| a.module.cmp(&b.module));
415    LangPurityReport { rows: out }
416}
417
418fn build_nesting_report(rows: &[FileStatRow]) -> NestingReport {
419    if rows.is_empty() {
420        return NestingReport {
421            max: 0,
422            avg: 0.0,
423            by_module: vec![],
424        };
425    }
426
427    let mut total_depth = 0usize;
428    let mut max_depth = 0usize;
429    let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
430
431    for row in rows {
432        total_depth += row.depth;
433        max_depth = max_depth.max(row.depth);
434        by_module
435            .entry(row.module.clone())
436            .or_default()
437            .push(row.depth);
438    }
439
440    let avg = round_f64(total_depth as f64 / rows.len() as f64, 2);
441
442    let mut module_rows = Vec::new();
443    for (module, depths) in by_module {
444        let max = depths.iter().copied().max().unwrap_or(0);
445        let sum: usize = depths.iter().sum();
446        let avg = if depths.is_empty() {
447            0.0
448        } else {
449            round_f64(sum as f64 / depths.len() as f64, 2)
450        };
451        module_rows.push(NestingRow {
452            key: module,
453            max,
454            avg,
455        });
456    }
457
458    NestingReport {
459        max: max_depth,
460        avg,
461        by_module: module_rows,
462    }
463}
464
465fn build_test_density_report(rows: &[&FileRow]) -> TestDensityReport {
466    let mut test_lines = 0usize;
467    let mut prod_lines = 0usize;
468    let mut test_files = 0usize;
469    let mut prod_files = 0usize;
470
471    for row in rows {
472        if is_test_path(&row.path) {
473            test_lines += row.code;
474            test_files += 1;
475        } else {
476            prod_lines += row.code;
477            prod_files += 1;
478        }
479    }
480
481    let total = test_lines + prod_lines;
482    let ratio = if total == 0 {
483        0.0
484    } else {
485        safe_ratio(test_lines, total)
486    };
487
488    TestDensityReport {
489        test_lines,
490        prod_lines,
491        test_files,
492        prod_files,
493        ratio,
494    }
495}
496
497fn build_boilerplate_report(rows: &[&FileRow]) -> BoilerplateReport {
498    let mut infra_lines = 0usize;
499    let mut logic_lines = 0usize;
500    let mut infra_langs: BTreeSet<String> = BTreeSet::new();
501
502    for row in rows {
503        if is_infra_lang(&row.lang) {
504            infra_lines += row.lines;
505            if !infra_langs.contains(&row.lang) {
506                infra_langs.insert(row.lang.clone());
507            }
508        } else {
509            logic_lines += row.lines;
510        }
511    }
512
513    let total = infra_lines + logic_lines;
514    let ratio = if total == 0 {
515        0.0
516    } else {
517        safe_ratio(infra_lines, total)
518    };
519
520    BoilerplateReport {
521        infra_lines,
522        logic_lines,
523        ratio,
524        infra_langs: infra_langs.into_iter().collect(),
525    }
526}
527
528fn build_polyglot_report(rows: &[&FileRow]) -> PolyglotReport {
529    let mut by_lang: BTreeMap<String, usize> = BTreeMap::new();
530    let mut total = 0usize;
531
532    for row in rows {
533        if let Some(val) = by_lang.get_mut(&row.lang) {
534            *val += row.code;
535        } else {
536            by_lang.insert(row.lang.clone(), row.code);
537        }
538        total += row.code;
539    }
540
541    let mut entropy = 0.0;
542    let mut dominant_lang: Option<&str> = None;
543    let mut dominant_lines = 0usize;
544
545    for (lang, lines) in &by_lang {
546        if *lines > dominant_lines
547            || (*lines == dominant_lines && dominant_lang.is_some_and(|d| lang.as_str() < d))
548        {
549            dominant_lines = *lines;
550            dominant_lang = Some(lang.as_str());
551        }
552        if total > 0 && *lines > 0 {
553            let p = *lines as f64 / total as f64;
554            entropy -= p * p.log2();
555        }
556    }
557
558    let dominant_pct = if total == 0 {
559        0.0
560    } else {
561        safe_ratio(dominant_lines, total)
562    };
563
564    PolyglotReport {
565        lang_count: by_lang.len(),
566        entropy: round_f64(entropy, 4),
567        dominant_lang: dominant_lang.unwrap_or_default().to_string(),
568        dominant_lines,
569        dominant_pct,
570    }
571}
572
573fn build_distribution_report(rows: &[&FileRow]) -> DistributionReport {
574    let mut sizes: Vec<usize> = rows.iter().map(|r| r.lines).collect();
575    sizes.sort();
576
577    if sizes.is_empty() {
578        return DistributionReport {
579            count: 0,
580            min: 0,
581            max: 0,
582            mean: 0.0,
583            median: 0.0,
584            p90: 0.0,
585            p99: 0.0,
586            gini: 0.0,
587        };
588    }
589
590    let count = sizes.len();
591    let sum: usize = sizes.iter().sum();
592    let mean = sum as f64 / count as f64;
593    let median = if count % 2 == 1 {
594        sizes[count / 2] as f64
595    } else {
596        (sizes[count / 2 - 1] as f64 + sizes[count / 2] as f64) / 2.0
597    };
598    let p90 = percentile(&sizes, 0.90);
599    let p99 = percentile(&sizes, 0.99);
600    let gini = gini_coefficient(&sizes);
601
602    DistributionReport {
603        count,
604        min: *sizes.first().unwrap_or(&0),
605        max: *sizes.last().unwrap_or(&0),
606        mean: round_f64(mean, 2),
607        median: round_f64(median, 2),
608        p90: round_f64(p90, 2),
609        p99: round_f64(p99, 2),
610        gini: round_f64(gini, 4),
611    }
612}
613
614fn build_histogram(rows: &[&FileRow]) -> Vec<HistogramBucket> {
615    let total = rows.len();
616    let buckets = vec![
617        ("Tiny", 0, Some(50)),
618        ("Small", 51, Some(200)),
619        ("Medium", 201, Some(500)),
620        ("Large", 501, Some(1000)),
621        ("Huge", 1001, None),
622    ];
623
624    let mut counts = vec![0usize; buckets.len()];
625    for row in rows {
626        let size = row.lines;
627        for (idx, (_label, min, max)) in buckets.iter().enumerate() {
628            let in_range = if let Some(max) = max {
629                size >= *min && size <= *max
630            } else {
631                size >= *min
632            };
633            if in_range {
634                counts[idx] += 1;
635                break;
636            }
637        }
638    }
639
640    buckets
641        .into_iter()
642        .zip(counts)
643        .map(|((label, min, max), files)| HistogramBucket {
644            label: label.to_string(),
645            min,
646            max,
647            files,
648            pct: if total == 0 {
649                0.0
650            } else {
651                round_f64(files as f64 / total as f64, 4)
652            },
653        })
654        .collect()
655}
656
/// Renders the analysis tree view for `export`.
///
/// Thin delegation to `tokmd_export_tree::render_analysis_tree`; kept here
/// so callers of this crate have a single entry point for derived output.
pub fn build_tree(export: &ExportData) -> String {
    render_analysis_tree(export)
}
660
661fn build_top_offenders(rows: &[FileStatRow]) -> TopOffenders {
662    let mut by_lines = rows.to_vec();
663    by_lines.sort_by(|a, b| b.lines.cmp(&a.lines).then_with(|| a.path.cmp(&b.path)));
664
665    let mut by_tokens = rows.to_vec();
666    by_tokens.sort_by(|a, b| b.tokens.cmp(&a.tokens).then_with(|| a.path.cmp(&b.path)));
667
668    let mut by_bytes = rows.to_vec();
669    by_bytes.sort_by(|a, b| b.bytes.cmp(&a.bytes).then_with(|| a.path.cmp(&b.path)));
670
671    let mut least_doc: Vec<FileStatRow> = rows
672        .iter()
673        .filter(|r| r.lines >= MIN_DOC_LINES)
674        .cloned()
675        .collect();
676    least_doc.sort_by(|a, b| {
677        let a_doc = a.doc_pct.unwrap_or(0.0);
678        let b_doc = b.doc_pct.unwrap_or(0.0);
679        a_doc
680            .partial_cmp(&b_doc)
681            .unwrap_or(std::cmp::Ordering::Equal)
682            .then_with(|| b.lines.cmp(&a.lines))
683            .then_with(|| a.path.cmp(&b.path))
684    });
685
686    let mut dense: Vec<FileStatRow> = rows
687        .iter()
688        .filter(|r| r.lines >= MIN_DENSE_LINES)
689        .cloned()
690        .collect();
691    dense.sort_by(|a, b| {
692        let a_rate = a.bytes_per_line.unwrap_or(0.0);
693        let b_rate = b.bytes_per_line.unwrap_or(0.0);
694        b_rate
695            .partial_cmp(&a_rate)
696            .unwrap_or(std::cmp::Ordering::Equal)
697            .then_with(|| a.path.cmp(&b.path))
698    });
699
700    TopOffenders {
701        largest_lines: by_lines.into_iter().take(TOP_N).collect(),
702        largest_tokens: by_tokens.into_iter().take(TOP_N).collect(),
703        largest_bytes: by_bytes.into_iter().take(TOP_N).collect(),
704        least_documented: least_doc.into_iter().take(TOP_N).collect(),
705        most_dense: dense.into_iter().take(TOP_N).collect(),
706    }
707}
708
709fn build_integrity_report(rows: &[&FileRow]) -> IntegrityReport {
710    let mut sorted_rows = rows.to_vec();
711    sorted_rows.sort_unstable_by(|&a, &b| compare_integrity_rows(a, b));
712
713    let mut hasher = blake3::Hasher::new();
714    let mut first = true;
715    for row in sorted_rows {
716        if !first {
717            hasher.update(b"\n");
718        }
719        first = false;
720        hasher.update(row.path.as_bytes());
721        hasher.update(b":");
722        hasher.update(row.bytes.to_string().as_bytes());
723        hasher.update(b":");
724        hasher.update(row.lines.to_string().as_bytes());
725    }
726
727    IntegrityReport {
728        algo: "blake3".to_string(),
729        hash: hasher.finalize().to_hex().to_string(),
730        entries: rows.len(),
731    }
732}
733
734fn compare_integrity_rows(a: &FileRow, b: &FileRow) -> std::cmp::Ordering {
735    let a_bytes = a.path.as_bytes();
736    let b_bytes = b.path.as_bytes();
737    let min_len = a_bytes.len().min(b_bytes.len());
738
739    // Fast slice compare for common prefix
740    let ord = a_bytes[..min_len].cmp(&b_bytes[..min_len]);
741    if ord != std::cmp::Ordering::Equal {
742        return ord;
743    }
744
745    // Paths are identical or one is prefix of other
746    if a_bytes.len() == b_bytes.len() {
747        // Identical paths. Compare numbers.
748        // We must emulate string sort of "bytes:lines".
749        // Format them to ensure correct string sort order.
750        let a_str = format!("{}:{}", a.bytes, a.lines);
751        let b_str = format!("{}:{}", b.bytes, b.lines);
752        return a_str.cmp(&b_str);
753    }
754
755    // One is shorter.
756    // The separator is ':'.
757    if a_bytes.len() < b_bytes.len() {
758        // a is prefix of b.
759        // Effective string a: "path:..."
760        // Effective string b: "path..."
761        // Compare ':' vs b[min_len]
762        b':'.cmp(&b_bytes[min_len])
763    } else {
764        // b is prefix of a.
765        // Effective string a: "path..."
766        // Effective string b: "path:..."
767        // Compare a[min_len] vs ':'
768        a_bytes[min_len].cmp(&b':')
769    }
770}
771
#[cfg(test)]
mod tests {
    use super::*;
    use tokmd_types::{FileKind, FileRow};

    // Builds a minimal Parent row; only path, bytes, and lines matter for
    // the integrity comparator under test — the other fields are zeroed.
    fn make_row(path: &str, bytes: usize, lines: usize) -> FileRow {
        FileRow {
            path: path.to_string(),
            module: "mod".to_string(),
            lang: "rust".to_string(),
            kind: FileKind::Parent,
            code: 0,
            comments: 0,
            blanks: 0,
            lines,
            bytes,
            tokens: 0,
        }
    }

    // `compare_integrity_rows` must agree with a plain lexicographic sort
    // of the formatted "path:bytes:lines" strings for every case below,
    // including numeric suffixes that sort as strings ("100" < "20") and
    // prefix relationships between paths ("foo" vs "foo.bar"/"foo_bar").
    #[test]
    fn test_compare_integrity_rows_matches_string_sort() {
        let cases = vec![
            ("a", 10, 10, "b", 10, 10),
            ("a", 10, 10, "a", 10, 10),
            ("a", 10, 10, "a", 20, 10),
            ("a", 100, 10, "a", 20, 10), // "100" < "20" as string? No, '1' < '2'. So "100" < "20".
            ("a", 10, 10, "a.b", 10, 10),
            ("a.b", 10, 10, "a", 10, 10),
            ("foo", 10, 10, "foo.bar", 10, 10),
            ("foo.bar", 10, 10, "foo", 10, 10),
            ("foo", 10, 10, "foo_bar", 10, 10),
        ];

        for (p1, b1, l1, p2, b2, l2) in cases {
            let r1 = make_row(p1, b1, l1);
            let r2 = make_row(p2, b2, l2);

            // Expected ordering comes from the canonical string form.
            let s1 = format!("{}:{}:{}", p1, b1, l1);
            let s2 = format!("{}:{}:{}", p2, b2, l2);
            let expected = s1.cmp(&s2);
            let actual = compare_integrity_rows(&r1, &r2);

            assert_eq!(actual, expected, "Failed for {} vs {}", s1, s2);
        }
    }
}