Skip to main content

normalize_analyze/
ranked.rs

1//! Ranked list helpers: scoring, stats, and the shared rank pipeline.
2//!
3//! ## Table formatting
4//!
5//! The [`RankEntry`] trait + [`format_ranked_table`] function provide shared
6//! tabular rendering for all rank-pattern commands. Implement `RankEntry` on
7//! your entry struct, then call `format_ranked_table()` in your
8//! `OutputFormatter::format_text()`.
9
10use std::cmp::Ordering;
11use std::collections::BTreeMap;
12
13use schemars::JsonSchema;
14use serde::Serialize;
15
16use crate::Entity;
17
18// ── Column / alignment types ───────────────────────────────────────────
19
20/// Column alignment in a ranked table.
21#[derive(Debug, Clone, Copy, PartialEq, Eq)]
22pub enum Align {
23    Left,
24    Right,
25}
26
27/// Column definition for ranked table rendering.
28#[derive(Debug, Clone)]
29pub struct Column {
30    pub name: &'static str,
31    pub align: Align,
32}
33
34impl Column {
35    pub const fn left(name: &'static str) -> Self {
36        Self {
37            name,
38            align: Align::Left,
39        }
40    }
41
42    pub const fn right(name: &'static str) -> Self {
43        Self {
44            name,
45            align: Align::Right,
46        }
47    }
48}
49
50// ── RankEntry trait ────────────────────────────────────────────────────
51
52/// Trait for entries that can be rendered in a ranked table.
53///
54/// Implement this on your entry struct to use [`format_ranked_table`].
55pub trait RankEntry {
56    /// Column definitions for the table header.
57    fn columns() -> Vec<Column>;
58
59    /// Format this entry's values as strings, one per column, in column order.
60    fn values(&self) -> Vec<String>;
61}
62
63// ── Diffable rank entries ─────────────────────────────────────────────
64
65/// Trait for rank entries that support `--diff <ref>` comparison.
66///
67/// Implement this alongside `RankEntry` to enable generic `--diff` support.
68/// After calling [`compute_ranked_diff`], entries are annotated with deltas
69/// and sorted by `|delta|` descending.
70pub trait DiffableRankEntry {
71    /// Identity key for matching entries across snapshots (e.g. file path, module name).
72    fn diff_key(&self) -> &str;
73
74    /// The numeric metric to compute deltas on.
75    fn diff_score(&self) -> f64;
76
77    /// Set the computed delta.
78    fn set_delta(&mut self, delta: Option<f64>);
79
80    /// Get the delta (if diff has been applied).
81    fn delta(&self) -> Option<f64>;
82}
83
84/// Compute per-entry deltas between current and baseline entries.
85///
86/// For each entry in `current`, finds the matching baseline entry by
87/// [`DiffableRankEntry::diff_key`] and computes `current_score - baseline_score`.
88/// New entries (no baseline match) get delta = current_score.
89/// After this, `current` is sorted by `|delta|` descending.
90pub fn compute_ranked_diff<E: DiffableRankEntry>(current: &mut [E], baseline: &[E]) {
91    let baseline_map: std::collections::HashMap<&str, f64> = baseline
92        .iter()
93        .map(|e| (e.diff_key(), e.diff_score()))
94        .collect();
95
96    for e in current.iter_mut() {
97        let delta = match baseline_map.get(e.diff_key()) {
98            Some(&base) => e.diff_score() - base,
99            None => e.diff_score(), // new entity — full score as delta
100        };
101        e.set_delta(Some(delta));
102    }
103
104    current.sort_by(|a, b| {
105        let da = a.delta().unwrap_or(0.0).abs();
106        let db = b.delta().unwrap_or(0.0).abs();
107        db.partial_cmp(&da).unwrap_or(std::cmp::Ordering::Equal)
108    });
109}
110
111/// Format a delta value as a string with sign prefix.
112pub fn format_delta(delta: f64, as_pct: bool) -> String {
113    let sign = if delta >= 0.0 { "+" } else { "" };
114    if as_pct {
115        format!("{sign}{delta:.1}%")
116    } else {
117        format!("{sign}{delta:.2}")
118    }
119}
120
121// ── Table formatter ────────────────────────────────────────────────────
122
123/// Render a ranked list as a text table.
124///
125/// Produces: title line, blank line, column headers, separator, rows.
126/// If `entries` is empty, shows `empty_message` (or a default).
127pub fn format_ranked_table<E: RankEntry>(
128    title: &str,
129    entries: &[E],
130    empty_message: Option<&str>,
131) -> String {
132    let mut out = Vec::new();
133
134    out.push(title.to_string());
135    out.push(String::new());
136
137    if entries.is_empty() {
138        out.push(empty_message.unwrap_or("No entries.").to_string());
139        return out.join("\n");
140    }
141
142    let cols = E::columns();
143
144    // Pre-compute all values so we can measure widths
145    let all_values: Vec<Vec<String>> = entries.iter().map(|e| e.values()).collect();
146
147    // Compute column widths: max(header, all data)
148    let widths: Vec<usize> = cols
149        .iter()
150        .enumerate()
151        .map(|(i, col)| {
152            let header_w = col.name.len();
153            let data_w = all_values
154                .iter()
155                .map(|row| row.get(i).map_or(0, |v| v.len()))
156                .max()
157                .unwrap_or(0);
158            header_w.max(data_w)
159        })
160        .collect();
161
162    // Header row
163    let header: String = cols
164        .iter()
165        .zip(&widths)
166        .map(|(col, &w)| match col.align {
167            Align::Left => format!("{:<width$}", col.name, width = w),
168            Align::Right => format!("{:>width$}", col.name, width = w),
169        })
170        .collect::<Vec<_>>()
171        .join("  ");
172    out.push(header);
173
174    // Separator
175    let sep: String = widths
176        .iter()
177        .map(|&w| "-".repeat(w))
178        .collect::<Vec<_>>()
179        .join("--");
180    out.push(sep);
181
182    // Data rows
183    for row_vals in &all_values {
184        let row: String = cols
185            .iter()
186            .zip(&widths)
187            .enumerate()
188            .map(|(i, (col, &w))| {
189                let val = row_vals.get(i).map_or("", |v| v.as_str());
190                match col.align {
191                    Align::Left => format!("{:<width$}", val, width = w),
192                    Align::Right => format!("{:>width$}", val, width = w),
193                }
194            })
195            .collect::<Vec<_>>()
196            .join("  ");
197        out.push(row);
198    }
199
200    out.join("\n")
201}
202
203/// A scored entity in a ranked list.
204#[derive(Debug, Clone, Serialize, JsonSchema)]
205pub struct Scored<E: Entity> {
206    pub entity: E,
207    pub score: f64,
208    /// Optional secondary scores for display (e.g. "lines", "tokens").
209    #[serde(skip_serializing_if = "BTreeMap::is_empty")]
210    pub aux: BTreeMap<String, f64>,
211}
212
213impl<E: Entity> Scored<E> {
214    /// Create a scored entity with no auxiliary scores.
215    pub fn new(entity: E, score: f64) -> Self {
216        Self {
217            entity,
218            score,
219            aux: BTreeMap::new(),
220        }
221    }
222
223    /// Create a scored entity with auxiliary scores.
224    pub fn with_aux(entity: E, score: f64, aux: BTreeMap<String, f64>) -> Self {
225        Self { entity, score, aux }
226    }
227}
228
229/// Stats computed over a full ranked list before truncation.
230#[derive(Debug, Clone, Serialize, JsonSchema)]
231pub struct RankStats {
232    pub total_count: usize,
233    pub avg: f64,
234    pub max: f64,
235    pub min: f64,
236}
237
238impl RankStats {
239    /// Compute stats from an iterator of scores.
240    pub fn from_scores(scores: impl Iterator<Item = f64>) -> Self {
241        let mut total_count = 0usize;
242        let mut sum = 0.0f64;
243        let mut max = f64::NEG_INFINITY;
244        let mut min = f64::INFINITY;
245
246        for s in scores {
247            total_count += 1;
248            sum += s;
249            if s > max {
250                max = s;
251            }
252            if s < min {
253                min = s;
254            }
255        }
256
257        if total_count == 0 {
258            return Self {
259                total_count: 0,
260                avg: 0.0,
261                max: 0.0,
262                min: 0.0,
263            };
264        }
265
266        Self {
267            total_count,
268            avg: sum / total_count as f64,
269            max,
270            min,
271        }
272    }
273}
274
275/// Sort, compute stats, truncate — the shared rank pipeline.
276///
277/// Sorts `items` by score (descending by default, ascending if `ascending` is true),
278/// computes [`RankStats`] over the full list, then truncates to `limit`.
279/// A `limit` of 0 means no truncation.
280pub fn rank_pipeline<E: Entity>(
281    items: &mut Vec<Scored<E>>,
282    limit: usize,
283    ascending: bool,
284) -> RankStats {
285    if ascending {
286        items.sort_by(|a, b| a.score.partial_cmp(&b.score).unwrap_or(Ordering::Equal));
287    } else {
288        items.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal));
289    }
290    let stats = RankStats::from_scores(items.iter().map(|s| s.score));
291    if limit > 0 && items.len() > limit {
292        items.truncate(limit);
293    }
294    stats
295}
296
297/// Sort by custom comparator, compute stats from a score function, and truncate.
298///
299/// Like [`rank_pipeline`] but works directly on `Vec<T>` with multi-key sorts
300/// where a single `f64` score doesn't capture the full ordering.
301/// A `limit` of 0 means no truncation.
302pub fn rank_and_truncate<T>(
303    items: &mut Vec<T>,
304    limit: usize,
305    cmp: impl Fn(&T, &T) -> Ordering,
306    score: impl Fn(&T) -> f64,
307) -> RankStats {
308    items.sort_by(|a, b| cmp(a, b));
309    let stats = RankStats::from_scores(items.iter().map(&score));
310    if limit > 0 && items.len() > limit {
311        items.truncate(limit);
312    }
313    stats
314}
315
316#[cfg(test)]
317mod tests {
318    use super::*;
319    use crate::FileEntity;
320
321    #[test]
322    fn test_rank_pipeline_descending() {
323        let mut items = vec![
324            Scored::new(
325                FileEntity {
326                    path: "a.rs".into(),
327                },
328                3.0,
329            ),
330            Scored::new(
331                FileEntity {
332                    path: "b.rs".into(),
333                },
334                7.0,
335            ),
336            Scored::new(
337                FileEntity {
338                    path: "c.rs".into(),
339                },
340                1.0,
341            ),
342            Scored::new(
343                FileEntity {
344                    path: "d.rs".into(),
345                },
346                5.0,
347            ),
348        ];
349        let stats = rank_pipeline(&mut items, 2, false);
350
351        assert_eq!(stats.total_count, 4);
352        assert!((stats.avg - 4.0).abs() < f64::EPSILON);
353        assert!((stats.max - 7.0).abs() < f64::EPSILON);
354        assert!((stats.min - 1.0).abs() < f64::EPSILON);
355        assert_eq!(items.len(), 2);
356        assert_eq!(items[0].entity.path, "b.rs");
357        assert_eq!(items[1].entity.path, "d.rs");
358    }
359
360    #[test]
361    fn test_rank_pipeline_ascending() {
362        let mut items = vec![
363            Scored::new(
364                FileEntity {
365                    path: "a.rs".into(),
366                },
367                3.0,
368            ),
369            Scored::new(
370                FileEntity {
371                    path: "b.rs".into(),
372                },
373                7.0,
374            ),
375            Scored::new(
376                FileEntity {
377                    path: "c.rs".into(),
378                },
379                1.0,
380            ),
381        ];
382        let stats = rank_pipeline(&mut items, 2, true);
383
384        assert_eq!(stats.total_count, 3);
385        assert_eq!(items.len(), 2);
386        assert_eq!(items[0].entity.path, "c.rs");
387        assert_eq!(items[1].entity.path, "a.rs");
388    }
389
390    #[test]
391    fn test_rank_pipeline_no_limit() {
392        let mut items = vec![
393            Scored::new(
394                FileEntity {
395                    path: "a.rs".into(),
396                },
397                3.0,
398            ),
399            Scored::new(
400                FileEntity {
401                    path: "b.rs".into(),
402                },
403                7.0,
404            ),
405        ];
406        let stats = rank_pipeline(&mut items, 0, false);
407
408        assert_eq!(stats.total_count, 2);
409        assert_eq!(items.len(), 2); // no truncation
410    }
411
412    #[test]
413    fn test_rank_pipeline_empty() {
414        let mut items: Vec<Scored<FileEntity>> = Vec::new();
415        let stats = rank_pipeline(&mut items, 10, false);
416
417        assert_eq!(stats.total_count, 0);
418        assert!((stats.avg - 0.0).abs() < f64::EPSILON);
419    }
420
421    #[test]
422    fn test_rank_and_truncate() {
423        let mut items = vec![
424            ("a.rs", 3usize, 10usize),
425            ("b.rs", 1, 20),
426            ("c.rs", 3, 5),
427            ("d.rs", 2, 15),
428        ];
429        // Sort by first key ascending, then second key descending
430        let stats = rank_and_truncate(
431            &mut items,
432            3,
433            |a, b| a.1.cmp(&b.1).then(b.2.cmp(&a.2)),
434            |item| item.1 as f64,
435        );
436
437        assert_eq!(stats.total_count, 4);
438        assert_eq!(items.len(), 3);
439        assert_eq!(items[0].0, "b.rs"); // lowest first key
440        assert_eq!(items[1].0, "d.rs"); // second lowest
441    }
442
443    #[test]
444    fn test_rank_stats_from_scores() {
445        let stats = RankStats::from_scores([1.0, 2.0, 3.0, 4.0, 5.0].into_iter());
446        assert_eq!(stats.total_count, 5);
447        assert!((stats.avg - 3.0).abs() < f64::EPSILON);
448        assert!((stats.max - 5.0).abs() < f64::EPSILON);
449        assert!((stats.min - 1.0).abs() < f64::EPSILON);
450    }
451
452    // ── format_ranked_table tests ──────────────────────────────────────
453
454    #[derive(Clone)]
455    struct TestEntry {
456        name: String,
457        score: usize,
458    }
459
460    impl RankEntry for TestEntry {
461        fn columns() -> Vec<Column> {
462            vec![Column::left("Name"), Column::right("Score")]
463        }
464
465        fn values(&self) -> Vec<String> {
466            vec![self.name.clone(), self.score.to_string()]
467        }
468    }
469
470    #[test]
471    fn test_format_ranked_table_basic() {
472        let entries = vec![
473            TestEntry {
474                name: "alpha".into(),
475                score: 100,
476            },
477            TestEntry {
478                name: "beta".into(),
479                score: 42,
480            },
481        ];
482        let text = format_ranked_table("# Test Report", &entries, None);
483        assert!(text.contains("# Test Report"));
484        assert!(text.contains("Name"));
485        assert!(text.contains("Score"));
486        assert!(text.contains("alpha"));
487        assert!(text.contains("100"));
488        assert!(text.contains("beta"));
489        assert!(text.contains("42"));
490    }
491
492    #[test]
493    fn test_format_ranked_table_empty() {
494        let entries: Vec<TestEntry> = vec![];
495        let text = format_ranked_table("# Empty", &entries, Some("Nothing here."));
496        assert!(text.contains("Nothing here."));
497        assert!(!text.contains("Name")); // no header for empty
498    }
499
500    #[test]
501    fn test_format_ranked_table_alignment() {
502        let entries = vec![
503            TestEntry {
504                name: "a".into(),
505                score: 1,
506            },
507            TestEntry {
508                name: "long name".into(),
509                score: 9999,
510            },
511        ];
512        let text = format_ranked_table("# Align", &entries, None);
513        let lines: Vec<&str> = text.lines().collect();
514        // Header line should have right-aligned Score header
515        let header = lines[2]; // title, blank, header
516        assert!(header.contains("Name"));
517        assert!(header.contains("Score"));
518        // Data: "a" should be left-padded to match "long name" width
519        let row_a = lines[4]; // title, blank, header, sep, row
520        assert!(row_a.starts_with("a"));
521    }
522
523    // ── DiffableRankEntry tests ───────────────────────────────────────
524
525    #[derive(Clone)]
526    struct DiffEntry {
527        module: String,
528        ratio: f64,
529        delta: Option<f64>,
530    }
531
532    impl DiffableRankEntry for DiffEntry {
533        fn diff_key(&self) -> &str {
534            &self.module
535        }
536        fn diff_score(&self) -> f64 {
537            self.ratio
538        }
539        fn set_delta(&mut self, delta: Option<f64>) {
540            self.delta = delta;
541        }
542        fn delta(&self) -> Option<f64> {
543            self.delta
544        }
545    }
546
547    #[test]
548    fn test_compute_ranked_diff_basic() {
549        let baseline = vec![
550            DiffEntry {
551                module: "crate-a".into(),
552                ratio: 0.10,
553                delta: None,
554            },
555            DiffEntry {
556                module: "crate-b".into(),
557                ratio: 0.05,
558                delta: None,
559            },
560        ];
561        let mut current = vec![
562            DiffEntry {
563                module: "crate-a".into(),
564                ratio: 0.08,
565                delta: None,
566            },
567            DiffEntry {
568                module: "crate-b".into(),
569                ratio: 0.12,
570                delta: None,
571            },
572            DiffEntry {
573                module: "crate-c".into(),
574                ratio: 0.03,
575                delta: None,
576            },
577        ];
578
579        compute_ranked_diff(&mut current, &baseline);
580
581        // crate-b: 0.12 - 0.05 = 0.07 (largest |delta|)
582        // crate-c: 0.03 (new, no baseline) = 0.03
583        // crate-a: 0.08 - 0.10 = -0.02
584        assert_eq!(current[0].module, "crate-b");
585        assert!((current[0].delta.unwrap() - 0.07).abs() < 1e-9);
586
587        assert_eq!(current[1].module, "crate-c");
588        assert!((current[1].delta.unwrap() - 0.03).abs() < 1e-9);
589
590        assert_eq!(current[2].module, "crate-a");
591        assert!((current[2].delta.unwrap() - (-0.02)).abs() < 1e-9);
592    }
593
594    #[test]
595    fn test_compute_ranked_diff_empty_baseline() {
596        let baseline: Vec<DiffEntry> = vec![];
597        let mut current = vec![DiffEntry {
598            module: "a".into(),
599            ratio: 0.5,
600            delta: None,
601        }];
602
603        compute_ranked_diff(&mut current, &baseline);
604
605        // Everything is "new" — delta = score
606        assert!((current[0].delta.unwrap() - 0.5).abs() < 1e-9);
607    }
608
609    #[test]
610    fn test_format_delta() {
611        assert_eq!(format_delta(0.07, false), "+0.07");
612        assert_eq!(format_delta(-0.02, false), "-0.02");
613        assert_eq!(format_delta(5.3, true), "+5.3%");
614        assert_eq!(format_delta(-2.1, true), "-2.1%");
615    }
616}