Skip to main content

harn_rules/
report.rs

1//! Data tables — rules emit structured findings/metrics (#2837).
2//!
3//! Adopted from OpenRewrite's first-class data tables: a rule run can emit
4//! columnar findings + a metrics summary alongside (or instead of) diffs.
5//! This is **report-only** — building a table never edits anything — which
6//! is what inventory, impact analysis, and audit need.
7//!
8//! The envelope is plain `serde` (`Serialize`), consistent with the rest of
9//! the `--json` surfaces; [`DataTable::to_json`] renders it.
10
11use std::collections::{BTreeMap, BTreeSet};
12
13use serde::Serialize;
14
15use crate::engine::CompiledRule;
16use crate::error::RulesError;
17use crate::recipe::SourceFile;
18
19/// A columnar table of a rule's findings across a project.
20#[derive(Debug, Clone, Serialize)]
21pub struct DataTable {
22    /// The rule that produced the table.
23    pub rule_id: String,
24    /// The metavar column names present across all rows (sorted, stable).
25    pub columns: Vec<String>,
26    /// One row per match.
27    pub rows: Vec<TableRow>,
28    /// Roll-up metrics.
29    pub summary: TableSummary,
30}
31
32/// One finding: where it is, the matched text, and the metavar bindings.
33#[derive(Debug, Clone, Serialize)]
34pub struct TableRow {
35    /// The file the match is in.
36    pub path: String,
37    /// 0-based start row.
38    pub start_row: usize,
39    /// 0-based start column.
40    pub start_col: usize,
41    /// The matched text.
42    pub text: String,
43    /// The metavar bindings, keyed by name.
44    pub bindings: BTreeMap<String, String>,
45}
46
47/// Roll-up metrics for a [`DataTable`].
48#[derive(Debug, Clone, Serialize)]
49pub struct TableSummary {
50    /// Total number of findings (rows).
51    pub total_rows: usize,
52    /// Number of files with at least one finding.
53    pub files: usize,
54    /// Per-file finding counts (the #2824 "sites / files" measurement).
55    pub per_file: BTreeMap<String, usize>,
56}
57
58impl DataTable {
59    /// Render the table as a JSON string.
60    pub fn to_json(&self) -> String {
61        serde_json::to_string(self).expect("DataTable serializes")
62    }
63
64    /// Render the table as a `serde_json::Value`.
65    pub fn to_json_value(&self) -> serde_json::Value {
66        serde_json::to_value(self).expect("DataTable serializes")
67    }
68}
69
70/// Run `rule` over `files` in **report-only** mode and collect a
71/// [`DataTable`] — one row per match, with per-file and total counts. No
72/// edits are made.
73pub fn data_table(rule: &CompiledRule, files: &[SourceFile]) -> Result<DataTable, RulesError> {
74    let mut rows: Vec<TableRow> = Vec::new();
75    let mut per_file: BTreeMap<String, usize> = BTreeMap::new();
76
77    for file in files {
78        if file.language != rule.language() {
79            continue;
80        }
81        let matches = rule.run(&file.source)?;
82        if matches.is_empty() {
83            continue;
84        }
85        let path = file.path.display().to_string();
86        per_file.insert(path.clone(), matches.len());
87        for m in matches {
88            rows.push(TableRow {
89                path: path.clone(),
90                start_row: m.span.start_row,
91                start_col: m.span.start_col,
92                text: m.text,
93                bindings: m
94                    .bindings
95                    .into_iter()
96                    .map(|(name, binding)| (name, binding.text))
97                    .collect(),
98            });
99        }
100    }
101
102    let columns: BTreeSet<String> = rows
103        .iter()
104        .flat_map(|r| r.bindings.keys().cloned())
105        .collect();
106
107    let summary = TableSummary {
108        total_rows: rows.len(),
109        files: per_file.len(),
110        per_file,
111    };
112
113    Ok(DataTable {
114        rule_id: rule.id().to_string(),
115        columns: columns.into_iter().collect(),
116        rows,
117        summary,
118    })
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Rule;

    /// Parse `toml` as a rule definition and compile it; panics on any error.
    fn compiled(toml: &str) -> CompiledRule {
        let parsed = Rule::from_toml_str(toml).unwrap();
        CompiledRule::compile(&parsed).unwrap()
    }

    /// Build a `SourceFile` for `path` with the given contents via
    /// `SourceFile::detect`; panics if detection fails.
    fn file(path: &str, source: &str) -> SourceFile {
        SourceFile::detect(path, source).unwrap()
    }

    /// Two files with call sites and one without: the summary counts sites
    /// per file and leaves the match-free file out entirely.
    #[test]
    fn report_only_table_counts_sites_per_file() {
        let find_calls = compiled(
            r#"
            id = "find-calls"
            language = "typescript"
            [rule]
            pattern = "$FN()"
            "#,
        );
        let inputs = [
            file("a.ts", "foo();\nbar();\n"),
            file("b.ts", "baz();\n"),
            file("c.ts", "const x = 1;\n"),
        ];

        let table = data_table(&find_calls, &inputs).unwrap();

        let summary = &table.summary;
        assert_eq!(summary.total_rows, 3);
        assert_eq!(summary.files, 2);
        assert_eq!(summary.per_file.get("a.ts"), Some(&2));
        assert_eq!(summary.per_file.get("b.ts"), Some(&1));
        assert_eq!(summary.per_file.get("c.ts"), None);
        assert_eq!(table.columns, vec!["FN"]);
        assert_eq!(
            table.rows[0].bindings.get("FN").map(String::as_str),
            Some("foo")
        );
    }

    /// The JSON envelope exposes the rule id, the summary and the bindings.
    #[test]
    fn table_serializes_to_json() {
        let single = compiled(
            r#"
            id = "r"
            language = "typescript"
            [rule]
            pattern = "$FN()"
            "#,
        );
        let inputs = [file("a.ts", "go();\n")];

        let json = data_table(&single, &inputs).unwrap().to_json_value();

        assert_eq!(json["rule_id"], "r");
        assert_eq!(json["summary"]["total_rows"], 1);
        assert_eq!(json["rows"][0]["bindings"]["FN"], "go");
    }
}