// harn_rules/report.rs

//! Data tables — rules emit structured findings/metrics (#2837).
//!
//! Adopted from OpenRewrite's first-class data tables: a rule run can emit
//! columnar findings + a metrics summary alongside (or instead of) diffs.
//! This is **report-only** — building a table never edits anything — which
//! is what inventory, impact analysis, and audit need.
//!
//! The envelope is plain `serde` (`Serialize`), consistent with the rest of
//! the `--json` surfaces; [`DataTable::to_json`] renders it.
10
11use std::collections::{BTreeMap, BTreeSet};
12
13use serde::Serialize;
14
15use crate::engine::{BindingMetadata, CompiledRule};
16use crate::error::RulesError;
17use crate::recipe::SourceFile;
18
19/// A columnar table of a rule's findings across a project.
20#[derive(Debug, Clone, Serialize)]
21pub struct DataTable {
22    /// The rule that produced the table.
23    pub rule_id: String,
24    /// The metavar column names present across all rows (sorted, stable).
25    pub columns: Vec<String>,
26    /// One row per match.
27    pub rows: Vec<TableRow>,
28    /// Roll-up metrics.
29    pub summary: TableSummary,
30}
31
32/// One finding: where it is, the matched text, and the metavar bindings.
33#[derive(Debug, Clone, Serialize)]
34pub struct TableRow {
35    /// The file the match is in.
36    pub path: String,
37    /// 0-based start row.
38    pub start_row: usize,
39    /// 0-based start column.
40    pub start_col: usize,
41    /// The matched text.
42    pub text: String,
43    /// The metavar bindings, keyed by name.
44    pub bindings: BTreeMap<String, String>,
45    /// Optional semantic metadata for Harn captures, keyed by metavar name.
46    #[serde(skip_serializing_if = "BTreeMap::is_empty")]
47    pub capture_metadata: BTreeMap<String, BindingMetadata>,
48}
49
50/// Roll-up metrics for a [`DataTable`].
51#[derive(Debug, Clone, Serialize)]
52pub struct TableSummary {
53    /// Total number of findings (rows).
54    pub total_rows: usize,
55    /// Number of files with at least one finding.
56    pub files: usize,
57    /// Per-file finding counts (the #2824 "sites / files" measurement).
58    pub per_file: BTreeMap<String, usize>,
59}
60
61impl DataTable {
62    /// Render the table as a JSON string.
63    pub fn to_json(&self) -> String {
64        serde_json::to_string(self).expect("DataTable serializes")
65    }
66
67    /// Render the table as a `serde_json::Value`.
68    pub fn to_json_value(&self) -> serde_json::Value {
69        serde_json::to_value(self).expect("DataTable serializes")
70    }
71}
72
73/// Run `rule` over `files` in **report-only** mode and collect a
74/// [`DataTable`] — one row per match, with per-file and total counts. No
75/// edits are made.
76pub fn data_table(rule: &CompiledRule, files: &[SourceFile]) -> Result<DataTable, RulesError> {
77    let mut rows: Vec<TableRow> = Vec::new();
78    let mut per_file: BTreeMap<String, usize> = BTreeMap::new();
79
80    for file in files {
81        if file.language != rule.language() {
82            continue;
83        }
84        let matches = rule.run(&file.source)?;
85        if matches.is_empty() {
86            continue;
87        }
88        let path = file.path.display().to_string();
89        per_file.insert(path.clone(), matches.len());
90        for m in matches {
91            let capture_metadata = m
92                .bindings
93                .iter()
94                .filter(|(_, binding)| !binding.metadata.is_empty())
95                .map(|(name, binding)| (name.clone(), binding.metadata.clone()))
96                .collect();
97            rows.push(TableRow {
98                path: path.clone(),
99                start_row: m.span.start_row,
100                start_col: m.span.start_col,
101                text: m.text,
102                bindings: m
103                    .bindings
104                    .into_iter()
105                    .map(|(name, binding)| (name, binding.text))
106                    .collect(),
107                capture_metadata,
108            });
109        }
110    }
111
112    let columns: BTreeSet<String> = rows
113        .iter()
114        .flat_map(|r| r.bindings.keys().cloned())
115        .collect();
116
117    let summary = TableSummary {
118        total_rows: rows.len(),
119        files: per_file.len(),
120        per_file,
121    };
122
123    Ok(DataTable {
124        rule_id: rule.id().to_string(),
125        columns: columns.into_iter().collect(),
126        rows,
127        summary,
128    })
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Rule;

    /// Parse a TOML rule definition and compile it, panicking on any error.
    fn compile_rule(toml: &str) -> CompiledRule {
        let parsed = Rule::from_toml_str(toml).unwrap();
        CompiledRule::compile(&parsed).unwrap()
    }

    /// Build a [`SourceFile`] from a path and its source text via
    /// `SourceFile::detect`, panicking if detection fails.
    fn source_file(path: &str, source: &str) -> SourceFile {
        SourceFile::detect(path, source).unwrap()
    }

    /// Matches are counted per file, and files without matches are left out.
    #[test]
    fn report_only_table_counts_sites_per_file() {
        let find_calls = compile_rule(
            r#"
            id = "find-calls"
            language = "typescript"
            [rule]
            pattern = "$FN()"
            "#,
        );
        let inputs = vec![
            source_file("a.ts", "foo();\nbar();\n"),
            source_file("b.ts", "baz();\n"),
            source_file("c.ts", "const x = 1;\n"),
        ];

        let table = data_table(&find_calls, &inputs).unwrap();
        let summary = &table.summary;

        assert_eq!(summary.total_rows, 3);
        assert_eq!(summary.files, 2);
        assert_eq!(summary.per_file["a.ts"], 2);
        assert_eq!(summary.per_file["b.ts"], 1);
        assert!(!summary.per_file.contains_key("c.ts"));
        assert_eq!(table.columns, vec!["FN"]);
        assert_eq!(table.rows[0].bindings["FN"], "foo");
    }

    /// The JSON envelope exposes the rule id, the summary, and the bindings.
    #[test]
    fn table_serializes_to_json() {
        let call_rule = compile_rule(
            r#"
            id = "r"
            language = "typescript"
            [rule]
            pattern = "$FN()"
            "#,
        );
        let inputs = [source_file("a.ts", "go();\n")];

        let json = data_table(&call_rule, &inputs).unwrap().to_json_value();

        assert_eq!(json["rule_id"], "r");
        assert_eq!(json["summary"]["total_rows"], 1);
        assert_eq!(json["rows"][0]["bindings"]["FN"], "go");
    }

    /// Harn capture metadata is carried through into the serialized rows.
    #[test]
    fn table_serializes_harn_capture_metadata() {
        let log_rule = compile_rule(
            r#"
            id = "typed-log"
            language = "harn"
            [rule]
            pattern = "log($VALUE)"
            "#,
        );
        let inputs = [source_file(
            "a.harn",
            "fn main() {\n  let count: int = 1\n  log(count)\n}\n",
        )];

        let json = data_table(&log_rule, &inputs).unwrap().to_json_value();
        let captured = &json["rows"][0]["capture_metadata"]["VALUE"];

        assert_eq!(captured["type"], "int");
        assert_eq!(captured["resolved"]["name"], "count");
        assert_eq!(captured["resolved"]["start_row"], 1);
        assert!(captured["resolved"]["span"].is_null());
    }
}
211}