1use crate::dialect::{parse_row, Dialect};
13use crate::model::Finding;
14
/// A parsed delimited-text table: one header row plus the data rows below it.
///
/// Tables produced by `parse_table` only contain rows whose length equals
/// `header.len()`. Both fields are public, so a hand-built `Table` is not
/// bound by that rule.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Table {
    /// Column names, in the order they appear in the header line.
    pub header: Vec<String>,
    /// Data rows in input order; each inner vector holds one row's cells.
    pub rows: Vec<Vec<String>>,
}
23
24pub fn parse_table(text: &[u8], d: &Dialect) -> Result<Table, Vec<Finding>> {
27 let lines = split_lines(text);
28 let mut findings = Vec::new();
29 if lines.is_empty() {
30 return Err(vec![Finding::new("CSV_EMPTY", "no rows (expected a header)".to_string())]);
31 }
32 let header = match parse_row(lines[0], d) {
33 Ok(h) => h,
34 Err(f) => return Err(vec![f]),
35 };
36 let mut rows = Vec::new();
37 for (i, line) in lines.iter().enumerate().skip(1) {
38 match parse_row(line, d) {
39 Ok(r) => {
40 if r.len() != header.len() {
41 findings.push(Finding::new(
42 "COLUMN_COUNT",
43 format!("row {}: {} columns, header has {}", i, r.len(), header.len()),
44 ));
45 } else {
46 rows.push(r);
47 }
48 }
49 Err(mut f) => {
50 f.message = format!("row {}: {}", i, f.message);
51 findings.push(f);
52 }
53 }
54 }
55 if findings.is_empty() {
56 Ok(Table { header, rows })
57 } else {
58 Err(findings)
59 }
60}
61
/// Splits `text` into lines on `\n` (0x0a) bytes, without the terminators.
///
/// A final newline does not produce an extra empty line, and empty input
/// yields no lines. Empty lines elsewhere are kept. Carriage returns are left
/// in place, so a CRLF-terminated line keeps its trailing `\r`.
fn split_lines(text: &[u8]) -> Vec<&[u8]> {
    let mut lines: Vec<&[u8]> = text.split(|&byte| byte == 0x0a).collect();
    // `split` always yields a final segment after the last separator; when that
    // segment is empty the input either ended with `\n` or was empty, and in
    // both cases it is not a line.
    if matches!(lines.last(), Some(tail) if tail.is_empty()) {
        lines.pop();
    }
    lines
}
76
/// One cell whose value differs between the source and target tables.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DiffEntry {
    /// Zero-based index of the data row (the header is not counted).
    pub row: usize,
    /// Name of the column the differing cell belongs to.
    pub field: String,
    /// Cell value in the source table; empty when the row or cell is missing there.
    pub source: String,
    /// Cell value in the target table; empty when the row or cell is missing there.
    pub target: String,
}
89
90#[derive(Debug, Clone, PartialEq, Eq)]
92pub struct DiffReport {
93 pub entries: Vec<DiffEntry>,
95 pub findings: Vec<Finding>,
97}
98
99impl DiffReport {
100 pub fn is_clean(&self) -> bool {
102 self.entries.is_empty() && self.findings.is_empty()
103 }
104}
105
106pub fn diff(source: &Table, target: &Table) -> DiffReport {
109 let mut entries = Vec::new();
110 let mut findings = Vec::new();
111
112 for col in &source.header {
114 if !target.header.iter().any(|c| c == col) {
115 findings.push(Finding::new(
116 "COLUMN_ONLY_IN_SOURCE",
117 format!("column {} present in source but not target", col),
118 ));
119 }
120 }
121 for col in &target.header {
123 if !source.header.iter().any(|c| c == col) {
124 findings.push(Finding::new(
125 "COLUMN_ONLY_IN_TARGET",
126 format!("column {} present in target but not source", col),
127 ));
128 }
129 }
130
131 if source.rows.len() != target.rows.len() {
133 findings.push(Finding::new(
134 "ROW_COUNT",
135 format!("source has {} rows, target has {}", source.rows.len(), target.rows.len()),
136 ));
137 }
138
139 let common: Vec<&String> =
141 source.header.iter().filter(|c| target.header.iter().any(|t| &t == c)).collect();
142
143 let nrows = source.rows.len().max(target.rows.len());
144 for r in 0..nrows {
145 let srow = source.rows.get(r);
146 let trow = target.rows.get(r);
147 for col in &common {
148 let sval = srow
149 .and_then(|row| column_value(&source.header, row, col))
150 .unwrap_or_default();
151 let tval = trow
152 .and_then(|row| column_value(&target.header, row, col))
153 .unwrap_or_default();
154 if sval != tval {
155 entries.push(DiffEntry {
156 row: r,
157 field: (*col).clone(),
158 source: sval,
159 target: tval,
160 });
161 }
162 }
163 }
164
165 DiffReport { entries, findings }
166}
167
168fn column_value(header: &[String], row: &[String], col: &str) -> Option<String> {
170 let idx = header.iter().position(|h| h == col)?;
171 row.get(idx).cloned()
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text` with the CSV dialect, panicking if the input is rejected.
    fn table(text: &[u8]) -> Table {
        parse_table(text, &Dialect::csv()).expect("parse table")
    }

    #[test]
    fn identical_tables_are_clean() {
        let only = table(b"ACCT,AMT\nA1,12.50\nA2,0.99\n");
        assert!(diff(&only, &only).is_clean());
    }

    #[test]
    fn changed_cell_is_reported() {
        let before = table(b"ACCT,AMT\nA1,12.50\n");
        let after = table(b"ACCT,AMT\nA1,99.99\n");
        let report = diff(&before, &after);
        assert_eq!(report.entries.len(), 1);
        let changed = &report.entries[0];
        assert_eq!(changed.field, "AMT");
        assert_eq!(changed.source, "12.50");
        assert_eq!(changed.target, "99.99");
    }

    #[test]
    fn column_alignment_by_name_not_position() {
        // Same data, columns swapped: nothing should be reported.
        let original = table(b"ACCT,AMT\nA1,12.50\n");
        let reordered = table(b"AMT,ACCT\n12.50,A1\n");
        let report = diff(&original, &reordered);
        assert!(report.entries.is_empty(), "entries: {:?}", report.entries);
        assert!(report.findings.is_empty());
    }

    #[test]
    fn column_only_in_one_table_is_a_finding() {
        let wide = table(b"ACCT,AMT,STATUS\nA1,12.50,OK\n");
        let narrow = table(b"ACCT,AMT\nA1,12.50\n");
        let report = diff(&wide, &narrow);
        let flagged = report
            .findings
            .iter()
            .any(|f| f.code == "COLUMN_ONLY_IN_SOURCE");
        assert!(flagged);
    }

    #[test]
    fn row_count_mismatch_is_a_finding() {
        let longer = table(b"ACCT,AMT\nA1,12.50\nA2,1.00\n");
        let shorter = table(b"ACCT,AMT\nA1,12.50\n");
        let report = diff(&longer, &shorter);
        let flagged = report.findings.iter().any(|f| f.code == "ROW_COUNT");
        assert!(flagged);
    }
}