sanitize_engine/processor/
csv_proc.rs1use crate::error::{Result, SanitizeError};
20use crate::processor::{find_matching_rule, pattern_matches, replace_value, FileTypeProfile, Processor};
21use crate::store::MappingStore;
22
/// Largest input, in bytes, that [`CsvProcessor`] accepts before refusing
/// to parse; anything longer is rejected with `SanitizeError::InputTooLarge`.
const MAX_CSV_INPUT_SIZE: usize = 256 << 20; // 256 MiB

/// Processor for delimiter-separated (CSV-style) content.
///
/// The type carries no state; all configuration is read from the
/// `FileTypeProfile` handed to each call.
pub struct CsvProcessor;
28
29impl Processor for CsvProcessor {
30 fn name(&self) -> &'static str {
31 "csv"
32 }
33
34 fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
35 profile.processor == "csv"
36 }
37
38 fn process(
39 &self,
40 content: &[u8],
41 profile: &FileTypeProfile,
42 store: &MappingStore,
43 ) -> Result<Vec<u8>> {
44 if content.len() > MAX_CSV_INPUT_SIZE {
46 return Err(SanitizeError::InputTooLarge {
47 size: content.len(),
48 limit: MAX_CSV_INPUT_SIZE,
49 });
50 }
51
52 let delimiter = profile
53 .options
54 .get("delimiter")
55 .and_then(|s| s.as_bytes().first().copied())
56 .unwrap_or(b',');
57
58 let has_header = profile
59 .options
60 .get("has_header")
61 .map_or(true, |v| v != "false");
62
63 let mut reader = csv::ReaderBuilder::new()
64 .delimiter(delimiter)
65 .has_headers(has_header)
66 .flexible(true)
67 .from_reader(content);
68
69 let mut output = Vec::new();
70 let mut wtr = csv::WriterBuilder::new()
71 .delimiter(delimiter)
72 .from_writer(&mut output);
73
74 let column_rules: Vec<Option<usize>> = if has_header {
76 let headers = reader
77 .headers()
78 .map_err(|e| SanitizeError::ParseError {
79 format: "CSV".into(),
80 message: format!("CSV header error: {}", e),
81 })?
82 .clone();
83
84 wtr.write_record(headers.iter())
86 .map_err(|e| SanitizeError::IoError(format!("CSV write error: {}", e)))?;
87
88 headers
92 .iter()
93 .map(|h| {
94 profile
95 .fields
96 .iter()
97 .position(|r| pattern_matches(&r.pattern, h))
98 })
99 .collect()
100 } else {
101 Vec::new()
102 };
103
104 for result in reader.records() {
105 let record = result.map_err(|e| SanitizeError::ParseError {
106 format: "CSV".into(),
107 message: format!("CSV read error: {}", e),
108 })?;
109
110 let mut row: Vec<String> = Vec::with_capacity(record.len());
111 for (idx, field) in record.iter().enumerate() {
112 if has_header {
113 if let Some(Some(rule_idx)) = column_rules.get(idx) {
114 let rule = &profile.fields[*rule_idx];
115 let replaced = replace_value(field, rule, store)?;
116 row.push(replaced);
117 } else {
118 row.push(field.to_string());
119 }
120 } else {
121 let col_key = idx.to_string();
123 if let Some(rule) = find_matching_rule(&col_key, profile) {
124 let replaced = replace_value(field, rule, store)?;
125 row.push(replaced);
126 } else {
127 row.push(field.to_string());
128 }
129 }
130 }
131
132 wtr.write_record(&row)
133 .map_err(|e| SanitizeError::IoError(format!("CSV write error: {}", e)))?;
134 }
135
136 wtr.flush()
137 .map_err(|e| SanitizeError::IoError(format!("CSV flush error: {}", e)))?;
138 drop(wtr);
139
140 Ok(output)
141 }
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store around an HMAC generator seeded with a fixed
    /// key, so every test starts from the same generator configuration.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs `input` through a `CsvProcessor` with a fresh store and returns
    /// the output decoded as UTF-8, panicking if either step fails.
    fn sanitize(input: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = CsvProcessor.process(input, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn basic_csv_replacement() {
        let rules = vec![
            FieldRule::new("name").with_category(Category::Name),
            FieldRule::new("email").with_category(Category::Email),
        ];
        let profile = FileTypeProfile::new("csv", rules);
        let input =
            b"name,email,department\nAlice,alice@corp.com,Engineering\nBob,bob@corp.com,Sales\n";

        let out = sanitize(input, &profile);

        // Values in columns covered by a rule must no longer appear.
        for secret in &["Alice", "alice@corp.com", "Bob", "bob@corp.com"] {
            assert!(!out.contains(*secret));
        }
        // The column without a rule is passed through untouched.
        for kept in &["Engineering", "Sales"] {
            assert!(out.contains(*kept));
        }
        // The header row is emitted first and unchanged.
        assert!(out.starts_with("name,email,department"));
    }

    #[test]
    fn csv_deterministic_replacement() {
        let profile = FileTypeProfile::new(
            "csv",
            vec![FieldRule::new("email").with_category(Category::Email)],
        );

        let out = sanitize(b"email\ntest@x.com\ntest@x.com\n", &profile);
        let rows: Vec<&str> = out.lines().collect();
        let (first, second) = (rows[1], rows[2]);

        // Identical inputs map to identical replacements ...
        assert_eq!(first, second);
        // ... and the replacement differs from the original value.
        assert_ne!(first, "test@x.com");
    }
}