sanitize_engine/processor/
csv_proc.rs1use crate::error::{Result, SanitizeError};
20use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
21use crate::store::MappingStore;
22
/// Largest input, in bytes, that `CsvProcessor::process` accepts (256 MiB).
const MAX_CSV_INPUT_SIZE: usize = 256 << 20;

/// Stateless unit type that provides the `Processor` implementation for CSV content.
pub struct CsvProcessor;
28
29impl Processor for CsvProcessor {
30 fn name(&self) -> &'static str {
31 "csv"
32 }
33
34 fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
35 profile.processor == "csv"
36 }
37
38 fn process(
39 &self,
40 content: &[u8],
41 profile: &FileTypeProfile,
42 store: &MappingStore,
43 ) -> Result<Vec<u8>> {
44 if content.len() > MAX_CSV_INPUT_SIZE {
46 return Err(SanitizeError::InputTooLarge {
47 size: content.len(),
48 limit: MAX_CSV_INPUT_SIZE,
49 });
50 }
51
52 let delimiter = profile
53 .options
54 .get("delimiter")
55 .and_then(|s| s.as_bytes().first().copied())
56 .unwrap_or(b',');
57
58 let has_header = profile
59 .options
60 .get("has_header")
61 .map_or(true, |v| v != "false");
62
63 let mut reader = csv::ReaderBuilder::new()
64 .delimiter(delimiter)
65 .has_headers(has_header)
66 .flexible(true)
67 .from_reader(content);
68
69 let mut output = Vec::new();
70 let mut wtr = csv::WriterBuilder::new()
71 .delimiter(delimiter)
72 .from_writer(&mut output);
73
74 let column_rules: Vec<Option<usize>> = if has_header {
76 let headers = reader
77 .headers()
78 .map_err(|e| SanitizeError::ParseError {
79 format: "CSV".into(),
80 message: format!("CSV header error: {}", e),
81 })?
82 .clone();
83
84 wtr.write_record(headers.iter())
86 .map_err(|e| SanitizeError::IoError(format!("CSV write error: {}", e)))?;
87
88 headers
90 .iter()
91 .map(|h| {
92 profile.fields.iter().position(|r| {
93 find_matching_rule(h, &FileTypeProfile::new("csv", vec![r.clone()]))
94 .is_some()
95 })
96 })
97 .collect()
98 } else {
99 Vec::new()
100 };
101
102 for result in reader.records() {
103 let record = result.map_err(|e| SanitizeError::ParseError {
104 format: "CSV".into(),
105 message: format!("CSV read error: {}", e),
106 })?;
107
108 let mut row: Vec<String> = Vec::with_capacity(record.len());
109 for (idx, field) in record.iter().enumerate() {
110 if has_header {
111 if let Some(Some(rule_idx)) = column_rules.get(idx) {
112 let rule = &profile.fields[*rule_idx];
113 let replaced = replace_value(field, rule, store)?;
114 row.push(replaced);
115 } else {
116 row.push(field.to_string());
117 }
118 } else {
119 let col_key = idx.to_string();
121 if let Some(rule) = find_matching_rule(&col_key, profile) {
122 let replaced = replace_value(field, rule, store)?;
123 row.push(replaced);
124 } else {
125 row.push(field.to_string());
126 }
127 }
128 }
129
130 wtr.write_record(&row)
131 .map_err(|e| SanitizeError::IoError(format!("CSV write error: {}", e)))?;
132 }
133
134 wtr.flush()
135 .map_err(|e| SanitizeError::IoError(format!("CSV flush error: {}", e)))?;
136 drop(wtr);
137
138 Ok(output)
139 }
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs `CsvProcessor` over `content` with a fresh store and decodes the output as UTF-8.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = CsvProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Columns with a matching rule are replaced; the header and other columns are kept.
    #[test]
    fn basic_csv_replacement() {
        let rules = vec![
            FieldRule::new("name").with_category(Category::Name),
            FieldRule::new("email").with_category(Category::Email),
        ];
        let profile = FileTypeProfile::new("csv", rules);

        let out = sanitize(
            b"name,email,department\nAlice,alice@corp.com,Engineering\nBob,bob@corp.com,Sales\n",
            &profile,
        );

        // Values in the `name` and `email` columns must no longer appear.
        for secret in ["Alice", "alice@corp.com", "Bob", "bob@corp.com"] {
            assert!(!out.contains(secret));
        }
        // The `department` column has no rule and passes through.
        for kept in ["Engineering", "Sales"] {
            assert!(out.contains(kept));
        }
        // The header row is written unchanged.
        assert!(out.starts_with("name,email,department"));
    }

    /// The same input value yields the same replacement on every row.
    #[test]
    fn csv_deterministic_replacement() {
        let profile = FileTypeProfile::new(
            "csv",
            vec![FieldRule::new("email").with_category(Category::Email)],
        );

        let out = sanitize(b"email\ntest@x.com\ntest@x.com\n", &profile);
        let rows: Vec<&str> = out.lines().collect();

        assert_eq!(rows[1], rows[2]);
        assert_ne!(rows[1], "test@x.com");
    }
}