1use cloakrs_core::{PiiEntity, Result, Scanner};
4use serde::{Deserialize, Serialize};
5use std::collections::HashSet;
6use std::io::{Read, Write};
7
8#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
10pub struct CsvScanOptions {
11 pub has_headers: bool,
13 pub columns: Vec<String>,
15 pub column_indexes: Vec<usize>,
17 pub delimiter: u8,
19}
20
21impl CsvScanOptions {
22 fn delimiter(&self) -> u8 {
23 if self.delimiter == 0 {
24 b','
25 } else {
26 self.delimiter
27 }
28 }
29}
30
/// Scan outcome for a single CSV cell in which the scanner reported findings.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CsvCellScanResult {
    /// One-based position of the record among the data records yielded by
    /// the CSV reader (a header row, when enabled, is not counted).
    pub row_number: usize,
    /// Zero-based position of the cell within its record.
    pub column_index: usize,
    /// Header name for this column, when a header row was read and it has an
    /// entry at `column_index`.
    pub column_name: Option<String>,
    /// Entities the scanner reported for the cell's value.
    pub findings: Vec<PiiEntity>,
    /// Masked replacement text returned by the scanner, if it produced one.
    pub masked_value: Option<String>,
}
45
/// Result of scanning a whole CSV document held in memory.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CsvScanResult {
    /// One entry per cell in which the scanner reported findings.
    pub cells: Vec<CsvCellScanResult>,
    /// The re-serialized CSV with masked values substituted into those cells.
    pub masked_csv: String,
}
54
55pub fn scan_csv_str(
57 input: &str,
58 scanner: &Scanner,
59 options: &CsvScanOptions,
60) -> Result<CsvScanResult> {
61 let mut output = Vec::new();
62 let cells = mask_csv_reader(input.as_bytes(), &mut output, scanner, options)?;
63 let masked_csv = String::from_utf8(output)
64 .map_err(|error| cloakrs_core::CloakError::ConfigError(error.to_string()))?;
65 Ok(CsvScanResult { cells, masked_csv })
66}
67
68pub fn mask_csv_reader<R, W>(
70 reader: R,
71 writer: W,
72 scanner: &Scanner,
73 options: &CsvScanOptions,
74) -> Result<Vec<CsvCellScanResult>>
75where
76 R: Read,
77 W: Write,
78{
79 let mut csv_reader = csv::ReaderBuilder::new()
80 .has_headers(options.has_headers)
81 .delimiter(options.delimiter())
82 .from_reader(reader);
83 let mut csv_writer = csv::WriterBuilder::new()
84 .has_headers(false)
85 .delimiter(options.delimiter())
86 .from_writer(writer);
87
88 let headers = if options.has_headers {
89 let headers = csv_reader.headers()?.clone();
90 csv_writer.write_record(&headers)?;
91 Some(headers)
92 } else {
93 None
94 };
95 let selected = selected_indexes(headers.as_ref(), options);
96 let mut cells = Vec::new();
97
98 for (row_index, record) in csv_reader.records().enumerate() {
99 let record = record?;
100 let mut masked_record: Vec<String> = record.iter().map(str::to_string).collect();
101 for (column_index, value) in record.iter().enumerate() {
102 if !selected.is_empty() && !selected.contains(&column_index) {
103 continue;
104 }
105 let scan = scanner.scan(value)?;
106 if scan.findings.is_empty() {
107 continue;
108 }
109 let masked_value = scan.masked_text.clone();
110 if let Some(masked_value) = &masked_value {
111 masked_record[column_index] = masked_value.clone();
112 }
113 cells.push(CsvCellScanResult {
114 row_number: row_index + 1,
115 column_index,
116 column_name: headers
117 .as_ref()
118 .and_then(|headers| headers.get(column_index))
119 .map(str::to_string),
120 findings: scan.findings,
121 masked_value,
122 });
123 }
124 csv_writer.write_record(masked_record)?;
125 }
126 csv_writer.flush()?;
127 Ok(cells)
128}
129
130fn selected_indexes(
131 headers: Option<&csv::StringRecord>,
132 options: &CsvScanOptions,
133) -> HashSet<usize> {
134 let mut selected: HashSet<usize> = options.column_indexes.iter().copied().collect();
135 if let Some(headers) = headers {
136 for column in &options.columns {
137 if let Some(index) = headers.iter().position(|header| header == column) {
138 selected.insert(index);
139 }
140 }
141 }
142 selected
143}
144
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::Locale;
    use cloakrs_patterns::default_registry;

    /// Builds a scanner from the default pattern registry with the US locale.
    fn scanner() -> Scanner {
        let builder = default_registry()
            .into_scanner_builder()
            .locale(Locale::US);
        builder.build().unwrap()
    }

    /// Assembles scan options from borrowed pieces so each test only spells
    /// out the values it cares about.
    fn build_options(
        has_headers: bool,
        columns: &[&str],
        column_indexes: &[usize],
        delimiter: u8,
    ) -> CsvScanOptions {
        CsvScanOptions {
            has_headers,
            columns: columns.iter().map(|name| name.to_string()).collect(),
            column_indexes: column_indexes.to_vec(),
            delimiter,
        }
    }

    #[test]
    fn test_scan_csv_str_with_headers_scans_named_column() {
        let opts = build_options(true, &["email"], &[], b',');
        let csv_text = "name,email\nJane,jane@example.com\n";

        let result = scan_csv_str(csv_text, &scanner(), &opts).unwrap();

        assert_eq!(result.cells.len(), 1);
        assert_eq!(result.cells[0].column_name.as_deref(), Some("email"));
        assert!(result.masked_csv.contains("[EMAIL]"));
    }

    #[test]
    fn test_scan_csv_str_without_headers_scans_index() {
        let opts = build_options(false, &[], &[1], b',');
        let csv_text = "Jane,jane@example.com\n";

        let result = scan_csv_str(csv_text, &scanner(), &opts).unwrap();

        assert_eq!(result.cells[0].row_number, 1);
        assert_eq!(result.cells[0].column_index, 1);
    }

    #[test]
    fn test_scan_csv_str_empty_selection_scans_all_columns() {
        let opts = build_options(true, &[], &[], b',');
        let csv_text = "name,email,phone\nJane,jane@example.com,+1 (555) 123-4567\n";

        let result = scan_csv_str(csv_text, &scanner(), &opts).unwrap();

        assert_eq!(result.cells.len(), 2);
    }

    #[test]
    fn test_scan_csv_str_semicolon_delimiter() {
        let opts = build_options(true, &["email"], &[], b';');
        let csv_text = "name;email\nJane;jane@example.com\n";

        let result = scan_csv_str(csv_text, &scanner(), &opts).unwrap();

        assert!(result.masked_csv.contains("[EMAIL]"));
    }

    #[test]
    fn test_scan_csv_str_quoted_multiline_field() {
        let opts = build_options(true, &["notes"], &[], b',');
        // The quoted field spans two physical lines but is a single record.
        let csv_text = "notes\n\"hello\nemail jane@example.com\"\n";

        let result = scan_csv_str(csv_text, &scanner(), &opts).unwrap();

        assert_eq!(result.cells.len(), 1);
    }
}