1use ferray_core::error::{FerrayError, FerrayResult};
4
/// Options controlling how delimited text is split into a grid of cells.
///
/// Derives `PartialEq`/`Eq` so that option sets can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextParseOptions {
    /// Character that separates fields within a line.
    pub delimiter: char,
    /// Number of leading data lines to drop. Blank lines and comment lines
    /// are removed before this count is applied, so they do not count.
    pub skiprows: usize,
    /// When `Some(c)`, lines whose first non-whitespace character is `c` are
    /// ignored. `None` disables comment handling.
    pub comments: Option<char>,
    /// Upper bound on the number of data lines kept after skipping.
    /// `None` keeps every remaining line.
    pub max_rows: Option<usize>,
}
17
18impl Default for TextParseOptions {
19 fn default() -> Self {
20 Self {
21 delimiter: ',',
22 skiprows: 0,
23 comments: Some('#'),
24 max_rows: None,
25 }
26 }
27}
28
29pub fn parse_text_grid(
33 content: &str,
34 opts: &TextParseOptions,
35) -> FerrayResult<(Vec<String>, usize, usize)> {
36 let all_lines: Vec<&str> = content.lines().collect();
37
38 let non_comment_lines: Vec<&str> = all_lines
42 .iter()
43 .filter(|line| {
44 let trimmed = line.trim();
45 if trimmed.is_empty() {
46 return false;
47 }
48 if let Some(comment_char) = opts.comments {
49 if trimmed.starts_with(comment_char) {
50 return false;
51 }
52 }
53 true
54 })
55 .copied()
56 .collect();
57
58 let data_lines: &[&str] = if opts.skiprows < non_comment_lines.len() {
60 &non_comment_lines[opts.skiprows..]
61 } else {
62 &[]
63 };
64
65 let data_lines = if let Some(max) = opts.max_rows {
67 &data_lines[..data_lines.len().min(max)]
68 } else {
69 data_lines
70 };
71
72 if data_lines.is_empty() {
73 return Ok((vec![], 0, 0));
74 }
75
76 let delim = opts.delimiter;
77 let nrows = data_lines.len();
78
79 let first_fields: Vec<&str> = data_lines[0].split(delim).collect();
81 let ncols = first_fields.len();
82
83 let mut cells = Vec::with_capacity(nrows * ncols);
84
85 for (row_idx, line) in data_lines.iter().enumerate() {
86 let fields: Vec<&str> = line.split(delim).collect();
87 if fields.len() != ncols {
88 return Err(FerrayError::io_error(format!(
89 "row {} has {} columns, expected {} (line: '{}')",
90 row_idx + opts.skiprows,
91 fields.len(),
92 ncols,
93 line,
94 )));
95 }
96 for field in fields {
97 cells.push(field.trim().to_string());
98 }
99 }
100
101 Ok((cells, nrows, ncols))
102}
103
104pub fn parse_text_grid_with_missing(
109 content: &str,
110 opts: &TextParseOptions,
111 missing_values: &[&str],
112) -> FerrayResult<(Vec<Option<String>>, usize, usize)> {
113 let all_lines: Vec<&str> = content.lines().collect();
114
115 let non_comment_lines: Vec<&str> = all_lines
117 .iter()
118 .filter(|line| {
119 let trimmed = line.trim();
120 if trimmed.is_empty() {
121 return false;
122 }
123 if let Some(comment_char) = opts.comments {
124 if trimmed.starts_with(comment_char) {
125 return false;
126 }
127 }
128 true
129 })
130 .copied()
131 .collect();
132
133 let data_lines: &[&str] = if opts.skiprows < non_comment_lines.len() {
134 &non_comment_lines[opts.skiprows..]
135 } else {
136 &[]
137 };
138
139 let data_lines = if let Some(max) = opts.max_rows {
140 &data_lines[..data_lines.len().min(max)]
141 } else {
142 data_lines
143 };
144
145 if data_lines.is_empty() {
146 return Ok((vec![], 0, 0));
147 }
148
149 let delim = opts.delimiter;
150 let nrows = data_lines.len();
151
152 let first_fields: Vec<&str> = data_lines[0].split(delim).collect();
153 let ncols = first_fields.len();
154
155 let mut cells = Vec::with_capacity(nrows * ncols);
156
157 for (row_idx, line) in data_lines.iter().enumerate() {
158 let fields: Vec<&str> = line.split(delim).collect();
159 for col_idx in 0..ncols {
161 if col_idx >= fields.len() {
162 cells.push(None);
163 } else {
164 let field = fields[col_idx].trim();
165 if field.is_empty() || missing_values.contains(&field) {
166 cells.push(None);
167 } else {
168 cells.push(Some(field.to_string()));
169 }
170 }
171 }
172 if fields.len() > ncols {
174 return Err(FerrayError::io_error(format!(
175 "row {} has {} columns, expected {} (line: '{}')",
176 row_idx + opts.skiprows,
177 fields.len(),
178 ncols,
179 line,
180 )));
181 }
182 }
183
184 Ok((cells, nrows, ncols))
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds options that override only the delimiter of the defaults.
    fn delimited_by(delimiter: char) -> TextParseOptions {
        TextParseOptions {
            delimiter,
            ..Default::default()
        }
    }

    // A plain two-row CSV yields a 2x3 grid in row-major order.
    #[test]
    fn parse_simple_csv() {
        let parsed = parse_text_grid("1,2,3\n4,5,6\n", &delimited_by(','));
        let (cells, nrows, ncols) = parsed.unwrap();
        assert_eq!((nrows, ncols), (2, 3));
        assert_eq!(cells, ["1", "2", "3", "4", "5", "6"]);
    }

    // The comment line is removed first, so `skiprows: 1` drops the header.
    #[test]
    fn parse_with_skiprows() {
        let opts = TextParseOptions {
            skiprows: 1,
            comments: Some('#'),
            ..delimited_by(',')
        };
        let text = "# header\nname,value\n1,10\n2,20\n";
        let (cells, nrows, ncols) = parse_text_grid(text, &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 2);
        assert_eq!(cells[0], "1");
    }

    // Comment lines between data rows are ignored.
    #[test]
    fn parse_with_comments() {
        let opts = TextParseOptions {
            comments: Some('#'),
            ..delimited_by(',')
        };
        let (cells, nrows, _) = parse_text_grid("1,2\n# comment\n3,4\n", &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(cells, ["1", "2", "3", "4"]);
    }

    // Tabs work as a delimiter.
    #[test]
    fn parse_tab_delimited() {
        let text = "1\t2\t3\n4\t5\t6\n";
        let (cells, nrows, ncols) = parse_text_grid(text, &delimited_by('\t')).unwrap();
        assert_eq!((nrows, ncols), (2, 3));
        assert_eq!(cells[0], "1");
    }

    // A row with a different field count is an error in the strict parser.
    #[test]
    fn parse_inconsistent_columns_error() {
        let outcome = parse_text_grid("1,2,3\n4,5\n", &TextParseOptions::default());
        assert!(outcome.is_err());
    }

    // Empty fields are reported as `None` by the lenient parser.
    #[test]
    fn parse_missing_values() {
        let defaults = TextParseOptions::default();
        let text = "1,2,3\n4,,6\n7,8,\n";
        let (cells, nrows, ncols) = parse_text_grid_with_missing(text, &defaults, &[]).unwrap();
        assert_eq!(nrows, 3);
        assert_eq!(ncols, 3);
        assert_eq!(cells[0].as_deref(), Some("1"));
        assert!(cells[4].is_none());
        assert!(cells[8].is_none());
    }

    // Caller-supplied markers such as "NA" are reported as `None`.
    #[test]
    fn parse_custom_missing_marker() {
        let defaults = TextParseOptions::default();
        let text = "1,NA,3\n4,5,NA\n";
        let (cells, _, _) = parse_text_grid_with_missing(text, &defaults, &["NA"]).unwrap();
        assert!(cells[1].is_none());
        assert!(cells[5].is_none());
        assert_eq!(cells[0].as_deref(), Some("1"));
    }

    // Empty input produces an empty 0x0 grid rather than an error.
    #[test]
    fn parse_empty_content() {
        let (cells, nrows, ncols) = parse_text_grid("", &TextParseOptions::default()).unwrap();
        assert_eq!((nrows, ncols), (0, 0));
        assert!(cells.is_empty());
    }
}