1use ferray_core::error::{FerrayError, FerrayResult};
4
/// Options controlling how delimited text is split into a grid of cells.
///
/// The [`Default`] implementation selects comma-delimited input with `#`
/// comment lines, no skipped rows and no row limit.
///
/// `PartialEq`/`Eq` are derived so option sets can be compared directly
/// (for example in tests or when caching parse results).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextParseOptions {
    /// Character on which every data line is split into fields.
    pub delimiter: char,
    /// Number of leading lines to skip before data starts. The parsers in
    /// this module count only lines that are neither blank nor comments.
    pub skiprows: usize,
    /// When `Some(c)`, lines whose first non-whitespace character is `c` are
    /// ignored. `None` disables comment handling.
    pub comments: Option<char>,
    /// Upper bound on the number of data lines read after skipping.
    /// `None` means "read everything".
    pub max_rows: Option<usize>,
}
17
18impl Default for TextParseOptions {
19 fn default() -> Self {
20 Self {
21 delimiter: ',',
22 skiprows: 0,
23 comments: Some('#'),
24 max_rows: None,
25 }
26 }
27}
28
29pub fn parse_text_grid(
33 content: &str,
34 opts: &TextParseOptions,
35) -> FerrayResult<(Vec<String>, usize, usize)> {
36 let all_lines: Vec<&str> = content.lines().collect();
37
38 let non_comment_lines: Vec<&str> = all_lines
42 .iter()
43 .filter(|line| {
44 let trimmed = line.trim();
45 if trimmed.is_empty() {
46 return false;
47 }
48 if let Some(comment_char) = opts.comments {
49 if trimmed.starts_with(comment_char) {
50 return false;
51 }
52 }
53 true
54 })
55 .copied()
56 .collect();
57
58 let data_lines: &[&str] = if opts.skiprows < non_comment_lines.len() {
60 &non_comment_lines[opts.skiprows..]
61 } else {
62 &[]
63 };
64
65 let data_lines = if let Some(max) = opts.max_rows {
67 &data_lines[..data_lines.len().min(max)]
68 } else {
69 data_lines
70 };
71
72 if data_lines.is_empty() {
73 return Ok((vec![], 0, 0));
74 }
75
76 let delim = opts.delimiter;
77 let nrows = data_lines.len();
78
79 let ncols = data_lines[0].split(delim).count();
81
82 let mut cells = Vec::with_capacity(nrows * ncols);
83
84 for (row_idx, line) in data_lines.iter().enumerate() {
85 let fields: Vec<&str> = line.split(delim).collect();
86 if fields.len() != ncols {
87 return Err(FerrayError::io_error(format!(
88 "row {} has {} columns, expected {} (line: '{}')",
89 row_idx + opts.skiprows,
90 fields.len(),
91 ncols,
92 line,
93 )));
94 }
95 for field in fields {
96 cells.push(field.trim().to_string());
97 }
98 }
99
100 Ok((cells, nrows, ncols))
101}
102
103pub fn parse_text_grid_with_missing(
108 content: &str,
109 opts: &TextParseOptions,
110 missing_values: &[&str],
111) -> FerrayResult<(Vec<Option<String>>, usize, usize)> {
112 let all_lines: Vec<&str> = content.lines().collect();
113
114 let non_comment_lines: Vec<&str> = all_lines
116 .iter()
117 .filter(|line| {
118 let trimmed = line.trim();
119 if trimmed.is_empty() {
120 return false;
121 }
122 if let Some(comment_char) = opts.comments {
123 if trimmed.starts_with(comment_char) {
124 return false;
125 }
126 }
127 true
128 })
129 .copied()
130 .collect();
131
132 let data_lines: &[&str] = if opts.skiprows < non_comment_lines.len() {
133 &non_comment_lines[opts.skiprows..]
134 } else {
135 &[]
136 };
137
138 let data_lines = if let Some(max) = opts.max_rows {
139 &data_lines[..data_lines.len().min(max)]
140 } else {
141 data_lines
142 };
143
144 if data_lines.is_empty() {
145 return Ok((vec![], 0, 0));
146 }
147
148 let delim = opts.delimiter;
149 let nrows = data_lines.len();
150
151 let ncols = data_lines[0].split(delim).count();
152
153 let mut cells = Vec::with_capacity(nrows * ncols);
154
155 for (row_idx, line) in data_lines.iter().enumerate() {
156 let fields: Vec<&str> = line.split(delim).collect();
157 for col_idx in 0..ncols {
159 if col_idx >= fields.len() {
160 cells.push(None);
161 } else {
162 let field = fields[col_idx].trim();
163 if field.is_empty() || missing_values.contains(&field) {
164 cells.push(None);
165 } else {
166 cells.push(Some(field.to_string()));
167 }
168 }
169 }
170 if fields.len() > ncols {
172 return Err(FerrayError::io_error(format!(
173 "row {} has {} columns, expected {} (line: '{}')",
174 row_idx + opts.skiprows,
175 fields.len(),
176 ncols,
177 line,
178 )));
179 }
180 }
181
182 Ok((cells, nrows, ncols))
183}
184
#[cfg(test)]
mod tests {
    //! Unit tests for `parse_text_grid` and `parse_text_grid_with_missing`.

    use super::*;

    #[test]
    fn parse_simple_csv() {
        let content = "1,2,3\n4,5,6\n";
        let opts = TextParseOptions {
            delimiter: ',',
            ..Default::default()
        };
        let (cells, nrows, ncols) = parse_text_grid(content, &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 3);
        assert_eq!(cells, vec!["1", "2", "3", "4", "5", "6"]);
    }

    #[test]
    fn parse_with_skiprows() {
        // The comment line is removed first, so `skiprows: 1` drops the
        // `name,value` header rather than the `# header` comment.
        let content = "# header\nname,value\n1,10\n2,20\n";
        let opts = TextParseOptions {
            delimiter: ',',
            skiprows: 1,
            comments: Some('#'),
            ..Default::default()
        };
        let (cells, nrows, ncols) = parse_text_grid(content, &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 2);
        assert_eq!(cells, vec!["1", "10", "2", "20"]);
    }

    #[test]
    fn parse_skiprows_past_end_is_empty() {
        let opts = TextParseOptions {
            skiprows: 10,
            ..Default::default()
        };
        let (cells, nrows, ncols) = parse_text_grid("1,2\n3,4\n", &opts).unwrap();
        assert!(cells.is_empty());
        assert_eq!(nrows, 0);
        assert_eq!(ncols, 0);
    }

    #[test]
    fn parse_respects_max_rows() {
        let opts = TextParseOptions {
            max_rows: Some(2),
            ..Default::default()
        };
        let (cells, nrows, ncols) = parse_text_grid("1,2\n3,4\n5,6\n", &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 2);
        assert_eq!(cells, vec!["1", "2", "3", "4"]);
    }

    #[test]
    fn parse_with_comments() {
        let content = "1,2\n# comment\n3,4\n";
        let opts = TextParseOptions {
            delimiter: ',',
            comments: Some('#'),
            ..Default::default()
        };
        let (cells, nrows, _) = parse_text_grid(content, &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(cells, vec!["1", "2", "3", "4"]);
    }

    #[test]
    fn parse_trims_cells_and_skips_blank_lines() {
        let opts = TextParseOptions::default();
        let (cells, nrows, ncols) = parse_text_grid(" 1 , 2 \n\n 3 ,4\n", &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 2);
        assert_eq!(cells, vec!["1", "2", "3", "4"]);
    }

    #[test]
    fn parse_tab_delimited() {
        let content = "1\t2\t3\n4\t5\t6\n";
        let opts = TextParseOptions {
            delimiter: '\t',
            ..Default::default()
        };
        let (cells, nrows, ncols) = parse_text_grid(content, &opts).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 3);
        assert_eq!(cells, vec!["1", "2", "3", "4", "5", "6"]);
    }

    #[test]
    fn parse_inconsistent_columns_error() {
        let content = "1,2,3\n4,5\n";
        let opts = TextParseOptions::default();
        assert!(parse_text_grid(content, &opts).is_err());
    }

    #[test]
    fn parse_missing_values() {
        let content = "1,2,3\n4,,6\n7,8,\n";
        let opts = TextParseOptions::default();
        let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &[]).unwrap();
        assert_eq!(nrows, 3);
        assert_eq!(ncols, 3);
        assert_eq!(cells.len(), 9);
        assert_eq!(cells[0], Some("1".to_string()));
        // Empty field in the middle of a row.
        assert_eq!(cells[4], None);
        // Empty field after a trailing delimiter.
        assert_eq!(cells[8], None);
    }

    #[test]
    fn parse_custom_missing_marker() {
        let content = "1,NA,3\n4,5,NA\n";
        let opts = TextParseOptions::default();
        let (cells, _, _) = parse_text_grid_with_missing(content, &opts, &["NA"]).unwrap();
        assert_eq!(cells[1], None);
        assert_eq!(cells[5], None);
        assert_eq!(cells[0], Some("1".to_string()));
    }

    #[test]
    fn parse_missing_pads_short_rows() {
        let opts = TextParseOptions::default();
        let (cells, nrows, ncols) =
            parse_text_grid_with_missing("1,2,3\n4,5\n", &opts, &[]).unwrap();
        assert_eq!(nrows, 2);
        assert_eq!(ncols, 3);
        assert_eq!(cells.len(), 6);
        assert_eq!(cells[4], Some("5".to_string()));
        assert_eq!(cells[5], None);
    }

    #[test]
    fn parse_missing_rejects_long_rows() {
        let opts = TextParseOptions::default();
        assert!(parse_text_grid_with_missing("1,2\n3,4,5\n", &opts, &[]).is_err());
    }

    #[test]
    fn parse_empty_content() {
        let content = "";
        let opts = TextParseOptions::default();
        let (cells, nrows, ncols) = parse_text_grid(content, &opts).unwrap();
        assert_eq!(nrows, 0);
        assert_eq!(ncols, 0);
        assert!(cells.is_empty());
    }
}