mollendorff_forge/parser/
arrays.rs1use crate::error::{ForgeError, ForgeResult};
6use crate::types::ColumnValue;
7use serde_yaml_ng::Value;
8
9pub fn parse_array_value(col_name: &str, seq: &[Value]) -> ForgeResult<ColumnValue> {
15 if seq.is_empty() {
16 return Err(ForgeError::Parse(format!(
17 "Column '{col_name}' cannot be empty"
18 )));
19 }
20
21 let array_type = detect_array_type(&seq[0])?;
23
24 match array_type {
25 "Number" => {
26 let mut numbers = Vec::new();
27 for (i, val) in seq.iter().enumerate() {
28 match val {
29 Value::Number(n) => {
30 if let Some(f) = n.as_f64() {
31 numbers.push(f);
32 } else {
33 return Err(ForgeError::Parse(format!(
34 "Column '{col_name}' row {i}: Invalid number format"
35 )));
36 }
37 },
38 Value::Null => {
39 return Err(ForgeError::Parse(format!(
41 "Column '{col_name}' row {i}: null values not allowed in numeric arrays. \
42 Use 0 or remove the row if the value is missing."
43 )));
44 },
45 _ => {
46 return Err(ForgeError::Parse(format!(
47 "Column '{}' row {}: Expected Number, found {}",
48 col_name,
49 i,
50 type_name(val)
51 )));
52 },
53 }
54 }
55 Ok(ColumnValue::Number(numbers))
56 },
57 "Text" => {
58 let mut texts = Vec::new();
59 for (i, val) in seq.iter().enumerate() {
60 match val {
61 Value::String(s) => texts.push(s.clone()),
62 _ => {
63 return Err(ForgeError::Parse(format!(
64 "Column '{}' row {}: Expected Text, found {}",
65 col_name,
66 i,
67 type_name(val)
68 )));
69 },
70 }
71 }
72 Ok(ColumnValue::Text(texts))
73 },
74 "Date" => {
75 let mut dates = Vec::new();
76 for (i, val) in seq.iter().enumerate() {
77 match val {
78 Value::String(s) => {
79 if !is_valid_date_format(s) {
81 return Err(ForgeError::Parse(format!(
82 "Column '{col_name}' row {i}: Invalid date format '{s}' (expected YYYY-MM or YYYY-MM-DD)"
83 )));
84 }
85 dates.push(s.clone());
86 },
87 _ => {
88 return Err(ForgeError::Parse(format!(
89 "Column '{}' row {}: Expected Date, found {}",
90 col_name,
91 i,
92 type_name(val)
93 )));
94 },
95 }
96 }
97 Ok(ColumnValue::Date(dates))
98 },
99 "Boolean" => {
100 let mut bools = Vec::new();
101 for (i, val) in seq.iter().enumerate() {
102 match val {
103 Value::Bool(b) => bools.push(*b),
104 _ => {
105 return Err(ForgeError::Parse(format!(
106 "Column '{}' row {}: Expected Boolean, found {}",
107 col_name,
108 i,
109 type_name(val)
110 )));
111 },
112 }
113 }
114 Ok(ColumnValue::Boolean(bools))
115 },
116 _ => Err(ForgeError::Parse(format!(
117 "Column '{col_name}': Unsupported array type '{array_type}'"
118 ))),
119 }
120}
121
122pub fn detect_array_type(val: &Value) -> ForgeResult<&'static str> {
128 match val {
129 Value::Number(_) => Ok("Number"),
130 Value::String(s) => {
131 if is_valid_date_format(s) {
133 Ok("Date")
134 } else {
135 Ok("Text")
136 }
137 },
138 Value::Bool(_) => Ok("Boolean"),
139 Value::Null => Err(ForgeError::Parse(
140 "Array cannot start with null. First element must be a valid value to determine column type.".to_string()
141 )),
142 _ => Err(ForgeError::Parse(format!(
143 "Unsupported array element type: {}",
144 type_name(val)
145 ))),
146 }
147}
148
/// Reports whether `s` has the textual shape `YYYY-MM` or `YYYY-MM-DD`.
///
/// Only the layout is checked: ASCII digits in the year/month/day positions
/// and `-` separators at byte offsets 4 and (for the long form) 7. Month and
/// day values are not range-checked, so `"2025-13"` is accepted.
#[must_use]
pub fn is_valid_date_format(s: &str) -> bool {
    let bytes = s.as_bytes();
    match bytes.len() {
        // Both accepted shapes share one positional rule; offset 7 simply
        // does not exist in the 7-byte form.
        7 | 10 => bytes.iter().enumerate().all(|(pos, byte)| match pos {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        }),
        _ => false,
    }
}
176
177#[must_use]
179pub const fn type_name(val: &Value) -> &'static str {
180 match val {
181 Value::Null => "Null",
182 Value::Bool(_) => "Boolean",
183 Value::Number(_) => "Number",
184 Value::String(_) => "String",
185 Value::Sequence(_) => "Array",
186 Value::Mapping(_) => "Mapping",
187 Value::Tagged(_) => "Tagged",
188 }
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// Column name used by every parsing test.
    const COL: &str = "test_col";

    /// Builds a YAML sequence of strings.
    fn strings(items: &[&str]) -> Vec<Value> {
        items
            .iter()
            .map(|item| Value::String((*item).to_string()))
            .collect()
    }

    /// Builds a YAML sequence of integers.
    fn ints(items: &[i32]) -> Vec<Value> {
        items.iter().map(|&n| Value::Number(n.into())).collect()
    }

    /// Parses `seq` and returns the column, panicking if parsing fails.
    fn parse_ok(seq: &[Value]) -> ColumnValue {
        parse_array_value(COL, seq).unwrap()
    }

    /// Parses `seq` and returns the error text, panicking if parsing succeeds.
    fn parse_err(seq: &[Value]) -> String {
        parse_array_value(COL, seq).unwrap_err().to_string()
    }

    #[test]
    fn test_parse_number_array() {
        if let ColumnValue::Number(nums) = parse_ok(&ints(&[1, 2, 3])) {
            assert_eq!(nums, vec![1.0, 2.0, 3.0]);
        } else {
            panic!("Expected Number array");
        }
    }

    #[test]
    fn test_parse_text_array() {
        if let ColumnValue::Text(texts) = parse_ok(&strings(&["A", "B", "C"])) {
            assert_eq!(texts, vec!["A", "B", "C"]);
        } else {
            panic!("Expected Text array");
        }
    }

    #[test]
    fn test_parse_date_array() {
        let parsed = parse_ok(&strings(&["2025-01", "2025-02", "2025-03"]));
        if let ColumnValue::Date(dates) = parsed {
            assert_eq!(dates, vec!["2025-01", "2025-02", "2025-03"]);
        } else {
            panic!("Expected Date array");
        }
    }

    #[test]
    fn test_parse_boolean_array() {
        let yaml_seq = [true, false, true].map(Value::Bool);
        if let ColumnValue::Boolean(bools) = parse_ok(&yaml_seq) {
            assert_eq!(bools, vec![true, false, true]);
        } else {
            panic!("Expected Boolean array");
        }
    }

    #[test]
    fn test_mixed_type_array_error() {
        let yaml_seq = [Value::Number(1.into()), Value::String("text".to_string())];
        assert!(parse_err(&yaml_seq).contains("Expected Number, found String"));
    }

    #[test]
    fn test_empty_array_error() {
        assert!(parse_err(&[]).contains("cannot be empty"));
    }

    #[test]
    fn test_invalid_date_format_error() {
        let err_msg = parse_err(&strings(&["2025-01", "2025-1"]));
        assert!(err_msg.contains("Invalid date format"));
    }

    #[test]
    fn test_valid_date_formats() {
        for accepted in &["2025-01", "2025-12", "2025-01-15", "2025-12-31"] {
            assert!(is_valid_date_format(accepted));
        }
        for rejected in &["2025-1", "2025-1-1", "25-01-01", "not-a-date"] {
            assert!(!is_valid_date_format(rejected));
        }
    }

    #[test]
    fn test_parse_date_format_yyyy_mm_dd() {
        let parsed = parse_ok(&strings(&["2025-01-15", "2025-02-20"]));
        if let ColumnValue::Date(dates) = parsed {
            assert_eq!(dates, vec!["2025-01-15", "2025-02-20"]);
        } else {
            panic!("Expected Date array");
        }
    }

    #[test]
    fn test_type_name_function() {
        let cases = vec![
            (Value::Null, "Null"),
            (Value::Bool(true), "Boolean"),
            (Value::Number(1.into()), "Number"),
            (Value::String("test".to_string()), "String"),
            (Value::Sequence(vec![]), "Array"),
            (Value::Mapping(serde_yaml_ng::Mapping::new()), "Mapping"),
        ];
        for (value, expected) in cases {
            assert_eq!(type_name(&value), expected);
        }
    }

    #[test]
    fn test_detect_array_type_unsupported() {
        let err = detect_array_type(&Value::Sequence(vec![])).unwrap_err();
        assert!(err.to_string().contains("Unsupported"));
    }

    #[test]
    fn test_boolean_array_wrong_type() {
        let yaml_seq = [Value::Bool(true), Value::String("not bool".to_string())];
        assert!(parse_err(&yaml_seq).contains("Expected Boolean"));
    }

    #[test]
    fn test_date_array_wrong_type() {
        let yaml_seq = [
            Value::String("2025-01".to_string()),
            Value::Number(123.into()),
        ];
        assert!(parse_err(&yaml_seq).contains("Expected Date"));
    }

    #[test]
    fn test_text_array_wrong_type() {
        let yaml_seq = [Value::String("text".to_string()), Value::Bool(true)];
        assert!(parse_err(&yaml_seq).contains("Expected Text"));
    }

    #[test]
    fn test_null_first_element_error() {
        let err = detect_array_type(&Value::Null).unwrap_err().to_string();
        assert!(err.contains("cannot start with null"));
    }
}