Skip to main content

mollendorff_forge/parser/
arrays.rs

1//! Array parsing utilities for Forge YAML models
2//!
3//! Handles parsing of typed column arrays (Number, Text, Date, Boolean).
4
5use crate::error::{ForgeError, ForgeResult};
6use crate::types::ColumnValue;
7use serde_yaml_ng::Value;
8
9/// Parse a YAML array into a typed `ColumnValue`
10///
11/// # Errors
12///
13/// Returns an error if the array is empty, contains mixed types, or has invalid values.
14pub fn parse_array_value(col_name: &str, seq: &[Value]) -> ForgeResult<ColumnValue> {
15    if seq.is_empty() {
16        return Err(ForgeError::Parse(format!(
17            "Column '{col_name}' cannot be empty"
18        )));
19    }
20
21    // Detect the type from the first element
22    let array_type = detect_array_type(&seq[0])?;
23
24    match array_type {
25        "Number" => {
26            let mut numbers = Vec::new();
27            for (i, val) in seq.iter().enumerate() {
28                match val {
29                    Value::Number(n) => {
30                        if let Some(f) = n.as_f64() {
31                            numbers.push(f);
32                        } else {
33                            return Err(ForgeError::Parse(format!(
34                                "Column '{col_name}' row {i}: Invalid number format"
35                            )));
36                        }
37                    },
38                    Value::Null => {
39                        // Provide clear error for null values in numeric arrays
40                        return Err(ForgeError::Parse(format!(
41                            "Column '{col_name}' row {i}: null values not allowed in numeric arrays. \
42                            Use 0 or remove the row if the value is missing."
43                        )));
44                    },
45                    _ => {
46                        return Err(ForgeError::Parse(format!(
47                            "Column '{}' row {}: Expected Number, found {}",
48                            col_name,
49                            i,
50                            type_name(val)
51                        )));
52                    },
53                }
54            }
55            Ok(ColumnValue::Number(numbers))
56        },
57        "Text" => {
58            let mut texts = Vec::new();
59            for (i, val) in seq.iter().enumerate() {
60                match val {
61                    Value::String(s) => texts.push(s.clone()),
62                    _ => {
63                        return Err(ForgeError::Parse(format!(
64                            "Column '{}' row {}: Expected Text, found {}",
65                            col_name,
66                            i,
67                            type_name(val)
68                        )));
69                    },
70                }
71            }
72            Ok(ColumnValue::Text(texts))
73        },
74        "Date" => {
75            let mut dates = Vec::new();
76            for (i, val) in seq.iter().enumerate() {
77                match val {
78                    Value::String(s) => {
79                        // Validate date format (YYYY-MM or YYYY-MM-DD)
80                        if !is_valid_date_format(s) {
81                            return Err(ForgeError::Parse(format!(
82                                "Column '{col_name}' row {i}: Invalid date format '{s}' (expected YYYY-MM or YYYY-MM-DD)"
83                            )));
84                        }
85                        dates.push(s.clone());
86                    },
87                    _ => {
88                        return Err(ForgeError::Parse(format!(
89                            "Column '{}' row {}: Expected Date, found {}",
90                            col_name,
91                            i,
92                            type_name(val)
93                        )));
94                    },
95                }
96            }
97            Ok(ColumnValue::Date(dates))
98        },
99        "Boolean" => {
100            let mut bools = Vec::new();
101            for (i, val) in seq.iter().enumerate() {
102                match val {
103                    Value::Bool(b) => bools.push(*b),
104                    _ => {
105                        return Err(ForgeError::Parse(format!(
106                            "Column '{}' row {}: Expected Boolean, found {}",
107                            col_name,
108                            i,
109                            type_name(val)
110                        )));
111                    },
112                }
113            }
114            Ok(ColumnValue::Boolean(bools))
115        },
116        _ => Err(ForgeError::Parse(format!(
117            "Column '{col_name}': Unsupported array type '{array_type}'"
118        ))),
119    }
120}
121
122/// Detect the type of a YAML value
123///
124/// # Errors
125///
126/// Returns an error if the value is null or an unsupported type (e.g., nested mapping).
127pub fn detect_array_type(val: &Value) -> ForgeResult<&'static str> {
128    match val {
129        Value::Number(_) => Ok("Number"),
130        Value::String(s) => {
131            // Check if it's a date string
132            if is_valid_date_format(s) {
133                Ok("Date")
134            } else {
135                Ok("Text")
136            }
137        },
138        Value::Bool(_) => Ok("Boolean"),
139        Value::Null => Err(ForgeError::Parse(
140            "Array cannot start with null. First element must be a valid value to determine column type.".to_string()
141        )),
142        _ => Err(ForgeError::Parse(format!(
143            "Unsupported array element type: {}",
144            type_name(val)
145        ))),
146    }
147}
148
/// Report whether `s` has the shape `YYYY-MM` or `YYYY-MM-DD`.
///
/// Only the layout is checked: ASCII digits in every numeric position and `-`
/// as the separator. The month and day values are not range-checked, so a
/// string such as `2025-13` is accepted.
#[must_use]
pub fn is_valid_date_format(s: &str) -> bool {
    // True when every byte is an ASCII digit.
    fn all_digits(bytes: &[u8]) -> bool {
        bytes.iter().all(u8::is_ascii_digit)
    }

    // Matching on bytes fixes both the total length and the separator positions;
    // any non-ASCII input fails because its bytes are neither digits nor `-`.
    match s.as_bytes() {
        // YYYY-MM
        [y1, y2, y3, y4, b'-', m1, m2] => all_digits(&[*y1, *y2, *y3, *y4, *m1, *m2]),
        // YYYY-MM-DD
        [y1, y2, y3, y4, b'-', m1, m2, b'-', d1, d2] => {
            all_digits(&[*y1, *y2, *y3, *y4, *m1, *m2, *d1, *d2])
        },
        _ => false,
    }
}
176
177/// Get the type name of a YAML value for error messages
178#[must_use]
179pub const fn type_name(val: &Value) -> &'static str {
180    match val {
181        Value::Null => "Null",
182        Value::Bool(_) => "Boolean",
183        Value::Number(_) => "Number",
184        Value::String(_) => "String",
185        Value::Sequence(_) => "Array",
186        Value::Mapping(_) => "Mapping",
187        Value::Tagged(_) => "Tagged",
188    }
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// Column name passed to `parse_array_value` by every parsing test.
    const COLUMN: &str = "test_col";

    /// Build a YAML sequence of string scalars.
    fn text_values(items: &[&str]) -> Vec<Value> {
        items
            .iter()
            .map(|item| Value::String((*item).to_string()))
            .collect()
    }

    /// Build a YAML sequence of integer scalars.
    fn number_values(items: &[i64]) -> Vec<Value> {
        items.iter().map(|&n| Value::Number(n.into())).collect()
    }

    /// Parse `values` as `COLUMN`, require a failure, and return its message.
    fn failure_message(values: &[Value]) -> String {
        let outcome = parse_array_value(COLUMN, values);
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    #[test]
    fn test_parse_number_array() {
        let parsed = parse_array_value(COLUMN, &number_values(&[1, 2, 3])).unwrap();

        if let ColumnValue::Number(nums) = parsed {
            assert_eq!(nums, vec![1.0, 2.0, 3.0]);
        } else {
            panic!("Expected Number array");
        }
    }

    #[test]
    fn test_parse_text_array() {
        let parsed = parse_array_value(COLUMN, &text_values(&["A", "B", "C"])).unwrap();

        if let ColumnValue::Text(texts) = parsed {
            assert_eq!(texts, vec!["A", "B", "C"]);
        } else {
            panic!("Expected Text array");
        }
    }

    #[test]
    fn test_parse_date_array() {
        let input = text_values(&["2025-01", "2025-02", "2025-03"]);
        let parsed = parse_array_value(COLUMN, &input).unwrap();

        if let ColumnValue::Date(dates) = parsed {
            assert_eq!(dates, vec!["2025-01", "2025-02", "2025-03"]);
        } else {
            panic!("Expected Date array");
        }
    }

    #[test]
    fn test_parse_boolean_array() {
        let input: Vec<Value> = [true, false, true].iter().map(|&b| Value::Bool(b)).collect();
        let parsed = parse_array_value(COLUMN, &input).unwrap();

        if let ColumnValue::Boolean(bools) = parsed {
            assert_eq!(bools, vec![true, false, true]);
        } else {
            panic!("Expected Boolean array");
        }
    }

    #[test]
    fn test_mixed_type_array_error() {
        // A number followed by a string: the column is typed Number by its first row.
        let mut input = number_values(&[1]);
        input.extend(text_values(&["text"]));

        assert!(failure_message(&input).contains("Expected Number, found String"));
    }

    #[test]
    fn test_empty_array_error() {
        assert!(failure_message(&[]).contains("cannot be empty"));
    }

    #[test]
    fn test_invalid_date_format_error() {
        // The first row fixes the column as Date; the second has a one-digit month.
        let input = text_values(&["2025-01", "2025-1"]);

        assert!(failure_message(&input).contains("Invalid date format"));
    }

    #[test]
    fn test_valid_date_formats() {
        for accepted in ["2025-01", "2025-12", "2025-01-15", "2025-12-31"].iter() {
            assert!(is_valid_date_format(accepted));
        }
        for rejected in ["2025-1", "2025-1-1", "25-01-01", "not-a-date"].iter() {
            assert!(!is_valid_date_format(rejected));
        }
    }

    #[test]
    fn test_parse_date_format_yyyy_mm_dd() {
        let input = text_values(&["2025-01-15", "2025-02-20"]);
        let parsed = parse_array_value(COLUMN, &input).unwrap();

        if let ColumnValue::Date(dates) = parsed {
            assert_eq!(dates, vec!["2025-01-15", "2025-02-20"]);
        } else {
            panic!("Expected Date array");
        }
    }

    #[test]
    fn test_type_name_function() {
        let cases = [
            (Value::Null, "Null"),
            (Value::Bool(true), "Boolean"),
            (Value::Number(1.into()), "Number"),
            (Value::String("test".to_string()), "String"),
            (Value::Sequence(vec![]), "Array"),
            (Value::Mapping(serde_yaml_ng::Mapping::new()), "Mapping"),
        ];

        for (value, expected) in &cases {
            assert_eq!(type_name(value), *expected);
        }
    }

    #[test]
    fn test_detect_array_type_unsupported() {
        // A nested sequence cannot be a column element.
        let outcome = detect_array_type(&Value::Sequence(vec![]));

        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().to_string().contains("Unsupported"));
    }

    #[test]
    fn test_boolean_array_wrong_type() {
        let input = vec![Value::Bool(true), Value::String("not bool".to_string())];

        assert!(failure_message(&input).contains("Expected Boolean"));
    }

    #[test]
    fn test_date_array_wrong_type() {
        let mut input = text_values(&["2025-01"]);
        input.extend(number_values(&[123]));

        assert!(failure_message(&input).contains("Expected Date"));
    }

    #[test]
    fn test_text_array_wrong_type() {
        let mut input = text_values(&["text"]);
        input.push(Value::Bool(true));

        assert!(failure_message(&input).contains("Expected Text"));
    }

    #[test]
    fn test_null_first_element_error() {
        let outcome = detect_array_type(&Value::Null);

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("cannot start with null"));
    }
}