Skip to main content

aviso_validators/
date.rs

1// (C) Copyright 2024- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9//! Date validation and canonicalization handler
10//!
11//! Handles validation and canonicalization of date fields according to various
12//! input formats. Supports multiple common date representations and converts
13//! them to a consistent canonical format for topic generation and storage.
14
15use anyhow::{Context, Result, bail};
16use chrono::NaiveDate;
17
/// Date validation and canonicalization handler
///
/// Supports multiple input date formats:
/// - **YYYY-MM-DD**: ISO 8601 standard format (e.g., "2025-12-25")
/// - **YYYYMMDD**: Compact format without separators (e.g., "20251225")
/// - **YYYY-DDD**: Day-of-year format (e.g., "2025-359" for December 25th)
///
/// All valid input formats are canonicalized to the format requested by the
/// caller, ensuring consistent representation across the system.
///
/// The type is a stateless unit struct: it only serves as a namespace for the
/// associated functions, so it is trivially copyable and default-constructible.
#[derive(Debug, Clone, Copy, Default)]
pub struct DateHandler;
28
29impl DateHandler {
30    /// Validate and canonicalize a date value according to schema requirements
31    ///
32    /// This method performs comprehensive date validation:
33    /// 1. Attempts to parse the input using multiple supported formats
34    /// 2. Validates that the date is actually valid (e.g., no February 30th)
35    /// 3. Converts to the canonical format specified in the schema
36    ///
37    /// # Arguments
38    /// * `value` - The date string to validate (in any supported format)
39    /// * `canonical_format` - Target format for canonicalization ("%Y%m%d" or "%Y-%m-%d")
40    /// * `field_name` - Name of the field being validated (for error messages)
41    ///
42    /// # Returns
43    /// * `Ok(String)` - The date in canonical format
44    /// * `Err(anyhow::Error)` - Invalid date format or impossible date
45    pub fn validate_and_canonicalize(
46        value: &str,
47        canonical_format: &str,
48        field_name: &str,
49    ) -> Result<String> {
50        // Parse the input date using our flexible parser
51        let parsed_date = Self::parse_date(value, field_name).context(format!(
52            "Failed to parse date value for field '{}'",
53            field_name
54        ))?;
55
56        // Convert to the requested canonical format
57        let canonicalized = match canonical_format {
58            "%Y%m%d" => parsed_date.format("%Y%m%d").to_string(),
59            "%Y-%m-%d" => parsed_date.format("%Y-%m-%d").to_string(),
60            _ => bail!(
61                "Unsupported date format '{}' for field '{}'",
62                canonical_format,
63                field_name
64            ),
65        };
66
67        tracing::debug!(
68            field_name = field_name,
69            input_value = value,
70            canonical_value = %canonicalized,
71            canonical_format = canonical_format,
72            "Date successfully validated and canonicalized"
73        );
74
75        Ok(canonicalized)
76    }
77
78    /// Parse date from multiple supported input formats
79    ///
80    /// Attempts to parse the input string using various common date formats
81    /// in order of preference. This flexible approach allows clients to use
82    /// whatever date format is most convenient for their use case.
83    ///
84    /// # Arguments
85    /// * `value` - The date string to parse
86    /// * `field_name` - Name of the field (for error messages)
87    ///
88    /// # Returns
89    /// * `Ok(NaiveDate)` - Successfully parsed date
90    /// * `Err(anyhow::Error)` - No supported format could parse the input
91    ///
92    /// # Supported Formats
93    /// 1. **ISO 8601**: YYYY-MM-DD (e.g., "2025-12-25")
94    /// 2. **Compact**: YYYYMMDD (e.g., "20251225")
95    /// 3. **Day-of-year**: YYYY-DDD (e.g., "2025-359")
96    fn parse_date(value: &str, field_name: &str) -> Result<NaiveDate> {
97        // Try ISO 8601 format first (YYYY-MM-DD)
98        if let Ok(date) = NaiveDate::parse_from_str(value, "%Y-%m-%d") {
99            tracing::debug!(
100                field_name = field_name,
101                input_value = value,
102                parsed_format = "ISO 8601 (YYYY-MM-DD)",
103                "Date parsed successfully"
104            );
105            return Ok(date);
106        }
107
108        // Try compact format (YYYYMMDD)
109        if let Ok(date) = NaiveDate::parse_from_str(value, "%Y%m%d") {
110            tracing::debug!(
111                field_name = field_name,
112                input_value = value,
113                parsed_format = "Compact (YYYYMMDD)",
114                "Date parsed successfully"
115            );
116            return Ok(date);
117        }
118
119        // Try day-of-year format (YYYY-DDD)
120        if let Ok(date) = NaiveDate::parse_from_str(value, "%Y-%j") {
121            tracing::debug!(
122                field_name = field_name,
123                input_value = value,
124                parsed_format = "Day-of-year (YYYY-DDD)",
125                "Date parsed successfully"
126            );
127            return Ok(date);
128        }
129
130        // No format worked, provide comprehensive error message
131        bail!(
132            "Field '{}' contains invalid date '{}'. Expected: YYYY-MM-DD, YYYYMMDD, or YYYY-DDD",
133            field_name,
134            value
135        );
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Compact canonical output format (`YYYYMMDD`), used by most tests.
    const COMPACT: &str = "%Y%m%d";
    /// ISO canonical output format (`YYYY-MM-DD`).
    const ISO: &str = "%Y-%m-%d";

    /// Run the handler with the fixed field name `"date"`.
    fn canonicalize(input: &str, format: &str) -> anyhow::Result<String> {
        DateHandler::validate_and_canonicalize(input, format, "date")
    }

    #[test]
    fn test_iso_8601_format() {
        assert_eq!(canonicalize("2025-12-25", COMPACT).unwrap(), "20251225");
    }

    #[test]
    fn test_compact_format() {
        assert_eq!(canonicalize("20251225", ISO).unwrap(), "2025-12-25");
    }

    #[test]
    fn test_day_of_year_format() {
        // Day 359 of 2025 falls on December 25th.
        assert_eq!(canonicalize("2025-359", COMPACT).unwrap(), "20251225");
    }

    #[test]
    fn test_invalid_date() {
        assert!(canonicalize("2025-02-30", COMPACT).is_err());
    }

    #[test]
    fn test_unsupported_canonical_format() {
        // A well-formed input must still fail when the target format is not allowed.
        assert!(canonicalize("2025-12-25", "%d/%m/%Y").is_err());
    }

    #[test]
    fn test_leap_year_handling() {
        // 2024 is a leap year, so February 29th exists.
        assert_eq!(canonicalize("2024-02-29", COMPACT).unwrap(), "20240229");

        // 2023 is not a leap year, so February 29th must be rejected.
        assert!(canonicalize("2023-02-29", COMPACT).is_err());
    }

    #[test]
    fn test_date_boundary_conditions() {
        // Either side of a year (and century) boundary.
        let year_edges = [("1999-12-31", "19991231"), ("2000-01-01", "20000101")];
        for (input, expected) in year_edges {
            assert_eq!(canonicalize(input, COMPACT).unwrap(), expected);
        }

        // Either side of a month boundary; only acceptance is checked here.
        for input in ["2025-01-31", "2025-02-01"] {
            assert!(canonicalize(input, COMPACT).is_ok());
        }
    }

    #[test]
    fn test_invalid_dates_comprehensive() {
        let impossible = [
            "2025-13-01", // month 13 does not exist
            "2025-01-32", // January has 31 days
            "2025-02-30", // February never has 30 days
            "2025-04-31", // April has only 30 days
        ];

        for input in impossible {
            assert!(canonicalize(input, COMPACT).is_err());
        }
    }

    #[test]
    fn test_day_of_year_edge_cases() {
        let accepted = [
            ("2025-001", "20250101"), // first day of the year
            ("2025-365", "20251231"), // last day of a non-leap year
            ("2024-366", "20241231"), // last day of a leap year
        ];
        for (input, expected) in accepted {
            assert_eq!(canonicalize(input, COMPACT).unwrap(), expected);
        }

        let rejected = [
            "2025-366", // day 366 does not exist in a non-leap year
            "2025-000", // ordinals start at 1
        ];
        for input in rejected {
            assert!(canonicalize(input, COMPACT).is_err());
        }
    }

    #[test]
    fn test_format_consistency() {
        // One calendar day written in each supported input layout.
        let from_iso = canonicalize("2025-12-25", COMPACT).unwrap();
        let from_compact = canonicalize("20251225", COMPACT).unwrap();
        let from_ordinal = canonicalize("2025-359", COMPACT).unwrap();

        // All three must converge on the same canonical value.
        assert_eq!(from_iso, from_compact);
        assert_eq!(from_compact, from_ordinal);
        assert_eq!(from_iso, "20251225");
    }

    #[test]
    fn test_malformed_input_formats() {
        let malformed_inputs = [
            "2025/12/25",          // slash separators
            "2025.12.25",          // dot separators
            "2025",                // year only
            "2025-12-25T00:00:00", // trailing time component
            "25-12-2025",          // day-first ordering
            "2025-13-01",          // month out of range
            "2025-02-30",          // day out of range for February
            "not-a-date",          // no date content at all
            "",                    // empty string
            "2025-",               // dangling separator
            "abc-def-ghi",         // non-numeric fields
        ];

        malformed_inputs.iter().for_each(|input| {
            let result = canonicalize(input, COMPACT);
            assert!(
                result.is_err(),
                "Should fail for input: '{}', but got: {:?}",
                input,
                result
            );
        });
    }
}