Skip to main content

datasynth_core/
serde_timestamp.rs

1//! Normalized timestamp serialization.
2//!
3//! Truncates all timestamps to microsecond precision and normalizes to UTC
4//! with a `Z` suffix. This ensures consistent timestamp formats across all
5//! output files, preventing pandas "Mixed timezones detected" errors.
6//!
7//! ## Modules
8//!
9//! - [`utc`] — for `DateTime<Utc>` fields
10//! - [`utc::option`] — for `Option<DateTime<Utc>>` fields
11//! - [`naive`] — for `NaiveDateTime` fields (serializes with Z suffix)
12//! - [`naive::option`] — for `Option<NaiveDateTime>` fields
13
14use std::fmt;
15
16use std::io::Write;
17
18use chrono::{DateTime, Datelike, NaiveDateTime, Timelike, Utc};
19use serde::{self, Deserializer, Serializer};
20
21/// Format a NaiveDateTime as `YYYY-MM-DDTHH:MM:SS[.ffffff]Z` into a stack buffer.
22/// Truncates to microsecond precision. Returns the formatted length.
23fn format_normalized(dt: NaiveDateTime, buf: &mut [u8; 32]) -> usize {
24    let micros = (dt.nanosecond() / 1_000) % 1_000_000;
25    let mut cursor = std::io::Cursor::new(&mut buf[..]);
26    if micros > 0 {
27        let _ = write!(
28            cursor,
29            "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}Z",
30            dt.year(),
31            dt.month(),
32            dt.day(),
33            dt.hour(),
34            dt.minute(),
35            dt.second(),
36            micros,
37        );
38    } else {
39        let _ = write!(
40            cursor,
41            "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
42            dt.year(),
43            dt.month(),
44            dt.day(),
45            dt.hour(),
46            dt.minute(),
47            dt.second(),
48        );
49    }
50    cursor.position() as usize
51}
52
53/// Serialize a NaiveDateTime as a normalized UTC timestamp string.
54fn serialize_normalized<S: Serializer>(
55    dt: NaiveDateTime,
56    serializer: S,
57) -> Result<S::Ok, S::Error> {
58    let mut buf = [0u8; 32];
59    let len = format_normalized(dt, &mut buf);
60    let s = std::str::from_utf8(&buf[..len]).expect("timestamp is always ASCII");
61    serializer.serialize_str(s)
62}
63
64// =========================================================================
65// utc — for DateTime<Utc> fields
66// =========================================================================
67
68/// Normalized serialization for `DateTime<Utc>` fields.
69///
70/// ```ignore
71/// #[serde(with = "datasynth_core::serde_timestamp::utc")]
72/// pub created_at: DateTime<Utc>,
73/// ```
74pub mod utc {
75    use super::*;
76
77    pub fn serialize<S: Serializer>(
78        value: &DateTime<Utc>,
79        serializer: S,
80    ) -> Result<S::Ok, S::Error> {
81        serialize_normalized(value.naive_utc(), serializer)
82    }
83
84    pub fn deserialize<'de, D: Deserializer<'de>>(
85        deserializer: D,
86    ) -> Result<DateTime<Utc>, D::Error> {
87        deserializer.deserialize_any(UtcVisitor)
88    }
89
90    /// Normalized serialization for `Option<DateTime<Utc>>` fields.
91    pub mod option {
92        use super::*;
93
94        pub fn serialize<S: Serializer>(
95            value: &Option<DateTime<Utc>>,
96            serializer: S,
97        ) -> Result<S::Ok, S::Error> {
98            match value {
99                Some(dt) => serialize_normalized(dt.naive_utc(), serializer),
100                None => serializer.serialize_none(),
101            }
102        }
103
104        pub fn deserialize<'de, D: Deserializer<'de>>(
105            deserializer: D,
106        ) -> Result<Option<DateTime<Utc>>, D::Error> {
107            deserializer.deserialize_any(OptionUtcVisitor)
108        }
109    }
110}
111
112// =========================================================================
113// naive — for NaiveDateTime fields (serialized with Z suffix)
114// =========================================================================
115
116/// Normalized serialization for `NaiveDateTime` fields.
117///
118/// Appends Z suffix so all timestamps are in a uniform UTC-like format,
119/// preventing "Mixed timezones detected" errors in pandas.
120///
121/// ```ignore
122/// #[serde(with = "datasynth_core::serde_timestamp::naive")]
123/// pub entry_timestamp: NaiveDateTime,
124/// ```
125pub mod naive {
126    use super::*;
127
128    pub fn serialize<S: Serializer>(
129        value: &NaiveDateTime,
130        serializer: S,
131    ) -> Result<S::Ok, S::Error> {
132        serialize_normalized(*value, serializer)
133    }
134
135    pub fn deserialize<'de, D: Deserializer<'de>>(
136        deserializer: D,
137    ) -> Result<NaiveDateTime, D::Error> {
138        deserializer.deserialize_any(NaiveVisitor)
139    }
140
141    /// Normalized serialization for `Option<NaiveDateTime>` fields.
142    pub mod option {
143        use super::*;
144
145        pub fn serialize<S: Serializer>(
146            value: &Option<NaiveDateTime>,
147            serializer: S,
148        ) -> Result<S::Ok, S::Error> {
149            match value {
150                Some(dt) => serialize_normalized(*dt, serializer),
151                None => serializer.serialize_none(),
152            }
153        }
154
155        pub fn deserialize<'de, D: Deserializer<'de>>(
156            deserializer: D,
157        ) -> Result<Option<NaiveDateTime>, D::Error> {
158            deserializer.deserialize_any(OptionNaiveVisitor)
159        }
160    }
161}
162
163// =========================================================================
164// Visitors
165// =========================================================================
166
167struct UtcVisitor;
168
169impl<'de> serde::de::Visitor<'de> for UtcVisitor {
170    type Value = DateTime<Utc>;
171
172    fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
173        write!(f, "an RFC 3339 timestamp string")
174    }
175
176    fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<DateTime<Utc>, E> {
177        // Accept both "...Z" and naive formats (treat as UTC)
178        if let Ok(dt) = DateTime::parse_from_rfc3339(v) {
179            return Ok(dt.with_timezone(&Utc));
180        }
181        if let Ok(ndt) = NaiveDateTime::parse_from_str(v, "%Y-%m-%dT%H:%M:%S%.fZ") {
182            return Ok(ndt.and_utc());
183        }
184        if let Ok(ndt) = NaiveDateTime::parse_from_str(v, "%Y-%m-%dT%H:%M:%S%.f") {
185            return Ok(ndt.and_utc());
186        }
187        if let Ok(ndt) = NaiveDateTime::parse_from_str(v, "%Y-%m-%dT%H:%M:%S") {
188            return Ok(ndt.and_utc());
189        }
190        Err(E::custom(format!("invalid timestamp: {v}")))
191    }
192}
193
194struct OptionUtcVisitor;
195
196impl<'de> serde::de::Visitor<'de> for OptionUtcVisitor {
197    type Value = Option<DateTime<Utc>>;
198
199    fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
200        write!(f, "an RFC 3339 timestamp string or null")
201    }
202
203    fn visit_none<E: serde::de::Error>(self) -> Result<Option<DateTime<Utc>>, E> {
204        Ok(None)
205    }
206
207    fn visit_unit<E: serde::de::Error>(self) -> Result<Option<DateTime<Utc>>, E> {
208        Ok(None)
209    }
210
211    fn visit_some<D: Deserializer<'de>>(
212        self,
213        deserializer: D,
214    ) -> Result<Option<DateTime<Utc>>, D::Error> {
215        deserializer.deserialize_any(UtcVisitor).map(Some)
216    }
217
218    fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Option<DateTime<Utc>>, E> {
219        UtcVisitor.visit_str(v).map(Some)
220    }
221}
222
223struct NaiveVisitor;
224
225impl<'de> serde::de::Visitor<'de> for NaiveVisitor {
226    type Value = NaiveDateTime;
227
228    fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
229        write!(f, "a datetime string")
230    }
231
232    fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<NaiveDateTime, E> {
233        // Strip trailing Z if present (we store as NaiveDateTime)
234        let s = v.trim_end_matches('Z');
235        if let Ok(ndt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
236            return Ok(ndt);
237        }
238        if let Ok(ndt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
239            return Ok(ndt);
240        }
241        Err(E::custom(format!("invalid datetime: {v}")))
242    }
243}
244
245struct OptionNaiveVisitor;
246
247impl<'de> serde::de::Visitor<'de> for OptionNaiveVisitor {
248    type Value = Option<NaiveDateTime>;
249
250    fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
251        write!(f, "a datetime string or null")
252    }
253
254    fn visit_none<E: serde::de::Error>(self) -> Result<Option<NaiveDateTime>, E> {
255        Ok(None)
256    }
257
258    fn visit_unit<E: serde::de::Error>(self) -> Result<Option<NaiveDateTime>, E> {
259        Ok(None)
260    }
261
262    fn visit_some<D: Deserializer<'de>>(
263        self,
264        deserializer: D,
265    ) -> Result<Option<NaiveDateTime>, D::Error> {
266        deserializer.deserialize_any(NaiveVisitor).map(Some)
267    }
268
269    fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Option<NaiveDateTime>, E> {
270        NaiveVisitor.visit_str(v).map(Some)
271    }
272}
273
#[cfg(test)]
mod tests {
    use chrono::NaiveDate;

    use super::*;

    /// Fixture with one required and one optional `DateTime<Utc>` field.
    #[derive(serde::Serialize, serde::Deserialize, Debug)]
    struct TestUtc {
        #[serde(with = "super::utc")]
        ts: DateTime<Utc>,
        #[serde(default, with = "super::utc::option")]
        opt_ts: Option<DateTime<Utc>>,
    }

    /// Fixture with one required and one optional `NaiveDateTime` field.
    #[derive(serde::Serialize, serde::Deserialize, Debug)]
    struct TestNaive {
        #[serde(with = "super::naive")]
        ts: NaiveDateTime,
        #[serde(default, with = "super::naive::option")]
        opt_ts: Option<NaiveDateTime>,
    }

    /// Builds a `NaiveDateTime` from date, time-of-day and nanosecond parts.
    fn naive_at(date: (i32, u32, u32), time: (u32, u32, u32), nanos: u32) -> NaiveDateTime {
        let (year, month, day) = date;
        let (hour, minute, second) = time;
        NaiveDate::from_ymd_opt(year, month, day)
            .unwrap()
            .and_hms_nano_opt(hour, minute, second, nanos)
            .unwrap()
    }

    #[test]
    fn test_utc_truncates_nanoseconds() {
        let stamp = naive_at((2024, 1, 15), (8, 30, 45), 123_456_789).and_utc();
        let record = TestUtc {
            ts: stamp,
            opt_ts: Some(stamp),
        };
        let json = serde_json::to_string(&record).unwrap();
        // Only the microsecond digits (123456) may survive; 789 must be gone.
        assert!(json.contains(".123456Z"), "got: {json}");
        assert!(!json.contains("789"), "nanoseconds leaked: {json}");
    }

    #[test]
    fn test_utc_omits_zero_micros() {
        let stamp = naive_at((2024, 1, 15), (8, 30, 45), 0).and_utc();
        let record = TestUtc {
            ts: stamp,
            opt_ts: None,
        };
        let json = serde_json::to_string(&record).unwrap();
        assert!(json.contains("08:30:45Z"), "got: {json}");
        assert!(!json.contains(".000"), "unnecessary decimals: {json}");
    }

    #[test]
    fn test_naive_gets_z_suffix() {
        let stamp = naive_at((2024, 3, 10), (14, 0, 0), 500_000_000);
        let record = TestNaive {
            ts: stamp,
            opt_ts: Some(stamp),
        };
        let json = serde_json::to_string(&record).unwrap();
        // A zone-less value is still written with the Z suffix.
        assert!(json.contains(".500000Z"), "got: {json}");
    }

    #[test]
    fn test_deserialize_both_formats() {
        // First input carries the Z suffix, the second omits it (read as UTC).
        let inputs = [
            r#"{"ts":"2024-01-15T08:30:45.123456Z"}"#,
            r#"{"ts":"2024-01-15T08:30:45.123456"}"#,
        ];
        for input in inputs {
            let parsed: TestUtc = serde_json::from_str(input).unwrap();
            assert_eq!(parsed.ts.nanosecond(), 123_456_000);
        }
    }

    #[test]
    fn test_naive_deserialize_strips_z() {
        let parsed: TestNaive =
            serde_json::from_str(r#"{"ts":"2024-01-15T08:30:45.123456Z"}"#).unwrap();
        assert_eq!(parsed.ts.nanosecond(), 123_456_000);
    }

    #[test]
    fn test_option_null() {
        let parsed: TestUtc =
            serde_json::from_str(r#"{"ts":"2024-01-15T08:30:45Z","opt_ts":null}"#).unwrap();
        assert!(parsed.opt_ts.is_none());
    }
}