json_archive/
event_deserialize.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Event deserialization with diagnostic collection.
23//!
24//! ## Why this exists
25//!
26//! The .json.archive format uses arrays for events because that's compact and easy to work
27//! with in JavaScript: `["add", "/path", value, "obs-id"]`. The format is human-editable
28//! since people might want to experiment with it or fix issues by hand.
29//!
30//! Two problems in Rust:
31//!
32//! 1. **Array-based format**: Serde derive expects named struct fields. Deserializing from
33//!    positional arrays into structs requires custom Visitor implementation.
34//!
35//! 2. **Detailed error messages**: Goal is Elm-style diagnostics that show exactly what went
36//!    wrong, what was expected, and how to fix it. Serde's Deserialize trait only allows
37//!    returning string errors. To generate detailed diagnostics (with codes, severity levels,
38//!    advice), we need to manually implement the Visitor and collect errors in a wrapper type
39//!    instead of failing immediately. The wrapper gives us access to which field is being
40//!    parsed so we can say "expected observation ID at position 3" instead of "parse error".
41//!
42//! ## Library search
43//!
44//! Spent 30 minutes looking for existing solutions. Checked:
45//! - serde_path_to_error: Adds field path context but still returns string errors
46//! - figment: Configuration library, but sounded like could be used only for diagnostics
47//! - config/serde_value: Similar issue
48//! - json5: Relaxed JSON syntax, not diagnostic-focused
49//! - miette: a diagnostic library for Rust. It includes a series of
50//! traits/protocols that allow you to hook into its error reporting facilities,
51//! and even write your own error reports. This is better than my home built
52//! Diagnostic struct, but does not help me with deserialization.
53//!
54//! Found no library that handles both array deserialization and rich diagnostic collection.
55//! This could probably be automated or turned into a library, but for a simple format it was
56//! faster to implement by hand. Also serves as exploration of what diagnostic-driven parsing
57//! costs in terms of code.
58//!
59//! ## What this does
60//!
61//! EventDeserializer wraps Event and collects diagnostics during parsing. It implements
62//! Deserialize with a custom Visitor that validates each array position and populates the
63//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
64//! location information (filename, line number) after deserialization.
65
66use chrono::{DateTime, Utc};
67use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
68use serde_json::Value;
69use std::fmt;
70
71use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
72use crate::events::Event;
73
74#[derive(Debug, Default)]
75pub struct EventDeserializer {
76    pub event: Option<Event>,
77    pub diagnostics: Vec<Diagnostic>,
78}
79
80impl EventDeserializer {
81    pub fn new() -> Self {
82        Self::default()
83    }
84
85    fn add_diagnostic(&mut self, level: DiagnosticLevel, code: DiagnosticCode, message: String) {
86        self.diagnostics.push(Diagnostic::new(level, code, message));
87    }
88}
89
90impl<'de> Deserialize<'de> for EventDeserializer {
91    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
92    where
93        D: Deserializer<'de>,
94    {
95        deserializer.deserialize_seq(EventVisitor::new())
96    }
97}
98
99struct EventVisitor {
100    deserializer: EventDeserializer,
101}
102
103impl EventVisitor {
104    fn new() -> Self {
105        Self {
106            deserializer: EventDeserializer::new(),
107        }
108    }
109}
110
111impl<'de> Visitor<'de> for EventVisitor {
112    type Value = EventDeserializer;
113
114    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
115        formatter.write_str("an array representing an event")
116    }
117
118    fn visit_seq<A>(mut self, mut seq: A) -> Result<Self::Value, A::Error>
119    where
120        A: SeqAccess<'de>,
121    {
122        let mut elements: Vec<Value> = Vec::new();
123
124        while let Some(elem) = seq.next_element::<Value>()? {
125            elements.push(elem);
126        }
127
128        if elements.is_empty() {
129            self.deserializer.add_diagnostic(
130                DiagnosticLevel::Fatal,
131                DiagnosticCode::WrongFieldCount,
132                "I found an empty array, but events must have at least a string type field as first element.".to_string(),
133            );
134            return Ok(self.deserializer);
135        }
136
137        let event_type = match elements[0].as_str() {
138            Some(t) => t,
139            None => {
140                self.deserializer.add_diagnostic(
141                    DiagnosticLevel::Fatal,
142                    DiagnosticCode::WrongFieldType,
143                    "I expected the first element of an event to be a string event type."
144                        .to_string(),
145                );
146                return Ok(self.deserializer);
147            }
148        };
149
150        match event_type {
151            "observe" => {
152                if elements.len() != 4 {
153                    self.deserializer.add_diagnostic(
154                        DiagnosticLevel::Fatal,
155                        DiagnosticCode::WrongFieldCount,
156                        format!(
157                            "I expected an observe event to have 4 fields, but found {}.",
158                            elements.len()
159                        ),
160                    );
161                    return Ok(self.deserializer);
162                }
163
164                let id = match elements[1].as_str() {
165                    Some(s) => s.to_string(),
166                    None => {
167                        self.deserializer.add_diagnostic(
168                            DiagnosticLevel::Fatal,
169                            DiagnosticCode::WrongFieldType,
170                            "I expected the observation ID to be a string.".to_string(),
171                        );
172                        return Ok(self.deserializer);
173                    }
174                };
175
176                let timestamp = match elements[2].as_str() {
177                    Some(s) => match s.parse::<DateTime<Utc>>() {
178                        Ok(dt) => dt,
179                        Err(_) => {
180                            self.deserializer.add_diagnostic(
181                                DiagnosticLevel::Fatal,
182                                DiagnosticCode::WrongFieldType,
183                                "I expected the timestamp to be a valid ISO-8601 datetime string."
184                                    .to_string(),
185                            );
186                            return Ok(self.deserializer);
187                        }
188                    },
189                    None => {
190                        self.deserializer.add_diagnostic(
191                            DiagnosticLevel::Fatal,
192                            DiagnosticCode::WrongFieldType,
193                            "I expected the timestamp to be a string.".to_string(),
194                        );
195                        return Ok(self.deserializer);
196                    }
197                };
198
199                let change_count = match elements[3].as_u64() {
200                    Some(n) => n as usize,
201                    None => {
202                        self.deserializer.add_diagnostic(
203                            DiagnosticLevel::Fatal,
204                            DiagnosticCode::WrongFieldType,
205                            "I expected the change count to be a non-negative integer.".to_string(),
206                        );
207                        return Ok(self.deserializer);
208                    }
209                };
210
211                self.deserializer.event = Some(Event::Observe {
212                    observation_id: id,
213                    timestamp,
214                    change_count,
215                });
216            }
217
218            "add" => {
219                if elements.len() != 4 {
220                    self.deserializer.add_diagnostic(
221                        DiagnosticLevel::Fatal,
222                        DiagnosticCode::WrongFieldCount,
223                        format!(
224                            "I expected an add event to have 4 fields, but found {}.",
225                            elements.len()
226                        ),
227                    );
228                    return Ok(self.deserializer);
229                }
230
231                let path = match elements[1].as_str() {
232                    Some(s) => s.to_string(),
233                    None => {
234                        self.deserializer.add_diagnostic(
235                            DiagnosticLevel::Fatal,
236                            DiagnosticCode::WrongFieldType,
237                            "I expected the path to be a string.".to_string(),
238                        );
239                        return Ok(self.deserializer);
240                    }
241                };
242
243                let value = elements[2].clone();
244
245                let observation_id = match elements[3].as_str() {
246                    Some(s) => s.to_string(),
247                    None => {
248                        self.deserializer.add_diagnostic(
249                            DiagnosticLevel::Fatal,
250                            DiagnosticCode::WrongFieldType,
251                            "I expected the observation ID to be a string.".to_string(),
252                        );
253                        return Ok(self.deserializer);
254                    }
255                };
256
257                self.deserializer.event = Some(Event::Add {
258                    path,
259                    value,
260                    observation_id,
261                });
262            }
263
264            "change" => {
265                if elements.len() != 4 {
266                    self.deserializer.add_diagnostic(
267                        DiagnosticLevel::Fatal,
268                        DiagnosticCode::WrongFieldCount,
269                        format!(
270                            "I expected a change event to have 4 fields, but found {}.",
271                            elements.len()
272                        ),
273                    );
274                    return Ok(self.deserializer);
275                }
276
277                let path = match elements[1].as_str() {
278                    Some(s) => s.to_string(),
279                    None => {
280                        self.deserializer.add_diagnostic(
281                            DiagnosticLevel::Fatal,
282                            DiagnosticCode::WrongFieldType,
283                            "I expected the path to be a string.".to_string(),
284                        );
285                        return Ok(self.deserializer);
286                    }
287                };
288
289                let new_value = elements[2].clone();
290
291                let observation_id = match elements[3].as_str() {
292                    Some(s) => s.to_string(),
293                    None => {
294                        self.deserializer.add_diagnostic(
295                            DiagnosticLevel::Fatal,
296                            DiagnosticCode::WrongFieldType,
297                            "I expected the observation ID to be a string.".to_string(),
298                        );
299                        return Ok(self.deserializer);
300                    }
301                };
302
303                self.deserializer.event = Some(Event::Change {
304                    path,
305                    new_value,
306                    observation_id,
307                });
308            }
309
310            "remove" => {
311                if elements.len() != 3 {
312                    self.deserializer.add_diagnostic(
313                        DiagnosticLevel::Fatal,
314                        DiagnosticCode::WrongFieldCount,
315                        format!(
316                            "I expected a remove event to have 3 fields, but found {}.",
317                            elements.len()
318                        ),
319                    );
320                    return Ok(self.deserializer);
321                }
322
323                let path = match elements[1].as_str() {
324                    Some(s) => s.to_string(),
325                    None => {
326                        self.deserializer.add_diagnostic(
327                            DiagnosticLevel::Fatal,
328                            DiagnosticCode::WrongFieldType,
329                            "I expected the path to be a string.".to_string(),
330                        );
331                        return Ok(self.deserializer);
332                    }
333                };
334
335                let observation_id = match elements[2].as_str() {
336                    Some(s) => s.to_string(),
337                    None => {
338                        self.deserializer.add_diagnostic(
339                            DiagnosticLevel::Fatal,
340                            DiagnosticCode::WrongFieldType,
341                            "I expected the observation ID to be a string.".to_string(),
342                        );
343                        return Ok(self.deserializer);
344                    }
345                };
346
347                self.deserializer.event = Some(Event::Remove {
348                    path,
349                    observation_id,
350                });
351            }
352
353            "move" => {
354                if elements.len() != 4 {
355                    self.deserializer.add_diagnostic(
356                        DiagnosticLevel::Fatal,
357                        DiagnosticCode::WrongFieldCount,
358                        format!(
359                            "I expected a move event to have 4 fields, but found {}.",
360                            elements.len()
361                        ),
362                    );
363                    return Ok(self.deserializer);
364                }
365
366                let path = match elements[1].as_str() {
367                    Some(s) => s.to_string(),
368                    None => {
369                        self.deserializer.add_diagnostic(
370                            DiagnosticLevel::Fatal,
371                            DiagnosticCode::WrongFieldType,
372                            "I expected the path to be a string.".to_string(),
373                        );
374                        return Ok(self.deserializer);
375                    }
376                };
377
378                let moves = match self.parse_moves(&elements[2]) {
379                    Ok(moves) => moves,
380                    Err(err_msg) => {
381                        self.deserializer.add_diagnostic(
382                            DiagnosticLevel::Fatal,
383                            DiagnosticCode::WrongFieldType,
384                            err_msg,
385                        );
386                        return Ok(self.deserializer);
387                    }
388                };
389
390                let observation_id = match elements[3].as_str() {
391                    Some(s) => s.to_string(),
392                    None => {
393                        self.deserializer.add_diagnostic(
394                            DiagnosticLevel::Fatal,
395                            DiagnosticCode::WrongFieldType,
396                            "I expected the observation ID to be a string.".to_string(),
397                        );
398                        return Ok(self.deserializer);
399                    }
400                };
401
402                self.deserializer.event = Some(Event::Move {
403                    path,
404                    moves,
405                    observation_id,
406                });
407            }
408
409            "snapshot" => {
410                if elements.len() != 4 {
411                    self.deserializer.add_diagnostic(
412                        DiagnosticLevel::Fatal,
413                        DiagnosticCode::WrongFieldCount,
414                        format!(
415                            "I expected a snapshot event to have 4 fields, but found {}.",
416                            elements.len()
417                        ),
418                    );
419                    return Ok(self.deserializer);
420                }
421
422                let observation_id = match elements[1].as_str() {
423                    Some(s) => s.to_string(),
424                    None => {
425                        self.deserializer.add_diagnostic(
426                            DiagnosticLevel::Fatal,
427                            DiagnosticCode::WrongFieldType,
428                            "I expected the observation ID to be a string.".to_string(),
429                        );
430                        return Ok(self.deserializer);
431                    }
432                };
433
434                let timestamp = match elements[2].as_str() {
435                    Some(s) => match s.parse::<DateTime<Utc>>() {
436                        Ok(dt) => dt,
437                        Err(_) => {
438                            self.deserializer.add_diagnostic(
439                                DiagnosticLevel::Fatal,
440                                DiagnosticCode::WrongFieldType,
441                                "I expected the timestamp to be a valid ISO-8601 datetime string."
442                                    .to_string(),
443                            );
444                            return Ok(self.deserializer);
445                        }
446                    },
447                    None => {
448                        self.deserializer.add_diagnostic(
449                            DiagnosticLevel::Fatal,
450                            DiagnosticCode::WrongFieldType,
451                            "I expected the timestamp to be a string.".to_string(),
452                        );
453                        return Ok(self.deserializer);
454                    }
455                };
456
457                let object = elements[3].clone();
458
459                self.deserializer.event = Some(Event::Snapshot {
460                    observation_id,
461                    timestamp,
462                    object,
463                });
464            }
465
466            _ => {
467                self.deserializer.add_diagnostic(
468                    DiagnosticLevel::Warning,
469                    DiagnosticCode::UnknownEventType,
470                    format!("I found an unknown event type: '{}'", event_type),
471                );
472            }
473        }
474
475        Ok(self.deserializer)
476    }
477}
478
479impl EventVisitor {
480    fn parse_moves(&mut self, moves_value: &Value) -> Result<Vec<(usize, usize)>, String> {
481        let moves_array = match moves_value.as_array() {
482            Some(arr) => arr,
483            None => {
484                return Err("I expected the moves to be an array of [from, to] pairs.".to_string());
485            }
486        };
487
488        let mut moves = Vec::new();
489        for move_pair in moves_array {
490            let pair = match move_pair.as_array() {
491                Some(p) if p.len() == 2 => p,
492                _ => {
493                    return Err("I expected each move to be a [from, to] pair.".to_string());
494                }
495            };
496
497            let from_idx = match pair[0].as_u64() {
498                Some(i) => i as usize,
499                None => {
500                    return Err(
501                        "I expected the 'from' index to be a non-negative integer.".to_string()
502                    );
503                }
504            };
505
506            let to_idx = match pair[1].as_u64() {
507                Some(i) => i as usize,
508                None => {
509                    return Err(
510                        "I expected the 'to' index to be a non-negative integer.".to_string()
511                    );
512                }
513            };
514
515            moves.push((from_idx, to_idx));
516        }
517
518        Ok(moves)
519    }
520}
521
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs `value` through `EventDeserializer`. Array inputs are expected to
    /// deserialize successfully even when their content is invalid, because
    /// content problems are reported as diagnostics.
    fn parse(value: serde_json::Value) -> EventDeserializer {
        serde_json::from_value(value).expect("array input should deserialize")
    }

    /// Asserts that parsing produced exactly one diagnostic with `code` and no
    /// event.
    fn assert_rejected_with(parsed: &EventDeserializer, code: DiagnosticCode) {
        assert_eq!(parsed.diagnostics.len(), 1);
        assert_eq!(parsed.diagnostics[0].code, code);
        assert!(parsed.event.is_none());
    }

    #[test]
    fn test_deserialize_observe_event() {
        let parsed = parse(json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]));

        assert!(parsed.diagnostics.is_empty());
        assert!(matches!(
            parsed.event,
            Some(Event::Observe { observation_id, timestamp: _, change_count })
            if observation_id == "obs-1" && change_count == 1
        ));
    }

    #[test]
    fn test_deserialize_add_event() {
        let parsed = parse(json!(["add", "/count", 42, "obs-1"]));

        assert!(parsed.diagnostics.is_empty());
        assert!(matches!(
            parsed.event,
            Some(Event::Add { path, value, observation_id })
            if path == "/count" && value == json!(42) && observation_id == "obs-1"
        ));
    }

    #[test]
    fn test_deserialize_change_event() {
        let parsed = parse(json!(["change", "/count", 43, "obs-2"]));

        assert!(parsed.diagnostics.is_empty());
        assert!(matches!(
            parsed.event,
            Some(Event::Change { path, new_value, observation_id })
            if path == "/count" && new_value == json!(43) && observation_id == "obs-2"
        ));
    }

    #[test]
    fn test_deserialize_remove_event() {
        let parsed = parse(json!(["remove", "/count", "obs-3"]));

        assert!(parsed.diagnostics.is_empty());
        assert!(matches!(
            parsed.event,
            Some(Event::Remove { path, observation_id })
            if path == "/count" && observation_id == "obs-3"
        ));
    }

    #[test]
    fn test_deserialize_move_event() {
        let parsed = parse(json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]));

        assert!(parsed.diagnostics.is_empty());
        assert!(matches!(
            parsed.event,
            Some(Event::Move { path, moves, observation_id })
            if path == "/items" && moves == vec![(0, 2), (1, 0)] && observation_id == "obs-1"
        ));
    }

    #[test]
    fn test_deserialize_snapshot_event() {
        let parsed = parse(json!([
            "snapshot",
            "obs-4",
            "2025-01-01T00:00:00Z",
            { "count": 1 }
        ]));

        assert!(parsed.diagnostics.is_empty());
        assert!(matches!(
            parsed.event,
            Some(Event::Snapshot { observation_id, timestamp: _, object })
            if observation_id == "obs-4" && object == json!({ "count": 1 })
        ));
    }

    #[test]
    fn test_deserialize_invalid_event_type() {
        let parsed = parse(json!(["invalid", "some", "data"]));

        assert_rejected_with(&parsed, DiagnosticCode::UnknownEventType);
    }

    #[test]
    fn test_deserialize_wrong_field_count() {
        let parsed = parse(json!(["observe", "obs-1"]));

        assert_rejected_with(&parsed, DiagnosticCode::WrongFieldCount);
    }

    #[test]
    fn test_deserialize_empty_array() {
        let parsed = parse(json!([]));

        assert_rejected_with(&parsed, DiagnosticCode::WrongFieldCount);
    }

    #[test]
    fn test_deserialize_non_string_event_type() {
        let parsed = parse(json!([42, "/count"]));

        assert_rejected_with(&parsed, DiagnosticCode::WrongFieldType);
    }

    #[test]
    fn test_deserialize_invalid_timestamp() {
        let parsed = parse(json!(["observe", "obs-1", "not a timestamp", 1]));

        assert_rejected_with(&parsed, DiagnosticCode::WrongFieldType);
    }

    #[test]
    fn test_deserialize_negative_change_count() {
        let parsed = parse(json!(["observe", "obs-1", "2025-01-01T00:00:00Z", -1]));

        assert_rejected_with(&parsed, DiagnosticCode::WrongFieldType);
    }

    #[test]
    fn test_deserialize_malformed_move_pair() {
        let parsed = parse(json!(["move", "/items", [[0]], "obs-1"]));

        assert_rejected_with(&parsed, DiagnosticCode::WrongFieldType);
    }
}