json_archive/
event_deserialize.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Event deserialization with diagnostic collection.
23//!
24//! ## Why this exists
25//!
26//! The .json.archive format uses arrays for events because that's compact and easy to work
27//! with in JavaScript: `["add", "/path", value, "obs-id"]`. The format is human-editable
28//! since people might want to experiment with it or fix issues by hand.
29//!
30//! Two problems in Rust:
31//!
32//! 1. **Array-based format**: Serde derive expects named struct fields. Deserializing from
33//!    positional arrays into structs requires custom Visitor implementation.
34//!
35//! 2. **Detailed error messages**: Goal is Elm-style diagnostics that show exactly what went
36//!    wrong, what was expected, and how to fix it. Serde's Deserialize trait only allows
37//!    returning string errors. To generate detailed diagnostics (with codes, severity levels,
38//!    advice), we need to manually implement the Visitor and collect errors in a wrapper type
39//!    instead of failing immediately. The wrapper gives us access to which field is being
40//!    parsed so we can say "expected observation ID at position 3" instead of "parse error".
41//!
42//! ## Library search
43//!
44//! Spent 30 minutes looking for existing solutions. Checked:
45//! - serde_path_to_error: Adds field path context but still returns string errors
//! - figment: Configuration library, but it sounded like it could be used only for diagnostics
47//! - config/serde_value: Similar issue
48//! - json5: Relaxed JSON syntax, not diagnostic-focused
//! - miette: A diagnostic library for Rust. It includes a series of
//!   traits/protocols that allow you to hook into its error reporting facilities,
//!   and even write your own error reports. This is better than my home-built
//!   Diagnostic struct, but does not help me with deserialization.
53//!
54//! Found no library that handles both array deserialization and rich diagnostic collection.
55//! This could probably be automated or turned into a library, but for a simple format it was
56//! faster to implement by hand. Also serves as exploration of what diagnostic-driven parsing
57//! costs in terms of code.
58//!
59//! ## What this does
60//!
61//! EventDeserializer wraps Event and collects diagnostics during parsing. It implements
62//! Deserialize with a custom Visitor that validates each array position and populates the
63//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
64//! location information (filename, line number) after deserialization.
65
66use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
67use serde_json::Value;
68use std::fmt;
69use chrono::{DateTime, Utc};
70
71use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
72use crate::events::Event;
73
74#[derive(Debug, Default)]
75pub struct EventDeserializer {
76    pub event: Option<Event>,
77    pub diagnostics: Vec<Diagnostic>,
78}
79
80impl EventDeserializer {
81    pub fn new() -> Self {
82        Self::default()
83    }
84
85    fn add_diagnostic(&mut self, level: DiagnosticLevel, code: DiagnosticCode, message: String) {
86        self.diagnostics.push(Diagnostic::new(level, code, message));
87    }
88}
89
90impl<'de> Deserialize<'de> for EventDeserializer {
91    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
92    where
93        D: Deserializer<'de>,
94    {
95        deserializer.deserialize_seq(EventVisitor::new())
96    }
97}
98
99struct EventVisitor {
100    deserializer: EventDeserializer,
101}
102
103impl EventVisitor {
104    fn new() -> Self {
105        Self {
106            deserializer: EventDeserializer::new(),
107        }
108    }
109}
110
111impl<'de> Visitor<'de> for EventVisitor {
112    type Value = EventDeserializer;
113
114    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
115        formatter.write_str("an array representing an event")
116    }
117
118    fn visit_seq<A>(mut self, mut seq: A) -> Result<Self::Value, A::Error>
119    where
120        A: SeqAccess<'de>,
121    {
122        let mut elements: Vec<Value> = Vec::new();
123        
124        while let Some(elem) = seq.next_element::<Value>()? {
125            elements.push(elem);
126        }
127
128        if elements.is_empty() {
129            self.deserializer.add_diagnostic(
130                DiagnosticLevel::Fatal,
131                DiagnosticCode::WrongFieldCount,
132                "I found an empty array, but events must have at least a string type field as first element.".to_string(),
133            );
134            return Ok(self.deserializer);
135        }
136
137        let event_type = match elements[0].as_str() {
138            Some(t) => t,
139            None => {
140                self.deserializer.add_diagnostic(
141                    DiagnosticLevel::Fatal,
142                    DiagnosticCode::WrongFieldType,
143                    "I expected the first element of an event to be a string event type.".to_string(),
144                );
145                return Ok(self.deserializer);
146            }
147        };
148
149        match event_type {
150            "observe" => {
151                if elements.len() != 4 {
152                    self.deserializer.add_diagnostic(
153                        DiagnosticLevel::Fatal,
154                        DiagnosticCode::WrongFieldCount,
155                        format!("I expected an observe event to have 4 fields, but found {}.", elements.len()),
156                    );
157                    return Ok(self.deserializer);
158                }
159
160                let id = match elements[1].as_str() {
161                    Some(s) => s.to_string(),
162                    None => {
163                        self.deserializer.add_diagnostic(
164                            DiagnosticLevel::Fatal,
165                            DiagnosticCode::WrongFieldType,
166                            "I expected the observation ID to be a string.".to_string(),
167                        );
168                        return Ok(self.deserializer);
169                    }
170                };
171
172                let timestamp = match elements[2].as_str() {
173                    Some(s) => match s.parse::<DateTime<Utc>>() {
174                        Ok(dt) => dt,
175                        Err(_) => {
176                            self.deserializer.add_diagnostic(
177                                DiagnosticLevel::Fatal,
178                                DiagnosticCode::WrongFieldType,
179                                "I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
180                            );
181                            return Ok(self.deserializer);
182                        }
183                    },
184                    None => {
185                        self.deserializer.add_diagnostic(
186                            DiagnosticLevel::Fatal,
187                            DiagnosticCode::WrongFieldType,
188                            "I expected the timestamp to be a string.".to_string(),
189                        );
190                        return Ok(self.deserializer);
191                    }
192                };
193
194                let change_count = match elements[3].as_u64() {
195                    Some(n) => n as usize,
196                    None => {
197                        self.deserializer.add_diagnostic(
198                            DiagnosticLevel::Fatal,
199                            DiagnosticCode::WrongFieldType,
200                            "I expected the change count to be a non-negative integer.".to_string(),
201                        );
202                        return Ok(self.deserializer);
203                    }
204                };
205
206                self.deserializer.event = Some(Event::Observe {
207                    observation_id: id,
208                    timestamp,
209                    change_count,
210                });
211            }
212
213            "add" => {
214                if elements.len() != 4 {
215                    self.deserializer.add_diagnostic(
216                        DiagnosticLevel::Fatal,
217                        DiagnosticCode::WrongFieldCount,
218                        format!("I expected an add event to have 4 fields, but found {}.", elements.len()),
219                    );
220                    return Ok(self.deserializer);
221                }
222
223                let path = match elements[1].as_str() {
224                    Some(s) => s.to_string(),
225                    None => {
226                        self.deserializer.add_diagnostic(
227                            DiagnosticLevel::Fatal,
228                            DiagnosticCode::WrongFieldType,
229                            "I expected the path to be a string.".to_string(),
230                        );
231                        return Ok(self.deserializer);
232                    }
233                };
234
235                let value = elements[2].clone();
236
237                let observation_id = match elements[3].as_str() {
238                    Some(s) => s.to_string(),
239                    None => {
240                        self.deserializer.add_diagnostic(
241                            DiagnosticLevel::Fatal,
242                            DiagnosticCode::WrongFieldType,
243                            "I expected the observation ID to be a string.".to_string(),
244                        );
245                        return Ok(self.deserializer);
246                    }
247                };
248
249                self.deserializer.event = Some(Event::Add {
250                    path,
251                    value,
252                    observation_id,
253                });
254            }
255
256            "change" => {
257                if elements.len() != 4 {
258                    self.deserializer.add_diagnostic(
259                        DiagnosticLevel::Fatal,
260                        DiagnosticCode::WrongFieldCount,
261                        format!("I expected a change event to have 4 fields, but found {}.", elements.len()),
262                    );
263                    return Ok(self.deserializer);
264                }
265
266                let path = match elements[1].as_str() {
267                    Some(s) => s.to_string(),
268                    None => {
269                        self.deserializer.add_diagnostic(
270                            DiagnosticLevel::Fatal,
271                            DiagnosticCode::WrongFieldType,
272                            "I expected the path to be a string.".to_string(),
273                        );
274                        return Ok(self.deserializer);
275                    }
276                };
277
278                let new_value = elements[2].clone();
279
280                let observation_id = match elements[3].as_str() {
281                    Some(s) => s.to_string(),
282                    None => {
283                        self.deserializer.add_diagnostic(
284                            DiagnosticLevel::Fatal,
285                            DiagnosticCode::WrongFieldType,
286                            "I expected the observation ID to be a string.".to_string(),
287                        );
288                        return Ok(self.deserializer);
289                    }
290                };
291
292                self.deserializer.event = Some(Event::Change {
293                    path,
294                    new_value,
295                    observation_id,
296                });
297            }
298
299            "remove" => {
300                if elements.len() != 3 {
301                    self.deserializer.add_diagnostic(
302                        DiagnosticLevel::Fatal,
303                        DiagnosticCode::WrongFieldCount,
304                        format!("I expected a remove event to have 3 fields, but found {}.", elements.len()),
305                    );
306                    return Ok(self.deserializer);
307                }
308
309                let path = match elements[1].as_str() {
310                    Some(s) => s.to_string(),
311                    None => {
312                        self.deserializer.add_diagnostic(
313                            DiagnosticLevel::Fatal,
314                            DiagnosticCode::WrongFieldType,
315                            "I expected the path to be a string.".to_string(),
316                        );
317                        return Ok(self.deserializer);
318                    }
319                };
320
321                let observation_id = match elements[2].as_str() {
322                    Some(s) => s.to_string(),
323                    None => {
324                        self.deserializer.add_diagnostic(
325                            DiagnosticLevel::Fatal,
326                            DiagnosticCode::WrongFieldType,
327                            "I expected the observation ID to be a string.".to_string(),
328                        );
329                        return Ok(self.deserializer);
330                    }
331                };
332
333                self.deserializer.event = Some(Event::Remove {
334                    path,
335                    observation_id,
336                });
337            }
338
339            "move" => {
340                if elements.len() != 4 {
341                    self.deserializer.add_diagnostic(
342                        DiagnosticLevel::Fatal,
343                        DiagnosticCode::WrongFieldCount,
344                        format!("I expected a move event to have 4 fields, but found {}.", elements.len()),
345                    );
346                    return Ok(self.deserializer);
347                }
348
349                let path = match elements[1].as_str() {
350                    Some(s) => s.to_string(),
351                    None => {
352                        self.deserializer.add_diagnostic(
353                            DiagnosticLevel::Fatal,
354                            DiagnosticCode::WrongFieldType,
355                            "I expected the path to be a string.".to_string(),
356                        );
357                        return Ok(self.deserializer);
358                    }
359                };
360
361                let moves = match self.parse_moves(&elements[2]) {
362                    Ok(moves) => moves,
363                    Err(err_msg) => {
364                        self.deserializer.add_diagnostic(
365                            DiagnosticLevel::Fatal,
366                            DiagnosticCode::WrongFieldType,
367                            err_msg,
368                        );
369                        return Ok(self.deserializer);
370                    }
371                };
372
373                let observation_id = match elements[3].as_str() {
374                    Some(s) => s.to_string(),
375                    None => {
376                        self.deserializer.add_diagnostic(
377                            DiagnosticLevel::Fatal,
378                            DiagnosticCode::WrongFieldType,
379                            "I expected the observation ID to be a string.".to_string(),
380                        );
381                        return Ok(self.deserializer);
382                    }
383                };
384
385                self.deserializer.event = Some(Event::Move {
386                    path,
387                    moves,
388                    observation_id,
389                });
390            }
391
392            "snapshot" => {
393                if elements.len() != 4 {
394                    self.deserializer.add_diagnostic(
395                        DiagnosticLevel::Fatal,
396                        DiagnosticCode::WrongFieldCount,
397                        format!("I expected a snapshot event to have 4 fields, but found {}.", elements.len()),
398                    );
399                    return Ok(self.deserializer);
400                }
401
402                let observation_id = match elements[1].as_str() {
403                    Some(s) => s.to_string(),
404                    None => {
405                        self.deserializer.add_diagnostic(
406                            DiagnosticLevel::Fatal,
407                            DiagnosticCode::WrongFieldType,
408                            "I expected the observation ID to be a string.".to_string(),
409                        );
410                        return Ok(self.deserializer);
411                    }
412                };
413
414                let timestamp = match elements[2].as_str() {
415                    Some(s) => match s.parse::<DateTime<Utc>>() {
416                        Ok(dt) => dt,
417                        Err(_) => {
418                            self.deserializer.add_diagnostic(
419                                DiagnosticLevel::Fatal,
420                                DiagnosticCode::WrongFieldType,
421                                "I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
422                            );
423                            return Ok(self.deserializer);
424                        }
425                    },
426                    None => {
427                        self.deserializer.add_diagnostic(
428                            DiagnosticLevel::Fatal,
429                            DiagnosticCode::WrongFieldType,
430                            "I expected the timestamp to be a string.".to_string(),
431                        );
432                        return Ok(self.deserializer);
433                    }
434                };
435
436                let object = elements[3].clone();
437
438                self.deserializer.event = Some(Event::Snapshot {
439                    observation_id,
440                    timestamp,
441                    object,
442                });
443            }
444
445            _ => {
446                self.deserializer.add_diagnostic(
447                    DiagnosticLevel::Warning,
448                    DiagnosticCode::UnknownEventType,
449                    format!("I found an unknown event type: '{}'", event_type),
450                );
451            }
452        }
453
454        Ok(self.deserializer)
455    }
456}
457
458impl EventVisitor {
459    fn parse_moves(&mut self, moves_value: &Value) -> Result<Vec<(usize, usize)>, String> {
460        let moves_array = match moves_value.as_array() {
461            Some(arr) => arr,
462            None => {
463                return Err("I expected the moves to be an array of [from, to] pairs.".to_string());
464            }
465        };
466
467        let mut moves = Vec::new();
468        for move_pair in moves_array {
469            let pair = match move_pair.as_array() {
470                Some(p) if p.len() == 2 => p,
471                _ => {
472                    return Err("I expected each move to be a [from, to] pair.".to_string());
473                }
474            };
475
476            let from_idx = match pair[0].as_u64() {
477                Some(i) => i as usize,
478                None => {
479                    return Err("I expected the 'from' index to be a non-negative integer.".to_string());
480                }
481            };
482
483            let to_idx = match pair[1].as_u64() {
484                Some(i) => i as usize,
485                None => {
486                    return Err("I expected the 'to' index to be a non-negative integer.".to_string());
487                }
488            };
489
490            moves.push((from_idx, to_idx));
491        }
492
493        Ok(moves)
494    }
495}
496
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Deserializes `input`, failing the test if serde itself reports an error.
    fn deserialize(input: Value) -> EventDeserializer {
        serde_json::from_value::<EventDeserializer>(input)
            .expect("deserialization should not return a serde error")
    }

    #[test]
    fn test_deserialize_observe_event() {
        let parsed = deserialize(json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]));

        assert!(parsed.diagnostics.is_empty());
        match parsed.event {
            Some(Event::Observe {
                observation_id,
                change_count,
                ..
            }) => {
                assert_eq!(observation_id, "obs-1");
                assert_eq!(change_count, 1);
            }
            other => panic!("expected an observe event, got {:?}", other),
        }
    }

    #[test]
    fn test_deserialize_add_event() {
        let parsed = deserialize(json!(["add", "/count", 42, "obs-1"]));

        assert!(parsed.diagnostics.is_empty());
        match parsed.event {
            Some(Event::Add {
                path,
                value,
                observation_id,
            }) => {
                assert_eq!(path, "/count");
                assert_eq!(value, json!(42));
                assert_eq!(observation_id, "obs-1");
            }
            other => panic!("expected an add event, got {:?}", other),
        }
    }

    #[test]
    fn test_deserialize_invalid_event_type() {
        let parsed = deserialize(json!(["invalid", "some", "data"]));

        assert_eq!(parsed.diagnostics.len(), 1);
        assert_eq!(parsed.diagnostics[0].code, DiagnosticCode::UnknownEventType);
        assert!(parsed.event.is_none());
    }

    #[test]
    fn test_deserialize_wrong_field_count() {
        let parsed = deserialize(json!(["observe", "obs-1"]));

        assert_eq!(parsed.diagnostics.len(), 1);
        assert_eq!(parsed.diagnostics[0].code, DiagnosticCode::WrongFieldCount);
        assert!(parsed.event.is_none());
    }

    #[test]
    fn test_deserialize_move_event() {
        let parsed = deserialize(json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]));

        assert!(parsed.diagnostics.is_empty());
        match parsed.event {
            Some(Event::Move {
                path,
                moves,
                observation_id,
            }) => {
                assert_eq!(path, "/items");
                assert_eq!(moves, vec![(0, 2), (1, 0)]);
                assert_eq!(observation_id, "obs-1");
            }
            other => panic!("expected a move event, got {:?}", other),
        }
    }
}