// Source: provenance_mark/validate.rs

use std::collections::{HashMap, HashSet};

use serde::Serialize;

use crate::ProvenanceMark;

7// Helper module for serializing ProvenanceMark as UR string
8mod provenance_mark_as_ur {
9    use bc_ur::UREncodable;
10    use serde::Serializer;
11
12    use crate::ProvenanceMark;
13
14    pub fn serialize<S>(
15        mark: &ProvenanceMark,
16        serializer: S,
17    ) -> Result<S::Ok, S::Error>
18    where
19        S: Serializer,
20    {
21        serializer.serialize_str(&mark.ur_string())
22    }
23}
24
25// Helper module for serializing Vec<ProvenanceMark> as Vec<UR string>
26mod provenance_marks_as_ur {
27    use bc_ur::UREncodable;
28    use serde::Serializer;
29
30    use crate::ProvenanceMark;
31
32    pub fn serialize<S>(
33        marks: &[ProvenanceMark],
34        serializer: S,
35    ) -> Result<S::Ok, S::Error>
36    where
37        S: Serializer,
38    {
39        use serde::ser::SerializeSeq;
40        let mut seq = serializer.serialize_seq(Some(marks.len()))?;
41        for mark in marks {
42            seq.serialize_element(&mark.ur_string())?;
43        }
44        seq.end()
45    }
46}
47
48// Helper module for serializing dcbor::Date as ISO8601 string
49mod date_as_iso8601 {
50    use serde::Serializer;
51
52    pub fn serialize<S>(
53        date: &dcbor::Date,
54        serializer: S,
55    ) -> Result<S::Ok, S::Error>
56    where
57        S: Serializer,
58    {
59        serializer.serialize_str(&date.to_string())
60    }
61}
62
/// Output format for a validation report.
///
/// The default is [`ValidationReportFormat::Text`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ValidationReportFormat {
    /// Human-readable text format
    #[default]
    Text,
    /// Compact JSON format (no whitespace)
    JsonCompact,
    /// Pretty-printed JSON format (with indentation)
    JsonPretty,
}

75/// Issue flagged during validation
76#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
77#[serde(tag = "type", content = "data")]
78pub enum ValidationIssue {
79    /// Hash mismatch between consecutive marks
80    HashMismatch {
81        #[serde(with = "hex")]
82        expected: Vec<u8>,
83        #[serde(with = "hex")]
84        actual: Vec<u8>,
85    },
86    /// Key mismatch between consecutive marks
87    KeyMismatch,
88    /// Sequence number gap
89    SequenceGap { expected: u32, actual: u32 },
90    /// Date ordering violation
91    DateOrdering {
92        #[serde(serialize_with = "date_as_iso8601::serialize")]
93        previous: dcbor::Date,
94        #[serde(serialize_with = "date_as_iso8601::serialize")]
95        next: dcbor::Date,
96    },
97    /// Non-genesis mark at sequence 0
98    NonGenesisAtZero,
99    /// Invalid genesis key
100    InvalidGenesisKey,
101}
102
103impl std::fmt::Display for ValidationIssue {
104    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105        match self {
106            ValidationIssue::HashMismatch { expected, actual } => {
107                write!(
108                    f,
109                    "hash mismatch: expected {}, got {}",
110                    hex::encode(expected),
111                    hex::encode(actual)
112                )
113            }
114            ValidationIssue::KeyMismatch => {
115                write!(
116                    f,
117                    "key mismatch: current hash was not generated from next key"
118                )
119            }
120            ValidationIssue::SequenceGap { expected, actual } => {
121                write!(
122                    f,
123                    "sequence number gap: expected {}, got {}",
124                    expected, actual
125                )
126            }
127            ValidationIssue::DateOrdering { previous, next } => {
128                write!(
129                    f,
130                    "date must be equal or later: previous is {}, next is {}",
131                    previous, next
132                )
133            }
134            ValidationIssue::NonGenesisAtZero => {
135                write!(f, "non-genesis mark at sequence 0")
136            }
137            ValidationIssue::InvalidGenesisKey => {
138                write!(f, "genesis mark must have key equal to chain_id")
139            }
140        }
141    }
142}
143
144impl std::error::Error for ValidationIssue {}
145
146/// A mark with any issues flagged during validation
147#[derive(Debug, Clone, Serialize)]
148pub struct FlaggedMark {
149    #[serde(serialize_with = "provenance_mark_as_ur::serialize")]
150    mark: ProvenanceMark,
151    issues: Vec<ValidationIssue>,
152}
153
154impl FlaggedMark {
155    fn new(mark: ProvenanceMark) -> Self { Self { mark, issues: Vec::new() } }
156
157    fn with_issue(mark: ProvenanceMark, issue: ValidationIssue) -> Self {
158        Self { mark, issues: vec![issue] }
159    }
160
161    pub fn mark(&self) -> &ProvenanceMark { &self.mark }
162    pub fn issues(&self) -> &[ValidationIssue] { &self.issues }
163}
164
165/// Report for a contiguous sequence of marks within a chain
166#[derive(Debug, Clone, Serialize)]
167pub struct SequenceReport {
168    start_seq: u32,
169    end_seq: u32,
170    marks: Vec<FlaggedMark>,
171}
172
173impl SequenceReport {
174    pub fn start_seq(&self) -> u32 { self.start_seq }
175    pub fn end_seq(&self) -> u32 { self.end_seq }
176    pub fn marks(&self) -> &[FlaggedMark] { &self.marks }
177}
178
179/// Report for a chain of marks with the same chain ID
180#[derive(Debug, Clone, Serialize)]
181pub struct ChainReport {
182    #[serde(with = "hex")]
183    chain_id: Vec<u8>,
184    has_genesis: bool,
185    #[serde(serialize_with = "provenance_marks_as_ur::serialize")]
186    marks: Vec<ProvenanceMark>,
187    sequences: Vec<SequenceReport>,
188}
189
190impl ChainReport {
191    pub fn chain_id(&self) -> &[u8] { &self.chain_id }
192    pub fn has_genesis(&self) -> bool { self.has_genesis }
193    pub fn marks(&self) -> &[ProvenanceMark] { &self.marks }
194    pub fn sequences(&self) -> &[SequenceReport] { &self.sequences }
195
196    /// Get the chain ID as a hex string for display
197    pub fn chain_id_hex(&self) -> String { hex::encode(&self.chain_id) }
198}
199
200/// Complete validation report
201#[derive(Debug, Clone, Serialize)]
202pub struct ValidationReport {
203    #[serde(serialize_with = "provenance_marks_as_ur::serialize")]
204    marks: Vec<ProvenanceMark>,
205    chains: Vec<ChainReport>,
206}
207
208impl ValidationReport {
209    pub fn marks(&self) -> &[ProvenanceMark] { &self.marks }
210    pub fn chains(&self) -> &[ChainReport] { &self.chains }
211
212    /// Format the validation report as human-readable text.
213    ///
214    /// Returns a formatted string if the report contains interesting
215    /// information (issues, multiple chains, or multiple sequences).
216    /// Returns an empty string if the report represents a single perfect chain
217    /// with no issues.
218    pub fn format(&self, format: ValidationReportFormat) -> String {
219        match format {
220            ValidationReportFormat::Text => self.format_text(),
221            ValidationReportFormat::JsonCompact => {
222                serde_json::to_string(self).unwrap_or_default()
223            }
224            ValidationReportFormat::JsonPretty => {
225                serde_json::to_string_pretty(self).unwrap_or_default()
226            }
227        }
228    }
229
230    fn format_text(&self) -> String {
231        if !self.is_interesting() {
232            return String::new();
233        }
234
235        let mut lines = Vec::new();
236
237        // Report summary
238        lines.push(format!("Total marks: {}", self.marks.len()));
239        lines.push(format!("Chains: {}", self.chains.len()));
240        lines.push(String::new());
241
242        // Report each chain
243        for (chain_idx, chain) in self.chains.iter().enumerate() {
244            // Show short chain ID (first 4 bytes)
245            let chain_id_hex = chain.chain_id_hex();
246            let short_chain_id = if chain_id_hex.len() > 8 {
247                &chain_id_hex[..8]
248            } else {
249                &chain_id_hex
250            };
251
252            lines.push(format!("Chain {}: {}", chain_idx + 1, short_chain_id));
253
254            if !chain.has_genesis() {
255                lines.push("  Warning: No genesis mark found".to_string());
256            }
257
258            // Report each sequence
259            for seq in chain.sequences() {
260                // Report each mark in the sequence
261                for flagged_mark in seq.marks() {
262                    let mark = flagged_mark.mark();
263                    let short_id = mark.identifier();
264                    let seq_num = mark.seq();
265
266                    // Build the mark line with annotations
267                    let mut annotations = Vec::new();
268
269                    // Check if it's genesis
270                    if mark.is_genesis() {
271                        annotations.push("genesis mark".to_string());
272                    }
273
274                    // Add issue annotations
275                    for issue in flagged_mark.issues() {
276                        let issue_str = match issue {
277                            ValidationIssue::SequenceGap {
278                                expected,
279                                actual: _,
280                            } => {
281                                format!("gap: {} missing", expected)
282                            }
283                            ValidationIssue::DateOrdering {
284                                previous,
285                                next,
286                            } => {
287                                format!("date {} < {}", previous, next)
288                            }
289                            ValidationIssue::HashMismatch { .. } => {
290                                "hash mismatch".to_string()
291                            }
292                            ValidationIssue::KeyMismatch => {
293                                "key mismatch".to_string()
294                            }
295                            ValidationIssue::NonGenesisAtZero => {
296                                "non-genesis at seq 0".to_string()
297                            }
298                            ValidationIssue::InvalidGenesisKey => {
299                                "invalid genesis key".to_string()
300                            }
301                        };
302                        annotations.push(issue_str);
303                    }
304
305                    // Format the line
306                    if annotations.is_empty() {
307                        lines.push(format!("  {}: {}", seq_num, short_id));
308                    } else {
309                        lines.push(format!(
310                            "  {}: {} ({})",
311                            seq_num,
312                            short_id,
313                            annotations.join(", ")
314                        ));
315                    }
316                }
317            }
318
319            lines.push(String::new());
320        }
321
322        lines.join("\n").trim_end().to_string()
323    }
324
325    /// Check if the validation report contains interesting information.
326    ///
327    /// Returns false for a single perfect chain with no issues, true otherwise.
328    fn is_interesting(&self) -> bool {
329        // Not interesting if empty
330        if self.chains.is_empty() {
331            return false;
332        }
333
334        // Check if any chain is missing genesis
335        for chain in &self.chains {
336            if !chain.has_genesis() {
337                return true;
338            }
339        }
340
341        // Not interesting if single chain with single perfect sequence
342        if self.chains.len() == 1 {
343            let chain = &self.chains[0];
344            if chain.sequences().len() == 1 {
345                let seq = &chain.sequences()[0];
346                // Check if the sequence has no issues
347                if seq.marks().iter().all(|m| m.issues().is_empty()) {
348                    return false;
349                }
350            }
351        }
352
353        true
354    }
355
356    /// Check if the validation report has any issues.
357    ///
358    /// Returns true if there are validation issues, missing genesis,
359    /// multiple chains, or multiple sequences.
360    pub fn has_issues(&self) -> bool {
361        // Missing genesis is considered an issue
362        for chain in &self.chains {
363            if !chain.has_genesis() {
364                return true;
365            }
366        }
367
368        // Check for validation issues in marks
369        for chain in &self.chains {
370            for seq in chain.sequences() {
371                for mark in seq.marks() {
372                    if !mark.issues().is_empty() {
373                        return true;
374                    }
375                }
376            }
377        }
378
379        // Multiple chains or sequences are also considered issues
380        if self.chains.len() > 1 {
381            return true;
382        }
383
384        if self.chains.len() == 1 && self.chains[0].sequences().len() > 1 {
385            return true;
386        }
387
388        false
389    }
390
391    /// Validate a collection of provenance marks
392    /// Validate a collection of provenance marks
393    pub fn validate(marks: Vec<ProvenanceMark>) -> Self {
394        // Deduplicate exact duplicates
395        let mut seen = HashSet::new();
396        let mut deduplicated_marks = Vec::new();
397        for mark in marks {
398            if seen.insert(mark.clone()) {
399                deduplicated_marks.push(mark);
400            }
401        }
402
403        // Bin marks by chain ID
404        let mut chain_bins: HashMap<Vec<u8>, Vec<ProvenanceMark>> =
405            HashMap::new();
406        for mark in &deduplicated_marks {
407            chain_bins
408                .entry(mark.chain_id().to_vec())
409                .or_default()
410                .push(mark.clone());
411        }
412
413        // Process each chain
414        let mut chains = Vec::new();
415        for (chain_id_bytes, mut chain_marks) in chain_bins {
416            // Sort by sequence number
417            chain_marks.sort_by_key(|m| m.seq());
418
419            // Check for genesis mark
420            let has_genesis = chain_marks
421                .first()
422                .is_some_and(|m| m.seq() == 0 && m.is_genesis());
423
424            // Build sequence bins
425            let sequences = Self::build_sequence_bins(&chain_marks);
426
427            chains.push(ChainReport {
428                chain_id: chain_id_bytes,
429                has_genesis,
430                marks: chain_marks,
431                sequences,
432            });
433        }
434
435        // Sort chains by chain ID for consistent output
436        chains.sort_by(|a, b| a.chain_id.cmp(&b.chain_id));
437
438        ValidationReport { marks: deduplicated_marks, chains }
439    }
440
441    fn build_sequence_bins(marks: &[ProvenanceMark]) -> Vec<SequenceReport> {
442        let mut sequences = Vec::new();
443        let mut current_sequence: Vec<FlaggedMark> = Vec::new();
444
445        for (i, mark) in marks.iter().enumerate() {
446            if i == 0 {
447                // First mark starts a sequence
448                current_sequence.push(FlaggedMark::new(mark.clone()));
449            } else {
450                let prev = &marks[i - 1];
451
452                // Check if this mark follows the previous one
453                match prev.precedes_opt(mark) {
454                    Ok(()) => {
455                        // Continues the current sequence
456                        current_sequence.push(FlaggedMark::new(mark.clone()));
457                    }
458                    Err(e) => {
459                        // Breaks the sequence - save current and start new
460                        if !current_sequence.is_empty() {
461                            sequences.push(Self::create_sequence_report(
462                                current_sequence,
463                            ));
464                        }
465
466                        // Start new sequence with this mark, flagged with the
467                        // issue
468                        let issue = match e {
469                            crate::Error::Validation(v) => v,
470                            _ => ValidationIssue::KeyMismatch, // Fallback
471                        };
472                        current_sequence =
473                            vec![FlaggedMark::with_issue(mark.clone(), issue)];
474                    }
475                }
476            }
477        }
478
479        // Add the final sequence
480        if !current_sequence.is_empty() {
481            sequences.push(Self::create_sequence_report(current_sequence));
482        }
483
484        sequences
485    }
486
487    fn create_sequence_report(marks: Vec<FlaggedMark>) -> SequenceReport {
488        let start_seq = marks.first().map(|m| m.mark.seq()).unwrap_or(0);
489        let end_seq = marks.last().map(|m| m.mark.seq()).unwrap_or(0);
490
491        SequenceReport { start_seq, end_seq, marks }
492    }
493}
494
495impl ProvenanceMark {
496    /// Validate a collection of provenance marks
497    ///
498    /// This method analyzes the provided marks and produces a comprehensive
499    /// validation report that includes:
500    /// - Deduplication of exact duplicates
501    /// - Organization by chain ID
502    /// - Detection of genesis marks
503    /// - Identification of contiguous sequences
504    /// - Flagging of validation issues (hash mismatches, sequence gaps, etc.)
505    pub fn validate(marks: Vec<ProvenanceMark>) -> ValidationReport {
506        ValidationReport::validate(marks)
507    }
508}