Skip to main content

cdx_core/provenance/
record.rs

//! Provenance record for complete document history tracking.
//!
//! The provenance record (`provenance/record.json`) stores comprehensive
//! provenance information, including lineage, timestamps, and derivation history.
5
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8
9use crate::manifest::Lineage;
10use crate::{DocumentId, HashAlgorithm};
11
12/// Complete provenance record for a document.
13///
14/// This structure is stored at `provenance/record.json` and provides:
15/// - Document identity and creation information
16/// - Full lineage chain
17/// - Merkle tree information for content integrity
18/// - Timestamp records for temporal anchoring
19/// - Derivation records for tracking content sources
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21#[serde(rename_all = "camelCase")]
22pub struct ProvenanceRecord {
23    /// Version of the provenance record format.
24    pub version: String,
25
26    /// Document identifier.
27    pub document_id: DocumentId,
28
29    /// When the document was created.
30    pub created: DateTime<Utc>,
31
32    /// Information about the document creator.
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub creator: Option<CreatorInfo>,
35
36    /// Lineage information.
37    #[serde(default, skip_serializing_if = "Option::is_none")]
38    pub lineage: Option<Lineage>,
39
40    /// Merkle tree information.
41    pub merkle: MerkleInfo,
42
43    /// Timestamp records for temporal anchoring.
44    #[serde(default, skip_serializing_if = "Vec::is_empty")]
45    pub timestamps: Vec<TimestampRecord>,
46
47    /// Records of content derived from other sources.
48    #[serde(default, skip_serializing_if = "Vec::is_empty")]
49    pub derived_from: Vec<DerivationRecord>,
50}
51
52impl ProvenanceRecord {
53    /// Current version of the provenance record format.
54    pub const VERSION: &'static str = "0.1";
55
56    /// Create a new provenance record.
57    #[must_use]
58    pub fn new(document_id: DocumentId, merkle: MerkleInfo) -> Self {
59        Self {
60            version: Self::VERSION.to_string(),
61            document_id,
62            created: Utc::now(),
63            creator: None,
64            lineage: None,
65            merkle,
66            timestamps: Vec::new(),
67            derived_from: Vec::new(),
68        }
69    }
70
71    /// Set the creator information.
72    #[must_use]
73    pub fn with_creator(mut self, creator: CreatorInfo) -> Self {
74        self.creator = Some(creator);
75        self
76    }
77
78    /// Set the lineage information.
79    #[must_use]
80    pub fn with_lineage(mut self, lineage: Lineage) -> Self {
81        self.lineage = Some(lineage);
82        self
83    }
84
85    /// Add a timestamp record.
86    #[must_use]
87    pub fn with_timestamp(mut self, timestamp: TimestampRecord) -> Self {
88        self.timestamps.push(timestamp);
89        self
90    }
91
92    /// Add a derivation record.
93    #[must_use]
94    pub fn with_derivation(mut self, derivation: DerivationRecord) -> Self {
95        self.derived_from.push(derivation);
96        self
97    }
98
99    /// Serialize to JSON.
100    ///
101    /// # Errors
102    ///
103    /// Returns an error if serialization fails.
104    pub fn to_json(&self) -> crate::Result<String> {
105        serde_json::to_string_pretty(self).map_err(Into::into)
106    }
107
108    /// Deserialize from JSON.
109    ///
110    /// # Errors
111    ///
112    /// Returns an error if deserialization fails.
113    pub fn from_json(json: &str) -> crate::Result<Self> {
114        serde_json::from_str(json).map_err(Into::into)
115    }
116}
117
118/// Information about the document creator.
119#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
120#[serde(rename_all = "camelCase")]
121pub struct CreatorInfo {
122    /// Creator's name.
123    pub name: String,
124
125    /// Creator's email address.
126    #[serde(default, skip_serializing_if = "Option::is_none")]
127    pub email: Option<String>,
128
129    /// Creator's organization.
130    #[serde(default, skip_serializing_if = "Option::is_none")]
131    pub organization: Option<String>,
132
133    /// URI identifying the creator.
134    #[serde(default, skip_serializing_if = "Option::is_none")]
135    pub uri: Option<String>,
136}
137
138impl CreatorInfo {
139    /// Create new creator info with just a name.
140    #[must_use]
141    pub fn new(name: impl Into<String>) -> Self {
142        Self {
143            name: name.into(),
144            email: None,
145            organization: None,
146            uri: None,
147        }
148    }
149
150    /// Set the email address.
151    #[must_use]
152    pub fn with_email(mut self, email: impl Into<String>) -> Self {
153        self.email = Some(email.into());
154        self
155    }
156
157    /// Set the organization.
158    #[must_use]
159    pub fn with_organization(mut self, org: impl Into<String>) -> Self {
160        self.organization = Some(org.into());
161        self
162    }
163
164    /// Set the URI.
165    #[must_use]
166    pub fn with_uri(mut self, uri: impl Into<String>) -> Self {
167        self.uri = Some(uri.into());
168        self
169    }
170}
171
172/// Information about the Merkle tree structure.
173#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
174#[serde(rename_all = "camelCase")]
175pub struct MerkleInfo {
176    /// Merkle root hash.
177    pub root: DocumentId,
178
179    /// Number of content blocks.
180    pub block_count: usize,
181
182    /// Hash algorithm used.
183    pub algorithm: HashAlgorithm,
184}
185
186impl MerkleInfo {
187    /// Create new Merkle info.
188    #[must_use]
189    pub fn new(root: DocumentId, block_count: usize, algorithm: HashAlgorithm) -> Self {
190        Self {
191            root,
192            block_count,
193            algorithm,
194        }
195    }
196}
197
198/// Record of a timestamp anchoring.
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200#[serde(rename_all = "camelCase")]
201pub struct TimestampRecord {
202    /// Timestamp method used.
203    pub method: TimestampMethod,
204
205    /// Name or URL of the timestamp authority.
206    pub authority: String,
207
208    /// Time recorded by the authority.
209    pub time: DateTime<Utc>,
210
211    /// Base64-encoded timestamp token or proof.
212    pub token: String,
213
214    /// Transaction ID or reference (for blockchain anchors).
215    #[serde(default, skip_serializing_if = "Option::is_none")]
216    pub transaction_id: Option<String>,
217}
218
219impl TimestampRecord {
220    /// Create a new RFC 3161 timestamp record.
221    #[must_use]
222    pub fn rfc3161(
223        authority: impl Into<String>,
224        time: DateTime<Utc>,
225        token: impl Into<String>,
226    ) -> Self {
227        Self {
228            method: TimestampMethod::Rfc3161,
229            authority: authority.into(),
230            time,
231            token: token.into(),
232            transaction_id: None,
233        }
234    }
235
236    /// Create a new Bitcoin timestamp record.
237    #[must_use]
238    pub fn bitcoin(
239        time: DateTime<Utc>,
240        token: impl Into<String>,
241        tx_id: impl Into<String>,
242    ) -> Self {
243        Self {
244            method: TimestampMethod::Bitcoin,
245            authority: "Bitcoin Mainnet".to_string(),
246            time,
247            token: token.into(),
248            transaction_id: Some(tx_id.into()),
249        }
250    }
251
252    /// Create a new Ethereum timestamp record.
253    #[must_use]
254    pub fn ethereum(
255        time: DateTime<Utc>,
256        token: impl Into<String>,
257        tx_id: impl Into<String>,
258    ) -> Self {
259        Self {
260            method: TimestampMethod::Ethereum,
261            authority: "Ethereum Mainnet".to_string(),
262            time,
263            token: token.into(),
264            transaction_id: Some(tx_id.into()),
265        }
266    }
267
268    /// Create a new `OpenTimestamps` record.
269    #[must_use]
270    pub fn open_timestamps(time: DateTime<Utc>, token: impl Into<String>) -> Self {
271        Self {
272            method: TimestampMethod::OpenTimestamps,
273            authority: "OpenTimestamps".to_string(),
274            time,
275            token: token.into(),
276            transaction_id: None,
277        }
278    }
279
280    /// Verify that this timestamp matches a document ID.
281    ///
282    /// Note: This only checks the token format, not cryptographic validity.
283    /// Full verification requires the timestamp authority's certificate or blockchain access.
284    #[must_use]
285    pub fn matches_document(&self, _document_id: &DocumentId) -> bool {
286        // Basic validation - token should be non-empty
287        !self.token.is_empty()
288    }
289}
290
291/// Method used for timestamp anchoring.
292#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, strum::Display)]
293#[serde(rename_all = "lowercase")]
294pub enum TimestampMethod {
295    /// RFC 3161 Time Stamp Protocol.
296    #[strum(serialize = "RFC 3161")]
297    Rfc3161,
298    /// Bitcoin blockchain anchoring.
299    Bitcoin,
300    /// Ethereum blockchain anchoring.
301    Ethereum,
302    /// `OpenTimestamps` protocol.
303    OpenTimestamps,
304}
305
306/// Record of content derived from another source.
307#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
308#[serde(rename_all = "camelCase")]
309pub struct DerivationRecord {
310    /// Source document or resource identifier.
311    pub source: String,
312
313    /// Type of derivation.
314    pub derivation_type: DerivationType,
315
316    /// Human-readable description.
317    #[serde(default, skip_serializing_if = "Option::is_none")]
318    pub description: Option<String>,
319
320    /// When the derivation occurred.
321    #[serde(default, skip_serializing_if = "Option::is_none")]
322    pub timestamp: Option<DateTime<Utc>>,
323
324    /// License under which the source was used.
325    #[serde(default, skip_serializing_if = "Option::is_none")]
326    pub license: Option<String>,
327}
328
329impl DerivationRecord {
330    /// Create a new derivation record.
331    #[must_use]
332    pub fn new(source: impl Into<String>, derivation_type: DerivationType) -> Self {
333        Self {
334            source: source.into(),
335            derivation_type,
336            description: None,
337            timestamp: None,
338            license: None,
339        }
340    }
341
342    /// Set a description.
343    #[must_use]
344    pub fn with_description(mut self, description: impl Into<String>) -> Self {
345        self.description = Some(description.into());
346        self
347    }
348
349    /// Set the timestamp.
350    #[must_use]
351    pub fn with_timestamp(mut self, timestamp: DateTime<Utc>) -> Self {
352        self.timestamp = Some(timestamp);
353        self
354    }
355
356    /// Set the license.
357    #[must_use]
358    pub fn with_license(mut self, license: impl Into<String>) -> Self {
359        self.license = Some(license.into());
360        self
361    }
362}
363
364/// Type of content derivation.
365#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, strum::Display)]
366#[serde(rename_all = "camelCase")]
367pub enum DerivationType {
368    /// Direct quotation from source.
369    Quotation,
370    /// Paraphrased or summarized content.
371    Paraphrase,
372    /// Content translated from another language.
373    Translation,
374    /// Content adapted or modified.
375    Adaptation,
376    /// Content based on or inspired by source.
377    #[strum(serialize = "Based On")]
378    BasedOn,
379    /// Content imported from external source.
380    Import,
381}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed SHA-256 document ID shared by the tests in this module.
    fn test_hash() -> DocumentId {
        "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
            .parse::<DocumentId>()
            .unwrap()
    }

    /// A fresh record carries the current format version and no timestamps.
    #[test]
    fn test_provenance_record_creation() {
        let record = ProvenanceRecord::new(
            test_hash(),
            MerkleInfo::new(test_hash(), 10, HashAlgorithm::Sha256),
        );

        assert_eq!(record.version, "0.1");
        assert_eq!(record.merkle.block_count, 10);
        assert!(record.timestamps.is_empty());
    }

    /// Attaching a creator stores it on the record.
    #[test]
    fn test_provenance_record_with_creator() {
        let merkle = MerkleInfo::new(test_hash(), 5, HashAlgorithm::Sha256);
        let record = ProvenanceRecord::new(test_hash(), merkle).with_creator(
            CreatorInfo::new("Jane Doe")
                .with_email("jane@example.com")
                .with_organization("Acme Corp"),
        );

        let creator = record.creator.as_ref();
        assert!(creator.is_some());
        assert_eq!(creator.unwrap().name, "Jane Doe");
    }

    /// The RFC 3161 constructor records the method and the given authority.
    #[test]
    fn test_timestamp_record_rfc3161() {
        let authority = "https://timestamp.example.com";
        let timestamp = TimestampRecord::rfc3161(authority, Utc::now(), "base64token");

        assert_eq!(timestamp.method, TimestampMethod::Rfc3161);
        assert_eq!(timestamp.authority, "https://timestamp.example.com");
    }

    /// The Bitcoin constructor records the method and a transaction ID.
    #[test]
    fn test_timestamp_record_bitcoin() {
        let now = Utc::now();
        let timestamp = TimestampRecord::bitcoin(now, "opreturn_data", "abc123def456");

        assert_eq!(timestamp.method, TimestampMethod::Bitcoin);
        assert!(timestamp.transaction_id.is_some());
    }

    /// Builder methods on a derivation record keep the type and set the extras.
    #[test]
    fn test_derivation_record() {
        let base = DerivationRecord::new("https://example.com/source", DerivationType::Quotation);
        let derivation = base
            .with_description("Quote from chapter 3")
            .with_license("CC-BY-4.0");

        assert_eq!(derivation.derivation_type, DerivationType::Quotation);
        assert!(derivation.description.is_some());
    }

    /// A record survives a JSON round trip and uses camelCase keys.
    #[test]
    fn test_provenance_record_serialization() {
        let record = ProvenanceRecord::new(
            test_hash(),
            MerkleInfo::new(test_hash(), 3, HashAlgorithm::Sha256),
        );

        let json = record.to_json().unwrap();
        assert!(json.contains("\"version\": \"0.1\""));
        assert!(json.contains("\"blockCount\": 3"));

        let deserialized = ProvenanceRecord::from_json(&json).unwrap();
        assert_eq!(deserialized.merkle.block_count, 3);
    }

    /// `Display` for timestamp methods honours the custom RFC 3161 label.
    #[test]
    fn test_timestamp_method_display() {
        assert_eq!(TimestampMethod::Rfc3161.to_string(), "RFC 3161");
        assert_eq!(TimestampMethod::Bitcoin.to_string(), "Bitcoin");
    }

    /// `Display` for derivation types prints the variant name.
    #[test]
    fn test_derivation_type_display() {
        assert_eq!(DerivationType::Quotation.to_string(), "Quotation");
        assert_eq!(DerivationType::Translation.to_string(), "Translation");
    }
}