reasonkit/verification/
proof_ledger.rs

1//! ProofLedger - Immutable Citation Ledger
2//!
3//! Part of Protocol Delta: The Anchor
4//!
5//! Provides cryptographic binding for citations to prevent drift and ensure
6//! research claims remain verifiable over time.
7
8use chrono::{DateTime, Utc};
9use rusqlite::{params, Connection, Result as SqliteResult};
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12use std::path::{Path, PathBuf};
13use thiserror::Error;
14
15/// Errors that can occur during ProofLedger operations
16#[derive(Error, Debug)]
17pub enum ProofLedgerError {
18    /// Database operation failed
19    #[error("Database error: {0}")]
20    Database(#[from] rusqlite::Error),
21
22    /// Hash not found in ledger
23    #[error("Hash not found: {0}")]
24    HashNotFound(String),
25
26    /// Content drift detected
27    #[error("Content drift detected: expected {expected}, got {actual}")]
28    DriftDetected {
29        /// Expected hash
30        expected: String,
31        /// Actual hash of current content
32        actual: String,
33    },
34
35    /// IO error
36    #[error("IO error: {0}")]
37    Io(#[from] std::io::Error),
38}
39
40/// Result type for ProofLedger operations
41pub type Result<T> = std::result::Result<T, ProofLedgerError>;
42
43/// An immutable anchor representing a snapshot of content at a point in time
44#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
45pub struct Anchor {
46    /// SHA-256 hash of the content (primary key)
47    pub hash: String,
48
49    /// Source URL
50    pub url: String,
51
52    /// Timestamp when the content was anchored
53    pub timestamp: DateTime<Utc>,
54
55    /// Snippet of the content (first 200 chars)
56    pub content_snippet: String,
57
58    /// Full content (stored separately in content-addressable storage)
59    /// Only the snippet is kept in the ledger for efficiency
60    #[serde(skip)]
61    pub full_content: Option<String>,
62
63    /// Optional metadata (JSON)
64    pub metadata: Option<String>,
65}
66
67/// Verification result from drift detection
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct VerificationResult {
70    /// Whether verification passed
71    pub verified: bool,
72
73    /// Original hash from anchor
74    pub original_hash: String,
75
76    /// Current content hash
77    pub current_hash: String,
78
79    /// Human-readable message
80    pub message: String,
81
82    /// Original anchor data
83    pub anchor: Anchor,
84}
85
86/// The ProofLedger - manages immutable citations
87pub struct ProofLedger {
88    /// SQLite connection for the ledger
89    conn: Connection,
90
91    /// Path to the ledger database
92    ledger_path: PathBuf,
93}
94
95impl ProofLedger {
96    /// Create a new ProofLedger with the specified ledger path
97    ///
98    /// # Arguments
99    ///
100    /// * `ledger_path` - Path to the SQLite database file
101    ///
102    /// # Examples
103    ///
104    /// ```no_run
105    /// use reasonkit::verification::ProofLedger;
106    ///
107    /// let ledger = ProofLedger::new("./proof_ledger.db")?;
108    /// # Ok::<(), reasonkit::verification::ProofLedgerError>(())
109    /// ```
110    pub fn new<P: AsRef<Path>>(ledger_path: P) -> Result<Self> {
111        let path = ledger_path.as_ref().to_path_buf();
112        let conn = Connection::open(&path)?;
113
114        // Create tables if they don't exist
115        conn.execute(
116            "CREATE TABLE IF NOT EXISTS anchors (
117                hash TEXT PRIMARY KEY,
118                url TEXT NOT NULL,
119                timestamp TEXT NOT NULL,
120                content_snippet TEXT NOT NULL,
121                metadata TEXT
122            )",
123            [],
124        )?;
125
126        // Create index on URL for faster lookups
127        conn.execute(
128            "CREATE INDEX IF NOT EXISTS idx_anchors_url ON anchors(url)",
129            [],
130        )?;
131
132        // Create index on timestamp for temporal queries
133        conn.execute(
134            "CREATE INDEX IF NOT EXISTS idx_anchors_timestamp ON anchors(timestamp)",
135            [],
136        )?;
137
138        Ok(Self {
139            conn,
140            ledger_path: path,
141        })
142    }
143
144    /// Create an in-memory ProofLedger (for testing)
145    ///
146    /// # Examples
147    ///
148    /// ```no_run
149    /// use reasonkit::verification::ProofLedger;
150    ///
151    /// let ledger = ProofLedger::in_memory()?;
152    /// # Ok::<(), reasonkit::verification::ProofLedgerError>(())
153    /// ```
154    pub fn in_memory() -> Result<Self> {
155        let conn = Connection::open_in_memory()?;
156
157        conn.execute(
158            "CREATE TABLE anchors (
159                hash TEXT PRIMARY KEY,
160                url TEXT NOT NULL,
161                timestamp TEXT NOT NULL,
162                content_snippet TEXT NOT NULL,
163                metadata TEXT
164            )",
165            [],
166        )?;
167
168        conn.execute("CREATE INDEX idx_anchors_url ON anchors(url)", [])?;
169
170        conn.execute(
171            "CREATE INDEX idx_anchors_timestamp ON anchors(timestamp)",
172            [],
173        )?;
174
175        Ok(Self {
176            conn,
177            ledger_path: PathBuf::from(":memory:"),
178        })
179    }
180
181    /// Compute SHA-256 hash of content
182    fn compute_hash(content: &str) -> String {
183        let mut hasher = Sha256::new();
184        hasher.update(content.as_bytes());
185        format!("{:x}", hasher.finalize())
186    }
187
188    /// Anchor content to the ledger
189    ///
190    /// Creates an immutable anchor for the given content and URL.
191    /// Returns the hash ID which can be used for citations.
192    ///
193    /// # Arguments
194    ///
195    /// * `content` - The full content to anchor
196    /// * `url` - Source URL
197    /// * `metadata` - Optional JSON metadata
198    ///
199    /// # Examples
200    ///
201    /// ```no_run
202    /// use reasonkit::verification::ProofLedger;
203    ///
204    /// let ledger = ProofLedger::in_memory()?;
205    /// let hash = ledger.anchor(
206    ///     "The global AI market size was valued at USD 196.63 billion in 2023.",
207    ///     "https://example.com/ai-market",
208    ///     None,
209    /// )?;
210    /// println!("Citation hash: {}", hash);
211    /// # Ok::<(), reasonkit::verification::ProofLedgerError>(())
212    /// ```
213    pub fn anchor(&self, content: &str, url: &str, metadata: Option<String>) -> Result<String> {
214        let hash = Self::compute_hash(content);
215        let timestamp = Utc::now();
216
217        // Create snippet (first 200 chars)
218        let snippet = if content.len() > 200 {
219            format!("{}...", &content[..200])
220        } else {
221            content.to_string()
222        };
223
224        // Try to insert; ignore if already exists
225        let result = self.conn.execute(
226            "INSERT OR IGNORE INTO anchors (hash, url, timestamp, content_snippet, metadata)
227             VALUES (?1, ?2, ?3, ?4, ?5)",
228            params![hash, url, timestamp.to_rfc3339(), snippet, metadata],
229        );
230
231        match result {
232            Ok(rows) if rows > 0 => {
233                tracing::info!("Anchored new proof: {}... -> {}", &hash[..8], url);
234            }
235            Ok(_) => {
236                tracing::debug!("Existing anchor found: {}...", &hash[..8]);
237            }
238            Err(e) => return Err(ProofLedgerError::Database(e)),
239        }
240
241        Ok(hash)
242    }
243
244    /// Retrieve an anchor by hash
245    ///
246    /// # Arguments
247    ///
248    /// * `hash` - The SHA-256 hash to look up
249    ///
250    /// # Examples
251    ///
252    /// ```no_run
253    /// use reasonkit::verification::ProofLedger;
254    ///
255    /// let ledger = ProofLedger::in_memory()?;
256    /// let hash = ledger.anchor("test content", "https://example.com", None)?;
257    /// let anchor = ledger.get_anchor(&hash)?;
258    /// assert_eq!(anchor.url, "https://example.com");
259    /// # Ok::<(), reasonkit::verification::ProofLedgerError>(())
260    /// ```
261    pub fn get_anchor(&self, hash: &str) -> Result<Anchor> {
262        let mut stmt = self.conn.prepare(
263            "SELECT hash, url, timestamp, content_snippet, metadata
264             FROM anchors WHERE hash = ?1",
265        )?;
266
267        let anchor = stmt.query_row(params![hash], |row| {
268            Ok(Anchor {
269                hash: row.get(0)?,
270                url: row.get(1)?,
271                timestamp: DateTime::parse_from_rfc3339(&row.get::<_, String>(2)?)
272                    .map(|dt| dt.with_timezone(&Utc))
273                    .map_err(|_| rusqlite::Error::InvalidQuery)?,
274                content_snippet: row.get(3)?,
275                full_content: None,
276                metadata: row.get(4)?,
277            })
278        })?;
279
280        Ok(anchor)
281    }
282
283    /// Verify current content against anchored hash
284    ///
285    /// Detects if the content has drifted from the original anchored version.
286    ///
287    /// # Arguments
288    ///
289    /// * `hash` - The original hash from the citation
290    /// * `current_content` - The current content to verify
291    ///
292    /// # Examples
293    ///
294    /// ```no_run
295    /// use reasonkit::verification::ProofLedger;
296    ///
297    /// let ledger = ProofLedger::in_memory()?;
298    /// let original = "Original content";
299    /// let hash = ledger.anchor(original, "https://example.com", None)?;
300    ///
301    /// // Verify with same content
302    /// let result = ledger.verify(&hash, original)?;
303    /// assert!(result.verified);
304    ///
305    /// // Verify with different content (drift)
306    /// let result = ledger.verify(&hash, "Modified content")?;
307    /// assert!(!result.verified);
308    /// # Ok::<(), reasonkit::verification::ProofLedgerError>(())
309    /// ```
310    pub fn verify(&self, hash: &str, current_content: &str) -> Result<VerificationResult> {
311        // Get the original anchor
312        let anchor = self.get_anchor(hash)?;
313
314        // Compute hash of current content
315        let current_hash = Self::compute_hash(current_content);
316
317        if current_hash == hash {
318            Ok(VerificationResult {
319                verified: true,
320                original_hash: hash.to_string(),
321                current_hash,
322                message: "VERIFIED: Content matches original anchor".to_string(),
323                anchor,
324            })
325        } else {
326            Ok(VerificationResult {
327                verified: false,
328                original_hash: hash.to_string(),
329                current_hash: current_hash.clone(),
330                message: format!(
331                    "DRIFT DETECTED: Expected {}..., got {}...",
332                    &hash[..8],
333                    &current_hash[..8]
334                ),
335                anchor,
336            })
337        }
338    }
339
340    /// Check for content drift by re-fetching and verifying
341    ///
342    /// This is a higher-level function that would typically:
343    /// 1. Re-fetch content from the URL
344    /// 2. Verify against the anchor
345    /// 3. Return drift status
346    ///
347    /// Note: This function requires external fetch capability.
348    /// For now, it just verifies the provided content.
349    ///
350    /// # Arguments
351    ///
352    /// * `hash` - The original hash
353    /// * `refetched_content` - Content re-fetched from the source
354    pub fn check_drift(&self, hash: &str, refetched_content: &str) -> Result<VerificationResult> {
355        self.verify(hash, refetched_content)
356    }
357
358    /// List all anchors for a given URL
359    ///
360    /// Useful for finding all citations from a particular source.
361    ///
362    /// # Arguments
363    ///
364    /// * `url` - The URL to search for
365    pub fn list_by_url(&self, url: &str) -> Result<Vec<Anchor>> {
366        let mut stmt = self.conn.prepare(
367            "SELECT hash, url, timestamp, content_snippet, metadata
368             FROM anchors WHERE url = ?1
369             ORDER BY timestamp DESC",
370        )?;
371
372        let anchors = stmt
373            .query_map(params![url], |row| {
374                Ok(Anchor {
375                    hash: row.get(0)?,
376                    url: row.get(1)?,
377                    timestamp: DateTime::parse_from_rfc3339(&row.get::<_, String>(2)?)
378                        .map(|dt| dt.with_timezone(&Utc))
379                        .map_err(|_| rusqlite::Error::InvalidQuery)?,
380                    content_snippet: row.get(3)?,
381                    full_content: None,
382                    metadata: row.get(4)?,
383                })
384            })?
385            .collect::<SqliteResult<Vec<_>>>()?;
386
387        Ok(anchors)
388    }
389
390    /// Count total anchors in the ledger
391    pub fn count(&self) -> Result<i64> {
392        let count: i64 = self
393            .conn
394            .query_row("SELECT COUNT(*) FROM anchors", [], |row| row.get(0))?;
395        Ok(count)
396    }
397
398    /// Get the ledger database path
399    pub fn ledger_path(&self) -> &Path {
400        &self.ledger_path
401    }
402}
403
#[cfg(test)]
mod tests {
    use super::*;

    /// Length of a hex-encoded SHA-256 digest.
    const SHA256_HEX_LEN: usize = 64;

    /// Source URL shared by tests that do not care about the URL.
    const EXAMPLE_URL: &str = "https://example.com";

    /// Opens a fresh in-memory ledger, panicking if it cannot be created.
    fn fresh_ledger() -> ProofLedger {
        ProofLedger::in_memory().unwrap()
    }

    /// Hashing is deterministic, 64 hex characters long, and content-sensitive.
    #[test]
    fn test_hash_computation() {
        let text = "The global AI market size was valued at USD 196.63 billion in 2023.";

        let first = ProofLedger::compute_hash(text);
        let second = ProofLedger::compute_hash(text);
        let other = ProofLedger::compute_hash("Different content");

        assert_eq!(first.len(), SHA256_HEX_LEN);
        assert_eq!(first, second);
        assert_ne!(first, other);
    }

    /// Anchoring stores a row that can be read back by its hash.
    #[test]
    fn test_anchor_creation() {
        let ledger = fresh_ledger();
        let source = "https://example.com/test";

        let hash = ledger
            .anchor("Test content for anchoring", source, None)
            .unwrap();
        assert_eq!(hash.len(), SHA256_HEX_LEN);

        let stored = ledger.get_anchor(&hash).unwrap();
        assert_eq!(stored.url, source);
        assert!(stored.content_snippet.contains("Test content"));
    }

    /// Anchoring identical content twice yields one row and one hash.
    #[test]
    fn test_duplicate_anchor() {
        let ledger = fresh_ledger();
        let text = "Duplicate test";

        let first = ledger.anchor(text, EXAMPLE_URL, None).unwrap();
        let second = ledger.anchor(text, EXAMPLE_URL, None).unwrap();

        assert_eq!(first, second);
        assert_eq!(ledger.count().unwrap(), 1);
    }

    /// Unchanged content verifies successfully.
    #[test]
    fn test_verification_success() {
        let ledger = fresh_ledger();
        let text = "Original immutable content";
        let hash = ledger.anchor(text, EXAMPLE_URL, None).unwrap();

        let outcome = ledger.verify(&hash, text).unwrap();

        assert!(outcome.verified);
        assert_eq!(outcome.original_hash, outcome.current_hash);
        assert!(outcome.message.contains("VERIFIED"));
    }

    /// Changed content is reported as drift rather than as an error.
    #[test]
    fn test_drift_detection() {
        let ledger = fresh_ledger();
        let hash = ledger
            .anchor("Original content", EXAMPLE_URL, None)
            .unwrap();

        let outcome = ledger.verify(&hash, "Modified content").unwrap();

        assert!(!outcome.verified);
        assert_ne!(outcome.original_hash, outcome.current_hash);
        assert!(outcome.message.contains("DRIFT DETECTED"));
    }

    /// Listing by URL returns only the anchors recorded for that URL.
    #[test]
    fn test_list_by_url() {
        let ledger = fresh_ledger();
        let target = "https://example.com/article";

        for (text, source) in [
            ("Content 1", target),
            ("Content 2", target),
            ("Content 3", "https://different.com"),
        ] {
            ledger.anchor(text, source, None).unwrap();
        }

        let found = ledger.list_by_url(target).unwrap();

        assert_eq!(found.len(), 2);
        assert!(found.iter().all(|a| a.url == target));
    }

    /// Metadata round-trips through the ledger unchanged.
    #[test]
    fn test_metadata_storage() {
        let ledger = fresh_ledger();
        let meta = r#"{"type": "market_stat", "confidence": 0.95}"#.to_string();

        let hash = ledger
            .anchor("Content with metadata", EXAMPLE_URL, Some(meta.clone()))
            .unwrap();

        let stored = ledger.get_anchor(&hash).unwrap();
        assert_eq!(stored.metadata, Some(meta));
    }

    /// Long content is cut down to a snippet ending in an ellipsis.
    #[test]
    fn test_snippet_truncation() {
        let ledger = fresh_ledger();
        let long_text = "A".repeat(300);

        let hash = ledger.anchor(&long_text, EXAMPLE_URL, None).unwrap();
        let snippet = ledger.get_anchor(&hash).unwrap().content_snippet;

        // 200 characters of content plus the "..." suffix.
        assert!(snippet.len() <= 204);
        assert!(snippet.ends_with("..."));
    }

    /// Looking up a hash that was never anchored is an error.
    #[test]
    fn test_hash_not_found() {
        let ledger = fresh_ledger();
        let unknown = "0".repeat(SHA256_HEX_LEN);

        assert!(ledger.get_anchor(&unknown).is_err());
    }
}