Skip to main content

quipu_core/
checkpoint.rs

1//! Signed integrity checkpoints.
2//!
3//! The Merkle spine proves that retained records were not edited in place and
4//! that the history is append-only. It cannot, on its own, expose a *full
5//! rewrite*: an insider with disk access can delete the spine and every segment
6//! and replay a self-consistent tree from scratch, and nothing inside the store
7//! remains to contradict it. A checkpoint pins (segment seq, record count, tree
8//! size, Merkle root) under an RSA signature in a separate append-only file, and
9//! the anchor hook ([`crate::StoreConfig::anchor`]) lets the operator export
10//! each checkpoint to an external trust domain the insider cannot reach (another
11//! host, a ticket system, a transparency log). A rewritten tree cannot reproduce
12//! the checkpointed root without the original records, and a forged checkpoint
13//! cannot be signed without the private key. Because the root commits to the
14//! whole history, a verifier can also prove — with a consistency proof against
15//! the current spine — that the checkpointed state is a genuine prefix of the
16//! present one.
17
18use crate::crypto::{self, KeyRing};
19use crate::error::{Error, Result};
20use crate::merkle::Hash;
21use serde::{Deserialize, Serialize};
22use std::io::Write;
23use std::path::{Path, PathBuf};
24
/// Name of the checkpoint file; `CheckpointLog::new` joins it onto the store
/// root.
const FILE_NAME: &str = "checkpoints.log";
/// Value written to the `v` field of every appended line. `parse_line`
/// rejects a line carrying any other value.
const WIRE_VERSION: u8 = 2;
/// Domain separation: these signatures must never be confusable with any
/// other RSA signature made by the same key. `signing_bytes` emits this
/// prefix before the checkpoint fields.
const SIGNING_DOMAIN: &[u8] = b"quipu-checkpoint-v2\0";
30
31/// One signed integrity checkpoint of the audit-log Merkle tree. See the module
32/// docs for the threat it addresses.
33#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct Checkpoint {
35    /// UTC micros at signing time.
36    pub created_at: u64,
37    /// Sequence number of the logs segment that was active at checkpoint time.
38    pub segment_seq: u64,
39    /// Log records on disk at checkpoint time (decreases after retention).
40    pub record_count: u64,
41    /// Total records ever appended at checkpoint time — the Merkle tree size the
42    /// `merkle_root` is taken over. Never decreases (the spine is not purged), so
43    /// the current tree is always an extension of any past checkpoint.
44    pub tree_size: u64,
45    /// Merkle root over the first `tree_size` leaves at checkpoint time.
46    pub merkle_root: Hash,
47    /// [`crate::crypto::KeyVersion`] of the RSA key that signed this
48    /// checkpoint — after a rotation, old checkpoints still verify against
49    /// the retained public key of their own version.
50    pub key_version: u32,
51    /// RSA PKCS#1 v1.5 / SHA-256 signature over the fields above.
52    pub signature: Vec<u8>,
53}
54
55impl Checkpoint {
56    /// Sign a new checkpoint. Fails if the [`KeyRing`] holds no private key —
57    /// callers gate on [`KeyRing::can_sign`] to skip instead.
58    pub(crate) fn sign(
59        keys: &KeyRing,
60        created_at: u64,
61        segment_seq: u64,
62        record_count: u64,
63        tree_size: u64,
64        merkle_root: Hash,
65    ) -> Result<Self> {
66        let (key_version, signature) = keys.sign(&signing_bytes(
67            created_at,
68            segment_seq,
69            record_count,
70            tree_size,
71            &merkle_root,
72        ))?;
73        Ok(Self {
74            created_at,
75            segment_seq,
76            record_count,
77            tree_size,
78            merkle_root,
79            key_version,
80            signature,
81        })
82    }
83
84    /// Verify the signature with the ring's public key.
85    pub fn verify(&self, keys: &KeyRing) -> Result<()> {
86        keys.verify_signature(
87            self.key_version,
88            &signing_bytes(
89                self.created_at,
90                self.segment_seq,
91                self.record_count,
92                self.tree_size,
93                &self.merkle_root,
94            ),
95            &self.signature,
96        )
97        .map_err(|e| Error::Crypto(format!("checkpoint signature invalid: {e}")))
98    }
99
100    /// Hex form of the Merkle root — the value to hand to external anchors.
101    pub fn merkle_root_hex(&self) -> String {
102        crypto::hex(&self.merkle_root)
103    }
104}
105
106fn signing_bytes(
107    created_at: u64,
108    segment_seq: u64,
109    record_count: u64,
110    tree_size: u64,
111    merkle_root: &Hash,
112) -> Vec<u8> {
113    let mut out = Vec::with_capacity(SIGNING_DOMAIN.len() + 32 + merkle_root.len());
114    out.extend_from_slice(SIGNING_DOMAIN);
115    out.extend_from_slice(&created_at.to_le_bytes());
116    out.extend_from_slice(&segment_seq.to_le_bytes());
117    out.extend_from_slice(&record_count.to_le_bytes());
118    out.extend_from_slice(&tree_size.to_le_bytes());
119    out.extend_from_slice(merkle_root);
120    out
121}
122
123/// On-disk line format: one JSON object per line, hex/base64 for the binary
124/// fields so the file stays human-inspectable (it is the artifact operators
125/// compare against external anchors).
126#[derive(Serialize, Deserialize)]
127struct Wire {
128    v: u8,
129    created_at: u64,
130    segment_seq: u64,
131    record_count: u64,
132    tree_size: u64,
133    merkle_root: String,
134    /// Signing-key version. Defaults to 1 for lines written before key
135    /// rotation existed (JSON is self-describing, so this is additive).
136    #[serde(default = "default_key_version")]
137    key_version: u32,
138    signature: String,
139}
140
/// Serde fallback for `Wire::key_version` when a line omits the field.
fn default_key_version() -> u32 {
    // Key version assumed for lines that carry no `key_version` field.
    const IMPLICIT_KEY_VERSION: u32 = 1;
    IMPLICIT_KEY_VERSION
}
144
/// Handle on the append-only checkpoint file kept at the store root.
///
/// This is intentionally *not* a segment [`Table`](crate::storage::Table).
/// Checkpoints act as the external reference for the segment chain, so they
/// must not depend on the format they vouch for. Keeping them in their own
/// file also leaves segment files byte-identical to those of a store that
/// never checkpoints.
pub(crate) struct CheckpointLog {
    /// Full path of the checkpoint file.
    path: PathBuf,
}
153
154impl CheckpointLog {
155    pub(crate) fn new(root: &Path) -> Self {
156        Self {
157            path: root.join(FILE_NAME),
158        }
159    }
160
161    pub(crate) fn path(&self) -> &Path {
162        &self.path
163    }
164
165    /// Append one checkpoint and fsync. Checkpoints are rare (segment seals,
166    /// retention runs), so unconditional durability is cheap here — and a
167    /// checkpoint that claims a head must not itself be lost to a crash.
168    pub(crate) fn append(&self, cp: &Checkpoint) -> Result<()> {
169        let wire = Wire {
170            v: WIRE_VERSION,
171            created_at: cp.created_at,
172            segment_seq: cp.segment_seq,
173            record_count: cp.record_count,
174            tree_size: cp.tree_size,
175            merkle_root: crypto::hex(&cp.merkle_root),
176            key_version: cp.key_version,
177            signature: crypto::b64::encode(&cp.signature),
178        };
179        let mut line = serde_json::to_string(&wire).map_err(|e| Error::Encode(e.to_string()))?;
180        line.push('\n');
181        let mut file = std::fs::OpenOptions::new()
182            .create(true)
183            .append(true)
184            .open(&self.path)?;
185        file.write_all(line.as_bytes())?;
186        file.sync_data()?;
187        Ok(())
188    }
189
190    /// Read every checkpoint, oldest first. A missing file means "never
191    /// checkpointed" (write-only configurations). A torn *last* line is a
192    /// crash mid-append and is skipped; a malformed earlier line is
193    /// corruption and fails.
194    pub(crate) fn read_all(&self) -> Result<Vec<Checkpoint>> {
195        let text = match std::fs::read_to_string(&self.path) {
196            Ok(t) => t,
197            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
198            Err(e) => return Err(e.into()),
199        };
200        let lines: Vec<&str> = text.lines().collect();
201        let mut out = Vec::with_capacity(lines.len());
202        for (i, line) in lines.iter().enumerate() {
203            match parse_line(line) {
204                Some(cp) => out.push(cp),
205                None if i == lines.len() - 1 => break,
206                None => {
207                    return Err(Error::Corrupt {
208                        segment: self.path.display().to_string(),
209                        offset: i as u64,
210                        reason: "unreadable checkpoint line".into(),
211                    });
212                }
213            }
214        }
215        Ok(out)
216    }
217}
218
219fn parse_line(line: &str) -> Option<Checkpoint> {
220    let wire: Wire = serde_json::from_str(line).ok()?;
221    if wire.v != WIRE_VERSION {
222        return None;
223    }
224    let root: Hash = crypto::hex_decode(&wire.merkle_root)?.try_into().ok()?;
225    Some(Checkpoint {
226        created_at: wire.created_at,
227        segment_seq: wire.segment_seq,
228        record_count: wire.record_count,
229        tree_size: wire.tree_size,
230        merkle_root: root,
231        key_version: wire.key_version,
232        signature: crypto::b64::decode(&wire.signature)?,
233    })
234}
235
#[cfg(test)]
mod tests {
    use super::*;

    /// A checkpoint survives an append/read round trip, a half-written
    /// trailing line is ignored, and altering a signed field invalidates the
    /// signature.
    #[test]
    fn roundtrip_and_torn_tail_tolerated() {
        let store_dir = tempfile::tempdir().unwrap();
        let checkpoints = CheckpointLog::new(store_dir.path());
        assert!(
            checkpoints.read_all().unwrap().is_empty(),
            "missing file is empty"
        );

        let ring = KeyRing::generate_ephemeral(2048).unwrap();
        let signed = Checkpoint::sign(&ring, 1, 2, 3, 3, [7; 32]).unwrap();
        checkpoints.append(&signed).unwrap();

        // Leave a partial record behind, as a crash in the middle of the
        // next append would.
        {
            let mut raw = std::fs::OpenOptions::new()
                .append(true)
                .open(checkpoints.path())
                .unwrap();
            raw.write_all(b"{\"v\":1,\"created_at\":9").unwrap();
        }

        let recovered = checkpoints.read_all().unwrap();
        assert_eq!(recovered, vec![signed.clone()]);
        recovered[0].verify(&ring).unwrap();

        // Changing any signed field must break verification.
        let tampered = Checkpoint {
            record_count: signed.record_count + 1,
            ..signed
        };
        assert!(tampered.verify(&ring).is_err());
    }
}
266}