graft_core/
commit_hash.rs

1use std::{
2    fmt::{Debug, Display},
3    str::FromStr,
4};
5
6use thiserror::Error;
7use zerocopy::{Immutable, IntoBytes, KnownLayout, TryFromBytes, Unaligned};
8
9use crate::{
10    VolumeId,
11    cbe::CBE64,
12    derive_zerocopy_encoding,
13    lsn::LSN,
14    page::Page,
15    page_count::PageCount,
16    pageidx::PageIdx,
17    zerocopy_ext::{self, ZerocopyErr},
18};
19
/// Total size of a `CommitHash` in bytes.
const COMMIT_HASH_SIZE: usize = 32;

/// Size in bytes of the hash portion of a `CommitHash`; the one remaining
/// byte holds the `CommitHashPrefix`.
const HASH_SIZE: usize = COMMIT_HASH_SIZE - 1;

/// Magic bytes fed into the hasher before anything else when computing a commit hash.
const COMMIT_HASH_MAGIC: [u8; 4] = [0x68, 0xA4, 0x19, 0x30];

// Length of a base58-encoded `CommitHash`, computed as
// ceil(32 * (log2(256) / log2(58))).
//
// Note: every `CommitHash` is required to encode to this maximum length.
// Well-constructed values currently guarantee this because the
// `CommitHashPrefix` occupies the most significant byte.
const ENCODED_LEN: usize = 44;
36
37#[derive(Debug, Error, PartialEq)]
38pub enum CommitHashParseErr {
39    #[error("invalid base58 encoding")]
40    DecodeErr(#[from] bs58::decode::Error),
41
42    #[error("invalid zerocopy encoding")]
43    ZerocopyErr(#[from] zerocopy_ext::ZerocopyErr),
44
45    #[error("invalid length")]
46    InvalidLength,
47}
48
49#[derive(
50    Debug,
51    Clone,
52    Copy,
53    PartialEq,
54    Eq,
55    Default,
56    TryFromBytes,
57    IntoBytes,
58    Immutable,
59    KnownLayout,
60    Unaligned,
61)]
62#[repr(u8)]
63pub enum CommitHashPrefix {
64    #[default]
65    Value = b'C',
66}
67
68#[derive(
69    Clone, PartialEq, Eq, Default, TryFromBytes, IntoBytes, Immutable, KnownLayout, Unaligned,
70)]
71#[repr(C)]
72pub struct CommitHash {
73    prefix: CommitHashPrefix,
74    hash: [u8; HASH_SIZE],
75}
76
77static_assertions::assert_eq_size!(CommitHash, [u8; COMMIT_HASH_SIZE]);
78
79impl CommitHash {
80    pub const ZERO: Self = Self {
81        prefix: CommitHashPrefix::Value,
82        hash: [0; HASH_SIZE],
83    };
84
85    #[cfg(any(test, feature = "testutil"))]
86    pub fn testonly_random() -> Self {
87        Self {
88            prefix: CommitHashPrefix::Value,
89            hash: rand::random(),
90        }
91    }
92
93    /// Encodes the `CommitHash` to base58 and returns it as a string
94    #[inline]
95    pub fn pretty(&self) -> String {
96        bs58::encode(self.as_bytes()).into_string()
97    }
98}
99
100impl TryFrom<[u8; COMMIT_HASH_SIZE]> for CommitHash {
101    type Error = CommitHashParseErr;
102
103    #[inline]
104    fn try_from(value: [u8; COMMIT_HASH_SIZE]) -> Result<Self, Self::Error> {
105        Ok(zerocopy::try_transmute!(value).map_err(ZerocopyErr::from)?)
106    }
107}
108
109impl From<CommitHash> for [u8; COMMIT_HASH_SIZE] {
110    #[inline]
111    fn from(value: CommitHash) -> Self {
112        zerocopy::transmute!(value)
113    }
114}
115
116impl FromStr for CommitHash {
117    type Err = CommitHashParseErr;
118
119    fn from_str(value: &str) -> Result<Self, Self::Err> {
120        // verify the length
121        if value.len() != ENCODED_LEN {
122            return Err(CommitHashParseErr::InvalidLength);
123        }
124
125        // parse from base58
126        let bytes: [u8; COMMIT_HASH_SIZE] = bs58::decode(value.as_bytes()).into_array_const()?;
127        bytes.try_into()
128    }
129}
130
131impl Debug for CommitHash {
132    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
133        write!(f, "CommitHash({})", self.pretty())
134    }
135}
136
137impl Display for CommitHash {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139        write!(f, "{}", self.pretty())
140    }
141}
142
143derive_zerocopy_encoding!(
144    encode type (CommitHash)
145    with size (COMMIT_HASH_SIZE)
146    with empty (CommitHash::ZERO)
147);
148
149/// Builder for computing commit hashes using BLAKE3.
150///
151/// Implements the commit hash algorithm as specified in RFC 0001.
152/// The hash incorporates the volume ID, LSN, page count, and page data
153/// to ensure uniqueness and integrity verification.
154pub struct CommitHashBuilder {
155    hasher: blake3::Hasher,
156    last_pageidx: Option<PageIdx>,
157}
158
159impl CommitHashBuilder {
160    /// Creates a new `CommitHashBuilder` initialized with the given volume metadata.
161    pub fn new(vid: VolumeId, lsn: LSN, volume_pages: PageCount) -> Self {
162        let mut hasher = blake3::Hasher::new();
163        hasher.update(&COMMIT_HASH_MAGIC);
164        hasher.update(vid.as_bytes());
165        hasher.update(CBE64::from(lsn).as_bytes());
166        hasher.update(&volume_pages.to_u32().to_be_bytes());
167        Self { hasher, last_pageidx: None }
168    }
169
170    /// Writes a page to the hash computation.
171    ///
172    /// # Panics
173    /// This method will panic if pages are written out of order by pageidx
174    pub fn write_page(&mut self, pageidx: PageIdx, page: &Page) {
175        // Ensure pages are written in order
176        if let Some(last_pageidx) = self.last_pageidx.replace(pageidx) {
177            assert!(
178                pageidx > last_pageidx,
179                "Pages must be written in order by pageidx. Last: {last_pageidx}, Current: {pageidx}"
180            );
181        }
182
183        self.hasher.update(&pageidx.to_u32().to_be_bytes());
184        self.hasher.update(page.as_ref());
185    }
186
187    /// Finalizes the hash computation and returns the `CommitHash`.
188    pub fn build(self) -> CommitHash {
189        let hash = self.hasher.finalize();
190        let mut bytes = *hash.as_bytes();
191        bytes[0] = CommitHashPrefix::Value as u8;
192        zerocopy::try_transmute!(bytes).expect("prefix byte manually set")
193    }
194}
195
#[cfg(test)]
mod tests {
    use std::panic;

    use super::*;
    use crate::{lsn, pageidx};
    use bilrost::{Message, OwnedMessage};

    /// Round-trips a message holding a `CommitHash` through bilrost encode/decode.
    #[graft_test::test]
    fn test_commit_hash_bilrost() {
        #[derive(Message, Debug, PartialEq, Eq)]
        struct TestMsg {
            hash: Option<CommitHash>,
        }

        let msg = TestMsg {
            hash: Some(CommitHash::testonly_random()),
        };
        let b = msg.encode_to_bytes();
        let decoded: TestMsg = TestMsg::decode(b).unwrap();
        assert_eq!(decoded, msg, "Decoded message does not match original");
    }

    /// Checks `CommitHashBuilder` output against fixed expected hashes.
    #[graft_test::test]
    fn test_commit_hash_builder_table() {
        let vid: VolumeId = "5rMJhdVrxb-2e7iyEK3dXuE3".parse().unwrap();

        struct TestCase {
            name: &'static str,
            vid: VolumeId,
            lsn: LSN,
            page_count: PageCount,
            pages: Vec<(PageIdx, Page)>,
            expected_hash: &'static str,
        }

        let test_cases = vec![
            TestCase {
                name: "empty_volume",
                vid: vid.clone(),
                lsn: lsn!(1),
                page_count: PageCount::ZERO,
                pages: vec![],
                expected_hash: "5YbaAZvwrzRck5WQPwaKqo5SirMns1WGPwxvkoc16Jn6",
            },
            TestCase {
                name: "single_page",
                vid: vid.clone(),
                lsn: lsn!(42),
                page_count: PageCount::new(1),
                pages: vec![(pageidx!(1), Page::test_filled(0xAA))],
                expected_hash: "5XqotAhgdkC8NBdv5eS4jZFM1LCeugjLQHpwSDEgfz8n",
            },
            TestCase {
                name: "multiple_pages",
                vid,
                lsn: lsn!(123),
                page_count: PageCount::new(2),
                pages: vec![
                    (pageidx!(1), Page::test_filled(0x11)),
                    (pageidx!(2), Page::test_filled(0x22)),
                ],
                expected_hash: "5XYzfp5hcQLw3TejqZPT1GcXz2XV7fXFGYYhJ1KLUjNw",
            },
        ];

        for test_case in test_cases {
            let mut builder =
                CommitHashBuilder::new(test_case.vid, test_case.lsn, test_case.page_count);

            for (pageidx, page) in test_case.pages {
                builder.write_page(pageidx, &page);
            }

            let hash = builder.build();
            println!("hash for case {}: {}", test_case.name, hash.pretty());
            let expected_hash: CommitHash = test_case.expected_hash.parse().unwrap();

            // Compare both the parsed value and the re-encoded string form.
            assert_eq!(
                hash,
                expected_hash,
                "Hash mismatch for test case: {}. Expected: {}, Got: {}",
                test_case.name,
                test_case.expected_hash,
                hash.pretty()
            );
            assert_eq!(
                &hash.pretty(),
                test_case.expected_hash,
                "Pretty format mismatch for test case: {}. Expected: {}, Got: {}",
                test_case.name,
                test_case.expected_hash,
                hash.pretty()
            );
        }
    }

    /// Writing a lower page index after a higher one must panic.
    #[graft_test::test]
    #[should_panic(expected = "Pages must be written in order by pageidx")]
    fn test_commit_hash_builder_page_order_panic() {
        let mut builder = CommitHashBuilder::new(VolumeId::random(), LSN::FIRST, PageCount::ZERO);
        builder.write_page(pageidx!(2), &Page::test_filled(0x22));
        builder.write_page(pageidx!(1), &Page::test_filled(0x11)); // This should panic
    }

    /// Parsing a valid string and re-parsing its `pretty` form yields equal hashes.
    // Uses only `#[graft_test::test]`, like every other test in this module; the
    // additional plain `#[test]` attribute that used to be stacked here was redundant.
    #[graft_test::test]
    fn test_commit_hash_from_str() {
        let hash: CommitHash = "5aNs8RN7tSRqfi66ubcPqSVqrWBGbaPU6C4mBVp6NYgo"
            .parse()
            .unwrap();
        let encoded = hash.pretty();
        let decoded: CommitHash = encoded.parse().unwrap();
        assert_eq!(hash, decoded);
    }

    /// Every malformed input must be rejected by `FromStr`.
    #[graft_test::test]
    fn test_commit_hash_from_str_invalid() {
        // Test various invalid inputs
        let invalid_cases = vec![
            "",      // empty string
            "short", // too short
            "verylongstringthatiswaytoologtobeahashverylongstringthatiswaytoologtobeahashverylongstringthatiswaytoologtobeahash", // too long
            "invalid!@#$%^&*()characters", // invalid characters
            "5aNs8RN7tSRqfi66ubcPqSVqrWBGbaPU6C4mBVp6NYg", // wrong length (43 chars)
            "5aNs8RN7tSRqfi66ubcPqSVqrWBGbaPU6C4mBVp6NYgoY", // wrong length (45 chars)
            "4aNs8RN7tSRqfi66ubcPqSVqrWBGbaPU6C4mBVp6NYgo", // wrong prefix
        ];

        for case in invalid_cases {
            if let Ok(hash) = case.parse::<CommitHash>() {
                panic!(
                    "Expected error for case: `{}`, but parsed successfully: {}",
                    case,
                    hash.pretty()
                )
            }
        }
    }
}