git_bug/replica/entity/operation/
operation_pack.rs

1// git-bug-rs - A rust library for interfacing with git-bug repositories
2//
3// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
4// SPDX-License-Identifier: GPL-3.0-or-later
5//
6// This file is part of git-bug-rs/git-gub.
7//
8// You should have received a copy of the License along with this program.
9// If not, see <https://www.gnu.org/licenses/agpl.txt>.
10
//! On-disk representation of operations. This makes it possible to commit a
//! series of operations in one go.
13
14use std::cell::OnceCell;
15
16use gix::objs::tree::EntryRef;
17use redb::Database;
18use serde::{Deserialize, Serialize, de::DeserializeOwned};
19use sha2::{Digest, Sha256};
20use simd_json::{OwnedValue, json_typed, lazy, value::prelude::base::Writable};
21
22use super::Operation;
23use crate::replica::{
24    cache::impl_cache,
25    entity::{Entity, id::Id, identity::IdentityStub, lamport},
26};
27
/// An [`OperationPack`] is a wrapper structure to store multiple
/// [`Operation`]s in a single git blob. Additionally, it holds and stores the
/// metadata for those operations.
///
/// When a pack is read from a commit (see `from_repository`), `author` and
/// `operations` come from the JSON blob of the `ops` tree entry, while the two
/// times are encoded in the *names* of the `create-clock-<n>` and
/// `edit-clock-<n>` tree entries.
#[derive(Debug, Deserialize, Serialize)]
pub(crate) struct OperationPack<E: Entity> {
    /// The author of the Operations. Must be the same author for all the
    /// Operations.
    pub(crate) author: IdentityStub,

    /// The list of [`Operation`]s stored in the [`OperationPack`].
    // The explicit bound replaces the one serde would infer for this field.
    // NOTE(review): presumably needed because the inferred `E: Serialize`
    // bound is not what we want for an entity marker type — confirm.
    #[serde(bound = "Vec<Operation<E>>: serde::Serialize + serde::de::DeserializeOwned")]
    pub(crate) operations: Vec<Operation<E>>,

    /// Encode the entity's logical time of creation across all entities of the
    /// same type.
    ///
    /// # Note
    /// This value is only set on the root [`OperationPack`]. It is `None` when
    /// the commit's tree has no `create-clock-<n>` entry.
    pub(crate) create_time: Option<lamport::Time>,

    /// Encode the entity's logical time of the last edit across all entities
    /// of the same type.
    ///
    /// Unlike `create_time`, this is required: reading a pack fails when the
    /// `edit-clock-<n>` tree entry is missing.
    pub(crate) edit_time: lamport::Time,
}
52
/// Errors that can occur while reading an operation pack from a commit.
#[allow(missing_docs)]
pub mod decode {
    use super::unmarshall;
    use crate::replica::cache;

    /// The ways in which decoding an operation pack can fail.
    // NOTE(review): several messages below contain typos ("an tree", "it's",
    // "maxium", "opertions"). They are user-visible output, so fixing them is
    // a (small) behaviour change and was left for a dedicated commit.
    #[derive(Debug, thiserror::Error)]
    pub enum Error {
        /// The tree of the entity commit could not be obtained.
        #[error("Expected to find an tree with the entity commit, but found none. Error: {0}")]
        MissingTree(#[from] gix::object::commit::Error),

        /// The commit's tree object could not be decoded into its entries.
        #[error("Failed to decode the tree to access it's entries.")]
        FailedTreeDecode(),

        /// The number in the `version-<n>` tree entry is larger than `1 << 12`.
        #[error(
            "The format version for this operations pack ({0}) exceeds the allowed maxium of {max}",
            max = 1 << 12
        )]
        FormatVersionTooBig(usize),

        /// No usable format version was found: either the tree has no
        /// `version-<n>` entry, or the entry encodes version `0`.
        #[error("The format version for this operations pack ({0}) is not known.")]
        UnknownFormatVersion(usize),

        /// The format version in the tree differs from the entity's
        /// `FORMAT_VERSION`.
        #[error(
            "The format version for this operations pack ({found}) is not what we expected \
             ({expected})."
        )]
        WrongFormatVersion { found: usize, expected: usize },

        /// The JSON blob of the `ops` entry could not be unmarshalled.
        #[error(
            "Failed to unmarshall the internal json representation of this operation pack, due \
             to: {0}"
        )]
        WrongData(#[from] unmarshall::Error),

        /// A tree entry that may only occur once was found again. The payload
        /// is the entry name (or entry-name prefix) in question.
        #[error("The operations pack `{0}` tree entry was found multiple times.")]
        DuplicatedEntry(&'static str),
        /// A required tree entry was not found. The payload is the entry name
        /// (or entry-name prefix) in question.
        #[error("The operations pack `{0}` tree entry was missing.")]
        MissingEntry(&'static str),

        /// The text following an entry-name prefix (`version-`,
        /// `create-clock-`, `edit-clock-`) could not be parsed as a number.
        #[error(
            "The opertions pack contained a number that we could not parse as number: \
             {fake_number}, because {error}."
        )]
        WrongNumberFormat {
            /// The text that was supposed to be a number.
            fake_number: String,
            /// Why parsing `fake_number` failed.
            error: std::num::ParseIntError,
        },

        /// An error from the replica cache.
        // NOTE(review): not constructed explicitly in this file; presumably
        // produced by the `impl_cache!` expansions — confirm.
        #[error(transparent)]
        CacheError(#[from] cache::Error),
    }
}
105
/// Name of the tree entry whose blob holds the JSON-encoded author and
/// operations of a pack.
const OPS_ENTRY_NAME: &str = "ops";
/// Name of the `extra` tree entry.
///
/// Not referenced anywhere in this file (hence the `allow`).
// NOTE(review): presumably reserved for additional data stored next to the
// operations — confirm against the git-bug data model.
#[allow(unused)]
const EXTRA_ENTRY_NAME: &str = "extra";
/// Prefix of the tree entry whose name encodes the format version
/// (`version-<n>`).
const VERSION_ENTRY_PREFIX: &str = "version-";
/// Prefix of the optional tree entry whose name encodes the creation lamport
/// time (`create-clock-<n>`).
const CREATE_CLOCK_ENTRY_PREFIX: &str = "create-clock-";
/// Prefix of the required tree entry whose name encodes the edit lamport time
/// (`edit-clock-<n>`).
const EDIT_CLOCK_ENTRY_PREFIX: &str = "edit-clock-";
112
113impl<E: Entity> OperationPack<E>
114where
115    E::OperationData: DeserializeOwned,
116{
117    fn check_version(entries: &[EntryRef<'_>]) -> Result<(), decode::Error> {
118        // Check the format version first, failing early instead of trying to read
119        // something.
120        let mut version: usize = 0;
121        for entry in entries {
122            if entry.filename.len() > VERSION_ENTRY_PREFIX.len()
123                && entry.filename[..VERSION_ENTRY_PREFIX.len()] == VERSION_ENTRY_PREFIX
124            {
125                let version_str = entry.filename[VERSION_ENTRY_PREFIX.len()..].to_string();
126                version = version_str
127                    .parse()
128                    .map_err(|err| decode::Error::WrongNumberFormat {
129                        fake_number: version_str,
130                        error: err,
131                    })?;
132
133                // TODO(@bpeetz): Why are we checking for such a large version?
134                // And wouldn't this already be found by the following checks? <2025-04-15>
135                if version > (1 << 12) {
136                    return Err(decode::Error::FormatVersionTooBig(version));
137                }
138                break;
139            }
140        }
141
142        if version == 0 {
143            return Err(decode::Error::UnknownFormatVersion(version));
144        }
145        if version != E::FORMAT_VERSION {
146            return Err(decode::Error::WrongFormatVersion {
147                found: version,
148                expected: E::FORMAT_VERSION,
149            });
150        }
151
152        Ok(())
153    }
154
155    #[allow(clippy::too_many_lines)]
156    pub(crate) fn from_repository(
157        db: &Database,
158        commit: &gix::Commit<'_>,
159    ) -> Result<Self, decode::Error> {
160        impl_cache!(@mk_table "operation_packs");
161
162        impl_cache! {@lookup db, commit.id().detach().as_bytes()}
163
164        let tree = commit.tree()?;
165
166        // TODO(@bpeetz): We could use the `Tree::iter` method instead, but this would
167        // mean to decode the tree twice (once for version, than for parsing)
168        // <2025-04-15>
169        let decoded_tree = tree.decode().map_err(|err| {
170            // (We want to ensure that this error type stays the same.)
171            #[allow(clippy::unit_cmp)]
172            {
173                // Check that the error is really useless
174                // (we do not enable the fancy error feature).
175                assert_eq!(err.inner, ());
176            }
177
178            decode::Error::FailedTreeDecode()
179        })?;
180
181        Self::check_version(&decoded_tree.entries)?;
182
183        let (operations, author, create_time, edit_time): (
184            Vec<Operation<E>>,
185            IdentityStub,
186            Option<lamport::Time>,
187            lamport::Time,
188        ) = {
189            let operations: OnceCell<Vec<Operation<E>>> = OnceCell::new();
190            let author: OnceCell<IdentityStub> = OnceCell::new();
191            let create_clock: OnceCell<lamport::Time> = OnceCell::new();
192            let edit_clock: OnceCell<lamport::Time> = OnceCell::new();
193
194            for entry in decoded_tree.entries {
195                match entry.filename {
196                    ops if ops == OPS_ENTRY_NAME => {
197                        let mut data = tree
198                            .repo
199                            .find_object(entry.oid)
200                            .expect("The id comes from a valid tree. It should also exist")
201                            .into_blob();
202
203                        let (new_operations, new_author) = Self::unmarshal_blob(&mut data)?;
204                        operations
205                            .set(new_operations)
206                            .map_err(|_| decode::Error::DuplicatedEntry(OPS_ENTRY_NAME))?;
207                        author
208                            .set(new_author)
209                            .map_err(|_| decode::Error::DuplicatedEntry(OPS_ENTRY_NAME))?;
210                    }
211
212                    create_clock_value
213                        if create_clock_value.len() > CREATE_CLOCK_ENTRY_PREFIX.len()
214                            && create_clock_value[..CREATE_CLOCK_ENTRY_PREFIX.len()]
215                                == CREATE_CLOCK_ENTRY_PREFIX =>
216                    {
217                        let value_str =
218                            create_clock_value[CREATE_CLOCK_ENTRY_PREFIX.len()..].to_string();
219                        let value: u64 =
220                            value_str
221                                .parse()
222                                .map_err(|err| decode::Error::WrongNumberFormat {
223                                    fake_number: value_str,
224                                    error: err,
225                                })?;
226
227                        create_clock.set(lamport::Time::from(value)).map_err(|_| {
228                            decode::Error::DuplicatedEntry(CREATE_CLOCK_ENTRY_PREFIX)
229                        })?;
230                    }
231                    edit_clock_value
232                        if edit_clock_value.len() > EDIT_CLOCK_ENTRY_PREFIX.len()
233                            && edit_clock_value[..EDIT_CLOCK_ENTRY_PREFIX.len()]
234                                == EDIT_CLOCK_ENTRY_PREFIX =>
235                    {
236                        let value_str =
237                            edit_clock_value[EDIT_CLOCK_ENTRY_PREFIX.len()..].to_string();
238                        let value: u64 =
239                            value_str
240                                .parse()
241                                .map_err(|err| decode::Error::WrongNumberFormat {
242                                    fake_number: value_str,
243                                    error: err,
244                                })?;
245
246                        edit_clock
247                            .set(lamport::Time::from(value))
248                            .map_err(|_| decode::Error::DuplicatedEntry(EDIT_CLOCK_ENTRY_PREFIX))?;
249                    }
250
251                    version
252                        if version.len() > VERSION_ENTRY_PREFIX.len()
253                            && version[..VERSION_ENTRY_PREFIX.len()] == VERSION_ENTRY_PREFIX =>
254                    {
255                        // The version, we can ignore that now.
256                    }
257                    other => unreachable!("BUG: Unknown entry name: {other}"),
258                }
259            }
260            (
261                operations
262                    .into_inner()
263                    .ok_or(decode::Error::MissingEntry(OPS_ENTRY_NAME))?,
264                author
265                    .into_inner()
266                    .ok_or(decode::Error::MissingEntry(OPS_ENTRY_NAME))?,
267                create_clock.into_inner(),
268                edit_clock
269                    .into_inner()
270                    .ok_or(decode::Error::MissingEntry(EDIT_CLOCK_ENTRY_PREFIX))?,
271            )
272        };
273
274        {
275            // Verify the commit signature if we expected one.
276            // TODO(@bpeetz): Perform the PGP signature validation. <2025-04-15>
277        }
278
279        let me = Self {
280            author,
281            operations,
282            create_time,
283            edit_time,
284        };
285
286        impl_cache! {@populate db, commit.id().detach().as_bytes(), &me}
287
288        Ok(me)
289    }
290
291    /// Calculate this [`OperationPack`]'s [`Id`].
292    ///
293    /// This first serializes the pack to JSON encoding and then calculates the
294    /// sha256 hash of the resulting string.
295    pub(crate) fn id(&self) -> Id {
296        let mut hasher = Sha256::new();
297        hasher.update(Self::as_marshaled_json(self).as_bytes());
298        let result = hasher.finalize();
299        Id::from_hex(&result[..]).expect("The hex comes from a hasher it is valid.")
300    }
301
302    /// Get the git-bug compatible JSON representation of this
303    /// [`OperationPack`].
304    fn as_marshaled_json(&self) -> String {
305        let ops: Vec<_> = self.operations.iter().map(Operation::as_value).collect();
306
307        let repr = json_typed!(borrowed, {
308            "author": self.author.as_value(),
309            "ops": ops
310        });
311
312        repr.encode()
313    }
314
315    fn unmarshal_blob(
316        data: &mut gix::Blob<'_>,
317    ) -> Result<(Vec<Operation<E>>, IdentityStub), unmarshall::Error> {
318        let data_str = String::from_utf8_lossy(&data.data).to_string();
319        // struct Repr {
320        //     author: IdentityStub,
321        //     ops: Vec<serde_json::Value>,
322        // }
323
324        let tape =
325            simd_json::to_tape(&mut data.data).map_err(|err| unmarshall::Error::InvalidJson {
326                data: data_str.clone(),
327                error: err,
328            })?;
329
330        let value = tape.as_value();
331        let lazy = lazy::Value::from_tape(value);
332
333        let author = IdentityStub::from_value(&lazy.get("author").ok_or_else(|| {
334            unmarshall::Error::MissingAuthor {
335                data: data_str.clone(),
336            }
337        })?)
338        .map_err(unmarshall::Error::IdentityStubParse)?;
339
340        let ops: Vec<Operation<E>> = lazy
341            .get("ops")
342            .ok_or_else(|| unmarshall::Error::MissingOps {
343                data: data_str.clone(),
344            })?
345            .as_array()
346            .ok_or_else(|| unmarshall::Error::OpsNotArray { data: data_str })?
347            .iter()
348            .map(|raw_op| {
349                let raw_op_owned: OwnedValue = raw_op.into_value().into();
350
351                Operation::<E>::from_value(raw_op_owned.clone(), author).map_err(|err| {
352                    unmarshall::Error::InvalidOperation {
353                        op: raw_op_owned,
354                        error: err,
355                    }
356                })
357            })
358            .collect::<Result<_, _>>()?;
359
360        Ok((ops, author))
361    }
362}
363
/// Errors that can occur while parsing the JSON blob of an operation pack.
#[allow(missing_docs)]
pub mod unmarshall {
    use simd_json::OwnedValue;

    use crate::replica::entity::operation::decode;

    /// The ways in which unmarshalling the `ops` blob can fail.
    ///
    /// Wherever a variant carries `data`, it is a lossy UTF-8 copy of the
    /// whole blob, taken before parsing.
    #[derive(Debug, thiserror::Error)]
    pub enum Error {
        /// The blob is not valid JSON.
        #[error("The operation pack data ({data}) is invalid json: {error}")]
        InvalidJson {
            data: String,
            error: simd_json::Error,
        },

        /// One element of the `ops` array could not be turned into an
        /// operation. `op` is the offending JSON value.
        // Note: `decode` here is the *operation's* decode module, not the one
        // of the operation pack.
        #[error("One of the operation pack's operations ({op}) could not be parsed: {error}")]
        InvalidOperation {
            op: OwnedValue,
            error: decode::Error,
        },

        /// The top-level JSON value has no `author` field.
        #[error("Was missing the “author” field in the data: {data}")]
        MissingAuthor { data: String },
        /// The top-level JSON value has no `ops` field.
        #[error("Was missing the “ops” field in the data: {data}")]
        MissingOps { data: String },

        /// The `ops` field exists, but is not a JSON array.
        #[error("The “ops” field is in the data not an array: {data}")]
        OpsNotArray { data: String },

        /// The `author` field could not be parsed as an identity stub.
        #[error("Failed to parse identity stub: {0}")]
        IdentityStubParse(crate::replica::entity::identity::from_value::Error),
    }
}
395}