git-bug 0.2.4

A rust library for interfacing with git-bug repositories
Documentation
// git-bug-rs - A rust library for interfacing with git-bug repositories
//
// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
// SPDX-License-Identifier: GPL-3.0-or-later
//
// This file is part of git-bug-rs/git-gub.
//
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/agpl.txt>.

//! On-disk representation of operations. This makes it possible to commit a
//! series of operations in one go.

use std::cell::OnceCell;

use gix::objs::tree::EntryRef;
use redb::Database;
use serde::{Deserialize, Serialize, de::DeserializeOwned};
use sha2::{Digest, Sha256};
use simd_json::{OwnedValue, json_typed, lazy, value::prelude::base::Writable};

use super::Operation;
use crate::replica::{
    cache::impl_cache,
    entity::{Entity, id::Id, identity::IdentityStub, lamport},
};

/// An [`OperationPack`] is a wrapper structure to store multiple
/// [`Operation`]s in a single git blob. Additionally, it holds and stores the
/// metadata for those operations.
///
/// The type parameter `E` is the [`Entity`] these operations belong to.
#[derive(Debug, Deserialize, Serialize)]
pub(crate) struct OperationPack<E: Entity> {
    /// The author of the Operations. Must be the same author for all the
    /// Operations.
    pub(crate) author: IdentityStub,

    /// The list of [`Operation`]s stored in the [`OperationPack`].
    // The serde bound is spelled out by hand here instead of relying on the
    // bound the derive would infer for `E`.
    #[serde(bound = "Vec<Operation<E>>: serde::Serialize + serde::de::DeserializeOwned")]
    pub(crate) operations: Vec<Operation<E>>,

    /// Encode the entity's logical time of creation across all entities of the
    /// same type.
    ///
    /// # Note
    /// This value is only set on the root [`OperationPack`]. When decoding, it
    /// is `None` if the tree carried no `create-clock-` entry.
    pub(crate) create_time: Option<lamport::Time>,

    /// Encode the entity's logical time of last edit across all entities of
    /// the same type.
    ///
    /// Unlike [`create_time`](Self::create_time), decoding fails if the tree
    /// carries no `edit-clock-` entry.
    pub(crate) edit_time: lamport::Time,
}

#[allow(missing_docs)]
pub mod decode {
    use super::unmarshall;
    use crate::replica::cache;

    #[derive(Debug, thiserror::Error)]
    pub enum Error {
        #[error("Expected to find an tree with the entity commit, but found none. Error: {0}")]
        MissingTree(#[from] gix::object::commit::Error),

        #[error("Failed to decode the tree to access it's entries.")]
        FailedTreeDecode(),

        #[error(
            "The format version for this operations pack ({0}) exceeds the allowed maxium of {max}",
            max = 1 << 12
        )]
        FormatVersionTooBig(usize),

        #[error("The format version for this operations pack ({0}) is not known.")]
        UnknownFormatVersion(usize),

        #[error(
            "The format version for this operations pack ({found}) is not what we expected \
             ({expected})."
        )]
        WrongFormatVersion { found: usize, expected: usize },

        #[error(
            "Failed to unmarshall the internal json representation of this operation pack, due \
             to: {0}"
        )]
        WrongData(#[from] unmarshall::Error),

        #[error("The operations pack `{0}` tree entry was found multiple times.")]
        DuplicatedEntry(&'static str),
        #[error("The operations pack `{0}` tree entry was missing.")]
        MissingEntry(&'static str),

        #[error(
            "The opertions pack contained a number that we could not parse as number: \
             {fake_number}, because {error}."
        )]
        WrongNumberFormat {
            fake_number: String,
            error: std::num::ParseIntError,
        },

        #[error(transparent)]
        CacheError(#[from] cache::Error),
    }
}

/// Name of the tree entry whose blob holds the JSON encoded operations
/// together with their author.
const OPS_ENTRY_NAME: &str = "ops";
/// Name of the `extra` tree entry. The decoding code below never refers to
/// it, hence the `allow(unused)`.
#[allow(unused)]
const EXTRA_ENTRY_NAME: &str = "extra";
/// Prefix of the tree entry whose name encodes the format version
/// (`version-<number>`).
const VERSION_ENTRY_PREFIX: &str = "version-";
/// Prefix of the tree entry whose name encodes the creation lamport time
/// (`create-clock-<number>`).
const CREATE_CLOCK_ENTRY_PREFIX: &str = "create-clock-";
/// Prefix of the tree entry whose name encodes the edit lamport time
/// (`edit-clock-<number>`).
const EDIT_CLOCK_ENTRY_PREFIX: &str = "edit-clock-";

impl<E: Entity> OperationPack<E>
where
    E::OperationData: DeserializeOwned,
{
    fn check_version(entries: &[EntryRef<'_>]) -> Result<(), decode::Error> {
        // Check the format version first, failing early instead of trying to read
        // something.
        let mut version: usize = 0;
        for entry in entries {
            if entry.filename.len() > VERSION_ENTRY_PREFIX.len()
                && entry.filename[..VERSION_ENTRY_PREFIX.len()] == VERSION_ENTRY_PREFIX
            {
                let version_str = entry.filename[VERSION_ENTRY_PREFIX.len()..].to_string();
                version = version_str
                    .parse()
                    .map_err(|err| decode::Error::WrongNumberFormat {
                        fake_number: version_str,
                        error: err,
                    })?;

                // TODO(@bpeetz): Why are we checking for such a large version?
                // And wouldn't this already be found by the following checks? <2025-04-15>
                if version > (1 << 12) {
                    return Err(decode::Error::FormatVersionTooBig(version));
                }
                break;
            }
        }

        if version == 0 {
            return Err(decode::Error::UnknownFormatVersion(version));
        }
        if version != E::FORMAT_VERSION {
            return Err(decode::Error::WrongFormatVersion {
                found: version,
                expected: E::FORMAT_VERSION,
            });
        }

        Ok(())
    }

    /// Decode the [`OperationPack`] stored in the tree of `commit`.
    ///
    /// The tree entries are first checked for a supported format version.
    /// Afterwards the blob of the `ops` entry is parsed into the operations and
    /// their author, and the lamport clocks are read from the names of the
    /// `create-clock-<number>` and `edit-clock-<number>` entries. Entries with
    /// any other name are skipped, so that additional data stored next to the
    /// operations does not abort decoding.
    ///
    /// The `impl_cache!` invocations tie this function to the
    /// `operation_packs` cache table, keyed by the commit id.
    /// NOTE(review): presumably a cache hit makes the lookup return early and
    /// the decoded pack is stored at the end; confirm against the macro.
    ///
    /// # Errors
    /// - [`decode::Error::MissingTree`] if the commit's tree cannot be
    ///   obtained.
    /// - [`decode::Error::FailedTreeDecode`] if the tree cannot be decoded.
    /// - Any error of the version check.
    /// - [`decode::Error::WrongData`] if the `ops` blob cannot be unmarshalled.
    /// - [`decode::Error::WrongNumberFormat`] if a clock suffix is not a
    ///   number.
    /// - [`decode::Error::DuplicatedEntry`] if `ops` or one of the clocks
    ///   appears more than once.
    /// - [`decode::Error::MissingEntry`] if `ops` or the edit clock is absent.
    ///
    /// # Panics
    /// If the object referenced by the `ops` entry cannot be found in the
    /// repository, or if the tree decode error unexpectedly carries data.
    // NOTE(review): `into_blob` presumably also panics when the `ops` entry
    // is not a blob; confirm against the gix documentation.
    #[allow(clippy::too_many_lines)]
    pub(crate) fn from_repository(
        db: &Database,
        commit: &gix::Commit<'_>,
    ) -> Result<Self, decode::Error> {
        impl_cache!(@mk_table "operation_packs");

        impl_cache! {@lookup db, commit.id().detach().as_bytes()}

        let tree = commit.tree()?;

        // TODO(@bpeetz): We could use the `Tree::iter` method instead, but this would
        // mean to decode the tree twice (once for version, than for parsing)
        // <2025-04-15>
        let decoded_tree = tree.decode().map_err(|err| {
            // (We want to ensure that this error type stays the same.)
            #[allow(clippy::unit_cmp)]
            {
                // Check that the error is really useless
                // (we do not enable the fancy error feature).
                assert_eq!(err.inner, ());
            }

            decode::Error::FailedTreeDecode()
        })?;

        Self::check_version(&decoded_tree.entries)?;

        let (operations, author, create_time, edit_time): (
            Vec<Operation<E>>,
            IdentityStub,
            Option<lamport::Time>,
            lamport::Time,
        ) = {
            // Every slot may be filled only once; a second `set` is reported as
            // a duplicated tree entry.
            let operations: OnceCell<Vec<Operation<E>>> = OnceCell::new();
            let author: OnceCell<IdentityStub> = OnceCell::new();
            let create_clock: OnceCell<lamport::Time> = OnceCell::new();
            let edit_clock: OnceCell<lamport::Time> = OnceCell::new();

            for entry in decoded_tree.entries {
                match entry.filename {
                    ops if ops == OPS_ENTRY_NAME => {
                        let mut data = tree
                            .repo
                            .find_object(entry.oid)
                            .expect("The id comes from a valid tree. It should also exist")
                            .into_blob();

                        let (new_operations, new_author) = Self::unmarshal_blob(&mut data)?;
                        operations
                            .set(new_operations)
                            .map_err(|_| decode::Error::DuplicatedEntry(OPS_ENTRY_NAME))?;
                        author
                            .set(new_author)
                            .map_err(|_| decode::Error::DuplicatedEntry(OPS_ENTRY_NAME))?;
                    }

                    create_clock_value
                        if create_clock_value.len() > CREATE_CLOCK_ENTRY_PREFIX.len()
                            && create_clock_value[..CREATE_CLOCK_ENTRY_PREFIX.len()]
                                == CREATE_CLOCK_ENTRY_PREFIX =>
                    {
                        let value_str =
                            create_clock_value[CREATE_CLOCK_ENTRY_PREFIX.len()..].to_string();
                        let value: u64 =
                            value_str
                                .parse()
                                .map_err(|err| decode::Error::WrongNumberFormat {
                                    fake_number: value_str,
                                    error: err,
                                })?;

                        create_clock.set(lamport::Time::from(value)).map_err(|_| {
                            decode::Error::DuplicatedEntry(CREATE_CLOCK_ENTRY_PREFIX)
                        })?;
                    }
                    edit_clock_value
                        if edit_clock_value.len() > EDIT_CLOCK_ENTRY_PREFIX.len()
                            && edit_clock_value[..EDIT_CLOCK_ENTRY_PREFIX.len()]
                                == EDIT_CLOCK_ENTRY_PREFIX =>
                    {
                        let value_str =
                            edit_clock_value[EDIT_CLOCK_ENTRY_PREFIX.len()..].to_string();
                        let value: u64 =
                            value_str
                                .parse()
                                .map_err(|err| decode::Error::WrongNumberFormat {
                                    fake_number: value_str,
                                    error: err,
                                })?;

                        edit_clock
                            .set(lamport::Time::from(value))
                            .map_err(|_| decode::Error::DuplicatedEntry(EDIT_CLOCK_ENTRY_PREFIX))?;
                    }

                    // The version entry was already validated by
                    // `check_version`. Every other entry is not needed to
                    // build the pack. The tree is data read from the
                    // repository, so an unexpected name must not panic.
                    _ => {}
                }
            }
            (
                operations
                    .into_inner()
                    .ok_or(decode::Error::MissingEntry(OPS_ENTRY_NAME))?,
                author
                    .into_inner()
                    .ok_or(decode::Error::MissingEntry(OPS_ENTRY_NAME))?,
                create_clock.into_inner(),
                edit_clock
                    .into_inner()
                    .ok_or(decode::Error::MissingEntry(EDIT_CLOCK_ENTRY_PREFIX))?,
            )
        };

        {
            // Verify the commit signature if we expected one.
            // TODO(@bpeetz): Perform the PGP signature validation. <2025-04-15>
        }

        let me = Self {
            author,
            operations,
            create_time,
            edit_time,
        };

        impl_cache! {@populate db, commit.id().detach().as_bytes(), &me}

        Ok(me)
    }

    /// Compute the [`Id`] of this [`OperationPack`].
    ///
    /// The pack is marshaled to its JSON form and the sha256 digest of that
    /// string is handed to [`Id::from_hex`].
    pub(crate) fn id(&self) -> Id {
        let json = self.as_marshaled_json();
        let digest = Sha256::digest(json.as_bytes());

        Id::from_hex(&digest[..]).expect("The hex comes from a hasher it is valid.")
    }

    /// Render this [`OperationPack`] as its git-bug compatible JSON string.
    ///
    /// The resulting object has two keys: `author` (the value form of the
    /// author) and `ops` (the value form of every operation, in order).
    fn as_marshaled_json(&self) -> String {
        let mut operation_values = Vec::with_capacity(self.operations.len());
        for operation in &self.operations {
            operation_values.push(operation.as_value());
        }

        let document = json_typed!(borrowed, {
            "author": self.author.as_value(),
            "ops": operation_values
        });

        document.encode()
    }

    fn unmarshal_blob(
        data: &mut gix::Blob<'_>,
    ) -> Result<(Vec<Operation<E>>, IdentityStub), unmarshall::Error> {
        let data_str = String::from_utf8_lossy(&data.data).to_string();
        // struct Repr {
        //     author: IdentityStub,
        //     ops: Vec<serde_json::Value>,
        // }

        let tape =
            simd_json::to_tape(&mut data.data).map_err(|err| unmarshall::Error::InvalidJson {
                data: data_str.clone(),
                error: err,
            })?;

        let value = tape.as_value();
        let lazy = lazy::Value::from_tape(value);

        let author = IdentityStub::from_value(&lazy.get("author").ok_or_else(|| {
            unmarshall::Error::MissingAuthor {
                data: data_str.clone(),
            }
        })?)
        .map_err(unmarshall::Error::IdentityStubParse)?;

        let ops: Vec<Operation<E>> = lazy
            .get("ops")
            .ok_or_else(|| unmarshall::Error::MissingOps {
                data: data_str.clone(),
            })?
            .as_array()
            .ok_or_else(|| unmarshall::Error::OpsNotArray { data: data_str })?
            .iter()
            .map(|raw_op| {
                let raw_op_owned: OwnedValue = raw_op.into_value().into();

                Operation::<E>::from_value(raw_op_owned.clone(), author).map_err(|err| {
                    unmarshall::Error::InvalidOperation {
                        op: raw_op_owned,
                        error: err,
                    }
                })
            })
            .collect::<Result<_, _>>()?;

        Ok((ops, author))
    }
}

#[allow(missing_docs)]
pub mod unmarshall {
    use simd_json::OwnedValue;

    use crate::replica::entity::operation::decode;

    #[derive(Debug, thiserror::Error)]
    pub enum Error {
        #[error("The operation pack data ({data}) is invalid json: {error}")]
        InvalidJson {
            data: String,
            error: simd_json::Error,
        },

        #[error("One of the operation pack's operations ({op}) could not be parsed: {error}")]
        InvalidOperation {
            op: OwnedValue,
            error: decode::Error,
        },

        #[error("Was missing the “author” field in the data: {data}")]
        MissingAuthor { data: String },
        #[error("Was missing the “ops” field in the data: {data}")]
        MissingOps { data: String },

        #[error("The “ops” field is in the data not an array: {data}")]
        OpsNotArray { data: String },

        #[error("Failed to parse identity stub: {0}")]
        IdentityStubParse(crate::replica::entity::identity::from_value::Error),
    }
}