triblespace_core/
repo.rs

1//! This module provides a high-level API for storing and retrieving data from repositories.
2//! The design is inspired by Git, but with a focus on object/content-addressed storage.
3//! It separates storage concerns from the data model, and reduces the mutable state of the repository,
4//! to an absolute minimum, making it easier to reason about and allowing for different storage backends.
5//!
//! Blob repositories are collections of blobs that can be content-addressed by their hash.
//! This is typically a local `.pile` file or an S3 bucket or a similar service.
#![allow(clippy::type_complexity)]
9//! On their own they have no notion of branches or commits, or other stateful constructs.
10//! As such they also don't have a notion of time, order or history,
11//! massively relaxing the constraints on storage.
12//! This makes it possible to use a wide range of storage services, including those that don't support
13//! atomic transactions or have other limitations.
14//!
15//! Branch repositories on the other hand are a stateful construct that can be used to represent a branch pointing to a specific commit.
//! They are stored in a separate repository, typically a local `.pile` file, a database or an S3-compatible service with a compare-and-swap operation,
17//! and can be used to represent the state of a repository at a specific point in time.
18//!
//! Technically, branches are just a mapping from a branch id to a blob hash,
//! but because TribleSets are themselves easily stored in a blob, and because
21//! trible commit histories are an append-only chain of TribleSet metadata,
22//! the hash of the head is sufficient to represent the entire history of a branch.
23//!
24//! ## Basic usage
25//!
26//! ```rust,ignore
27//! use ed25519_dalek::SigningKey;
28//! use rand::rngs::OsRng;
29//! use triblespace::prelude::*;
30//! use triblespace::prelude::valueschemas::{GenId, ShortString};
31//! use triblespace::repo::{memoryrepo::MemoryRepo, Repository};
32//!
33//! let storage = MemoryRepo::default();
34//! let mut repo = Repository::new(storage, SigningKey::generate(&mut OsRng));
35//! let branch_id = repo.create_branch("main", None).expect("create branch");
36//! let mut ws = repo.pull(*branch_id).expect("pull branch");
37//!
38//! attributes! {
39//!     "8F180883F9FD5F787E9E0AF0DF5866B9" as pub author: GenId;
40//!     "0DBB530B37B966D137C50B943700EDB2" as pub firstname: ShortString;
41//!     "6BAA463FD4EAF45F6A103DB9433E4545" as pub lastname: ShortString;
42//! }
43//! let author = fucid();
44//! ws.commit(
45//!     entity!{ &author @
//!         firstname: "Frank",
//!         lastname: "Herbert",
48//!      },
49//!     Some("initial commit"),
50//! );
51//!
52//! // Single-attempt push: `try_push` uploads local blobs and attempts a
53//! // single CAS update. On conflict it returns a workspace containing the
54//! // new branch state which you should merge into before retrying.
55//! match repo.try_push(&mut ws).expect("try_push") {
56//!     None => {}
57//!     Some(_) => panic!("unexpected conflict"),
58//! }
59//! ```
60//!
61//! `create_branch` registers a new branch and returns an `ExclusiveId` guard.
62//! `pull` creates a new workspace from an existing branch while
63//! `branch_from` can be used to start a new branch from a specific commit
64//! handle. See `examples/workspace.rs` for a more complete example.
65//!
66//! ## Handling conflicts
67//!
68//! The single-attempt primitive is [`Repository::try_push`]. It returns
69//! `Ok(None)` on success or `Ok(Some(conflict_ws))` when the branch advanced
70//! concurrently. Callers that want explicit conflict handling may use this
71//! form:
72//!
73//! ```rust,ignore
74//! while let Some(mut other) = repo.try_push(&mut ws)? {
75//!     // Merge our staged changes into the incoming workspace and retry.
76//!     other.merge(&mut ws)?;
77//!     ws = other;
78//! }
79//! ```
80//!
81//! For convenience `Repository::push` is provided as a retrying wrapper that
82//! performs the merge-and-retry loop for you. Call `push` when you prefer the
83//! repository to handle conflicts automatically; call `try_push` when you need
84//! to inspect or control the intermediate conflict workspace yourself.
85//!
//! `push` performs a compare-and-swap (CAS) update on the branch metadata.
87//! This optimistic concurrency control keeps branches consistent without
88//! locking and can be emulated by many storage systems (for example by
89//! using conditional writes on S3).
90//!
91//! ## Git parallels
92//!
93//! The API deliberately mirrors concepts from Git to make its usage familiar:
94//!
95//! - A `Repository` stores commits and branch metadata similar to a remote.
96//! - `Workspace` is akin to a working directory combined with an index. It
97//!   tracks changes against a branch head until you `push` them.
98//! - `create_branch` and `branch_from` correspond to creating new branches from
99//!   scratch or from a specific commit, respectively.
100//! - `push` updates the repository atomically. If the branch advanced in the
101//!   meantime, you receive a conflict workspace which can be merged before
102//!   retrying the push.
103//! - `pull` is similar to cloning a branch into a new workspace.
104//!
105//! `pull` uses the repository's default signing key for new commits. If you
106//! need to work with a different identity, the `_with_key` variants allow providing
107//! an explicit key when creating branches or pulling workspaces.
108//!
109//! These parallels should help readers leverage their Git knowledge when
110//! working with trible repositories.
111//!
// Branch metadata helpers (used below via `branch::branch_metadata`).
pub mod branch;
// Commit metadata helpers (used below via `commit::commit_metadata`).
pub mod commit;
// Storage backend implementations; the names suggest a hybrid store, an
// in-memory repo, an object-store adapter and the `.pile` file format —
// see each module's own documentation for details.
pub mod hybridstore;
pub mod memoryrepo;
pub mod objectstore;
pub mod pile;
118
/// Trait for storage backends that require explicit close/cleanup.
///
/// Not all storage backends need to implement this; implementations that have
/// nothing to do on close may return Ok(()) or use `Infallible` as the error
/// type.
pub trait StorageClose {
    /// Error type returned by `close`.
    type Error: std::error::Error;

    /// Consume the storage and perform any necessary cleanup.
    ///
    /// # Errors
    /// Returns the backend-specific [`Self::Error`] if cleanup fails.
    fn close(self) -> Result<(), Self::Error>;
}
131
// Convenience impl for repositories whose storage supports explicit close.
impl<Storage> Repository<Storage>
where
    Storage: BlobStore<Blake3> + BranchStore<Blake3> + StorageClose,
{
    /// Close the repository's underlying storage if it supports explicit
    /// close operations.
    ///
    /// This method is only available when the storage type implements
    /// [`StorageClose`]. It consumes the repository and delegates to the
    /// storage's `close` implementation, returning any error produced.
    ///
    /// # Errors
    /// Propagates the storage backend's [`StorageClose::Error`] unchanged.
    pub fn close(self) -> Result<(), <Storage as StorageClose>::Error> {
        self.storage.close()
    }
}
147
148use crate::macros::pattern;
149use std::collections::{HashSet, VecDeque};
150use std::convert::Infallible;
151use std::error::Error;
152use std::fmt::Debug;
153use std::fmt::{self};
154
155use commit::commit_metadata;
156use hifitime::Epoch;
157use itertools::Itertools;
158
159use crate::blob::schemas::simplearchive::UnarchiveError;
160use crate::blob::schemas::UnknownBlob;
161use crate::blob::Blob;
162use crate::blob::BlobSchema;
163use crate::blob::MemoryBlobStore;
164use crate::blob::ToBlob;
165use crate::blob::TryFromBlob;
166use crate::find;
167use crate::id::ufoid;
168use crate::id::Id;
169use crate::metadata;
170use crate::patch::Entry;
171use crate::patch::IdentitySchema;
172use crate::patch::PATCH;
173use crate::prelude::valueschemas::GenId;
174use crate::repo::branch::branch_metadata;
175use crate::trible::TribleSet;
176use crate::value::schemas::hash::Handle;
177use crate::value::schemas::hash::HashProtocol;
178use crate::value::Value;
179use crate::value::ValueSchema;
180use crate::value::VALUE_LEN;
181use ed25519_dalek::SigningKey;
182
183use crate::blob::schemas::longstring::LongString;
184use crate::blob::schemas::simplearchive::SimpleArchive;
185use crate::prelude::*;
186use crate::value::schemas::ed25519 as ed;
187use crate::value::schemas::hash::Blake3;
188use crate::value::schemas::shortstring::ShortString;
189use crate::value::schemas::time::NsTAIInterval;
190
// Attribute definitions for commit and branch metadata records. Each entry
// binds a stable 128-bit attribute id to a typed, public accessor.
attributes! {
    /// The actual data of the commit.
    "4DD4DDD05CC31734B03ABB4E43188B1F" as pub content: Handle<Blake3, SimpleArchive>;
    /// A commit that this commit is based on.
    "317044B612C690000D798CA660ECFD2A" as pub parent: Handle<Blake3, SimpleArchive>;
    /// A (potentially long) message describing the commit.
    "B59D147839100B6ED4B165DF76EDF3BB" as pub message: Handle<Blake3, LongString>;
    /// A short message describing the commit.
    "12290C0BE0E9207E324F24DDE0D89300" as pub short_message: ShortString;
    /// The head commit of a branch's commit chain; the remaining history is
    /// reachable from it through `parent` links.
    "272FBC56108F336C4D2E17289468C35F" as pub head: Handle<Blake3, SimpleArchive>;
    /// An id used to track the branch.
    "8694CC73AF96A5E1C7635C677D1B928A" as pub branch: GenId;
    /// Timestamp range when this commit was created.
    "71FF566AB4E3119FC2C5E66A18979586" as pub timestamp: NsTAIInterval;
    /// The author of the signature identified by their ed25519 public key.
    "ADB4FFAD247C886848161297EFF5A05B" as pub signed_by: ed::ED25519PublicKey;
    /// The `r` part of a ed25519 signature.
    "9DF34F84959928F93A3C40AEB6E9E499" as pub signature_r: ed::ED25519RComponent;
    /// The `s` part of a ed25519 signature.
    "1ACE03BF70242B289FDF00E4327C3BC6" as pub signature_s: ed::ED25519SComponent;
}
213
/// The `ListBlobs` trait is used to list all blobs in a repository.
pub trait BlobStoreList<H: HashProtocol> {
    /// Iterator over every blob handle; items are fallible because each
    /// entry may require a storage access to produce.
    type Iter<'a>: Iterator<Item = Result<Value<Handle<H, UnknownBlob>>, Self::Err>>
    where
        Self: 'a;
    /// Error type produced while enumerating blobs.
    type Err: Error + Debug + Send + Sync + 'static;

    /// Lists all blobs in the repository.
    fn blobs<'a>(&'a self) -> Self::Iter<'a>;
}
224
/// Metadata about a blob in a repository.
///
/// Returned by [`BlobStoreMeta::metadata`].
#[derive(Debug, Clone)]
pub struct BlobMetadata {
    /// Timestamp in milliseconds since UNIX epoch when the blob was created/stored.
    pub timestamp: u64,
    /// Length of the blob in bytes.
    pub length: u64,
}
233
/// Trait exposing metadata lookup for blobs available in a repository reader.
pub trait BlobStoreMeta<H: HashProtocol> {
    /// Error type returned by metadata calls.
    type MetaError: std::error::Error + Send + Sync + 'static;

    /// Look up the stored [`BlobMetadata`] (timestamp and length) for `handle`.
    ///
    /// Returns `Ok(None)` when no metadata is available — presumably because
    /// the blob is not present in this store; confirm against the backend
    /// implementations.
    fn metadata<S>(
        &self,
        handle: Value<Handle<H, S>>,
    ) -> Result<Option<BlobMetadata>, Self::MetaError>
    where
        S: BlobSchema + 'static,
        Handle<H, S>: ValueSchema;
}
247
/// Trait exposing a monotonic "forget" operation.
///
/// Forget is idempotent and monotonic: it removes materialization from a
/// particular repository but does not semantically delete derived facts.
pub trait BlobStoreForget<H: HashProtocol> {
    /// Error type returned when forgetting fails.
    type ForgetError: std::error::Error + Send + Sync + 'static;

    /// Remove the blob identified by `handle` from this store's
    /// materialized state.
    fn forget<S>(&mut self, handle: Value<Handle<H, S>>) -> Result<(), Self::ForgetError>
    where
        S: BlobSchema + 'static,
        Handle<H, S>: ValueSchema;
}
260
/// The `GetBlob` trait is used to retrieve blobs from a repository.
pub trait BlobStoreGet<H: HashProtocol> {
    /// Error type for `get`, generic over the deserialization error `E`
    /// produced by the target type's [`TryFromBlob`] conversion.
    type GetError<E: std::error::Error>: Error;

    /// Retrieves a blob from the repository by its handle.
    /// The handle is a unique identifier for the blob, and is used to retrieve it from the repository.
    /// The blob is returned as a `Blob` object, which contains the raw bytes of the blob,
    /// which can be deserialized via the appropriate schema type, which is specified by the `T` type parameter.
    ///
    /// # Errors
    /// Returns an error if the blob could not be found in the repository, or
    /// if the conversion to `T` fails. The error type is the [`Self::GetError`]
    /// associated type.
    fn get<T, S>(
        &self,
        handle: Value<Handle<H, S>>,
    ) -> Result<T, Self::GetError<<T as TryFromBlob<S>>::Error>>
    where
        S: BlobSchema + 'static,
        T: TryFromBlob<S>,
        Handle<H, S>: ValueSchema;
}
282
/// The `PutBlob` trait is used to store blobs in a repository.
pub trait BlobStorePut<H: HashProtocol> {
    /// Error type returned when storing a blob fails.
    type PutError: Error + Debug + Send + Sync + 'static;

    /// Stores `item` as a blob and returns its content-addressed handle.
    ///
    /// # Errors
    /// Returns [`Self::PutError`] if the blob could not be stored.
    fn put<S, T>(&mut self, item: T) -> Result<Value<Handle<H, S>>, Self::PutError>
    where
        S: BlobSchema + 'static,
        T: ToBlob<S>,
        Handle<H, S>: ValueSchema;
}
293
/// Combined blob-storage interface: writing via [`BlobStorePut`] plus the
/// ability to create cloneable readers for lookup and enumeration.
pub trait BlobStore<H: HashProtocol>: BlobStorePut<H> {
    /// Read handle over the store's contents.
    type Reader: BlobStoreGet<H> + BlobStoreList<H> + Clone + Send + PartialEq + Eq + 'static;
    /// Error type returned when a reader cannot be created.
    type ReaderError: Error + Debug + Send + Sync + 'static;
    /// Create a reader for the current contents of the store.
    fn reader(&mut self) -> Result<Self::Reader, Self::ReaderError>;
}
299
/// Trait for blob stores that can retain a supplied set of handles.
pub trait BlobStoreKeep<H: HashProtocol> {
    /// Retain only the blobs identified by `handles`; blobs not listed may
    /// be discarded by the store.
    fn keep<I>(&mut self, handles: I)
    where
        I: IntoIterator<Item = Value<Handle<H, UnknownBlob>>>;
}
307
/// Outcome of a compare-and-swap branch update (see [`BranchStore::update`]).
#[derive(Debug)]
pub enum PushResult<H>
where
    H: HashProtocol,
{
    /// The update was applied.
    Success(),
    /// The CAS failed; carries the branch's actual current head (if any) so
    /// callers can merge and retry.
    Conflict(Option<Value<Handle<H, SimpleArchive>>>),
}
316
/// Mutable branch state: a mapping from branch id to the handle of the
/// branch's metadata blob, updated via compare-and-swap.
pub trait BranchStore<H: HashProtocol> {
    /// Error type returned while listing branches.
    type BranchesError: Error + Debug + Send + Sync + 'static;
    /// Error type returned while reading a branch head.
    type HeadError: Error + Debug + Send + Sync + 'static;
    /// Error type returned while updating a branch head.
    type UpdateError: Error + Debug + Send + Sync + 'static;

    /// Fallible iterator over the ids of all stored branches.
    type ListIter<'a>: Iterator<Item = Result<Id, Self::BranchesError>>
    where
        Self: 'a;

    /// Lists all branches in the repository.
    /// This function returns an iterator of branch ids.
    fn branches<'a>(&'a mut self) -> Result<Self::ListIter<'a>, Self::BranchesError>;

    // NOTE: keep the API lean — callers may call `branches()` and handle the
    // fallible iterator directly; we avoid adding an extra helper here.

    /// Retrieves a branch from the repository by its id.
    /// The id is a unique identifier for the branch, and is used to retrieve it from the repository.
    ///
    /// # Parameters
    /// * `id` - The id of the branch to retrieve.
    ///
    /// # Returns
    /// * `Ok(Some(handle))` with the branch's current metadata handle.
    /// * `Ok(None)` if no branch with this id exists.
    ///
    /// # Errors
    /// Returns [`Self::HeadError`] if the branch store could not be read.
    fn head(&mut self, id: Id) -> Result<Option<Value<Handle<H, SimpleArchive>>>, Self::HeadError>;

    /// Puts a branch on the repository, creating or updating it.
    ///
    /// # Parameters
    /// * `id` - The id of the branch to create or update.
    /// * `old` - Expected current value of the branch (None if creating new)
    /// * `new` - Value to update the branch to
    ///
    /// # Returns
    /// * `Success` - Push completed successfully
    /// * `Conflict(current)` - Failed because the branch's current value doesn't match `old`
    ///   (contains the actual current value for conflict resolution)
    fn update(
        &mut self,
        id: Id,
        old: Option<Value<Handle<H, SimpleArchive>>>,
        new: Value<Handle<H, SimpleArchive>>,
    ) -> Result<PushResult<H>, Self::UpdateError>;
}
364
/// Error produced by [`transfer`], tagging which stage failed.
#[derive(Debug)]
pub enum TransferError<ListErr, LoadErr, StoreErr> {
    /// Enumerating the source handles failed.
    List(ListErr),
    /// Reading a blob from the source store failed.
    Load(LoadErr),
    /// Writing a blob into the target store failed.
    Store(StoreErr),
}
371
372impl<ListErr, LoadErr, StoreErr> fmt::Display for TransferError<ListErr, LoadErr, StoreErr> {
373    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
374        write!(f, "failed to transfer blob")
375    }
376}
377
impl<ListErr, LoadErr, StoreErr> Error for TransferError<ListErr, LoadErr, StoreErr>
where
    ListErr: Debug + Error + 'static,
    LoadErr: Debug + Error + 'static,
    StoreErr: Debug + Error + 'static,
{
    /// Expose the wrapped stage error as the cause of the transfer failure.
    /// Note: the arms cannot be collapsed into one or-pattern because each
    /// variant wraps a different error type.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            Self::List(e) => Some(e),
            Self::Load(e) => Some(e),
            Self::Store(e) => Some(e),
        }
    }
}
392
393/// Copies the specified blob handles from `source` into `target`.
394pub fn transfer<'a, BS, BT, HS, HT, Handles>(
395    source: &'a BS,
396    target: &'a mut BT,
397    handles: Handles,
398) -> impl Iterator<
399    Item = Result<
400        (
401            Value<Handle<HS, UnknownBlob>>,
402            Value<Handle<HT, UnknownBlob>>,
403        ),
404        TransferError<
405            Infallible,
406            <BS as BlobStoreGet<HS>>::GetError<Infallible>,
407            <BT as BlobStorePut<HT>>::PutError,
408        >,
409    >,
410> + 'a
411where
412    BS: BlobStoreGet<HS> + 'a,
413    BT: BlobStorePut<HT> + 'a,
414    HS: 'static + HashProtocol,
415    HT: 'static + HashProtocol,
416    Handles: IntoIterator<Item = Value<Handle<HS, UnknownBlob>>> + 'a,
417    Handles::IntoIter: 'a,
418{
419    handles.into_iter().map(move |source_handle| {
420        let blob: Blob<UnknownBlob> = source.get(source_handle).map_err(TransferError::Load)?;
421        let target_handle = target.put(blob).map_err(TransferError::Store)?;
422        Ok((source_handle, target_handle))
423    })
424}
425
/// Iterator that visits every blob handle reachable from a set of roots.
///
/// Traversal is breadth-first (see [`reachable`]); candidate child handles
/// are discovered by scanning blob payloads in `VALUE_LEN`-byte chunks.
pub struct ReachableHandles<'a, BS, H>
where
    BS: BlobStoreGet<H>,
    H: 'static + HashProtocol,
{
    /// Store used to resolve handles to blob payloads.
    source: &'a BS,
    /// Pending handles, popped in breadth-first order.
    queue: VecDeque<Value<Handle<H, UnknownBlob>>>,
    /// Raw bytes of handles that have already been yielded.
    visited: HashSet<[u8; VALUE_LEN]>,
}
436
437impl<'a, BS, H> ReachableHandles<'a, BS, H>
438where
439    BS: BlobStoreGet<H>,
440    H: 'static + HashProtocol,
441{
442    fn new(source: &'a BS, roots: impl IntoIterator<Item = Value<Handle<H, UnknownBlob>>>) -> Self {
443        let mut queue = VecDeque::new();
444        for handle in roots {
445            queue.push_back(handle);
446        }
447
448        Self {
449            source,
450            queue,
451            visited: HashSet::new(),
452        }
453    }
454
455    fn enqueue_from_blob(&mut self, blob: &Blob<UnknownBlob>) {
456        let bytes = blob.bytes.as_ref();
457        let mut offset = 0usize;
458
459        while offset + VALUE_LEN <= bytes.len() {
460            let mut raw = [0u8; VALUE_LEN];
461            raw.copy_from_slice(&bytes[offset..offset + VALUE_LEN]);
462
463            if !self.visited.contains(&raw) {
464                let candidate = Value::<Handle<H, UnknownBlob>>::new(raw);
465                if self
466                    .source
467                    .get::<anybytes::Bytes, UnknownBlob>(candidate)
468                    .is_ok()
469                {
470                    self.queue.push_back(candidate);
471                }
472            }
473
474            offset += VALUE_LEN;
475        }
476    }
477}
478
479impl<'a, BS, H> Iterator for ReachableHandles<'a, BS, H>
480where
481    BS: BlobStoreGet<H>,
482    H: 'static + HashProtocol,
483{
484    type Item = Value<Handle<H, UnknownBlob>>;
485
486    fn next(&mut self) -> Option<Self::Item> {
487        while let Some(handle) = self.queue.pop_front() {
488            let raw = handle.raw;
489
490            if !self.visited.insert(raw) {
491                continue;
492            }
493
494            if let Ok(blob) = self.source.get(handle) {
495                self.enqueue_from_blob(&blob);
496            }
497
498            return Some(handle);
499        }
500
501        None
502    }
503}
504
/// Create a breadth-first iterator over blob handles reachable from `roots`.
///
/// Convenience constructor for [`ReachableHandles`].
pub fn reachable<'a, BS, H>(
    source: &'a BS,
    roots: impl IntoIterator<Item = Value<Handle<H, UnknownBlob>>>,
) -> ReachableHandles<'a, BS, H>
where
    BS: BlobStoreGet<H>,
    H: 'static + HashProtocol,
{
    ReachableHandles::new(source, roots)
}
516
517/// Iterate over every 32-byte candidate in the value column of a [`TribleSet`].
518///
519/// This is a conservative conversion used when scanning metadata for potential
520/// blob handles. Each 32-byte chunk is treated as a `Handle<H, UnknownBlob>`.
521/// Callers can feed the resulting iterator into [`BlobStoreKeep::keep`] or other
522/// helpers that accept collections of handles.
523pub fn potential_handles<'a, H>(
524    set: &'a TribleSet,
525) -> impl Iterator<Item = Value<Handle<H, UnknownBlob>>> + 'a
526where
527    H: HashProtocol,
528{
529    set.vae.iter().map(|raw| {
530        let mut value = [0u8; VALUE_LEN];
531        value.copy_from_slice(&raw[0..VALUE_LEN]);
532        Value::<Handle<H, UnknownBlob>>::new(value)
533    })
534}
535
/// An error that can occur when creating a commit.
/// This error can be caused by a failure to store the content or metadata blobs.
/// Both variants wrap the same underlying blob-store put error type.
#[derive(Debug)]
pub enum CreateCommitError<BlobErr: Error + Debug + Send + Sync + 'static> {
    /// Failed to store the content blob.
    ContentStorageError(BlobErr),
    /// Failed to store the commit metadata blob.
    CommitStorageError(BlobErr),
}
545
546impl<BlobErr: Error + Debug + Send + Sync + 'static> fmt::Display for CreateCommitError<BlobErr> {
547    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
548        match self {
549            CreateCommitError::ContentStorageError(e) => write!(f, "Content storage failed: {e}"),
550            CreateCommitError::CommitStorageError(e) => {
551                write!(f, "Commit metadata storage failed: {e}")
552            }
553        }
554    }
555}
556
557impl<BlobErr: Error + Debug + Send + Sync + 'static> Error for CreateCommitError<BlobErr> {
558    fn source(&self) -> Option<&(dyn Error + 'static)> {
559        match self {
560            CreateCommitError::ContentStorageError(e) => Some(e),
561            CreateCommitError::CommitStorageError(e) => Some(e),
562        }
563    }
564}
565
/// Errors produced when merging one workspace into another.
#[derive(Debug)]
pub enum MergeError {
    /// The merge failed because the workspaces have different base repos.
    DifferentRepos(),
}
571
/// Errors that can occur while pushing a workspace to the repository.
#[derive(Debug)]
pub enum PushError<Storage: BranchStore<Blake3> + BlobStore<Blake3>> {
    /// An error occurred while enumerating the branch storage branches.
    StorageBranches(Storage::BranchesError),
    /// An error occurred while creating a blob reader.
    StorageReader(<Storage as BlobStore<Blake3>>::ReaderError),
    /// An error occurred while reading metadata blobs.
    StorageGet(
        <<Storage as BlobStore<Blake3>>::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>,
    ),
    /// An error occurred while transferring blobs to the repository.
    StoragePut(<Storage as BlobStorePut<Blake3>>::PutError),
    /// An error occurred while updating the branch storage.
    BranchUpdate(Storage::UpdateError),
    /// Malformed branch metadata.
    BadBranchMetadata(),
    /// Merge failed while retrying a push.
    MergeError(MergeError),
}
591
592// Allow using the `?` operator to convert MergeError into PushError in
593// contexts where PushError is the function error type. This keeps call sites
594// succinct by avoiding manual mapping closures like
595// `.map_err(|e| PushError::MergeError(e))?`.
596impl<Storage> From<MergeError> for PushError<Storage>
597where
598    Storage: BranchStore<Blake3> + BlobStore<Blake3>,
599{
600    fn from(e: MergeError) -> Self {
601        PushError::MergeError(e)
602    }
603}
604
605// Note: we intentionally avoid generic `From` impls for storage-associated
606// error types because they can overlap with other blanket implementations
607// and lead to coherence conflicts. Call sites use explicit mapping via the
608// enum variant constructors (e.g. `map_err(PushError::StoragePut)`) where
609// needed which keeps conversions explicit and stable.
610
/// Errors that can occur while creating or initializing a branch.
#[derive(Debug)]
pub enum BranchError<Storage>
where
    Storage: BranchStore<Blake3> + BlobStore<Blake3>,
{
    /// An error occurred while creating a blob reader.
    StorageReader(<Storage as BlobStore<Blake3>>::ReaderError),
    /// An error occurred while reading metadata blobs.
    StorageGet(
        <<Storage as BlobStore<Blake3>>::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>,
    ),
    /// An error occurred while storing blobs.
    StoragePut(<Storage as BlobStorePut<Blake3>>::PutError),
    /// An error occurred while retrieving branch heads.
    BranchHead(Storage::HeadError),
    /// An error occurred while updating the branch storage.
    BranchUpdate(Storage::UpdateError),
    /// The branch already exists.
    AlreadyExists(),
    /// The referenced base branch does not exist.
    BranchNotFound(Id),
}
633
/// Errors that can occur while looking up a branch.
#[derive(Debug)]
pub enum LookupError<Storage>
where
    Storage: BranchStore<Blake3> + BlobStore<Blake3>,
{
    /// An error occurred while enumerating branches.
    StorageBranches(Storage::BranchesError),
    /// An error occurred while reading a branch head.
    BranchHead(Storage::HeadError),
    /// An error occurred while creating a blob reader.
    StorageReader(<Storage as BlobStore<Blake3>>::ReaderError),
    /// An error occurred while reading metadata blobs.
    StorageGet(
        <<Storage as BlobStore<Blake3>>::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>,
    ),
    /// Multiple branches were found with the given name.
    NameConflict(Vec<Id>),
    /// Malformed branch metadata.
    BadBranchMetadata(),
}
649
/// High-level wrapper combining a blob store and branch store into a usable
/// repository API.
///
/// The `Repository` type exposes convenience methods for creating branches,
/// committing data and pushing changes while delegating actual storage to the
/// given `BlobStore` and `BranchStore` implementations.
pub struct Repository<Storage: BlobStore<Blake3> + BranchStore<Blake3>> {
    // Combined blob + branch storage backend.
    storage: Storage,
    // Default key used to sign branch/commit metadata; replaceable via
    // `set_signing_key`.
    signing_key: SigningKey,
}
660
/// Errors that can occur when pulling a branch into a workspace.
pub enum PullError<BranchStorageErr, BlobReaderErr, BlobStorageErr>
where
    BranchStorageErr: Error,
    BlobReaderErr: Error,
    BlobStorageErr: Error,
{
    /// The branch does not exist in the repository.
    BranchNotFound(Id),
    /// An error occurred while accessing the branch storage.
    BranchStorage(BranchStorageErr),
    /// An error occurred while creating a blob reader.
    BlobReader(BlobReaderErr),
    /// An error occurred while accessing the blob storage.
    BlobStorage(BlobStorageErr),
    /// The branch metadata is malformed or does not contain the expected fields.
    BadBranchMetadata(),
}
678
679impl<B, R, C> fmt::Debug for PullError<B, R, C>
680where
681    B: Error + fmt::Debug,
682    R: Error + fmt::Debug,
683    C: Error + fmt::Debug,
684{
685    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
686        match self {
687            PullError::BranchNotFound(id) => f.debug_tuple("BranchNotFound").field(id).finish(),
688            PullError::BranchStorage(e) => f.debug_tuple("BranchStorage").field(e).finish(),
689            PullError::BlobReader(e) => f.debug_tuple("BlobReader").field(e).finish(),
690            PullError::BlobStorage(e) => f.debug_tuple("BlobStorage").field(e).finish(),
691            PullError::BadBranchMetadata() => f.debug_tuple("BadBranchMetadata").finish(),
692        }
693    }
694}
695
696impl<Storage> Repository<Storage>
697where
698    Storage: BlobStore<Blake3> + BranchStore<Blake3>,
699{
    /// Creates a new repository on top of the given storage backend.
    /// The storage provides both blob storage (the actual repository data)
    /// and branch storage (the mutable branch heads).
    ///
    /// # Parameters
    /// * `storage` - Combined blob/branch storage backend.
    /// * `signing_key` - Default key used to sign branch and commit metadata.
    ///
    /// # Returns
    /// * A new `Repository` wrapping `storage`.
    pub fn new(storage: Storage, signing_key: SigningKey) -> Self {
        Self {
            storage,
            signing_key,
        }
    }
716
    /// Consume the repository and return the underlying storage backend.
    ///
    /// This is useful for callers that need to take ownership of the storage
    /// (for example to call `close()` on a `Pile`) instead of letting the
    /// repository drop it implicitly.
    ///
    /// # Returns
    /// * The storage originally passed to [`Repository::new`].
    pub fn into_storage(self) -> Storage {
        self.storage
    }
725
    /// Replace the repository signing key.
    ///
    /// The new key becomes the default for subsequent `create_branch` and
    /// `pull` calls; previously created workspaces keep the key they were
    /// given at creation time.
    pub fn set_signing_key(&mut self, signing_key: SigningKey) {
        self.signing_key = signing_key;
    }
730
    /// Creates a new branch in the repository.
    /// Branches are the only mutable state in the repository, and represent
    /// the state of a commit chain at a specific point in time.
    ///
    /// The branch is created with the given name and is initialized to point
    /// to the optionally given commit. Its metadata is signed with the
    /// repository's default signing key.
    ///
    /// # Parameters
    /// * `branch_name` - Name of the new branch.
    /// * `commit` - Commit to initialize the branch from; `None` creates an
    ///   unsigned branch with no head.
    pub fn create_branch(
        &mut self,
        branch_name: &str,
        commit: Option<CommitHandle>,
    ) -> Result<ExclusiveId, BranchError<Storage>> {
        self.create_branch_with_key(branch_name, commit, self.signing_key.clone())
    }
751
752    /// Same as [`branch_from`] but uses the provided signing key.
753    pub fn create_branch_with_key(
754        &mut self,
755        branch_name: &str,
756        commit: Option<CommitHandle>,
757        signing_key: SigningKey,
758    ) -> Result<ExclusiveId, BranchError<Storage>> {
759        let branch_id = ufoid();
760
761        let branch_set = if let Some(commit) = commit {
762            let reader = self
763                .storage
764                .reader()
765                .map_err(|e| BranchError::StorageReader(e))?;
766            let set: TribleSet = reader.get(commit).map_err(|e| BranchError::StorageGet(e))?;
767
768            branch::branch_metadata(&signing_key, *branch_id, branch_name, Some(set.to_blob()))
769        } else {
770            branch::branch_unsigned(*branch_id, branch_name, None)
771        };
772
773        let branch_blob = branch_set.to_blob();
774        let branch_handle = self
775            .storage
776            .put(branch_blob)
777            .map_err(|e| BranchError::StoragePut(e))?;
778
779        let push_result = self
780            .storage
781            .update(*branch_id, None, branch_handle)
782            .map_err(|e| BranchError::BranchUpdate(e))?;
783
784        match push_result {
785            PushResult::Success() => Ok(branch_id),
786            PushResult::Conflict(_) => Err(BranchError::AlreadyExists()),
787        }
788    }
789
    /// Pulls an existing branch using the repository's signing key.
    ///
    /// Creates a `Workspace` tracking the branch's current head; commits made
    /// in the workspace are signed with the repository's default key.
    ///
    /// # Errors
    /// See [`PullError`] for the individual failure modes.
    pub fn pull(
        &mut self,
        branch_id: Id,
    ) -> Result<
        Workspace<Storage>,
        PullError<
            Storage::HeadError,
            Storage::ReaderError,
            <Storage::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>,
        >,
    > {
        self.pull_with_key(branch_id, self.signing_key.clone())
    }
804
    /// Same as [`pull`] but overrides the signing key.
    ///
    /// Resolves the branch's metadata head, extracts the current commit
    /// handle (if any) from the metadata, and returns a fresh [`Workspace`]
    /// whose `head` and `base_head` both point at that commit.
    ///
    /// # Errors
    ///
    /// Fails with `BranchNotFound` when the branch id has no head, with a
    /// storage error when the branch store or blob store cannot be read, or
    /// with `BadBranchMetadata` when the metadata names more than one head.
    pub fn pull_with_key(
        &mut self,
        branch_id: Id,
        signing_key: SigningKey,
    ) -> Result<
        Workspace<Storage>,
        PullError<
            Storage::HeadError,
            Storage::ReaderError,
            <Storage::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>,
        >,
    > {
        // 1. Get the branch metadata head from the branch store.
        let base_branch_meta_handle = match self.storage.head(branch_id) {
            Ok(Some(handle)) => handle,
            Ok(None) => return Err(PullError::BranchNotFound(branch_id)),
            Err(e) => return Err(PullError::BranchStorage(e)),
        };
        // 2. Get the current commit from the branch metadata.
        let reader = self.storage.reader().map_err(PullError::BlobReader)?;
        let base_branch_meta: TribleSet = match reader.get(base_branch_meta_handle) {
            Ok(metadata) => metadata,
            Err(e) => return Err(PullError::BlobStorage(e)),
        };

        // Zero heads is a valid empty branch; more than one head means the
        // metadata is corrupt.
        let head_ = match find!(
            (head_: Value<_>),
            pattern!(&base_branch_meta, [{ head: ?head_ }])
        )
        .at_most_one()
        {
            Ok(Some((h,))) => Some(h),
            Ok(None) => None,
            Err(_) => return Err(PullError::BadBranchMetadata()),
        };
        // Create workspace with the current commit and base blobs.
        // NOTE(review): this opens a second reader; could potentially reuse
        // `reader` above if `Storage::Reader` is `Clone` — confirm.
        let base_blobs = self.storage.reader().map_err(PullError::BlobReader)?;
        Ok(Workspace {
            base_blobs,
            local_blobs: MemoryBlobStore::new(),
            head: head_,
            base_head: head_,
            base_branch_id: branch_id,
            base_branch_meta: base_branch_meta_handle,
            signing_key,
        })
    }
853
854    /// Pushes the workspace's new blobs and commit to the persistent repository.
855    /// This syncs the local BlobSet with the repository's BlobStore and performs
856    /// an atomic branch update (using the stored base_branch_meta).
857    pub fn push(&mut self, workspace: &mut Workspace<Storage>) -> Result<(), PushError<Storage>> {
858        // Retrying push: attempt a single push and, on conflict, merge the
859        // local workspace into the returned conflict workspace and retry.
860        // This implements the common push-merge-retry loop as a convenience
861        // wrapper around `try_push`.
862        while let Some(mut conflict_ws) = self.try_push(workspace)? {
863            // Keep the previous merge order: merge the caller's staged
864            // changes into the incoming conflict workspace. This preserves
865            // the semantic ordering of parents used in the merge commit.
866            conflict_ws.merge(workspace)?;
867
868            // Move the merged incoming workspace into the caller's workspace
869            // so the next try_push operates against the fresh branch state.
870            // Using assignment here is equivalent to `swap` but avoids
871            // retaining the previous `workspace` contents in the temp var.
872            *workspace = conflict_ws;
873        }
874
875        Ok(())
876    }
877
    /// Single-attempt push: upload local blobs and try to update the branch
    /// head once. Returns `Ok(None)` on success, or `Ok(Some(conflict_ws))`
    /// when the branch was updated concurrently and the caller should merge.
    ///
    /// On success the workspace's base pointers are refreshed and its staged
    /// blobs cleared; on conflict the caller's workspace is left untouched
    /// and a fresh workspace tracking the current branch state is returned.
    pub fn try_push(
        &mut self,
        workspace: &mut Workspace<Storage>,
    ) -> Result<Option<Workspace<Storage>>, PushError<Storage>> {
        // 1. Sync `workspace.local_blobs` to repository's BlobStore.
        // Note: staged blobs are uploaded before the no-op check below, so
        // they reach storage even when the head did not change.
        let workspace_reader = workspace.local_blobs.reader().unwrap();
        for handle in workspace_reader.blobs() {
            let handle = handle.expect("infallible blob enumeration");
            let blob: Blob<UnknownBlob> =
                workspace_reader.get(handle).expect("infallible blob read");
            self.storage.put(blob).map_err(PushError::StoragePut)?;
        }

        // 1.5 If the workspace's head did not change since the workspace was
        // created, there's no commit to reference and therefore no branch
        // metadata update is required. This avoids touching the branch store
        // in the common case where only blobs were staged or nothing changed.
        if workspace.base_head == workspace.head {
            return Ok(None);
        }

        // 2. Create a new branch meta blob referencing the new workspace head.
        let repo_reader = self.storage.reader().map_err(PushError::StorageReader)?;
        let base_branch_meta: TribleSet = repo_reader
            .get(workspace.base_branch_meta)
            .map_err(PushError::StorageGet)?;

        // Branch metadata must name exactly one short name.
        let Ok((branch_name,)) = find!((name: Value<_>),
            pattern!(base_branch_meta, [{ metadata::shortname: ?name }])
        )
        .exactly_one() else {
            return Err(PushError::BadBranchMetadata());
        };

        // NOTE(review): `BadBranchMetadata` is reused here for a missing
        // workspace head — confirm this error mapping is intended.
        let head_handle = workspace.head.ok_or(PushError::BadBranchMetadata())?;
        let head_: TribleSet = repo_reader
            .get(head_handle)
            .map_err(PushError::StorageGet)?;

        let branch_meta = branch_metadata(
            &workspace.signing_key,
            workspace.base_branch_id,
            branch_name.from_value(),
            Some(head_.to_blob()),
        );

        let branch_meta_handle = self
            .storage
            .put(branch_meta)
            .map_err(PushError::StoragePut)?;

        // 3. Use CAS (comparing against workspace.base_branch_meta) to update the branch pointer.
        let result = self
            .storage
            .update(
                workspace.base_branch_id,
                Some(workspace.base_branch_meta),
                branch_meta_handle,
            )
            .map_err(PushError::BranchUpdate)?;

        match result {
            PushResult::Success() => {
                // Update workspace base pointers so subsequent pushes can detect
                // that the workspace is already synchronized and avoid re-upload.
                workspace.base_branch_meta = branch_meta_handle;
                workspace.base_head = workspace.head;
                // Refresh the workspace base blob reader to ensure newly
                // uploaded blobs are visible to subsequent checkout operations.
                workspace.base_blobs = self.storage.reader().map_err(PushError::StorageReader)?;
                // Clear staged local blobs now that they have been uploaded and
                // the branch metadata updated. This frees memory and prevents
                // repeated uploads of the same staged blobs on subsequent pushes.
                workspace.local_blobs = MemoryBlobStore::new();
                Ok(None)
            }
            PushResult::Conflict(conflicting_meta) => {
                // A concurrent writer moved the branch; build a fresh
                // workspace reflecting the winning state so the caller can
                // merge and retry.
                let conflicting_meta = conflicting_meta.ok_or(PushError::BadBranchMetadata())?;

                let repo_reader = self.storage.reader().map_err(PushError::StorageReader)?;
                let branch_meta: TribleSet = repo_reader
                    .get(conflicting_meta)
                    .map_err(PushError::StorageGet)?;

                let head_ = match find!((head_: Value<_>),
                    pattern!(&branch_meta, [{ head: ?head_ }])
                )
                .at_most_one()
                {
                    Ok(Some((h,))) => Some(h),
                    Ok(None) => None,
                    Err(_) => return Err(PushError::BadBranchMetadata()),
                };

                let conflict_ws = Workspace {
                    base_blobs: self.storage.reader().map_err(PushError::StorageReader)?,
                    local_blobs: MemoryBlobStore::new(),
                    head: head_,
                    base_head: head_,
                    base_branch_id: workspace.base_branch_id,
                    base_branch_meta: conflicting_meta,
                    signing_key: workspace.signing_key.clone(),
                };

                Ok(Some(conflict_ws))
            }
        }
    }
989}
990
/// Content hash of an archived commit-metadata [`TribleSet`].
type CommitHandle = Value<Handle<Blake3, SimpleArchive>>;
/// Set of commit handles keyed by their raw hash bytes.
type CommitSet = PATCH<VALUE_LEN, IdentitySchema, ()>;
/// Content hash of an archived branch-metadata [`TribleSet`].
type BranchMetaHandle = Value<Handle<Blake3, SimpleArchive>>;
994
/// The Workspace represents the mutable working area or "staging" state.
/// It was formerly known as `Head`. It is sent to worker threads,
/// modified (via commits, merges, etc.), and then merged back into the Repository.
pub struct Workspace<Blobs: BlobStore<Blake3>> {
    /// A local BlobStore that holds any new blobs (commits, trees, deltas) before they are synced.
    local_blobs: MemoryBlobStore<Blake3>,
    /// The blob storage base for the workspace.
    base_blobs: Blobs::Reader,
    /// The branch id this workspace is tracking.
    // NOTE(review): an earlier comment mentioned `None` for detached
    // workspaces, but this field is a plain `Id`, not `Option<Id>` — confirm
    // whether detached workspaces are still a supported concept.
    base_branch_id: Id,
    /// The meta-handle corresponding to the base branch state used for CAS.
    base_branch_meta: BranchMetaHandle,
    /// Handle to the current commit in the working branch. `None` for an empty branch.
    head: Option<CommitHandle>,
    /// The branch head snapshot when this workspace was created (pull time).
    ///
    /// This allows `try_push` to cheaply detect whether the commit head has
    /// advanced since the workspace was created without querying the remote
    /// branch store.
    base_head: Option<CommitHandle>,
    /// Signing key used for commit/branch signing.
    signing_key: SigningKey,
}
1018
impl<Blobs> fmt::Debug for Workspace<Blobs>
where
    Blobs: BlobStore<Blake3>,
    Blobs::Reader: fmt::Debug,
{
    /// Formats every field except `signing_key`, which is omitted —
    /// presumably to keep secret key material out of debug output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Workspace")
            .field("local_blobs", &self.local_blobs)
            .field("base_blobs", &self.base_blobs)
            .field("base_branch_id", &self.base_branch_id)
            .field("base_branch_meta", &self.base_branch_meta)
            .field("base_head", &self.base_head)
            .field("head", &self.head)
            .finish()
    }
}
1035
/// Helper trait for [`Workspace::checkout`] specifying commit handles or ranges.
pub trait CommitSelector<Blobs: BlobStore<Blake3>> {
    /// Resolves this selector into the concrete set of commit handles it
    /// denotes, reading commit metadata through `ws` where required.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    >;
}
1046
/// Selector that returns a commit along with all of its ancestors.
/// The seed commit itself is included in the selection.
pub struct Ancestors(pub CommitHandle);

/// Convenience function to create an [`Ancestors`] selector.
pub fn ancestors(commit: CommitHandle) -> Ancestors {
    Ancestors(commit)
}
1054
/// Selector that returns the Nth ancestor along the first-parent chain.
/// `n == 0` selects the commit itself; a chain shorter than `n` yields an
/// empty selection.
pub struct NthAncestor(pub CommitHandle, pub usize);

/// Convenience function to create an [`NthAncestor`] selector.
pub fn nth_ancestor(commit: CommitHandle, n: usize) -> NthAncestor {
    NthAncestor(commit, n)
}
1062
/// Selector that returns the direct parents of a commit.
/// A root commit (no parents) yields an empty selection.
pub struct Parents(pub CommitHandle);

/// Convenience function to create a [`Parents`] selector.
pub fn parents(commit: CommitHandle) -> Parents {
    Parents(commit)
}
1070
/// Selector that returns commits reachable from either of two commits but not
/// both — the symmetric difference of their ancestor sets.
pub struct SymmetricDiff(pub CommitHandle, pub CommitHandle);

/// Convenience function to create a [`SymmetricDiff`] selector.
pub fn symmetric_diff(a: CommitHandle, b: CommitHandle) -> SymmetricDiff {
    SymmetricDiff(a, b)
}
1079
/// Selector that returns the union of commits returned by two selectors.
pub struct Union<A, B> {
    // Left-hand sub-selector; evaluated first.
    left: A,
    // Right-hand sub-selector; evaluated second.
    right: B,
}

/// Convenience function to create a [`Union`] selector.
pub fn union<A, B>(left: A, right: B) -> Union<A, B> {
    Union { left, right }
}
1090
/// Selector that returns the intersection of commits returned by two selectors.
pub struct Intersect<A, B> {
    // Left-hand sub-selector; evaluated first.
    left: A,
    // Right-hand sub-selector; evaluated second.
    right: B,
}

/// Convenience function to create an [`Intersect`] selector.
pub fn intersect<A, B>(left: A, right: B) -> Intersect<A, B> {
    Intersect { left, right }
}
1101
/// Selector that returns commits from the left selector that are not also
/// returned by the right selector.
pub struct Difference<A, B> {
    // Commits to keep (minus anything `right` also selects).
    left: A,
    // Commits to exclude from the result.
    right: B,
}

/// Convenience function to create a [`Difference`] selector.
pub fn difference<A, B>(left: A, right: B) -> Difference<A, B> {
    Difference { left, right }
}
1113
/// Selector that returns commits with timestamps in the given inclusive range.
/// Commit timestamps are stored as an `(Epoch, Epoch)` span; a commit is
/// selected when its span overlaps `[start, end]` (see the
/// [`CommitSelector`] impl).
pub struct TimeRange(pub Epoch, pub Epoch);

/// Convenience function to create a [`TimeRange`] selector.
pub fn time_range(start: Epoch, end: Epoch) -> TimeRange {
    TimeRange(start, end)
}
1121
/// Selector that filters commits returned by another selector.
/// The predicate receives each commit's metadata set and its content payload.
pub struct Filter<S, F> {
    // Inner selector producing the candidate commits.
    selector: S,
    // Predicate `(metadata, content) -> bool` deciding which commits to keep.
    filter: F,
}

/// Convenience function to create a [`Filter`] selector.
pub fn filter<S, F>(selector: S, filter: F) -> Filter<S, F> {
    Filter { selector, filter }
}
1132
impl<Blobs> CommitSelector<Blobs> for CommitHandle
where
    Blobs: BlobStore<Blake3>,
{
    /// A bare commit handle selects exactly itself.
    fn select(
        self,
        _ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let mut patch = CommitSet::new();
        patch.insert(&Entry::new(&self.raw));
        Ok(patch)
    }
}
1149
1150impl<Blobs> CommitSelector<Blobs> for Vec<CommitHandle>
1151where
1152    Blobs: BlobStore<Blake3>,
1153{
1154    fn select(
1155        self,
1156        _ws: &mut Workspace<Blobs>,
1157    ) -> Result<
1158        CommitSet,
1159        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1160    > {
1161        let mut patch = CommitSet::new();
1162        for handle in self {
1163            patch.insert(&Entry::new(&handle.raw));
1164        }
1165        Ok(patch)
1166    }
1167}
1168
1169impl<Blobs> CommitSelector<Blobs> for &[CommitHandle]
1170where
1171    Blobs: BlobStore<Blake3>,
1172{
1173    fn select(
1174        self,
1175        _ws: &mut Workspace<Blobs>,
1176    ) -> Result<
1177        CommitSet,
1178        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1179    > {
1180        let mut patch = CommitSet::new();
1181        for handle in self {
1182            patch.insert(&Entry::new(&handle.raw));
1183        }
1184        Ok(patch)
1185    }
1186}
1187
1188impl<Blobs> CommitSelector<Blobs> for Option<CommitHandle>
1189where
1190    Blobs: BlobStore<Blake3>,
1191{
1192    fn select(
1193        self,
1194        _ws: &mut Workspace<Blobs>,
1195    ) -> Result<
1196        CommitSet,
1197        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1198    > {
1199        let mut patch = CommitSet::new();
1200        if let Some(handle) = self {
1201            patch.insert(&Entry::new(&handle.raw));
1202        }
1203        Ok(patch)
1204    }
1205}
1206
impl<Blobs> CommitSelector<Blobs> for Ancestors
where
    Blobs: BlobStore<Blake3>,
{
    /// Selects the seed commit plus everything reachable through its
    /// parent links.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        collect_reachable(ws, self.0)
    }
}
1221
impl<Blobs> CommitSelector<Blobs> for NthAncestor
where
    Blobs: BlobStore<Blake3>,
{
    /// Walks `n` parent links from the seed commit and selects the commit
    /// reached; yields an empty set when the chain is shorter than `n`.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let mut current = self.0;
        let mut remaining = self.1;

        while remaining > 0 {
            let meta: TribleSet = ws.get(current).map_err(WorkspaceCheckoutError::Storage)?;
            // Follow the first parent produced by the query.
            // NOTE(review): for merge commits the iteration order of `find!`
            // determines which parent counts as "first" — confirm it is
            // deterministic.
            let mut parents = find!((p: Value<_>), pattern!(&meta, [{ parent: ?p }]));
            let Some((p,)) = parents.next() else {
                // Ran out of ancestors before taking `n` steps.
                return Ok(CommitSet::new());
            };
            current = p;
            remaining -= 1;
        }

        let mut patch = CommitSet::new();
        patch.insert(&Entry::new(&current.raw));
        Ok(patch)
    }
}
1251
impl<Blobs> CommitSelector<Blobs> for Parents
where
    Blobs: BlobStore<Blake3>,
{
    /// Selects every direct parent recorded in the commit's metadata;
    /// a root commit yields an empty set.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let meta: TribleSet = ws.get(self.0).map_err(WorkspaceCheckoutError::Storage)?;
        let mut result = CommitSet::new();
        for (p,) in find!((p: Value<_>), pattern!(&meta, [{ parent: ?p }])) {
            result.insert(&Entry::new(&p.raw));
        }
        Ok(result)
    }
}
1271
impl<Blobs> CommitSelector<Blobs> for SymmetricDiff
where
    Blobs: BlobStore<Blake3>,
{
    /// Selects commits reachable from exactly one of the two seeds:
    /// `(ancestors(a) ∪ ancestors(b)) \ (ancestors(a) ∩ ancestors(b))`.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let a = collect_reachable(ws, self.0)?;
        let b = collect_reachable(ws, self.1)?;
        let inter = a.intersect(&b);
        let mut union = a;
        union.union(b);
        Ok(union.difference(&inter))
    }
}
1291
1292impl<A, B, Blobs> CommitSelector<Blobs> for Union<A, B>
1293where
1294    A: CommitSelector<Blobs>,
1295    B: CommitSelector<Blobs>,
1296    Blobs: BlobStore<Blake3>,
1297{
1298    fn select(
1299        self,
1300        ws: &mut Workspace<Blobs>,
1301    ) -> Result<
1302        CommitSet,
1303        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1304    > {
1305        let mut left = self.left.select(ws)?;
1306        let right = self.right.select(ws)?;
1307        left.union(right);
1308        Ok(left)
1309    }
1310}
1311
1312impl<A, B, Blobs> CommitSelector<Blobs> for Intersect<A, B>
1313where
1314    A: CommitSelector<Blobs>,
1315    B: CommitSelector<Blobs>,
1316    Blobs: BlobStore<Blake3>,
1317{
1318    fn select(
1319        self,
1320        ws: &mut Workspace<Blobs>,
1321    ) -> Result<
1322        CommitSet,
1323        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1324    > {
1325        let left = self.left.select(ws)?;
1326        let right = self.right.select(ws)?;
1327        Ok(left.intersect(&right))
1328    }
1329}
1330
1331impl<A, B, Blobs> CommitSelector<Blobs> for Difference<A, B>
1332where
1333    A: CommitSelector<Blobs>,
1334    B: CommitSelector<Blobs>,
1335    Blobs: BlobStore<Blake3>,
1336{
1337    fn select(
1338        self,
1339        ws: &mut Workspace<Blobs>,
1340    ) -> Result<
1341        CommitSet,
1342        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1343    > {
1344        let left = self.left.select(ws)?;
1345        let right = self.right.select(ws)?;
1346        Ok(left.difference(&right))
1347    }
1348}
1349
impl<S, F, Blobs> CommitSelector<Blobs> for Filter<S, F>
where
    Blobs: BlobStore<Blake3>,
    S: CommitSelector<Blobs>,
    F: for<'x, 'y> Fn(&'x TribleSet, &'y TribleSet) -> bool,
{
    /// Evaluates the inner selector, then keeps only commits for which
    /// `filter(metadata, content)` returns `true`.
    ///
    /// NOTE(review): commits created without a content blob (e.g. merge
    /// commits from [`Workspace::merge`]) would fail the `exactly_one`
    /// check below — confirm whether such commits can appear here.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let patch = self.selector.select(ws)?;
        let mut result = CommitSet::new();
        let filter = self.filter;
        for raw in patch.iter() {
            let handle = Value::new(*raw);
            let meta: TribleSet = ws.get(handle).map_err(WorkspaceCheckoutError::Storage)?;

            // Each candidate must reference exactly one content blob;
            // anything else is treated as corrupt commit metadata.
            let Ok((content_handle,)) = find!(
                (c: Value<_>),
                pattern!(&meta, [{ content: ?c }])
            )
            .exactly_one() else {
                return Err(WorkspaceCheckoutError::BadCommitMetadata());
            };

            let payload: TribleSet = ws
                .get(content_handle)
                .map_err(WorkspaceCheckoutError::Storage)?;

            if filter(&meta, &payload) {
                result.insert(&Entry::new(raw));
            }
        }
        Ok(result)
    }
}
1389
/// Selector that yields commits touching a specific entity.
/// "Touching" means the commit's content payload contains at least one
/// trible whose entity id matches.
pub struct HistoryOf(pub Id);

/// Convenience function to create a [`HistoryOf`] selector.
pub fn history_of(entity: Id) -> HistoryOf {
    HistoryOf(entity)
}
1397
impl<Blobs> CommitSelector<Blobs> for HistoryOf
where
    Blobs: BlobStore<Blake3>,
{
    /// Walks the ancestors of the current head and keeps commits whose
    /// content payload mentions the entity.
    ///
    /// Fails with [`WorkspaceCheckoutError::NoHead`] when the workspace has
    /// no commits yet.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let head_ = ws.head.ok_or(WorkspaceCheckoutError::NoHead)?;
        let entity = self.0;
        filter(
            ancestors(head_),
            move |_: &TribleSet, payload: &TribleSet| payload.iter().any(|t| t.e() == &entity),
        )
        .select(ws)
    }
}
1418
1419// Generic range selectors: allow any selector type to be used as a range
1420// endpoint. We still walk the history reachable from the end selector but now
1421// stop descending a branch as soon as we encounter a commit produced by the
1422// start selector. This keeps the mechanics explicit—`start..end` literally
1423// walks from `end` until it hits `start`—while continuing to support selectors
1424// such as `Ancestors(...)` at either boundary.
1425
1426fn collect_reachable_from_patch<Blobs: BlobStore<Blake3>>(
1427    ws: &mut Workspace<Blobs>,
1428    patch: CommitSet,
1429) -> Result<
1430    CommitSet,
1431    WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1432> {
1433    let mut result = CommitSet::new();
1434    for raw in patch.iter() {
1435        let handle = Value::new(*raw);
1436        let reach = collect_reachable(ws, handle)?;
1437        result.union(reach);
1438    }
1439    Ok(result)
1440}
1441
/// Depth-first walk over commit history starting from every commit in
/// `seeds`, collecting the visited commits into a [`CommitSet`].
///
/// Commits contained in `stop` act as exclusive boundaries: they are not
/// added to the result and their parents are not explored through them
/// (a boundary commit's ancestors may still appear if reachable via
/// another path that avoids `stop`).
fn collect_reachable_from_patch_until<Blobs: BlobStore<Blake3>>(
    ws: &mut Workspace<Blobs>,
    seeds: CommitSet,
    stop: &CommitSet,
) -> Result<
    CommitSet,
    WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
> {
    let mut visited = HashSet::new();
    let mut stack: Vec<CommitHandle> = seeds.iter().map(|raw| Value::new(*raw)).collect();
    let mut result = CommitSet::new();

    while let Some(commit) = stack.pop() {
        // Skip commits that were already expanded.
        if !visited.insert(commit) {
            continue;
        }

        // Boundary commit: excluded from the result, parents not explored.
        if stop.get(&commit.raw).is_some() {
            continue;
        }

        result.insert(&Entry::new(&commit.raw));

        // Prefer staged (local) blobs, falling back to the base store.
        let meta: TribleSet = ws
            .local_blobs
            .reader()
            .unwrap()
            .get(commit)
            .or_else(|_| ws.base_blobs.get(commit))
            .map_err(WorkspaceCheckoutError::Storage)?;

        for (p,) in find!((p: Value<_>,), pattern!(&meta, [{ parent: ?p }])) {
            stack.push(p);
        }
    }

    Ok(result)
}
1480
impl<T, Blobs> CommitSelector<Blobs> for std::ops::Range<T>
where
    T: CommitSelector<Blobs>,
    Blobs: BlobStore<Blake3>,
{
    /// `start..end`: walks the history reachable from `end`'s selection,
    /// stopping (exclusively) at any commit produced by `start`.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let end_patch = self.end.select(ws)?;
        let start_patch = self.start.select(ws)?;

        collect_reachable_from_patch_until(ws, end_patch, &start_patch)
    }
}
1499
impl<T, Blobs> CommitSelector<Blobs> for std::ops::RangeFrom<T>
where
    T: CommitSelector<Blobs>,
    Blobs: BlobStore<Blake3>,
{
    /// `start..`: walks from the workspace's current head, stopping
    /// (exclusively) at any commit produced by `start`.
    ///
    /// Fails with [`WorkspaceCheckoutError::NoHead`] when the workspace has
    /// no commits yet.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let head_ = ws.head.ok_or(WorkspaceCheckoutError::NoHead)?;
        let exclude_patch = self.start.select(ws)?;

        let mut head_patch = CommitSet::new();
        head_patch.insert(&Entry::new(&head_.raw));

        collect_reachable_from_patch_until(ws, head_patch, &exclude_patch)
    }
}
1521
impl<T, Blobs> CommitSelector<Blobs> for std::ops::RangeTo<T>
where
    T: CommitSelector<Blobs>,
    Blobs: BlobStore<Blake3>,
{
    /// `..end`: everything reachable from `end`'s selection (ancestors
    /// included, no lower boundary).
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let end_patch = self.end.select(ws)?;
        collect_reachable_from_patch(ws, end_patch)
    }
}
1538
impl<Blobs> CommitSelector<Blobs> for std::ops::RangeFull
where
    Blobs: BlobStore<Blake3>,
{
    /// `..`: the entire history reachable from the workspace's current head.
    ///
    /// Fails with [`WorkspaceCheckoutError::NoHead`] when the workspace has
    /// no commits yet.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let head_ = ws.head.ok_or(WorkspaceCheckoutError::NoHead)?;
        collect_reachable(ws, head_)
    }
}
1554
impl<Blobs> CommitSelector<Blobs> for TimeRange
where
    Blobs: BlobStore<Blake3>,
{
    /// Walks the ancestors of the current head and keeps commits whose
    /// timestamp span overlaps the inclusive range `[self.0, self.1]`.
    fn select(
        self,
        ws: &mut Workspace<Blobs>,
    ) -> Result<
        CommitSet,
        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
    > {
        let head_ = ws.head.ok_or(WorkspaceCheckoutError::NoHead)?;
        let start = self.0;
        let end = self.1;
        filter(
            ancestors(head_),
            move |meta: &TribleSet, _payload: &TribleSet| {
                // A timestamp value decodes to an `(Epoch, Epoch)` span;
                // keep the commit when the span intersects `[start, end]`.
                // Commits without exactly one timestamp are excluded.
                if let Ok(Some((ts,))) =
                    find!((t: Value<_>), pattern!(meta, [{ timestamp: ?t }])).at_most_one()
                {
                    let (ts_start, ts_end): (Epoch, Epoch) =
                        crate::value::FromValue::from_value(&ts);
                    ts_start <= end && ts_end >= start
                } else {
                    false
                }
            },
        )
        .select(ws)
    }
}
1586
1587impl<Blobs: BlobStore<Blake3>> Workspace<Blobs> {
    /// Returns the branch id associated with this workspace.
    pub fn branch_id(&self) -> Id {
        self.base_branch_id
    }
1592
    /// Returns the current commit handle if one exists.
    /// `None` means the workspace tracks an empty branch with no commits.
    pub fn head(&self) -> Option<CommitHandle> {
        self.head
    }
1597
    /// Adds a blob to the workspace's local blob store.
    /// Mirrors [`BlobStorePut::put`](crate::repo::BlobStorePut) for ease of use.
    ///
    /// Returns the content-addressed handle of the stored blob. The blob is
    /// only staged locally; it reaches persistent storage on push.
    pub fn put<S, T>(&mut self, item: T) -> Value<Handle<Blake3, S>>
    where
        S: BlobSchema + 'static,
        T: ToBlob<S>,
        Handle<Blake3, S>: ValueSchema,
    {
        // In-memory puts cannot fail by contract.
        self.local_blobs.put(item).expect("infallible blob put")
    }
1608
    /// Retrieves a blob from the workspace.
    ///
    /// The method first checks the workspace's local blob store and falls back
    /// to the base blob store if the blob is not found locally.
    ///
    /// Note: any local-store failure triggers the fallback (`or_else`
    /// ignores the error kind), so the error reported to the caller always
    /// comes from the base store.
    pub fn get<T, S>(
        &mut self,
        handle: Value<Handle<Blake3, S>>,
    ) -> Result<T, <Blobs::Reader as BlobStoreGet<Blake3>>::GetError<<T as TryFromBlob<S>>::Error>>
    where
        S: BlobSchema + 'static,
        T: TryFromBlob<S>,
        Handle<Blake3, S>: ValueSchema,
    {
        self.local_blobs
            .reader()
            .unwrap()
            .get(handle)
            .or_else(|_| self.base_blobs.get(handle))
    }
1628
    /// Performs a commit in the workspace.
    /// This method creates a new commit blob (stored in the local blobset)
    /// and updates the current commit handle.
    ///
    /// The current head (if any) becomes the sole parent of the new commit.
    /// `content_` is archived and attached as the commit's content; when
    /// `message_` is given it is stored as a `LongString` blob and linked
    /// from the commit metadata.
    // NOTE(review): the trailing underscores on the parameter names appear
    // to avoid clashing with identifiers used elsewhere — confirm.
    pub fn commit(&mut self, content_: TribleSet, message_: Option<&str>) {
        // 1. Create a commit blob from the current head, content and the commit message (if any).
        let content_blob = content_.to_blob();
        // If a message is provided, store it as a LongString blob and pass the handle.
        let message_handle = message_.map(|m| self.put::<LongString, String>(m.to_string()));
        // `Option::iter` yields the head as zero-or-one parents.
        let parents = self.head.iter().copied();

        let commit_set = crate::repo::commit::commit_metadata(
            &self.signing_key,
            parents,
            message_handle,
            Some(content_blob.clone()),
        );
        // 2. Store the content and commit blobs in `self.local_blobs`.
        let _ = self
            .local_blobs
            .put(content_blob)
            .expect("failed to put content blob");
        let commit_handle = self
            .local_blobs
            .put(commit_set)
            .expect("failed to put commit blob");
        // 3. Update `self.head` to point to the new commit.
        self.head = Some(commit_handle);
    }
1657
    /// Merge another workspace (or its commit state) into this one.
    ///
    /// Notes on semantics
    /// - This operation will copy the *staged* blobs created in `other`
    ///   (i.e., `other.local_blobs`) into `self.local_blobs`, then create a
    ///   merge commit whose parents are `self.head` and `other.head`.
    /// - The merge does *not* automatically import the entire base history
    ///   reachable from `other`'s head. If the incoming parent commits
    ///   reference blobs that do not exist in this repository's storage,
    ///   reading those commits later will fail until the missing blobs are
    ///   explicitly imported (for example via `repo::transfer(reachable(...))`).
    /// - This design keeps merge permissive and leaves cross-repository blob
    ///   import as an explicit user action.
    ///
    /// Returns the handle of the newly created merge commit. The merge
    /// commit carries no message and no content blob.
    // NOTE(review): the body has no failing paths — the `Result` appears to
    // be reserved for future failure modes; confirm.
    pub fn merge(&mut self, other: &mut Workspace<Blobs>) -> Result<CommitHandle, MergeError> {
        // 1. Transfer all blobs from the other workspace to self.local_blobs.
        let other_local = other.local_blobs.reader().unwrap();
        for r in other_local.blobs() {
            let handle = r.expect("infallible blob enumeration");
            let blob: Blob<UnknownBlob> = other_local.get(handle).expect("infallible blob read");

            // Store the blob in the local workspace's blob store.
            self.local_blobs.put(blob).expect("infallible blob put");
        }
        // 2. Compute a merge commit from self.current_commit and other.current_commit.
        // Parent order: self's head first, then other's head.
        let parents = self.head.iter().copied().chain(other.head.iter().copied());
        let merge_commit = commit_metadata(
            &self.signing_key,
            parents,
            None, // No message for the merge commit
            None, // No content blob for the merge commit
        );
        // 3. Store the merge commit in self.local_blobs.
        let commit_handle = self
            .local_blobs
            .put(merge_commit)
            .expect("failed to put merge commit blob");
        self.head = Some(commit_handle);

        Ok(commit_handle)
    }
1698
1699    /// Create a merge commit that ties this workspace's current head and an
1700    /// arbitrary other commit (already present in the underlying blob store)
1701    /// together without requiring another `Workspace` instance.
1702    ///
1703    /// This does not attach any content to the merge commit.
1704    pub fn merge_commit(
1705        &mut self,
1706        other: Value<Handle<Blake3, SimpleArchive>>,
1707    ) -> Result<CommitHandle, MergeError> {
1708        // Validate that `other` can be loaded from either local or base blobs.
1709        // If it cannot be loaded we still proceed with the merge; dereference
1710        // failures will surface later when reading history. Callers should
1711        // ensure the reachable blobs were imported beforehand (e.g. by
1712        // combining `reachable` with `transfer`).
1713
1714        let parents = self.head.iter().copied().chain(Some(other));
1715        let merge_commit = commit_metadata(&self.signing_key, parents, None, None);
1716        let commit_handle = self
1717            .local_blobs
1718            .put(merge_commit)
1719            .expect("failed to put merge commit blob");
1720        self.head = Some(commit_handle);
1721        Ok(commit_handle)
1722    }
1723
1724    /// Returns the combined [`TribleSet`] for the specified commits.
1725    ///
1726    /// Each commit handle must reference a commit blob stored either in the
1727    /// workspace's local blob store or the repository's base store. The
1728    /// associated content blobs are loaded and unioned together. An error is
1729    /// returned if any commit or content blob is missing or malformed.
1730    fn checkout_commits<I>(
1731        &mut self,
1732        commits: I,
1733    ) -> Result<
1734        TribleSet,
1735        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1736    >
1737    where
1738        I: IntoIterator<Item = CommitHandle>,
1739    {
1740        let local = self.local_blobs.reader().unwrap();
1741        let mut result = TribleSet::new();
1742        for commit in commits {
1743            let meta: TribleSet = local
1744                .get(commit)
1745                .or_else(|_| self.base_blobs.get(commit))
1746                .map_err(WorkspaceCheckoutError::Storage)?;
1747
1748            // Some commits (for example merge commits) intentionally do not
1749            // carry a content blob. Treat those as no-ops during checkout so
1750            // callers can request ancestor ranges without failing when a
1751            // merge commit is encountered.
1752            let content_opt =
1753                match find!((c: Value<_>), pattern!(&meta, [{ content: ?c }])).at_most_one() {
1754                    Ok(Some((c,))) => Some(c),
1755                    Ok(None) => None,
1756                    Err(_) => return Err(WorkspaceCheckoutError::BadCommitMetadata()),
1757                };
1758
1759            if let Some(c) = content_opt {
1760                let set: TribleSet = local
1761                    .get(c)
1762                    .or_else(|_| self.base_blobs.get(c))
1763                    .map_err(WorkspaceCheckoutError::Storage)?;
1764                result.union(set);
1765            } else {
1766                // No content for this commit (e.g. merge-only commit); skip it.
1767                continue;
1768            }
1769        }
1770        Ok(result)
1771    }
1772
1773    /// Returns the combined [`TribleSet`] for the specified commits or commit
1774    /// ranges. `spec` can be a single [`CommitHandle`], an iterator of handles
1775    /// or any of the standard range types over `CommitHandle`.
1776    pub fn checkout<R>(
1777        &mut self,
1778        spec: R,
1779    ) -> Result<
1780        TribleSet,
1781        WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1782    >
1783    where
1784        R: CommitSelector<Blobs>,
1785    {
1786        let patch = spec.select(self)?;
1787        let commits = patch.iter().map(|raw| Value::new(*raw));
1788        self.checkout_commits(commits)
1789    }
1790}
1791
/// Errors that can occur while materializing commits during a workspace
/// checkout (see `Workspace::checkout` and `checkout_commits`).
///
/// Generic over the blob store's get-error type so storage failures can be
/// propagated without boxing.
#[derive(Debug)]
pub enum WorkspaceCheckoutError<GetErr: Error> {
    /// Error retrieving blobs from storage.
    Storage(GetErr),
    /// Commit metadata is malformed or missing required fields.
    BadCommitMetadata(),
    /// The workspace has no commits yet.
    NoHead,
}
1801
1802impl<E: Error + fmt::Debug> fmt::Display for WorkspaceCheckoutError<E> {
1803    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1804        match self {
1805            WorkspaceCheckoutError::Storage(e) => write!(f, "storage error: {e}"),
1806            WorkspaceCheckoutError::BadCommitMetadata() => {
1807                write!(f, "commit metadata missing content field")
1808            }
1809            WorkspaceCheckoutError::NoHead => write!(f, "workspace has no commits"),
1810        }
1811    }
1812}
1813
// All context lives in the `Display`/`Debug` output, so the `Error` impl
// needs no method overrides (no `source()` chaining is provided here).
impl<E: Error + fmt::Debug> Error for WorkspaceCheckoutError<E> {}
1815
1816fn collect_reachable<Blobs: BlobStore<Blake3>>(
1817    ws: &mut Workspace<Blobs>,
1818    from: CommitHandle,
1819) -> Result<
1820    CommitSet,
1821    WorkspaceCheckoutError<<Blobs::Reader as BlobStoreGet<Blake3>>::GetError<UnarchiveError>>,
1822> {
1823    let mut visited = HashSet::new();
1824    let mut stack = vec![from];
1825    let mut result = CommitSet::new();
1826
1827    while let Some(commit) = stack.pop() {
1828        if !visited.insert(commit) {
1829            continue;
1830        }
1831        result.insert(&Entry::new(&commit.raw));
1832
1833        let meta: TribleSet = ws
1834            .local_blobs
1835            .reader()
1836            .unwrap()
1837            .get(commit)
1838            .or_else(|_| ws.base_blobs.get(commit))
1839            .map_err(WorkspaceCheckoutError::Storage)?;
1840
1841        for (p,) in find!((p: Value<_>,), pattern!(&meta, [{ parent: ?p }])) {
1842            stack.push(p);
1843        }
1844    }
1845
1846    Ok(result)
1847}