// xet_data/deduplication/interface.rs

use std::result::Result;

use async_trait::async_trait;
use xet_core_structures::merklehash::MerkleHash;
use xet_core_structures::metadata_shard::file_structs::FileDataSequenceEntry;

use super::RawXorbData;
use crate::progress_tracking::upload_tracking::FileXorbDependency;

10/// The interface needed for the deduplication routines to run.  To use the deduplication code,
11/// define a struct that implements these methods.  This struct must be given by value to the FileDeduper
12/// struct on creation.
13///
14/// The two primary methods are chunk_hash_dedup_query, which determines whether and how a chunk can be deduped,  
15/// and register_new_xorb, which is called intermittently when a new block of data is available for upload.
16///
17/// The global dedup query functions are optional but needed if global dedup is to be enabled.
18#[cfg_attr(not(target_family = "wasm"), async_trait)]
19#[cfg_attr(target_family = "wasm", async_trait(?Send))]
20pub trait DeduplicationDataInterface: Send + Sync + 'static {
21    /// The error type used for the interface
22    type ErrorType;
23
24    /// Query for possible shards that
25    async fn chunk_hash_dedup_query(
26        &self,
27        query_hashes: &[MerkleHash],
28    ) -> std::result::Result<Option<(usize, FileDataSequenceEntry, bool)>, Self::ErrorType>;
29
30    /// Registers a new query for more information about the
31    /// global deduplication.  This is expected to run in the background.  Simply return Ok(()) to
32    /// disable global dedup queries.
33    async fn register_global_dedup_query(&mut self, _chunk_hash: MerkleHash) -> Result<(), Self::ErrorType>;
34
35    /// Waits for all the current queries to complete, then returns true if there is
36    /// new deduplication information available.
37    async fn complete_global_dedup_queries(&mut self) -> Result<bool, Self::ErrorType>;
38
39    /// Registers a Xorb of new data that has no deduplication references.
40    async fn register_new_xorb(&mut self, xorb: RawXorbData) -> Result<(), Self::ErrorType>;
41
42    /// Register a set of xorb dependencies; this is called periodically during the dedup
43    /// process with a list of (xorb hash, n_bytes).  As the final bit may get
44    /// returned as a partial xorb without a hash yet, it is not gauranteed that the
45    /// sum of the n_bytes across all the dependencies will equal the size of the file.
46    async fn register_xorb_dependencies(&mut self, dependencies: &[FileXorbDependency]);
47}