xet_data/deduplication/interface.rs
1use std::result::Result;
2
3use async_trait::async_trait;
4use xet_core_structures::merklehash::MerkleHash;
5use xet_core_structures::metadata_shard::file_structs::FileDataSequenceEntry;
6
7use super::RawXorbData;
8use crate::progress_tracking::upload_tracking::FileXorbDependency;
9
10/// The interface needed for the deduplication routines to run. To use the deduplication code,
11/// define a struct that implements these methods. This struct must be given by value to the `FileDeduper`
12/// struct on creation.
13///
14/// The two primary methods are `chunk_hash_dedup_query`, which determines whether and how a chunk can be deduped,
15/// and `register_new_xorb`, which is called intermittently when a new block of data is available for upload.
16///
17/// The global dedup query methods (`register_global_dedup_query` and `complete_global_dedup_queries`) have no default bodies, so they must always be implemented; they only need to do real work if global dedup is to be enabled. On wasm targets the trait is expanded with `async_trait(?Send)` instead of plain `async_trait`.
18#[cfg_attr(not(target_family = "wasm"), async_trait)]
19#[cfg_attr(target_family = "wasm", async_trait(?Send))]
20pub trait DeduplicationDataInterface: Send + Sync + 'static {
21 /// The error type returned by the fallible methods of this interface.
22 type ErrorType;
23
24 /// Determines whether and how the chunks identified by `query_hashes` can be deduplicated. NOTE(review): presumably `Ok(None)` means no dedup match was found; the meaning of each element of the returned `(usize, FileDataSequenceEntry, bool)` tuple is not documented here -- confirm against the implementors.
25 async fn chunk_hash_dedup_query(
26 &self,
27 query_hashes: &[MerkleHash],
28 ) -> std::result::Result<Option<(usize, FileDataSequenceEntry, bool)>, Self::ErrorType>;
29
30 /// Registers a new query, keyed by the given chunk hash, for more information about the
31 /// global deduplication. This is expected to run in the background. Simply return `Ok(())` to
32 /// disable global dedup queries.
33 async fn register_global_dedup_query(&mut self, _chunk_hash: MerkleHash) -> Result<(), Self::ErrorType>;
34
35 /// Waits for all the current queries to complete, then returns `Ok(true)` if there is
36 /// new deduplication information available.
37 async fn complete_global_dedup_queries(&mut self) -> Result<bool, Self::ErrorType>;
38
39 /// Registers a xorb of new data that has no deduplication references. Takes ownership of `xorb`.
40 async fn register_new_xorb(&mut self, xorb: RawXorbData) -> Result<(), Self::ErrorType>;
41
42 /// Register a set of xorb dependencies; this is called periodically during the dedup
43 /// process with a list of (xorb hash, n_bytes). As the final bit may get
44 /// returned as a partial xorb without a hash yet, it is not guaranteed that the
45 /// sum of the n_bytes across all the dependencies will equal the size of the file. Unlike the other methods, this one does not return a `Result`.
46 async fn register_xorb_dependencies(&mut self, dependencies: &[FileXorbDependency]);
47}