xet-data 1.5.2

Data processing pipeline for chunking, deduplication, and file reconstruction; used in the Hugging Face Xet client tools. Intended to be used through the API in the hf-xet package.
Documentation
use std::future::Future;
use std::pin::Pin;

use bytes::Bytes;
use xet_client::cas_types::FileRange;
use xet_runtime::utils::adjustable_semaphore::AdjustableSemaphorePermit;

use super::super::Result;

/// A future that produces the data bytes to be written.
pub type DataFuture = Pin<Box<dyn Future<Output = Result<Bytes>> + Send + 'static>>;

#[async_trait::async_trait]
pub trait DataWriter: Send + 'static {
    /// Sets the data source for the next sequential term.
    ///
    /// The byte range must be sequential - its start must match the end of the
    /// previous range (or 0 for the first call). The data future will be spawned
    /// as a task and its result will be written when ready.
    ///
    /// SequentialWriter will ensure that the actual writes happen in order.
    ///
    /// An optional semaphore permit can be passed for rate limiting. The permit
    /// will be released by the background writer after the data has been written.
    async fn set_next_term_data_source(
        &mut self,
        byte_range: FileRange,
        permit: Option<AdjustableSemaphorePermit>,
        data_future: DataFuture,
    ) -> Result<()>;

    /// Consumes the writer, waits until all data has been written, and returns the
    /// number of bytes written. Dropping the writer without calling `finish` cancels
    /// the reconstruction via the shared run state.
    async fn finish(mut self: Box<Self>) -> Result<u64>;
}