xet-client 1.5.2

Client library for communicating with Hugging Face Xet storage servers. Use through the hf-xet crate.
Documentation
//! Direct Access Client Trait
//!
//! This module defines the `DirectAccessClient` trait, which extends the standard
//! `Client` interface with direct XORB and file access methods. This is used by
//! the local server and testing utilities to access stored data directly.

use std::ops::Range;
use std::time::Duration;

use async_trait::async_trait;
use bytes::Bytes;
use xet_core_structures::merklehash::MerkleHash;
use xet_core_structures::xorb_object::XorbObject;

use super::super::interface::Client;
use crate::cas_types::{
    FileRange, QueryReconstructionResponse, QueryReconstructionResponseV2, XorbReconstructionFetchInfo,
};
use crate::error::Result;

/// A [`Client`] with direct access to XORB and file storage.
///
/// This trait extends the standard `Client` interface with methods for:
/// - Direct XORB access (read, list, delete)
/// - File data retrieval
/// - URL expiration control
/// - API delay simulation
/// - Explicit V1 / V2 reconstruction queries and V2 fallback simulation
///
/// Both `LocalClient` and `MemoryClient` implement this trait, allowing the
/// local server to work with either backend.
///
/// All configuration methods (`set_*`, `disable_v2_reconstruction`) take `&self`
/// rather than `&mut self`; implementations presumably keep these settings behind
/// interior mutability (NOTE(review): confirm against the implementors).
// The two `cfg_attr` lines select the `async_trait` flavor per target: the plain
// form everywhere except wasm, and the `?Send` form on wasm targets.
#[cfg_attr(not(target_family = "wasm"), async_trait)]
#[cfg_attr(target_family = "wasm", async_trait(?Send))]
pub trait DirectAccessClient: Client + Send + Sync {
    /// Sets the expiration duration for fetch term URLs.
    fn set_fetch_term_url_expiration(&self, expiration: Duration);

    /// Sets a random delay range for all Client API calls.
    ///
    /// When set, each Client trait method will sleep for a random duration
    /// within the specified range before returning. This simulates network latency.
    ///
    /// Pass `None` to disable the delay.
    fn set_api_delay_range(&self, delay_range: Option<Range<Duration>>);

    /// Sets the maximum number of byte ranges per `XorbMultiRangeFetch` entry
    /// in V2 reconstruction responses.
    ///
    /// Default is `usize::MAX` (all ranges in one fetch). When set to N,
    /// ranges for each xorb are grouped into entries of at most N ranges.
    /// This simulates the CloudFront URL length limit that forces splitting.
    fn set_max_ranges_per_fetch(&self, max_ranges: usize);

    /// Sets the expiration duration for global dedup shards.
    ///
    /// When set, `query_for_global_dedup_shard` will set the shard footer's
    /// `shard_key_expiry` to `now + expiration`.
    ///
    /// Pass `None` to disable (default: returns full shards with no expiration).
    fn set_global_dedup_shard_expiration(&self, expiration: Option<Duration>);

    /// Disables V2 reconstruction responses with the given HTTP status code.
    ///
    /// When disabled, the V2 endpoint returns this status, forcing clients to
    /// fall back to V1. Pass `0` to re-enable; `0` is the sentinel for
    /// "V2 enabled" (see [`v2_disabled_status_code`](Self::v2_disabled_status_code)).
    fn disable_v2_reconstruction(&self, status_code: u16);

    /// Returns the HTTP status code the V2 endpoint should return when disabled,
    /// or 0 if V2 is enabled.
    ///
    /// The provided default implementation always returns `0`, i.e. it reports
    /// V2 as enabled.
    fn v2_disabled_status_code(&self) -> u16 {
        0
    }

    /// V1 reconstruction: returns per-range presigned URLs.
    ///
    /// `file_id` identifies the file; `bytes_range` optionally restricts the
    /// query to part of the file.
    ///
    /// NOTE(review): the meaning of `Ok(None)` is not stated in this trait;
    /// confirm against the implementations before relying on it.
    async fn get_reconstruction_v1(
        &self,
        file_id: &MerkleHash,
        bytes_range: Option<FileRange>,
    ) -> Result<Option<QueryReconstructionResponse>>;

    /// V2 reconstruction: returns per-xorb multi-range fetch descriptors.
    ///
    /// Takes the same arguments as
    /// [`get_reconstruction_v1`](Self::get_reconstruction_v1) but yields the V2
    /// response type.
    ///
    /// NOTE(review): the meaning of `Ok(None)` is not stated in this trait;
    /// confirm against the implementations before relying on it.
    async fn get_reconstruction_v2(
        &self,
        file_id: &MerkleHash,
        bytes_range: Option<FileRange>,
    ) -> Result<Option<QueryReconstructionResponseV2>>;

    /// Applies the configured API delay if set.
    ///
    /// This method sleeps for a random duration within the configured delay range.
    /// If no delay is configured (via `set_api_delay_range`), this returns immediately.
    async fn apply_api_delay(&self);

    /// Returns all XORB hashes stored in this client.
    async fn list_xorbs(&self) -> Result<Vec<MerkleHash>>;

    /// Deletes a XORB by hash.
    ///
    /// This method has no return value, so the outcome of the deletion
    /// (including whether the XORB existed) is not reported to the caller.
    async fn delete_xorb(&self, hash: &MerkleHash);

    /// Get all uncompressed bytes from a XORB.
    async fn get_full_xorb(&self, hash: &MerkleHash) -> Result<Bytes>;

    /// Get uncompressed bytes from a XORB within chunk ranges.
    ///
    /// Each tuple represents a half-open chunk index range `[start, end)`.
    ///
    /// NOTE(review): presumably the returned vector holds one `Bytes` per
    /// requested range, in request order — confirm against the implementations.
    async fn get_xorb_ranges(&self, hash: &MerkleHash, chunk_ranges: Vec<(u32, u32)>) -> Result<Vec<Bytes>>;

    /// Get the length of the uncompressed XORB data.
    ///
    /// Note the `u32` return type; the raw (serialized) length is reported as
    /// `u64` by [`xorb_raw_length`](Self::xorb_raw_length).
    async fn xorb_length(&self, hash: &MerkleHash) -> Result<u32>;

    /// Check if a XORB exists.
    async fn xorb_exists(&self, hash: &MerkleHash) -> Result<bool>;

    /// Get the XorbObject footer/metadata for a XORB.
    async fn xorb_footer(&self, hash: &MerkleHash) -> Result<XorbObject>;

    /// Get the file size for a given file hash.
    async fn get_file_size(&self, hash: &MerkleHash) -> Result<u64>;

    /// Get file data, optionally within a byte range.
    ///
    /// Passing `None` for `byte_range` requests the data without a range
    /// restriction.
    async fn get_file_data(&self, hash: &MerkleHash, byte_range: Option<FileRange>) -> Result<Bytes>;

    /// Get raw (serialized) bytes from a XORB, optionally within a byte range.
    ///
    /// Unlike `get_xorb_ranges` which returns decompressed chunk data, this returns
    /// the raw bytes as stored (including compression headers). This is used by the
    /// server's fetch_term endpoint to serve data that clients can then decompress.
    ///
    /// NOTE(review): `byte_range` presumably indexes into the raw serialized
    /// data (the same coordinate space as `xorb_raw_length`) — confirm.
    async fn get_xorb_raw_bytes(&self, hash: &MerkleHash, byte_range: Option<FileRange>) -> Result<Bytes>;

    /// Get the total length of the raw (serialized) XORB data.
    async fn xorb_raw_length(&self, hash: &MerkleHash) -> Result<u64>;

    /// Fetches term data for a given hash and fetch term.
    ///
    /// Returns (data bytes, chunk byte indices) matching `Client::get_file_term_data`.
    ///
    /// Unlike the other accessors in this trait, `hash` is taken by value
    /// rather than by reference.
    async fn fetch_term_data(
        &self,
        hash: MerkleHash,
        fetch_term: XorbReconstructionFetchInfo,
    ) -> Result<(Bytes, Vec<u32>)>;
}