xet-data 1.5.2

Data processing pipeline for chunking, deduplication, and file reconstruction; used in the Hugging Face Xet client tools. Intended to be used through the API in the hf-xet package.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Submodules are declared without `pub`, so they are private to this module;
// outside code reaches their contents only through the re-exports below.
mod chunking;
mod data_aggregator;
mod dedup_metrics;
mod defrag_prevention;
mod file_deduplication;
mod interface;

// Public surface: selected items are lifted out of the private submodules so
// callers can import them from this module directly.
// NOTE(review): nothing from `defrag_prevention` is re-exported here —
// presumably it is internal-only; confirm that is intentional.
pub use chunking::{Chunker, find_partitions};
pub use data_aggregator::DataAggregator;
pub use dedup_metrics::DeduplicationMetrics;
pub use file_deduplication::FileDeduper;
pub use interface::DeduplicationDataInterface;
// Re-export types that moved to xorb_object for backward compatibility.
// These come from `xet_core_structures::xorb_object` rather than from a
// submodule declared above, so imports written against this module's path
// keep resolving.
// NOTE(review): `test_utils` is re-exported with no `#[cfg(...)]` gate visible
// here — confirm it is meant to be part of the public API.
pub use xet_core_structures::xorb_object::Chunk;
pub use xet_core_structures::xorb_object::{RawXorbData, constants, test_utils};