ipfrs_core/lib.rs

//! # IPFRS Core
//!
//! Core types and traits for IPFRS (the InterPlanetary File Replication System).
//!
//! This crate provides the fundamental building blocks for content-addressed storage:
//!
//! - **[`Block`]** - Content-addressed data blocks with CID verification
//! - **[`Cid`]** - Content Identifiers for unique data addressing
//! - **[`Ipld`]** - InterPlanetary Linked Data for structured content
//! - **Chunking** - Splitting large files into Merkle DAG structures
//! - **Streaming** - Async readers for DAG traversal
//!
//! ## Quick Start
//!
//! ```rust
//! use ipfrs_core::{Block, CidBuilder};
//! use bytes::Bytes;
//!
//! // Create a block from data
//! let block = Block::new(Bytes::from_static(b"Hello, IPFS!")).unwrap();
//! println!("CID: {}", block.cid());
//!
//! // Generate a CID directly
//! let cid = CidBuilder::new().build(b"some data").unwrap();
//! println!("Generated CID: {}", cid);
//! ```
//!
//! ## Chunking Large Files
//!
//! ```rust
//! use ipfrs_core::Chunker;
//!
//! let data = vec![0u8; 1_000_000]; // 1 MB of data
//! let chunker = Chunker::new();
//! let chunked = chunker.chunk(&data).unwrap();
//!
//! println!("Root CID: {}", chunked.root_cid);
//! println!("Chunks: {}", chunked.chunk_count);
//! ```
//!
//! ## IPLD Encoding
//!
//! ```rust
//! use ipfrs_core::Ipld;
//! use std::collections::BTreeMap;
//!
//! // Create structured data
//! let mut map = BTreeMap::new();
//! map.insert("name".to_string(), Ipld::String("example".to_string()));
//! map.insert("version".to_string(), Ipld::Integer(1));
//! let ipld = Ipld::Map(map);
//!
//! // Encode to DAG-CBOR
//! let cbor = ipld.to_dag_cbor().unwrap();
//!
//! // Decode back
//! let decoded = Ipld::from_dag_cbor(&cbor).unwrap();
//! ```
//!
//! ## Features
//!
//! - **SHA2-256, SHA2-512, SHA3-256, SHA3-512, BLAKE2b, BLAKE2s, and BLAKE3** hash algorithms with SIMD acceleration (selection sketched below)
//! - **CIDv0 and CIDv1** support with conversion between versions
//! - **Multibase encoding** (Base32, Base58btc, Base64)
//! - **DAG-CBOR, DAG-JSON, and DAG-JOSE** codecs
//! - **Pluggable codec registry** for custom encoding/decoding
//! - **DAG traversal and analysis** utilities for Merkle DAGs
//! - **CAR (Content Addressable aRchive)** format support for data portability
//! - **Compression support** with the Zstd and LZ4 algorithms for storage efficiency (see the sketch below)
//! - **Streaming compression** for incremental compression and decompression of large files
//! - **Async streaming** for large files
//! - **LRU block cache** for fast access to frequently used blocks (see the sketch below)
//! - **Apache Arrow integration** for zero-copy tensor access
//! - **Parallel batch processing** with Rayon for high throughput
//! - **Parallel chunking** for multi-core processing of large files
//! - **Content-defined chunking** with deduplication
//! - **Production metrics** and observability with percentile tracking
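//!
//! ## Choosing a Hash Algorithm
//!
//! [`CidBuilder`] is re-exported alongside [`HashAlgorithm`], which suggests the
//! digest used for a CID is configurable. The snippet below is a hedged sketch,
//! not a verified doctest (hence `ignore`): the builder method name
//! `hash_algorithm` and the variant name `Blake3` are placeholders chosen for
//! illustration. See the [`cid`] module for the actual builder API.
//!
//! ```ignore
//! use ipfrs_core::{CidBuilder, HashAlgorithm};
//!
//! // Build CIDs for the same payload with the default digest and with BLAKE3.
//! let default_cid = CidBuilder::new().build(b"some data").unwrap();
//! let blake3_cid = CidBuilder::new()
//!     .hash_algorithm(HashAlgorithm::Blake3) // placeholder method name
//!     .build(b"some data")
//!     .unwrap();
//!
//! // Different digests yield different CIDs for identical content.
//! assert_ne!(default_cid, blake3_cid);
//! ```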
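//!
//! ## Compressing Data
//!
//! The snippet below is an illustrative sketch rather than a verified doctest
//! (hence `ignore`). It assumes [`compress`] and [`decompress`] take a byte
//! slice plus a [`CompressionAlgorithm`] and return the transformed bytes, that
//! the algorithm enum exposes a `Zstd` variant, and that [`compression_ratio`]
//! compares the two sizes; check the [`compression`] module for the exact
//! signatures.
//!
//! ```ignore
//! use ipfrs_core::{compress, compression_ratio, decompress, CompressionAlgorithm};
//!
//! // Highly repetitive data compresses well.
//! let data = vec![42u8; 64 * 1024];
//!
//! // Compress with Zstd, then restore the original bytes.
//! let compressed = compress(&data, CompressionAlgorithm::Zstd).unwrap();
//! let restored = decompress(&compressed, CompressionAlgorithm::Zstd).unwrap();
//! assert_eq!(restored, data);
//!
//! // Ratio of original to compressed size (argument order assumed).
//! println!("ratio: {:.2}", compression_ratio(data.len(), compressed.len()));
//! ```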
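//!
//! ## Caching Blocks
//!
//! The LRU block cache keeps recently used blocks in memory so repeated reads
//! can be served without going back to the underlying store. The example below
//! is a hypothetical sketch (marked `ignore`): the constructor argument and the
//! method names `put` and `get` on [`BlockCache`] are assumptions made for
//! illustration; see the [`block_cache`] module for the real interface.
//!
//! ```ignore
//! use ipfrs_core::{Block, BlockCache};
//! use bytes::Bytes;
//!
//! // A cache holding up to 1024 blocks (capacity semantics assumed).
//! let cache = BlockCache::new(1024);
//!
//! let block = Block::new(Bytes::from_static(b"hot data")).unwrap();
//! let cid = block.cid().clone();
//!
//! // Insert once, then serve repeated lookups from memory.
//! cache.put(block);
//! if let Some(hit) = cache.get(&cid) {
//!     println!("cache hit: {}", hit.cid());
//! }
//! ```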

pub mod arrow;
pub mod batch;
pub mod block;
pub mod block_cache;
pub mod car;
pub mod chunking;
pub mod cid;
pub mod codec_registry;
pub mod compression;
pub mod config;
pub mod dag;
pub mod error;
pub mod hash;
pub mod integration;
pub mod ipld;
pub mod jose;
pub mod metrics;
pub mod parallel_chunking;
pub mod pool;
pub mod safetensors;
pub mod streaming;
pub mod streaming_compression;
pub mod tensor;
pub mod types;
pub mod utils;

pub use self::arrow::{
    arrow_dtype_to_tensor, arrow_to_tensor_block, tensor_dtype_to_arrow, TensorBlockArrowExt,
};
pub use self::batch::{BatchProcessor, BatchStats};
pub use self::block::{Block, BlockBuilder, BlockMetadata, MAX_BLOCK_SIZE, MIN_BLOCK_SIZE};
pub use self::block_cache::{BlockCache, CacheStats};
pub use self::car::{CarCompressionStats, CarHeader, CarReader, CarWriter, CarWriterBuilder};
pub use self::chunking::{
    ChunkedFile, Chunker, ChunkingConfig, ChunkingConfigBuilder, ChunkingStrategy, DagBuilder,
    DagLink, DagNode, DeduplicationStats,
};
pub use self::cid::{
    codec, parse_cid, parse_cid_with_base, Cid, CidBuilder, CidExt, HashAlgorithm,
    MultibaseEncoding,
};
pub use self::codec_registry::{
    global_codec_registry, Codec, CodecRegistry, DagCborCodec, DagJsonCodec, RawCodec,
};
pub use self::compression::{compress, compression_ratio, decompress, CompressionAlgorithm};
pub use self::config::{global_config, set_global_config, Config, ConfigBuilder};
pub use self::dag::{
    collect_all_links, collect_unique_links, count_links_by_depth, dag_fanout_by_level,
    extract_links, filter_dag, find_paths_to_cid, is_dag, map_dag, subgraph_size, topological_sort,
    traverse_bfs, traverse_dfs, DagMetrics, DagStats,
};
pub use self::error::{Error, Result};
pub use self::hash::{
    global_hash_registry, Blake2b256Engine, Blake2b512Engine, Blake2s256Engine, Blake3Engine,
    CpuFeatures, HashEngine, HashRegistry, Sha256Engine, Sha3_256Engine, Sha3_512Engine,
    Sha512Engine,
};
pub use self::integration::{
    DeduplicationStats as TensorDeduplicationStats, TensorBatchProcessor, TensorDeduplicator,
    TensorStore,
};
pub use self::ipld::Ipld;
pub use self::jose::{JoseBuilder, JoseSignature};
pub use self::metrics::{global_metrics, Metrics, MetricsSnapshot, PercentileStats, Timer};
pub use self::parallel_chunking::{
    ParallelChunker, ParallelChunkingConfig, ParallelChunkingResult, ParallelDeduplicator,
};
pub use self::pool::{
    freeze_bytes, global_bytes_pool, global_cid_string_pool, BytesPool, CidStringPool, PoolStats,
};
pub use self::safetensors::{SafetensorInfo, SafetensorsFile};
pub use self::streaming::{
    read_chunked_file, AsyncBlockReader, BlockFetcher, BlockReader, DagChunkStream,
    MemoryBlockFetcher,
};
pub use self::streaming_compression::{CompressingStream, DecompressingStream, StreamingStats};
pub use self::tensor::{TensorBlock, TensorDtype, TensorMetadata, TensorShape};
pub use self::types::{BlockSize, PeerId, Priority};