// helia_unixfs/lib.rs
1//! # Helia UnixFS
2//!
3//! A Rust implementation of the IPFS UnixFS filesystem, providing file and directory
4//! operations with content-addressed storage.
5//!
6//! ## Overview
7//!
8//! UnixFS is a protobuf-based format for representing files and directories on IPFS.
9//! This crate provides a high-level interface for:
10//!
11//! - **File Operations**: Store and retrieve files with automatic chunking for large files
12//! - **Directory Operations**: Create, modify, and traverse directory structures
13//! - **Metadata Support**: Unix-style permissions (mode) and modification times (mtime)
14//! - **Content Addressing**: All operations return CIDs (Content Identifiers)
15//! - **Efficient Chunking**: Automatic chunking for files >1MB with configurable chunk size
16//!
17//! ## Core Concepts
18//!
19//! ### Content Addressing
20//! Every file and directory is identified by a CID, ensuring:
21//! - **Immutability**: Content cannot be changed without changing the CID
22//! - **Deduplication**: Identical content has the same CID
23//! - **Verification**: Content can be verified against its CID
24//!
25//! ### DAG-PB vs Raw Blocks
26//! - **Small files (<256KB)**: Can be stored as either DAG-PB or raw blocks
//! - **Large files (>1MB)**: Automatically chunked and stored as DAG-PB with links
28//! - **Directories**: Always stored as DAG-PB with links to entries
29//!
30//! ### Chunking Strategy
31//! Large files are split into chunks for efficient storage and retrieval:
32//! - **Default chunk size**: 262,144 bytes (256KB)
33//! - **Configurable**: Set `chunk_size` in `AddOptions`
34//! - **Merkle DAG**: Chunks are organized in a balanced tree structure
35//!
36//! ## Usage Examples
37//!
38//! ### Basic File Operations
39//!
40//! ```no_run
41//! use std::sync::Arc;
42//! use rust_helia::create_helia_default;
43//! use helia_unixfs::{UnixFS, UnixFSInterface, AddOptions};
44//! use bytes::Bytes;
45//!
46//! #[tokio::main]
47//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
48//! // Create a Helia node
49//! let helia = create_helia_default().await?;
50//! let fs = UnixFS::new(Arc::new(helia));
51//!
52//! // Add a small file
53//! let data = Bytes::from("Hello, IPFS!");
54//! let cid = fs.add_bytes(data, None).await?;
55//! println!("File CID: {}", cid);
56//!
57//! // Read the file back
58//! let content = fs.cat(&cid, None).await?;
59//! println!("Content: {:?}", content);
60//!
61//! // Add with options
62//! let data2 = Bytes::from("Important data");
63//! let cid2 = fs.add_bytes(data2, Some(AddOptions {
64//! pin: true, // Pin for persistence
65//! raw_leaves: true, // Use raw blocks for leaves
66//! ..Default::default()
67//! })).await?;
68//!
69//! Ok(())
70//! }
71//! ```
72//!
73//! ### File with Metadata
74//!
75//! ```no_run
76//! # use helia_unixfs::{FileCandidate, UnixFSTime, AddOptions};
77//! # use bytes::Bytes;
78//! # async fn example(fs: impl helia_unixfs::UnixFSInterface) -> Result<(), Box<dyn std::error::Error>> {
79//! // Create file with Unix permissions and timestamp
80//! let file = FileCandidate {
81//! path: "document.txt".to_string(),
82//! content: Bytes::from("Important document"),
83//! mode: Some(0o644), // rw-r--r--
84//! mtime: Some(UnixFSTime::now()),
85//! };
86//!
87//! let cid = fs.add_file(file, None).await?;
88//! # Ok(())
89//! # }
90//! ```
91//!
92//! ### Directory Operations
93//!
94//! ```no_run
95//! # async fn example(fs: impl helia_unixfs::UnixFSInterface) -> Result<(), Box<dyn std::error::Error>> {
96//! # use bytes::Bytes;
97//! // Create an empty directory
98//! let dir_cid = fs.add_directory(None, None).await?;
99//!
100//! // Add a file to the directory
101//! let file_data = Bytes::from("README content");
102//! let file_cid = fs.add_bytes(file_data, None).await?;
103//! let updated_dir = fs.cp(&file_cid, &dir_cid, "README.md", None).await?;
104//!
105//! // Create a subdirectory
106//! use helia_unixfs::MkdirOptions;
107//! let dir_with_subdir = fs.mkdir(
108//! &updated_dir,
109//! "docs",
110//! Some(MkdirOptions {
111//! mode: Some(0o755), // rwxr-xr-x
112//! ..Default::default()
113//! })
114//! ).await?;
115//!
116//! // List directory contents
117//! let entries = fs.ls(&dir_with_subdir, None).await?;
118//! // Iterate through entries...
119//! # Ok(())
120//! # }
121//! ```
122//!
123//! ### Large File Handling
124//!
125//! ```no_run
126//! # async fn example(fs: impl helia_unixfs::UnixFSInterface) -> Result<(), Box<dyn std::error::Error>> {
127//! # use bytes::Bytes;
128//! # use helia_unixfs::AddOptions;
129//! // Large files are automatically chunked
130//! let large_data = Bytes::from(vec![0u8; 5_000_000]); // 5MB
131//!
132//! let cid = fs.add_bytes(large_data, Some(AddOptions {
133//! chunk_size: Some(524_288), // 512KB chunks
134//! ..Default::default()
135//! })).await?;
136//!
137//! // Read with offset and length for efficient partial reads
138//! use helia_unixfs::CatOptions;
139//! let partial = fs.cat(&cid, Some(CatOptions {
140//! offset: Some(1_000_000), // Start at 1MB
141//! length: Some(100_000), // Read 100KB
142//! })).await?;
143//! # Ok(())
144//! # }
145//! ```
146//!
147//! ### Working with Statistics
148//!
149//! ```no_run
150//! # use cid::Cid;
151//! # async fn example(fs: impl helia_unixfs::UnixFSInterface, cid: &Cid) -> Result<(), Box<dyn std::error::Error>> {
152//! use helia_unixfs::{UnixFSStat, FileStat, DirectoryStat};
153//!
154//! let stats = fs.stat(cid, None).await?;
155//!
156//! match stats {
157//! UnixFSStat::File(file_stats) => {
158//! println!("File size: {} bytes", file_stats.size);
159//! println!("Blocks: {}", file_stats.blocks);
160//! if let Some(mode) = file_stats.mode {
161//! println!("Mode: {:o}", mode);
162//! }
163//! }
164//! UnixFSStat::Directory(dir_stats) => {
165//! println!("Directory with {} entries", dir_stats.entries);
166//! println!("Total size: {} bytes", dir_stats.size);
167//! }
168//! }
169//! # Ok(())
170//! # }
171//! ```
172//!
173//! ## Performance Characteristics
174//!
175//! ### File Size Guidelines
176//! - **< 256KB**: Single block, fast add/retrieve
177//! - **256KB - 1MB**: Single block with DAG-PB wrapper
178//! - **> 1MB**: Automatically chunked into 256KB blocks
179//! - **Very large (>100MB)**: Efficient streaming with balanced Merkle tree
180//!
181//! ### Memory Usage
182//! - **Small files**: Loaded entirely into memory
183//! - **Large files**: Chunked streaming, constant memory usage
184//! - **Directories**: Efficient lazy evaluation of entries
185//!
186//! ### Operation Complexity
187//! - **add_bytes()**: O(n) where n = file size
188//! - **cat()**: O(n) where n = bytes read
189//! - **ls()**: O(m) where m = number of entries
190//! - **cp()**: O(m) where m = directory size
191//! - **stat()**: O(1) - constant time
192//!
193//! ## Thread Safety
194//!
195//! All UnixFS operations are thread-safe:
196//! - Uses `Arc<dyn Helia>` for shared access
197//! - All methods use `&self` (immutable borrow)
198//! - Safe to share `UnixFS` instance across threads
199//! - Concurrent operations are supported
200//!
201//! ```no_run
202//! # use std::sync::Arc;
203//! # use rust_helia::create_helia_default;
204//! # use helia_unixfs::UnixFS;
205//! # #[tokio::main]
206//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
207//! let helia = create_helia_default().await?;
208//! let fs = Arc::new(UnixFS::new(Arc::new(helia)));
209//!
210//! // Clone and use in multiple tasks
211//! let fs1 = Arc::clone(&fs);
212//! let fs2 = Arc::clone(&fs);
213//!
214//! tokio::spawn(async move {
215//! // Use fs1 in this task
216//! });
217//!
218//! tokio::spawn(async move {
219//! // Use fs2 in this task
220//! });
221//! # Ok(())
222//! # }
223//! ```
224//!
225//! ## Error Handling
226//!
227//! All operations return `Result<T, UnixFSError>`:
228//!
229//! ```no_run
230//! # use helia_unixfs::{UnixFS, UnixFSInterface, UnixFSError};
231//! # async fn example(fs: impl UnixFSInterface, cid: &cid::Cid) -> Result<(), Box<dyn std::error::Error>> {
232//! match fs.cat(cid, None).await {
233//! Ok(data) => println!("Read {} bytes", data.len()),
234//! Err(UnixFSError::NotAFile { cid }) => {
235//! println!("Not a file: {}", cid);
236//! }
237//! Err(UnixFSError::NotUnixFS { cid }) => {
238//! println!("Not a UnixFS node: {}", cid);
239//! }
240//! Err(e) => println!("Error: {}", e),
241//! }
242//! # Ok(())
243//! # }
244//! ```
245//!
246//! ## Limitations
247//!
248//! ### Current Limitations
249//! - **Symlinks**: Not yet implemented (returns error)
250//! - **HAMTs**: Large directories (>10,000 entries) not optimized
251//! - **Inline CIDs**: Very small files not inlined in parent blocks
252//! - **Trickle DAG**: Only uses balanced DAG structure
253//!
254//! ### Future Enhancements
255//! - Support for UnixFS v2 features
256//! - HAMT-sharded directories for very large directories
257//! - Trickle DAG option for better streaming
258//! - More compression options
259//!
260//! ## Compatibility
261//!
262//! This implementation is compatible with:
263//! - **go-ipfs/Kubo**: Full compatibility with standard IPFS nodes
264//! - **js-ipfs**: Compatible with JavaScript IPFS implementations
265//! - **@helia/unixfs**: Compatible with TypeScript Helia implementation
266//!
267//! ## Examples Directory
268//!
269//! See the `examples/` directory for more usage examples:
270//! - `01_simple_file.rs` - Basic file operations
271//! - `02_large_file.rs` - Chunked file handling
272//! - `03_directories.rs` - Directory operations
273//! - `04_metadata.rs` - Working with permissions and times
274
275pub mod chunker;
276pub mod dag_pb;
277pub mod errors;
278mod pb;
279pub mod unixfs;
280
281#[cfg(test)]
282mod tests;
283
284use std::sync::Arc;
285use std::time::{SystemTime, UNIX_EPOCH};
286
287use async_trait::async_trait;
288use bytes::Bytes;
289use cid::Cid;
290use serde::{Deserialize, Serialize};
291
292use helia_interface::{AwaitIterable, Helia};
293
294pub use chunker::*;
295pub use dag_pb::*;
296pub use errors::*;
297pub use pb::*;
298pub use unixfs::*;
299
300/// File statistics
301#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
302pub struct FileStat {
303 pub cid: Cid,
304 pub size: u64,
305 pub blocks: u64,
306 pub type_: UnixFSType,
307 pub mode: Option<u32>,
308 pub mtime: Option<UnixFSTime>,
309}
310
311/// Directory statistics
312#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
313pub struct DirectoryStat {
314 pub cid: Cid,
315 pub size: u64,
316 pub blocks: u64,
317 pub type_: UnixFSType,
318 pub mode: Option<u32>,
319 pub mtime: Option<UnixFSTime>,
320 pub entries: u64,
321}
322
323/// UnixFS entry types
324#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
325pub enum UnixFSType {
326 File,
327 Directory,
328 Symlink,
329 Raw,
330}
331
332/// UnixFS timestamp
333#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
334pub struct UnixFSTime {
335 pub seconds: u64,
336 pub nanoseconds: Option<u32>,
337}
338
339impl UnixFSTime {
340 pub fn now() -> Self {
341 let now = SystemTime::now()
342 .duration_since(UNIX_EPOCH)
343 .unwrap_or_default();
344 Self {
345 seconds: now.as_secs(),
346 nanoseconds: Some(now.subsec_nanos()),
347 }
348 }
349}
350
351/// UnixFS directory entry
352#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
353pub struct UnixFSEntry {
354 pub name: String,
355 pub cid: Cid,
356 pub size: u64,
357 pub type_: UnixFSType,
358 pub mode: Option<u32>,
359 pub mtime: Option<UnixFSTime>,
360}
361
362/// File candidate for adding to UnixFS
363#[derive(Debug, Clone)]
364pub struct FileCandidate {
365 pub path: String,
366 pub content: Bytes,
367 pub mode: Option<u32>,
368 pub mtime: Option<UnixFSTime>,
369}
370
371/// Directory candidate for adding to UnixFS
372#[derive(Debug, Clone)]
373pub struct DirectoryCandidate {
374 pub path: String,
375 pub mode: Option<u32>,
376 pub mtime: Option<UnixFSTime>,
377}
378
/// Options for adding content.
///
/// `Default` yields every flag `false` and no explicit chunk size, so callers
/// can override single fields with `..Default::default()`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AddOptions {
    /// Pin the added content for persistence.
    pub pin: bool,
    /// Chunk size in bytes; `None` leaves the choice to the implementation
    /// (the crate-level docs cite a default of 262,144 bytes).
    pub chunk_size: Option<usize>,
    /// Use raw blocks for leaves.
    pub raw_leaves: bool,
    /// Request that the added content be wrapped in a directory.
    /// NOTE(review): exact semantics live in the implementation — confirm there.
    pub wrap_with_directory: bool,
}
387
/// Options for reading content.
///
/// Both fields default to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CatOptions {
    /// Byte offset at which reading starts.
    pub offset: Option<u64>,
    /// Number of bytes to read.
    pub length: Option<u64>,
}
394
/// Options for listing directory contents.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct LsOptions {
    /// Request a recursive listing; defaults to `false`.
    /// NOTE(review): how this is honored is up to the implementation — confirm there.
    pub recursive: bool,
}
400
/// Options for copying content.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CpOptions {
    /// Request creation of the destination path; defaults to `false`.
    /// NOTE(review): how this is honored is up to the implementation — confirm there.
    pub create_path: bool,
}
406
407/// Options for making directories
408#[derive(Debug, Clone, Default)]
409pub struct MkdirOptions {
410 pub parents: bool,
411 pub mode: Option<u32>,
412 pub mtime: Option<UnixFSTime>,
413}
414
/// Options for removing content.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RmOptions {
    /// Request recursive removal; defaults to `false`.
    /// NOTE(review): how this is honored is up to the implementation — confirm there.
    pub recursive: bool,
}
420
/// Options for file/directory statistics.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StatOptions {
    /// Defaults to `false`.
    /// NOTE(review): presumably asks for statistics about locally available
    /// data — confirm against the implementation.
    pub with_local: bool,
}
426
427/// Main UnixFS interface trait
428#[async_trait]
429pub trait UnixFSInterface: Send + Sync {
430 /// Add bytes as a file
431 async fn add_bytes(
432 &self,
433 bytes: Bytes,
434 options: Option<AddOptions>,
435 ) -> Result<Cid, UnixFSError>;
436
437 /// Add a file candidate
438 async fn add_file(
439 &self,
440 file: FileCandidate,
441 options: Option<AddOptions>,
442 ) -> Result<Cid, UnixFSError>;
443
444 /// Add a directory
445 async fn add_directory(
446 &self,
447 dir: Option<DirectoryCandidate>,
448 options: Option<AddOptions>,
449 ) -> Result<Cid, UnixFSError>;
450
451 /// Read file content
452 async fn cat(&self, cid: &Cid, options: Option<CatOptions>) -> Result<Bytes, UnixFSError>;
453
454 /// Copy content to a directory
455 async fn cp(
456 &self,
457 source: &Cid,
458 target: &Cid,
459 name: &str,
460 options: Option<CpOptions>,
461 ) -> Result<Cid, UnixFSError>;
462
463 /// List directory contents
464 async fn ls(
465 &self,
466 cid: &Cid,
467 options: Option<LsOptions>,
468 ) -> Result<AwaitIterable<UnixFSEntry>, UnixFSError>;
469
470 /// Create a directory in an existing directory
471 async fn mkdir(
472 &self,
473 cid: &Cid,
474 dirname: &str,
475 options: Option<MkdirOptions>,
476 ) -> Result<Cid, UnixFSError>;
477
478 /// Remove content from a directory
479 async fn rm(
480 &self,
481 cid: &Cid,
482 path: &str,
483 options: Option<RmOptions>,
484 ) -> Result<Cid, UnixFSError>;
485
486 /// Get file or directory statistics
487 async fn stat(
488 &self,
489 cid: &Cid,
490 options: Option<StatOptions>,
491 ) -> Result<UnixFSStat, UnixFSError>;
492}
493
494/// Union type for file and directory statistics
495#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
496pub enum UnixFSStat {
497 File(FileStat),
498 Directory(DirectoryStat),
499}
500
501/// Create a UnixFS instance from a Helia node
502pub fn create_unixfs(helia: Arc<dyn Helia>) -> impl UnixFSInterface {
503 UnixFS::new(helia)
504}