helia_unixfs/
lib.rs

1//! # Helia UnixFS
2//!
3//! A Rust implementation of the IPFS UnixFS filesystem, providing file and directory
4//! operations with content-addressed storage.
5//!
6//! ## Overview
7//!
8//! UnixFS is a protobuf-based format for representing files and directories on IPFS.
9//! This crate provides a high-level interface for:
10//!
11//! - **File Operations**: Store and retrieve files with automatic chunking for large files
12//! - **Directory Operations**: Create, modify, and traverse directory structures
13//! - **Metadata Support**: Unix-style permissions (mode) and modification times (mtime)
14//! - **Content Addressing**: All operations return CIDs (Content Identifiers)
15//! - **Efficient Chunking**: Automatic chunking for files >1MB with configurable chunk size
16//!
17//! ## Core Concepts
18//!
19//! ### Content Addressing
20//! Every file and directory is identified by a CID, ensuring:
21//! - **Immutability**: Content cannot be changed without changing the CID
22//! - **Deduplication**: Identical content has the same CID
23//! - **Verification**: Content can be verified against its CID
24//!
25//! ### DAG-PB vs Raw Blocks
26//! - **Small files (<256KB)**: Can be stored as either DAG-PB or raw blocks
27//! - **Large files (>256KB)**: Automatically chunked and stored as DAG-PB with links
28//! - **Directories**: Always stored as DAG-PB with links to entries
29//!
30//! ### Chunking Strategy
31//! Large files are split into chunks for efficient storage and retrieval:
32//! - **Default chunk size**: 262,144 bytes (256KB)
33//! - **Configurable**: Set `chunk_size` in `AddOptions`
34//! - **Merkle DAG**: Chunks are organized in a balanced tree structure
35//!
36//! ## Usage Examples
37//!
38//! ### Basic File Operations
39//!
40//! ```no_run
41//! use std::sync::Arc;
42//! use rust_helia::create_helia_default;
43//! use helia_unixfs::{UnixFS, UnixFSInterface, AddOptions};
44//! use bytes::Bytes;
45//!
46//! #[tokio::main]
47//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
48//!     // Create a Helia node
49//!     let helia = create_helia_default().await?;
50//!     let fs = UnixFS::new(Arc::new(helia));
51//!     
52//!     // Add a small file
53//!     let data = Bytes::from("Hello, IPFS!");
54//!     let cid = fs.add_bytes(data, None).await?;
55//!     println!("File CID: {}", cid);
56//!     
57//!     // Read the file back
58//!     let content = fs.cat(&cid, None).await?;
59//!     println!("Content: {:?}", content);
60//!     
61//!     // Add with options
62//!     let data2 = Bytes::from("Important data");
63//!     let cid2 = fs.add_bytes(data2, Some(AddOptions {
64//!         pin: true,  // Pin for persistence
65//!         raw_leaves: true,  // Use raw blocks for leaves
66//!         ..Default::default()
67//!     })).await?;
68//!     
69//!     Ok(())
70//! }
71//! ```
72//!
73//! ### File with Metadata
74//!
75//! ```no_run
76//! # use helia_unixfs::{FileCandidate, UnixFSTime, AddOptions};
77//! # use bytes::Bytes;
78//! # async fn example(fs: impl helia_unixfs::UnixFSInterface) -> Result<(), Box<dyn std::error::Error>> {
79//! // Create file with Unix permissions and timestamp
80//! let file = FileCandidate {
81//!     path: "document.txt".to_string(),
82//!     content: Bytes::from("Important document"),
83//!     mode: Some(0o644),  // rw-r--r--
84//!     mtime: Some(UnixFSTime::now()),
85//! };
86//!
87//! let cid = fs.add_file(file, None).await?;
88//! # Ok(())
89//! # }
90//! ```
91//!
92//! ### Directory Operations
93//!
94//! ```no_run
95//! # async fn example(fs: impl helia_unixfs::UnixFSInterface) -> Result<(), Box<dyn std::error::Error>> {
96//! # use bytes::Bytes;
97//! // Create an empty directory
98//! let dir_cid = fs.add_directory(None, None).await?;
99//!
100//! // Add a file to the directory
101//! let file_data = Bytes::from("README content");
102//! let file_cid = fs.add_bytes(file_data, None).await?;
103//! let updated_dir = fs.cp(&file_cid, &dir_cid, "README.md", None).await?;
104//!
105//! // Create a subdirectory
106//! use helia_unixfs::MkdirOptions;
107//! let dir_with_subdir = fs.mkdir(
108//!     &updated_dir,
109//!     "docs",
110//!     Some(MkdirOptions {
111//!         mode: Some(0o755),  // rwxr-xr-x
112//!         ..Default::default()
113//!     })
114//! ).await?;
115//!
116//! // List directory contents
117//! let entries = fs.ls(&dir_with_subdir, None).await?;
118//! // Iterate through entries...
119//! # Ok(())
120//! # }
121//! ```
122//!
123//! ### Large File Handling
124//!
125//! ```no_run
126//! # async fn example(fs: impl helia_unixfs::UnixFSInterface) -> Result<(), Box<dyn std::error::Error>> {
127//! # use bytes::Bytes;
128//! # use helia_unixfs::AddOptions;
129//! // Large files are automatically chunked
130//! let large_data = Bytes::from(vec![0u8; 5_000_000]); // 5MB
131//!
132//! let cid = fs.add_bytes(large_data, Some(AddOptions {
133//!     chunk_size: Some(524_288), // 512KB chunks
134//!     ..Default::default()
135//! })).await?;
136//!
137//! // Read with offset and length for efficient partial reads
138//! use helia_unixfs::CatOptions;
139//! let partial = fs.cat(&cid, Some(CatOptions {
140//!     offset: Some(1_000_000),  // Start at 1MB
141//!     length: Some(100_000),     // Read 100KB
142//! })).await?;
143//! # Ok(())
144//! # }
145//! ```
146//!
147//! ### Working with Statistics
148//!
149//! ```no_run
150//! # use cid::Cid;
151//! # async fn example(fs: impl helia_unixfs::UnixFSInterface, cid: &Cid) -> Result<(), Box<dyn std::error::Error>> {
152//! use helia_unixfs::{UnixFSStat, FileStat, DirectoryStat};
153//!
154//! let stats = fs.stat(cid, None).await?;
155//!
156//! match stats {
157//!     UnixFSStat::File(file_stats) => {
158//!         println!("File size: {} bytes", file_stats.size);
159//!         println!("Blocks: {}", file_stats.blocks);
160//!         if let Some(mode) = file_stats.mode {
161//!             println!("Mode: {:o}", mode);
162//!         }
163//!     }
164//!     UnixFSStat::Directory(dir_stats) => {
165//!         println!("Directory with {} entries", dir_stats.entries);
166//!         println!("Total size: {} bytes", dir_stats.size);
167//!     }
168//! }
169//! # Ok(())
170//! # }
171//! ```
172//!
173//! ## Performance Characteristics
174//!
175//! ### File Size Guidelines
176//! - **< 256KB**: Single block, fast add/retrieve
177//! - **256KB - 1MB**: Single block with DAG-PB wrapper
178//! - **> 1MB**: Automatically chunked into 256KB blocks
179//! - **Very large (>100MB)**: Efficient streaming with balanced Merkle tree
180//!
181//! ### Memory Usage
182//! - **Small files**: Loaded entirely into memory
183//! - **Large files**: Chunked streaming, constant memory usage
184//! - **Directories**: Efficient lazy evaluation of entries
185//!
186//! ### Operation Complexity
187//! - **add_bytes()**: O(n) where n = file size
188//! - **cat()**: O(n) where n = bytes read
189//! - **ls()**: O(m) where m = number of entries
190//! - **cp()**: O(m) where m = directory size
191//! - **stat()**: O(1) - constant time
192//!
193//! ## Thread Safety
194//!
195//! All UnixFS operations are thread-safe:
196//! - Uses `Arc<dyn Helia>` for shared access
197//! - All methods use `&self` (immutable borrow)
198//! - Safe to share `UnixFS` instance across threads
199//! - Concurrent operations are supported
200//!
201//! ```no_run
202//! # use std::sync::Arc;
203//! # use rust_helia::create_helia_default;
204//! # use helia_unixfs::UnixFS;
205//! # #[tokio::main]
206//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
207//! let helia = create_helia_default().await?;
208//! let fs = Arc::new(UnixFS::new(Arc::new(helia)));
209//!
210//! // Clone and use in multiple tasks
211//! let fs1 = Arc::clone(&fs);
212//! let fs2 = Arc::clone(&fs);
213//!
214//! tokio::spawn(async move {
215//!     // Use fs1 in this task
216//! });
217//!
218//! tokio::spawn(async move {
219//!     // Use fs2 in this task
220//! });
221//! # Ok(())
222//! # }
223//! ```
224//!
225//! ## Error Handling
226//!
227//! All operations return `Result<T, UnixFSError>`:
228//!
229//! ```no_run
230//! # use helia_unixfs::{UnixFS, UnixFSInterface, UnixFSError};
231//! # async fn example(fs: impl UnixFSInterface, cid: &cid::Cid) -> Result<(), Box<dyn std::error::Error>> {
232//! match fs.cat(cid, None).await {
233//!     Ok(data) => println!("Read {} bytes", data.len()),
234//!     Err(UnixFSError::NotAFile { cid }) => {
235//!         println!("Not a file: {}", cid);
236//!     }
237//!     Err(UnixFSError::NotUnixFS { cid }) => {
238//!         println!("Not a UnixFS node: {}", cid);
239//!     }
240//!     Err(e) => println!("Error: {}", e),
241//! }
242//! # Ok(())
243//! # }
244//! ```
245//!
246//! ## Limitations
247//!
248//! ### Current Limitations
249//! - **Symlinks**: Not yet implemented (returns error)
250//! - **HAMTs**: Large directories (>10,000 entries) not optimized
251//! - **Inline CIDs**: Very small files not inlined in parent blocks
252//! - **Trickle DAG**: Only uses balanced DAG structure
253//!
254//! ### Future Enhancements
255//! - Support for UnixFS v2 features
256//! - HAMT-sharded directories for very large directories
257//! - Trickle DAG option for better streaming
258//! - More compression options
259//!
260//! ## Compatibility
261//!
262//! This implementation is compatible with:
263//! - **go-ipfs/Kubo**: Full compatibility with standard IPFS nodes
264//! - **js-ipfs**: Compatible with JavaScript IPFS implementations
265//! - **@helia/unixfs**: Compatible with TypeScript Helia implementation
266//!
267//! ## Examples Directory
268//!
269//! See the `examples/` directory for more usage examples:
270//! - `01_simple_file.rs` - Basic file operations
271//! - `02_large_file.rs` - Chunked file handling
272//! - `03_directories.rs` - Directory operations
273//! - `04_metadata.rs` - Working with permissions and times
274
275pub mod chunker;
276pub mod dag_pb;
277pub mod errors;
278mod pb;
279pub mod unixfs;
280
281#[cfg(test)]
282mod tests;
283
284use std::sync::Arc;
285use std::time::{SystemTime, UNIX_EPOCH};
286
287use async_trait::async_trait;
288use bytes::Bytes;
289use cid::Cid;
290use serde::{Deserialize, Serialize};
291
292use helia_interface::{AwaitIterable, Helia};
293
294pub use chunker::*;
295pub use dag_pb::*;
296pub use errors::*;
297pub use pb::*;
298pub use unixfs::*;
299
300/// File statistics
301#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
302pub struct FileStat {
303    pub cid: Cid,
304    pub size: u64,
305    pub blocks: u64,
306    pub type_: UnixFSType,
307    pub mode: Option<u32>,
308    pub mtime: Option<UnixFSTime>,
309}
310
311/// Directory statistics  
312#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
313pub struct DirectoryStat {
314    pub cid: Cid,
315    pub size: u64,
316    pub blocks: u64,
317    pub type_: UnixFSType,
318    pub mode: Option<u32>,
319    pub mtime: Option<UnixFSTime>,
320    pub entries: u64,
321}
322
323/// UnixFS entry types
324#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
325pub enum UnixFSType {
326    File,
327    Directory,
328    Symlink,
329    Raw,
330}
331
332/// UnixFS timestamp
333#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
334pub struct UnixFSTime {
335    pub seconds: u64,
336    pub nanoseconds: Option<u32>,
337}
338
339impl UnixFSTime {
340    pub fn now() -> Self {
341        let now = SystemTime::now()
342            .duration_since(UNIX_EPOCH)
343            .unwrap_or_default();
344        Self {
345            seconds: now.as_secs(),
346            nanoseconds: Some(now.subsec_nanos()),
347        }
348    }
349}
350
351/// UnixFS directory entry
352#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
353pub struct UnixFSEntry {
354    pub name: String,
355    pub cid: Cid,
356    pub size: u64,
357    pub type_: UnixFSType,
358    pub mode: Option<u32>,
359    pub mtime: Option<UnixFSTime>,
360}
361
362/// File candidate for adding to UnixFS
363#[derive(Debug, Clone)]
364pub struct FileCandidate {
365    pub path: String,
366    pub content: Bytes,
367    pub mode: Option<u32>,
368    pub mtime: Option<UnixFSTime>,
369}
370
371/// Directory candidate for adding to UnixFS
372#[derive(Debug, Clone)]
373pub struct DirectoryCandidate {
374    pub path: String,
375    pub mode: Option<u32>,
376    pub mtime: Option<UnixFSTime>,
377}
378
/// Options controlling how content is added.
///
/// The derived `Default` turns every flag off and leaves `chunk_size` unset,
/// so `AddOptions { pin: true, ..Default::default() }` changes one setting
/// and keeps the rest at their defaults.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AddOptions {
    /// Pin the added content so that it is kept persistently.
    pub pin: bool,
    /// Chunk size in bytes for splitting large content; `None` leaves the
    /// choice to the implementation.
    pub chunk_size: Option<usize>,
    /// Store leaf data as raw blocks.
    pub raw_leaves: bool,
    /// Wrap the added content in a directory.
    // NOTE(review): the exact shape of the wrapping directory is decided by
    // the implementation, which is not visible from this file.
    pub wrap_with_directory: bool,
}
387
/// Options controlling which part of a file is read.
///
/// Both fields default to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CatOptions {
    /// Byte offset at which reading starts.
    pub offset: Option<u64>,
    /// Number of bytes to read.
    pub length: Option<u64>,
}
394
/// Options controlling a directory listing.
///
/// `recursive` defaults to `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct LsOptions {
    /// Request a recursive listing.
    // NOTE(review): presumably descends into sub-directories — confirm
    // against the `ls` implementation.
    pub recursive: bool,
}
400
/// Options controlling a copy into a directory.
///
/// `create_path` defaults to `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CpOptions {
    /// Request creation of the destination path.
    // NOTE(review): presumably creates missing intermediate directories —
    // confirm against the `cp` implementation.
    pub create_path: bool,
}
406
407/// Options for making directories
408#[derive(Debug, Clone, Default)]
409pub struct MkdirOptions {
410    pub parents: bool,
411    pub mode: Option<u32>,
412    pub mtime: Option<UnixFSTime>,
413}
414
/// Options controlling removal of content from a directory.
///
/// `recursive` defaults to `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RmOptions {
    /// Request recursive removal.
    // NOTE(review): presumably required to remove non-empty directories —
    // confirm against the `rm` implementation.
    pub recursive: bool,
}
420
/// Options controlling a statistics query.
///
/// `with_local` defaults to `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StatOptions {
    /// Request local-storage information as part of the statistics.
    // NOTE(review): the meaning is inferred from the field name only —
    // confirm against the `stat` implementation.
    pub with_local: bool,
}
426
427/// Main UnixFS interface trait
428#[async_trait]
429pub trait UnixFSInterface: Send + Sync {
430    /// Add bytes as a file
431    async fn add_bytes(
432        &self,
433        bytes: Bytes,
434        options: Option<AddOptions>,
435    ) -> Result<Cid, UnixFSError>;
436
437    /// Add a file candidate
438    async fn add_file(
439        &self,
440        file: FileCandidate,
441        options: Option<AddOptions>,
442    ) -> Result<Cid, UnixFSError>;
443
444    /// Add a directory
445    async fn add_directory(
446        &self,
447        dir: Option<DirectoryCandidate>,
448        options: Option<AddOptions>,
449    ) -> Result<Cid, UnixFSError>;
450
451    /// Read file content
452    async fn cat(&self, cid: &Cid, options: Option<CatOptions>) -> Result<Bytes, UnixFSError>;
453
454    /// Copy content to a directory
455    async fn cp(
456        &self,
457        source: &Cid,
458        target: &Cid,
459        name: &str,
460        options: Option<CpOptions>,
461    ) -> Result<Cid, UnixFSError>;
462
463    /// List directory contents
464    async fn ls(
465        &self,
466        cid: &Cid,
467        options: Option<LsOptions>,
468    ) -> Result<AwaitIterable<UnixFSEntry>, UnixFSError>;
469
470    /// Create a directory in an existing directory
471    async fn mkdir(
472        &self,
473        cid: &Cid,
474        dirname: &str,
475        options: Option<MkdirOptions>,
476    ) -> Result<Cid, UnixFSError>;
477
478    /// Remove content from a directory
479    async fn rm(
480        &self,
481        cid: &Cid,
482        path: &str,
483        options: Option<RmOptions>,
484    ) -> Result<Cid, UnixFSError>;
485
486    /// Get file or directory statistics
487    async fn stat(
488        &self,
489        cid: &Cid,
490        options: Option<StatOptions>,
491    ) -> Result<UnixFSStat, UnixFSError>;
492}
493
494/// Union type for file and directory statistics
495#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
496pub enum UnixFSStat {
497    File(FileStat),
498    Directory(DirectoryStat),
499}
500
501/// Create a UnixFS instance from a Helia node
502pub fn create_unixfs(helia: Arc<dyn Helia>) -> impl UnixFSInterface {
503    UnixFS::new(helia)
504}