// farena 0.2.1
//
// A file-backed arena allocator using pread for RSS-conscious byte storage
// Documentation
//! File-backed arena allocator using `pread` for random access.
//!
//! Write data to a temporary file, then read it back by location.
//! The data stays on disk instead of in memory, so your process doesn't use extra RAM.
//!
//! `pread` lets us read from any offset without seeking, which means:
//! - No file position to manage between reads
//! - Thread-safe: multiple threads can read concurrently without locking
//!
//! Use this when you need scratch space for bytes but can't afford to keep everything in memory.
//!
//! # Limitations
//!
//! - Each file is limited to 4 GiB (offsets are stored as `u32`). For larger data, use multiple files.
//! - `FileArena` is immutable once built. To add more data, create a new writer,
//!   then build a new `FileArena` containing all files.
//! - Temp files use your system's temp directory (`TMPDIR`). This crate doesn't check
//!   whether that directory is on a real disk - make sure it's not a ramdisk like `tmpfs` or `ramfs`.
//! - This crate does many random reads. Use fast storage for best performance.
//!
//! # Ordering contract
//!
//! When using multiple files, the file at position `i` in the `files` vec must come
//! from a `FileArenaWriter` created with index `i`. Passing files in the wrong order
//! will return incorrect data without any error.
//!
//! Correct:
//! ```text
//! writer0 -> file0, writer1 -> file1
//! FileArena::new(vec![file0, file1])  // file at index 0 from writer 0
//! ```
//!
//! Incorrect:
//! ```text
//! writer0 -> file0, writer1 -> file1
//! FileArena::new(vec![file1, file0])  // WRONG ORDER — will read garbage
//! ```
//!
//! # Usage
//!
//! ```rust
//! use farena::{FileArenaWriter, Location};
//!
//! // Write phase
//! let mut writer = FileArenaWriter::new(0)?;
//! let loc1 = writer.push("hello")?;
//! let loc2 = writer.push(" world")?;
//!
//! // Read phase — into_arena() is a convenience for single-file arenas
//! let arena = writer.into_arena()?;
//!
//! assert_eq!(arena.get(loc1)?, b"hello");
//! assert_eq!(arena.get(loc2)?, b" world");
//! # Ok::<_, std::io::Error>(())
//! ```
//!
//! # Multiple files
//!
//! Each writer gets a unique index. Collect files in index order:
//!
//! ```rust
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut w1 = FileArenaWriter::new(0)?;
//! let loc1 = w1.push("data1")?;
//! let f1 = w1.finish()?;
//!
//! let mut w2 = FileArenaWriter::new(1)?;
//! let loc2 = w2.push("data2")?;
//! let f2 = w2.finish()?;
//!
//! let arena = FileArena::new(vec![f1, f2])?;
//! assert_eq!(arena.get(loc1)?, b"data1");
//! assert_eq!(arena.get(loc2)?, b"data2");
//! # Ok(())
//! # }
//! ```
//!
//! # Parallel writing
//!
//! The design supports parallel writing. Each writer gets a unique index,
//! and you collect both the locations and files:
//!
//! ```rust,no_run
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let items = vec!["item1", "item2", "item3", "item4"];
//!
//! // Each task returns (location, file)
//! // Replace .into_iter() below with rayon's .into_par_iter() for parallel execution
//! let results: Vec<(Location, std::fs::File)> = (0..items.len())
//!     .into_iter()
//!     .map(|i| {
//!         let mut writer = FileArenaWriter::new(i as u16).unwrap();
//!         let loc = writer.push(items[i]).unwrap();
//!         let file = writer.finish().unwrap();
//!         (loc, file)
//!     })
//!     .collect();
//!
//! // Split into locations and files
//! let (locations, files): (Vec<_>, Vec<_>) = results.into_iter().unzip();
//! let arena = FileArena::new(files)?;
//!
//! // Now you can read back using the locations
//! for loc in &locations {
//!     let data = arena.get(*loc)?;
//!     println!("Got: {}", String::from_utf8_lossy(&data));
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Graph/tree structures
//!
//! A common pattern is storing node metadata in memory while keeping
//! large payloads on disk. This is useful when:
//!
//! - Payloads are large and would consume too much memory
//! - You need to traverse the structure without loading all data at once
//! - You construct long text by concatenating payloads (e.g., thread content)
//!
//! For example, a tree where each node has an ID and a text payload:
//!
//! ```rust,no_run
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
//! #[derive(Clone)]
//! struct Node {
//!     id: u64,
//!     payload_loc: Location,  // Text stored on disk
//!     children: Vec<u64>,
//! }
//!
//! // Build your tree with Locations instead of storing text directly
//! let mut nodes = Vec::new();
//! let mut writer = FileArenaWriter::new(0)?;
//!
//! // Write payloads, store locations
//! for (id, text) in &[("root", "root text"), ("child1", "child text")] {
//!     let loc = writer.push(*text)?;
//!     nodes.push(Node {
//!         id: hash(id),  // Your own hash function
//!         payload_loc: loc,
//!         children: vec![],
//!     });
//! }
//!
//! let arena = writer.into_arena()?;
//!
//! // Traverse and read payloads as needed
//! // Note: get_str_into appends, so we create a fresh buffer each iteration
//! for node in &nodes {
//!     let mut buf = String::new();
//!     arena.get_str_into(node.payload_loc, &mut buf)?;
//!     println!("Node {}: {}", node.id, buf);
//! }
//!
//! // Or concatenate payloads into a single buffer
//! let mut full_text = String::new();
//! for node in &nodes {
//!     arena.get_str_into(node.payload_loc, &mut full_text)?;
//! }
//! // full_text now contains all payloads concatenated
//! # fn hash(_: &str) -> u64 { 0 }
//! # Ok(())
//! # }
//! ```
//!
//! # Buffer reuse
//!
//! Reuse the same buffer across multiple reads to avoid allocations:
//!
//! ```rust
//! # use farena::{FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut writer = FileArenaWriter::new(0)?;
//! # let loc1 = writer.push("hello")?;
//! # let loc2 = writer.push(" world")?;
//! # let arena = writer.into_arena()?;
//! let mut buf = Vec::new();
//!
//! arena.get_into(loc1, &mut buf)?;
//! assert_eq!(buf, b"hello");
//!
//! buf.clear();  // Reuse without reallocating
//! arena.get_into(loc2, &mut buf)?;
//! assert_eq!(buf, b" world");
//! # Ok(())
//! # }
//! ```
//!
//! # Unsafe reads
//!
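//! If you know your stored data is valid UTF-8, use the `unsafe` method
//! `get_str_into_unchecked` to skip the UTF-8 validation. The caller must
//! guarantee that the stored bytes are valid UTF-8; putting invalid UTF-8
//! into a `String` is undefined behavior: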
//!
//! ```rust
//! # use farena::{FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut writer = FileArenaWriter::new(0)?;
//! # let loc = writer.push("known utf8")?;
//! # let arena = writer.into_arena()?;
//! let mut buf = String::new();
//!
//! // SAFETY: we pushed valid UTF-8 above
//! unsafe { arena.get_str_into_unchecked(loc, &mut buf) }?;
//! assert_eq!(buf, "known utf8");
//! # Ok(())
//! # }
//! ```
//!
//! # Temp directory
//!
//! Temp files are created in your system's temp directory (respects `TMPDIR`).
//! Check that your temp directory is on a real disk with:
//!
//! ```text
//! df -h ${TMPDIR:-/tmp}
//! ```
//!
//! The filesystem should not be `tmpfs` or `ramfs`.

// Implementation modules. They are declared without `pub`, so they are not
// part of the public path; only the items re-exported below are exposed.
mod arena;
mod location;
mod writer;

// Re-export the public types so callers can import them directly from this
// module (e.g. `use farena::{FileArena, FileArenaWriter, Location};`).
pub use arena::FileArena;
pub use location::Location;
pub use writer::FileArenaWriter;