farena 0.3.0

A file-backed arena allocator that uses pread to keep byte storage on disk instead of in memory
Documentation
//! File-backed arena allocator using `pread` for random access.
//!
//! Write data to a temporary file, then read it back by location.
//! The data stays on disk instead of in memory, so your process doesn't use extra RAM.
//!
//! `pread` lets us read from any offset without seeking, which means:
//! - No file position to manage between reads
//! - Thread-safe: multiple threads can read concurrently without locking
//!
//! Use this when you need scratch space for bytes but can't afford to keep everything in memory.
//!
//! # Limitations
//!
//! - Each file is limited to 4GB (u32 offsets). For larger data, use multiple files.
//! - `FileArena` is immutable once built. To add more data, create a new writer,
//!   then build a new `FileArena` containing all files.
//! - Temp files use your system's temp directory (`TMPDIR`). This crate doesn't check
//!   whether that directory is on a real disk — make sure it's not a ramdisk like
//!   `tmpfs` or `ramfs`.
//! - This crate does many random reads. Use fast storage for best performance.
//! - Each file in a `FileArena` keeps one file descriptor open for its lifetime.
//!   Creating arenas with thousands of files may hit your system's open-file limit.
//!   Check the limit with `ulimit -n` and monitor usage with `lsof -p <PID> | wc -l`,
//!   where `<PID>` is the ID of the process using the arena (`$$` would be the
//!   shell's own PID). Increase the limit or reduce the file count if needed.
//!
//! # Building multi-file arenas
//!
//! Use [`FileArenaBuilder`] to assemble arenas from multiple writers.
//! It handles file placement automatically, so you don't need to worry
//! about the ordering contract of `FileArena::new` (files must be passed
//! in writer-index order):
//!
//! ```rust
//! # use farena::{FileArenaWriter, FileArenaBuilder};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut w0 = FileArenaWriter::new(0)?;
//! let loc0 = w0.push("data0")?;
//! let f0 = w0.finish()?;
//!
//! let mut w1 = FileArenaWriter::new(1)?;
//! let loc1 = w1.push("data1")?;
//! let f1 = w1.finish()?;
//!
//! let mut builder = FileArenaBuilder::new();
//! builder.add(f1, loc1);  // Order doesn't matter
//! builder.add(f0, loc0);
//! let arena = builder.build()?;
//!
//! assert_eq!(arena.get(loc0)?, b"data0");
//! assert_eq!(arena.get(loc1)?, b"data1");
//! # Ok(())
//! # }
//! ```
//!
//! # Usage
//!
//! ```rust
//! use farena::{FileArenaWriter, Location};
//!
//! // Write phase
//! let mut writer = FileArenaWriter::new(0)?;
//! let loc1 = writer.push("hello")?;
//! let loc2 = writer.push(" world")?;
//!
//! // Read phase — into_arena() is a convenience for single-file arenas
//! let arena = writer.into_arena()?;
//!
//! assert_eq!(arena.get(loc1)?, b"hello");
//! assert_eq!(arena.get(loc2)?, b" world");
//! # Ok::<_, std::io::Error>(())
//! ```
//!
//! # Multiple files (low-level)
//!
//! **Prefer [`FileArenaBuilder`]** (see "Building multi-file arenas" above) —
//! it enforces the ordering contract automatically. `FileArena::new` is the
//! low-level alternative.
//!
//! Each writer gets a unique index. Files must be passed to
//! `FileArena::new` in index order:
//!
//! ```rust
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut w0 = FileArenaWriter::new(0)?;
//! let loc0 = w0.push("data0")?;
//! let f0 = w0.finish()?;
//!
//! let mut w1 = FileArenaWriter::new(1)?;
//! let loc1 = w1.push("data1")?;
//! let f1 = w1.finish()?;
//!
//! let arena = FileArena::new(vec![f0, f1])?;
//! assert_eq!(arena.get(loc0)?, b"data0");
//! assert_eq!(arena.get(loc1)?, b"data1");
//! # Ok(())
//! # }
//! ```
//!
//! # Parallel writing
//!
//! The design supports parallel writing. Each writer gets a unique index,
//! and [`FileArenaBuilder`] handles assembling the arena:
//!
//! ```rust,no_run
//! # use farena::{FileArenaWriter, FileArenaBuilder, Location};
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let items = vec!["item1", "item2", "item3", "item4"];
//!
//! // This example runs sequentially. To write in parallel, replace
//! // `.into_iter()` with rayon's `.into_par_iter()`.
//! let results: Vec<(Location, std::fs::File)> = (0..items.len())
//!     .into_iter()
//!     .map(|i| {
//!         let mut writer = FileArenaWriter::new(i as u16).unwrap();
//!         let loc = writer.push(items[i]).unwrap();
//!         let file = writer.finish().unwrap();
//!         (loc, file)
//!     })
//!     .collect();
//!
//! // Builder places files in the correct order automatically
//! let mut builder = FileArenaBuilder::new();
//! for (loc, file) in results {
//!     builder.add(file, loc);
//! }
//! let arena = builder.build()?;
//! # Ok(())
//! # }
//! ```
//!
//! # Graph/tree structures
//!
//! A common pattern is storing node metadata in memory while keeping
//! large payloads on disk. This is useful when:
//!
//! - Payloads are large and would consume too much memory
//! - You need to traverse the structure without loading all data at once
//! - You construct long text by concatenating payloads (e.g., thread content)
//!
//! For example, a tree where each node has an ID and a text payload:
//!
//! ```rust,no_run
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
//! #[derive(Clone)]
//! struct Node {
//!     id: u64,
//!     payload_loc: Location,  // Text stored on disk
//!     children: Vec<u64>,
//! }
//!
//! // Build your tree with Locations instead of storing text directly
//! let mut nodes = Vec::new();
//! let mut writer = FileArenaWriter::new(0)?;
//!
//! // Write payloads, store locations
//! for (id, text) in &[("root", "root text"), ("child1", "child text")] {
//!     let loc = writer.push(*text)?;
//!     nodes.push(Node {
//!         id: hash(id),  // Your own hash function
//!         payload_loc: loc,
//!         children: vec![],
//!     });
//! }
//!
//! let arena = writer.into_arena()?;
//!
//! // Traverse and read payloads as needed
//! // Note: get_str_into appends, so we create a fresh buffer each iteration
//! for node in &nodes {
//!     let mut buf = String::new();
//!     arena.get_str_into(node.payload_loc, &mut buf)?;
//!     println!("Node {}: {}", node.id, buf);
//! }
//!
//! // Or concatenate payloads into a single buffer
//! let mut full_text = String::new();
//! for node in &nodes {
//!     arena.get_str_into(node.payload_loc, &mut full_text)?;
//! }
//! // full_text now contains all payloads concatenated
//! # fn hash(_: &str) -> u64 { 0 }
//! # Ok(())
//! # }
//! ```
//!
//! # Buffer reuse
//!
//! Reuse the same buffer across multiple reads to avoid allocations:
//!
//! ```rust
//! # use farena::{FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut writer = FileArenaWriter::new(0)?;
//! # let loc1 = writer.push("hello")?;
//! # let loc2 = writer.push(" world")?;
//! # let arena = writer.into_arena()?;
//! let mut buf = Vec::new();
//!
//! arena.get_into(loc1, &mut buf)?;
//! assert_eq!(buf, b"hello");
//!
//! buf.clear();  // Reuse without reallocating
//! arena.get_into(loc2, &mut buf)?;
//! assert_eq!(buf, b" world");
//! # Ok(())
//! # }
//! ```
//!
//! # Unsafe reads
//!
//! If you know your stored data is valid UTF-8, use `get_str_into_unchecked`
//! to skip the UTF-8 validation:
//!
//! ```rust
//! # use farena::{FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut writer = FileArenaWriter::new(0)?;
//! # let loc = writer.push("known utf8")?;
//! # let arena = writer.into_arena()?;
//! let mut buf = String::new();
//!
//! // SAFETY: we pushed valid UTF-8 above
//! unsafe { arena.get_str_into_unchecked(loc, &mut buf) }?;
//! assert_eq!(buf, "known utf8");
//! # Ok(())
//! # }
//! ```
//!
//! # Temp directory
//!
//! Temp files are created in your system's temp directory (respects `TMPDIR`).
//! Check that your temp directory is on a real disk with:
//!
//! ```text
//! df -h ${TMPDIR:-/tmp}
//! ```
//!
//! The filesystem should not be `tmpfs` or `ramfs`.

// Implementation modules. They are declared without `pub`, so their contents
// are reachable from outside the crate only through the re-exports below.
mod arena;
mod builder;
mod location;
mod writer;

// Re-export the public types at the crate root so callers can import them
// directly, e.g. `use farena::{FileArena, FileArenaWriter, Location};`.
pub use arena::FileArena;
pub use builder::FileArenaBuilder;
pub use location::Location;
pub use writer::FileArenaWriter;