//! File-backed arena allocator using `pread` for random access.
//!
//! Write data to a temporary file, then read it back by location.
//! The data stays on disk instead of in memory, so your process doesn't use extra RAM.
//!
//! `pread` lets us read from any offset without seeking, which means:
//! - No file position to manage between reads
//! - Thread-safe: multiple threads can read concurrently without locking
//!
//! Use this when you need scratch space for bytes but can't afford to keep everything in memory.
//!
//! # Limitations
//!
//! - Each file is limited to 4 GiB (offsets are stored as `u32`). For larger data, use
//!   multiple files.
//! - `FileArena` is immutable once built. To add more data, create a new writer,
//! then build a new `FileArena` containing all files.
//! - Temp files are created in your system's temp directory (`TMPDIR`). This crate doesn't
//!   check whether that directory is on a real disk — make sure it is not RAM-backed
//!   (e.g. `tmpfs` or `ramfs`), otherwise the data still ends up in memory.
//! - This crate does many random reads. Use fast storage for best performance.
//! - Each file in a `FileArena` keeps one file descriptor open for its lifetime.
//!   Creating arenas with thousands of files may hit your system's open-file limit. Check
//!   the limit with `ulimit -n` and count a process's open descriptors with
//!   `lsof -p <pid> | wc -l` (in a shell, `$$` is the shell's own PID, not your program's).
//!   Increase the limit or reduce the file count if needed.
//!
//! # Building multi-file arenas
//!
//! Use [`FileArenaBuilder`] to assemble arenas from multiple writers.
//! It places each file automatically, so you don't need to satisfy the ordering
//! contract of `FileArena::new` (files passed in writer-index order) yourself:
//!
//! ```rust
//! # use farena::{FileArenaWriter, FileArenaBuilder};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut w0 = FileArenaWriter::new(0)?;
//! let loc0 = w0.push("data0")?;
//! let f0 = w0.finish()?;
//!
//! let mut w1 = FileArenaWriter::new(1)?;
//! let loc1 = w1.push("data1")?;
//! let f1 = w1.finish()?;
//!
//! let mut builder = FileArenaBuilder::new();
//! builder.add(f1, loc1); // Order doesn't matter
//! builder.add(f0, loc0);
//! let arena = builder.build()?;
//!
//! assert_eq!(arena.get(loc0)?, b"data0");
//! assert_eq!(arena.get(loc1)?, b"data1");
//! # Ok(())
//! # }
//! ```
//!
//! # Usage
//!
//! ```rust
//! use farena::{FileArenaWriter, Location};
//!
//! // Write phase
//! let mut writer = FileArenaWriter::new(0)?;
//! let loc1 = writer.push("hello")?;
//! let loc2 = writer.push(" world")?;
//!
//! // Read phase — into_arena() is a convenience for single-file arenas
//! let arena = writer.into_arena()?;
//!
//! assert_eq!(arena.get(loc1)?, b"hello");
//! assert_eq!(arena.get(loc2)?, b" world");
//! # Ok::<_, std::io::Error>(())
//! ```
//!
//! # Multiple files (low-level)
//!
//! **Prefer [`FileArenaBuilder`] above** — it enforces the ordering
//! contract automatically. `FileArena::new` is the low-level alternative.
//!
//! Each writer gets a unique index. Files must be passed to
//! `FileArena::new` in index order:
//!
//! ```rust
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut w0 = FileArenaWriter::new(0)?;
//! let loc0 = w0.push("data0")?;
//! let f0 = w0.finish()?;
//!
//! let mut w1 = FileArenaWriter::new(1)?;
//! let loc1 = w1.push("data1")?;
//! let f1 = w1.finish()?;
//!
//! let arena = FileArena::new(vec![f0, f1])?;
//! assert_eq!(arena.get(loc0)?, b"data0");
//! assert_eq!(arena.get(loc1)?, b"data1");
//! # Ok(())
//! # }
//! ```
//!
//! # Parallel writing
//!
//! The design supports parallel writing. Each writer gets a unique index,
//! and [`FileArenaBuilder`] handles assembling the arena:
//!
//! ```rust,no_run
//! # use farena::{FileArenaWriter, FileArenaBuilder, Location};
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let items = vec!["item1", "item2", "item3", "item4"];
//!
//! // Sequential here; with rayon, swap `.into_iter()` for `.into_par_iter()`
//! // to run the writers in parallel.
//! let results: Vec<(Location, std::fs::File)> = (0..items.len())
//!     .into_iter()
//!     .map(|i| {
//!         let mut writer = FileArenaWriter::new(i as u16).unwrap();
//!         let loc = writer.push(items[i]).unwrap();
//!         let file = writer.finish().unwrap();
//!         (loc, file)
//!     })
//!     .collect();
//!
//! // Builder places files in the correct order automatically
//! let mut builder = FileArenaBuilder::new();
//! for (loc, file) in results {
//! builder.add(file, loc);
//! }
//! let arena = builder.build()?;
//! # Ok(())
//! # }
//! ```
//!
//! # Graph/tree structures
//!
//! A common pattern is storing node metadata in memory while keeping
//! large payloads on disk. This is useful when:
//!
//! - Payloads are large and would consume too much memory
//! - You need to traverse the structure without loading all data at once
//! - You construct long text by concatenating payloads (e.g., thread content)
//!
//! For example, a tree where each node has an ID and a text payload:
//!
//! ```rust,no_run
//! # use farena::{FileArena, FileArenaWriter, Location};
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
//! #[derive(Clone)]
//! struct Node {
//!     id: u64,
//!     payload_loc: Location, // Text stored on disk
//!     children: Vec<u64>,
//! }
//!
//! // Build your tree with Locations instead of storing text directly
//! let mut nodes = Vec::new();
//! let mut writer = FileArenaWriter::new(0)?;
//!
//! // Write each payload to disk and keep only its location in memory
//! for (id, text) in &[("root", "root text"), ("child1", "child text")] {
//!     let loc = writer.push(*text)?;
//!     nodes.push(Node {
//!         id: hash(id), // Your own hash function
//!         payload_loc: loc,
//!         children: vec![],
//!     });
//! }
//!
//! let arena = writer.into_arena()?;
//!
//! // Traverse and read payloads as needed.
//! // Note: `get_str_into` appends, so we create a fresh buffer each iteration.
//! for node in &nodes {
//!     let mut buf = String::new();
//!     arena.get_str_into(node.payload_loc, &mut buf)?;
//!     println!("Node {}: {}", node.id, buf);
//! }
//!
//! // Or concatenate payloads into a single buffer
//! let mut full_text = String::new();
//! for node in &nodes {
//!     arena.get_str_into(node.payload_loc, &mut full_text)?;
//! }
//! // `full_text` now contains all payloads concatenated
//! # fn hash(_: &str) -> u64 { 0 }
//! # Ok(())
//! # }
//! ```
//!
//! # Buffer reuse
//!
//! Reuse the same buffer across multiple reads to avoid allocations:
//!
//! ```rust
//! # use farena::{FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut writer = FileArenaWriter::new(0)?;
//! # let loc1 = writer.push("hello")?;
//! # let loc2 = writer.push(" world")?;
//! # let arena = writer.into_arena()?;
//! let mut buf = Vec::new();
//!
//! arena.get_into(loc1, &mut buf)?;
//! assert_eq!(buf, b"hello");
//!
//! buf.clear(); // Reuse without reallocating
//! arena.get_into(loc2, &mut buf)?;
//! assert_eq!(buf, b" world");
//! # Ok(())
//! # }
//! ```
//!
//! # Unsafe reads
//!
//! If you know your stored data is valid UTF-8, use `get_str_into_unchecked`
//! to skip the UTF-8 validation. The call is `unsafe`: you must guarantee that the
//! bytes at the given location are valid UTF-8.
//!
//! ```rust
//! # use farena::{FileArenaWriter, Location};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut writer = FileArenaWriter::new(0)?;
//! # let loc = writer.push("known utf8")?;
//! # let arena = writer.into_arena()?;
//! let mut buf = String::new();
//!
//! // SAFETY: we pushed valid UTF-8 above
//! unsafe { arena.get_str_into_unchecked(loc, &mut buf) }?;
//! assert_eq!(buf, "known utf8");
//! # Ok(())
//! # }
//! ```
//!
//! # Temp directory
//!
//! Temp files are created in your system's temp directory (respects `TMPDIR`).
//! Check that your temp directory is on a real disk with:
//!
//! ```text
//! df -h "${TMPDIR:-/tmp}"
//! ```
//!
//! The filesystem should not be `tmpfs` or `ramfs`.
pub use FileArena;
pub use FileArenaBuilder;
pub use Location;
pub use FileArenaWriter;