graphrefly_storage/file.rs
1//! Filesystem-backed kv backend (M4.C — DS-14-storage Audit 4).
2//!
3//! [`FileBackend`] maps each key to a `.bin` file under a configured directory.
4//! Keys are percent-encoded so any UTF-8 string can be stored safely:
5//! `[a-zA-Z0-9_-]` pass through; everything else is UTF-8 encoded with each
6//! byte formatted as lowercase `%xx`. The encoded filename for any given key
7//! is byte-identical to the TS `fileBackend` impl
8//! ([`packages/pure-ts/src/extra/storage/tiers-node.ts`](https://github.com/graphrefly/graphrefly-ts/blob/main/packages/pure-ts/src/extra/storage/tiers-node.ts) — D159) so a TS-written
9//! file can be loaded by a Rust reader on the same directory.
10//!
11//! Writes are atomic via [`tempfile::NamedTempFile::persist`]: a tempfile is
12//! created in the target directory, written in full, then renamed onto the
13//! key path. A partially-written file is never visible at the final path,
14//! even on process crash. The `NamedTempFile` Drop impl deletes any tempfile
15//! that never made it through `persist` (covers panics between create and
16//! commit).
17//!
//! `flush()` is a no-op — durability is handled on a per-write basis via the rename.
19//! `read` / `delete` / `list` tolerate missing directory + missing key by
20//! returning `Ok(None)` / `Ok(())` / `Ok(vec![])` respectively (D158).
21//!
22//! Cargo feature: gated behind `file` (default-on).
23
24use std::collections::HashMap;
25use std::fs;
26use std::io;
27use std::io::Write as _;
28use std::path::{Path, PathBuf};
29use std::sync::atomic::{AtomicU64, Ordering};
30use std::sync::{Arc, Mutex, OnceLock};
31use std::time::{SystemTime, UNIX_EPOCH};
32
33use serde::{de::DeserializeOwned, Serialize};
34use tempfile::NamedTempFile;
35
36use crate::backend::StorageBackend;
37use crate::codec::{Codec, JsonCodec};
38use crate::error::StorageError;
39use crate::memory::{
40 append_log_storage, kv_storage, snapshot_storage, AppendLogStorage, AppendLogStorageOptions,
41 KvStorage, KvStorageOptions, SnapshotStorage, SnapshotStorageOptions,
42};
43
/// File extension appended to every key file.
///
/// `decode_filename_to_key` strips this suffix and returns `None` for
/// directory entries that do not end in it.
const FILE_SUFFIX: &str = ".bin";
47
/// Lowercase hex alphabet, indexed by nibble value, for `%xx` encoding.
///
/// Lower case is required for byte-equal cross-impl filenames; TS produces
/// lowercase via `Number.toString(16)`.
const HEX_LOWER: &[u8; 16] = b"0123456789abcdef";
52
/// Filesystem-backed [`StorageBackend`].
///
/// One file per key under `dir`. Concurrent writers are safe at the
/// per-key granularity (atomic rename via `tempfile`); concurrent writers
/// to the SAME key race in unspecified-but-atomic fashion (last commit wins).
///
/// # Filesystem portability (B2 — 2026-05-22, /porting-to-rs)
///
/// Key→filename encoding preserves ASCII case: `Foo` and `foo` encode to
/// `Foo.bin` and `foo.bin`. On case-insensitive filesystems (default macOS
/// APFS, default Windows NTFS) these collide silently — last `write` wins.
///
/// To surface this loudly rather than corrupting data, `FileBackend` probes
/// the filesystem on first `write()` and rejects subsequent writes whose
/// encoded filename differs from a previously-written key only in casing.
/// The probe is per-instance and runs at most once.
///
/// - **Case-sensitive filesystems** (Linux ext4/tmpfs, macOS APFS configured
///   case-sensitive at format time): no enforcement; both `Foo` and `foo`
///   succeed and resolve to distinct files.
/// - **Case-insensitive filesystems** (default macOS APFS, Windows NTFS):
///   second of `Foo` / `foo` fails with [`StorageError::BackendError`] whose
///   message names both the existing and would-collide keys for diagnosis.
/// - Read / list / delete paths are zero-overhead — the probe runs only on
///   `write`, since collisions are write-introduced.
///
/// The collision tracker is an in-memory map owned by this instance and is
/// filled only by this instance's own `write()` calls; files that already
/// exist in the directory are not consulted.
///
/// Tests force the probe outcome via
/// [`FileBackend::with_case_insensitive`] so they're FS-independent.
///
/// # Example
///
/// ```ignore
/// use std::sync::Arc;
/// use graphrefly_storage::{file_backend, snapshot_storage, SnapshotStorageOptions};
///
/// let backend = file_backend("./checkpoints");
/// let tier = snapshot_storage(backend, SnapshotStorageOptions::<MyState, _>::default());
/// tier.save(state).unwrap();
/// ```
#[derive(Debug)]
pub struct FileBackend {
    /// Root directory holding one `.bin` file per key.
    dir: PathBuf,
    /// Backend name returned by `StorageBackend::name`, formatted as
    /// `file:<dir>` at construction time.
    name: String,
    /// Whether `list()` reports filenames that begin with `.`.
    include_hidden: bool,
    /// Case-sensitivity state, lazily initialized on first `write()`.
    /// Unset until resolved; afterwards holds either
    /// [`CaseState::Sensitive`] (zero enforcement) or
    /// [`CaseState::Insensitive`] (tracks written filenames and rejects
    /// case-divergent collisions).
    case_state: OnceLock<CaseState>,
    /// Probe-outcome override. `None` = probe naturally on first write;
    /// `Some(true)` / `Some(false)` = skip the probe and force the
    /// case-insensitive / case-sensitive classification. Set via
    /// [`Self::with_case_insensitive`] for FS-independent tests.
    case_override: Option<bool>,
}
107
/// Resolved case-sensitivity classification + collision tracker.
///
/// Stored in `FileBackend::case_state` once the first `write()` has resolved
/// it (by override or by probing the directory).
#[derive(Debug)]
enum CaseState {
    /// Filesystem distinguishes `Foo` from `foo`; no enforcement needed.
    Sensitive,
    /// Filesystem treats `Foo` and `foo` as the same file. Track the
    /// canonical (ASCII-lowercased) encoded filename → original encoded
    /// filename so each subsequent write can detect cross-case collisions.
    Insensitive {
        /// Map from case-folded filename to the exact filename that first
        /// claimed it; guarded by a mutex because writes take `&self`.
        seen: Mutex<HashMap<String, String>>,
    },
}
120
121impl FileBackend {
122 /// Construct a backend rooted at `dir`. The directory is created lazily on
123 /// first `write()` — `read` / `list` / `delete` tolerate its absence.
124 #[must_use]
125 pub fn new(dir: impl AsRef<Path>) -> Self {
126 let dir = dir.as_ref().to_path_buf();
127 let name = format!("file:{}", dir.display());
128 Self {
129 dir,
130 name,
131 include_hidden: false,
132 case_state: OnceLock::new(),
133 case_override: None,
134 }
135 }
136
137 /// Override whether `list()` includes filenames beginning with `.` (D161).
138 ///
139 /// Default `false`: hidden filenames are skipped. This protects against
140 /// in-flight `tempfile::NamedTempFile` temp files (which are created with
141 /// a leading-`.` prefix) leaking into enumeration results during a
142 /// concurrent flush.
143 ///
144 /// Pass `true` if your application intentionally writes keys whose
145 /// percent-encoding produces a leading-`.` filename and you need them
146 /// visible in `list()`.
147 #[must_use]
148 pub fn with_include_hidden(mut self, include: bool) -> Self {
149 self.include_hidden = include;
150 self
151 }
152
153 /// Override the filesystem case-sensitivity probe outcome (B2,
154 /// 2026-05-22). `Some(true)` forces case-insensitive enforcement;
155 /// `Some(false)` forces case-sensitive (skips enforcement). The natural
156 /// probe is bypassed when set.
157 ///
158 /// **Internal test hook only.** Gated behind `cfg(any(test,
159 /// feature = "test-hooks"))` so production callers cannot construct
160 /// a `FileBackend` with a misleading case-sensitivity classification
161 /// (e.g., `with_case_insensitive(false)` on an APFS volume would
162 /// re-introduce the silent-overwrite hazard B2 closes). The override
163 /// exists so unit tests can exercise both branches independently of
164 /// the host filesystem (macOS CI runners default to APFS case-
165 /// insensitive; Linux CI runners default to ext4/tmpfs case-sensitive).
166 ///
167 /// /qa G2.4 (2026-05-22): the original `pub` form was a public-API
168 /// expansion that escaped the porting-deferred close. Tightened to
169 /// test-only visibility.
170 #[cfg(any(test, feature = "test-hooks"))]
171 #[doc(hidden)]
172 #[must_use]
173 pub fn with_case_insensitive(mut self, forced: bool) -> Self {
174 self.case_override = Some(forced);
175 self
176 }
177
178 /// Backend root directory.
179 #[must_use]
180 pub fn dir(&self) -> &Path {
181 &self.dir
182 }
183
184 /// Whether `list()` includes dot-prefixed filenames.
185 #[must_use]
186 pub fn include_hidden(&self) -> bool {
187 self.include_hidden
188 }
189
190 /// Per-key filesystem path (`<dir>/<encoded-key>.bin`).
191 fn path_for(&self, key: &str) -> PathBuf {
192 let mut filename = encode_key_to_filename(key);
193 filename.push_str(FILE_SUFFIX);
194 self.dir.join(filename)
195 }
196
197 /// Encoded filename (sans dir) for a key — used by the case-collision
198 /// tracker for case-folded comparison.
199 fn filename_for(key: &str) -> String {
200 let mut filename = encode_key_to_filename(key);
201 filename.push_str(FILE_SUFFIX);
202 filename
203 }
204
205 /// Resolve `case_state`, running the filesystem probe lazily if needed.
206 /// Called from `write()` only — read / list / delete paths skip this so
207 /// they retain zero overhead. The probe runs at most once per
208 /// `FileBackend` instance.
209 fn ensure_case_state(&self) -> &CaseState {
210 self.case_state.get_or_init(|| {
211 // Respect the explicit override first (test-only hook).
212 if let Some(forced) = self.case_override {
213 return if forced {
214 CaseState::Insensitive {
215 seen: Mutex::new(HashMap::new()),
216 }
217 } else {
218 CaseState::Sensitive
219 };
220 }
221 match probe_case_sensitivity(&self.dir) {
222 Some(true) => CaseState::Insensitive {
223 seen: Mutex::new(HashMap::new()),
224 },
225 Some(false) | None => CaseState::Sensitive,
226 }
227 })
228 }
229
230 /// On case-insensitive filesystems, ensure `key`'s encoded filename
231 /// doesn't collide with a previously-written key that differs only in
232 /// casing. Returns the encoded filename for atomic insertion by the
233 /// caller post-success.
234 ///
235 /// On case-sensitive filesystems, no-op.
236 fn check_case_collision(&self, key: &str) -> Result<(), StorageError> {
237 let CaseState::Insensitive { seen } = self.ensure_case_state() else {
238 return Ok(());
239 };
240 let filename = Self::filename_for(key);
241 let folded = filename.to_ascii_lowercase();
242 // Lock scope: short; the map is touched only on writes.
243 let mut guard = seen.lock().expect("case-collision tracker poisoned");
244 if let Some(existing) = guard.get(&folded) {
245 if existing != &filename {
246 return Err(StorageError::BackendError {
247 message: format!(
248 "case-insensitive filesystem collision: existing key \
249 file {existing:?} and new key file {filename:?} \
250 (encoded from {key:?}) map to the same on-disk path \
251 when case-folded; FileBackend rejects to prevent \
252 silent overwrite",
253 ),
254 source: None,
255 });
256 }
257 } else {
258 guard.insert(folded, filename);
259 }
260 Ok(())
261 }
262
263 /// Drop a key from the case-collision tracker (allows the casing to be
264 /// reused after `delete`). No-op on case-sensitive filesystems.
265 fn release_case_slot(&self, key: &str) {
266 // Read-only access to `case_state` — DO NOT trigger the probe here.
267 // `delete()` should not pay probe cost.
268 let Some(CaseState::Insensitive { seen }) = self.case_state.get() else {
269 return;
270 };
271 let filename = Self::filename_for(key);
272 let folded = filename.to_ascii_lowercase();
273 if let Ok(mut guard) = seen.lock() {
274 // Only release if the slot holds our exact casing — avoids
275 // accidentally clearing a slot held by another casing of the
276 // same key (which would itself have failed `check_case_collision`).
277 if guard.get(&folded) == Some(&filename) {
278 guard.remove(&folded);
279 }
280 }
281 }
282}
283
/// /qa G2.2 (2026-05-22): process-wide monotonic nonce. Two
/// `FileBackend`s probing the same directory in the same nanosecond on
/// systems with a coarse `SystemTime` resolution would otherwise share
/// a probe filename and race each other's results. The nonce
/// guarantees a unique probe filename even on low-resolution clocks.
static PROBE_NONCE: AtomicU64 = AtomicU64::new(0);

/// /qa G2.2 (2026-05-22): sweep orphan probe files left behind by
/// SIGKILL'd or panicked prior runs. Probe files use the
/// `.gr-case-probe-*` pattern; the leading `.` keeps them invisible to
/// `list()` (D161 hidden filter), but they accumulate across crashes.
///
/// The sweep runs at most once per directory per process: swept
/// directories are remembered in the function-local `SWEPT` set. Any
/// `.gr-case-probe-*` file (either casing) is removed regardless of age —
/// they are always short-lived and any survivor is treated as an orphan.
///
/// NOTE(review): because age is ignored, a probe file that another process
/// is using in the same directory at this moment would also be removed —
/// confirm whether multi-process use of one directory needs guarding.
fn sweep_orphan_probe_files(dir: &Path) {
    use std::collections::HashSet;
    static SWEPT: OnceLock<Mutex<HashSet<PathBuf>>> = OnceLock::new();
    let swept = SWEPT.get_or_init(|| Mutex::new(HashSet::new()));
    let Ok(mut guard) = swept.lock() else {
        return; // poisoned — skip the sweep, not load-bearing
    };
    if guard.contains(dir) {
        return;
    }
    if let Ok(entries) = fs::read_dir(dir) {
        // Unreadable entries and non-UTF-8 names are skipped; removal is
        // best-effort.
        let orphans = entries.flatten().filter(|entry| {
            entry.file_name().to_str().is_some_and(|name| {
                name.starts_with(".gr-case-probe-") || name.starts_with(".GR-CASE-PROBE-")
            })
        });
        for orphan in orphans {
            let _ = fs::remove_file(orphan.path());
        }
    }
    guard.insert(dir.to_path_buf());
}

/// Probe whether the directory's filesystem treats casing as significant.
///
/// Returns `Some(true)` for case-insensitive, `Some(false)` for case-sensitive.
/// Returns `None` if the probe cannot complete (directory not creatable,
/// probe file not writable, permission errors, etc.) — caller defaults to
/// case-sensitive (no enforcement) so the probe failure mode is "lose
/// protection," never "spurious rejection."
///
/// Algorithm: write a uniquely-named lower-case probe file, attempt
/// `fs::metadata` of the same path upper-cased, delete the probe file. A
/// successful lookup means the upper-cased path resolved to the lower-cased
/// probe file — case-insensitivity.
fn probe_case_sensitivity(dir: &Path) -> Option<bool> {
    fs::create_dir_all(dir).ok()?;
    // /qa G2.2: sweep orphans first so a SIGKILL'd prior run can't leave
    // residue that pollutes a future `list()` on this directory.
    sweep_orphan_probe_files(dir);
    // The timestamp only adds entropy; pid + nonce already make the name
    // unique, so a clock set before the epoch must not abort the probe.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    let pid = std::process::id();
    // /qa G2.2: process-wide monotonic nonce closes the
    // two-backends-same-nanosecond race vector.
    let nonce = PROBE_NONCE.fetch_add(1, Ordering::Relaxed);
    // Single canonical filename: lower-case stem. Probe via upper-case lookup.
    // Leading `.` keeps the probe file invisible to `list()` (D161 hidden filter).
    let lower_name = format!(".gr-case-probe-{pid}-{nanos}-{nonce}-a.bin");
    let upper_name = lower_name.to_ascii_uppercase();
    let lower_path = dir.join(&lower_name);
    let upper_path = dir.join(&upper_name);
    if fs::write(&lower_path, b"probe").is_err() {
        // Without the probe file the lookup below would always miss and be
        // misreported as "case-sensitive"; report "could not probe" instead.
        // Best-effort cleanup in case a partial file was created.
        let _ = fs::remove_file(&lower_path);
        return None;
    }
    let insensitive = fs::metadata(&upper_path).is_ok();
    let _ = fs::remove_file(&lower_path);
    // Best-effort: if the upper-case path was somehow created as a distinct
    // file (theoretically impossible on a case-sensitive FS since we only
    // wrote the lower-case path), clean it up too.
    let _ = fs::remove_file(&upper_path);
    Some(insensitive)
}
362
363/// Convenience constructor returning an `Arc<FileBackend>`. Use this when
364/// sharing a single backend across multiple tiers (the paired
365/// `{ snapshot, wal }` pattern from DS-14-storage §a). For non-default
366/// configuration use `Arc::new(FileBackend::new(dir).with_include_hidden(true))`.
367#[must_use]
368pub fn file_backend(dir: impl AsRef<Path>) -> Arc<FileBackend> {
369 Arc::new(FileBackend::new(dir))
370}
371
372impl StorageBackend for FileBackend {
373 fn name(&self) -> &str {
374 &self.name
375 }
376
377 fn read(&self, key: &str) -> Result<Option<Vec<u8>>, StorageError> {
378 match fs::read(self.path_for(key)) {
379 Ok(bytes) => Ok(Some(bytes)),
380 Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(None),
381 Err(e) => Err(io_error("read", &self.dir, e)),
382 }
383 }
384
385 fn write(&self, key: &str, bytes: &[u8]) -> Result<(), StorageError> {
386 fs::create_dir_all(&self.dir).map_err(|e| io_error("mkdir", &self.dir, e))?;
387 // B2 (2026-05-22): on case-insensitive filesystems, reject writes
388 // whose encoded filename differs from a previously-written key only
389 // in casing. Probe runs at most once per backend instance. Checked
390 // BEFORE the atomic-rename write so a rejected write leaves no
391 // tempfile residue.
392 self.check_case_collision(key)?;
393 let target = self.path_for(key);
394 let mut tmp =
395 NamedTempFile::new_in(&self.dir).map_err(|e| io_error("tempfile", &self.dir, e))?;
396 tmp.write_all(bytes)
397 .map_err(|e| io_error("write tmp", &self.dir, e))?;
398 tmp.persist(&target)
399 .map_err(|e| io_error("rename", &self.dir, e.error))?;
400 Ok(())
401 }
402
403 fn delete(&self, key: &str) -> Result<(), StorageError> {
404 // B2 + /qa G2.3 (2026-05-22): on a case-insensitive filesystem,
405 // `path_for("Foo")` and `path_for("foo")` resolve to the SAME
406 // on-disk file. Releasing the case-collision slot BEFORE
407 // `fs::remove_file` opens a clobber race: thread A releases
408 // "Foo", thread B writes "foo" (passes case-check, becomes the
409 // canonical casing), thread A's `fs::remove_file` then removes
410 // thread B's just-written data. Sequence the ops so the slot
411 // release happens AFTER the on-disk delete succeeds.
412 match fs::remove_file(self.path_for(key)) {
413 Ok(()) => {
414 self.release_case_slot(key);
415 Ok(())
416 }
417 Err(e) if e.kind() == io::ErrorKind::NotFound => {
418 // File never existed — still safe to drop the slot, but
419 // do it after the kind-check so a failing `remove_file`
420 // doesn't strand the tracker entry.
421 self.release_case_slot(key);
422 Ok(())
423 }
424 Err(e) => Err(io_error("delete", &self.dir, e)),
425 }
426 }
427
428 fn list(&self, prefix: &str) -> Result<Vec<String>, StorageError> {
429 let entries = match fs::read_dir(&self.dir) {
430 Ok(e) => e,
431 Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
432 Err(e) => return Err(io_error("list", &self.dir, e)),
433 };
434 let mut keys = Vec::new();
435 for entry in entries {
436 let entry = entry.map_err(|e| io_error("list-entry", &self.dir, e))?;
437 let raw = entry.file_name();
438 let Some(name) = raw.to_str() else { continue };
439 if !self.include_hidden && name.starts_with('.') {
440 continue;
441 }
442 let Some(key) = decode_filename_to_key(name) else {
443 continue;
444 };
445 if !prefix.is_empty() && !key.starts_with(prefix) {
446 continue;
447 }
448 keys.push(key);
449 }
450 keys.sort();
451 Ok(keys)
452 }
453}
454
455fn io_error(op: &str, dir: &Path, source: io::Error) -> StorageError {
456 StorageError::BackendError {
457 message: format!("file backend {op} failed at {}: {source}", dir.display()),
458 source: Some(Box::new(source)),
459 }
460}
461
462/// Encode an arbitrary key to a safe filename stem.
463///
464/// `[a-zA-Z0-9_-]` pass through unencoded; everything else is UTF-8 encoded
465/// and each byte is formatted as lowercase `%xx`. Cross-impl byte-identical
466/// with TS [`pathFor`](https://github.com/graphrefly/graphrefly-ts/blob/main/packages/pure-ts/src/extra/storage/tiers-node.ts).
467fn encode_key_to_filename(key: &str) -> String {
468 let mut out = String::with_capacity(key.len());
469 let mut buf = [0u8; 4];
470 for ch in key.chars() {
471 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' {
472 out.push(ch);
473 continue;
474 }
475 for &byte in ch.encode_utf8(&mut buf).as_bytes() {
476 out.push('%');
477 out.push(HEX_LOWER[(byte >> 4) as usize] as char);
478 out.push(HEX_LOWER[(byte & 0x0F) as usize] as char);
479 }
480 }
481 out
482}
483
484/// Inverse of [`encode_key_to_filename`].
485///
486/// Returns `None` when:
487/// - the filename does not end in `.bin`
488/// - the decoded byte sequence is not valid UTF-8
489/// - the filename contains non-ASCII characters outside `%xx` escapes
490/// (those can't have come from our encoder; matches TS behavior of treating
491/// such filenames as un-decodable)
492///
493/// Truncated (`abc%5`) or invalid-hex (`abc%5z`) escapes fall through to
494/// literal-byte semantics — matches the TS `keyFromFilename` regex-fallthrough
495/// branch.
496fn decode_filename_to_key(filename: &str) -> Option<String> {
497 let stem = filename.strip_suffix(FILE_SUFFIX)?;
498 let chars: Vec<char> = stem.chars().collect();
499 let mut bytes: Vec<u8> = Vec::with_capacity(chars.len());
500 let mut i = 0;
501 while i < chars.len() {
502 let ch = chars[i];
503 if ch == '%' && i + 2 < chars.len() {
504 if let (Some(hi), Some(lo)) = (nibble(chars[i + 1]), nibble(chars[i + 2])) {
505 bytes.push((hi << 4) | lo);
506 i += 3;
507 continue;
508 }
509 }
510 if !ch.is_ascii() {
511 return None;
512 }
513 bytes.push(ch as u8);
514 i += 1;
515 }
516 String::from_utf8(bytes).ok()
517}
518
/// Value (0–15) of an ASCII hex digit in either case; `None` for any other
/// character.
fn nibble(c: char) -> Option<u8> {
    // Each arm restricts `c` to an ASCII range, so the `as u8` casts are
    // lossless and the subtractions cannot underflow.
    match c {
        '0'..='9' => Some(c as u8 - b'0'),
        'a'..='f' => Some(c as u8 - b'a' + 10),
        'A'..='F' => Some(c as u8 - b'A' + 10),
        _ => None,
    }
}
522
523// ── Convenience tier wrappers ───────────────────────────────────────────────
524
525/// Convenience: snapshot tier over a fresh file backend rooted at `dir`.
526/// Mirror of [`crate::memory_snapshot`] for filesystem persistence.
527#[must_use]
528pub fn file_snapshot<T, C>(
529 dir: impl AsRef<Path>,
530 opts: SnapshotStorageOptions<T, C>,
531) -> SnapshotStorage<FileBackend, T, C>
532where
533 T: Send + Sync + 'static,
534 C: Codec<T>,
535{
536 snapshot_storage(Arc::new(FileBackend::new(dir)), opts)
537}
538
539/// Convenience: snapshot tier over a fresh file backend with
540/// [`SnapshotStorageOptions::default`] + a `JsonCodec`.
541#[must_use]
542pub fn file_snapshot_default<T>(dir: impl AsRef<Path>) -> SnapshotStorage<FileBackend, T, JsonCodec>
543where
544 T: Serialize + DeserializeOwned + Send + Sync + 'static,
545{
546 file_snapshot(dir, SnapshotStorageOptions::default())
547}
548
549/// Convenience: append-log tier over a fresh file backend rooted at `dir`.
550#[must_use]
551pub fn file_append_log<T, C>(
552 dir: impl AsRef<Path>,
553 opts: AppendLogStorageOptions<T, C>,
554) -> AppendLogStorage<FileBackend, T, C>
555where
556 T: Serialize + DeserializeOwned + Clone + Send + Sync + 'static,
557 C: Codec<Vec<T>>,
558{
559 append_log_storage(Arc::new(FileBackend::new(dir)), opts)
560}
561
562/// Convenience: append-log tier over a fresh file backend with
563/// [`AppendLogStorageOptions::default`] + a `JsonCodec`.
564#[must_use]
565pub fn file_append_log_default<T>(
566 dir: impl AsRef<Path>,
567) -> AppendLogStorage<FileBackend, T, JsonCodec>
568where
569 T: Serialize + DeserializeOwned + Clone + Send + Sync + 'static,
570{
571 file_append_log(dir, AppendLogStorageOptions::default())
572}
573
574/// Convenience: kv tier over a fresh file backend rooted at `dir`.
575#[must_use]
576pub fn file_kv<T, C>(
577 dir: impl AsRef<Path>,
578 opts: KvStorageOptions<T, C>,
579) -> KvStorage<FileBackend, T, C>
580where
581 T: Send + Sync + 'static,
582 C: Codec<T>,
583{
584 kv_storage(Arc::new(FileBackend::new(dir)), opts)
585}
586
587/// Convenience: kv tier over a fresh file backend with
588/// [`KvStorageOptions::default`] + a `JsonCodec`.
589#[must_use]
590pub fn file_kv_default<T>(dir: impl AsRef<Path>) -> KvStorage<FileBackend, T, JsonCodec>
591where
592 T: Serialize + DeserializeOwned + Send + Sync + 'static,
593{
594 file_kv(dir, KvStorageOptions::default())
595}
596
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh temp directory plus a backend whose case-sensitivity probe
    /// outcome is forced. The `TempDir` guard is returned so the directory
    /// stays alive for the duration of the test.
    fn forced_backend(insensitive: bool) -> (tempfile::TempDir, FileBackend) {
        let dir = tempfile::tempdir().expect("tempdir");
        let backend = FileBackend::new(dir.path()).with_case_insensitive(insensitive);
        (dir, backend)
    }

    /// Decode `filename` and expose the result as `Option<&str>`-comparable.
    fn decoded(filename: &str) -> Option<String> {
        decode_filename_to_key(filename)
    }

    // ── Encoding ───────────────────────────────────────────────────────────

    #[test]
    fn encode_alphanumeric_passthrough() {
        let encoded = encode_key_to_filename("abcXYZ-_09");
        assert_eq!(encoded, "abcXYZ-_09");
    }

    #[test]
    fn encode_special_chars_percent_escape() {
        let encoded = encode_key_to_filename("app/with:slashes");
        assert_eq!(encoded, "app%2fwith%3aslashes");
    }

    #[test]
    fn encode_non_ascii_two_byte_utf8() {
        // U+00E9 'é' = 0xC3 0xA9
        let encoded = encode_key_to_filename("café");
        assert_eq!(encoded, "caf%c3%a9");
    }

    #[test]
    fn encode_non_ascii_three_byte_utf8() {
        // U+20AC '€' = 0xE2 0x82 0xAC
        let encoded = encode_key_to_filename("€100");
        assert_eq!(encoded, "%e2%82%ac100");
    }

    #[test]
    fn encode_emoji_four_byte_utf8() {
        // U+1F44B 👋 = 0xF0 0x9F 0x91 0x8B
        let encoded = encode_key_to_filename("👋");
        assert_eq!(encoded, "%f0%9f%91%8b");
    }

    #[test]
    fn encode_empty_key() {
        assert!(encode_key_to_filename("").is_empty());
    }

    // ── Decoding ───────────────────────────────────────────────────────────

    #[test]
    fn decode_round_trip_covers_canonical_set() {
        let canonical = [
            "simple",
            "app/with:slashes",
            "café",
            "€100",
            "👋 hello",
            "a-b_c",
            "",
        ];
        for key in canonical {
            let filename = format!("{}.bin", encode_key_to_filename(key));
            assert_eq!(
                decoded(&filename).as_deref(),
                Some(key),
                "round-trip failed for {key:?}",
            );
        }
    }

    #[test]
    fn decode_rejects_non_bin_suffix() {
        assert_eq!(decoded("foo.txt"), None);
        assert_eq!(decoded("foo"), None);
        // A bare suffix is still a valid file: the empty stem decodes to "".
        assert!(decoded(".bin").is_some());
    }

    #[test]
    fn decode_truncated_percent_escape_treated_literally() {
        // Matches TS keyFromFilename: incomplete `%x` at end falls through to
        // ASCII branch — `abc%5` decodes to `abc%5`.
        let key = decoded("abc%5.bin");
        assert_eq!(key.as_deref(), Some("abc%5"));
    }

    #[test]
    fn decode_invalid_hex_treated_literally() {
        // `%5z` fails the hex check, falls through to per-char ASCII bytes.
        let key = decoded("abc%5z.bin");
        assert_eq!(key.as_deref(), Some("abc%5z"));
    }

    #[test]
    fn decode_uppercase_hex_accepted() {
        // TS regex is /[0-9a-f]{2}$/i (case-insensitive); Rust mirrors via
        // a hex-digit check that accepts both cases.
        let key = decoded("caf%C3%A9.bin");
        assert_eq!(key.as_deref(), Some("café"));
    }

    // ── B2 (2026-05-22, /porting-to-rs storage-honest-error batch) ─────────
    //
    // Case-collision detection on case-insensitive filesystems.
    //
    // The tests use `FileBackend::with_case_insensitive(forced)` to bypass
    // the natural filesystem probe — keeps outcomes deterministic across CI
    // hosts (macOS APFS default = case-insensitive; Linux ext4 default =
    // case-sensitive).

    #[test]
    fn case_insensitive_rejects_case_divergent_second_write() {
        // Force case-insensitive enforcement regardless of the underlying
        // filesystem. Then write `Foo` followed by `foo` and expect the
        // second to fail with a clear diagnostic.
        let (_dir, backend) = forced_backend(true);
        backend
            .write("Foo", b"first")
            .expect("first write must succeed");
        let err = backend
            .write("foo", b"second")
            .expect_err("case-divergent second write must reject");
        let message = match err {
            StorageError::BackendError { message, .. } => message,
            #[allow(unreachable_patterns)] // other variants live outside this file
            other => panic!("expected StorageError::BackendError, got: {other:?}"),
        };
        assert!(
            message.contains("case-insensitive filesystem collision"),
            "diagnostic must label the failure class, got: {message}"
        );
        assert!(
            message.contains("Foo.bin") && message.contains("foo.bin"),
            "diagnostic must name both colliding encoded filenames, got: {message}"
        );
    }

    #[test]
    fn case_insensitive_same_casing_overwrites() {
        // Writing the same key twice (same casing) is the normal overwrite
        // case — must not be flagged as a collision.
        let (_dir, backend) = forced_backend(true);
        backend.write("Foo", b"first").expect("first write");
        backend
            .write("Foo", b"second")
            .expect("same-casing overwrite must succeed");
        let stored = backend.read("Foo").expect("read").expect("present");
        assert_eq!(stored, b"second");
    }

    #[test]
    fn case_insensitive_delete_releases_slot() {
        // After deleting `Foo`, writing `foo` must succeed — the casing slot
        // was released by the delete.
        let (_dir, backend) = forced_backend(true);
        backend.write("Foo", b"first").expect("write Foo");
        backend.delete("Foo").expect("delete Foo");
        backend.write("foo", b"new").expect("post-delete write foo");
        let stored = backend.read("foo").expect("read foo").expect("present");
        assert_eq!(stored, b"new");
    }

    #[test]
    fn case_sensitive_allows_case_divergent_writes() {
        // On a forced-sensitive backend, `Foo` and `foo` must both succeed
        // and resolve to distinct files. We can't verify distinct on-disk
        // files on a case-insensitive host (the second write would clobber
        // the first), so we only assert the calls succeed and the
        // collision tracker doesn't fire.
        let (_dir, backend) = forced_backend(false);
        backend.write("Foo", b"first").expect("write Foo");
        backend
            .write("foo", b"second")
            .expect("forced-sensitive backend must not reject case-divergent keys");
    }

    #[test]
    fn decode_rejects_non_ascii_outside_escapes() {
        // A filename containing a literal non-ASCII char (not `%xx`) cannot
        // have come from our encoder; treat as un-decodable.
        assert_eq!(decoded("café.bin"), None);
    }

    #[test]
    fn nibble_validates_hex_set() {
        let hex = ['0', '5', '9', 'a', 'f', 'A', 'F'];
        let not_hex = ['g', 'G', '/', '@', '\u{00e9}'];
        for c in hex {
            assert!(nibble(c).is_some(), "{c} should be a hex digit");
        }
        for c in not_hex {
            assert!(nibble(c).is_none(), "{c} should not be a hex digit");
        }
    }
}