Skip to main content

graphrefly_storage/
file.rs

1//! Filesystem-backed kv backend (M4.C — DS-14-storage Audit 4).
2//!
3//! [`FileBackend`] maps each key to a `.bin` file under a configured directory.
4//! Keys are percent-encoded so any UTF-8 string can be stored safely:
5//! `[a-zA-Z0-9_-]` pass through; everything else is UTF-8 encoded with each
6//! byte formatted as lowercase `%xx`. The encoded filename for any given key
7//! is byte-identical to the TS `fileBackend` impl
8//! ([`packages/pure-ts/src/extra/storage/tiers-node.ts`](https://github.com/graphrefly/graphrefly-ts/blob/main/packages/pure-ts/src/extra/storage/tiers-node.ts) — D159) so a TS-written
9//! file can be loaded by a Rust reader on the same directory.
10//!
11//! Writes are atomic via [`tempfile::NamedTempFile::persist`]: a tempfile is
12//! created in the target directory, written in full, then renamed onto the
13//! key path. A partially-written file is never visible at the final path,
14//! even on process crash. The `NamedTempFile` Drop impl deletes any tempfile
15//! that never made it through `persist` (covers panics between create and
16//! commit).
17//!
//! `flush()` is a no-op — durability is on a per-write basis via the rename.
19//! `read` / `delete` / `list` tolerate missing directory + missing key by
20//! returning `Ok(None)` / `Ok(())` / `Ok(vec![])` respectively (D158).
21//!
22//! Cargo feature: gated behind `file` (default-on).
23
24use std::collections::HashMap;
25use std::fs;
26use std::io;
27use std::io::Write as _;
28use std::path::{Path, PathBuf};
29use std::sync::atomic::{AtomicU64, Ordering};
30use std::sync::{Arc, Mutex, OnceLock};
31use std::time::{SystemTime, UNIX_EPOCH};
32
33use serde::{de::DeserializeOwned, Serialize};
34use tempfile::NamedTempFile;
35
36use crate::backend::StorageBackend;
37use crate::codec::{Codec, JsonCodec};
38use crate::error::StorageError;
39use crate::memory::{
40    append_log_storage, kv_storage, snapshot_storage, AppendLogStorage, AppendLogStorageOptions,
41    KvStorage, KvStorageOptions, SnapshotStorage, SnapshotStorageOptions,
42};
43
/// File extension applied to every key file. The inverse mapping,
/// `decode_filename_to_key`, rejects entries that don't end in this suffix.
const FILE_SUFFIX: &str = ".bin";

/// Lowercase hex alphabet for `%xx` encoding, indexed by nibble value.
/// Lower case is required for byte-equal cross-impl filenames; TS produces
/// lowercase via `Number.toString(16)`.
const HEX_LOWER: &[u8; 16] = b"0123456789abcdef";
52
53/// Filesystem-backed [`StorageBackend`].
54///
55/// One file per key under `dir`. Concurrent writers are safe at the
56/// per-key granularity (atomic rename via `tempfile`); concurrent writers
57/// to the SAME key race in unspecified-but-atomic fashion (last commit wins).
58///
59/// # Filesystem portability (B2 — 2026-05-22, /porting-to-rs)
60///
61/// Key→filename encoding preserves ASCII case: `Foo` and `foo` encode to
62/// `Foo.bin` and `foo.bin`. On case-insensitive filesystems (default macOS
63/// APFS, default Windows NTFS) these collide silently — last `write` wins.
64///
65/// To surface this loudly rather than corrupting data, `FileBackend` probes
66/// the filesystem on first `write()` and rejects subsequent writes whose
67/// encoded filename differs from a previously-written key only in casing.
68/// The probe is per-instance and runs at most once.
69///
70/// - **Case-sensitive filesystems** (Linux ext4/tmpfs, macOS APFS configured
71///   case-sensitive at format time): no enforcement; both `Foo` and `foo`
72///   succeed and resolve to distinct files.
73/// - **Case-insensitive filesystems** (default macOS APFS, Windows NTFS):
74///   second of `Foo` / `foo` fails with [`StorageError::BackendError`] whose
75///   message names both the existing and would-collide keys for diagnosis.
76/// - Read / list / delete paths are zero-overhead — the probe runs only on
77///   `write`, since collisions are write-introduced.
78///
79/// Tests force the probe outcome via
80/// [`FileBackend::with_case_insensitive`] so they're FS-independent.
81///
82/// # Example
83///
84/// ```ignore
85/// use std::sync::Arc;
86/// use graphrefly_storage::{file_backend, snapshot_storage, SnapshotStorageOptions};
87///
88/// let backend = file_backend("./checkpoints");
89/// let tier = snapshot_storage(backend, SnapshotStorageOptions::<MyState, _>::default());
90/// tier.save(state).unwrap();
91/// ```
#[derive(Debug)]
pub struct FileBackend {
    /// Root directory holding one `<encoded-key>.bin` file per key.
    dir: PathBuf,
    /// Backend name reported through `StorageBackend::name`, formatted as
    /// `file:<dir>` at construction time.
    name: String,
    /// Whether `list()` reports dot-prefixed filenames (default `false`).
    include_hidden: bool,
    /// Case-sensitivity state, lazily initialized on first `write()`.
    /// Unset until resolved; afterwards either `CaseState::Sensitive`
    /// (zero enforcement) or `CaseState::Insensitive` (track seen filenames
    /// and reject case-divergent collisions).
    case_state: OnceLock<CaseState>,
    /// Probe-outcome override. `None` = probe naturally on first write;
    /// `Some(true)` / `Some(false)` = skip the probe and force the
    /// case-insensitive / case-sensitive classification. Set via
    /// [`Self::with_case_insensitive`] for FS-independent tests.
    case_override: Option<bool>,
}
107
/// Resolved case-sensitivity classification + collision tracker.
#[derive(Debug)]
enum CaseState {
    /// Filesystem distinguishes `Foo` from `foo`; no enforcement needed.
    Sensitive,
    /// Filesystem treats `Foo` and `foo` as the same file. Track the
    /// canonical (ASCII-lowercased) encoded filename → original encoded
    /// filename so each subsequent write can detect cross-case collisions.
    Insensitive {
        /// Case-folded encoded filename → exact-case encoded filename that
        /// currently holds the slot.
        seen: Mutex<HashMap<String, String>>,
    },
}
120
121impl FileBackend {
122    /// Construct a backend rooted at `dir`. The directory is created lazily on
123    /// first `write()` — `read` / `list` / `delete` tolerate its absence.
124    #[must_use]
125    pub fn new(dir: impl AsRef<Path>) -> Self {
126        let dir = dir.as_ref().to_path_buf();
127        let name = format!("file:{}", dir.display());
128        Self {
129            dir,
130            name,
131            include_hidden: false,
132            case_state: OnceLock::new(),
133            case_override: None,
134        }
135    }
136
137    /// Override whether `list()` includes filenames beginning with `.` (D161).
138    ///
139    /// Default `false`: hidden filenames are skipped. This protects against
140    /// in-flight `tempfile::NamedTempFile` temp files (which are created with
141    /// a leading-`.` prefix) leaking into enumeration results during a
142    /// concurrent flush.
143    ///
144    /// Pass `true` if your application intentionally writes keys whose
145    /// percent-encoding produces a leading-`.` filename and you need them
146    /// visible in `list()`.
147    #[must_use]
148    pub fn with_include_hidden(mut self, include: bool) -> Self {
149        self.include_hidden = include;
150        self
151    }
152
153    /// Override the filesystem case-sensitivity probe outcome (B2,
154    /// 2026-05-22). `Some(true)` forces case-insensitive enforcement;
155    /// `Some(false)` forces case-sensitive (skips enforcement). The natural
156    /// probe is bypassed when set.
157    ///
158    /// **Internal test hook only.** Gated behind `cfg(any(test,
159    /// feature = "test-hooks"))` so production callers cannot construct
160    /// a `FileBackend` with a misleading case-sensitivity classification
161    /// (e.g., `with_case_insensitive(false)` on an APFS volume would
162    /// re-introduce the silent-overwrite hazard B2 closes). The override
163    /// exists so unit tests can exercise both branches independently of
164    /// the host filesystem (macOS CI runners default to APFS case-
165    /// insensitive; Linux CI runners default to ext4/tmpfs case-sensitive).
166    ///
167    /// /qa G2.4 (2026-05-22): the original `pub` form was a public-API
168    /// expansion that escaped the porting-deferred close. Tightened to
169    /// test-only visibility.
170    #[cfg(any(test, feature = "test-hooks"))]
171    #[doc(hidden)]
172    #[must_use]
173    pub fn with_case_insensitive(mut self, forced: bool) -> Self {
174        self.case_override = Some(forced);
175        self
176    }
177
178    /// Backend root directory.
179    #[must_use]
180    pub fn dir(&self) -> &Path {
181        &self.dir
182    }
183
    /// Whether `list()` includes dot-prefixed filenames.
    ///
    /// Reflects the value set through `with_include_hidden` (`false` for a
    /// backend built with `new`).
    #[must_use]
    pub fn include_hidden(&self) -> bool {
        self.include_hidden
    }
189
190    /// Per-key filesystem path (`<dir>/<encoded-key>.bin`).
191    fn path_for(&self, key: &str) -> PathBuf {
192        let mut filename = encode_key_to_filename(key);
193        filename.push_str(FILE_SUFFIX);
194        self.dir.join(filename)
195    }
196
197    /// Encoded filename (sans dir) for a key — used by the case-collision
198    /// tracker for case-folded comparison.
199    fn filename_for(key: &str) -> String {
200        let mut filename = encode_key_to_filename(key);
201        filename.push_str(FILE_SUFFIX);
202        filename
203    }
204
205    /// Resolve `case_state`, running the filesystem probe lazily if needed.
206    /// Called from `write()` only — read / list / delete paths skip this so
207    /// they retain zero overhead. The probe runs at most once per
208    /// `FileBackend` instance.
209    fn ensure_case_state(&self) -> &CaseState {
210        self.case_state.get_or_init(|| {
211            // Respect the explicit override first (test-only hook).
212            if let Some(forced) = self.case_override {
213                return if forced {
214                    CaseState::Insensitive {
215                        seen: Mutex::new(HashMap::new()),
216                    }
217                } else {
218                    CaseState::Sensitive
219                };
220            }
221            match probe_case_sensitivity(&self.dir) {
222                Some(true) => CaseState::Insensitive {
223                    seen: Mutex::new(HashMap::new()),
224                },
225                Some(false) | None => CaseState::Sensitive,
226            }
227        })
228    }
229
230    /// On case-insensitive filesystems, ensure `key`'s encoded filename
231    /// doesn't collide with a previously-written key that differs only in
232    /// casing. Returns the encoded filename for atomic insertion by the
233    /// caller post-success.
234    ///
235    /// On case-sensitive filesystems, no-op.
236    fn check_case_collision(&self, key: &str) -> Result<(), StorageError> {
237        let CaseState::Insensitive { seen } = self.ensure_case_state() else {
238            return Ok(());
239        };
240        let filename = Self::filename_for(key);
241        let folded = filename.to_ascii_lowercase();
242        // Lock scope: short; the map is touched only on writes.
243        let mut guard = seen.lock().expect("case-collision tracker poisoned");
244        if let Some(existing) = guard.get(&folded) {
245            if existing != &filename {
246                return Err(StorageError::BackendError {
247                    message: format!(
248                        "case-insensitive filesystem collision: existing key \
249                         file {existing:?} and new key file {filename:?} \
250                         (encoded from {key:?}) map to the same on-disk path \
251                         when case-folded; FileBackend rejects to prevent \
252                         silent overwrite",
253                    ),
254                    source: None,
255                });
256            }
257        } else {
258            guard.insert(folded, filename);
259        }
260        Ok(())
261    }
262
263    /// Drop a key from the case-collision tracker (allows the casing to be
264    /// reused after `delete`). No-op on case-sensitive filesystems.
265    fn release_case_slot(&self, key: &str) {
266        // Read-only access to `case_state` — DO NOT trigger the probe here.
267        // `delete()` should not pay probe cost.
268        let Some(CaseState::Insensitive { seen }) = self.case_state.get() else {
269            return;
270        };
271        let filename = Self::filename_for(key);
272        let folded = filename.to_ascii_lowercase();
273        if let Ok(mut guard) = seen.lock() {
274            // Only release if the slot holds our exact casing — avoids
275            // accidentally clearing a slot held by another casing of the
276            // same key (which would itself have failed `check_case_collision`).
277            if guard.get(&folded) == Some(&filename) {
278                guard.remove(&folded);
279            }
280        }
281    }
282}
283
// NOTE: the following paragraph describes `probe_case_sensitivity` (defined
// further down in this file), not the nonce. It is kept as a plain comment
// so rustdoc does not attach it to `PROBE_NONCE`.
//
// Probe whether the directory's filesystem treats casing as significant.
//
// Returns `Some(true)` for case-insensitive, `Some(false)` for case-sensitive.
// Returns `None` if the probe cannot complete (directory not creatable,
// permission errors, etc.) — caller defaults to case-sensitive (no
// enforcement) so the probe failure mode is "lose protection," never
// "spurious rejection."
//
// Algorithm: write a uniquely-named probe file, attempt `fs::metadata` of
// the same path uppercased, delete the probe file. A successful lookup
// indicates the upper-cased path resolved to the lower-cased probe file —
// case-insensitivity.

/// /qa G2.2 (2026-05-22): process-wide monotonic nonce. Two
/// `FileBackend`s probing the same directory in the same nanosecond on
/// systems with a coarse `SystemTime` resolution would otherwise share
/// a probe filename and race each other's results. The nonce
/// guarantees a unique probe filename even on low-resolution clocks.
static PROBE_NONCE: AtomicU64 = AtomicU64::new(0);
302
/// /qa G2.2 (2026-05-22): remove orphan probe files that SIGKILL'd or
/// panicked prior runs left in `dir`.
///
/// Probe files are named `.gr-case-probe-*`; the leading `.` hides them
/// from `list()` (D161 hidden filter), but they would pile up across
/// crashes. Every file whose name starts with that prefix (in either
/// casing) is deleted regardless of age — probe files are short-lived, so a
/// survivor is treated as an orphan.
///
/// The sweep runs at most once per directory path per process: swept paths
/// are remembered in a process-wide set. All failures (unreadable
/// directory, failed removal, poisoned set) are ignored — the sweep is
/// best-effort housekeeping.
fn sweep_orphan_probe_files(dir: &Path) {
    use std::collections::HashSet;

    /// Filename prefixes that identify a probe file.
    const PROBE_PREFIXES: [&str; 2] = [".gr-case-probe-", ".GR-CASE-PROBE-"];
    static SWEPT: OnceLock<Mutex<HashSet<PathBuf>>> = OnceLock::new();

    let registry = SWEPT.get_or_init(|| Mutex::new(HashSet::new()));
    // Poisoned registry: skip the sweep, it is not load-bearing.
    let Ok(mut done) = registry.lock() else {
        return;
    };
    if done.contains(dir) {
        return;
    }
    if let Ok(listing) = fs::read_dir(dir) {
        listing
            .flatten()
            .filter(|entry| {
                // Names that are not valid UTF-8 cannot be probe files.
                entry.file_name().to_str().is_some_and(|name| {
                    PROBE_PREFIXES
                        .iter()
                        .any(|prefix| name.starts_with(prefix))
                })
            })
            .for_each(|entry| {
                let _ = fs::remove_file(entry.path());
            });
    }
    // Recorded even when the directory could not be read.
    done.insert(dir.to_path_buf());
}
333
334fn probe_case_sensitivity(dir: &Path) -> Option<bool> {
335    fs::create_dir_all(dir).ok()?;
336    // /qa G2.2: sweep orphans first so a SIGKILL'd prior run can't leave
337    // residue that pollutes a future `list()` on this directory.
338    sweep_orphan_probe_files(dir);
339    let nanos = SystemTime::now()
340        .duration_since(UNIX_EPOCH)
341        .ok()?
342        .as_nanos();
343    let pid = std::process::id();
344    // /qa G2.2: process-wide monotonic nonce closes the
345    // two-backends-same-nanosecond race vector.
346    let nonce = PROBE_NONCE.fetch_add(1, Ordering::Relaxed);
347    // Single canonical filename: lower-case stem. Probe via upper-case lookup.
348    // Leading `.` keeps the probe file invisible to `list()` (D161 hidden filter).
349    let lower_name = format!(".gr-case-probe-{pid}-{nanos}-{nonce}-a.bin");
350    let upper_name = lower_name.to_ascii_uppercase();
351    let lower_path = dir.join(&lower_name);
352    let upper_path = dir.join(&upper_name);
353    let _ = fs::write(&lower_path, b"probe");
354    let result = fs::metadata(&upper_path).is_ok();
355    let _ = fs::remove_file(&lower_path);
356    // Best-effort: if the upper-case path was somehow created as a distinct
357    // file (theoretically impossible on a case-sensitive FS since we only
358    // wrote the lower-case path), clean it up too.
359    let _ = fs::remove_file(&upper_path);
360    Some(result)
361}
362
363/// Convenience constructor returning an `Arc<FileBackend>`. Use this when
364/// sharing a single backend across multiple tiers (the paired
365/// `{ snapshot, wal }` pattern from DS-14-storage §a). For non-default
366/// configuration use `Arc::new(FileBackend::new(dir).with_include_hidden(true))`.
367#[must_use]
368pub fn file_backend(dir: impl AsRef<Path>) -> Arc<FileBackend> {
369    Arc::new(FileBackend::new(dir))
370}
371
372impl StorageBackend for FileBackend {
373    fn name(&self) -> &str {
374        &self.name
375    }
376
377    fn read(&self, key: &str) -> Result<Option<Vec<u8>>, StorageError> {
378        match fs::read(self.path_for(key)) {
379            Ok(bytes) => Ok(Some(bytes)),
380            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(None),
381            Err(e) => Err(io_error("read", &self.dir, e)),
382        }
383    }
384
385    fn write(&self, key: &str, bytes: &[u8]) -> Result<(), StorageError> {
386        fs::create_dir_all(&self.dir).map_err(|e| io_error("mkdir", &self.dir, e))?;
387        // B2 (2026-05-22): on case-insensitive filesystems, reject writes
388        // whose encoded filename differs from a previously-written key only
389        // in casing. Probe runs at most once per backend instance. Checked
390        // BEFORE the atomic-rename write so a rejected write leaves no
391        // tempfile residue.
392        self.check_case_collision(key)?;
393        let target = self.path_for(key);
394        let mut tmp =
395            NamedTempFile::new_in(&self.dir).map_err(|e| io_error("tempfile", &self.dir, e))?;
396        tmp.write_all(bytes)
397            .map_err(|e| io_error("write tmp", &self.dir, e))?;
398        tmp.persist(&target)
399            .map_err(|e| io_error("rename", &self.dir, e.error))?;
400        Ok(())
401    }
402
403    fn delete(&self, key: &str) -> Result<(), StorageError> {
404        // B2 + /qa G2.3 (2026-05-22): on a case-insensitive filesystem,
405        // `path_for("Foo")` and `path_for("foo")` resolve to the SAME
406        // on-disk file. Releasing the case-collision slot BEFORE
407        // `fs::remove_file` opens a clobber race: thread A releases
408        // "Foo", thread B writes "foo" (passes case-check, becomes the
409        // canonical casing), thread A's `fs::remove_file` then removes
410        // thread B's just-written data. Sequence the ops so the slot
411        // release happens AFTER the on-disk delete succeeds.
412        match fs::remove_file(self.path_for(key)) {
413            Ok(()) => {
414                self.release_case_slot(key);
415                Ok(())
416            }
417            Err(e) if e.kind() == io::ErrorKind::NotFound => {
418                // File never existed — still safe to drop the slot, but
419                // do it after the kind-check so a failing `remove_file`
420                // doesn't strand the tracker entry.
421                self.release_case_slot(key);
422                Ok(())
423            }
424            Err(e) => Err(io_error("delete", &self.dir, e)),
425        }
426    }
427
428    fn list(&self, prefix: &str) -> Result<Vec<String>, StorageError> {
429        let entries = match fs::read_dir(&self.dir) {
430            Ok(e) => e,
431            Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
432            Err(e) => return Err(io_error("list", &self.dir, e)),
433        };
434        let mut keys = Vec::new();
435        for entry in entries {
436            let entry = entry.map_err(|e| io_error("list-entry", &self.dir, e))?;
437            let raw = entry.file_name();
438            let Some(name) = raw.to_str() else { continue };
439            if !self.include_hidden && name.starts_with('.') {
440                continue;
441            }
442            let Some(key) = decode_filename_to_key(name) else {
443                continue;
444            };
445            if !prefix.is_empty() && !key.starts_with(prefix) {
446                continue;
447            }
448            keys.push(key);
449        }
450        keys.sort();
451        Ok(keys)
452    }
453}
454
455fn io_error(op: &str, dir: &Path, source: io::Error) -> StorageError {
456    StorageError::BackendError {
457        message: format!("file backend {op} failed at {}: {source}", dir.display()),
458        source: Some(Box::new(source)),
459    }
460}
461
462/// Encode an arbitrary key to a safe filename stem.
463///
464/// `[a-zA-Z0-9_-]` pass through unencoded; everything else is UTF-8 encoded
465/// and each byte is formatted as lowercase `%xx`. Cross-impl byte-identical
466/// with TS [`pathFor`](https://github.com/graphrefly/graphrefly-ts/blob/main/packages/pure-ts/src/extra/storage/tiers-node.ts).
467fn encode_key_to_filename(key: &str) -> String {
468    let mut out = String::with_capacity(key.len());
469    let mut buf = [0u8; 4];
470    for ch in key.chars() {
471        if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' {
472            out.push(ch);
473            continue;
474        }
475        for &byte in ch.encode_utf8(&mut buf).as_bytes() {
476            out.push('%');
477            out.push(HEX_LOWER[(byte >> 4) as usize] as char);
478            out.push(HEX_LOWER[(byte & 0x0F) as usize] as char);
479        }
480    }
481    out
482}
483
484/// Inverse of [`encode_key_to_filename`].
485///
486/// Returns `None` when:
487/// - the filename does not end in `.bin`
488/// - the decoded byte sequence is not valid UTF-8
489/// - the filename contains non-ASCII characters outside `%xx` escapes
490///   (those can't have come from our encoder; matches TS behavior of treating
491///   such filenames as un-decodable)
492///
493/// Truncated (`abc%5`) or invalid-hex (`abc%5z`) escapes fall through to
494/// literal-byte semantics — matches the TS `keyFromFilename` regex-fallthrough
495/// branch.
496fn decode_filename_to_key(filename: &str) -> Option<String> {
497    let stem = filename.strip_suffix(FILE_SUFFIX)?;
498    let chars: Vec<char> = stem.chars().collect();
499    let mut bytes: Vec<u8> = Vec::with_capacity(chars.len());
500    let mut i = 0;
501    while i < chars.len() {
502        let ch = chars[i];
503        if ch == '%' && i + 2 < chars.len() {
504            if let (Some(hi), Some(lo)) = (nibble(chars[i + 1]), nibble(chars[i + 2])) {
505                bytes.push((hi << 4) | lo);
506                i += 3;
507                continue;
508            }
509        }
510        if !ch.is_ascii() {
511            return None;
512        }
513        bytes.push(ch as u8);
514        i += 1;
515    }
516    String::from_utf8(bytes).ok()
517}
518
/// Value (0–15) of a single hexadecimal digit in either case, or `None`
/// when `c` is not a hex digit.
fn nibble(c: char) -> Option<u8> {
    let digit = c.to_digit(16)?;
    u8::try_from(digit).ok()
}
522
523// ── Convenience tier wrappers ───────────────────────────────────────────────
524
525/// Convenience: snapshot tier over a fresh file backend rooted at `dir`.
526/// Mirror of [`crate::memory_snapshot`] for filesystem persistence.
527#[must_use]
528pub fn file_snapshot<T, C>(
529    dir: impl AsRef<Path>,
530    opts: SnapshotStorageOptions<T, C>,
531) -> SnapshotStorage<FileBackend, T, C>
532where
533    T: Send + Sync + 'static,
534    C: Codec<T>,
535{
536    snapshot_storage(Arc::new(FileBackend::new(dir)), opts)
537}
538
539/// Convenience: snapshot tier over a fresh file backend with
540/// [`SnapshotStorageOptions::default`] + a `JsonCodec`.
541#[must_use]
542pub fn file_snapshot_default<T>(dir: impl AsRef<Path>) -> SnapshotStorage<FileBackend, T, JsonCodec>
543where
544    T: Serialize + DeserializeOwned + Send + Sync + 'static,
545{
546    file_snapshot(dir, SnapshotStorageOptions::default())
547}
548
549/// Convenience: append-log tier over a fresh file backend rooted at `dir`.
550#[must_use]
551pub fn file_append_log<T, C>(
552    dir: impl AsRef<Path>,
553    opts: AppendLogStorageOptions<T, C>,
554) -> AppendLogStorage<FileBackend, T, C>
555where
556    T: Serialize + DeserializeOwned + Clone + Send + Sync + 'static,
557    C: Codec<Vec<T>>,
558{
559    append_log_storage(Arc::new(FileBackend::new(dir)), opts)
560}
561
562/// Convenience: append-log tier over a fresh file backend with
563/// [`AppendLogStorageOptions::default`] + a `JsonCodec`.
564#[must_use]
565pub fn file_append_log_default<T>(
566    dir: impl AsRef<Path>,
567) -> AppendLogStorage<FileBackend, T, JsonCodec>
568where
569    T: Serialize + DeserializeOwned + Clone + Send + Sync + 'static,
570{
571    file_append_log(dir, AppendLogStorageOptions::default())
572}
573
574/// Convenience: kv tier over a fresh file backend rooted at `dir`.
575#[must_use]
576pub fn file_kv<T, C>(
577    dir: impl AsRef<Path>,
578    opts: KvStorageOptions<T, C>,
579) -> KvStorage<FileBackend, T, C>
580where
581    T: Send + Sync + 'static,
582    C: Codec<T>,
583{
584    kv_storage(Arc::new(FileBackend::new(dir)), opts)
585}
586
587/// Convenience: kv tier over a fresh file backend with
588/// [`KvStorageOptions::default`] + a `JsonCodec`.
589#[must_use]
590pub fn file_kv_default<T>(dir: impl AsRef<Path>) -> KvStorage<FileBackend, T, JsonCodec>
591where
592    T: Serialize + DeserializeOwned + Send + Sync + 'static,
593{
594    file_kv(dir, KvStorageOptions::default())
595}
596
// Unit tests for the key <-> filename codec and for the case-collision
// enforcement of `FileBackend`.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Key → filename encoding ─────────────────────────────────────────────

    #[test]
    fn encode_alphanumeric_passthrough() {
        assert_eq!(encode_key_to_filename("abcXYZ-_09"), "abcXYZ-_09");
    }

    #[test]
    fn encode_special_chars_percent_escape() {
        assert_eq!(
            encode_key_to_filename("app/with:slashes"),
            "app%2fwith%3aslashes"
        );
    }

    #[test]
    fn encode_non_ascii_two_byte_utf8() {
        // U+00E9 'é' = 0xC3 0xA9
        assert_eq!(encode_key_to_filename("café"), "caf%c3%a9");
    }

    #[test]
    fn encode_non_ascii_three_byte_utf8() {
        // U+20AC '€' = 0xE2 0x82 0xAC
        assert_eq!(encode_key_to_filename("€100"), "%e2%82%ac100");
    }

    #[test]
    fn encode_emoji_four_byte_utf8() {
        // U+1F44B 👋 = 0xF0 0x9F 0x91 0x8B
        assert_eq!(encode_key_to_filename("👋"), "%f0%9f%91%8b");
    }

    #[test]
    fn encode_empty_key() {
        assert_eq!(encode_key_to_filename(""), "");
    }

    // ── Filename → key decoding ─────────────────────────────────────────────

    #[test]
    fn decode_round_trip_covers_canonical_set() {
        for key in [
            "simple",
            "app/with:slashes",
            "café",
            "€100",
            "👋 hello",
            "a-b_c",
            "",
        ] {
            let filename = format!("{}.bin", encode_key_to_filename(key));
            assert_eq!(
                decode_filename_to_key(&filename).as_deref(),
                Some(key),
                "round-trip failed for {key:?}",
            );
        }
    }

    #[test]
    fn decode_rejects_non_bin_suffix() {
        assert!(decode_filename_to_key("foo.txt").is_none());
        assert!(decode_filename_to_key("foo").is_none());
        assert!(decode_filename_to_key(".bin").is_some()); // empty stem decodes to ""
    }

    #[test]
    fn decode_truncated_percent_escape_treated_literally() {
        // Matches TS keyFromFilename: incomplete `%x` at end falls through to
        // ASCII branch — `abc%5` decodes to `abc%5`.
        assert_eq!(
            decode_filename_to_key("abc%5.bin").as_deref(),
            Some("abc%5")
        );
    }

    #[test]
    fn decode_invalid_hex_treated_literally() {
        // `%5z` fails the hex check, falls through to per-char ASCII bytes.
        assert_eq!(
            decode_filename_to_key("abc%5z.bin").as_deref(),
            Some("abc%5z")
        );
    }

    #[test]
    fn decode_uppercase_hex_accepted() {
        // TS regex is /[0-9a-f]{2}$/i (case-insensitive); Rust mirrors via
        // char::to_digit which accepts both cases.
        assert_eq!(
            decode_filename_to_key("caf%C3%A9.bin").as_deref(),
            Some("café")
        );
    }

    // ── B2 (2026-05-22, /porting-to-rs storage-honest-error batch) ─────────
    //
    // Case-collision detection on case-insensitive filesystems.
    //
    // The tests use `FileBackend::with_case_insensitive(forced)` to bypass
    // the natural filesystem probe — keeps outcomes deterministic across CI
    // hosts (macOS APFS default = case-insensitive; Linux ext4 default =
    // case-sensitive).

    #[test]
    fn case_insensitive_rejects_case_divergent_second_write() {
        // Force case-insensitive enforcement regardless of the underlying
        // filesystem. Then write `Foo` followed by `foo` and expect the
        // second to fail with a clear diagnostic.
        let dir = tempfile::tempdir().expect("tempdir");
        let backend = FileBackend::new(dir.path()).with_case_insensitive(true);
        backend
            .write("Foo", b"first")
            .expect("first write must succeed");
        let err = backend
            .write("foo", b"second")
            .expect_err("case-divergent second write must reject");
        let StorageError::BackendError { message, .. } = err else {
            panic!("expected StorageError::BackendError, got: {err:?}");
        };
        assert!(
            message.contains("case-insensitive filesystem collision"),
            "diagnostic must label the failure class, got: {message}"
        );
        assert!(
            message.contains("Foo.bin") && message.contains("foo.bin"),
            "diagnostic must name both colliding encoded filenames, got: {message}"
        );
    }

    #[test]
    fn case_insensitive_same_casing_overwrites() {
        // Writing the same key twice (same casing) is the normal overwrite
        // case — must not be flagged as a collision.
        let dir = tempfile::tempdir().expect("tempdir");
        let backend = FileBackend::new(dir.path()).with_case_insensitive(true);
        backend.write("Foo", b"first").expect("first write");
        backend
            .write("Foo", b"second")
            .expect("same-casing overwrite must succeed");
        let read = backend.read("Foo").expect("read").expect("present");
        assert_eq!(read, b"second");
    }

    #[test]
    fn case_insensitive_delete_releases_slot() {
        // After deleting `Foo`, writing `foo` must succeed — the casing slot
        // was released by the delete.
        let dir = tempfile::tempdir().expect("tempdir");
        let backend = FileBackend::new(dir.path()).with_case_insensitive(true);
        backend.write("Foo", b"first").expect("write Foo");
        backend.delete("Foo").expect("delete Foo");
        backend.write("foo", b"new").expect("post-delete write foo");
        let read = backend.read("foo").expect("read foo").expect("present");
        assert_eq!(read, b"new");
    }

    #[test]
    fn case_sensitive_allows_case_divergent_writes() {
        // On a forced-sensitive backend, `Foo` and `foo` must both succeed
        // and resolve to distinct files. We can't verify distinct on-disk
        // files on a case-insensitive host (the second write would clobber
        // the first), so we only assert the calls succeed and the
        // collision tracker doesn't fire.
        let dir = tempfile::tempdir().expect("tempdir");
        let backend = FileBackend::new(dir.path()).with_case_insensitive(false);
        backend.write("Foo", b"first").expect("write Foo");
        backend
            .write("foo", b"second")
            .expect("forced-sensitive backend must not reject case-divergent keys");
    }

    // ── Additional codec edge cases ─────────────────────────────────────────

    #[test]
    fn decode_rejects_non_ascii_outside_escapes() {
        // A filename containing a literal non-ASCII char (not `%xx`) cannot
        // have come from our encoder; treat as un-decodable.
        assert!(decode_filename_to_key("café.bin").is_none());
    }

    #[test]
    fn nibble_validates_hex_set() {
        // Hex digits in either case are accepted.
        for c in ['0', '5', '9', 'a', 'f', 'A', 'F'] {
            assert!(nibble(c).is_some(), "{c} should be a hex digit");
        }
        // Everything else — including non-ASCII characters — is rejected.
        for c in ['g', 'G', '/', '@', '\u{00e9}'] {
            assert!(nibble(c).is_none(), "{c} should not be a hex digit");
        }
    }
}