Skip to main content

anomstream_core/
persistence.rs

1//! Optional persistence helpers for [`crate::RandomCutForest`] and
2//! [`crate::ThresholdedForest`].
3//!
4//! Gated behind the `serde` cargo feature. Four flavours are
5//! exposed:
6//!
7//! - **Binary bytes** (`to_bytes` / `from_bytes`, gated on `postcard`):
8//!   a compact `postcard` payload prefixed with a 4-byte
9//!   little-endian version field. Use this for on-disk snapshots or
10//!   to ship forests over a network socket. (`postcard` replaced
11//!   `bincode` in persistence format v2 after the `bincode` crate
12//!   was marked unmaintained by `RustSec` in 2025.)
13//! - **JSON text** (`to_json` / `from_json`, gated on `serde_json`):
14//!   a human-readable text encoding wrapping the same versioned
15//!   envelope. Useful for debugging or for callers who already pipe
16//!   JSON elsewhere.
17//! - **Atomic file path** (`to_path` / `from_path`, gated on
18//!   `postcard + std`): write-tmp-then-rename + `fsync` so a crash or
19//!   power-loss mid-save cannot corrupt the snapshot on disk. Pair
20//!   with periodic checkpointing for **warm reload** — the detector
21//!   resumes exactly where it left off across restarts.
22//! - **JSON file path** (`to_json_path` / `from_json_path`, gated on
23//!   `serde_json + std`): same atomic write discipline, human-readable
24//!   payload.
25//!
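//! For the binary encoding the version prefix lives **outside** the
//! serialised payload, so a version skew is detected before `postcard`
//! runs against arbitrary bytes — a defence against malformed
//! payload-driven panics. The JSON encoding carries the version as the
//! `"version"` field of the envelope instead, so `serde_json` has to
//! parse the text before the version can be read.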
30//!
31//! Both encodings preserve the per-point dimensionality `D` at the
32//! type level — callers must deserialise into a type with the same
33//! compile-time `D` that produced the payload.
34//!
35//! # Security
36//!
37//! These deserialisers are designed for **trusted checkpoints**:
38//! payloads produced by an earlier process you control, stored on
39//! a filesystem you control, and reloaded at warm-restart time. The
40//! `postcard` and `serde_json` decoders accept any well-formed
41//! payload that matches the schema — they perform no integrity check
42//! beyond the 4-byte version prefix and have no built-in cap on
43//! recursion depth, so a deliberately malformed payload could in
44//! principle drive an out-of-memory or stack-overflow condition.
45//!
46//! The current [`RandomCutForest`] / [`ThresholdedForest`] schema is
47//! arena-backed (flat `Vec<InternalData>` / `Vec<LeafData>`, no
48//! recursive type nesting) so the recursion-depth attack surface is
49//! limited in practice — but pretending the format is hostile-input-
50//! safe would be wrong. Defence-in-depth measures shipped here:
51//!
52//! - [`MAX_DESERIALIZE_BYTES`] / [`MAX_JSON_BYTES`] caps reject
53//!   absurdly large payloads up front (configurable per call via the
54//!   `*_with_max_size` variants).
//! - For the binary encoding, the 4-byte version prefix is checked
//!   **before** `postcard` runs against the bytes.
57//! - All deserialisers return a typed error rather than panicking
58//!   on truncated, mismatched, or malformed input.
59//!
60//! For checkpoints sourced from outside the process boundary
61//! (network sync, multi-tenant restore endpoints, partner-supplied
62//! state), pair these helpers with an out-of-band integrity check
63//! (HMAC, signature, or transport-level TLS+auth) before calling
64//! `from_bytes` / `from_path` / `from_json` / `from_json_path`. Do
65//! **not** feed unauthenticated bytes from a hostile source.
66
67#[cfg(any(feature = "postcard", feature = "serde_json"))]
68use crate::error::{RcfError, RcfResult};
69use crate::forest::RandomCutForest;
70#[cfg(feature = "serde")]
71use crate::thresholded::ThresholdedForest;
72
/// Format version stamped onto every serialised [`RandomCutForest`].
/// Bump on any breaking layout change. History:
///
/// - `4` — splits the `NodeStore` arenas into typed `InternalData` /
///   `LeafData` records (saves ~90 % on leaf-arena memory at
///   `D = 16`).
/// - `3` — added the per-point timestamp side-map used by
///   [`RandomCutForest::update_at`] / [`RandomCutForest::delete_before`].
/// - `2` — first `postcard` payload, after `RustSec` flagged `bincode`
///   as unmaintained.
/// - `1` — the original `bincode 2` payload.
pub const PERSISTENCE_VERSION: u32 = 4;

/// Format version stamped onto every serialised [`ThresholdedForest`].
/// Kept separate from [`PERSISTENCE_VERSION`] because the threshold
/// envelope carries additional state (EMA stats, threshold config)
/// that evolves on its own cadence. Version `4` inherits the forest's
/// typed-arena bump.
pub const THRESHOLDED_PERSISTENCE_VERSION: u32 = 4;

/// Width in bytes of the version prefix: exactly one `u32`.
pub const VERSION_PREFIX_BYTES: usize = core::mem::size_of::<u32>();

/// Default ceiling (256 MiB) on the `postcard` payload size accepted by
/// [`RandomCutForest::from_bytes`] / [`ThresholdedForest::from_bytes`].
/// Sized for a typical `D` ≤ 64, `num_trees` ≤ 1000, `sample_size` ≤ 2048
/// deployment with comfortable headroom. Larger workloads (high-`D`
/// detectors with extensive arenas) should call
/// [`RandomCutForest::from_bytes_with_max_size`] /
/// [`ThresholdedForest::from_bytes_with_max_size`] with an explicit
/// cap instead.
pub const MAX_DESERIALIZE_BYTES: usize = 256 << 20;

/// Default ceiling (1 GiB) on the `serde_json` payload size accepted by
/// [`RandomCutForest::from_json`] / [`ThresholdedForest::from_json`].
/// JSON encodings are roughly 4× the binary equivalent (utf-8 floats,
/// field-name overhead), hence a cap four times
/// [`MAX_DESERIALIZE_BYTES`].
pub const MAX_JSON_BYTES: usize = 1 << 30;
107
/// Size guard run ahead of every third-party decoder.
///
/// Succeeds when a payload of `len` bytes fits within `max`
/// (inclusive). Otherwise returns [`RcfError::DeserializationFailed`]
/// with a message that names the payload flavour (`kind`), the
/// offending length, and the cap.
#[cfg(any(feature = "postcard", feature = "serde_json"))]
fn enforce_size_cap(len: usize, max: usize, kind: &'static str) -> RcfResult<()> {
    if len <= max {
        return Ok(());
    }
    Err(RcfError::DeserializationFailed(format!(
        "{kind} payload {len} byte(s) exceeds cap {max} (caller-controlled OOM guard) — \
         use the `*_with_max_size` variant to opt into a larger bound"
    )))
}
120
/// Read the little-endian `u32` persistence version stored in the
/// first [`VERSION_PREFIX_BYTES`] bytes of `bytes`.
///
/// # Errors
///
/// Returns [`RcfError::DeserializationFailed`] when `bytes` holds
/// fewer than [`VERSION_PREFIX_BYTES`] bytes.
#[cfg(feature = "postcard")]
fn read_version_prefix(bytes: &[u8]) -> RcfResult<u32> {
    match bytes.get(..VERSION_PREFIX_BYTES) {
        Some(prefix) => {
            let mut raw = [0_u8; VERSION_PREFIX_BYTES];
            raw.copy_from_slice(prefix);
            Ok(u32::from_le_bytes(raw))
        }
        // `get` yields `None` exactly when the slice is shorter than the prefix.
        None => Err(RcfError::DeserializationFailed(format!(
            "payload too short: {} byte(s), need at least {VERSION_PREFIX_BYTES}",
            bytes.len()
        ))),
    }
}
139
/// Path helpers for atomic write-tmp-rename persistence.
///
/// The tmp suffix is appended to the caller-supplied path so the temp
/// file lives in the same filesystem — rename is only atomic within a
/// single filesystem. The file is `fsync`'d before the rename so a
/// power-loss between `write` and `rename` cannot leave a partially
/// written snapshot on disk. A save that fails after the temp file
/// was created removes that temp file again (best effort), and on
/// Unix the parent directory is synced after the rename (best effort)
/// so the new directory entry is flushed as well.
#[cfg(all(feature = "std", any(feature = "postcard", feature = "serde_json")))]
mod atomic {
    use std::ffi::OsString;
    use std::fs::{File, remove_file, rename};
    use std::io::Write;
    use std::path::{Path, PathBuf};

    use crate::error::{RcfError, RcfResult};

    /// Compute the temporary path used for the atomic write:
    /// `path` with `.tmp` appended to its final component.
    pub(super) fn tmp_path(path: &Path) -> PathBuf {
        let mut s: OsString = path.as_os_str().to_owned();
        s.push(".tmp");
        PathBuf::from(s)
    }

    /// Flush the directory entry created by the rename.
    ///
    /// Syncing the file alone does not guarantee that its directory
    /// entry has reached disk, so the containing directory is opened
    /// and synced too. Failures are ignored on purpose: by the time
    /// this runs the snapshot has already been renamed into place, so
    /// reporting an error would wrongly suggest the save did not happen.
    #[cfg(unix)]
    fn sync_parent_dir(path: &Path) {
        // A bare file name has an empty parent; that means the current directory.
        let dir = match path.parent() {
            Some(parent) if !parent.as_os_str().is_empty() => parent,
            _ => Path::new("."),
        };
        if let Ok(handle) = File::open(dir) {
            let _ = handle.sync_all();
        }
    }

    /// Directory handles cannot be opened through `File` on every
    /// non-Unix target, so the directory sync is skipped there.
    #[cfg(not(unix))]
    fn sync_parent_dir(_path: &Path) {}

    /// Write `bytes` to `path` atomically: tmp file first, fsync,
    /// then rename onto the target.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::SerializationFailed`] naming the failing
    /// step (`create`, `write`, `fsync` or `rename`). When a step after
    /// `create` fails, the temp file is removed (best effort) so a
    /// failed save does not leave a stale `<path>.tmp` behind.
    pub(super) fn write_atomic(path: &Path, bytes: &[u8]) -> RcfResult<()> {
        let tmp = tmp_path(path);
        let mut f = File::create(&tmp)
            .map_err(|e| RcfError::SerializationFailed(format!("create {}: {e}", tmp.display())))?;
        let staged = f
            .write_all(bytes)
            .map_err(|e| RcfError::SerializationFailed(format!("write {}: {e}", tmp.display())))
            .and_then(|()| {
                f.sync_all().map_err(|e| {
                    RcfError::SerializationFailed(format!("fsync {}: {e}", tmp.display()))
                })
            });
        // Close the handle before the temp file is renamed or removed.
        drop(f);
        let renamed = staged.and_then(|()| {
            rename(&tmp, path).map_err(|e| {
                RcfError::SerializationFailed(format!(
                    "rename {} -> {}: {e}",
                    tmp.display(),
                    path.display()
                ))
            })
        });
        match renamed {
            Ok(()) => {
                sync_parent_dir(path);
                Ok(())
            }
            Err(err) => {
                // Best effort: the original failure is what the caller needs to see.
                let _ = remove_file(&tmp);
                Err(err)
            }
        }
    }

    /// Read the full byte content of `path` into memory. No size limit
    /// is applied here; callers enforce their own cap.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::DeserializationFailed`] when the read fails.
    #[cfg(feature = "postcard")]
    pub(super) fn read_all(path: &Path) -> RcfResult<Vec<u8>> {
        std::fs::read(path)
            .map_err(|e| RcfError::DeserializationFailed(format!("read {}: {e}", path.display())))
    }

    /// Read the full text content of `path` into memory. No size limit
    /// is applied here; callers enforce their own cap.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::DeserializationFailed`] when the read fails,
    /// which includes content that is not valid UTF-8.
    #[cfg(feature = "serde_json")]
    pub(super) fn read_all_string(path: &Path) -> RcfResult<String> {
        std::fs::read_to_string(path)
            .map_err(|e| RcfError::DeserializationFailed(format!("read {}: {e}", path.display())))
    }
}
198
199impl<const D: usize> RandomCutForest<D> {
200    /// Serialise the forest into a versioned binary blob.
201    ///
202    /// # Errors
203    ///
204    /// Returns [`RcfError::SerializationFailed`] when the underlying
205    /// `postcard` encoder rejects the payload.
206    #[cfg(feature = "postcard")]
207    pub fn to_bytes(&self) -> RcfResult<Vec<u8>> {
208        let mut out = Vec::with_capacity(VERSION_PREFIX_BYTES + 4096);
209        out.extend_from_slice(&PERSISTENCE_VERSION.to_le_bytes());
210        let payload = postcard::to_allocvec(self)
211            .map_err(|e| RcfError::SerializationFailed(e.to_string()))?;
212        out.extend_from_slice(&payload);
213        Ok(out)
214    }
215
216    /// Reload a forest previously produced by [`to_bytes`](Self::to_bytes).
217    ///
218    /// # Errors
219    ///
220    /// - [`RcfError::DeserializationFailed`] when the byte slice is
221    ///   too short to hold the version prefix, longer than
222    ///   [`MAX_DESERIALIZE_BYTES`], or the `postcard` payload is
223    ///   malformed.
224    /// - [`RcfError::IncompatibleVersion`] when the embedded version
225    ///   does not match [`PERSISTENCE_VERSION`].
226    ///
227    /// # Security
228    ///
229    /// Designed for trusted checkpoints — see the module-level
230    /// `# Security` section. The size cap defends against a
231    /// caller-controlled OOM at decode time; the version prefix
232    /// rejects schema drift before the third-party decoder runs.
233    /// Pair with an out-of-band integrity check (HMAC / signature /
234    /// authenticated transport) when bytes originate outside the
235    /// process trust boundary. Use
236    /// [`Self::from_bytes_with_max_size`] when the deployment's
237    /// expected payload exceeds [`MAX_DESERIALIZE_BYTES`].
238    #[cfg(feature = "postcard")]
239    pub fn from_bytes(bytes: &[u8]) -> RcfResult<Self> {
240        Self::from_bytes_with_max_size(bytes, MAX_DESERIALIZE_BYTES)
241    }
242
243    /// Variant of [`Self::from_bytes`] that accepts a caller-supplied
244    /// byte-length cap. Use when a high-D / large-arena deployment's
245    /// snapshot legitimately exceeds [`MAX_DESERIALIZE_BYTES`].
246    ///
247    /// # Errors
248    ///
249    /// Same as [`Self::from_bytes`] but the size check uses `max`
250    /// instead of [`MAX_DESERIALIZE_BYTES`].
251    ///
252    /// # Security
253    ///
254    /// Same trust model as [`Self::from_bytes`]. Setting `max` very
255    /// large (close to `usize::MAX`) effectively disables the OOM
256    /// guard — only do this on payloads that have already passed an
257    /// out-of-band integrity check.
258    #[cfg(feature = "postcard")]
259    pub fn from_bytes_with_max_size(bytes: &[u8], max: usize) -> RcfResult<Self> {
260        enforce_size_cap(bytes.len(), max, "RandomCutForest postcard")?;
261        let version = read_version_prefix(bytes)?;
262        if version != PERSISTENCE_VERSION {
263            return Err(RcfError::IncompatibleVersion {
264                found: version,
265                expected: PERSISTENCE_VERSION,
266            });
267        }
268        let forest: Self = postcard::from_bytes(&bytes[VERSION_PREFIX_BYTES..])
269            .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
270        Ok(forest)
271    }
272
273    /// Atomically serialise the forest to `path` using the binary
274    /// encoding. Writes `<path>.tmp`, `fsync`s it, then renames onto
275    /// `path` — a mid-write crash leaves the previous snapshot
276    /// intact.
277    ///
278    /// # Errors
279    ///
280    /// - [`RcfError::SerializationFailed`] for any filesystem or
281    ///   encoder failure.
282    #[cfg(all(feature = "postcard", feature = "std"))]
283    pub fn to_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
284        let bytes = self.to_bytes()?;
285        atomic::write_atomic(path.as_ref(), &bytes)
286    }
287
288    /// Reload a forest from `path` using the binary encoding.
289    ///
290    /// # Errors
291    ///
292    /// - [`RcfError::DeserializationFailed`] when the file cannot be
293    ///   read, exceeds [`MAX_DESERIALIZE_BYTES`], or the payload is
294    ///   malformed.
295    /// - [`RcfError::IncompatibleVersion`] when the embedded version
296    ///   does not match [`PERSISTENCE_VERSION`].
297    ///
298    /// # Security
299    ///
300    /// Inherits the trust model of [`Self::from_bytes`] — designed
301    /// for filesystem checkpoints written by a process the caller
302    /// controls. Hostile bytes on the path require an out-of-band
303    /// integrity check (HMAC / signature) before this call.
304    #[cfg(all(feature = "postcard", feature = "std"))]
305    pub fn from_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
306        let bytes = atomic::read_all(path.as_ref())?;
307        Self::from_bytes(&bytes)
308    }
309
310    /// Serialise the forest as JSON. The version field lives at
311    /// `"version"` alongside the payload at `"forest"`.
312    ///
313    /// # Errors
314    ///
315    /// Returns [`RcfError::SerializationFailed`] when `serde_json`
316    /// rejects the payload.
317    #[cfg(feature = "serde_json")]
318    pub fn to_json(&self) -> RcfResult<String> {
319        let envelope = JsonEnvelope {
320            version: PERSISTENCE_VERSION,
321            forest: self,
322        };
323        serde_json::to_string(&envelope).map_err(|e| RcfError::SerializationFailed(e.to_string()))
324    }
325
326    /// Reload a forest from JSON produced by [`to_json`](Self::to_json).
327    ///
328    /// # Errors
329    ///
330    /// - [`RcfError::DeserializationFailed`] when the JSON is
331    ///   malformed or longer than [`MAX_JSON_BYTES`].
332    /// - [`RcfError::IncompatibleVersion`] when the embedded version
333    ///   does not match [`PERSISTENCE_VERSION`].
334    ///
335    /// # Security
336    ///
337    /// See module-level `# Security` notes. Use
338    /// [`Self::from_json_with_max_size`] for legitimate payloads
339    /// above [`MAX_JSON_BYTES`].
340    #[cfg(feature = "serde_json")]
341    pub fn from_json(json: &str) -> RcfResult<Self> {
342        Self::from_json_with_max_size(json, MAX_JSON_BYTES)
343    }
344
345    /// Variant of [`Self::from_json`] with a caller-supplied
346    /// byte-length cap.
347    ///
348    /// # Errors
349    ///
350    /// Same as [`Self::from_json`] with `max` replacing
351    /// [`MAX_JSON_BYTES`].
352    ///
353    /// # Security
354    ///
355    /// See module-level `# Security` notes.
356    #[cfg(feature = "serde_json")]
357    pub fn from_json_with_max_size(json: &str, max: usize) -> RcfResult<Self> {
358        enforce_size_cap(json.len(), max, "RandomCutForest JSON")?;
359        let envelope: JsonEnvelopeOwned<D> = serde_json::from_str(json)
360            .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
361        if envelope.version != PERSISTENCE_VERSION {
362            return Err(RcfError::IncompatibleVersion {
363                found: envelope.version,
364                expected: PERSISTENCE_VERSION,
365            });
366        }
367        Ok(envelope.forest)
368    }
369
370    /// Atomically write the forest as JSON to `path`. Same atomic
371    /// write discipline as [`to_path`](Self::to_path).
372    ///
373    /// # Errors
374    ///
375    /// - [`RcfError::SerializationFailed`] for any filesystem or
376    ///   encoder failure.
377    #[cfg(all(feature = "serde_json", feature = "std"))]
378    pub fn to_json_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
379        let json = self.to_json()?;
380        atomic::write_atomic(path.as_ref(), json.as_bytes())
381    }
382
383    /// Reload a forest from a JSON file at `path`.
384    ///
385    /// # Errors
386    ///
387    /// - [`RcfError::DeserializationFailed`] when the file cannot be
388    ///   read, exceeds [`MAX_JSON_BYTES`], or the JSON is malformed.
389    /// - [`RcfError::IncompatibleVersion`] when the embedded version
390    ///   does not match [`PERSISTENCE_VERSION`].
391    ///
392    /// # Security
393    ///
394    /// Inherits the trust model of [`Self::from_json`].
395    #[cfg(all(feature = "serde_json", feature = "std"))]
396    pub fn from_json_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
397        let json = atomic::read_all_string(path.as_ref())?;
398        Self::from_json(&json)
399    }
400}
401
402impl<const D: usize> ThresholdedForest<D> {
403    /// Serialise the thresholded detector into a versioned binary blob.
404    ///
405    /// The payload carries the underlying forest, the threshold
406    /// configuration, and the EMA statistics — enough for a receiver
407    /// to resume scoring and emitting graded verdicts without a
408    /// warmup gap.
409    ///
410    /// # Errors
411    ///
412    /// Returns [`RcfError::SerializationFailed`] when the underlying
413    /// `postcard` encoder rejects the payload.
414    #[cfg(feature = "postcard")]
415    pub fn to_bytes(&self) -> RcfResult<Vec<u8>> {
416        let mut out = Vec::with_capacity(VERSION_PREFIX_BYTES + 4096);
417        out.extend_from_slice(&THRESHOLDED_PERSISTENCE_VERSION.to_le_bytes());
418        let payload = postcard::to_allocvec(self)
419            .map_err(|e| RcfError::SerializationFailed(e.to_string()))?;
420        out.extend_from_slice(&payload);
421        Ok(out)
422    }
423
424    /// Reload a thresholded detector previously produced by
425    /// [`to_bytes`](Self::to_bytes).
426    ///
427    /// # Errors
428    ///
429    /// - [`RcfError::DeserializationFailed`] when the byte slice is
430    ///   too short to hold the version prefix, longer than
431    ///   [`MAX_DESERIALIZE_BYTES`], or the `postcard` payload is
432    ///   malformed.
433    /// - [`RcfError::IncompatibleVersion`] when the embedded version
434    ///   does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
435    ///
436    /// # Security
437    ///
438    /// Designed for trusted checkpoints — see the module-level
439    /// `# Security` section. Use [`Self::from_bytes_with_max_size`]
440    /// when the deployment's expected payload exceeds
441    /// [`MAX_DESERIALIZE_BYTES`].
442    #[cfg(feature = "postcard")]
443    pub fn from_bytes(bytes: &[u8]) -> RcfResult<Self> {
444        Self::from_bytes_with_max_size(bytes, MAX_DESERIALIZE_BYTES)
445    }
446
447    /// Variant of [`Self::from_bytes`] with a caller-supplied
448    /// byte-length cap.
449    ///
450    /// # Errors
451    ///
452    /// Same as [`Self::from_bytes`] with `max` replacing
453    /// [`MAX_DESERIALIZE_BYTES`].
454    ///
455    /// # Security
456    ///
457    /// See module-level `# Security` notes.
458    #[cfg(feature = "postcard")]
459    pub fn from_bytes_with_max_size(bytes: &[u8], max: usize) -> RcfResult<Self> {
460        enforce_size_cap(bytes.len(), max, "ThresholdedForest postcard")?;
461        let version = read_version_prefix(bytes)?;
462        if version != THRESHOLDED_PERSISTENCE_VERSION {
463            return Err(RcfError::IncompatibleVersion {
464                found: version,
465                expected: THRESHOLDED_PERSISTENCE_VERSION,
466            });
467        }
468        let detector: Self = postcard::from_bytes(&bytes[VERSION_PREFIX_BYTES..])
469            .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
470        Ok(detector)
471    }
472
473    /// Atomically serialise the thresholded detector to `path`. Same
474    /// atomic write discipline as [`RandomCutForest::to_path`].
475    ///
476    /// # Errors
477    ///
478    /// - [`RcfError::SerializationFailed`] for any filesystem or
479    ///   encoder failure.
480    #[cfg(all(feature = "postcard", feature = "std"))]
481    pub fn to_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
482        let bytes = self.to_bytes()?;
483        atomic::write_atomic(path.as_ref(), &bytes)
484    }
485
486    /// Reload a thresholded detector from `path`.
487    ///
488    /// # Errors
489    ///
490    /// - [`RcfError::DeserializationFailed`] when the file cannot be
491    ///   read, exceeds [`MAX_DESERIALIZE_BYTES`], or the payload is
492    ///   malformed.
493    /// - [`RcfError::IncompatibleVersion`] when the embedded version
494    ///   does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
495    ///
496    /// # Security
497    ///
498    /// Inherits the trust model of [`Self::from_bytes`].
499    #[cfg(all(feature = "postcard", feature = "std"))]
500    pub fn from_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
501        let bytes = atomic::read_all(path.as_ref())?;
502        Self::from_bytes(&bytes)
503    }
504
505    /// Serialise the thresholded detector as JSON.
506    ///
507    /// # Errors
508    ///
509    /// Returns [`RcfError::SerializationFailed`] when `serde_json`
510    /// rejects the payload.
511    #[cfg(feature = "serde_json")]
512    pub fn to_json(&self) -> RcfResult<String> {
513        let envelope = ThresholdedJsonEnvelope {
514            version: THRESHOLDED_PERSISTENCE_VERSION,
515            detector: self,
516        };
517        serde_json::to_string(&envelope).map_err(|e| RcfError::SerializationFailed(e.to_string()))
518    }
519
520    /// Reload a thresholded detector from JSON.
521    ///
522    /// # Errors
523    ///
524    /// - [`RcfError::DeserializationFailed`] when the JSON is
525    ///   malformed or longer than [`MAX_JSON_BYTES`].
526    /// - [`RcfError::IncompatibleVersion`] when the embedded version
527    ///   does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
528    ///
529    /// # Security
530    ///
531    /// See module-level `# Security` notes.
532    #[cfg(feature = "serde_json")]
533    pub fn from_json(json: &str) -> RcfResult<Self> {
534        Self::from_json_with_max_size(json, MAX_JSON_BYTES)
535    }
536
537    /// Variant of [`Self::from_json`] with a caller-supplied
538    /// byte-length cap.
539    ///
540    /// # Errors
541    ///
542    /// Same as [`Self::from_json`] with `max` replacing
543    /// [`MAX_JSON_BYTES`].
544    ///
545    /// # Security
546    ///
547    /// See module-level `# Security` notes.
548    #[cfg(feature = "serde_json")]
549    pub fn from_json_with_max_size(json: &str, max: usize) -> RcfResult<Self> {
550        enforce_size_cap(json.len(), max, "ThresholdedForest JSON")?;
551        let envelope: ThresholdedJsonEnvelopeOwned<D> = serde_json::from_str(json)
552            .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
553        if envelope.version != THRESHOLDED_PERSISTENCE_VERSION {
554            return Err(RcfError::IncompatibleVersion {
555                found: envelope.version,
556                expected: THRESHOLDED_PERSISTENCE_VERSION,
557            });
558        }
559        Ok(envelope.detector)
560    }
561
562    /// Atomically write the thresholded detector as JSON to `path`.
563    ///
564    /// # Errors
565    ///
566    /// - [`RcfError::SerializationFailed`] for any filesystem or
567    ///   encoder failure.
568    #[cfg(all(feature = "serde_json", feature = "std"))]
569    pub fn to_json_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
570        let json = self.to_json()?;
571        atomic::write_atomic(path.as_ref(), json.as_bytes())
572    }
573
574    /// Reload a thresholded detector from a JSON file at `path`.
575    ///
576    /// # Errors
577    ///
578    /// - [`RcfError::DeserializationFailed`] when the file cannot be
579    ///   read, exceeds [`MAX_JSON_BYTES`], or the JSON is malformed.
580    /// - [`RcfError::IncompatibleVersion`] when the embedded version
581    ///   does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
582    ///
583    /// # Security
584    ///
585    /// Inherits the trust model of [`Self::from_json`].
586    #[cfg(all(feature = "serde_json", feature = "std"))]
587    pub fn from_json_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
588        let json = atomic::read_all_string(path.as_ref())?;
589        Self::from_json(&json)
590    }
591}
592
/// JSON envelope used by [`RandomCutForest::to_json`] — borrows the
/// forest to avoid an unnecessary clone during serialisation.
///
/// The field names are the JSON keys (`"version"`, `"forest"`) and
/// must stay identical to those of [`JsonEnvelopeOwned`], which reads
/// the same document back.
#[cfg(feature = "serde_json")]
#[derive(serde::Serialize)]
struct JsonEnvelope<'a, const D: usize> {
    /// Persistence format version embedded alongside the payload.
    version: u32,
    /// Borrowed forest to be serialised.
    forest: &'a RandomCutForest<D>,
}
603
/// JSON envelope used by [`RandomCutForest::from_json`] — owns the
/// reconstructed forest.
///
/// Owned counterpart of [`JsonEnvelope`]: the field names are the
/// JSON keys (`"version"`, `"forest"`) and must stay identical on
/// both structs for a round trip to work.
#[cfg(feature = "serde_json")]
#[derive(serde::Deserialize)]
struct JsonEnvelopeOwned<const D: usize> {
    /// Persistence format version embedded alongside the payload.
    version: u32,
    /// Reconstructed forest owned by the envelope.
    forest: RandomCutForest<D>,
}
614
/// JSON envelope for [`ThresholdedForest::to_json`] — borrows the
/// detector rather than cloning it.
///
/// The field names are the JSON keys (`"version"`, `"detector"`) and
/// must stay identical to those of [`ThresholdedJsonEnvelopeOwned`],
/// which reads the same document back.
#[cfg(feature = "serde_json")]
#[derive(serde::Serialize)]
struct ThresholdedJsonEnvelope<'a, const D: usize> {
    /// Persistence format version embedded alongside the payload.
    version: u32,
    /// Borrowed detector to be serialised.
    detector: &'a ThresholdedForest<D>,
}
624
/// JSON envelope for [`ThresholdedForest::from_json`] — owns the
/// reconstructed detector.
///
/// Owned counterpart of [`ThresholdedJsonEnvelope`]: the field names
/// are the JSON keys (`"version"`, `"detector"`) and must stay
/// identical on both structs for a round trip to work.
#[cfg(feature = "serde_json")]
#[derive(serde::Deserialize)]
struct ThresholdedJsonEnvelopeOwned<const D: usize> {
    /// Persistence format version embedded alongside the payload.
    version: u32,
    /// Reconstructed detector owned by the envelope.
    detector: ThresholdedForest<D>,
}
634
635#[cfg(all(test, feature = "postcard"))]
636#[allow(clippy::float_cmp, clippy::cast_precision_loss, clippy::cast_lossless)] // Roundtrip asserts bit-exact equality + small bounded counters.
637mod binary_tests {
638    use super::*;
639    use crate::ForestBuilder;
640
641    fn trained_forest(seed: u64, updates: usize) -> RandomCutForest<2> {
642        let mut f = ForestBuilder::<2>::new()
643            .num_trees(50)
644            .sample_size(16)
645            .seed(seed)
646            .build()
647            .unwrap();
648        for i in 0..updates {
649            #[allow(clippy::cast_precision_loss)]
650            let v = i as f64 * 0.01;
651            f.update([v, v + 0.5]).unwrap();
652        }
653        f
654    }
655
656    #[test]
657    fn version_prefix_present() {
658        let f = trained_forest(2026, 10);
659        let bytes = f.to_bytes().unwrap();
660        assert!(bytes.len() >= VERSION_PREFIX_BYTES);
661        let mut v = [0_u8; 4];
662        v.copy_from_slice(&bytes[..4]);
663        assert_eq!(u32::from_le_bytes(v), PERSISTENCE_VERSION);
664    }
665
666    #[test]
667    fn empty_forest_roundtrip() {
668        let f = ForestBuilder::<4>::new()
669            .num_trees(50)
670            .sample_size(16)
671            .seed(1)
672            .build()
673            .unwrap();
674        let bytes = f.to_bytes().unwrap();
675        let back = RandomCutForest::<4>::from_bytes(&bytes).unwrap();
676        assert_eq!(back.num_trees(), f.num_trees());
677        assert_eq!(back.sample_size(), f.sample_size());
678        assert_eq!(back.dimension(), f.dimension());
679    }
680
681    #[test]
682    fn trained_forest_score_roundtrip() {
683        let f = trained_forest(7, 200);
684        let bytes = f.to_bytes().unwrap();
685        let back = RandomCutForest::<2>::from_bytes(&bytes).unwrap();
686        let probe = [1.5_f64, 2.0];
687        let s1: f64 = f.score(&probe).unwrap().into();
688        let s2: f64 = back.score(&probe).unwrap().into();
689        assert_eq!(s1, s2);
690    }
691
692    #[test]
693    fn time_decay_roundtrip() {
694        let mut f = ForestBuilder::<2>::new()
695            .num_trees(50)
696            .sample_size(16)
697            .time_decay(0.05)
698            .seed(11)
699            .build()
700            .unwrap();
701        for i in 0..100 {
702            #[allow(clippy::cast_precision_loss)]
703            let v = i as f64;
704            f.update([v, v]).unwrap();
705        }
706        let bytes = f.to_bytes().unwrap();
707        let back = RandomCutForest::<2>::from_bytes(&bytes).unwrap();
708        assert_eq!(f.config().time_decay, back.config().time_decay);
709        let probe = [10.0_f64, 10.0];
710        assert_eq!(
711            f64::from(f.score(&probe).unwrap()),
712            f64::from(back.score(&probe).unwrap())
713        );
714    }
715
716    #[test]
717    fn truncated_bytes_rejected() {
718        let bytes = [0_u8; 2];
719        let err = RandomCutForest::<2>::from_bytes(&bytes).unwrap_err();
720        assert!(matches!(err, RcfError::DeserializationFailed(_)));
721    }
722
723    #[test]
724    fn version_mismatch_rejected() {
725        let f = trained_forest(2026, 5);
726        let mut bytes = f.to_bytes().unwrap();
727        let bogus_version = (PERSISTENCE_VERSION + 99).to_le_bytes();
728        bytes[..VERSION_PREFIX_BYTES].copy_from_slice(&bogus_version);
729        let err = RandomCutForest::<2>::from_bytes(&bytes).unwrap_err();
730        match err {
731            RcfError::IncompatibleVersion { found, expected } => {
732                assert_eq!(found, PERSISTENCE_VERSION + 99);
733                assert_eq!(expected, PERSISTENCE_VERSION);
734            }
735            other => panic!("expected IncompatibleVersion, got {other:?}"),
736        }
737    }
738
739    #[test]
740    fn malformed_payload_rejected() {
741        let mut bytes = Vec::new();
742        bytes.extend_from_slice(&PERSISTENCE_VERSION.to_le_bytes());
743        bytes.extend_from_slice(&[0xFF; 16]);
744        let err = RandomCutForest::<2>::from_bytes(&bytes).unwrap_err();
745        assert!(matches!(err, RcfError::DeserializationFailed(_)));
746    }
747
/// A buffer one byte longer than `MAX_DESERIALIZE_BYTES` must be
/// refused by the default-cap entry point.
///
/// The buffer is a valid version prefix padded with `0xAA` filler.
/// Real-world snapshots do not approach the cap. Note that the
/// assertion only checks the error variant, not which stage raised it.
#[test]
fn oversize_payload_rejected_by_default_cap() {
    let oversize_len = MAX_DESERIALIZE_BYTES + 1;
    let mut payload = PERSISTENCE_VERSION.to_le_bytes().to_vec();
    payload.resize(oversize_len, 0xAA);

    let outcome = RandomCutForest::<2>::from_bytes(&payload);
    assert!(matches!(
        outcome.unwrap_err(),
        RcfError::DeserializationFailed(_)
    ));
}
760
/// A legitimate snapshot must load through the explicit-cap variant
/// when the cap is `MAX_DESERIALIZE_BYTES`, and the restored forest
/// must report the same update count as the source forest.
#[test]
fn from_bytes_with_max_size_accepts_higher_cap() {
    let source = trained_forest(7, 50);
    let snapshot = source.to_bytes().unwrap();

    let restored =
        RandomCutForest::<2>::from_bytes_with_max_size(&snapshot, MAX_DESERIALIZE_BYTES);
    assert_eq!(restored.unwrap().updates_seen(), source.updates_seen());
}
771
/// A cap one byte smaller than the snapshot itself must make the
/// explicit-cap variant fail with `RcfError::DeserializationFailed`.
#[test]
fn from_bytes_with_max_size_rejects_below_payload_size() {
    let snapshot = trained_forest(7, 50).to_bytes().unwrap();
    let cap = snapshot.len() - 1;

    let outcome = RandomCutForest::<2>::from_bytes_with_max_size(&snapshot, cap);
    assert!(matches!(
        outcome.unwrap_err(),
        RcfError::DeserializationFailed(_)
    ));
}
781
/// The `updates_seen` counter must survive a binary
/// serialise/deserialise cycle unchanged.
#[test]
fn updates_seen_counter_roundtrips() {
    let source = trained_forest(42, 75);
    let expected = source.updates_seen();

    let snapshot = source.to_bytes().unwrap();
    let restored = RandomCutForest::<2>::from_bytes(&snapshot).unwrap();
    assert_eq!(restored.updates_seen(), expected);
}
790}
791
#[cfg(all(test, feature = "serde_json"))]
// Scores are compared with exact `f64` equality on purpose: the tests
// require a JSON round-trip to reproduce the score exactly.
#[allow(clippy::float_cmp)]
mod json_tests {
    use super::*;
    use crate::ForestBuilder;

    /// Builds a small two-dimensional forest (50 trees, sample size 8,
    /// seed 2026) and feeds it 30 points of the form `[i, i + 1]`.
    fn small_trained() -> RandomCutForest<2> {
        let mut forest = ForestBuilder::<2>::new()
            .num_trees(50)
            .sample_size(8)
            .seed(2026)
            .build()
            .unwrap();
        for step in 0_i32..30 {
            // `i32 -> f64` is lossless, so no cast lint is needed.
            let v = f64::from(step);
            forest.update([v, v + 1.0]).unwrap();
        }
        forest
    }

    /// Scores `probe` against `forest` and converts the result to `f64`.
    fn score_of(forest: &RandomCutForest<2>, probe: &[f64; 2]) -> f64 {
        forest.score(probe).unwrap().into()
    }

    /// A forest restored from JSON must score a probe exactly like the
    /// forest it was serialised from.
    #[test]
    fn json_roundtrip_preserves_score() {
        let original = small_trained();
        let json = original.to_json().unwrap();
        let restored = RandomCutForest::<2>::from_json(&json).unwrap();
        let probe = [3.0_f64, 4.0];
        assert_eq!(score_of(&original, &probe), score_of(&restored, &probe));
    }

    /// The JSON envelope must carry a `version` field whose value is
    /// exactly `PERSISTENCE_VERSION`.
    ///
    /// The value is parsed from the digits that follow `"version":`
    /// rather than matched as a bare `:<n>` substring, which any other
    /// numeric field starting with the same digits would also satisfy.
    #[test]
    fn json_envelope_carries_version_field() {
        let json = small_trained().to_json().unwrap();
        let (_, after_key) = json
            .split_once("\"version\":")
            .expect("envelope must contain a \"version\" field");
        let digits: String = after_key
            .chars()
            .take_while(char::is_ascii_digit)
            .collect();
        assert_eq!(digits, PERSISTENCE_VERSION.to_string());
    }

    /// Rewriting the envelope's version to an unsupported value must
    /// make `from_json` fail with `RcfError::IncompatibleVersion`.
    #[test]
    fn json_version_mismatch_rejected() {
        let json = small_trained().to_json().unwrap();
        let bogus = json.replace(
            &format!("\"version\":{PERSISTENCE_VERSION}"),
            &format!("\"version\":{}", PERSISTENCE_VERSION + 99),
        );
        // Guard against the replacement silently matching nothing, so a
        // format change is reported here instead of as an unwrap panic.
        assert_ne!(bogus, json, "version field was not rewritten");
        let err = RandomCutForest::<2>::from_json(&bogus).unwrap_err();
        assert!(matches!(err, RcfError::IncompatibleVersion { .. }));
    }

    /// Text that is not JSON at all must be refused with
    /// `RcfError::DeserializationFailed`.
    #[test]
    fn json_malformed_rejected() {
        assert!(matches!(
            RandomCutForest::<2>::from_json("not json").unwrap_err(),
            RcfError::DeserializationFailed(_)
        ));
    }

    /// Exercises the size cap through the explicit-cap variant with a
    /// cap one byte below the payload length. Feeding a real 1 GiB
    /// string into the default-cap variant would cost the test runner
    /// too much memory.
    #[test]
    fn json_oversize_payload_rejected_by_default_cap() {
        let json = small_trained().to_json().unwrap();
        let err = RandomCutForest::<2>::from_json_with_max_size(&json, json.len() - 1).unwrap_err();
        assert!(matches!(err, RcfError::DeserializationFailed(_)));
    }

    /// The explicit-cap variant called with `MAX_JSON_BYTES` must
    /// restore a forest that scores a probe exactly like the original.
    #[test]
    fn json_with_max_size_round_trips_at_default_cap() {
        let original = small_trained();
        let json = original.to_json().unwrap();
        let restored =
            RandomCutForest::<2>::from_json_with_max_size(&json, MAX_JSON_BYTES).unwrap();
        let probe = [3.0_f64, 4.0];
        assert_eq!(score_of(&original, &probe), score_of(&restored, &probe));
    }
}