anomstream_core/persistence.rs
1//! Optional persistence helpers for [`crate::RandomCutForest`] and
2//! [`crate::ThresholdedForest`].
3//!
4//! Gated behind the `serde` cargo feature. Four flavours are
5//! exposed:
6//!
7//! - **Binary bytes** (`to_bytes` / `from_bytes`, gated on `postcard`):
8//! a compact `postcard` payload prefixed with a 4-byte
9//! little-endian version field. Use this for on-disk snapshots or
10//! to ship forests over a network socket. (`postcard` replaced
11//! `bincode` in persistence format v2 after the `bincode` crate
12//! was marked unmaintained by `RustSec` in 2025.)
13//! - **JSON text** (`to_json` / `from_json`, gated on `serde_json`):
14//! a human-readable text encoding wrapping the same versioned
15//! envelope. Useful for debugging or for callers who already pipe
16//! JSON elsewhere.
17//! - **Atomic file path** (`to_path` / `from_path`, gated on
18//! `postcard + std`): write-tmp-then-rename + `fsync` so a crash or
19//! power-loss mid-save cannot corrupt the snapshot on disk. Pair
20//! with periodic checkpointing for **warm reload** — the detector
21//! resumes exactly where it left off across restarts.
22//! - **JSON file path** (`to_json_path` / `from_json_path`, gated on
23//! `serde_json + std`): same atomic write discipline, human-readable
24//! payload.
25//!
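//! For the binary encoding the version prefix lives **outside** the
//! serialised payload, so a version skew is detected before `postcard`
//! runs against arbitrary bytes — a defence against malformed
//! payload-driven panics. The JSON encoding carries the version as a
//! field of the envelope itself, so it can only be compared after
//! `serde_json` has parsed the text.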
30//!
31//! Both encodings preserve the per-point dimensionality `D` at the
32//! type level — callers must deserialise into a type with the same
33//! compile-time `D` that produced the payload.
34//!
35//! # Security
36//!
37//! These deserialisers are designed for **trusted checkpoints**:
38//! payloads produced by an earlier process you control, stored on
39//! a filesystem you control, and reloaded at warm-restart time. The
40//! `postcard` and `serde_json` decoders accept any well-formed
41//! payload that matches the schema — they perform no integrity check
42//! beyond the 4-byte version prefix and have no built-in cap on
43//! recursion depth, so a deliberately malformed payload could in
44//! principle drive an out-of-memory or stack-overflow condition.
45//!
46//! The current [`RandomCutForest`] / [`ThresholdedForest`] schema is
47//! arena-backed (flat `Vec<InternalData>` / `Vec<LeafData>`, no
48//! recursive type nesting) so the recursion-depth attack surface is
49//! limited in practice — but pretending the format is hostile-input-
50//! safe would be wrong. Defence-in-depth measures shipped here:
51//!
52//! - [`MAX_DESERIALIZE_BYTES`] / [`MAX_JSON_BYTES`] caps reject
53//! absurdly large payloads up front (configurable per call via the
54//! `*_with_max_size` variants).
55//! - The 4-byte version prefix is checked **before** any third-party
56//! decoder runs against the bytes.
57//! - All deserialisers return a typed error rather than panicking
58//! on truncated, mismatched, or malformed input.
59//!
60//! For checkpoints sourced from outside the process boundary
61//! (network sync, multi-tenant restore endpoints, partner-supplied
62//! state), pair these helpers with an out-of-band integrity check
63//! (HMAC, signature, or transport-level TLS+auth) before calling
64//! `from_bytes` / `from_path` / `from_json` / `from_json_path`. Do
65//! **not** feed unauthenticated bytes from a hostile source.
66
67#[cfg(any(feature = "postcard", feature = "serde_json"))]
68use crate::error::{RcfError, RcfResult};
69use crate::forest::RandomCutForest;
70#[cfg(feature = "serde")]
71use crate::thresholded::ThresholdedForest;
72
/// On-disk format revision for [`RandomCutForest`] snapshots. Every
/// breaking layout change must bump it. Revision history:
///
/// - `4` — `NodeStore` arenas split into typed `InternalData` /
///   `LeafData` records (saves ~90 % on leaf-arena memory at
///   `D = 16`).
/// - `3` — added the per-point timestamp side-map used by
///   [`RandomCutForest::update_at`] /
///   [`RandomCutForest::delete_before`].
/// - `2` — first `postcard` payload, adopted after `RustSec` flagged
///   `bincode` as unmaintained.
/// - `1` — the original `bincode 2` payload.
pub const PERSISTENCE_VERSION: u32 = 4;

/// On-disk format revision for [`ThresholdedForest`] snapshots. Kept
/// separate from [`PERSISTENCE_VERSION`] because the threshold
/// envelope carries extra state (EMA stats, threshold config) that
/// evolves on its own cadence. Revision `4` inherits the forest's
/// typed-arena bump.
pub const THRESHOLDED_PERSISTENCE_VERSION: u32 = 4;

/// Width, in bytes, of the little-endian version prefix that precedes
/// every binary payload.
pub const VERSION_PREFIX_BYTES: usize = 4;

/// Default ceiling (256 MiB) on the `postcard` payload size accepted
/// by [`RandomCutForest::from_bytes`] /
/// [`ThresholdedForest::from_bytes`]. Sized for a typical `D` ≤ 64,
/// `num_trees` ≤ 1000, `sample_size` ≤ 2048 deployment with
/// comfortable headroom; larger workloads (high-`D` detectors with
/// extensive arenas) call the
/// [`RandomCutForest::from_bytes_with_max_size`] /
/// [`ThresholdedForest::from_bytes_with_max_size`] variants and pass
/// an explicit cap.
pub const MAX_DESERIALIZE_BYTES: usize = 256 << 20;

/// Default ceiling (1 GiB) on the `serde_json` payload size accepted
/// by [`RandomCutForest::from_json`] / [`ThresholdedForest::from_json`].
/// JSON encodings are roughly 4× the binary equivalent (utf-8 floats,
/// field-name overhead), hence the correspondingly larger cap.
pub const MAX_JSON_BYTES: usize = 1 << 30;
107
/// Size guard run ahead of every third-party decoder: succeeds when
/// `len` is at most `max`, otherwise fails with
/// [`RcfError::DeserializationFailed`]. `kind` labels the payload in
/// the error text.
#[cfg(any(feature = "postcard", feature = "serde_json"))]
fn enforce_size_cap(len: usize, max: usize, kind: &'static str) -> RcfResult<()> {
    if len <= max {
        return Ok(());
    }
    let message = format!(
        "{kind} payload {len} byte(s) exceeds cap {max} (caller-controlled OOM guard) — \
         use the `*_with_max_size` variant to opt into a larger bound"
    );
    Err(RcfError::DeserializationFailed(message))
}
120
/// Interpret the leading [`VERSION_PREFIX_BYTES`] bytes of `bytes` as
/// a little-endian `u32` persistence version.
///
/// # Errors
///
/// Returns [`RcfError::DeserializationFailed`] when `bytes` holds
/// fewer than [`VERSION_PREFIX_BYTES`] bytes.
#[cfg(feature = "postcard")]
fn read_version_prefix(bytes: &[u8]) -> RcfResult<u32> {
    match bytes.get(..VERSION_PREFIX_BYTES) {
        Some(head) => {
            let mut raw = [0_u8; VERSION_PREFIX_BYTES];
            raw.copy_from_slice(head);
            Ok(u32::from_le_bytes(raw))
        }
        // `get` yields `None` exactly when the slice is shorter than the prefix.
        None => Err(RcfError::DeserializationFailed(format!(
            "payload too short: {} byte(s), need at least {VERSION_PREFIX_BYTES}",
            bytes.len()
        ))),
    }
}
139
/// Path helpers for atomic write-tmp-rename persistence.
///
/// The tmp suffix is appended to the caller-supplied path so the temp
/// file lives in the same filesystem — rename is only atomic within a
/// single filesystem. The file is `fsync`'d before the rename so a
/// power-loss between `write` and `rename` cannot leave a partially
/// written snapshot on disk. When any step after the temp file has
/// been created fails, the temp file is removed again so failed saves
/// do not accumulate stale `<path>.tmp` files.
#[cfg(all(feature = "std", any(feature = "postcard", feature = "serde_json")))]
mod atomic {
    use std::ffi::OsString;
    use std::fs::{File, rename};
    use std::io::Write;
    use std::path::{Path, PathBuf};

    use crate::error::{RcfError, RcfResult};

    /// Compute the temporary path used for the atomic write:
    /// the caller's path with `.tmp` appended to its final component.
    pub(super) fn tmp_path(path: &Path) -> PathBuf {
        let mut s: OsString = path.as_os_str().to_owned();
        s.push(".tmp");
        PathBuf::from(s)
    }

    /// Write `bytes` to `path` atomically: tmp file first, fsync,
    /// then rename onto the target.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::SerializationFailed`] naming the failing
    /// step (`create`, `write`, `fsync` or `rename`). If the failure
    /// happens after the temp file was created, the temp file is
    /// removed on a best-effort basis before the error is returned.
    pub(super) fn write_atomic(path: &Path, bytes: &[u8]) -> RcfResult<()> {
        let tmp = tmp_path(path);
        let file = File::create(&tmp)
            .map_err(|e| RcfError::SerializationFailed(format!("create {}: {e}", tmp.display())))?;
        // `stage_and_publish` takes ownership of the handle, so it is
        // closed by the time we get here — required on platforms that
        // refuse to unlink a file that is still open.
        let outcome = stage_and_publish(file, &tmp, path, bytes);
        if outcome.is_err() {
            // Best-effort cleanup: the original failure is what the
            // caller needs to see; a failed unlink is not actionable.
            let _ = std::fs::remove_file(&tmp);
        }
        outcome
    }

    /// Fill the already-created temp file, flush it to stable
    /// storage, close it and rename it onto `path`.
    fn stage_and_publish(mut file: File, tmp: &Path, path: &Path, bytes: &[u8]) -> RcfResult<()> {
        file.write_all(bytes)
            .map_err(|e| RcfError::SerializationFailed(format!("write {}: {e}", tmp.display())))?;
        file.sync_all()
            .map_err(|e| RcfError::SerializationFailed(format!("fsync {}: {e}", tmp.display())))?;
        // Close the handle before the rename.
        drop(file);
        rename(tmp, path).map_err(|e| {
            RcfError::SerializationFailed(format!(
                "rename {} -> {}: {e}",
                tmp.display(),
                path.display()
            ))
        })
    }

    /// Read the full byte content of `path`.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::DeserializationFailed`] when the file
    /// cannot be read.
    #[cfg(feature = "postcard")]
    pub(super) fn read_all(path: &Path) -> RcfResult<Vec<u8>> {
        std::fs::read(path)
            .map_err(|e| RcfError::DeserializationFailed(format!("read {}: {e}", path.display())))
    }

    /// Read the full text content of `path`.
    ///
    /// # Errors
    ///
    /// Returns [`RcfError::DeserializationFailed`] when the file
    /// cannot be read or is not valid UTF-8.
    #[cfg(feature = "serde_json")]
    pub(super) fn read_all_string(path: &Path) -> RcfResult<String> {
        std::fs::read_to_string(path)
            .map_err(|e| RcfError::DeserializationFailed(format!("read {}: {e}", path.display())))
    }
}
198
199impl<const D: usize> RandomCutForest<D> {
200 /// Serialise the forest into a versioned binary blob.
201 ///
202 /// # Errors
203 ///
204 /// Returns [`RcfError::SerializationFailed`] when the underlying
205 /// `postcard` encoder rejects the payload.
206 #[cfg(feature = "postcard")]
207 pub fn to_bytes(&self) -> RcfResult<Vec<u8>> {
208 let mut out = Vec::with_capacity(VERSION_PREFIX_BYTES + 4096);
209 out.extend_from_slice(&PERSISTENCE_VERSION.to_le_bytes());
210 let payload = postcard::to_allocvec(self)
211 .map_err(|e| RcfError::SerializationFailed(e.to_string()))?;
212 out.extend_from_slice(&payload);
213 Ok(out)
214 }
215
216 /// Reload a forest previously produced by [`to_bytes`](Self::to_bytes).
217 ///
218 /// # Errors
219 ///
220 /// - [`RcfError::DeserializationFailed`] when the byte slice is
221 /// too short to hold the version prefix, longer than
222 /// [`MAX_DESERIALIZE_BYTES`], or the `postcard` payload is
223 /// malformed.
224 /// - [`RcfError::IncompatibleVersion`] when the embedded version
225 /// does not match [`PERSISTENCE_VERSION`].
226 ///
227 /// # Security
228 ///
229 /// Designed for trusted checkpoints — see the module-level
230 /// `# Security` section. The size cap defends against a
231 /// caller-controlled OOM at decode time; the version prefix
232 /// rejects schema drift before the third-party decoder runs.
233 /// Pair with an out-of-band integrity check (HMAC / signature /
234 /// authenticated transport) when bytes originate outside the
235 /// process trust boundary. Use
236 /// [`Self::from_bytes_with_max_size`] when the deployment's
237 /// expected payload exceeds [`MAX_DESERIALIZE_BYTES`].
238 #[cfg(feature = "postcard")]
239 pub fn from_bytes(bytes: &[u8]) -> RcfResult<Self> {
240 Self::from_bytes_with_max_size(bytes, MAX_DESERIALIZE_BYTES)
241 }
242
243 /// Variant of [`Self::from_bytes`] that accepts a caller-supplied
244 /// byte-length cap. Use when a high-D / large-arena deployment's
245 /// snapshot legitimately exceeds [`MAX_DESERIALIZE_BYTES`].
246 ///
247 /// # Errors
248 ///
249 /// Same as [`Self::from_bytes`] but the size check uses `max`
250 /// instead of [`MAX_DESERIALIZE_BYTES`].
251 ///
252 /// # Security
253 ///
254 /// Same trust model as [`Self::from_bytes`]. Setting `max` very
255 /// large (close to `usize::MAX`) effectively disables the OOM
256 /// guard — only do this on payloads that have already passed an
257 /// out-of-band integrity check.
258 #[cfg(feature = "postcard")]
259 pub fn from_bytes_with_max_size(bytes: &[u8], max: usize) -> RcfResult<Self> {
260 enforce_size_cap(bytes.len(), max, "RandomCutForest postcard")?;
261 let version = read_version_prefix(bytes)?;
262 if version != PERSISTENCE_VERSION {
263 return Err(RcfError::IncompatibleVersion {
264 found: version,
265 expected: PERSISTENCE_VERSION,
266 });
267 }
268 let forest: Self = postcard::from_bytes(&bytes[VERSION_PREFIX_BYTES..])
269 .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
270 Ok(forest)
271 }
272
273 /// Atomically serialise the forest to `path` using the binary
274 /// encoding. Writes `<path>.tmp`, `fsync`s it, then renames onto
275 /// `path` — a mid-write crash leaves the previous snapshot
276 /// intact.
277 ///
278 /// # Errors
279 ///
280 /// - [`RcfError::SerializationFailed`] for any filesystem or
281 /// encoder failure.
282 #[cfg(all(feature = "postcard", feature = "std"))]
283 pub fn to_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
284 let bytes = self.to_bytes()?;
285 atomic::write_atomic(path.as_ref(), &bytes)
286 }
287
288 /// Reload a forest from `path` using the binary encoding.
289 ///
290 /// # Errors
291 ///
292 /// - [`RcfError::DeserializationFailed`] when the file cannot be
293 /// read, exceeds [`MAX_DESERIALIZE_BYTES`], or the payload is
294 /// malformed.
295 /// - [`RcfError::IncompatibleVersion`] when the embedded version
296 /// does not match [`PERSISTENCE_VERSION`].
297 ///
298 /// # Security
299 ///
300 /// Inherits the trust model of [`Self::from_bytes`] — designed
301 /// for filesystem checkpoints written by a process the caller
302 /// controls. Hostile bytes on the path require an out-of-band
303 /// integrity check (HMAC / signature) before this call.
304 #[cfg(all(feature = "postcard", feature = "std"))]
305 pub fn from_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
306 let bytes = atomic::read_all(path.as_ref())?;
307 Self::from_bytes(&bytes)
308 }
309
310 /// Serialise the forest as JSON. The version field lives at
311 /// `"version"` alongside the payload at `"forest"`.
312 ///
313 /// # Errors
314 ///
315 /// Returns [`RcfError::SerializationFailed`] when `serde_json`
316 /// rejects the payload.
317 #[cfg(feature = "serde_json")]
318 pub fn to_json(&self) -> RcfResult<String> {
319 let envelope = JsonEnvelope {
320 version: PERSISTENCE_VERSION,
321 forest: self,
322 };
323 serde_json::to_string(&envelope).map_err(|e| RcfError::SerializationFailed(e.to_string()))
324 }
325
326 /// Reload a forest from JSON produced by [`to_json`](Self::to_json).
327 ///
328 /// # Errors
329 ///
330 /// - [`RcfError::DeserializationFailed`] when the JSON is
331 /// malformed or longer than [`MAX_JSON_BYTES`].
332 /// - [`RcfError::IncompatibleVersion`] when the embedded version
333 /// does not match [`PERSISTENCE_VERSION`].
334 ///
335 /// # Security
336 ///
337 /// See module-level `# Security` notes. Use
338 /// [`Self::from_json_with_max_size`] for legitimate payloads
339 /// above [`MAX_JSON_BYTES`].
340 #[cfg(feature = "serde_json")]
341 pub fn from_json(json: &str) -> RcfResult<Self> {
342 Self::from_json_with_max_size(json, MAX_JSON_BYTES)
343 }
344
345 /// Variant of [`Self::from_json`] with a caller-supplied
346 /// byte-length cap.
347 ///
348 /// # Errors
349 ///
350 /// Same as [`Self::from_json`] with `max` replacing
351 /// [`MAX_JSON_BYTES`].
352 ///
353 /// # Security
354 ///
355 /// See module-level `# Security` notes.
356 #[cfg(feature = "serde_json")]
357 pub fn from_json_with_max_size(json: &str, max: usize) -> RcfResult<Self> {
358 enforce_size_cap(json.len(), max, "RandomCutForest JSON")?;
359 let envelope: JsonEnvelopeOwned<D> = serde_json::from_str(json)
360 .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
361 if envelope.version != PERSISTENCE_VERSION {
362 return Err(RcfError::IncompatibleVersion {
363 found: envelope.version,
364 expected: PERSISTENCE_VERSION,
365 });
366 }
367 Ok(envelope.forest)
368 }
369
370 /// Atomically write the forest as JSON to `path`. Same atomic
371 /// write discipline as [`to_path`](Self::to_path).
372 ///
373 /// # Errors
374 ///
375 /// - [`RcfError::SerializationFailed`] for any filesystem or
376 /// encoder failure.
377 #[cfg(all(feature = "serde_json", feature = "std"))]
378 pub fn to_json_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
379 let json = self.to_json()?;
380 atomic::write_atomic(path.as_ref(), json.as_bytes())
381 }
382
383 /// Reload a forest from a JSON file at `path`.
384 ///
385 /// # Errors
386 ///
387 /// - [`RcfError::DeserializationFailed`] when the file cannot be
388 /// read, exceeds [`MAX_JSON_BYTES`], or the JSON is malformed.
389 /// - [`RcfError::IncompatibleVersion`] when the embedded version
390 /// does not match [`PERSISTENCE_VERSION`].
391 ///
392 /// # Security
393 ///
394 /// Inherits the trust model of [`Self::from_json`].
395 #[cfg(all(feature = "serde_json", feature = "std"))]
396 pub fn from_json_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
397 let json = atomic::read_all_string(path.as_ref())?;
398 Self::from_json(&json)
399 }
400}
401
402impl<const D: usize> ThresholdedForest<D> {
403 /// Serialise the thresholded detector into a versioned binary blob.
404 ///
405 /// The payload carries the underlying forest, the threshold
406 /// configuration, and the EMA statistics — enough for a receiver
407 /// to resume scoring and emitting graded verdicts without a
408 /// warmup gap.
409 ///
410 /// # Errors
411 ///
412 /// Returns [`RcfError::SerializationFailed`] when the underlying
413 /// `postcard` encoder rejects the payload.
414 #[cfg(feature = "postcard")]
415 pub fn to_bytes(&self) -> RcfResult<Vec<u8>> {
416 let mut out = Vec::with_capacity(VERSION_PREFIX_BYTES + 4096);
417 out.extend_from_slice(&THRESHOLDED_PERSISTENCE_VERSION.to_le_bytes());
418 let payload = postcard::to_allocvec(self)
419 .map_err(|e| RcfError::SerializationFailed(e.to_string()))?;
420 out.extend_from_slice(&payload);
421 Ok(out)
422 }
423
424 /// Reload a thresholded detector previously produced by
425 /// [`to_bytes`](Self::to_bytes).
426 ///
427 /// # Errors
428 ///
429 /// - [`RcfError::DeserializationFailed`] when the byte slice is
430 /// too short to hold the version prefix, longer than
431 /// [`MAX_DESERIALIZE_BYTES`], or the `postcard` payload is
432 /// malformed.
433 /// - [`RcfError::IncompatibleVersion`] when the embedded version
434 /// does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
435 ///
436 /// # Security
437 ///
438 /// Designed for trusted checkpoints — see the module-level
439 /// `# Security` section. Use [`Self::from_bytes_with_max_size`]
440 /// when the deployment's expected payload exceeds
441 /// [`MAX_DESERIALIZE_BYTES`].
442 #[cfg(feature = "postcard")]
443 pub fn from_bytes(bytes: &[u8]) -> RcfResult<Self> {
444 Self::from_bytes_with_max_size(bytes, MAX_DESERIALIZE_BYTES)
445 }
446
447 /// Variant of [`Self::from_bytes`] with a caller-supplied
448 /// byte-length cap.
449 ///
450 /// # Errors
451 ///
452 /// Same as [`Self::from_bytes`] with `max` replacing
453 /// [`MAX_DESERIALIZE_BYTES`].
454 ///
455 /// # Security
456 ///
457 /// See module-level `# Security` notes.
458 #[cfg(feature = "postcard")]
459 pub fn from_bytes_with_max_size(bytes: &[u8], max: usize) -> RcfResult<Self> {
460 enforce_size_cap(bytes.len(), max, "ThresholdedForest postcard")?;
461 let version = read_version_prefix(bytes)?;
462 if version != THRESHOLDED_PERSISTENCE_VERSION {
463 return Err(RcfError::IncompatibleVersion {
464 found: version,
465 expected: THRESHOLDED_PERSISTENCE_VERSION,
466 });
467 }
468 let detector: Self = postcard::from_bytes(&bytes[VERSION_PREFIX_BYTES..])
469 .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
470 Ok(detector)
471 }
472
473 /// Atomically serialise the thresholded detector to `path`. Same
474 /// atomic write discipline as [`RandomCutForest::to_path`].
475 ///
476 /// # Errors
477 ///
478 /// - [`RcfError::SerializationFailed`] for any filesystem or
479 /// encoder failure.
480 #[cfg(all(feature = "postcard", feature = "std"))]
481 pub fn to_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
482 let bytes = self.to_bytes()?;
483 atomic::write_atomic(path.as_ref(), &bytes)
484 }
485
486 /// Reload a thresholded detector from `path`.
487 ///
488 /// # Errors
489 ///
490 /// - [`RcfError::DeserializationFailed`] when the file cannot be
491 /// read, exceeds [`MAX_DESERIALIZE_BYTES`], or the payload is
492 /// malformed.
493 /// - [`RcfError::IncompatibleVersion`] when the embedded version
494 /// does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
495 ///
496 /// # Security
497 ///
498 /// Inherits the trust model of [`Self::from_bytes`].
499 #[cfg(all(feature = "postcard", feature = "std"))]
500 pub fn from_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
501 let bytes = atomic::read_all(path.as_ref())?;
502 Self::from_bytes(&bytes)
503 }
504
505 /// Serialise the thresholded detector as JSON.
506 ///
507 /// # Errors
508 ///
509 /// Returns [`RcfError::SerializationFailed`] when `serde_json`
510 /// rejects the payload.
511 #[cfg(feature = "serde_json")]
512 pub fn to_json(&self) -> RcfResult<String> {
513 let envelope = ThresholdedJsonEnvelope {
514 version: THRESHOLDED_PERSISTENCE_VERSION,
515 detector: self,
516 };
517 serde_json::to_string(&envelope).map_err(|e| RcfError::SerializationFailed(e.to_string()))
518 }
519
520 /// Reload a thresholded detector from JSON.
521 ///
522 /// # Errors
523 ///
524 /// - [`RcfError::DeserializationFailed`] when the JSON is
525 /// malformed or longer than [`MAX_JSON_BYTES`].
526 /// - [`RcfError::IncompatibleVersion`] when the embedded version
527 /// does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
528 ///
529 /// # Security
530 ///
531 /// See module-level `# Security` notes.
532 #[cfg(feature = "serde_json")]
533 pub fn from_json(json: &str) -> RcfResult<Self> {
534 Self::from_json_with_max_size(json, MAX_JSON_BYTES)
535 }
536
537 /// Variant of [`Self::from_json`] with a caller-supplied
538 /// byte-length cap.
539 ///
540 /// # Errors
541 ///
542 /// Same as [`Self::from_json`] with `max` replacing
543 /// [`MAX_JSON_BYTES`].
544 ///
545 /// # Security
546 ///
547 /// See module-level `# Security` notes.
548 #[cfg(feature = "serde_json")]
549 pub fn from_json_with_max_size(json: &str, max: usize) -> RcfResult<Self> {
550 enforce_size_cap(json.len(), max, "ThresholdedForest JSON")?;
551 let envelope: ThresholdedJsonEnvelopeOwned<D> = serde_json::from_str(json)
552 .map_err(|e| RcfError::DeserializationFailed(e.to_string()))?;
553 if envelope.version != THRESHOLDED_PERSISTENCE_VERSION {
554 return Err(RcfError::IncompatibleVersion {
555 found: envelope.version,
556 expected: THRESHOLDED_PERSISTENCE_VERSION,
557 });
558 }
559 Ok(envelope.detector)
560 }
561
562 /// Atomically write the thresholded detector as JSON to `path`.
563 ///
564 /// # Errors
565 ///
566 /// - [`RcfError::SerializationFailed`] for any filesystem or
567 /// encoder failure.
568 #[cfg(all(feature = "serde_json", feature = "std"))]
569 pub fn to_json_path(&self, path: impl AsRef<std::path::Path>) -> RcfResult<()> {
570 let json = self.to_json()?;
571 atomic::write_atomic(path.as_ref(), json.as_bytes())
572 }
573
574 /// Reload a thresholded detector from a JSON file at `path`.
575 ///
576 /// # Errors
577 ///
578 /// - [`RcfError::DeserializationFailed`] when the file cannot be
579 /// read, exceeds [`MAX_JSON_BYTES`], or the JSON is malformed.
580 /// - [`RcfError::IncompatibleVersion`] when the embedded version
581 /// does not match [`THRESHOLDED_PERSISTENCE_VERSION`].
582 ///
583 /// # Security
584 ///
585 /// Inherits the trust model of [`Self::from_json`].
586 #[cfg(all(feature = "serde_json", feature = "std"))]
587 pub fn from_json_path(path: impl AsRef<std::path::Path>) -> RcfResult<Self> {
588 let json = atomic::read_all_string(path.as_ref())?;
589 Self::from_json(&json)
590 }
591}
592
/// Write-side JSON wrapper for [`RandomCutForest::to_json`]. Holds the
/// forest by reference so serialising does not require cloning it.
#[cfg(feature = "serde_json")]
#[derive(serde::Serialize)]
struct JsonEnvelope<'forest, const D: usize> {
    /// Format version written next to the payload.
    version: u32,
    /// The forest being serialised, borrowed from the caller.
    forest: &'forest RandomCutForest<D>,
}
603
/// Read-side JSON wrapper for [`RandomCutForest::from_json`]. Mirrors
/// the field names of [`JsonEnvelope`] but owns the decoded forest so
/// it can be handed back to the caller.
#[cfg(feature = "serde_json")]
#[derive(serde::Deserialize)]
struct JsonEnvelopeOwned<const D: usize> {
    /// Format version read from the payload; compared against
    /// [`PERSISTENCE_VERSION`] by the loader.
    version: u32,
    /// The decoded forest, owned by the envelope.
    forest: RandomCutForest<D>,
}
614
/// Write-side JSON wrapper for [`ThresholdedForest::to_json`]. Holds
/// the detector by reference so serialising does not require cloning
/// it.
#[cfg(feature = "serde_json")]
#[derive(serde::Serialize)]
struct ThresholdedJsonEnvelope<'det, const D: usize> {
    /// Format version written next to the payload.
    version: u32,
    /// The detector being serialised, borrowed from the caller.
    detector: &'det ThresholdedForest<D>,
}
624
/// Read-side JSON wrapper for [`ThresholdedForest::from_json`].
/// Mirrors the field names of [`ThresholdedJsonEnvelope`] but owns the
/// decoded detector so it can be handed back to the caller.
#[cfg(feature = "serde_json")]
#[derive(serde::Deserialize)]
struct ThresholdedJsonEnvelopeOwned<const D: usize> {
    /// Format version read from the payload; compared against
    /// [`THRESHOLDED_PERSISTENCE_VERSION`] by the loader.
    version: u32,
    /// The decoded detector, owned by the envelope.
    detector: ThresholdedForest<D>,
}
634
#[cfg(all(test, feature = "postcard"))]
#[allow(clippy::float_cmp, clippy::cast_precision_loss, clippy::cast_lossless)] // Roundtrip asserts bit-exact equality + small bounded counters.
mod binary_tests {
    use super::*;
    use crate::ForestBuilder;

    /// Build a 2-D forest (50 trees, 16 samples per tree) seeded with
    /// `seed` and feed it `updates` points.
    fn trained_forest(seed: u64, updates: usize) -> RandomCutForest<2> {
        let mut forest = ForestBuilder::<2>::new()
            .num_trees(50)
            .sample_size(16)
            .seed(seed)
            .build()
            .unwrap();
        for step in 0..updates {
            let x = step as f64 * 0.01;
            forest.update([x, x + 0.5]).unwrap();
        }
        forest
    }

    /// Score `probe` on a forest and unwrap the result to a plain `f64`.
    fn score_of(forest: &RandomCutForest<2>, probe: &[f64; 2]) -> f64 {
        f64::from(forest.score(probe).unwrap())
    }

    #[test]
    fn version_prefix_present() {
        let blob = trained_forest(2026, 10).to_bytes().unwrap();
        assert!(blob.len() >= VERSION_PREFIX_BYTES);
        let decoded = u32::from_le_bytes([blob[0], blob[1], blob[2], blob[3]]);
        assert_eq!(decoded, PERSISTENCE_VERSION);
    }

    #[test]
    fn empty_forest_roundtrip() {
        let original = ForestBuilder::<4>::new()
            .num_trees(50)
            .sample_size(16)
            .seed(1)
            .build()
            .unwrap();
        let blob = original.to_bytes().unwrap();
        let restored = RandomCutForest::<4>::from_bytes(&blob).unwrap();
        assert_eq!(restored.num_trees(), original.num_trees());
        assert_eq!(restored.sample_size(), original.sample_size());
        assert_eq!(restored.dimension(), original.dimension());
    }

    #[test]
    fn trained_forest_score_roundtrip() {
        let original = trained_forest(7, 200);
        let restored = RandomCutForest::<2>::from_bytes(&original.to_bytes().unwrap()).unwrap();
        let probe = [1.5_f64, 2.0];
        assert_eq!(score_of(&original, &probe), score_of(&restored, &probe));
    }

    #[test]
    fn time_decay_roundtrip() {
        let mut original = ForestBuilder::<2>::new()
            .num_trees(50)
            .sample_size(16)
            .time_decay(0.05)
            .seed(11)
            .build()
            .unwrap();
        for step in 0..100 {
            let v = step as f64;
            original.update([v, v]).unwrap();
        }
        let restored = RandomCutForest::<2>::from_bytes(&original.to_bytes().unwrap()).unwrap();
        assert_eq!(original.config().time_decay, restored.config().time_decay);
        let probe = [10.0_f64, 10.0];
        assert_eq!(score_of(&original, &probe), score_of(&restored, &probe));
    }

    #[test]
    fn truncated_bytes_rejected() {
        // Two bytes cannot even hold the four-byte version prefix.
        let outcome = RandomCutForest::<2>::from_bytes(&[0_u8; 2]);
        assert!(matches!(
            outcome.unwrap_err(),
            RcfError::DeserializationFailed(_)
        ));
    }

    #[test]
    fn version_mismatch_rejected() {
        let bogus = PERSISTENCE_VERSION + 99;
        let mut blob = trained_forest(2026, 5).to_bytes().unwrap();
        blob[..VERSION_PREFIX_BYTES].copy_from_slice(&bogus.to_le_bytes());
        let err = RandomCutForest::<2>::from_bytes(&blob).unwrap_err();
        assert!(
            matches!(
                err,
                RcfError::IncompatibleVersion { found, expected }
                    if found == bogus && expected == PERSISTENCE_VERSION
            ),
            "expected IncompatibleVersion, got {err:?}"
        );
    }

    #[test]
    fn malformed_payload_rejected() {
        // Valid prefix followed by bytes that are not a forest payload.
        let mut blob = PERSISTENCE_VERSION.to_le_bytes().to_vec();
        blob.extend_from_slice(&[0xFF; 16]);
        let err = RandomCutForest::<2>::from_bytes(&blob).unwrap_err();
        assert!(matches!(err, RcfError::DeserializationFailed(_)));
    }

    #[test]
    fn oversize_payload_rejected_by_default_cap() {
        // One byte past MAX_DESERIALIZE_BYTES: a correct version
        // prefix padded with garbage. Real snapshots stay far below
        // the cap; the point is that the cap fires before postcard
        // ever sees the bytes.
        let mut blob = vec![0xAA_u8; MAX_DESERIALIZE_BYTES + 1];
        blob[..VERSION_PREFIX_BYTES].copy_from_slice(&PERSISTENCE_VERSION.to_le_bytes());
        let err = RandomCutForest::<2>::from_bytes(&blob).unwrap_err();
        assert!(matches!(err, RcfError::DeserializationFailed(_)));
    }

    #[test]
    fn from_bytes_with_max_size_accepts_higher_cap() {
        // The explicit-cap entry point must restore a legitimate
        // snapshot exactly like the default one does.
        let original = trained_forest(7, 50);
        let blob = original.to_bytes().unwrap();
        let restored =
            RandomCutForest::<2>::from_bytes_with_max_size(&blob, MAX_DESERIALIZE_BYTES).unwrap();
        assert_eq!(restored.updates_seen(), original.updates_seen());
    }

    #[test]
    fn from_bytes_with_max_size_rejects_below_payload_size() {
        // A cap one byte short of the payload must be refused.
        let blob = trained_forest(7, 50).to_bytes().unwrap();
        let outcome = RandomCutForest::<2>::from_bytes_with_max_size(&blob, blob.len() - 1);
        assert!(matches!(
            outcome.unwrap_err(),
            RcfError::DeserializationFailed(_)
        ));
    }

    #[test]
    fn updates_seen_counter_roundtrips() {
        let original = trained_forest(42, 75);
        let expected = original.updates_seen();
        let restored = RandomCutForest::<2>::from_bytes(&original.to_bytes().unwrap()).unwrap();
        assert_eq!(restored.updates_seen(), expected);
    }
}
791
#[cfg(all(test, feature = "serde_json"))]
#[allow(clippy::float_cmp, clippy::cast_precision_loss, clippy::cast_lossless)] // Roundtrip asserts bit-exact equality + small bounded counters.
mod json_tests {
    use super::*;
    use crate::ForestBuilder;

    /// Build a small 2-D forest (50 trees, 8 samples per tree) and feed
    /// it 30 points.
    fn small_trained() -> RandomCutForest<2> {
        let mut f = ForestBuilder::<2>::new()
            .num_trees(50)
            .sample_size(8)
            .seed(2026)
            .build()
            .unwrap();
        for i in 0..30 {
            let v = i as f64;
            f.update([v, v + 1.0]).unwrap();
        }
        f
    }

    #[test]
    fn json_roundtrip_preserves_score() {
        let f = small_trained();
        let json = f.to_json().unwrap();
        let back = RandomCutForest::<2>::from_json(&json).unwrap();
        let probe = [3.0_f64, 4.0];
        let s1: f64 = f.score(&probe).unwrap().into();
        let s2: f64 = back.score(&probe).unwrap().into();
        assert_eq!(s1, s2);
    }

    #[test]
    fn json_envelope_carries_version_field() {
        let f = small_trained();
        let json = f.to_json().unwrap();
        // Match the key together with its value: a bare `:4` could be
        // satisfied by any number anywhere in the payload.
        assert!(json.contains(&format!("\"version\":{PERSISTENCE_VERSION}")));
    }

    #[test]
    fn json_version_mismatch_rejected() {
        let f = small_trained();
        let json = f.to_json().unwrap();
        let bogus = json.replace(
            &format!("\"version\":{PERSISTENCE_VERSION}"),
            &format!("\"version\":{}", PERSISTENCE_VERSION + 99),
        );
        // Guard against the substitution silently becoming a no-op if
        // the envelope layout ever changes.
        assert_ne!(bogus, json, "version field was not rewritten");
        let err = RandomCutForest::<2>::from_json(&bogus).unwrap_err();
        assert!(matches!(err, RcfError::IncompatibleVersion { .. }));
    }

    #[test]
    fn json_malformed_rejected() {
        assert!(matches!(
            RandomCutForest::<2>::from_json("not json").unwrap_err(),
            RcfError::DeserializationFailed(_)
        ));
    }

    #[test]
    fn json_oversize_payload_rejected_by_explicit_cap() {
        // Exercise the size guard through the explicit-cap variant
        // with a cap one byte below the payload — feeding a real
        // 1 GiB string into the default-cap variant would cost the
        // test runner too much memory.
        let f = small_trained();
        let json = f.to_json().unwrap();
        let err = RandomCutForest::<2>::from_json_with_max_size(&json, json.len() - 1).unwrap_err();
        assert!(matches!(err, RcfError::DeserializationFailed(_)));
    }

    #[test]
    fn json_with_max_size_round_trips_at_default_cap() {
        let f = small_trained();
        let json = f.to_json().unwrap();
        let back = RandomCutForest::<2>::from_json_with_max_size(&json, MAX_JSON_BYTES).unwrap();
        let probe = [3.0_f64, 4.0];
        let s1: f64 = f.score(&probe).unwrap().into();
        let s2: f64 = back.score(&probe).unwrap().into();
        assert_eq!(s1, s2);
    }
}
874}