Skip to main content

pf_core/
fixture.rs

1// SPDX-License-Identifier: MIT
2//! Synthetic four-layer capture fixtures.
3//!
4//! These let us exercise the snapshot orchestrator on the build host (no GPU,
5//! no LLM, no CRIU) with realistic-shaped payloads. Used by Phase-1
6//! microbenchmarks and the `examples/01-hello-fork/` example.
7
8use crate::digest::Digest256;
9use crate::error::Result;
10use crate::manifest::{CacheLayer, EffectsLayer, ModelLayer, TraceLayer, WorldLayer};
11use crate::snapshot::{LayerCapture, LayerDescriptor, LayerKind};
12use crate::store::PfStore;
13
/// Tunable knobs for the fixture set. Defaults sized for a sub-500-ms run on
/// macOS arm64 (the build host). CI / GPU hosts override `cache_pages` and
/// `model_diff_bytes` upward.
///
/// Every capture type in this module wraps one `FixtureSpec` and reads only
/// the fields relevant to its layer, plus `seed`.
#[derive(Clone, Debug)]
pub struct FixtureSpec {
    /// Number of paged KV-cache pages to emit. Each page is written as two
    /// blobs (one K, one V), each `page_bytes` long.
    pub cache_pages: usize,
    /// Bytes per cache page blob. `FixtureSpec::default()` sets this to
    /// 16 KiB; an earlier note here cited 32 KiB as a realistic vLLM page
    /// size. NOTE(review): confirm which of the two values is intended.
    pub page_bytes: usize,
    /// Number of files to emit in the world-layer FS tree.
    pub world_files: usize,
    /// Bytes per world file.
    pub world_file_bytes: usize,
    /// Bytes of seeded entropy behind the (single) model-diff blob. The
    /// entropy is emitted as `model_diff_bytes / 4` f32 values inside a JSON
    /// envelope, so the stored blob is the envelope, not the raw bytes.
    pub model_diff_bytes: usize,
    /// Number of effect-ledger entries (the ledger header line is extra).
    pub effects_entries: usize,
    /// Number of trace messages.
    pub trace_messages: usize,
    /// Optional seed mixed into payloads to differentiate forks.
    pub seed: u64,
}
36
37impl Default for FixtureSpec {
38    fn default() -> Self {
39        // Tuned so the full snapshot completes well under 500 ms on macOS
40        // arm64 with zstd-19 (≈ 1 MB total compressed).
41        Self {
42            cache_pages: 32,
43            page_bytes: 16 * 1024,
44            world_files: 64,
45            world_file_bytes: 4 * 1024,
46            model_diff_bytes: 64 * 1024,
47            effects_entries: 16,
48            trace_messages: 16,
49            seed: 0,
50        }
51    }
52}
53
/// Deterministically overwrite `buf` with pseudo-random bytes derived from
/// `seed`.
///
/// Not cryptographic. The generator is a SplitMix64-style counter stream:
/// the state starts at `seed + GAMMA`, is advanced by `GAMMA` before every
/// 8-byte word, and the mixed state is written little-endian. A trailing
/// partial word (when `buf.len()` is not a multiple of 8) receives the
/// leading bytes of the next output word. An empty buffer is left untouched.
fn fill(buf: &mut [u8], seed: u64) {
    /// Additive increment applied to the state for every output word.
    const GAMMA: u64 = 0x9E37_79B9_7F4A_7C15;

    /// SplitMix64 output mix: two xor-shift/multiply rounds and a final
    /// xor-shift.
    fn mix(state: u64) -> u64 {
        let a = (state ^ (state >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        let b = (a ^ (a >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
        b ^ (b >> 31)
    }

    let mut state = seed.wrapping_add(GAMMA);

    // Whole 8-byte words first.
    let mut words = buf.chunks_exact_mut(8);
    for word in &mut words {
        state = state.wrapping_add(GAMMA);
        word.copy_from_slice(&mix(state).to_le_bytes());
    }

    // Then the (possibly empty) tail, fed from one more output word.
    let tail = words.into_remainder();
    if !tail.is_empty() {
        let last = mix(state.wrapping_add(GAMMA)).to_le_bytes();
        let len = tail.len();
        tail.copy_from_slice(&last[..len]);
    }
}
70
71// ---------- per-layer fixture captures ----------
72
73/// Synthetic model-layer capture: writes one base + one diff blob.
74pub struct FixtureModelCapture(pub FixtureSpec);
75impl LayerCapture for FixtureModelCapture {
76    fn kind(&self) -> LayerKind {
77        LayerKind::Model
78    }
79    fn capture(&self, store: &PfStore) -> Result<LayerDescriptor> {
80        // Base is a fixed marker (HF model fingerprint stand-in) so it dedupes
81        // across forks of the same base model.
82        let base = store
83            .blobs()
84            .put(b"base-model-fingerprint:llama-3-8b@sha256:demo")?;
85
86        // Wrap the synthetic random bytes in a `model.diff.v1` envelope
87        // matching `pf-model::serialize`. We can't depend on pf-model
88        // here (architecture rule: pf-core has no pf-* deps) so we hand-
89        // write the JSON structure.  Stuff the seeded entropy into a
90        // Full delta param's f32 vector — `model_diff_bytes / 4` floats.
91        let n_floats = self.0.model_diff_bytes / 4;
92        let mut bytes = vec![0u8; n_floats * 4];
93        fill(&mut bytes, self.0.seed ^ 0xD1FF);
94        // Reinterpret the random bytes as f32, replacing any non-finite
95        // values with 0.0 so the merge primitives stay finite.
96        let floats: Vec<f32> = bytes
97            .chunks_exact(4)
98            .map(|c| {
99                let v = f32::from_le_bytes([c[0], c[1], c[2], c[3]]);
100                if v.is_finite() { v } else { 0.0 }
101            })
102            .collect();
103        let envelope = serde_json::json!({
104            "layout": "model.diff.v1",
105            "diff": { "kind": "full", "params": { "synth_param": floats } },
106        });
107        let diff = store.blobs().put(&serde_json::to_vec(&envelope)?)?;
108        Ok(LayerDescriptor::Model(ModelLayer { base, diff }))
109    }
110}
111
112/// Synthetic cache-layer capture: writes N pages and a manifest blob.
113pub struct FixtureCacheCapture(pub FixtureSpec);
114impl LayerCapture for FixtureCacheCapture {
115    fn kind(&self) -> LayerKind {
116        LayerKind::Cache
117    }
118    fn capture(&self, store: &PfStore) -> Result<LayerDescriptor> {
119        let mut page_digests: Vec<(usize, Digest256, Digest256)> =
120            Vec::with_capacity(self.0.cache_pages);
121        let mut buf_k = vec![0u8; self.0.page_bytes];
122        let mut buf_v = vec![0u8; self.0.page_bytes];
123        for ix in 0..self.0.cache_pages {
124            fill(&mut buf_k, self.0.seed ^ (ix as u64) ^ 0xCACE_CAFE_CAFE);
125            fill(
126                &mut buf_v,
127                self.0.seed ^ (ix as u64) ^ 0xC0DE_C0DE_C0DE_C0DE,
128            );
129            let k = store.blobs().put(&buf_k)?;
130            let v = store.blobs().put(&buf_v)?;
131            page_digests.push((ix, k, v));
132        }
133        // The cache.manifest blob describes the page layout. Ad-hoc JSON for
134        // the fixture; real format documented in agent_docs/cache-layer.md.
135        let pages_json: Vec<serde_json::Value> = page_digests
136            .iter()
137            .map(|(ix, k, v)| serde_json::json!({"ix": ix, "k": k.as_str(), "v": v.as_str()}))
138            .collect();
139        let manifest_json = serde_json::json!({
140            "layout": "paged-batchinvariant-v1",
141            "page_size_bytes": self.0.page_bytes,
142            "pages": pages_json,
143        });
144        let manifest = store
145            .blobs()
146            .put(serde_json::to_vec(&manifest_json)?.as_slice())?;
147        Ok(LayerDescriptor::Cache(CacheLayer {
148            layout: "paged-batchinvariant-v1".into(),
149            manifest,
150        }))
151    }
152}
153
154/// Synthetic world-layer capture: writes N file blobs + an env blob + a procs
155/// blob (placeholder), then a tree-manifest blob.
156pub struct FixtureWorldCapture(pub FixtureSpec);
157impl LayerCapture for FixtureWorldCapture {
158    fn kind(&self) -> LayerKind {
159        LayerKind::World
160    }
161    fn capture(&self, store: &PfStore) -> Result<LayerDescriptor> {
162        let mut buf = vec![0u8; self.0.world_file_bytes];
163        let mut entries = Vec::with_capacity(self.0.world_files);
164        for i in 0..self.0.world_files {
165            fill(&mut buf, self.0.seed ^ (i as u64) ^ 0xF11E_CAFE);
166            let d = store.blobs().put(&buf)?;
167            // Match the canonical pf_world::FsTreeEntry shape so this
168            // fixture can flow through the Phase-6 merge engine.
169            entries.push(serde_json::json!({
170                "path": format!("src/file_{i:04}.rs"),
171                "mode": "0644",
172                "size": self.0.world_file_bytes,
173                "kind": "file",
174                "blob": d.as_str(),
175            }));
176        }
177        let tree_json = serde_json::json!({
178            "kind": "fs.tree.v1",
179            "entries": entries,
180        });
181        let fs = store.blobs().put(&serde_json::to_vec(&tree_json)?)?;
182        let env = store.blobs().put(
183            serde_json::to_vec(&serde_json::json!({"PWD":"/sandbox","seed":self.0.seed}))?
184                .as_slice(),
185        )?;
186        let procs = store.blobs().put(
187            serde_json::to_vec(&serde_json::json!({"unsupported_on": std::env::consts::OS}))?
188                .as_slice(),
189        )?;
190        Ok(LayerDescriptor::World(WorldLayer { fs, env, procs }))
191    }
192}
193
194/// Synthetic effects-layer capture: writes a JSONL ledger blob.
195pub struct FixtureEffectsCapture(pub FixtureSpec);
196impl LayerCapture for FixtureEffectsCapture {
197    fn kind(&self) -> LayerKind {
198        LayerKind::Effects
199    }
200    fn capture(&self, store: &PfStore) -> Result<LayerDescriptor> {
201        let mut jsonl = Vec::new();
202        // Header line so pf-effects::Ledger::deserialize / pf-merge::merge_effects
203        // recognize this as an effects.ledger.v1 blob.
204        let header = serde_json::json!({
205            "kind": "effects.ledger.v1",
206            "entries": self.0.effects_entries,
207        });
208        jsonl.extend_from_slice(&serde_json::to_vec(&header)?);
209        jsonl.push(b'\n');
210        for i in 0..self.0.effects_entries {
211            let entry = serde_json::json!({
212                "ts": "2026-05-05T14:11:00Z",
213                "tool_id": format!("synth_tool_{}", i % 4),
214                "args_hash": format!("sha256:{:064x}", (self.0.seed ^ (i as u64))),
215                "idempotency_key": format!("01J{:013}", i),
216                "result_hash": format!("sha256:{:064x}", (self.0.seed.wrapping_mul(7) ^ (i as u64))),
217                "side_effect_class": if i % 5 == 0 { "irreversible" } else { "pure" },
218                "session_hmac": "",
219            });
220            jsonl.extend_from_slice(&serde_json::to_vec(&entry)?);
221            jsonl.push(b'\n');
222        }
223        let ledger = store.blobs().put(&jsonl)?;
224        Ok(LayerDescriptor::Effects(EffectsLayer { ledger }))
225    }
226}
227
228/// Synthetic trace-layer capture: writes a JSONL message blob.
229pub struct FixtureTraceCapture(pub FixtureSpec);
230impl LayerCapture for FixtureTraceCapture {
231    fn kind(&self) -> LayerKind {
232        LayerKind::Trace
233    }
234    fn capture(&self, store: &PfStore) -> Result<LayerDescriptor> {
235        let mut jsonl = Vec::new();
236        for i in 0..self.0.trace_messages {
237            let entry = serde_json::json!({
238                "role": if i % 2 == 0 { "user" } else { "assistant" },
239                "content": format!("synthetic message #{i} (seed {})", self.0.seed),
240            });
241            jsonl.extend_from_slice(&serde_json::to_vec(&entry)?);
242            jsonl.push(b'\n');
243        }
244        let messages = store.blobs().put(&jsonl)?;
245        Ok(LayerDescriptor::Trace(TraceLayer { messages }))
246    }
247}