Skip to main content

stryke/
pec.rs

1//! On-disk bytecode bundles (`.pec`): serialized [`crate::ast::Program`] + [`crate::bytecode::Chunk`]
2//! for warm starts without re-parsing or re-compiling when `STRYKE_BC_CACHE=1`.
3
4use std::fs;
5use std::io::{self, Write};
6use std::path::PathBuf;
7
8use serde::{Deserialize, Deserializer, Serialize, Serializer};
9use sha2::{Digest, Sha256};
10
11use crate::ast::Program;
12use crate::bytecode::Chunk;
13use crate::error::{PerlError, PerlResult};
14use crate::value::PerlValue;
15
/// Reports whether the on-disk `.pec` bytecode cache is switched on.
///
/// Returns `true` only when the `STRYKE_BC_CACHE` environment variable is exactly
/// `1`, `true`, or `yes` (case-sensitive). An unset variable, a non-Unicode value,
/// or any other value yields `false`.
pub fn cache_enabled() -> bool {
    const TRUTHY: [&str; 3] = ["1", "true", "yes"];
    match std::env::var("STRYKE_BC_CACHE") {
        Ok(value) => TRUTHY.contains(&value.as_str()),
        // Unset or not valid Unicode: treat as disabled.
        Err(_) => false,
    }
}
23
/// Directory that holds `.pec` cache files.
///
/// Resolution order:
/// 1. `$STRYKE_BC_DIR`, when set and non-empty. It is read as an OS string, so a
///    path that is not valid Unicode is still honoured. An empty value is treated
///    as unset; otherwise it would resolve to the current working directory.
/// 2. `<home>/.cache/stryke/bc`, where `<home>` is `$HOME`, falling back to
///    `$USERPROFILE`.
///
/// When neither home variable is set the result is the relative path
/// `.cache/stryke/bc`. This function only computes the path; it does not create
/// the directory.
pub fn cache_dir() -> PathBuf {
    let override_dir = std::env::var_os("STRYKE_BC_DIR").filter(|dir| !dir.is_empty());
    if let Some(dir) = override_dir {
        return PathBuf::from(dir);
    }
    let home = std::env::var_os("HOME")
        .or_else(|| std::env::var_os("USERPROFILE"))
        .unwrap_or_default();
    PathBuf::from(home).join(".cache").join("stryke").join("bc")
}
34
35/// Fingerprint for cache key (includes crate version, strict flag, path, and source).
36pub fn source_fingerprint(strict_vars: bool, source_file: &str, code: &str) -> [u8; 32] {
37    let mut h = Sha256::new();
38    h.update(env!("CARGO_PKG_VERSION").as_bytes());
39    h.update([0u8]);
40    h.update([strict_vars as u8]);
41    h.update([0u8]);
42    h.update(source_file.as_bytes());
43    h.update([0u8]);
44    h.update(code.as_bytes());
45    h.finalize().into()
46}
47
48pub fn cache_path_for_fingerprint(fp: &[u8; 32]) -> PathBuf {
49    cache_dir().join(format!(
50        "{:x}.pec",
51        u128::from_be_bytes(fp[0..16].try_into().unwrap())
52    ))
53}
54
55/// Hex form of full32-byte fingerprint (collision-safe filename).
56pub fn cache_path_hex(fp: &[u8; 32]) -> PathBuf {
57    cache_dir().join(format!("{}_{:x}.pec", hex::encode(fp), fp[0] as u32))
58}
59
60fn cache_path(fp: &[u8; 32]) -> PathBuf {
61    cache_dir().join(format!("{}.pec", hex::encode(fp)))
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct PecBundle {
66    pub format_version: u32,
67    pub pointer_width: u8,
68    pub strict_vars: bool,
69    pub source_fingerprint: [u8; 32],
70    pub program: Program,
71    pub chunk: Chunk,
72}
73
74impl PecBundle {
75    /// Bumped from `1` to `2` when zstd compression was added — v1 readers will reject
76    /// v2 files and vice versa, so mixed-version caches are a clean miss (re-compile)
77    /// rather than a corrupt-decode error.
78    pub const FORMAT_VERSION: u32 = 2;
79    pub const MAGIC: [u8; 4] = *b"PEC2";
80    /// zstd compression level for the embedded payload. Level 3 is the sweet spot for
81    /// serialized bytecode: ~10× shrink ratio, compression ~2× faster than level 1 decode.
82    const ZSTD_LEVEL: i32 = 3;
83
84    pub fn new(strict_vars: bool, fp: [u8; 32], program: Program, chunk: Chunk) -> Self {
85        Self {
86            format_version: Self::FORMAT_VERSION,
87            pointer_width: std::mem::size_of::<usize>() as u8,
88            strict_vars,
89            source_fingerprint: fp,
90            program,
91            chunk,
92        }
93    }
94
95    pub fn encode(&self) -> PerlResult<Vec<u8>> {
96        let mut out = Vec::new();
97        out.extend_from_slice(&Self::MAGIC);
98        let payload = bincode::serialize(self)
99            .map_err(|e| PerlError::runtime(format!("pec: bincode serialize failed: {e}"), 0))?;
100        let compressed = zstd::stream::encode_all(&payload[..], Self::ZSTD_LEVEL)
101            .map_err(|e| PerlError::runtime(format!("pec: zstd encode failed: {e}"), 0))?;
102        out.extend_from_slice(&compressed);
103        Ok(out)
104    }
105
106    pub fn decode(bytes: &[u8]) -> PerlResult<Self> {
107        if bytes.len() < 4 + 8 {
108            return Err(PerlError::runtime("pec: file too small", 0));
109        }
110        if bytes[0..4] != Self::MAGIC {
111            return Err(PerlError::runtime("pec: bad magic", 0));
112        }
113        let payload = zstd::stream::decode_all(&bytes[4..])
114            .map_err(|e| PerlError::runtime(format!("pec: zstd decode failed: {e}"), 0))?;
115        let bundle: PecBundle = bincode::deserialize(&payload)
116            .map_err(|e| PerlError::runtime(format!("pec: bincode deserialize failed: {e}"), 0))?;
117        if bundle.format_version != Self::FORMAT_VERSION {
118            return Err(PerlError::runtime(
119                format!(
120                    "pec: unsupported format_version {} (expected {})",
121                    bundle.format_version,
122                    Self::FORMAT_VERSION
123                ),
124                0,
125            ));
126        }
127        if bundle.pointer_width != std::mem::size_of::<usize>() as u8 {
128            return Err(PerlError::runtime(
129                format!(
130                    "pec: pointer_width mismatch (file {} vs host {})",
131                    bundle.pointer_width,
132                    std::mem::size_of::<usize>()
133                ),
134                0,
135            ));
136        }
137        Ok(bundle)
138    }
139}
140
141/// Try load a bundle; `expected_fp` must match embedded fingerprint.
142pub fn try_load(expected_fp: &[u8; 32], strict_vars: bool) -> PerlResult<Option<PecBundle>> {
143    let path = cache_path(expected_fp);
144    let bytes = match fs::read(&path) {
145        Ok(b) => b,
146        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
147        Err(e) => {
148            return Err(PerlError::runtime(
149                format!("pec: read {}: {e}", path.display()),
150                0,
151            ))
152        }
153    };
154    let bundle = PecBundle::decode(&bytes)?;
155    if bundle.source_fingerprint != *expected_fp {
156        return Ok(None);
157    }
158    if bundle.strict_vars != strict_vars {
159        return Ok(None);
160    }
161    Ok(Some(bundle))
162}
163
164pub fn try_save(bundle: &PecBundle) -> PerlResult<()> {
165    let dir = cache_dir();
166    fs::create_dir_all(&dir).map_err(|e| {
167        PerlError::runtime(format!("pec: create_dir_all {}: {e}", dir.display()), 0)
168    })?;
169    let path = cache_path(&bundle.source_fingerprint);
170    let data = bundle.encode()?;
171    let tmp = path.with_extension("pec.tmp");
172    let mut f = fs::File::create(&tmp)
173        .map_err(|e| PerlError::runtime(format!("pec: create {}: {e}", tmp.display()), 0))?;
174    f.write_all(&data)
175        .map_err(|e| PerlError::runtime(format!("pec: write {}: {e}", tmp.display()), 0))?;
176    drop(f);
177    fs::rename(&tmp, &path).map_err(|e| {
178        PerlError::runtime(
179            format!("pec: rename {} -> {}: {e}", tmp.display(), path.display()),
180            0,
181        )
182    })?;
183    Ok(())
184}
185
186// ── Constant pool (Chunk.constants): only immediate-ish literals are allowed in .pec ───────────
187
188#[derive(Debug, Clone, Serialize, Deserialize)]
189#[serde(rename_all = "snake_case")]
190enum PecConst {
191    Undef,
192    Int(i64),
193    Float(f64),
194    Str(String),
195}
196
197fn pec_const_from_perl(v: &PerlValue) -> Result<PecConst, String> {
198    if v.is_undef() {
199        return Ok(PecConst::Undef);
200    }
201    if let Some(n) = v.as_integer() {
202        return Ok(PecConst::Int(n));
203    }
204    if let Some(f) = v.as_float() {
205        return Ok(PecConst::Float(f));
206    }
207    if let Some(s) = v.as_str() {
208        return Ok(PecConst::Str(s.to_string()));
209    }
210    Err(format!(
211        "constant pool value cannot be stored in .pec (type {})",
212        v.ref_type()
213    ))
214}
215
216fn perl_from_pec_const(c: PecConst) -> PerlValue {
217    match c {
218        PecConst::Undef => PerlValue::UNDEF,
219        PecConst::Int(n) => PerlValue::integer(n),
220        PecConst::Float(f) => PerlValue::float(f),
221        PecConst::Str(s) => PerlValue::string(s),
222    }
223}
224
225pub mod constants_pool_codec {
226    use super::*;
227
228    pub fn serialize<S>(values: &Vec<PerlValue>, ser: S) -> Result<S::Ok, S::Error>
229    where
230        S: Serializer,
231    {
232        let mut out = Vec::with_capacity(values.len());
233        for v in values {
234            let c = pec_const_from_perl(v).map_err(serde::ser::Error::custom)?;
235            out.push(c);
236        }
237        out.serialize(ser)
238    }
239
240    pub fn deserialize<'de, D>(de: D) -> Result<Vec<PerlValue>, D::Error>
241    where
242        D: Deserializer<'de>,
243    {
244        let v: Vec<PecConst> = Vec::deserialize(de)?;
245        Ok(v.into_iter().map(perl_from_pec_const).collect())
246    }
247}
248
249/// Remove mistaken duplicate helper filenames if any (no-op for normal paths).
250#[allow(dead_code)]
251pub fn pec_paths_legacy(_fp: &[u8; 32]) -> (PathBuf, PathBuf) {
252    (cache_path_for_fingerprint(_fp), cache_path_hex(_fp))
253}
254
#[cfg(test)]
mod tests {
    use super::*;
    use crate::compiler::Compiler;
    use crate::interpreter::Interpreter;

    /// A compiled bundle survives `encode` followed by `decode` with the same
    /// fingerprint and the same number of ops.
    #[test]
    fn pec_round_trip_bundle_encode_decode() {
        const SOURCE: &str = "my $x = 40 + 2; $x";

        let program = crate::parse(SOURCE).expect("parse");
        // NOTE(review): presumably sets up top-level state the compiler relies on — confirm.
        let mut interp = Interpreter::new();
        interp.prepare_program_top_level(&program).expect("prep");
        let chunk = Compiler::new()
            .with_source_file("-e".into())
            .compile_program(&program)
            .expect("compile");

        let fingerprint = source_fingerprint(false, "-e", SOURCE);
        let original = PecBundle::new(false, fingerprint, program, chunk);

        let encoded = original.encode().expect("encode");
        let decoded = PecBundle::decode(&encoded).expect("decode");

        assert_eq!(decoded.source_fingerprint, fingerprint);
        assert_eq!(decoded.chunk.ops.len(), original.chunk.ops.len());
    }
}