Skip to main content

stryke/
aot.rs

1//! Ahead-of-time build: bake a Perl script into a copy of the running `stryke` binary as
2//! a compressed trailer, producing a self-contained executable.
3//!
4//! Layout (little-endian, appended to the end of a copy of the `stryke` binary):
5//!
6//! ```text
7//!   [elf/mach-o bytes of stryke ...]   (unchanged, still runs as `stryke`)
8//!   [zstd-compressed payload ...]
9//!   [u64 compressed_len]
10//!   [u64 uncompressed_len]
11//!   [u32 version]
12//!   [u32 reserved (0)]
13//!   [8 bytes magic  b"STRK_AOT"]
14//! ```
15//!
16//! Version 1 payload (before zstd compression; version 2 bundles use the layout written by `encode_payload_v2`):
17//!
18//! ```text
19//!   [u32 script_name_len]
20//!   [script_name utf8]
21//!   [source bytes utf8]
22//! ```
23//!
24//! Why source, not bytecode? [`crate::bytecode::Chunk`] holds `Arc<HeapObject>` runtime
25//! values (regex objects, strings, closures, …) that are not serde-ready. Re-parsing a
26//! typical script adds ~1-2 ms to startup which is negligible for a deployment binary.
27//! The trailer format is versioned so a future pre-compiled-bytecode payload can live
28//! alongside v1 without breaking already-shipped binaries.
29//!
30//! ELF (Linux) and Mach-O (macOS) loaders ignore bytes past the program-header-listed
31//! segments, so appending data leaves the original `stryke` fully runnable. On macOS the
32//! resulting binary is unsigned — users distributing signed builds must re-`codesign`.
33
34use std::collections::HashMap;
35use std::fs::{self, File, OpenOptions};
36use std::io::{self, Read, Seek, SeekFrom, Write};
37use std::path::{Path, PathBuf};
38
39/// 8-byte trailer magic (`b"STRK_AOT"`), stored in the final 8 bytes of the trailer.
40pub const AOT_MAGIC: &[u8; 8] = b"STRK_AOT";
41/// Trailer format version 1: single script.
42pub const AOT_VERSION_V1: u32 = 1;
43/// Trailer format version 2: project bundle with multiple files.
44pub const AOT_VERSION_V2: u32 = 2;
45/// Fixed trailer length in bytes: `8 (cl) + 8 (ul) + 4 (ver) + 4 (rsv) + 8 (magic)`,
/// i.e. compressed length, uncompressed length, version, reserved, magic — in that order.
46pub const TRAILER_LEN: u64 = 32;
47
/// A single embedded script: its display name plus its source text.
///
/// `PartialEq`/`Eq` are derived so two scripts can be compared directly (for example
/// in round-trip tests) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedScript {
    /// `__FILE__` / error-reporting name (e.g. `hello.pl`).
    pub name: String,
    /// UTF-8 Perl source.
    pub source: String,
}
55
/// A bundled project: main entry point + library files.
///
/// `PartialEq`/`Eq` are derived so two bundles can be compared directly; map equality
/// does not depend on insertion order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedBundle {
    /// Entry point script name (e.g. `main.stk`).
    pub entry: String,
    /// All files: path -> source (includes entry + lib files).
    pub files: HashMap<String, String>,
}
64
/// Build the uncompressed v1 payload for one script.
///
/// Layout: a little-endian `u32` holding the byte length of `name`, the bytes of `name`,
/// then the bytes of `source` running to the end of the buffer.
///
/// # Panics
///
/// Panics if `name` is longer than `u32::MAX` bytes.
fn encode_payload_v1(name: &str, source: &str) -> Vec<u8> {
    let name_bytes = name.as_bytes();
    let len_prefix = u32::try_from(name_bytes.len())
        .expect("script name length fits in u32")
        .to_le_bytes();
    // `concat` sizes the output once from the three parts.
    [&len_prefix[..], name_bytes, source.as_bytes()].concat()
}
74
/// Serialize a project bundle into the v2 pre-compression payload format.
///
/// Layout (all lengths little-endian `u32`):
///
/// ```text
///   [u32 file_count]
///   [u32 entry_len] [entry utf8]
///   file_count x { [u32 path_len] [path utf8] [u32 source_len] [source utf8] }
/// ```
///
/// File records are written in ascending path order, so the same set of files always
/// produces the same bytes regardless of `HashMap` iteration order.
///
/// # Panics
///
/// Panics if the file count or any name/source length does not fit in a `u32`.
fn encode_payload_v2(entry: &str, files: &HashMap<String, String>) -> Vec<u8> {
    // Append `len` as a little-endian u32, panicking with `overflow_msg` if it does not fit.
    fn put_len(out: &mut Vec<u8>, len: usize, overflow_msg: &str) {
        let len = u32::try_from(len).expect(overflow_msg);
        out.extend_from_slice(&len.to_le_bytes());
    }

    // Sort by path: HashMap iteration order is arbitrary and would otherwise make the
    // payload differ between runs over identical input.
    let mut records: Vec<(&String, &String)> = files.iter().collect();
    records.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let records_len: usize = records
        .iter()
        .map(|(path, source)| 8 + path.len() + source.len())
        .sum();
    let mut out = Vec::with_capacity(8 + entry.len() + records_len);

    put_len(&mut out, records.len(), "file count fits in u32");
    put_len(&mut out, entry.len(), "entry name length fits in u32");
    out.extend_from_slice(entry.as_bytes());
    for (path, source) in records {
        put_len(&mut out, path.len(), "path length fits in u32");
        out.extend_from_slice(path.as_bytes());
        put_len(&mut out, source.len(), "source length fits in u32");
        out.extend_from_slice(source.as_bytes());
    }
    out
}
93
94/// Inverse of [`encode_payload_v1`].
95fn decode_payload_v1(bytes: &[u8]) -> Option<EmbeddedScript> {
96    if bytes.len() < 4 {
97        return None;
98    }
99    let name_len = u32::from_le_bytes(bytes[0..4].try_into().ok()?) as usize;
100    if 4 + name_len > bytes.len() {
101        return None;
102    }
103    let name = std::str::from_utf8(&bytes[4..4 + name_len])
104        .ok()?
105        .to_string();
106    let source = std::str::from_utf8(&bytes[4 + name_len..])
107        .ok()?
108        .to_string();
109    Some(EmbeddedScript { name, source })
110}
111
112/// Inverse of [`encode_payload_v2`].
113fn decode_payload_v2(bytes: &[u8]) -> Option<EmbeddedBundle> {
114    let mut pos = 0usize;
115    if bytes.len() < 8 {
116        return None;
117    }
118    let file_count = u32::from_le_bytes(bytes[pos..pos + 4].try_into().ok()?) as usize;
119    pos += 4;
120    let entry_len = u32::from_le_bytes(bytes[pos..pos + 4].try_into().ok()?) as usize;
121    pos += 4;
122    if pos + entry_len > bytes.len() {
123        return None;
124    }
125    let entry = std::str::from_utf8(&bytes[pos..pos + entry_len])
126        .ok()?
127        .to_string();
128    pos += entry_len;
129    let mut files = HashMap::with_capacity(file_count);
130    for _ in 0..file_count {
131        if pos + 4 > bytes.len() {
132            return None;
133        }
134        let path_len = u32::from_le_bytes(bytes[pos..pos + 4].try_into().ok()?) as usize;
135        pos += 4;
136        if pos + path_len > bytes.len() {
137            return None;
138        }
139        let path = std::str::from_utf8(&bytes[pos..pos + path_len])
140            .ok()?
141            .to_string();
142        pos += path_len;
143        if pos + 4 > bytes.len() {
144            return None;
145        }
146        let source_len = u32::from_le_bytes(bytes[pos..pos + 4].try_into().ok()?) as usize;
147        pos += 4;
148        if pos + source_len > bytes.len() {
149            return None;
150        }
151        let source = std::str::from_utf8(&bytes[pos..pos + source_len])
152            .ok()?
153            .to_string();
154        pos += source_len;
155        files.insert(path, source);
156    }
157    Some(EmbeddedBundle { entry, files })
158}
159
160/// Build a 32-byte trailer referring to `compressed_len` / `uncompressed_len`.
161fn build_trailer(compressed_len: u64, uncompressed_len: u64, version: u32) -> [u8; 32] {
162    let mut trailer = [0u8; 32];
163    trailer[0..8].copy_from_slice(&compressed_len.to_le_bytes());
164    trailer[8..16].copy_from_slice(&uncompressed_len.to_le_bytes());
165    trailer[16..20].copy_from_slice(&version.to_le_bytes());
166    // 20..24 reserved (zeros).
167    trailer[24..32].copy_from_slice(AOT_MAGIC);
168    trailer
169}
170
171/// Append a compressed v1 script payload to an existing file.
172pub fn append_embedded_script(out_path: &Path, name: &str, source: &str) -> io::Result<()> {
173    let payload = encode_payload_v1(name, source);
174    let compressed = zstd::stream::encode_all(&payload[..], 3)?;
175    let mut f = OpenOptions::new().append(true).open(out_path)?;
176    f.write_all(&compressed)?;
177    let trailer = build_trailer(
178        compressed.len() as u64,
179        payload.len() as u64,
180        AOT_VERSION_V1,
181    );
182    f.write_all(&trailer)?;
183    f.sync_all()?;
184    Ok(())
185}
186
187/// Append a compressed v2 bundle payload to an existing file.
188pub fn append_embedded_bundle(
189    out_path: &Path,
190    entry: &str,
191    files: &HashMap<String, String>,
192) -> io::Result<()> {
193    let payload = encode_payload_v2(entry, files);
194    let compressed = zstd::stream::encode_all(&payload[..], 3)?;
195    let mut f = OpenOptions::new().append(true).open(out_path)?;
196    f.write_all(&compressed)?;
197    let trailer = build_trailer(
198        compressed.len() as u64,
199        payload.len() as u64,
200        AOT_VERSION_V2,
201    );
202    f.write_all(&trailer)?;
203    f.sync_all()?;
204    Ok(())
205}
206
207/// Result of loading an embedded payload — either a single script (v1) or a bundle (v2).
///
/// `try_load_embedded` picks the variant from the version field stored in the trailer.
208#[derive(Debug, Clone)]
209pub enum EmbeddedPayload {
    /// Decoded from a version-1 trailer: one named script.
210    Script(EmbeddedScript),
    /// Decoded from a version-2 trailer: entry name plus a path -> source map.
211    Bundle(EmbeddedBundle),
212}
213
214/// Fast probe: read the last 32 bytes of `exe` and return the embedded payload if present.
215/// Supports both v1 (single script) and v2 (project bundle) formats.
216pub fn try_load_embedded(exe: &Path) -> Option<EmbeddedPayload> {
217    let mut f = File::open(exe).ok()?;
218    let size = f.metadata().ok()?.len();
219    if size < TRAILER_LEN {
220        return None;
221    }
222    f.seek(SeekFrom::End(-(TRAILER_LEN as i64))).ok()?;
223    let mut trailer = [0u8; TRAILER_LEN as usize];
224    f.read_exact(&mut trailer).ok()?;
225    if &trailer[24..32] != AOT_MAGIC {
226        return None;
227    }
228    let compressed_len = u64::from_le_bytes(trailer[0..8].try_into().ok()?);
229    let uncompressed_len = u64::from_le_bytes(trailer[8..16].try_into().ok()?);
230    let version = u32::from_le_bytes(trailer[16..20].try_into().ok()?);
231    if compressed_len == 0 || compressed_len > size - TRAILER_LEN {
232        return None;
233    }
234    let payload_start = size - TRAILER_LEN - compressed_len;
235    f.seek(SeekFrom::Start(payload_start)).ok()?;
236    let mut compressed = vec![0u8; compressed_len as usize];
237    f.read_exact(&mut compressed).ok()?;
238    let payload = zstd::stream::decode_all(&compressed[..]).ok()?;
239    if payload.len() != uncompressed_len as usize {
240        return None;
241    }
242    match version {
243        AOT_VERSION_V1 => decode_payload_v1(&payload).map(EmbeddedPayload::Script),
244        AOT_VERSION_V2 => decode_payload_v2(&payload).map(EmbeddedPayload::Bundle),
245        _ => None,
246    }
247}
248
249/// Legacy: load v1 single script only (for backward compat).
250pub fn try_load_embedded_script(exe: &Path) -> Option<EmbeddedScript> {
251    match try_load_embedded(exe)? {
252        EmbeddedPayload::Script(s) => Some(s),
253        EmbeddedPayload::Bundle(b) => {
254            let source = b.files.get(&b.entry)?.clone();
255            Some(EmbeddedScript {
256                name: b.entry,
257                source,
258            })
259        }
260    }
261}
262
263/// `stryke build SCRIPT -o OUT`:
264/// 1. Read and parse-validate SCRIPT (surfacing syntax errors at build time, not at user run time).
265/// 2. Copy the currently-running `stryke` binary to OUT.
266/// 3. Append a compressed-source trailer.
267/// 4. `chmod +x` the result on unix.
268///
269/// Errors are returned as human-readable strings; the caller prints and sets an exit code.
270pub fn build(script_path: &Path, out_path: &Path) -> Result<PathBuf, String> {
271    let source = fs::read_to_string(script_path)
272        .map_err(|e| format!("stryke build: cannot read {}: {}", script_path.display(), e))?;
273    let script_name = script_path
274        .file_name()
275        .and_then(|s| s.to_str())
276        .unwrap_or("script.pl")
277        .to_string();
278
279    crate::parse_with_file(&source, &script_name).map_err(|e| format!("{}", e))?;
280
281    let exe = std::env::current_exe()
282        .map_err(|e| format!("stryke build: locating current executable: {}", e))?;
283
284    copy_exe_without_trailer(&exe, out_path).map_err(|e| {
285        format!(
286            "stryke build: copy {} -> {}: {}",
287            exe.display(),
288            out_path.display(),
289            e
290        )
291    })?;
292
293    append_embedded_script(out_path, &script_name, &source)
294        .map_err(|e| format!("stryke build: write trailer: {}", e))?;
295
296    set_executable(out_path);
297    Ok(out_path.to_path_buf())
298}
299
/// Recursively gather every `.stk` and `.pl` file under `project_dir`.
///
/// Keys are paths relative to `project_dir` with `/` separators; values are the file
/// contents. The top-level `t` entry (tests) and everything beneath it is skipped.
///
/// # Errors
///
/// Returns the first I/O error hit while listing a directory or reading a file
/// (including a matching file that is not valid UTF-8).
fn collect_project_files(project_dir: &Path) -> io::Result<HashMap<String, String>> {
    fn walk(dir: &Path, root: &Path, found: &mut HashMap<String, String>) -> io::Result<()> {
        for item in fs::read_dir(dir)? {
            let path = item?.path();
            let rel = path.strip_prefix(root).unwrap_or(&path).to_string_lossy();

            // Only a `t` directly under the project root counts as the test tree.
            let in_tests = rel == "t" || rel.starts_with("t/") || rel.starts_with("t\\");
            if in_tests {
                continue;
            }

            if path.is_dir() {
                walk(&path, root, found)?;
                continue;
            }

            let wanted = match path.extension() {
                Some(ext) => ext == "stk" || ext == "pl",
                None => false,
            };
            if wanted {
                let text = fs::read_to_string(&path)?;
                // Normalise separators so bundle keys are the same on every platform.
                found.insert(rel.replace('\\', "/"), text);
            }
        }
        Ok(())
    }

    let mut found = HashMap::new();
    walk(project_dir, project_dir, &mut found)?;
    Ok(found)
}
326
327/// `stryke build --project DIR -o OUT`:
328/// Bundle main.stk + lib/*.stk (excluding t/) into a single executable.
329pub fn build_project(project_dir: &Path, out_path: &Path) -> Result<PathBuf, String> {
330    let entry_path = project_dir.join("main.stk");
331    if !entry_path.exists() {
332        return Err(format!(
333            "stryke build: project directory {} has no main.stk",
334            project_dir.display()
335        ));
336    }
337
338    let files = collect_project_files(project_dir)
339        .map_err(|e| format!("stryke build: scanning project: {}", e))?;
340
341    eprintln!(
342        "stryke build: bundling {} files from {}",
343        files.len(),
344        project_dir.display()
345    );
346    for path in files.keys() {
347        eprintln!("  {}", path);
348    }
349
350    for (path, source) in &files {
351        crate::parse_with_file(source, path).map_err(|e| format!("{}", e))?;
352    }
353
354    let exe = std::env::current_exe()
355        .map_err(|e| format!("stryke build: locating current executable: {}", e))?;
356
357    copy_exe_without_trailer(&exe, out_path).map_err(|e| {
358        format!(
359            "stryke build: copy {} -> {}: {}",
360            exe.display(),
361            out_path.display(),
362            e
363        )
364    })?;
365
366    append_embedded_bundle(out_path, "main.stk", &files)
367        .map_err(|e| format!("stryke build: write trailer: {}", e))?;
368
369    set_executable(out_path);
370    Ok(out_path.to_path_buf())
371}
372
/// Best-effort `chmod +x`: add the execute bit for user, group, and other to `path`.
///
/// Failures (missing file, permission denied) are deliberately ignored.
#[cfg(unix)]
fn set_executable(path: &Path) {
    use std::os::unix::fs::PermissionsExt;

    let meta = match fs::metadata(path) {
        Ok(meta) => meta,
        Err(_) => return,
    };
    let mut perms = meta.permissions();
    let mode = perms.mode();
    perms.set_mode(mode | 0o111);
    let _ = fs::set_permissions(path, perms);
}

/// No-op on non-unix targets.
#[cfg(not(unix))]
fn set_executable(_path: &Path) {}
385
386/// Copy `src` to `dst`, skipping any existing AOT trailer on `src`. Prevents nested builds
387/// from stacking trailers: `stryke build a.pl -o a && stryke --exe a build b.pl -o b` would otherwise
388/// embed both scripts, one on top of the other.
389fn copy_exe_without_trailer(src: &Path, dst: &Path) -> io::Result<()> {
390    let mut sf = File::open(src)?;
391    let size = sf.metadata()?.len();
392    let keep = if size >= TRAILER_LEN {
393        sf.seek(SeekFrom::End(-(TRAILER_LEN as i64)))?;
394        let mut trailer = [0u8; TRAILER_LEN as usize];
395        if sf.read_exact(&mut trailer).is_ok() && &trailer[24..32] == AOT_MAGIC {
396            let compressed_len = u64::from_le_bytes(trailer[0..8].try_into().unwrap());
397            if compressed_len > 0 && compressed_len <= size - TRAILER_LEN {
398                size - TRAILER_LEN - compressed_len
399            } else {
400                size
401            }
402        } else {
403            size
404        }
405    } else {
406        size
407    };
408    sf.seek(SeekFrom::Start(0))?;
409    // Remove any existing destination first so `fs::copy`-like behaviour is atomic from the
410    // caller's point of view and we never open the running destination for truncation.
411    let _ = fs::remove_file(dst);
412    let mut df = File::create(dst)?;
413    let mut remaining = keep;
414    let mut buf = vec![0u8; 64 * 1024];
415    while remaining > 0 {
416        let n = std::cmp::min(remaining as usize, buf.len());
417        sf.read_exact(&mut buf[..n])?;
418        df.write_all(&buf[..n])?;
419        remaining -= n as u64;
420    }
421    df.sync_all()?;
422    Ok(())
423}
424
/// Unit tests for the payload codecs and the trailer append/load/strip helpers.
///
/// File-based tests use scratch files under the system temp directory and remove them on
/// success.
#[cfg(test)]
mod tests {
    use super::*;

    /// Unique scratch path: process id + tag + random suffix, so tests do not collide.
    fn tmp_path(tag: &str) -> PathBuf {
        let dir = std::env::temp_dir();
        dir.join(format!(
            "stryke-aot-test-{}-{}-{}",
            std::process::id(),
            tag,
            rand::random::<u32>()
        ))
    }

    /// Two-file bundle shared by the v2 tests.
    fn sample_bundle() -> HashMap<String, String> {
        let mut files = HashMap::new();
        files.insert("main.stk".to_string(), "p 1;".to_string());
        files.insert("lib/util.stk".to_string(), "sub util { 42 }".to_string());
        files
    }

    #[test]
    fn payload_roundtrips_name_and_source() {
        let payload = encode_payload_v1("hello.pl", "print \"hi\\n\";\n");
        let decoded = decode_payload_v1(&payload).expect("decode");
        assert_eq!(decoded.name, "hello.pl");
        assert_eq!(decoded.source, "print \"hi\\n\";\n");
    }

    #[test]
    fn payload_v2_roundtrips_entry_and_files() {
        let files = sample_bundle();
        let payload = encode_payload_v2("main.stk", &files);
        let decoded = decode_payload_v2(&payload).expect("decode");
        assert_eq!(decoded.entry, "main.stk");
        assert_eq!(decoded.files, files);
    }

    #[test]
    fn decoders_reject_truncated_payloads() {
        // v1: shorter than the length field, then a name length past the end.
        assert!(decode_payload_v1(&[1, 0]).is_none());
        assert!(decode_payload_v1(&[9, 0, 0, 0, b'a']).is_none());
        // v2: shorter than the header, then a valid payload with its last byte removed.
        assert!(decode_payload_v2(&[0u8; 7]).is_none());
        let payload = encode_payload_v2("main.stk", &sample_bundle());
        assert!(decode_payload_v2(&payload[..payload.len() - 1]).is_none());
    }

    #[test]
    fn append_and_load_trailer_roundtrips_on_plain_file() {
        let path = tmp_path("roundtrip");
        // Pretend this is a `stryke` binary: write a non-empty prefix so trailer math is exercised.
        fs::write(
            &path,
            b"not really an ELF, but good enough for trailer tests",
        )
        .unwrap();
        append_embedded_script(&path, "script.pl", "my $x = 1 + 2;").unwrap();
        let loaded = try_load_embedded(&path).expect("load");
        match loaded {
            EmbeddedPayload::Script(s) => {
                assert_eq!(s.name, "script.pl");
                assert_eq!(s.source, "my $x = 1 + 2;");
            }
            EmbeddedPayload::Bundle(_) => panic!("expected Script, got Bundle"),
        }
        fs::remove_file(&path).ok();
    }

    #[test]
    fn append_and_load_bundle_roundtrips_on_plain_file() {
        let path = tmp_path("bundle-roundtrip");
        fs::write(&path, b"pretend stryke binary bytes").unwrap();
        let files = sample_bundle();
        append_embedded_bundle(&path, "main.stk", &files).unwrap();
        let loaded = try_load_embedded(&path).expect("load");
        match loaded {
            EmbeddedPayload::Bundle(b) => {
                assert_eq!(b.entry, "main.stk");
                assert_eq!(b.files, files);
            }
            EmbeddedPayload::Script(_) => panic!("expected Bundle, got Script"),
        }
        fs::remove_file(&path).ok();
    }

    #[test]
    fn legacy_loader_returns_bundle_entry_script() {
        let path = tmp_path("legacy-bundle");
        fs::write(&path, b"pretend stryke binary bytes").unwrap();
        append_embedded_bundle(&path, "main.stk", &sample_bundle()).unwrap();
        let script = try_load_embedded_script(&path).expect("load entry");
        assert_eq!(script.name, "main.stk");
        assert_eq!(script.source, "p 1;");
        fs::remove_file(&path).ok();
    }

    #[test]
    fn legacy_loader_returns_none_when_entry_is_missing_from_bundle() {
        let path = tmp_path("legacy-missing-entry");
        fs::write(&path, b"pretend stryke binary bytes").unwrap();
        append_embedded_bundle(&path, "absent.stk", &sample_bundle()).unwrap();
        assert!(try_load_embedded_script(&path).is_none());
        fs::remove_file(&path).ok();
    }

    #[test]
    fn load_returns_none_for_file_without_trailer() {
        let path = tmp_path("no-trailer");
        fs::write(&path, b"plain binary, no magic").unwrap();
        assert!(try_load_embedded(&path).is_none());
        fs::remove_file(&path).ok();
    }

    #[test]
    fn load_returns_none_for_short_file() {
        let path = tmp_path("short");
        fs::write(&path, b"abc").unwrap();
        assert!(try_load_embedded(&path).is_none());
        fs::remove_file(&path).ok();
    }

    #[test]
    fn copy_without_trailer_strips_embedded_script() {
        let src = tmp_path("src");
        let mid = tmp_path("mid");
        let dst = tmp_path("dst");
        fs::write(&src, b"pretend stryke binary bytes").unwrap();
        // Layer 1: embed script_a.
        fs::copy(&src, &mid).unwrap();
        append_embedded_script(&mid, "a.pl", "p 1;").unwrap();
        // Layer 2: strip + embed script_b — should yield only script_b.
        copy_exe_without_trailer(&mid, &dst).unwrap();
        append_embedded_script(&dst, "b.pl", "p 2;").unwrap();
        let loaded = try_load_embedded(&dst).expect("load layer 2");
        match loaded {
            EmbeddedPayload::Script(s) => {
                assert_eq!(s.name, "b.pl");
                assert_eq!(s.source, "p 2;");
            }
            EmbeddedPayload::Bundle(_) => panic!("expected Script, got Bundle"),
        }
        // Compare stripped prefix to original: they must match byte-for-byte.
        let original = fs::read(&src).unwrap();
        let mut stripped_dst = fs::read(&dst).unwrap();
        stripped_dst.truncate(original.len());
        assert_eq!(stripped_dst, original);
        fs::remove_file(&src).ok();
        fs::remove_file(&mid).ok();
        fs::remove_file(&dst).ok();
    }

    #[test]
    fn bad_magic_is_ignored() {
        let path = tmp_path("bad-magic");
        let mut bytes = vec![0u8; 200];
        // Write 32 bytes that look like a trailer but with wrong magic at the end.
        let tail = &mut bytes[200 - 32..];
        tail[0..8].copy_from_slice(&10u64.to_le_bytes()); // compressed_len claims 10
        tail[8..16].copy_from_slice(&20u64.to_le_bytes());
        tail[16..20].copy_from_slice(&1u32.to_le_bytes());
        tail[24..32].copy_from_slice(b"NOTPERLZ");
        fs::write(&path, &bytes).unwrap();
        assert!(try_load_embedded(&path).is_none());
        fs::remove_file(&path).ok();
    }
}