Skip to main content

fidius_core/
package.rs

1// Copyright 2026 Colliery, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Source package manifest types and parsing.
16//!
17//! A package is a directory containing plugin source code and a `package.toml`
18//! manifest. The manifest has a fixed header (name, version, interface) and
19//! an extensible `[metadata]` section validated via serde against a
20//! host-defined schema type.
21
22use serde::de::DeserializeOwned;
23use serde::{Deserialize, Serialize};
24use std::path::{Path, PathBuf};
25
/// A parsed package manifest, generic over the host-defined metadata schema.
///
/// The `M` type parameter is the host's metadata schema. If the `[metadata]`
/// section of `package.toml` doesn't deserialize into `M`, parsing fails —
/// this is how schema validation works.
///
/// Deserialization alone does not check that the optional `[python]` and
/// `[wasm]` sections agree with `package.runtime`; [`load_manifest`] runs
/// [`PackageManifest::validate_runtime`] after parsing for that purpose.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackageManifest<M> {
    /// Fixed header fields required by fidius (the `[package]` section).
    pub package: PackageHeader,
    /// Host-defined metadata. Must deserialize from the `[metadata]` section.
    pub metadata: M,
    /// Python-runtime fields. Required when `package.runtime == "python"`,
    /// rejected otherwise. Validated by [`PackageManifest::validate_runtime`]
    /// after deserialization, since serde alone can't enforce cross-section
    /// invariants. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub python: Option<PythonPackageMeta>,
    /// WASM-component fields. Required when `package.runtime == "wasm"`,
    /// rejected otherwise. Validated by [`PackageManifest::validate_runtime`].
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub wasm: Option<WasmPackageMeta>,
}
48
49impl<M> PackageManifest<M> {
50    /// Cross-section validation: runtime + python section must agree.
51    ///
52    /// - `runtime = "rust"` (or absent → "rust") with a `[python]` section is rejected.
53    /// - `runtime = "python"` without a `[python]` section is rejected.
54    /// - Unknown runtime values are rejected (forward compat: a future
55    ///   `runtime = "node"` package shouldn't silently fall back to rust).
56    pub fn validate_runtime(&self) -> Result<(), PackageError> {
57        let runtime = self.package.runtime();
58        match runtime {
59            PackageRuntime::Rust => {
60                if self.python.is_some() {
61                    return Err(PackageError::InvalidManifest(
62                        "[python] section is only valid when runtime = \"python\"".into(),
63                    ));
64                }
65                if self.wasm.is_some() {
66                    return Err(PackageError::InvalidManifest(
67                        "[wasm] section is only valid when runtime = \"wasm\"".into(),
68                    ));
69                }
70                Ok(())
71            }
72            PackageRuntime::Python => {
73                if self.python.is_none() {
74                    return Err(PackageError::InvalidManifest(
75                        "runtime = \"python\" requires a [python] section with `entry_module`"
76                            .into(),
77                    ));
78                }
79                if self.wasm.is_some() {
80                    return Err(PackageError::InvalidManifest(
81                        "[wasm] section is only valid when runtime = \"wasm\"".into(),
82                    ));
83                }
84                Ok(())
85            }
86            PackageRuntime::Wasm => {
87                if self.python.is_some() {
88                    return Err(PackageError::InvalidManifest(
89                        "[python] section is only valid when runtime = \"python\"".into(),
90                    ));
91                }
92                if self.wasm.is_none() {
93                    return Err(PackageError::InvalidManifest(
94                        "runtime = \"wasm\" requires a [wasm] section with `component`".into(),
95                    ));
96                }
97                Ok(())
98            }
99        }
100    }
101}
102
/// Fixed header fields that every package manifest must have.
///
/// `name`, `version`, `interface` and `interface_version` are mandatory;
/// `extension` and `runtime` are optional and are omitted from serialized
/// output when unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackageHeader {
    /// Package name (e.g., `"blur-filter"`).
    pub name: String,
    /// Package version (e.g., `"1.2.0"`).
    pub version: String,
    /// Name of the interface crate this package implements.
    pub interface: String,
    /// Expected interface version.
    pub interface_version: u32,
    /// Custom file extension for `.fid` archives (e.g., `"cloacina"`).
    /// Defaults to `"fid"` when absent (see [`PackageHeader::extension`]).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub extension: Option<String>,
    /// Plugin runtime. `"rust"` (default) → cdylib; `"python"` → Python package
    /// loaded by `fidius-python`; `"wasm"` → WASM component. Stored as the raw
    /// manifest string: [`PackageHeader::runtime_strict`] rejects unknown
    /// values (and is called by [`load_manifest`]), while
    /// [`PackageHeader::runtime`] leniently maps them to rust.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub runtime: Option<String>,
}
124
125impl PackageHeader {
126    /// Returns the package extension, defaulting to `"fid"`.
127    pub fn extension(&self) -> &str {
128        self.extension.as_deref().unwrap_or("fid")
129    }
130
131    /// Returns the runtime kind, defaulting to `Rust` when absent. Returns
132    /// `PackageRuntime::Rust` for unknown values; callers that need to reject
133    /// unknown runtimes should use [`Self::runtime_strict`].
134    pub fn runtime(&self) -> PackageRuntime {
135        match self.runtime.as_deref() {
136            None | Some("rust") => PackageRuntime::Rust,
137            Some("python") => PackageRuntime::Python,
138            Some("wasm") => PackageRuntime::Wasm,
139            // Unknown values fall back to Rust for `runtime()`, but the
140            // strict validator rejects them. Keep the lenient form so display
141            // code never panics on an unfamiliar manifest.
142            _ => PackageRuntime::Rust,
143        }
144    }
145
146    /// Returns the runtime kind, erroring on unknown values.
147    pub fn runtime_strict(&self) -> Result<PackageRuntime, PackageError> {
148        match self.runtime.as_deref() {
149            None | Some("rust") => Ok(PackageRuntime::Rust),
150            Some("python") => Ok(PackageRuntime::Python),
151            Some("wasm") => Ok(PackageRuntime::Wasm),
152            Some(other) => Err(PackageError::InvalidManifest(format!(
153                "unknown runtime '{other}': allowed values are \"rust\", \"python\", \"wasm\""
154            ))),
155        }
156    }
157}
158
/// The kind of runtime a plugin package targets. The host's `PluginHost`
/// uses this to pick the loader it dispatches to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackageRuntime {
    /// The default runtime: a cdylib plus `PluginRegistry`, handled by the
    /// existing dylib loader in `fidius-host`.
    Rust,
    /// A directory of `.py` files (optionally with `vendor/`), loaded by
    /// `fidius-python` through an embedded interpreter. The host crate must
    /// enable the `python` feature.
    Python,
    /// A signed `.wasm` **component** (Component Model + WIT) loaded by the
    /// `WasmComponentExecutor`. Reserved by FIDIUS-I-0021 Phase 1; the loader
    /// lands in Phase 2 (until then, loading a wasm package errors clearly
    /// rather than silently falling back to rust).
    Wasm,
}

impl PackageRuntime {
    /// Canonical spelling of this runtime as written in `package.toml`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Rust => "rust",
            Self::Python => "python",
            Self::Wasm => "wasm",
        }
    }
}

impl std::fmt::Display for PackageRuntime {
    /// Writes the canonical manifest spelling (see [`PackageRuntime::as_str`]).
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &'static str = self.as_str();
        out.write_str(text)
    }
}
193
/// Fields under the `[python]` section of `package.toml`. Required when
/// `package.runtime == "python"`, rejected otherwise.
///
/// `entry_module` is mandatory; `requirements` is optional and omitted from
/// serialized output when unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PythonPackageMeta {
    /// Python module the loader imports first. Dotted-path form (e.g.
    /// `"my_plugin.entry"`) corresponding to a file inside the package
    /// directory or its `vendor/` tree.
    pub entry_module: String,
    /// Path to the requirements file consumed by `fidius pack` to vendor
    /// dependencies into `vendor/`. Defaults to `"requirements.txt"`; use
    /// [`PythonPackageMeta::requirements_path`] to read it with the default
    /// applied.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub requirements: Option<String>,
}
207
/// Fields under the `[wasm]` section of `package.toml`. Required when
/// `package.runtime == "wasm"`, rejected otherwise.
///
/// Only `component` is mandatory; `precompiled` and `capabilities` default
/// to `None` / empty and are omitted from serialized output in that state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WasmPackageMeta {
    /// Component filename inside the package directory (e.g. `"plugin.wasm"`).
    /// A WIT component, not a core module.
    pub component: String,
    /// Optional precompiled `.cwasm` (produced at pack time by the wasmtime
    /// engine; engine/version-specific). When present and valid, the loader
    /// uses the AOT fast path instead of JIT-compiling `component`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precompiled: Option<String>,
    /// WASI capability allow-list (e.g. `["clocks", "random", "sockets"]`).
    /// Empty = deny-all sandbox. Consumed by the capability policy (T-0104);
    /// filesystem is never granted in v1.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub capabilities: Vec<String>,
}
226
227impl PythonPackageMeta {
228    /// Returns the requirements file path, defaulting to `"requirements.txt"`.
229    pub fn requirements_path(&self) -> &str {
230        self.requirements.as_deref().unwrap_or("requirements.txt")
231    }
232}
233
/// Errors that can occur when loading, validating, packing, or unpacking a
/// package.
///
/// `toml::de::Error` and `std::io::Error` convert automatically via `?`
/// (`#[from]`); the remaining variants are constructed explicitly.
#[derive(Debug, thiserror::Error)]
pub enum PackageError {
    /// The `package.toml` file was not found in the given directory.
    /// `path` is the package directory that was searched.
    #[error("package.toml not found in {path}")]
    ManifestNotFound { path: String },

    /// The manifest file could not be parsed as valid TOML or failed
    /// schema validation (the `[metadata]` section didn't match `M`).
    #[error("failed to parse package.toml: {0}")]
    ParseError(#[from] toml::de::Error),

    /// An I/O error occurred reading the manifest file. Because of the
    /// blanket `#[from]` conversion this variant also carries I/O errors
    /// from other filesystem operations in this module (directory walks,
    /// staging and renaming during unpack).
    #[error("io error reading package.toml: {0}")]
    Io(#[from] std::io::Error),

    /// A package build step failed; the payload is a human-readable
    /// description. NOTE(review): no constructor is visible in this part of
    /// the file — presumably raised by build tooling elsewhere; confirm.
    #[error("package build failed: {0}")]
    BuildFailed(String),

    /// Package signature file not found.
    #[error("package.sig not found in {path}")]
    SignatureNotFound { path: String },

    /// Package signature is invalid (no trusted key verified it).
    #[error("package signature invalid for {path}")]
    SignatureInvalid { path: String },

    /// An error occurred creating or reading an archive. Also used to
    /// surface `pip` failures while vendoring Python dependencies at pack
    /// time.
    #[error("archive error: {0}")]
    ArchiveError(String),

    /// The archive does not contain a valid package (e.g. no top-level
    /// directory holding a `package.toml`).
    #[error("invalid archive: {0}")]
    InvalidArchive(String),

    /// Manifest passed serde parsing but failed cross-section validation
    /// (e.g. `runtime = "python"` without a `[python]` section, or unknown
    /// runtime value).
    #[error("invalid manifest: {0}")]
    InvalidManifest(String),

    /// Archive entry contains a `..` component that would escape `dest`.
    #[error("archive entry '{entry}' contains '..' component — rejected")]
    PathTraversal { entry: String },

    /// Archive entry has an absolute path (root or drive prefix).
    #[error("archive entry '{entry}' is an absolute path — rejected")]
    AbsolutePath { entry: String },

    /// Archive contains a symlink entry, which could be used to overwrite
    /// arbitrary files outside `dest` on a follow-up write.
    #[error("archive entry '{entry}' is a symlink — rejected")]
    SymlinkRejected { entry: String },

    /// Archive contains a hardlink entry, same threat model as symlinks.
    #[error("archive entry '{entry}' is a hardlink — rejected")]
    HardlinkRejected { entry: String },

    /// Cumulative decompressed size exceeded the configured cap. `limit` is
    /// whichever cap was hit (the absolute byte cap or the ratio-derived
    /// cap); `actual` is the running total at the point of rejection.
    #[error("archive decompressed size {actual} exceeds limit of {limit} bytes")]
    SizeLimitExceeded { limit: u64, actual: u64 },

    /// Archive contains more entries than the configured cap allows.
    #[error("archive contains more than {limit} entries — rejected")]
    TooManyEntries { limit: u32 },
}
301
/// Safety limits applied while extracting an archive.
///
/// `UnpackOptions::default()` yields strict limits intended for untrusted
/// input. Raise individual fields for known-trusted archives that
/// legitimately exceed those caps (e.g. packages that vendor large native
/// dependencies).
#[derive(Debug, Clone)]
pub struct UnpackOptions {
    /// Cap, in bytes, on the total declared uncompressed size of all
    /// entries. Archives above it are rejected as potential decompression
    /// bombs.
    pub max_decompressed: u64,
    /// Cap on the ratio between the total declared uncompressed size and
    /// the compressed archive size. Archives above it are rejected.
    pub max_ratio: u64,
    /// Cap on the number of archive entries, guarding against tiny-file
    /// spam that exhausts inodes or directory-entry limits.
    pub max_entries: u32,
}

impl Default for UnpackOptions {
    /// Strict defaults: 500 MiB decompressed, 10× expansion ratio,
    /// 10,000 entries.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;

        UnpackOptions {
            max_entries: 10_000,
            max_ratio: 10,
            max_decompressed: 500 * MIB,
        }
    }
}
330
331/// Load and parse a `package.toml` manifest from a package directory.
332///
333/// The type parameter `M` is the host's metadata schema. If the `[metadata]`
334/// section doesn't deserialize into `M`, this returns `PackageError::ParseError`.
335///
336/// # Example
337///
338/// ```ignore
339/// #[derive(Deserialize)]
340/// struct MySchema {
341///     category: String,
342///     min_host_version: String,
343/// }
344///
345/// let manifest = load_manifest::<MySchema>(Path::new("./my-package/"))?;
346/// println!("Package: {} v{}", manifest.package.name, manifest.package.version);
347/// println!("Category: {}", manifest.metadata.category);
348/// ```
349pub fn load_manifest<M: DeserializeOwned>(dir: &Path) -> Result<PackageManifest<M>, PackageError> {
350    let manifest_path = dir.join("package.toml");
351
352    if !manifest_path.exists() {
353        return Err(PackageError::ManifestNotFound {
354            path: dir.display().to_string(),
355        });
356    }
357
358    let content = std::fs::read_to_string(&manifest_path)?;
359    let manifest: PackageManifest<M> = toml::from_str(&content)?;
360    // Reject unknown runtime values + cross-section invariants. We do this
361    // here (not in serde) because the python-section presence depends on
362    // the runtime field, which serde can't express in a single derive.
363    manifest.package.runtime_strict()?;
364    manifest.validate_runtime()?;
365    Ok(manifest)
366}
367
/// Load a manifest validating only the fixed header (accepting any metadata).
///
/// Uses `toml::Value` as the metadata type so any `[metadata]` section is accepted.
/// Useful for CLI tools that validate structure without knowing the host's schema.
///
/// This is [`load_manifest`] instantiated with `toml::Value`, so the same
/// runtime / section validation and the same errors apply.
pub fn load_manifest_untyped(dir: &Path) -> Result<PackageManifest<toml::Value>, PackageError> {
    load_manifest::<toml::Value>(dir)
}
375
376/// Compute a deterministic SHA-256 digest over all package source files.
377///
378/// Walks the package directory, collects all files (excluding `target/`,
379/// `.git/`, and `*.sig` files), sorts by relative path, and feeds each
380/// file's relative path and contents into a SHA-256 hasher.
381///
382/// The resulting 32-byte digest covers the entire package contents.
383/// Sign this digest to protect against tampering.
384pub fn package_digest(dir: &Path) -> Result<[u8; 32], PackageError> {
385    use sha2::{Digest, Sha256};
386
387    let mut files = Vec::new();
388    collect_files(dir, dir, &mut files)?;
389    files.sort();
390
391    let mut hasher = Sha256::new();
392    for rel_path in &files {
393        let abs_path = dir.join(rel_path);
394        let contents = std::fs::read(&abs_path)?;
395        // Hash the relative path (as UTF-8 bytes) then the file contents.
396        // Length-prefix both to prevent ambiguity.
397        let path_bytes = rel_path.as_bytes();
398        hasher.update((path_bytes.len() as u64).to_le_bytes());
399        hasher.update(path_bytes);
400        hasher.update((contents.len() as u64).to_le_bytes());
401        hasher.update(&contents);
402    }
403
404    Ok(hasher.finalize().into())
405}
406
407/// Recursively collect file paths relative to `root`, skipping excluded dirs/files.
408fn collect_files(root: &Path, dir: &Path, out: &mut Vec<String>) -> Result<(), PackageError> {
409    let entries = std::fs::read_dir(dir)?;
410    for entry in entries {
411        let entry = entry?;
412        let path = entry.path();
413        let name = entry.file_name();
414        let name_str = name.to_string_lossy();
415
416        // Skip excluded directories
417        if path.is_dir() {
418            if name_str == "target" || name_str == ".git" {
419                continue;
420            }
421            collect_files(root, &path, out)?;
422            continue;
423        }
424
425        // Skip signature files
426        if name_str.ends_with(".sig") {
427            continue;
428        }
429
430        // Store relative path using forward slashes for cross-platform determinism
431        let rel = path
432            .strip_prefix(root)
433            .expect("path is under root")
434            .to_string_lossy()
435            .replace('\\', "/");
436        out.push(rel);
437    }
438    Ok(())
439}
440
441/// Recursively collect file paths for archiving (includes `.sig` files).
442fn collect_archive_files(
443    root: &Path,
444    dir: &Path,
445    out: &mut Vec<String>,
446) -> Result<(), PackageError> {
447    let entries = std::fs::read_dir(dir)?;
448    for entry in entries {
449        let entry = entry?;
450        let path = entry.path();
451        let name = entry.file_name();
452        let name_str = name.to_string_lossy();
453
454        if path.is_dir() {
455            if name_str == "target" || name_str == ".git" {
456                continue;
457            }
458            collect_archive_files(root, &path, out)?;
459            continue;
460        }
461
462        let rel = path
463            .strip_prefix(root)
464            .expect("path is under root")
465            .to_string_lossy()
466            .replace('\\', "/");
467        out.push(rel);
468    }
469    Ok(())
470}
471
/// Result of packing a package, including any warnings.
///
/// Returned by [`pack_package`]; `unsigned` is the only warning currently
/// carried.
#[derive(Debug)]
pub struct PackResult {
    /// Path to the created archive (`.fid` by default; the manifest's
    /// `extension` field can change the suffix of the default output name).
    pub path: PathBuf,
    /// Whether the package was unsigned (no `package.sig` found in the
    /// package directory at pack time).
    pub unsigned: bool,
}
480
481/// Vendor Python dependencies into `<dir>/vendor/` by invoking
482/// `python3 -m pip install -r <requirements> --target ./vendor/`.
483///
484/// - If `vendor/` already exists, leave it alone — the plugin author may have
485///   pre-vendored deliberately for reproducibility.
486/// - If the declared requirements file is missing AND `vendor/` is missing,
487///   emit a tracing warning and proceed (zero-dep python plugin).
488/// - If pip fails, surface its stderr as `PackageError::ArchiveError` so the
489///   user sees the resolver/build error directly.
490fn vendor_python_deps(dir: &Path, py: &PythonPackageMeta) -> Result<(), PackageError> {
491    let vendor_dir = dir.join("vendor");
492    if vendor_dir.exists() {
493        tracing::debug!(
494            vendor = %vendor_dir.display(),
495            "pre-existing vendor/ directory — using as-is, skipping pip"
496        );
497        return Ok(());
498    }
499
500    let req_path = dir.join(py.requirements_path());
501    if !req_path.exists() {
502        tracing::warn!(
503            package = %dir.display(),
504            requirements = %req_path.display(),
505            "python package has no requirements file and no vendor/ — packaging without deps"
506        );
507        return Ok(());
508    }
509
510    tracing::info!(
511        requirements = %req_path.display(),
512        vendor = %vendor_dir.display(),
513        "vendoring python deps via pip"
514    );
515
516    // `python3 -m pip` rather than bare `pip` so we use whichever interpreter
517    // happens to be on PATH and avoid relying on a separately-installed pip
518    // shim. `Command` invokes the binary directly, bypassing shell aliases.
519    let output = std::process::Command::new("python3")
520        .arg("-m")
521        .arg("pip")
522        .arg("install")
523        .arg("-r")
524        .arg(&req_path)
525        .arg("--target")
526        .arg(&vendor_dir)
527        .arg("--quiet")
528        .output()
529        .map_err(|e| {
530            PackageError::ArchiveError(format!(
531                "failed to invoke `python3 -m pip` (is python3 on PATH?): {e}"
532            ))
533        })?;
534
535    if !output.status.success() {
536        let stderr = String::from_utf8_lossy(&output.stderr);
537        return Err(PackageError::ArchiveError(format!(
538            "pip install failed (exit {}):\n{}",
539            output.status.code().unwrap_or(-1),
540            stderr.trim()
541        )));
542    }
543
544    Ok(())
545}
546
547/// Create a `.fid` archive (tar + bzip2) from a package directory.
548///
549/// The archive contains a single top-level directory `{name}-{version}/`
550/// with all source files. Excludes `target/` and `.git/` directories.
551/// Includes `package.sig` if present.
552///
553/// For Python packages (`runtime = "python"`), if a `requirements.txt` is
554/// declared and a `vendor/` directory does not yet exist, `pip install -r
555/// <requirements> --target ./vendor/` runs first and the result is included
556/// in the archive. Pre-existing `vendor/` is respected and used as-is.
557///
558/// If `output` is `None`, the archive is written to the current directory
559/// as `{name}-{version}.fid`.
560pub fn pack_package(dir: &Path, output: Option<&Path>) -> Result<PackResult, PackageError> {
561    use bzip2::write::BzEncoder;
562    use bzip2::Compression;
563
564    let manifest = load_manifest_untyped(dir)?;
565    let pkg = &manifest.package;
566    let prefix = format!("{}-{}", pkg.name, pkg.version);
567    let ext = pkg.extension();
568
569    // For Python packages: vendor declared deps into vendor/ before archiving.
570    // Pre-existing vendor/ is respected (plugin author may pre-vendor for
571    // reproducibility), missing requirements + missing vendor/ produces a
572    // tracing warning but is not fatal (a Python plugin with no deps is fine).
573    if matches!(pkg.runtime(), PackageRuntime::Python) {
574        if let Some(py_meta) = manifest.python.as_ref() {
575            vendor_python_deps(dir, py_meta)?;
576        }
577    }
578
579    let unsigned = !dir.join("package.sig").exists();
580
581    let out_path = match output {
582        Some(p) => p.to_path_buf(),
583        None => PathBuf::from(format!("{prefix}.{ext}")),
584    };
585
586    let file = std::fs::File::create(&out_path).map_err(|e| {
587        PackageError::ArchiveError(format!("failed to create {}: {e}", out_path.display()))
588    })?;
589
590    let encoder = BzEncoder::new(file, Compression::best());
591    let mut tar = tar::Builder::new(encoder);
592
593    let mut files = Vec::new();
594    collect_archive_files(dir, dir, &mut files)?;
595    files.sort();
596
597    for rel_path in &files {
598        let abs_path = dir.join(rel_path);
599        let archive_path = format!("{prefix}/{rel_path}");
600        tar.append_path_with_name(&abs_path, &archive_path)
601            .map_err(|e| PackageError::ArchiveError(format!("failed to add {rel_path}: {e}")))?;
602    }
603
604    tar.into_inner()
605        .map_err(|e| PackageError::ArchiveError(format!("failed to finish bz2 stream: {e}")))?
606        .finish()
607        .map_err(|e| PackageError::ArchiveError(format!("failed to finish bz2 stream: {e}")))?;
608
609    Ok(PackResult {
610        path: out_path,
611        unsigned,
612    })
613}
614
/// Extract a `.fid` archive (tar + bzip2) to a destination directory using
/// strict safety defaults.
///
/// Returns the path to the extracted top-level package directory, which is
/// guaranteed to exist inside `dest` and contain a `package.toml`.
///
/// This function validates every archive entry before extracting and rejects
/// archives containing: path-traversal components (`..`), absolute paths,
/// symlinks, hardlinks, more than 10,000 entries, or a cumulative declared
/// decompressed size exceeding 500 MB or 10× the compressed archive size.
///
/// Extraction is staged inside a temporary directory under `dest` (which is
/// created if missing) and the package directory is moved into place with a
/// single rename on success. If validation fails mid-archive, no files are
/// left in `dest`. A directory of the same name already present in `dest`
/// is removed before the move.
///
/// For archives that legitimately exceed the default caps, use
/// [`unpack_package_with_options`].
pub fn unpack_package(archive: &Path, dest: &Path) -> Result<PathBuf, PackageError> {
    // Delegate with the strict defaults from `UnpackOptions::default()`.
    unpack_package_with_options(archive, dest, &UnpackOptions::default())
}
635
636/// Extract a `.fid` archive with caller-provided safety limits.
637///
638/// See [`unpack_package`] for the default-strict variant. Use this when the
639/// archive's size or entry count legitimately exceeds the defaults.
640pub fn unpack_package_with_options(
641    archive: &Path,
642    dest: &Path,
643    options: &UnpackOptions,
644) -> Result<PathBuf, PackageError> {
645    use bzip2::read::BzDecoder;
646    use std::path::Component;
647
648    let file = std::fs::File::open(archive).map_err(|e| {
649        PackageError::ArchiveError(format!("failed to open {}: {e}", archive.display()))
650    })?;
651    let compressed_size = file.metadata().map(|m| m.len()).unwrap_or(0);
652
653    let decoder = BzDecoder::new(file);
654    let mut tar = tar::Archive::new(decoder);
655
656    // Stage extraction inside `dest` so a failed or rejected archive leaves
657    // nothing behind. `dest` must already exist.
658    std::fs::create_dir_all(dest).map_err(PackageError::Io)?;
659    let staging = tempfile::TempDir::new_in(dest).map_err(PackageError::Io)?;
660    let staging_path = staging.path();
661
662    let ratio_cap = compressed_size.saturating_mul(options.max_ratio);
663    let mut total: u64 = 0;
664    let mut count: u32 = 0;
665
666    let entries = tar.entries().map_err(|e| {
667        PackageError::ArchiveError(format!("failed to read {}: {e}", archive.display()))
668    })?;
669
670    for entry in entries {
671        let mut entry = entry.map_err(|e| {
672            PackageError::ArchiveError(format!("failed to read archive entry: {e}"))
673        })?;
674
675        count = count.saturating_add(1);
676        if count > options.max_entries {
677            return Err(PackageError::TooManyEntries {
678                limit: options.max_entries,
679            });
680        }
681
682        let path = entry
683            .path()
684            .map_err(|e| PackageError::ArchiveError(format!("invalid entry path: {e}")))?
685            .into_owned();
686        let entry_display = path.display().to_string();
687
688        // 1. Reject link entries. A symlink or hardlink followed by a regular
689        // file at the same path can overwrite files outside `dest`.
690        let entry_type = entry.header().entry_type();
691        if entry_type.is_symlink() {
692            return Err(PackageError::SymlinkRejected {
693                entry: entry_display,
694            });
695        }
696        if entry_type.is_hard_link() {
697            return Err(PackageError::HardlinkRejected {
698                entry: entry_display,
699            });
700        }
701
702        // 2. Reject `..` components and absolute paths. The `tar` crate has
703        // best-effort guards but they are platform-dependent; check explicitly.
704        for component in path.components() {
705            match component {
706                Component::ParentDir => {
707                    return Err(PackageError::PathTraversal {
708                        entry: entry_display,
709                    });
710                }
711                Component::RootDir | Component::Prefix(_) => {
712                    return Err(PackageError::AbsolutePath {
713                        entry: entry_display,
714                    });
715                }
716                _ => {}
717            }
718        }
719
720        // 3. Enforce cumulative declared-size budget. Tar's own parsing
721        // enforces that actual entry bytes match the declared header size,
722        // so trusting the header here is safe against bomb archives.
723        let declared = entry.header().size().unwrap_or(0);
724        total = total.saturating_add(declared);
725        if total > options.max_decompressed {
726            return Err(PackageError::SizeLimitExceeded {
727                limit: options.max_decompressed,
728                actual: total,
729            });
730        }
731        if compressed_size > 0 && options.max_ratio > 0 && total > ratio_cap {
732            return Err(PackageError::SizeLimitExceeded {
733                limit: ratio_cap,
734                actual: total,
735            });
736        }
737
738        // 4. Extract into the staging area. `unpack_in` itself rejects paths
739        // that escape the base directory, but our explicit checks above mean
740        // we never get here with a dangerous path.
741        entry.unpack_in(staging_path).map_err(|e| {
742            PackageError::ArchiveError(format!("failed to extract entry '{}': {e}", path.display()))
743        })?;
744    }
745
746    // Find the top-level package directory inside staging.
747    let mut pkg_dir_staging: Option<PathBuf> = None;
748    for entry in std::fs::read_dir(staging_path).map_err(PackageError::Io)? {
749        let entry = entry.map_err(PackageError::Io)?;
750        let path = entry.path();
751        if path.is_dir() && path.join("package.toml").exists() {
752            pkg_dir_staging = Some(path);
753            break;
754        }
755    }
756    let pkg_dir_staging = pkg_dir_staging.ok_or_else(|| {
757        PackageError::InvalidArchive("archive does not contain a package.toml".to_string())
758    })?;
759
760    // Atomically move the validated package directory to its final location
761    // inside `dest`. If a directory with the same name already exists it is
762    // removed first, matching the prior `tar::Archive::unpack` behaviour.
763    let pkg_name = pkg_dir_staging
764        .file_name()
765        .ok_or_else(|| {
766            PackageError::InvalidArchive("extracted package has no directory name".to_string())
767        })?
768        .to_os_string();
769    let final_path = dest.join(&pkg_name);
770    if final_path.exists() {
771        std::fs::remove_dir_all(&final_path).map_err(PackageError::Io)?;
772    }
773    std::fs::rename(&pkg_dir_staging, &final_path).map_err(PackageError::Io)?;
774
775    // `staging` TempDir drops here; any residual files are cleaned up.
776    Ok(final_path)
777}
778
779#[cfg(test)]
780mod tests {
781    use super::*;
782    use tempfile::TempDir;
783
784    fn write_manifest(dir: &Path, content: &str) {
785        std::fs::write(dir.join("package.toml"), content).unwrap();
786    }
787
    /// Metadata schema used by the typed-manifest tests in this module.
    #[derive(Debug, Deserialize, PartialEq)]
    struct TestMeta {
        /// Required field: it has no serde default, so a `[metadata]` table
        /// without it fails to deserialize.
        category: String,
        /// Optional field: falls back to an empty vector when absent.
        #[serde(default)]
        tags: Vec<String>,
    }
794
795    #[test]
796    fn valid_manifest_parses() {
797        let tmp = TempDir::new().unwrap();
798        write_manifest(
799            tmp.path(),
800            r#"
801            [package]
802            name = "test-pkg"
803            version = "1.0.0"
804            interface = "my-api"
805            interface_version = 1
806
807            [metadata]
808            category = "testing"
809            tags = ["a", "b"]
810            "#,
811        );
812
813        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
814        assert_eq!(m.package.name, "test-pkg");
815        assert_eq!(m.package.version, "1.0.0");
816        assert_eq!(m.package.interface, "my-api");
817        assert_eq!(m.package.interface_version, 1);
818        assert_eq!(m.metadata.category, "testing");
819        assert_eq!(m.metadata.tags, vec!["a", "b"]);
820    }
821
822    #[test]
823    fn missing_required_metadata_field_fails() {
824        let tmp = TempDir::new().unwrap();
825        write_manifest(
826            tmp.path(),
827            r#"
828            [package]
829            name = "bad-pkg"
830            version = "1.0.0"
831            interface = "my-api"
832            interface_version = 1
833
834            [metadata]
835            # missing required "category" field
836            tags = ["x"]
837            "#,
838        );
839
840        let result = load_manifest::<TestMeta>(tmp.path());
841        assert!(result.is_err());
842        let err = result.unwrap_err().to_string();
843        assert!(
844            err.contains("category"),
845            "error should mention missing field: {err}"
846        );
847    }
848
849    #[test]
850    fn missing_manifest_returns_not_found() {
851        let tmp = TempDir::new().unwrap();
852        let result = load_manifest::<TestMeta>(tmp.path());
853        assert!(matches!(result, Err(PackageError::ManifestNotFound { .. })));
854    }
855
856    #[test]
857    fn extra_metadata_fields_ignored() {
858        let tmp = TempDir::new().unwrap();
859        write_manifest(
860            tmp.path(),
861            r#"
862            [package]
863            name = "extra-pkg"
864            version = "1.0.0"
865            interface = "my-api"
866            interface_version = 1
867
868            [metadata]
869            category = "testing"
870            unknown_field = "ignored"
871            "#,
872        );
873
874        // TestMeta doesn't have unknown_field — should still parse (serde ignores unknown by default)
875        let m = load_manifest::<TestMeta>(tmp.path());
876        assert!(m.is_ok());
877        assert_eq!(m.unwrap().metadata.category, "testing");
878    }
879
880    #[test]
881    fn untyped_manifest_accepts_any_metadata() {
882        let tmp = TempDir::new().unwrap();
883        write_manifest(
884            tmp.path(),
885            r#"
886            [package]
887            name = "any-pkg"
888            version = "1.0.0"
889            interface = "my-api"
890            interface_version = 1
891
892            [metadata]
893            foo = "bar"
894            count = 42
895            nested = { a = 1, b = 2 }
896            "#,
897        );
898
899        let m = load_manifest_untyped(tmp.path()).unwrap();
900        assert_eq!(m.package.name, "any-pkg");
901        assert!(m.metadata.is_table());
902    }
903
904    #[test]
905    fn digest_is_deterministic() {
906        let tmp = TempDir::new().unwrap();
907        write_manifest(tmp.path(), "[package]\nname = \"test\"\nversion = \"1.0.0\"\ninterface = \"api\"\ninterface_version = 1\n\n[metadata]\nk = \"v\"\n");
908        std::fs::write(tmp.path().join("src.rs"), b"fn main() {}").unwrap();
909
910        let d1 = package_digest(tmp.path()).unwrap();
911        let d2 = package_digest(tmp.path()).unwrap();
912        assert_eq!(d1, d2);
913    }
914
915    #[test]
916    fn digest_changes_on_file_modification() {
917        let tmp = TempDir::new().unwrap();
918        write_manifest(tmp.path(), "[package]\nname = \"test\"\nversion = \"1.0.0\"\ninterface = \"api\"\ninterface_version = 1\n\n[metadata]\nk = \"v\"\n");
919        std::fs::write(tmp.path().join("src.rs"), b"fn main() {}").unwrap();
920
921        let d1 = package_digest(tmp.path()).unwrap();
922
923        std::fs::write(tmp.path().join("src.rs"), b"fn main() { evil() }").unwrap();
924        let d2 = package_digest(tmp.path()).unwrap();
925
926        assert_ne!(d1, d2);
927    }
928
929    #[test]
930    fn digest_excludes_target_and_sig() {
931        let tmp = TempDir::new().unwrap();
932        write_manifest(tmp.path(), "[package]\nname = \"test\"\nversion = \"1.0.0\"\ninterface = \"api\"\ninterface_version = 1\n\n[metadata]\nk = \"v\"\n");
933        std::fs::write(tmp.path().join("src.rs"), b"fn main() {}").unwrap();
934
935        let d1 = package_digest(tmp.path()).unwrap();
936
937        // Adding target/ dir and .sig file should not change digest
938        std::fs::create_dir(tmp.path().join("target")).unwrap();
939        std::fs::write(tmp.path().join("target/output.dylib"), b"binary").unwrap();
940        std::fs::write(tmp.path().join("package.sig"), b"sig bytes").unwrap();
941
942        let d2 = package_digest(tmp.path()).unwrap();
943        assert_eq!(d1, d2);
944    }
945
946    fn make_package(dir: &Path) {
947        write_manifest(
948            dir,
949            r#"
950            [package]
951            name = "test-pkg"
952            version = "2.0.0"
953            interface = "my-api"
954            interface_version = 1
955
956            [metadata]
957            category = "testing"
958            "#,
959        );
960        std::fs::create_dir_all(dir.join("src")).unwrap();
961        std::fs::write(dir.join("src/lib.rs"), b"fn hello() {}").unwrap();
962    }
963
964    #[test]
965    fn pack_unpack_round_trip() {
966        let pkg_dir = TempDir::new().unwrap();
967        make_package(pkg_dir.path());
968
969        let out_dir = TempDir::new().unwrap();
970        let fid_path = out_dir.path().join("test-pkg-2.0.0.fid");
971
972        let result = pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
973        assert_eq!(result.path, fid_path);
974        assert!(fid_path.exists());
975        assert!(result.unsigned);
976
977        let extract_dir = TempDir::new().unwrap();
978        let extracted = unpack_package(&fid_path, extract_dir.path()).unwrap();
979
980        assert!(extracted.join("package.toml").exists());
981        assert!(extracted.join("src/lib.rs").exists());
982        assert_eq!(
983            extracted.file_name().unwrap().to_str().unwrap(),
984            "test-pkg-2.0.0"
985        );
986    }
987
988    #[test]
989    fn pack_includes_sig_file() {
990        let pkg_dir = TempDir::new().unwrap();
991        make_package(pkg_dir.path());
992        std::fs::write(pkg_dir.path().join("package.sig"), b"fake-sig").unwrap();
993
994        let out_dir = TempDir::new().unwrap();
995        let fid_path = out_dir.path().join("out.fid");
996
997        let result = pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
998        assert!(!result.unsigned);
999
1000        let extract_dir = TempDir::new().unwrap();
1001        let extracted = unpack_package(&fid_path, extract_dir.path()).unwrap();
1002        assert!(extracted.join("package.sig").exists());
1003    }
1004
1005    #[test]
1006    fn pack_excludes_target_and_git() {
1007        let pkg_dir = TempDir::new().unwrap();
1008        make_package(pkg_dir.path());
1009        std::fs::create_dir(pkg_dir.path().join("target")).unwrap();
1010        std::fs::write(pkg_dir.path().join("target/out.dylib"), b"bin").unwrap();
1011        std::fs::create_dir(pkg_dir.path().join(".git")).unwrap();
1012        std::fs::write(pkg_dir.path().join(".git/HEAD"), b"ref").unwrap();
1013
1014        let out_dir = TempDir::new().unwrap();
1015        let fid_path = out_dir.path().join("out.fid");
1016        pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
1017
1018        let extract_dir = TempDir::new().unwrap();
1019        let extracted = unpack_package(&fid_path, extract_dir.path()).unwrap();
1020        assert!(!extracted.join("target").exists());
1021        assert!(!extracted.join(".git").exists());
1022    }
1023
1024    #[test]
1025    fn unpack_invalid_archive_no_manifest() {
1026        let pkg_dir = TempDir::new().unwrap();
1027        // Create a valid bz2 tar but with no package.toml
1028        std::fs::create_dir_all(pkg_dir.path().join("src")).unwrap();
1029        std::fs::write(pkg_dir.path().join("src/lib.rs"), b"fn x() {}").unwrap();
1030
1031        let out_dir = TempDir::new().unwrap();
1032        let fid_path = out_dir.path().join("bad.fid");
1033
1034        // Manually create a tar.bz2 without package.toml
1035        {
1036            use bzip2::write::BzEncoder;
1037            use bzip2::Compression;
1038
1039            let file = std::fs::File::create(&fid_path).unwrap();
1040            let encoder = BzEncoder::new(file, Compression::default());
1041            let mut tar = tar::Builder::new(encoder);
1042            tar.append_path_with_name(
1043                pkg_dir.path().join("src/lib.rs"),
1044                "no-manifest-1.0.0/src/lib.rs",
1045            )
1046            .unwrap();
1047            tar.into_inner().unwrap().finish().unwrap();
1048        }
1049
1050        let extract_dir = TempDir::new().unwrap();
1051        let result = unpack_package(&fid_path, extract_dir.path());
1052        assert!(result.is_err());
1053        let err = result.unwrap_err().to_string();
1054        assert!(err.contains("package.toml"), "error was: {err}");
1055    }
1056
1057    #[test]
1058    fn pack_default_output_name() {
1059        let pkg_dir = TempDir::new().unwrap();
1060        make_package(pkg_dir.path());
1061
1062        let out_dir = TempDir::new().unwrap();
1063        let out_path = out_dir.path().join("test-pkg-2.0.0.fid");
1064
1065        let result = pack_package(pkg_dir.path(), Some(&out_path)).unwrap();
1066        assert_eq!(result.path, out_path);
1067        assert!(out_path.exists());
1068    }
1069
1070    #[test]
1071    fn pack_custom_extension() {
1072        let pkg_dir = TempDir::new().unwrap();
1073        write_manifest(
1074            pkg_dir.path(),
1075            r#"
1076            [package]
1077            name = "my-plugin"
1078            version = "0.3.0"
1079            interface = "my-api"
1080            interface_version = 1
1081            extension = "cloacina"
1082
1083            [metadata]
1084            category = "testing"
1085            "#,
1086        );
1087        std::fs::create_dir_all(pkg_dir.path().join("src")).unwrap();
1088        std::fs::write(pkg_dir.path().join("src/lib.rs"), b"fn hello() {}").unwrap();
1089
1090        let out_dir = TempDir::new().unwrap();
1091        let out_path = out_dir.path().join("my-plugin-0.3.0.cloacina");
1092
1093        let result = pack_package(pkg_dir.path(), Some(&out_path)).unwrap();
1094        assert_eq!(result.path, out_path);
1095        assert!(out_path.exists());
1096
1097        // Verify it unpacks correctly
1098        let extract_dir = TempDir::new().unwrap();
1099        let extracted = unpack_package(&out_path, extract_dir.path()).unwrap();
1100        assert!(extracted.join("package.toml").exists());
1101    }
1102
1103    #[test]
1104    fn extension_defaults_to_fid() {
1105        let header = PackageHeader {
1106            name: "test".to_string(),
1107            version: "1.0.0".to_string(),
1108            interface: "api".to_string(),
1109            interface_version: 1,
1110            extension: None,
1111            runtime: None,
1112        };
1113        assert_eq!(header.extension(), "fid");
1114
1115        let header_custom = PackageHeader {
1116            extension: Some("cloacina".to_string()),
1117            ..header
1118        };
1119        assert_eq!(header_custom.extension(), "cloacina");
1120    }
1121
1122    // ---- Python runtime manifest tests ----
1123
1124    #[test]
1125    fn rust_runtime_default_when_absent() {
1126        let tmp = TempDir::new().unwrap();
1127        write_manifest(
1128            tmp.path(),
1129            r#"
1130            [package]
1131            name = "rust-pkg"
1132            version = "1.0.0"
1133            interface = "api"
1134            interface_version = 1
1135
1136            [metadata]
1137            category = "rust"
1138            "#,
1139        );
1140        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
1141        assert_eq!(m.package.runtime(), PackageRuntime::Rust);
1142        assert!(m.python.is_none());
1143    }
1144
1145    #[test]
1146    fn python_runtime_with_python_section_parses() {
1147        let tmp = TempDir::new().unwrap();
1148        write_manifest(
1149            tmp.path(),
1150            r#"
1151            [package]
1152            name = "py-pkg"
1153            version = "0.1.0"
1154            interface = "api"
1155            interface_version = 1
1156            runtime = "python"
1157
1158            [metadata]
1159            category = "python"
1160
1161            [python]
1162            entry_module = "py_pkg.entry"
1163            requirements = "deps.txt"
1164            "#,
1165        );
1166        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
1167        assert_eq!(m.package.runtime(), PackageRuntime::Python);
1168        let py = m.python.as_ref().expect("python section");
1169        assert_eq!(py.entry_module, "py_pkg.entry");
1170        assert_eq!(py.requirements_path(), "deps.txt");
1171    }
1172
1173    #[test]
1174    fn python_runtime_requirements_default() {
1175        let tmp = TempDir::new().unwrap();
1176        write_manifest(
1177            tmp.path(),
1178            r#"
1179            [package]
1180            name = "py-pkg"
1181            version = "0.1.0"
1182            interface = "api"
1183            interface_version = 1
1184            runtime = "python"
1185
1186            [metadata]
1187            category = "python"
1188
1189            [python]
1190            entry_module = "py_pkg.entry"
1191            "#,
1192        );
1193        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
1194        assert_eq!(
1195            m.python.as_ref().unwrap().requirements_path(),
1196            "requirements.txt"
1197        );
1198    }
1199
1200    #[test]
1201    fn python_runtime_without_python_section_rejected() {
1202        let tmp = TempDir::new().unwrap();
1203        write_manifest(
1204            tmp.path(),
1205            r#"
1206            [package]
1207            name = "py-pkg"
1208            version = "0.1.0"
1209            interface = "api"
1210            interface_version = 1
1211            runtime = "python"
1212
1213            [metadata]
1214            category = "python"
1215            "#,
1216        );
1217        let err = load_manifest::<TestMeta>(tmp.path()).unwrap_err();
1218        match err {
1219            PackageError::InvalidManifest(msg) => {
1220                assert!(
1221                    msg.contains("entry_module"),
1222                    "expected message about entry_module, got: {msg}"
1223                );
1224            }
1225            other => panic!("expected InvalidManifest, got {other:?}"),
1226        }
1227    }
1228
1229    #[test]
1230    fn python_section_without_python_runtime_rejected() {
1231        let tmp = TempDir::new().unwrap();
1232        write_manifest(
1233            tmp.path(),
1234            r#"
1235            [package]
1236            name = "rust-pkg"
1237            version = "1.0.0"
1238            interface = "api"
1239            interface_version = 1
1240
1241            [metadata]
1242            category = "rust"
1243
1244            [python]
1245            entry_module = "py_pkg.entry"
1246            "#,
1247        );
1248        let err = load_manifest::<TestMeta>(tmp.path()).unwrap_err();
1249        assert!(matches!(err, PackageError::InvalidManifest(_)));
1250    }
1251
1252    #[test]
1253    fn unknown_runtime_rejected() {
1254        let tmp = TempDir::new().unwrap();
1255        write_manifest(
1256            tmp.path(),
1257            r#"
1258            [package]
1259            name = "node-pkg"
1260            version = "0.1.0"
1261            interface = "api"
1262            interface_version = 1
1263            runtime = "node"
1264
1265            [metadata]
1266            category = "node"
1267            "#,
1268        );
1269        let err = load_manifest::<TestMeta>(tmp.path()).unwrap_err();
1270        match err {
1271            PackageError::InvalidManifest(msg) => {
1272                assert!(msg.contains("node"), "got: {msg}");
1273            }
1274            other => panic!("expected InvalidManifest, got {other:?}"),
1275        }
1276    }
1277
1278    #[test]
1279    fn package_runtime_display_and_str() {
1280        assert_eq!(PackageRuntime::Rust.as_str(), "rust");
1281        assert_eq!(PackageRuntime::Python.as_str(), "python");
1282        assert_eq!(format!("{}", PackageRuntime::Python), "python");
1283    }
1284
1285    // ---- Attack-class tests for unpack_package ----
1286
1287    use bzip2::write::BzEncoder;
1288    use bzip2::Compression;
1289    use std::io::Read;
1290    use tar::{EntryType, Header};
1291
1292    /// Build a bz2-compressed tar archive from a builder callback.
1293    fn build_archive<F>(path: &Path, build: F)
1294    where
1295        F: FnOnce(&mut tar::Builder<BzEncoder<std::fs::File>>),
1296    {
1297        let file = std::fs::File::create(path).unwrap();
1298        let encoder = BzEncoder::new(file, Compression::default());
1299        let mut tar = tar::Builder::new(encoder);
1300        build(&mut tar);
1301        tar.into_inner().unwrap().finish().unwrap();
1302    }
1303
1304    /// Write a raw entry name directly into a GNU tar header, bypassing
1305    /// `set_path`'s safety validation. This is only safe in tests where we
1306    /// deliberately craft malicious paths.
1307    fn write_name(header: &mut Header, path: &str) {
1308        let gnu = header.as_gnu_mut().expect("gnu header");
1309        let bytes = path.as_bytes();
1310        assert!(bytes.len() < gnu.name.len(), "test path too long");
1311        for slot in gnu.name.iter_mut() {
1312            *slot = 0;
1313        }
1314        gnu.name[..bytes.len()].copy_from_slice(bytes);
1315    }
1316
1317    fn write_linkname(header: &mut Header, link: &str) {
1318        let gnu = header.as_gnu_mut().expect("gnu header");
1319        let bytes = link.as_bytes();
1320        assert!(bytes.len() < gnu.linkname.len(), "test linkname too long");
1321        for slot in gnu.linkname.iter_mut() {
1322            *slot = 0;
1323        }
1324        gnu.linkname[..bytes.len()].copy_from_slice(bytes);
1325    }
1326
1327    /// Append a regular file entry with explicit path and content bytes.
1328    /// Uses the low-level name-writing helper so arbitrary (including
1329    /// malicious) paths can be tested.
1330    fn append_regular(tar: &mut tar::Builder<BzEncoder<std::fs::File>>, path: &str, data: &[u8]) {
1331        let mut header = Header::new_gnu();
1332        write_name(&mut header, path);
1333        header.set_size(data.len() as u64);
1334        header.set_mode(0o644);
1335        header.set_entry_type(EntryType::Regular);
1336        header.set_cksum();
1337        tar.append(&header, data).unwrap();
1338    }
1339
1340    /// Append a link entry with a chosen EntryType (symlink/hardlink).
1341    fn append_link(
1342        tar: &mut tar::Builder<BzEncoder<std::fs::File>>,
1343        path: &str,
1344        link_target: &str,
1345        ty: EntryType,
1346    ) {
1347        let mut header = Header::new_gnu();
1348        write_name(&mut header, path);
1349        write_linkname(&mut header, link_target);
1350        header.set_size(0);
1351        header.set_mode(0o644);
1352        header.set_entry_type(ty);
1353        header.set_cksum();
1354        tar.append(&header, std::io::empty()).unwrap();
1355    }
1356
1357    #[test]
1358    fn unpack_rejects_parent_dir_component() {
1359        let out = TempDir::new().unwrap();
1360        let archive = out.path().join("evil.fid");
1361        build_archive(&archive, |tar| {
1362            append_regular(tar, "../escaped", b"pwn");
1363        });
1364
1365        let extract = TempDir::new().unwrap();
1366        let err = unpack_package(&archive, extract.path()).unwrap_err();
1367        assert!(
1368            matches!(err, PackageError::PathTraversal { .. }),
1369            "expected PathTraversal, got: {err:?}"
1370        );
1371        // Nothing leaked outside staging.
1372        assert!(!out.path().join("escaped").exists());
1373    }
1374
1375    #[test]
1376    fn unpack_rejects_absolute_path() {
1377        let out = TempDir::new().unwrap();
1378        let archive = out.path().join("evil.fid");
1379        build_archive(&archive, |tar| {
1380            append_regular(tar, "/tmp/fidius-escape", b"pwn");
1381        });
1382
1383        let extract = TempDir::new().unwrap();
1384        let err = unpack_package(&archive, extract.path()).unwrap_err();
1385        assert!(
1386            matches!(err, PackageError::AbsolutePath { .. }),
1387            "expected AbsolutePath, got: {err:?}"
1388        );
1389    }
1390
1391    #[test]
1392    fn unpack_rejects_symlink() {
1393        let out = TempDir::new().unwrap();
1394        let archive = out.path().join("evil.fid");
1395        build_archive(&archive, |tar| {
1396            append_link(tar, "link", "/etc/passwd", EntryType::Symlink);
1397        });
1398
1399        let extract = TempDir::new().unwrap();
1400        let err = unpack_package(&archive, extract.path()).unwrap_err();
1401        assert!(
1402            matches!(err, PackageError::SymlinkRejected { .. }),
1403            "expected SymlinkRejected, got: {err:?}"
1404        );
1405    }
1406
1407    #[test]
1408    fn unpack_rejects_hardlink() {
1409        let out = TempDir::new().unwrap();
1410        let archive = out.path().join("evil.fid");
1411        build_archive(&archive, |tar| {
1412            append_link(tar, "link", "existing-file", EntryType::Link);
1413        });
1414
1415        let extract = TempDir::new().unwrap();
1416        let err = unpack_package(&archive, extract.path()).unwrap_err();
1417        assert!(
1418            matches!(err, PackageError::HardlinkRejected { .. }),
1419            "expected HardlinkRejected, got: {err:?}"
1420        );
1421    }
1422
1423    #[test]
1424    fn unpack_symlink_then_file_rejected_at_first_entry() {
1425        // Classic symlink-overwrite attack: entry 1 is a symlink to /tmp/foo,
1426        // entry 2 is a regular file at the same path. Our checks reject entry 1
1427        // so entry 2 is never extracted.
1428        let out = TempDir::new().unwrap();
1429        let sentinel_dir = TempDir::new().unwrap();
1430        let sentinel = sentinel_dir.path().join("target");
1431        std::fs::write(&sentinel, b"original").unwrap();
1432
1433        let archive = out.path().join("evil.fid");
1434        build_archive(&archive, |tar| {
1435            append_link(tar, "bad", sentinel.to_str().unwrap(), EntryType::Symlink);
1436            append_regular(tar, "bad", b"clobber");
1437        });
1438
1439        let extract = TempDir::new().unwrap();
1440        let err = unpack_package(&archive, extract.path()).unwrap_err();
1441        assert!(matches!(err, PackageError::SymlinkRejected { .. }));
1442
1443        // The sentinel file outside the extraction directory is untouched.
1444        assert_eq!(std::fs::read(&sentinel).unwrap(), b"original");
1445    }
1446
1447    #[test]
1448    fn unpack_rejects_declared_size_bomb() {
1449        let out = TempDir::new().unwrap();
1450        let archive = out.path().join("bomb.fid");
1451
1452        // Build a tar manually where a header declares a size far above the cap,
1453        // then write matching zero bytes so tar parsing stays consistent.
1454        let file = std::fs::File::create(&archive).unwrap();
1455        let encoder = BzEncoder::new(file, Compression::best());
1456        let mut tar = tar::Builder::new(encoder);
1457
1458        let declared: u64 = 600 * 1024 * 1024; // > 500 MB default cap
1459        let mut header = Header::new_gnu();
1460        header.set_path("bomb.bin").unwrap();
1461        header.set_size(declared);
1462        header.set_mode(0o644);
1463        header.set_entry_type(EntryType::Regular);
1464        header.set_cksum();
1465
1466        // Use a zero-filled reader so the compressed size stays tiny.
1467        let zeros = std::io::repeat(0u8).take(declared);
1468        tar.append(&header, zeros).unwrap();
1469        tar.into_inner().unwrap().finish().unwrap();
1470
1471        let extract = TempDir::new().unwrap();
1472        let err = unpack_package(&archive, extract.path()).unwrap_err();
1473        assert!(
1474            matches!(err, PackageError::SizeLimitExceeded { .. }),
1475            "expected SizeLimitExceeded, got: {err:?}"
1476        );
1477    }
1478
1479    #[test]
1480    fn unpack_rejects_ratio_bomb() {
1481        // Small compressed archive with many small entries whose cumulative
1482        // declared size exceeds `compressed_size * max_ratio` but is still
1483        // under the absolute cap — should be rejected by the ratio check.
1484        let out = TempDir::new().unwrap();
1485        let archive = out.path().join("ratio.fid");
1486
1487        // Default max_ratio is 10. Use a 4 KB-per-entry file that compresses well.
1488        let payload = vec![b'A'; 4096];
1489        build_archive(&archive, |tar| {
1490            for i in 0..10_000u32 {
1491                append_regular(tar, &format!("file-{i:05}.txt"), &payload);
1492            }
1493        });
1494
1495        let extract = TempDir::new().unwrap();
1496        // Tighten both caps so this triggers on ratio rather than absolute cap.
1497        let options = UnpackOptions {
1498            max_decompressed: u64::MAX,
1499            max_ratio: 2,
1500            max_entries: 20_000,
1501        };
1502        let err = unpack_package_with_options(&archive, extract.path(), &options).unwrap_err();
1503        assert!(
1504            matches!(err, PackageError::SizeLimitExceeded { .. }),
1505            "expected SizeLimitExceeded, got: {err:?}"
1506        );
1507    }
1508
1509    #[test]
1510    fn unpack_rejects_too_many_entries() {
1511        let out = TempDir::new().unwrap();
1512        let archive = out.path().join("spam.fid");
1513        build_archive(&archive, |tar| {
1514            for i in 0..50u32 {
1515                append_regular(tar, &format!("f-{i}"), b"");
1516            }
1517        });
1518
1519        let extract = TempDir::new().unwrap();
1520        let options = UnpackOptions {
1521            max_entries: 10,
1522            ..UnpackOptions::default()
1523        };
1524        let err = unpack_package_with_options(&archive, extract.path(), &options).unwrap_err();
1525        assert!(
1526            matches!(err, PackageError::TooManyEntries { limit: 10 }),
1527            "expected TooManyEntries, got: {err:?}"
1528        );
1529    }
1530
1531    #[test]
1532    fn unpack_staging_cleans_up_on_rejection() {
1533        let out = TempDir::new().unwrap();
1534        let archive = out.path().join("evil.fid");
1535        build_archive(&archive, |tar| {
1536            append_regular(tar, "ok/file.txt", b"ok");
1537            append_regular(tar, "../escape", b"bad");
1538        });
1539
1540        let extract = TempDir::new().unwrap();
1541        let _ = unpack_package(&archive, extract.path()).unwrap_err();
1542
1543        // After rejection `extract` must be empty — the partial `ok/` tree
1544        // lived in a TempDir that has since been dropped.
1545        let remaining: Vec<_> = std::fs::read_dir(extract.path())
1546            .unwrap()
1547            .collect::<Result<_, _>>()
1548            .unwrap();
1549        assert!(
1550            remaining.is_empty(),
1551            "extraction dir not cleaned up: {remaining:?}"
1552        );
1553    }
1554
1555    #[test]
1556    fn unpack_with_options_accepts_large_archive() {
1557        // Round-trip a legitimate package under a looser cap to exercise the
1558        // options path end-to-end.
1559        let pkg_dir = TempDir::new().unwrap();
1560        make_package(pkg_dir.path());
1561
1562        let out_dir = TempDir::new().unwrap();
1563        let fid_path = out_dir.path().join("ok.fid");
1564        pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
1565
1566        let extract = TempDir::new().unwrap();
1567        let options = UnpackOptions {
1568            max_decompressed: u64::MAX,
1569            max_ratio: u64::MAX,
1570            max_entries: u32::MAX,
1571        };
1572        let extracted = unpack_package_with_options(&fid_path, extract.path(), &options).unwrap();
1573        assert!(extracted.join("package.toml").exists());
1574    }
1575
1576    // ---- Python pack-time vendoring tests ----
1577
1578    /// Build a minimal Python package directory (manifest + entry .py).
1579    fn make_python_package(dir: &Path, with_requirements: Option<&str>) {
1580        let req_line = if with_requirements.is_some() {
1581            "requirements = \"requirements.txt\"\n"
1582        } else {
1583            ""
1584        };
1585        write_manifest(
1586            dir,
1587            &format!(
1588                r#"
1589                [package]
1590                name = "py-pack-test"
1591                version = "0.1.0"
1592                interface = "api"
1593                interface_version = 1
1594                runtime = "python"
1595
1596                [metadata]
1597                category = "python"
1598
1599                [python]
1600                entry_module = "py_pack_test"
1601                {req_line}
1602                "#
1603            ),
1604        );
1605        std::fs::write(
1606            dir.join("py_pack_test.py"),
1607            b"def hello():\n    return 'hi'\n",
1608        )
1609        .unwrap();
1610        if let Some(req) = with_requirements {
1611            std::fs::write(dir.join("requirements.txt"), req.as_bytes()).unwrap();
1612        }
1613    }
1614
1615    #[test]
1616    fn pack_python_with_prevendored_directory_skips_pip() {
1617        // If vendor/ is present we don't invoke pip — even with a requirements
1618        // file pointing at something pip couldn't possibly resolve. Simulating
1619        // pre-vendoring by hand.
1620        let pkg_dir = TempDir::new().unwrap();
1621        make_python_package(
1622            pkg_dir.path(),
1623            Some("definitely-not-a-real-package==999.999.999"),
1624        );
1625        let vendor = pkg_dir.path().join("vendor");
1626        std::fs::create_dir(&vendor).unwrap();
1627        std::fs::write(
1628            vendor.join("fake_module.py"),
1629            b"# pre-vendored placeholder\n",
1630        )
1631        .unwrap();
1632
1633        let out_dir = TempDir::new().unwrap();
1634        let fid = out_dir.path().join("py.fid");
1635        pack_package(pkg_dir.path(), Some(&fid))
1636            .expect("pack should not invoke pip when vendor/ exists");
1637
1638        let extract = TempDir::new().unwrap();
1639        let extracted = unpack_package(&fid, extract.path()).unwrap();
1640        assert!(extracted.join("vendor/fake_module.py").exists());
1641        assert!(extracted.join("py_pack_test.py").exists());
1642    }
1643
1644    #[test]
1645    fn pack_python_with_no_requirements_or_vendor_warns_but_succeeds() {
1646        let pkg_dir = TempDir::new().unwrap();
1647        make_python_package(pkg_dir.path(), None);
1648
1649        let out_dir = TempDir::new().unwrap();
1650        let fid = out_dir.path().join("py.fid");
1651        pack_package(pkg_dir.path(), Some(&fid))
1652            .expect("zero-dep python plugin should pack successfully");
1653
1654        let extract = TempDir::new().unwrap();
1655        let extracted = unpack_package(&fid, extract.path()).unwrap();
1656        assert!(extracted.join("py_pack_test.py").exists());
1657        assert!(!extracted.join("vendor").exists());
1658    }
1659
1660    #[test]
1661    fn pack_python_with_unresolvable_requirement_surfaces_pip_error() {
1662        // Pip is genuinely invoked here — needs python3+pip on PATH. The test
1663        // is testing the failure-surfacing path: we deliberately ask pip to
1664        // install a package that doesn't exist and assert the error is clear.
1665        // Skipped if python3/pip aren't reachable so CI environments without
1666        // them don't fail.
1667        let probe = std::process::Command::new("python3")
1668            .arg("-m")
1669            .arg("pip")
1670            .arg("--version")
1671            .output();
1672        if probe.map(|o| !o.status.success()).unwrap_or(true) {
1673            eprintln!("skipping: python3 -m pip not available in this environment");
1674            return;
1675        }
1676
1677        let pkg_dir = TempDir::new().unwrap();
1678        make_python_package(
1679            pkg_dir.path(),
1680            Some("fidius-this-package-does-not-exist-9999==1.0\n"),
1681        );
1682
1683        let out_dir = TempDir::new().unwrap();
1684        let fid = out_dir.path().join("py.fid");
1685        let err = pack_package(pkg_dir.path(), Some(&fid)).unwrap_err();
1686        match err {
1687            PackageError::ArchiveError(msg) => {
1688                assert!(
1689                    msg.contains("pip install failed"),
1690                    "expected pip-install error, got: {msg}"
1691                );
1692            }
1693            other => panic!("expected ArchiveError, got {other:?}"),
1694        }
1695    }
1696}