Skip to main content

fidius_core/
package.rs

1// Copyright 2026 Colliery, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Source package manifest types and parsing.
16//!
17//! A package is a directory containing plugin source code and a `package.toml`
18//! manifest. The manifest has a fixed header (name, version, interface) and
19//! an extensible `[metadata]` section validated via serde against a
20//! host-defined schema type.
21
22use serde::de::DeserializeOwned;
23use serde::{Deserialize, Serialize};
24use std::path::{Path, PathBuf};
25
26/// A parsed package manifest, generic over the host-defined metadata schema.
27///
28/// The `M` type parameter is the host's metadata schema. If the `[metadata]`
29/// section of `package.toml` doesn't deserialize into `M`, parsing fails —
30/// this is how schema validation works.
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct PackageManifest<M> {
33    /// Fixed header fields required by fidius.
34    pub package: PackageHeader,
35    /// Host-defined metadata. Must deserialize from the `[metadata]` section.
36    pub metadata: M,
37    /// Python-runtime fields. Required when `package.runtime == "python"`,
38    /// rejected otherwise. Validated by [`PackageManifest::validate_runtime`]
39    /// after deserialization, since serde alone can't enforce cross-section
40    /// invariants.
41    #[serde(default, skip_serializing_if = "Option::is_none")]
42    pub python: Option<PythonPackageMeta>,
43}
44
45impl<M> PackageManifest<M> {
46    /// Cross-section validation: runtime + python section must agree.
47    ///
48    /// - `runtime = "rust"` (or absent → "rust") with a `[python]` section is rejected.
49    /// - `runtime = "python"` without a `[python]` section is rejected.
50    /// - Unknown runtime values are rejected (forward compat: a future
51    ///   `runtime = "node"` package shouldn't silently fall back to rust).
52    pub fn validate_runtime(&self) -> Result<(), PackageError> {
53        let runtime = self.package.runtime();
54        match runtime {
55            PackageRuntime::Rust => {
56                if self.python.is_some() {
57                    return Err(PackageError::InvalidManifest(
58                        "[python] section is only valid when runtime = \"python\"".into(),
59                    ));
60                }
61                Ok(())
62            }
63            PackageRuntime::Python => {
64                if self.python.is_none() {
65                    return Err(PackageError::InvalidManifest(
66                        "runtime = \"python\" requires a [python] section with `entry_module`"
67                            .into(),
68                    ));
69                }
70                Ok(())
71            }
72        }
73    }
74}
75
76/// Fixed header fields that every package manifest must have.
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct PackageHeader {
79    /// Package name (e.g., `"blur-filter"`).
80    pub name: String,
81    /// Package version (e.g., `"1.2.0"`).
82    pub version: String,
83    /// Name of the interface crate this package implements.
84    pub interface: String,
85    /// Expected interface version.
86    pub interface_version: u32,
87    /// Custom file extension for `.fid` archives (e.g., `"cloacina"`).
88    /// Defaults to `"fid"` when absent.
89    #[serde(default, skip_serializing_if = "Option::is_none")]
90    pub extension: Option<String>,
91    /// Plugin runtime. `"rust"` (default) → cdylib; `"python"` → Python package
92    /// loaded by `fidius-python`. Unknown values are rejected at validation
93    /// time (see [`PackageManifest::validate_runtime`]).
94    #[serde(default, skip_serializing_if = "Option::is_none")]
95    pub runtime: Option<String>,
96}
97
98impl PackageHeader {
99    /// Returns the package extension, defaulting to `"fid"`.
100    pub fn extension(&self) -> &str {
101        self.extension.as_deref().unwrap_or("fid")
102    }
103
104    /// Returns the runtime kind, defaulting to `Rust` when absent. Returns
105    /// `PackageRuntime::Rust` for unknown values; callers that need to reject
106    /// unknown runtimes should use [`Self::runtime_strict`].
107    pub fn runtime(&self) -> PackageRuntime {
108        match self.runtime.as_deref() {
109            None | Some("rust") => PackageRuntime::Rust,
110            Some("python") => PackageRuntime::Python,
111            // Unknown values fall back to Rust for `runtime()`, but the
112            // strict validator rejects them. Keep the lenient form so display
113            // code never panics on an unfamiliar manifest.
114            _ => PackageRuntime::Rust,
115        }
116    }
117
118    /// Returns the runtime kind, erroring on unknown values.
119    pub fn runtime_strict(&self) -> Result<PackageRuntime, PackageError> {
120        match self.runtime.as_deref() {
121            None | Some("rust") => Ok(PackageRuntime::Rust),
122            Some("python") => Ok(PackageRuntime::Python),
123            Some(other) => Err(PackageError::InvalidManifest(format!(
124                "unknown runtime '{other}': allowed values are \"rust\", \"python\""
125            ))),
126        }
127    }
128}
129
/// The kind of runtime a plugin targets. The host's `PluginHost` uses this to
/// pick a loader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackageRuntime {
    /// The default: a cdylib plus `PluginRegistry`, loaded by the existing
    /// dylib loader in `fidius-host`.
    Rust,
    /// A directory of `.py` files (optionally with `vendor/`), loaded by
    /// `fidius-python` through an embedded interpreter. The host crate must
    /// enable the `python` feature.
    Python,
}

impl PackageRuntime {
    /// The canonical spelling of this runtime as written in `package.toml`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Rust => "rust",
            Self::Python => "python",
        }
    }
}

impl std::fmt::Display for PackageRuntime {
    /// Writes the same text as [`PackageRuntime::as_str`].
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.as_str();
        out.write_str(text)
    }
}
158
159/// Fields under the `[python]` section of `package.toml`. Required when
160/// `package.runtime == "python"`, rejected otherwise.
161#[derive(Debug, Clone, Serialize, Deserialize)]
162pub struct PythonPackageMeta {
163    /// Python module the loader imports first. Dotted-path form (e.g.
164    /// `"my_plugin.entry"`) corresponding to a file inside the package
165    /// directory or its `vendor/` tree.
166    pub entry_module: String,
167    /// Path to the requirements file consumed by `fidius pack` to vendor
168    /// dependencies into `vendor/`. Defaults to `"requirements.txt"`.
169    #[serde(default, skip_serializing_if = "Option::is_none")]
170    pub requirements: Option<String>,
171}
172
173impl PythonPackageMeta {
174    /// Returns the requirements file path, defaulting to `"requirements.txt"`.
175    pub fn requirements_path(&self) -> &str {
176        self.requirements.as_deref().unwrap_or("requirements.txt")
177    }
178}
179
180/// Errors that can occur when loading a package manifest.
181#[derive(Debug, thiserror::Error)]
182pub enum PackageError {
183    /// The `package.toml` file was not found in the given directory.
184    #[error("package.toml not found in {path}")]
185    ManifestNotFound { path: String },
186
187    /// The manifest file could not be parsed as valid TOML or failed
188    /// schema validation (the `[metadata]` section didn't match `M`).
189    #[error("failed to parse package.toml: {0}")]
190    ParseError(#[from] toml::de::Error),
191
192    /// An I/O error occurred reading the manifest file.
193    #[error("io error reading package.toml: {0}")]
194    Io(#[from] std::io::Error),
195
196    /// Build failed.
197    #[error("package build failed: {0}")]
198    BuildFailed(String),
199
200    /// Package signature file not found.
201    #[error("package.sig not found in {path}")]
202    SignatureNotFound { path: String },
203
204    /// Package signature is invalid (no trusted key verified it).
205    #[error("package signature invalid for {path}")]
206    SignatureInvalid { path: String },
207
208    /// An error occurred creating or reading an archive.
209    #[error("archive error: {0}")]
210    ArchiveError(String),
211
212    /// The archive does not contain a valid package.
213    #[error("invalid archive: {0}")]
214    InvalidArchive(String),
215
216    /// Manifest passed serde parsing but failed cross-section validation
217    /// (e.g. `runtime = "python"` without a `[python]` section, or unknown
218    /// runtime value).
219    #[error("invalid manifest: {0}")]
220    InvalidManifest(String),
221
222    /// Archive entry contains a `..` component that would escape `dest`.
223    #[error("archive entry '{entry}' contains '..' component — rejected")]
224    PathTraversal { entry: String },
225
226    /// Archive entry has an absolute path (root or drive prefix).
227    #[error("archive entry '{entry}' is an absolute path — rejected")]
228    AbsolutePath { entry: String },
229
230    /// Archive contains a symlink entry, which could be used to overwrite
231    /// arbitrary files outside `dest` on a follow-up write.
232    #[error("archive entry '{entry}' is a symlink — rejected")]
233    SymlinkRejected { entry: String },
234
235    /// Archive contains a hardlink entry, same threat model as symlinks.
236    #[error("archive entry '{entry}' is a hardlink — rejected")]
237    HardlinkRejected { entry: String },
238
239    /// Cumulative decompressed size exceeded the configured cap.
240    #[error("archive decompressed size {actual} exceeds limit of {limit} bytes")]
241    SizeLimitExceeded { limit: u64, actual: u64 },
242
243    /// Archive contains more entries than the configured cap allows.
244    #[error("archive contains more than {limit} entries — rejected")]
245    TooManyEntries { limit: u32 },
246}
247
/// Safety limits applied while extracting an archive.
///
/// `UnpackOptions::default()` yields strict limits intended for untrusted
/// input. Raise individual fields only for known-trusted archives that
/// legitimately exceed them (e.g. packages that vendor large native
/// dependencies).
#[derive(Debug, Clone)]
pub struct UnpackOptions {
    /// Cap, in bytes, on the total declared uncompressed size of all
    /// entries. Archives above it are rejected as potential decompression
    /// bombs.
    pub max_decompressed: u64,
    /// Cap on the ratio of total declared uncompressed size to the
    /// compressed archive size. Archives above it are rejected.
    pub max_ratio: u64,
    /// Cap on the number of entries in the archive. Protects against
    /// archives that exhaust inodes or directory-entry limits with many
    /// tiny files.
    pub max_entries: u32,
}

impl Default for UnpackOptions {
    /// Strict defaults: 500 MiB total, 10× compression ratio, 10,000 entries.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;

        UnpackOptions {
            max_entries: 10_000,
            max_ratio: 10,
            max_decompressed: 500 * MIB,
        }
    }
}
276
277/// Load and parse a `package.toml` manifest from a package directory.
278///
279/// The type parameter `M` is the host's metadata schema. If the `[metadata]`
280/// section doesn't deserialize into `M`, this returns `PackageError::ParseError`.
281///
282/// # Example
283///
284/// ```ignore
285/// #[derive(Deserialize)]
286/// struct MySchema {
287///     category: String,
288///     min_host_version: String,
289/// }
290///
291/// let manifest = load_manifest::<MySchema>(Path::new("./my-package/"))?;
292/// println!("Package: {} v{}", manifest.package.name, manifest.package.version);
293/// println!("Category: {}", manifest.metadata.category);
294/// ```
295pub fn load_manifest<M: DeserializeOwned>(dir: &Path) -> Result<PackageManifest<M>, PackageError> {
296    let manifest_path = dir.join("package.toml");
297
298    if !manifest_path.exists() {
299        return Err(PackageError::ManifestNotFound {
300            path: dir.display().to_string(),
301        });
302    }
303
304    let content = std::fs::read_to_string(&manifest_path)?;
305    let manifest: PackageManifest<M> = toml::from_str(&content)?;
306    // Reject unknown runtime values + cross-section invariants. We do this
307    // here (not in serde) because the python-section presence depends on
308    // the runtime field, which serde can't express in a single derive.
309    manifest.package.runtime_strict()?;
310    manifest.validate_runtime()?;
311    Ok(manifest)
312}
313
314/// Load a manifest validating only the fixed header (accepting any metadata).
315///
316/// Uses `toml::Value` as the metadata type so any `[metadata]` section is accepted.
317/// Useful for CLI tools that validate structure without knowing the host's schema.
318pub fn load_manifest_untyped(dir: &Path) -> Result<PackageManifest<toml::Value>, PackageError> {
319    load_manifest::<toml::Value>(dir)
320}
321
322/// Compute a deterministic SHA-256 digest over all package source files.
323///
324/// Walks the package directory, collects all files (excluding `target/`,
325/// `.git/`, and `*.sig` files), sorts by relative path, and feeds each
326/// file's relative path and contents into a SHA-256 hasher.
327///
328/// The resulting 32-byte digest covers the entire package contents.
329/// Sign this digest to protect against tampering.
330pub fn package_digest(dir: &Path) -> Result<[u8; 32], PackageError> {
331    use sha2::{Digest, Sha256};
332
333    let mut files = Vec::new();
334    collect_files(dir, dir, &mut files)?;
335    files.sort();
336
337    let mut hasher = Sha256::new();
338    for rel_path in &files {
339        let abs_path = dir.join(rel_path);
340        let contents = std::fs::read(&abs_path)?;
341        // Hash the relative path (as UTF-8 bytes) then the file contents.
342        // Length-prefix both to prevent ambiguity.
343        let path_bytes = rel_path.as_bytes();
344        hasher.update((path_bytes.len() as u64).to_le_bytes());
345        hasher.update(path_bytes);
346        hasher.update((contents.len() as u64).to_le_bytes());
347        hasher.update(&contents);
348    }
349
350    Ok(hasher.finalize().into())
351}
352
353/// Recursively collect file paths relative to `root`, skipping excluded dirs/files.
354fn collect_files(root: &Path, dir: &Path, out: &mut Vec<String>) -> Result<(), PackageError> {
355    let entries = std::fs::read_dir(dir)?;
356    for entry in entries {
357        let entry = entry?;
358        let path = entry.path();
359        let name = entry.file_name();
360        let name_str = name.to_string_lossy();
361
362        // Skip excluded directories
363        if path.is_dir() {
364            if name_str == "target" || name_str == ".git" {
365                continue;
366            }
367            collect_files(root, &path, out)?;
368            continue;
369        }
370
371        // Skip signature files
372        if name_str.ends_with(".sig") {
373            continue;
374        }
375
376        // Store relative path using forward slashes for cross-platform determinism
377        let rel = path
378            .strip_prefix(root)
379            .expect("path is under root")
380            .to_string_lossy()
381            .replace('\\', "/");
382        out.push(rel);
383    }
384    Ok(())
385}
386
387/// Recursively collect file paths for archiving (includes `.sig` files).
388fn collect_archive_files(
389    root: &Path,
390    dir: &Path,
391    out: &mut Vec<String>,
392) -> Result<(), PackageError> {
393    let entries = std::fs::read_dir(dir)?;
394    for entry in entries {
395        let entry = entry?;
396        let path = entry.path();
397        let name = entry.file_name();
398        let name_str = name.to_string_lossy();
399
400        if path.is_dir() {
401            if name_str == "target" || name_str == ".git" {
402                continue;
403            }
404            collect_archive_files(root, &path, out)?;
405            continue;
406        }
407
408        let rel = path
409            .strip_prefix(root)
410            .expect("path is under root")
411            .to_string_lossy()
412            .replace('\\', "/");
413        out.push(rel);
414    }
415    Ok(())
416}
417
/// Outcome of packing a package, including any warnings.
#[derive(Debug)]
pub struct PackResult {
    /// Location of the `.fid` archive that was written.
    pub path: PathBuf,
    /// `true` when no `package.sig` was found, i.e. the package is unsigned.
    pub unsigned: bool,
}
426
427/// Vendor Python dependencies into `<dir>/vendor/` by invoking
428/// `python3 -m pip install -r <requirements> --target ./vendor/`.
429///
430/// - If `vendor/` already exists, leave it alone — the plugin author may have
431///   pre-vendored deliberately for reproducibility.
432/// - If the declared requirements file is missing AND `vendor/` is missing,
433///   emit a tracing warning and proceed (zero-dep python plugin).
434/// - If pip fails, surface its stderr as `PackageError::ArchiveError` so the
435///   user sees the resolver/build error directly.
436fn vendor_python_deps(dir: &Path, py: &PythonPackageMeta) -> Result<(), PackageError> {
437    let vendor_dir = dir.join("vendor");
438    if vendor_dir.exists() {
439        tracing::debug!(
440            vendor = %vendor_dir.display(),
441            "pre-existing vendor/ directory — using as-is, skipping pip"
442        );
443        return Ok(());
444    }
445
446    let req_path = dir.join(py.requirements_path());
447    if !req_path.exists() {
448        tracing::warn!(
449            package = %dir.display(),
450            requirements = %req_path.display(),
451            "python package has no requirements file and no vendor/ — packaging without deps"
452        );
453        return Ok(());
454    }
455
456    tracing::info!(
457        requirements = %req_path.display(),
458        vendor = %vendor_dir.display(),
459        "vendoring python deps via pip"
460    );
461
462    // `python3 -m pip` rather than bare `pip` so we use whichever interpreter
463    // happens to be on PATH and avoid relying on a separately-installed pip
464    // shim. `Command` invokes the binary directly, bypassing shell aliases.
465    let output = std::process::Command::new("python3")
466        .arg("-m")
467        .arg("pip")
468        .arg("install")
469        .arg("-r")
470        .arg(&req_path)
471        .arg("--target")
472        .arg(&vendor_dir)
473        .arg("--quiet")
474        .output()
475        .map_err(|e| {
476            PackageError::ArchiveError(format!(
477                "failed to invoke `python3 -m pip` (is python3 on PATH?): {e}"
478            ))
479        })?;
480
481    if !output.status.success() {
482        let stderr = String::from_utf8_lossy(&output.stderr);
483        return Err(PackageError::ArchiveError(format!(
484            "pip install failed (exit {}):\n{}",
485            output.status.code().unwrap_or(-1),
486            stderr.trim()
487        )));
488    }
489
490    Ok(())
491}
492
493/// Create a `.fid` archive (tar + bzip2) from a package directory.
494///
495/// The archive contains a single top-level directory `{name}-{version}/`
496/// with all source files. Excludes `target/` and `.git/` directories.
497/// Includes `package.sig` if present.
498///
499/// For Python packages (`runtime = "python"`), if a `requirements.txt` is
500/// declared and a `vendor/` directory does not yet exist, `pip install -r
501/// <requirements> --target ./vendor/` runs first and the result is included
502/// in the archive. Pre-existing `vendor/` is respected and used as-is.
503///
504/// If `output` is `None`, the archive is written to the current directory
505/// as `{name}-{version}.fid`.
506pub fn pack_package(dir: &Path, output: Option<&Path>) -> Result<PackResult, PackageError> {
507    use bzip2::write::BzEncoder;
508    use bzip2::Compression;
509
510    let manifest = load_manifest_untyped(dir)?;
511    let pkg = &manifest.package;
512    let prefix = format!("{}-{}", pkg.name, pkg.version);
513    let ext = pkg.extension();
514
515    // For Python packages: vendor declared deps into vendor/ before archiving.
516    // Pre-existing vendor/ is respected (plugin author may pre-vendor for
517    // reproducibility), missing requirements + missing vendor/ produces a
518    // tracing warning but is not fatal (a Python plugin with no deps is fine).
519    if matches!(pkg.runtime(), PackageRuntime::Python) {
520        if let Some(py_meta) = manifest.python.as_ref() {
521            vendor_python_deps(dir, py_meta)?;
522        }
523    }
524
525    let unsigned = !dir.join("package.sig").exists();
526
527    let out_path = match output {
528        Some(p) => p.to_path_buf(),
529        None => PathBuf::from(format!("{prefix}.{ext}")),
530    };
531
532    let file = std::fs::File::create(&out_path).map_err(|e| {
533        PackageError::ArchiveError(format!("failed to create {}: {e}", out_path.display()))
534    })?;
535
536    let encoder = BzEncoder::new(file, Compression::best());
537    let mut tar = tar::Builder::new(encoder);
538
539    let mut files = Vec::new();
540    collect_archive_files(dir, dir, &mut files)?;
541    files.sort();
542
543    for rel_path in &files {
544        let abs_path = dir.join(rel_path);
545        let archive_path = format!("{prefix}/{rel_path}");
546        tar.append_path_with_name(&abs_path, &archive_path)
547            .map_err(|e| PackageError::ArchiveError(format!("failed to add {rel_path}: {e}")))?;
548    }
549
550    tar.into_inner()
551        .map_err(|e| PackageError::ArchiveError(format!("failed to finish bz2 stream: {e}")))?
552        .finish()
553        .map_err(|e| PackageError::ArchiveError(format!("failed to finish bz2 stream: {e}")))?;
554
555    Ok(PackResult {
556        path: out_path,
557        unsigned,
558    })
559}
560
561/// Extract a `.fid` archive (tar + bzip2) to a destination directory using
562/// strict safety defaults.
563///
564/// Returns the path to the extracted top-level package directory, which is
565/// guaranteed to exist inside `dest` and contain a `package.toml`.
566///
567/// This function validates every archive entry before extracting and rejects
568/// archives containing: path-traversal components (`..`), absolute paths,
569/// symlinks, hardlinks, more than 10,000 entries, or a cumulative declared
570/// decompressed size exceeding 500 MB or 10× the compressed archive size.
571///
572/// Extraction is staged inside a temporary directory under `dest` and the
573/// package directory is moved into place atomically on success. If validation
574/// fails mid-archive, no files are left in `dest`.
575///
576/// For archives that legitimately exceed the default caps, use
577/// [`unpack_package_with_options`].
578pub fn unpack_package(archive: &Path, dest: &Path) -> Result<PathBuf, PackageError> {
579    unpack_package_with_options(archive, dest, &UnpackOptions::default())
580}
581
582/// Extract a `.fid` archive with caller-provided safety limits.
583///
584/// See [`unpack_package`] for the default-strict variant. Use this when the
585/// archive's size or entry count legitimately exceeds the defaults.
586pub fn unpack_package_with_options(
587    archive: &Path,
588    dest: &Path,
589    options: &UnpackOptions,
590) -> Result<PathBuf, PackageError> {
591    use bzip2::read::BzDecoder;
592    use std::path::Component;
593
594    let file = std::fs::File::open(archive).map_err(|e| {
595        PackageError::ArchiveError(format!("failed to open {}: {e}", archive.display()))
596    })?;
597    let compressed_size = file.metadata().map(|m| m.len()).unwrap_or(0);
598
599    let decoder = BzDecoder::new(file);
600    let mut tar = tar::Archive::new(decoder);
601
602    // Stage extraction inside `dest` so a failed or rejected archive leaves
603    // nothing behind. `dest` must already exist.
604    std::fs::create_dir_all(dest).map_err(PackageError::Io)?;
605    let staging = tempfile::TempDir::new_in(dest).map_err(PackageError::Io)?;
606    let staging_path = staging.path();
607
608    let ratio_cap = compressed_size.saturating_mul(options.max_ratio);
609    let mut total: u64 = 0;
610    let mut count: u32 = 0;
611
612    let entries = tar.entries().map_err(|e| {
613        PackageError::ArchiveError(format!("failed to read {}: {e}", archive.display()))
614    })?;
615
616    for entry in entries {
617        let mut entry = entry.map_err(|e| {
618            PackageError::ArchiveError(format!("failed to read archive entry: {e}"))
619        })?;
620
621        count = count.saturating_add(1);
622        if count > options.max_entries {
623            return Err(PackageError::TooManyEntries {
624                limit: options.max_entries,
625            });
626        }
627
628        let path = entry
629            .path()
630            .map_err(|e| PackageError::ArchiveError(format!("invalid entry path: {e}")))?
631            .into_owned();
632        let entry_display = path.display().to_string();
633
634        // 1. Reject link entries. A symlink or hardlink followed by a regular
635        // file at the same path can overwrite files outside `dest`.
636        let entry_type = entry.header().entry_type();
637        if entry_type.is_symlink() {
638            return Err(PackageError::SymlinkRejected {
639                entry: entry_display,
640            });
641        }
642        if entry_type.is_hard_link() {
643            return Err(PackageError::HardlinkRejected {
644                entry: entry_display,
645            });
646        }
647
648        // 2. Reject `..` components and absolute paths. The `tar` crate has
649        // best-effort guards but they are platform-dependent; check explicitly.
650        for component in path.components() {
651            match component {
652                Component::ParentDir => {
653                    return Err(PackageError::PathTraversal {
654                        entry: entry_display,
655                    });
656                }
657                Component::RootDir | Component::Prefix(_) => {
658                    return Err(PackageError::AbsolutePath {
659                        entry: entry_display,
660                    });
661                }
662                _ => {}
663            }
664        }
665
666        // 3. Enforce cumulative declared-size budget. Tar's own parsing
667        // enforces that actual entry bytes match the declared header size,
668        // so trusting the header here is safe against bomb archives.
669        let declared = entry.header().size().unwrap_or(0);
670        total = total.saturating_add(declared);
671        if total > options.max_decompressed {
672            return Err(PackageError::SizeLimitExceeded {
673                limit: options.max_decompressed,
674                actual: total,
675            });
676        }
677        if compressed_size > 0 && options.max_ratio > 0 && total > ratio_cap {
678            return Err(PackageError::SizeLimitExceeded {
679                limit: ratio_cap,
680                actual: total,
681            });
682        }
683
684        // 4. Extract into the staging area. `unpack_in` itself rejects paths
685        // that escape the base directory, but our explicit checks above mean
686        // we never get here with a dangerous path.
687        entry.unpack_in(staging_path).map_err(|e| {
688            PackageError::ArchiveError(format!("failed to extract entry '{}': {e}", path.display()))
689        })?;
690    }
691
692    // Find the top-level package directory inside staging.
693    let mut pkg_dir_staging: Option<PathBuf> = None;
694    for entry in std::fs::read_dir(staging_path).map_err(PackageError::Io)? {
695        let entry = entry.map_err(PackageError::Io)?;
696        let path = entry.path();
697        if path.is_dir() && path.join("package.toml").exists() {
698            pkg_dir_staging = Some(path);
699            break;
700        }
701    }
702    let pkg_dir_staging = pkg_dir_staging.ok_or_else(|| {
703        PackageError::InvalidArchive("archive does not contain a package.toml".to_string())
704    })?;
705
706    // Atomically move the validated package directory to its final location
707    // inside `dest`. If a directory with the same name already exists it is
708    // removed first, matching the prior `tar::Archive::unpack` behaviour.
709    let pkg_name = pkg_dir_staging
710        .file_name()
711        .ok_or_else(|| {
712            PackageError::InvalidArchive("extracted package has no directory name".to_string())
713        })?
714        .to_os_string();
715    let final_path = dest.join(&pkg_name);
716    if final_path.exists() {
717        std::fs::remove_dir_all(&final_path).map_err(PackageError::Io)?;
718    }
719    std::fs::rename(&pkg_dir_staging, &final_path).map_err(PackageError::Io)?;
720
721    // `staging` TempDir drops here; any residual files are cleaned up.
722    Ok(final_path)
723}
724
725#[cfg(test)]
726mod tests {
727    use super::*;
728    use tempfile::TempDir;
729
730    fn write_manifest(dir: &Path, content: &str) {
731        std::fs::write(dir.join("package.toml"), content).unwrap();
732    }
733
734    #[derive(Debug, Deserialize, PartialEq)]
735    struct TestMeta {
736        category: String,
737        #[serde(default)]
738        tags: Vec<String>,
739    }
740
741    #[test]
742    fn valid_manifest_parses() {
743        let tmp = TempDir::new().unwrap();
744        write_manifest(
745            tmp.path(),
746            r#"
747            [package]
748            name = "test-pkg"
749            version = "1.0.0"
750            interface = "my-api"
751            interface_version = 1
752
753            [metadata]
754            category = "testing"
755            tags = ["a", "b"]
756            "#,
757        );
758
759        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
760        assert_eq!(m.package.name, "test-pkg");
761        assert_eq!(m.package.version, "1.0.0");
762        assert_eq!(m.package.interface, "my-api");
763        assert_eq!(m.package.interface_version, 1);
764        assert_eq!(m.metadata.category, "testing");
765        assert_eq!(m.metadata.tags, vec!["a", "b"]);
766    }
767
768    #[test]
769    fn missing_required_metadata_field_fails() {
770        let tmp = TempDir::new().unwrap();
771        write_manifest(
772            tmp.path(),
773            r#"
774            [package]
775            name = "bad-pkg"
776            version = "1.0.0"
777            interface = "my-api"
778            interface_version = 1
779
780            [metadata]
781            # missing required "category" field
782            tags = ["x"]
783            "#,
784        );
785
786        let result = load_manifest::<TestMeta>(tmp.path());
787        assert!(result.is_err());
788        let err = result.unwrap_err().to_string();
789        assert!(
790            err.contains("category"),
791            "error should mention missing field: {err}"
792        );
793    }
794
795    #[test]
796    fn missing_manifest_returns_not_found() {
797        let tmp = TempDir::new().unwrap();
798        let result = load_manifest::<TestMeta>(tmp.path());
799        assert!(matches!(result, Err(PackageError::ManifestNotFound { .. })));
800    }
801
802    #[test]
803    fn extra_metadata_fields_ignored() {
804        let tmp = TempDir::new().unwrap();
805        write_manifest(
806            tmp.path(),
807            r#"
808            [package]
809            name = "extra-pkg"
810            version = "1.0.0"
811            interface = "my-api"
812            interface_version = 1
813
814            [metadata]
815            category = "testing"
816            unknown_field = "ignored"
817            "#,
818        );
819
820        // TestMeta doesn't have unknown_field — should still parse (serde ignores unknown by default)
821        let m = load_manifest::<TestMeta>(tmp.path());
822        assert!(m.is_ok());
823        assert_eq!(m.unwrap().metadata.category, "testing");
824    }
825
826    #[test]
827    fn untyped_manifest_accepts_any_metadata() {
828        let tmp = TempDir::new().unwrap();
829        write_manifest(
830            tmp.path(),
831            r#"
832            [package]
833            name = "any-pkg"
834            version = "1.0.0"
835            interface = "my-api"
836            interface_version = 1
837
838            [metadata]
839            foo = "bar"
840            count = 42
841            nested = { a = 1, b = 2 }
842            "#,
843        );
844
845        let m = load_manifest_untyped(tmp.path()).unwrap();
846        assert_eq!(m.package.name, "any-pkg");
847        assert!(m.metadata.is_table());
848    }
849
850    #[test]
851    fn digest_is_deterministic() {
852        let tmp = TempDir::new().unwrap();
853        write_manifest(tmp.path(), "[package]\nname = \"test\"\nversion = \"1.0.0\"\ninterface = \"api\"\ninterface_version = 1\n\n[metadata]\nk = \"v\"\n");
854        std::fs::write(tmp.path().join("src.rs"), b"fn main() {}").unwrap();
855
856        let d1 = package_digest(tmp.path()).unwrap();
857        let d2 = package_digest(tmp.path()).unwrap();
858        assert_eq!(d1, d2);
859    }
860
861    #[test]
862    fn digest_changes_on_file_modification() {
863        let tmp = TempDir::new().unwrap();
864        write_manifest(tmp.path(), "[package]\nname = \"test\"\nversion = \"1.0.0\"\ninterface = \"api\"\ninterface_version = 1\n\n[metadata]\nk = \"v\"\n");
865        std::fs::write(tmp.path().join("src.rs"), b"fn main() {}").unwrap();
866
867        let d1 = package_digest(tmp.path()).unwrap();
868
869        std::fs::write(tmp.path().join("src.rs"), b"fn main() { evil() }").unwrap();
870        let d2 = package_digest(tmp.path()).unwrap();
871
872        assert_ne!(d1, d2);
873    }
874
875    #[test]
876    fn digest_excludes_target_and_sig() {
877        let tmp = TempDir::new().unwrap();
878        write_manifest(tmp.path(), "[package]\nname = \"test\"\nversion = \"1.0.0\"\ninterface = \"api\"\ninterface_version = 1\n\n[metadata]\nk = \"v\"\n");
879        std::fs::write(tmp.path().join("src.rs"), b"fn main() {}").unwrap();
880
881        let d1 = package_digest(tmp.path()).unwrap();
882
883        // Adding target/ dir and .sig file should not change digest
884        std::fs::create_dir(tmp.path().join("target")).unwrap();
885        std::fs::write(tmp.path().join("target/output.dylib"), b"binary").unwrap();
886        std::fs::write(tmp.path().join("package.sig"), b"sig bytes").unwrap();
887
888        let d2 = package_digest(tmp.path()).unwrap();
889        assert_eq!(d1, d2);
890    }
891
892    fn make_package(dir: &Path) {
893        write_manifest(
894            dir,
895            r#"
896            [package]
897            name = "test-pkg"
898            version = "2.0.0"
899            interface = "my-api"
900            interface_version = 1
901
902            [metadata]
903            category = "testing"
904            "#,
905        );
906        std::fs::create_dir_all(dir.join("src")).unwrap();
907        std::fs::write(dir.join("src/lib.rs"), b"fn hello() {}").unwrap();
908    }
909
910    #[test]
911    fn pack_unpack_round_trip() {
912        let pkg_dir = TempDir::new().unwrap();
913        make_package(pkg_dir.path());
914
915        let out_dir = TempDir::new().unwrap();
916        let fid_path = out_dir.path().join("test-pkg-2.0.0.fid");
917
918        let result = pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
919        assert_eq!(result.path, fid_path);
920        assert!(fid_path.exists());
921        assert!(result.unsigned);
922
923        let extract_dir = TempDir::new().unwrap();
924        let extracted = unpack_package(&fid_path, extract_dir.path()).unwrap();
925
926        assert!(extracted.join("package.toml").exists());
927        assert!(extracted.join("src/lib.rs").exists());
928        assert_eq!(
929            extracted.file_name().unwrap().to_str().unwrap(),
930            "test-pkg-2.0.0"
931        );
932    }
933
934    #[test]
935    fn pack_includes_sig_file() {
936        let pkg_dir = TempDir::new().unwrap();
937        make_package(pkg_dir.path());
938        std::fs::write(pkg_dir.path().join("package.sig"), b"fake-sig").unwrap();
939
940        let out_dir = TempDir::new().unwrap();
941        let fid_path = out_dir.path().join("out.fid");
942
943        let result = pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
944        assert!(!result.unsigned);
945
946        let extract_dir = TempDir::new().unwrap();
947        let extracted = unpack_package(&fid_path, extract_dir.path()).unwrap();
948        assert!(extracted.join("package.sig").exists());
949    }
950
951    #[test]
952    fn pack_excludes_target_and_git() {
953        let pkg_dir = TempDir::new().unwrap();
954        make_package(pkg_dir.path());
955        std::fs::create_dir(pkg_dir.path().join("target")).unwrap();
956        std::fs::write(pkg_dir.path().join("target/out.dylib"), b"bin").unwrap();
957        std::fs::create_dir(pkg_dir.path().join(".git")).unwrap();
958        std::fs::write(pkg_dir.path().join(".git/HEAD"), b"ref").unwrap();
959
960        let out_dir = TempDir::new().unwrap();
961        let fid_path = out_dir.path().join("out.fid");
962        pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
963
964        let extract_dir = TempDir::new().unwrap();
965        let extracted = unpack_package(&fid_path, extract_dir.path()).unwrap();
966        assert!(!extracted.join("target").exists());
967        assert!(!extracted.join(".git").exists());
968    }
969
970    #[test]
971    fn unpack_invalid_archive_no_manifest() {
972        let pkg_dir = TempDir::new().unwrap();
973        // Create a valid bz2 tar but with no package.toml
974        std::fs::create_dir_all(pkg_dir.path().join("src")).unwrap();
975        std::fs::write(pkg_dir.path().join("src/lib.rs"), b"fn x() {}").unwrap();
976
977        let out_dir = TempDir::new().unwrap();
978        let fid_path = out_dir.path().join("bad.fid");
979
980        // Manually create a tar.bz2 without package.toml
981        {
982            use bzip2::write::BzEncoder;
983            use bzip2::Compression;
984
985            let file = std::fs::File::create(&fid_path).unwrap();
986            let encoder = BzEncoder::new(file, Compression::default());
987            let mut tar = tar::Builder::new(encoder);
988            tar.append_path_with_name(
989                pkg_dir.path().join("src/lib.rs"),
990                "no-manifest-1.0.0/src/lib.rs",
991            )
992            .unwrap();
993            tar.into_inner().unwrap().finish().unwrap();
994        }
995
996        let extract_dir = TempDir::new().unwrap();
997        let result = unpack_package(&fid_path, extract_dir.path());
998        assert!(result.is_err());
999        let err = result.unwrap_err().to_string();
1000        assert!(err.contains("package.toml"), "error was: {err}");
1001    }
1002
1003    #[test]
1004    fn pack_default_output_name() {
1005        let pkg_dir = TempDir::new().unwrap();
1006        make_package(pkg_dir.path());
1007
1008        let out_dir = TempDir::new().unwrap();
1009        let out_path = out_dir.path().join("test-pkg-2.0.0.fid");
1010
1011        let result = pack_package(pkg_dir.path(), Some(&out_path)).unwrap();
1012        assert_eq!(result.path, out_path);
1013        assert!(out_path.exists());
1014    }
1015
1016    #[test]
1017    fn pack_custom_extension() {
1018        let pkg_dir = TempDir::new().unwrap();
1019        write_manifest(
1020            pkg_dir.path(),
1021            r#"
1022            [package]
1023            name = "my-plugin"
1024            version = "0.3.0"
1025            interface = "my-api"
1026            interface_version = 1
1027            extension = "cloacina"
1028
1029            [metadata]
1030            category = "testing"
1031            "#,
1032        );
1033        std::fs::create_dir_all(pkg_dir.path().join("src")).unwrap();
1034        std::fs::write(pkg_dir.path().join("src/lib.rs"), b"fn hello() {}").unwrap();
1035
1036        let out_dir = TempDir::new().unwrap();
1037        let out_path = out_dir.path().join("my-plugin-0.3.0.cloacina");
1038
1039        let result = pack_package(pkg_dir.path(), Some(&out_path)).unwrap();
1040        assert_eq!(result.path, out_path);
1041        assert!(out_path.exists());
1042
1043        // Verify it unpacks correctly
1044        let extract_dir = TempDir::new().unwrap();
1045        let extracted = unpack_package(&out_path, extract_dir.path()).unwrap();
1046        assert!(extracted.join("package.toml").exists());
1047    }
1048
1049    #[test]
1050    fn extension_defaults_to_fid() {
1051        let header = PackageHeader {
1052            name: "test".to_string(),
1053            version: "1.0.0".to_string(),
1054            interface: "api".to_string(),
1055            interface_version: 1,
1056            extension: None,
1057            runtime: None,
1058        };
1059        assert_eq!(header.extension(), "fid");
1060
1061        let header_custom = PackageHeader {
1062            extension: Some("cloacina".to_string()),
1063            ..header
1064        };
1065        assert_eq!(header_custom.extension(), "cloacina");
1066    }
1067
1068    // ---- Python runtime manifest tests ----
1069
1070    #[test]
1071    fn rust_runtime_default_when_absent() {
1072        let tmp = TempDir::new().unwrap();
1073        write_manifest(
1074            tmp.path(),
1075            r#"
1076            [package]
1077            name = "rust-pkg"
1078            version = "1.0.0"
1079            interface = "api"
1080            interface_version = 1
1081
1082            [metadata]
1083            category = "rust"
1084            "#,
1085        );
1086        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
1087        assert_eq!(m.package.runtime(), PackageRuntime::Rust);
1088        assert!(m.python.is_none());
1089    }
1090
1091    #[test]
1092    fn python_runtime_with_python_section_parses() {
1093        let tmp = TempDir::new().unwrap();
1094        write_manifest(
1095            tmp.path(),
1096            r#"
1097            [package]
1098            name = "py-pkg"
1099            version = "0.1.0"
1100            interface = "api"
1101            interface_version = 1
1102            runtime = "python"
1103
1104            [metadata]
1105            category = "python"
1106
1107            [python]
1108            entry_module = "py_pkg.entry"
1109            requirements = "deps.txt"
1110            "#,
1111        );
1112        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
1113        assert_eq!(m.package.runtime(), PackageRuntime::Python);
1114        let py = m.python.as_ref().expect("python section");
1115        assert_eq!(py.entry_module, "py_pkg.entry");
1116        assert_eq!(py.requirements_path(), "deps.txt");
1117    }
1118
1119    #[test]
1120    fn python_runtime_requirements_default() {
1121        let tmp = TempDir::new().unwrap();
1122        write_manifest(
1123            tmp.path(),
1124            r#"
1125            [package]
1126            name = "py-pkg"
1127            version = "0.1.0"
1128            interface = "api"
1129            interface_version = 1
1130            runtime = "python"
1131
1132            [metadata]
1133            category = "python"
1134
1135            [python]
1136            entry_module = "py_pkg.entry"
1137            "#,
1138        );
1139        let m = load_manifest::<TestMeta>(tmp.path()).unwrap();
1140        assert_eq!(
1141            m.python.as_ref().unwrap().requirements_path(),
1142            "requirements.txt"
1143        );
1144    }
1145
1146    #[test]
1147    fn python_runtime_without_python_section_rejected() {
1148        let tmp = TempDir::new().unwrap();
1149        write_manifest(
1150            tmp.path(),
1151            r#"
1152            [package]
1153            name = "py-pkg"
1154            version = "0.1.0"
1155            interface = "api"
1156            interface_version = 1
1157            runtime = "python"
1158
1159            [metadata]
1160            category = "python"
1161            "#,
1162        );
1163        let err = load_manifest::<TestMeta>(tmp.path()).unwrap_err();
1164        match err {
1165            PackageError::InvalidManifest(msg) => {
1166                assert!(
1167                    msg.contains("entry_module"),
1168                    "expected message about entry_module, got: {msg}"
1169                );
1170            }
1171            other => panic!("expected InvalidManifest, got {other:?}"),
1172        }
1173    }
1174
1175    #[test]
1176    fn python_section_without_python_runtime_rejected() {
1177        let tmp = TempDir::new().unwrap();
1178        write_manifest(
1179            tmp.path(),
1180            r#"
1181            [package]
1182            name = "rust-pkg"
1183            version = "1.0.0"
1184            interface = "api"
1185            interface_version = 1
1186
1187            [metadata]
1188            category = "rust"
1189
1190            [python]
1191            entry_module = "py_pkg.entry"
1192            "#,
1193        );
1194        let err = load_manifest::<TestMeta>(tmp.path()).unwrap_err();
1195        assert!(matches!(err, PackageError::InvalidManifest(_)));
1196    }
1197
1198    #[test]
1199    fn unknown_runtime_rejected() {
1200        let tmp = TempDir::new().unwrap();
1201        write_manifest(
1202            tmp.path(),
1203            r#"
1204            [package]
1205            name = "node-pkg"
1206            version = "0.1.0"
1207            interface = "api"
1208            interface_version = 1
1209            runtime = "node"
1210
1211            [metadata]
1212            category = "node"
1213            "#,
1214        );
1215        let err = load_manifest::<TestMeta>(tmp.path()).unwrap_err();
1216        match err {
1217            PackageError::InvalidManifest(msg) => {
1218                assert!(msg.contains("node"), "got: {msg}");
1219            }
1220            other => panic!("expected InvalidManifest, got {other:?}"),
1221        }
1222    }
1223
1224    #[test]
1225    fn package_runtime_display_and_str() {
1226        assert_eq!(PackageRuntime::Rust.as_str(), "rust");
1227        assert_eq!(PackageRuntime::Python.as_str(), "python");
1228        assert_eq!(format!("{}", PackageRuntime::Python), "python");
1229    }
1230
1231    // ---- Attack-class tests for unpack_package ----
1232
1233    use bzip2::write::BzEncoder;
1234    use bzip2::Compression;
1235    use std::io::Read;
1236    use tar::{EntryType, Header};
1237
1238    /// Build a bz2-compressed tar archive from a builder callback.
1239    fn build_archive<F>(path: &Path, build: F)
1240    where
1241        F: FnOnce(&mut tar::Builder<BzEncoder<std::fs::File>>),
1242    {
1243        let file = std::fs::File::create(path).unwrap();
1244        let encoder = BzEncoder::new(file, Compression::default());
1245        let mut tar = tar::Builder::new(encoder);
1246        build(&mut tar);
1247        tar.into_inner().unwrap().finish().unwrap();
1248    }
1249
1250    /// Write a raw entry name directly into a GNU tar header, bypassing
1251    /// `set_path`'s safety validation. This is only safe in tests where we
1252    /// deliberately craft malicious paths.
1253    fn write_name(header: &mut Header, path: &str) {
1254        let gnu = header.as_gnu_mut().expect("gnu header");
1255        let bytes = path.as_bytes();
1256        assert!(bytes.len() < gnu.name.len(), "test path too long");
1257        for slot in gnu.name.iter_mut() {
1258            *slot = 0;
1259        }
1260        gnu.name[..bytes.len()].copy_from_slice(bytes);
1261    }
1262
1263    fn write_linkname(header: &mut Header, link: &str) {
1264        let gnu = header.as_gnu_mut().expect("gnu header");
1265        let bytes = link.as_bytes();
1266        assert!(bytes.len() < gnu.linkname.len(), "test linkname too long");
1267        for slot in gnu.linkname.iter_mut() {
1268            *slot = 0;
1269        }
1270        gnu.linkname[..bytes.len()].copy_from_slice(bytes);
1271    }
1272
1273    /// Append a regular file entry with explicit path and content bytes.
1274    /// Uses the low-level name-writing helper so arbitrary (including
1275    /// malicious) paths can be tested.
1276    fn append_regular(tar: &mut tar::Builder<BzEncoder<std::fs::File>>, path: &str, data: &[u8]) {
1277        let mut header = Header::new_gnu();
1278        write_name(&mut header, path);
1279        header.set_size(data.len() as u64);
1280        header.set_mode(0o644);
1281        header.set_entry_type(EntryType::Regular);
1282        header.set_cksum();
1283        tar.append(&header, data).unwrap();
1284    }
1285
1286    /// Append a link entry with a chosen EntryType (symlink/hardlink).
1287    fn append_link(
1288        tar: &mut tar::Builder<BzEncoder<std::fs::File>>,
1289        path: &str,
1290        link_target: &str,
1291        ty: EntryType,
1292    ) {
1293        let mut header = Header::new_gnu();
1294        write_name(&mut header, path);
1295        write_linkname(&mut header, link_target);
1296        header.set_size(0);
1297        header.set_mode(0o644);
1298        header.set_entry_type(ty);
1299        header.set_cksum();
1300        tar.append(&header, std::io::empty()).unwrap();
1301    }
1302
1303    #[test]
1304    fn unpack_rejects_parent_dir_component() {
1305        let out = TempDir::new().unwrap();
1306        let archive = out.path().join("evil.fid");
1307        build_archive(&archive, |tar| {
1308            append_regular(tar, "../escaped", b"pwn");
1309        });
1310
1311        let extract = TempDir::new().unwrap();
1312        let err = unpack_package(&archive, extract.path()).unwrap_err();
1313        assert!(
1314            matches!(err, PackageError::PathTraversal { .. }),
1315            "expected PathTraversal, got: {err:?}"
1316        );
1317        // Nothing leaked outside staging.
1318        assert!(!out.path().join("escaped").exists());
1319    }
1320
1321    #[test]
1322    fn unpack_rejects_absolute_path() {
1323        let out = TempDir::new().unwrap();
1324        let archive = out.path().join("evil.fid");
1325        build_archive(&archive, |tar| {
1326            append_regular(tar, "/tmp/fidius-escape", b"pwn");
1327        });
1328
1329        let extract = TempDir::new().unwrap();
1330        let err = unpack_package(&archive, extract.path()).unwrap_err();
1331        assert!(
1332            matches!(err, PackageError::AbsolutePath { .. }),
1333            "expected AbsolutePath, got: {err:?}"
1334        );
1335    }
1336
1337    #[test]
1338    fn unpack_rejects_symlink() {
1339        let out = TempDir::new().unwrap();
1340        let archive = out.path().join("evil.fid");
1341        build_archive(&archive, |tar| {
1342            append_link(tar, "link", "/etc/passwd", EntryType::Symlink);
1343        });
1344
1345        let extract = TempDir::new().unwrap();
1346        let err = unpack_package(&archive, extract.path()).unwrap_err();
1347        assert!(
1348            matches!(err, PackageError::SymlinkRejected { .. }),
1349            "expected SymlinkRejected, got: {err:?}"
1350        );
1351    }
1352
1353    #[test]
1354    fn unpack_rejects_hardlink() {
1355        let out = TempDir::new().unwrap();
1356        let archive = out.path().join("evil.fid");
1357        build_archive(&archive, |tar| {
1358            append_link(tar, "link", "existing-file", EntryType::Link);
1359        });
1360
1361        let extract = TempDir::new().unwrap();
1362        let err = unpack_package(&archive, extract.path()).unwrap_err();
1363        assert!(
1364            matches!(err, PackageError::HardlinkRejected { .. }),
1365            "expected HardlinkRejected, got: {err:?}"
1366        );
1367    }
1368
1369    #[test]
1370    fn unpack_symlink_then_file_rejected_at_first_entry() {
1371        // Classic symlink-overwrite attack: entry 1 is a symlink to /tmp/foo,
1372        // entry 2 is a regular file at the same path. Our checks reject entry 1
1373        // so entry 2 is never extracted.
1374        let out = TempDir::new().unwrap();
1375        let sentinel_dir = TempDir::new().unwrap();
1376        let sentinel = sentinel_dir.path().join("target");
1377        std::fs::write(&sentinel, b"original").unwrap();
1378
1379        let archive = out.path().join("evil.fid");
1380        build_archive(&archive, |tar| {
1381            append_link(tar, "bad", sentinel.to_str().unwrap(), EntryType::Symlink);
1382            append_regular(tar, "bad", b"clobber");
1383        });
1384
1385        let extract = TempDir::new().unwrap();
1386        let err = unpack_package(&archive, extract.path()).unwrap_err();
1387        assert!(matches!(err, PackageError::SymlinkRejected { .. }));
1388
1389        // The sentinel file outside the extraction directory is untouched.
1390        assert_eq!(std::fs::read(&sentinel).unwrap(), b"original");
1391    }
1392
1393    #[test]
1394    fn unpack_rejects_declared_size_bomb() {
1395        let out = TempDir::new().unwrap();
1396        let archive = out.path().join("bomb.fid");
1397
1398        // Build a tar manually where a header declares a size far above the cap,
1399        // then write matching zero bytes so tar parsing stays consistent.
1400        let file = std::fs::File::create(&archive).unwrap();
1401        let encoder = BzEncoder::new(file, Compression::best());
1402        let mut tar = tar::Builder::new(encoder);
1403
1404        let declared: u64 = 600 * 1024 * 1024; // > 500 MB default cap
1405        let mut header = Header::new_gnu();
1406        header.set_path("bomb.bin").unwrap();
1407        header.set_size(declared);
1408        header.set_mode(0o644);
1409        header.set_entry_type(EntryType::Regular);
1410        header.set_cksum();
1411
1412        // Use a zero-filled reader so the compressed size stays tiny.
1413        let zeros = std::io::repeat(0u8).take(declared);
1414        tar.append(&header, zeros).unwrap();
1415        tar.into_inner().unwrap().finish().unwrap();
1416
1417        let extract = TempDir::new().unwrap();
1418        let err = unpack_package(&archive, extract.path()).unwrap_err();
1419        assert!(
1420            matches!(err, PackageError::SizeLimitExceeded { .. }),
1421            "expected SizeLimitExceeded, got: {err:?}"
1422        );
1423    }
1424
1425    #[test]
1426    fn unpack_rejects_ratio_bomb() {
1427        // Small compressed archive with many small entries whose cumulative
1428        // declared size exceeds `compressed_size * max_ratio` but is still
1429        // under the absolute cap — should be rejected by the ratio check.
1430        let out = TempDir::new().unwrap();
1431        let archive = out.path().join("ratio.fid");
1432
1433        // Default max_ratio is 10. Use a 4 KB-per-entry file that compresses well.
1434        let payload = vec![b'A'; 4096];
1435        build_archive(&archive, |tar| {
1436            for i in 0..10_000u32 {
1437                append_regular(tar, &format!("file-{i:05}.txt"), &payload);
1438            }
1439        });
1440
1441        let extract = TempDir::new().unwrap();
1442        // Tighten both caps so this triggers on ratio rather than absolute cap.
1443        let options = UnpackOptions {
1444            max_decompressed: u64::MAX,
1445            max_ratio: 2,
1446            max_entries: 20_000,
1447        };
1448        let err = unpack_package_with_options(&archive, extract.path(), &options).unwrap_err();
1449        assert!(
1450            matches!(err, PackageError::SizeLimitExceeded { .. }),
1451            "expected SizeLimitExceeded, got: {err:?}"
1452        );
1453    }
1454
1455    #[test]
1456    fn unpack_rejects_too_many_entries() {
1457        let out = TempDir::new().unwrap();
1458        let archive = out.path().join("spam.fid");
1459        build_archive(&archive, |tar| {
1460            for i in 0..50u32 {
1461                append_regular(tar, &format!("f-{i}"), b"");
1462            }
1463        });
1464
1465        let extract = TempDir::new().unwrap();
1466        let options = UnpackOptions {
1467            max_entries: 10,
1468            ..UnpackOptions::default()
1469        };
1470        let err = unpack_package_with_options(&archive, extract.path(), &options).unwrap_err();
1471        assert!(
1472            matches!(err, PackageError::TooManyEntries { limit: 10 }),
1473            "expected TooManyEntries, got: {err:?}"
1474        );
1475    }
1476
1477    #[test]
1478    fn unpack_staging_cleans_up_on_rejection() {
1479        let out = TempDir::new().unwrap();
1480        let archive = out.path().join("evil.fid");
1481        build_archive(&archive, |tar| {
1482            append_regular(tar, "ok/file.txt", b"ok");
1483            append_regular(tar, "../escape", b"bad");
1484        });
1485
1486        let extract = TempDir::new().unwrap();
1487        let _ = unpack_package(&archive, extract.path()).unwrap_err();
1488
1489        // After rejection `extract` must be empty — the partial `ok/` tree
1490        // lived in a TempDir that has since been dropped.
1491        let remaining: Vec<_> = std::fs::read_dir(extract.path())
1492            .unwrap()
1493            .collect::<Result<_, _>>()
1494            .unwrap();
1495        assert!(
1496            remaining.is_empty(),
1497            "extraction dir not cleaned up: {remaining:?}"
1498        );
1499    }
1500
1501    #[test]
1502    fn unpack_with_options_accepts_large_archive() {
1503        // Round-trip a legitimate package under a looser cap to exercise the
1504        // options path end-to-end.
1505        let pkg_dir = TempDir::new().unwrap();
1506        make_package(pkg_dir.path());
1507
1508        let out_dir = TempDir::new().unwrap();
1509        let fid_path = out_dir.path().join("ok.fid");
1510        pack_package(pkg_dir.path(), Some(&fid_path)).unwrap();
1511
1512        let extract = TempDir::new().unwrap();
1513        let options = UnpackOptions {
1514            max_decompressed: u64::MAX,
1515            max_ratio: u64::MAX,
1516            max_entries: u32::MAX,
1517        };
1518        let extracted = unpack_package_with_options(&fid_path, extract.path(), &options).unwrap();
1519        assert!(extracted.join("package.toml").exists());
1520    }
1521
1522    // ---- Python pack-time vendoring tests ----
1523
1524    /// Build a minimal Python package directory (manifest + entry .py).
1525    fn make_python_package(dir: &Path, with_requirements: Option<&str>) {
1526        let req_line = if with_requirements.is_some() {
1527            "requirements = \"requirements.txt\"\n"
1528        } else {
1529            ""
1530        };
1531        write_manifest(
1532            dir,
1533            &format!(
1534                r#"
1535                [package]
1536                name = "py-pack-test"
1537                version = "0.1.0"
1538                interface = "api"
1539                interface_version = 1
1540                runtime = "python"
1541
1542                [metadata]
1543                category = "python"
1544
1545                [python]
1546                entry_module = "py_pack_test"
1547                {req_line}
1548                "#
1549            ),
1550        );
1551        std::fs::write(
1552            dir.join("py_pack_test.py"),
1553            b"def hello():\n    return 'hi'\n",
1554        )
1555        .unwrap();
1556        if let Some(req) = with_requirements {
1557            std::fs::write(dir.join("requirements.txt"), req.as_bytes()).unwrap();
1558        }
1559    }
1560
1561    #[test]
1562    fn pack_python_with_prevendored_directory_skips_pip() {
1563        // If vendor/ is present we don't invoke pip — even with a requirements
1564        // file pointing at something pip couldn't possibly resolve. Simulating
1565        // pre-vendoring by hand.
1566        let pkg_dir = TempDir::new().unwrap();
1567        make_python_package(
1568            pkg_dir.path(),
1569            Some("definitely-not-a-real-package==999.999.999"),
1570        );
1571        let vendor = pkg_dir.path().join("vendor");
1572        std::fs::create_dir(&vendor).unwrap();
1573        std::fs::write(
1574            vendor.join("fake_module.py"),
1575            b"# pre-vendored placeholder\n",
1576        )
1577        .unwrap();
1578
1579        let out_dir = TempDir::new().unwrap();
1580        let fid = out_dir.path().join("py.fid");
1581        pack_package(pkg_dir.path(), Some(&fid))
1582            .expect("pack should not invoke pip when vendor/ exists");
1583
1584        let extract = TempDir::new().unwrap();
1585        let extracted = unpack_package(&fid, extract.path()).unwrap();
1586        assert!(extracted.join("vendor/fake_module.py").exists());
1587        assert!(extracted.join("py_pack_test.py").exists());
1588    }
1589
1590    #[test]
1591    fn pack_python_with_no_requirements_or_vendor_warns_but_succeeds() {
1592        let pkg_dir = TempDir::new().unwrap();
1593        make_python_package(pkg_dir.path(), None);
1594
1595        let out_dir = TempDir::new().unwrap();
1596        let fid = out_dir.path().join("py.fid");
1597        pack_package(pkg_dir.path(), Some(&fid))
1598            .expect("zero-dep python plugin should pack successfully");
1599
1600        let extract = TempDir::new().unwrap();
1601        let extracted = unpack_package(&fid, extract.path()).unwrap();
1602        assert!(extracted.join("py_pack_test.py").exists());
1603        assert!(!extracted.join("vendor").exists());
1604    }
1605
1606    #[test]
1607    fn pack_python_with_unresolvable_requirement_surfaces_pip_error() {
1608        // Pip is genuinely invoked here — needs python3+pip on PATH. The test
1609        // is testing the failure-surfacing path: we deliberately ask pip to
1610        // install a package that doesn't exist and assert the error is clear.
1611        // Skipped if python3/pip aren't reachable so CI environments without
1612        // them don't fail.
1613        let probe = std::process::Command::new("python3")
1614            .arg("-m")
1615            .arg("pip")
1616            .arg("--version")
1617            .output();
1618        if probe.map(|o| !o.status.success()).unwrap_or(true) {
1619            eprintln!("skipping: python3 -m pip not available in this environment");
1620            return;
1621        }
1622
1623        let pkg_dir = TempDir::new().unwrap();
1624        make_python_package(
1625            pkg_dir.path(),
1626            Some("fidius-this-package-does-not-exist-9999==1.0\n"),
1627        );
1628
1629        let out_dir = TempDir::new().unwrap();
1630        let fid = out_dir.path().join("py.fid");
1631        let err = pack_package(pkg_dir.path(), Some(&fid)).unwrap_err();
1632        match err {
1633            PackageError::ArchiveError(msg) => {
1634                assert!(
1635                    msg.contains("pip install failed"),
1636                    "expected pip-install error, got: {msg}"
1637                );
1638            }
1639            other => panic!("expected ArchiveError, got {other:?}"),
1640        }
1641    }
1642}