aardvark_core/
bundle.rs

1//! Utilities for handling user-provided Python bundles.
2
3use std::fmt;
4use std::io::{Cursor, Read, Seek};
5use std::path::{Component, Path};
6use std::sync::Arc;
7
8use crate::bundle_manifest::{BundleManifest, MANIFEST_BASENAME};
9use crate::error::{PyRunnerError, Result};
10use blake3::Hasher;
11use zip::read::ZipFile;
12use zip::ZipArchive;
13
/// A single file stored inside a bundle.
///
/// Both the path and the payload live behind `Arc`s, so cloning an entry
/// bumps two reference counts instead of copying the file contents.
#[derive(Clone)]
pub struct BundleEntry {
    path: Arc<str>,
    data: Arc<[u8]>,
}

impl BundleEntry {
    /// Returns the normalized relative path for this entry.
    pub fn path(&self) -> &str {
        self.path.as_ref()
    }

    /// Returns the raw contents for this entry.
    pub fn contents(&self) -> &[u8] {
        self.data.as_ref()
    }
}

impl fmt::Debug for BundleEntry {
    /// Formats the entry as its path plus payload length; the payload bytes
    /// themselves are deliberately left out of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let byte_len = self.data.len();
        let mut out = f.debug_struct("BundleEntry");
        out.field("path", &self.path);
        out.field("len", &byte_len);
        out.finish()
    }
}
41
42/// An in-memory bundle extracted from a ZIP archive.
43#[derive(Clone, Default)]
44pub struct Bundle {
45    inner: Arc<BundleInner>,
46}
47
48#[derive(Default)]
49struct BundleInner {
50    entries: Vec<BundleEntry>,
51}
52
53impl fmt::Debug for Bundle {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        f.debug_struct("Bundle")
56            .field("entries", &self.inner.entries)
57            .finish()
58    }
59}
60
61impl Bundle {
62    /// Constructs a bundle from a ZIP archive held entirely in memory.
63    ///
64    /// Cloning the resulting `Bundle` is inexpensive; the file data is reference
65    /// counted internally so you can parse once and reuse it across invocations.
66    pub fn from_zip_bytes(bytes: impl AsRef<[u8]>) -> Result<Self> {
67        let cursor = Cursor::new(bytes.as_ref().to_vec());
68        Self::from_reader(cursor)
69    }
70
71    /// Constructs a bundle from any `Read + Seek` ZIP archive.
72    pub fn from_reader<R: Read + Seek>(reader: R) -> Result<Self> {
73        let mut archive = ZipArchive::new(reader)
74            .map_err(|err| PyRunnerError::Bundle(format!("invalid zip archive: {err}")))?;
75        let mut entries = Vec::with_capacity(archive.len());
76        for i in 0..archive.len() {
77            let file = archive
78                .by_index(i)
79                .map_err(|err| PyRunnerError::Bundle(format!("zip access error: {err}")))?;
80            if file.is_dir() {
81                continue;
82            }
83            let normalized = normalize_entry_path(file.name()).map_err(|err| {
84                PyRunnerError::Bundle(format!("invalid entry '{}': {err}", file.name()))
85            })?;
86            let data = read_zip_file(file)?;
87            entries.push(BundleEntry {
88                path: Arc::<str>::from(normalized),
89                data: Arc::<[u8]>::from(data),
90            });
91        }
92        if entries.is_empty() {
93            return Err(PyRunnerError::Bundle(
94                "bundle did not contain any files".to_owned(),
95            ));
96        }
97        Ok(Self {
98            inner: Arc::new(BundleInner { entries }),
99        })
100    }
101
102    /// Returns all entries in this bundle.
103    pub fn entries(&self) -> &[BundleEntry] {
104        &self.inner.entries
105    }
106
107    /// Returns a stable fingerprint for the bundle contents.
108    pub fn fingerprint(&self) -> BundleFingerprint {
109        let mut hasher = Hasher::new();
110        let mut entries: Vec<_> = self
111            .inner
112            .entries
113            .iter()
114            .map(|entry| (entry.path.clone(), entry.data.clone()))
115            .collect();
116        entries.sort_by(|a, b| a.0.cmp(&b.0));
117        for (path, data) in entries {
118            hasher.update(path.as_bytes());
119            let len_bytes = (data.len() as u64).to_le_bytes();
120            hasher.update(&len_bytes);
121            hasher.update(&data);
122        }
123        BundleFingerprint(hasher.finalize().as_bytes()[..8].try_into().unwrap())
124    }
125
126    /// Parses and returns the embedded bundle manifest, if present.
127    pub fn manifest(&self) -> Result<Option<BundleManifest>> {
128        let entry = self
129            .inner
130            .entries
131            .iter()
132            .find(|entry| entry.path.as_ref() == MANIFEST_BASENAME);
133        match entry {
134            Some(manifest_entry) => {
135                let manifest = BundleManifest::from_bytes(manifest_entry.contents())?;
136                Ok(Some(manifest))
137            }
138            None => Ok(None),
139        }
140    }
141
142    /// Consumes the bundle and returns its entries.
143    pub fn into_entries(self) -> Vec<BundleEntry> {
144        Arc::try_unwrap(self.inner)
145            .map(|inner| inner.entries)
146            .unwrap_or_else(|inner| inner.entries.clone())
147    }
148}
149
/// Eight-byte bundle fingerprint derived from a BLAKE3 hash.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct BundleFingerprint([u8; 8]);

impl BundleFingerprint {
    /// Returns the fingerprint as a `u64`, reading the stored bytes in
    /// little-endian order.
    pub fn as_u64(&self) -> u64 {
        let Self(bytes) = *self;
        u64::from_le_bytes(bytes)
    }
}
160
161fn read_zip_file(mut file: ZipFile<'_>) -> Result<Vec<u8>> {
162    let mut buf = Vec::new();
163    file.read_to_end(&mut buf)
164        .map_err(|err| PyRunnerError::Bundle(format!("failed to read '{}': {err}", file.name())))?;
165    Ok(buf)
166}
167
168fn normalize_entry_path(raw: &str) -> Result<String> {
169    if raw.is_empty() {
170        return Err(PyRunnerError::Bundle("entry has empty name".into()));
171    }
172    let path = Path::new(raw);
173    if path.is_absolute() {
174        return Err(PyRunnerError::Bundle(
175            "absolute paths are not allowed".into(),
176        ));
177    }
178    let mut parts = Vec::new();
179    for component in path.components() {
180        match component {
181            Component::Prefix(_) | Component::RootDir => {
182                return Err(PyRunnerError::Bundle("unsupported path prefix".into()))
183            }
184            Component::CurDir => continue,
185            Component::ParentDir => {
186                if parts.pop().is_none() {
187                    return Err(PyRunnerError::Bundle(
188                        "path traversal outside bundle root is not allowed".into(),
189                    ));
190                }
191            }
192            Component::Normal(token) => {
193                let segment = token.to_str().ok_or_else(|| {
194                    PyRunnerError::Bundle("non-utf8 path segments not supported".into())
195                })?;
196                if segment.is_empty() {
197                    return Err(PyRunnerError::Bundle(
198                        "empty path segment encountered".into(),
199                    ));
200                }
201                parts.push(segment.to_owned());
202            }
203        }
204    }
205    if parts.is_empty() {
206        return Err(PyRunnerError::Bundle("entry resolves to empty path".into()));
207    }
208    Ok(parts.join("/"))
209}