Skip to main content

agentics_contracts/validation/
archive.rs

1//! Shared ZIP archive envelope validation and safe extraction helpers.
2
3use std::collections::HashSet;
4use std::io::{Read, Seek};
5use std::path::{Component, Path};
6
7use agentics_error::{Result, ServiceError};
8
9/// Local challenge/archive validation failures before service-boundary mapping.
10#[derive(Debug, thiserror::Error)]
11pub enum ChallengeValidationError {
12    #[error("archive traversal rejected: {0}")]
13    ArchiveTraversal(String),
14    #[error("invalid manifest: {0}")]
15    InvalidManifest(String),
16    #[error("unsafe path rejected: {0}")]
17    UnsafePath(String),
18    #[error("unsupported target: {0}")]
19    UnsupportedTarget(String),
20}
21
22impl From<ChallengeValidationError> for ServiceError {
23    fn from(error: ChallengeValidationError) -> Self {
24        ServiceError::Validation(error.to_string())
25    }
26}
27
/// Limits applied to the ZIP archive envelope of one external contract.
#[derive(Debug, Clone)]
pub struct ArchiveEnvelopePolicy {
    /// User-facing name of the archive, used in validation messages.
    label: String,
    /// Upper bound on the compressed archive size, in bytes.
    max_archive_bytes: u64,
    /// Upper bound on the number of entries.
    max_entries: usize,
    /// Upper bound on the summed expanded size of all entries, in bytes.
    max_expanded_bytes: u64,
    /// Whether symlink entries are refused; `new` always enables this.
    reject_symlinks: bool,
}

impl ArchiveEnvelopePolicy {
    /// Create a policy from the given limits, with symlink rejection switched on.
    pub fn new(
        label: impl Into<String>,
        max_archive_bytes: u64,
        max_entries: usize,
        max_expanded_bytes: u64,
    ) -> Self {
        let label = label.into();
        Self {
            label,
            max_archive_bytes,
            max_entries,
            max_expanded_bytes,
            reject_symlinks: true,
        }
    }

    /// The user-facing archive label.
    pub fn label(&self) -> &str {
        self.label.as_str()
    }

    /// Largest accepted compressed archive size, in bytes.
    pub fn max_archive_bytes(&self) -> u64 {
        self.max_archive_bytes
    }

    /// Largest accepted number of entries.
    pub fn max_entries(&self) -> usize {
        self.max_entries
    }

    /// Largest accepted total of expanded entry bytes.
    pub fn max_expanded_bytes(&self) -> u64 {
        self.max_expanded_bytes
    }
}
75
76/// A normalized path inside a ZIP archive.
77#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
78pub struct NormalizedArchivePath(String);
79
80impl NormalizedArchivePath {
81    /// Normalize and validate an archive entry path.
82    pub fn try_new(raw: &str, label: &str) -> Result<Self> {
83        if raw.is_empty() || raw.contains('\0') || raw.starts_with('/') || raw.starts_with('\\') {
84            return Err(ChallengeValidationError::ArchiveTraversal(format!(
85                "{label} contains an unsafe ZIP entry path",
86            ))
87            .into());
88        }
89
90        let trimmed = raw.trim_matches(['/', '\\']);
91        if trimmed.is_empty() {
92            return Err(ChallengeValidationError::ArchiveTraversal(format!(
93                "{label} contains an unsafe ZIP entry path",
94            ))
95            .into());
96        }
97
98        let mut parts = Vec::new();
99        for part in trimmed.split(['/', '\\']) {
100            if part.is_empty() || part == "." || part == ".." {
101                return Err(ChallengeValidationError::UnsafePath(format!(
102                    "{label} contains unsafe path `{raw}`",
103                ))
104                .into());
105            }
106            parts.push(part);
107        }
108
109        Ok(Self(parts.join("/")))
110    }
111
112    /// Normalize a trusted local relative path into archive wire form.
113    pub fn from_relative_path(path: &Path, label: &str) -> Result<Self> {
114        let mut parts = Vec::new();
115        for component in path.components() {
116            match component {
117                Component::Normal(value) => {
118                    let value = value.to_str().ok_or_else(|| {
119                        ServiceError::Validation(format!(
120                            "{label} contains a path that is not valid UTF-8: {}",
121                            path.display()
122                        ))
123                    })?;
124                    parts.push(value);
125                }
126                Component::CurDir => {}
127                Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
128                    return Err(ChallengeValidationError::UnsafePath(format!(
129                        "{label} contains unsafe path `{}`",
130                        path.display(),
131                    ))
132                    .into());
133                }
134            }
135        }
136
137        Self::try_new(&parts.join("/"), label)
138    }
139
140    /// Borrow the canonical ZIP path string.
141    pub fn as_str(&self) -> &str {
142        &self.0
143    }
144
145    /// Borrow as a relative filesystem path for safe joins under a controlled root.
146    pub fn as_path(&self) -> &Path {
147        Path::new(&self.0)
148    }
149}
150
151impl std::fmt::Display for NormalizedArchivePath {
152    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153        f.write_str(self.as_str())
154    }
155}
156
157/// Validated metadata for one archive entry.
158#[derive(Debug, Clone)]
159pub struct ArchiveEnvelopeEntry {
160    index: usize,
161    path: NormalizedArchivePath,
162    is_dir: bool,
163    size: u64,
164    compressed_size: u64,
165}
166
167impl ArchiveEnvelopeEntry {
168    /// Entry index in the ZIP central directory.
169    pub fn index(&self) -> usize {
170        self.index
171    }
172
173    /// Normalized relative archive path.
174    pub fn path(&self) -> &NormalizedArchivePath {
175        &self.path
176    }
177
178    /// Whether the entry is a directory.
179    pub fn is_dir(&self) -> bool {
180        self.is_dir
181    }
182
183    /// Expanded entry size in bytes.
184    pub fn size(&self) -> u64 {
185        self.size
186    }
187
188    /// Compressed entry size in bytes.
189    pub fn compressed_size(&self) -> u64 {
190        self.compressed_size
191    }
192}
193
194/// Validated archive envelope summary.
195#[derive(Debug, Clone)]
196pub struct ArchiveEnvelope {
197    label: String,
198    archive_size: u64,
199    expanded_size: u64,
200    entries: Vec<ArchiveEnvelopeEntry>,
201}
202
203impl ArchiveEnvelope {
204    /// User-facing archive label from the policy that produced this envelope.
205    pub fn label(&self) -> &str {
206        &self.label
207    }
208
209    /// Compressed archive size in bytes.
210    pub fn archive_size(&self) -> u64 {
211        self.archive_size
212    }
213
214    /// Total expanded entry bytes.
215    pub fn expanded_size(&self) -> u64 {
216        self.expanded_size
217    }
218
219    /// Validated archive entries in central-directory order.
220    pub fn entries(&self) -> &[ArchiveEnvelopeEntry] {
221        &self.entries
222    }
223}
224
225/// Validate a ZIP archive already loaded in memory.
226pub fn inspect_zip_bytes(bytes: &[u8], policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
227    let archive_size = u64::try_from(bytes.len())
228        .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
229    ensure_archive_size(archive_size, policy)?;
230    let reader = std::io::Cursor::new(bytes);
231    let mut archive = zip::ZipArchive::new(reader)?;
232    inspect_zip_archive(archive_size, &mut archive, policy)
233}
234
235/// Validate a ZIP archive on disk.
236pub fn inspect_zip_file(path: &Path, policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
237    let archive_size = std::fs::metadata(path)?.len();
238    ensure_archive_size(archive_size, policy)?;
239    let reader = std::fs::File::open(path)?;
240    let mut archive = zip::ZipArchive::new(reader)?;
241    inspect_zip_archive(archive_size, &mut archive, policy)
242}
243
244/// Validate and safely extract a ZIP archive under `target_dir`.
245pub fn extract_zip_file_to_dir(
246    archive_path: &Path,
247    target_dir: &Path,
248    policy: &ArchiveEnvelopePolicy,
249) -> Result<()> {
250    let archive_size = std::fs::metadata(archive_path)?.len();
251    ensure_archive_size(archive_size, policy)?;
252    let reader = std::fs::File::open(archive_path)?;
253    let mut archive = zip::ZipArchive::new(reader)?;
254    let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
255    extract_validated_zip_archive(&mut archive, &envelope, target_dir)
256}
257
258/// Validate and safely extract an in-memory ZIP archive under `target_dir`.
259pub fn extract_zip_bytes_to_dir(
260    bytes: &[u8],
261    target_dir: &Path,
262    policy: &ArchiveEnvelopePolicy,
263) -> Result<()> {
264    let archive_size = u64::try_from(bytes.len())
265        .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
266    ensure_archive_size(archive_size, policy)?;
267    let reader = std::io::Cursor::new(bytes);
268    let mut archive = zip::ZipArchive::new(reader)?;
269    let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
270    extract_validated_zip_archive(&mut archive, &envelope, target_dir)
271}
272
273/// Extract entries that were already validated from the same archive object.
274fn extract_validated_zip_archive<R: Read + Seek>(
275    archive: &mut zip::ZipArchive<R>,
276    envelope: &ArchiveEnvelope,
277    target_dir: &Path,
278) -> Result<()> {
279    for entry in envelope.entries() {
280        let mut file = archive.by_index(entry.index())?;
281        let outpath = target_dir.join(entry.path().as_path());
282
283        if entry.is_dir() {
284            std::fs::create_dir_all(&outpath)?;
285        } else {
286            if outpath.exists() {
287                return Err(ServiceError::Validation(format!(
288                    "{} cannot overwrite existing path `{}`",
289                    envelope.label(),
290                    entry.path()
291                )));
292            }
293            if let Some(parent) = outpath.parent() {
294                std::fs::create_dir_all(parent)?;
295            }
296            let mut outfile = std::fs::OpenOptions::new()
297                .write(true)
298                .create_new(true)
299                .open(&outpath)?;
300            std::io::copy(&mut file, &mut outfile)?;
301        }
302    }
303
304    Ok(())
305}
306
307/// Validate archive size against policy.
308fn ensure_archive_size(archive_size: u64, policy: &ArchiveEnvelopePolicy) -> Result<()> {
309    if archive_size > policy.max_archive_bytes() {
310        return Err(ServiceError::Validation(format!(
311            "{} must be at most {} bytes",
312            policy.label(),
313            policy.max_archive_bytes()
314        )));
315    }
316    Ok(())
317}
318
319/// Inspect a ZIP archive without extracting it.
320fn inspect_zip_archive<R: Read + Seek>(
321    archive_size: u64,
322    archive: &mut zip::ZipArchive<R>,
323    policy: &ArchiveEnvelopePolicy,
324) -> Result<ArchiveEnvelope> {
325    if archive.len() > policy.max_entries() {
326        return Err(ServiceError::Validation(format!(
327            "{} must contain at most {} entries",
328            policy.label(),
329            policy.max_entries()
330        )));
331    }
332
333    let mut expanded_size = 0u64;
334    let mut seen_paths = HashSet::with_capacity(archive.len());
335    let mut entries = Vec::with_capacity(archive.len());
336    for index in 0..archive.len() {
337        let file = archive.by_index(index)?;
338        if policy.reject_symlinks
339            && file
340                .unix_mode()
341                .is_some_and(|mode| mode & 0o170000 == 0o120000)
342        {
343            return Err(ServiceError::Validation(format!(
344                "{} must not contain symlinks",
345                policy.label()
346            )));
347        }
348
349        let path = NormalizedArchivePath::try_new(file.name(), policy.label())?;
350        if !seen_paths.insert(path.clone()) {
351            return Err(ServiceError::Validation(format!(
352                "{} contains duplicate path `{path}`",
353                policy.label()
354            )));
355        }
356
357        expanded_size = expanded_size
358            .checked_add(file.size())
359            .ok_or_else(|| ServiceError::Validation(format!("{} is too large", policy.label())))?;
360        if expanded_size > policy.max_expanded_bytes() {
361            return Err(ServiceError::Validation(format!(
362                "{} must expand to at most {} bytes",
363                policy.label(),
364                policy.max_expanded_bytes()
365            )));
366        }
367
368        entries.push(ArchiveEnvelopeEntry {
369            index,
370            path,
371            is_dir: file.is_dir(),
372            size: file.size(),
373            compressed_size: file.compressed_size(),
374        });
375    }
376
377    Ok(ArchiveEnvelope {
378        label: policy.label().to_string(),
379        archive_size,
380        expanded_size,
381        entries,
382    })
383}
384
/// Test helpers for hand-built ZIP payloads.
#[cfg(test)]
pub(crate) mod test_support {
    use std::io::Write;

    /// "Version made by" for central-directory records: the high byte is the
    /// host system (3 = Unix), the low byte the ZIP spec version (20 = 2.0).
    /// The Unix host marker is what tells a reader that the upper 16 bits of
    /// the external attributes hold Unix mode bits.
    const VERSION_MADE_BY_UNIX: u16 = (3 << 8) | 20;

    /// Build a stored ZIP archive with explicit Unix mode bits.
    ///
    /// Each entry is `(name, content, unix_mode)`. Entries are written
    /// uncompressed with a zero CRC field, and `unix_mode` is placed in the
    /// upper 16 bits of the central-directory external attributes.
    ///
    /// # Panics
    ///
    /// Panics when the entry count, a name length, a content length or an
    /// offset does not fit the fixed-width ZIP header fields.
    pub(crate) fn raw_stored_zip(entries: Vec<(&str, &[u8], u32)>) -> Vec<u8> {
        let mut bytes = Vec::new();
        let mut central_directory = Vec::new();
        let entry_count = u16::try_from(entries.len()).expect("test ZIP entries fit u16");

        for (name, content, unix_mode) in entries {
            let local_header_offset =
                u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
            let name_bytes = name.as_bytes();
            let name_len = u16::try_from(name_bytes.len()).expect("test ZIP names are short");
            let content_len =
                u32::try_from(content.len()).expect("test ZIP content should fit u32");

            // Local file header.
            bytes.extend_from_slice(&0x0403_4b50u32.to_le_bytes()); // signature
            bytes.extend_from_slice(&20u16.to_le_bytes()); // version needed
            bytes.extend_from_slice(&0u16.to_le_bytes()); // general-purpose flags
            bytes.extend_from_slice(&0u16.to_le_bytes()); // compression: stored
            bytes.extend_from_slice(&0u16.to_le_bytes()); // modification time
            bytes.extend_from_slice(&0u16.to_le_bytes()); // modification date
            bytes.extend_from_slice(&0u32.to_le_bytes()); // CRC-32 (left zero)
            bytes.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            bytes.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            bytes.extend_from_slice(&name_len.to_le_bytes()); // file name length
            bytes.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            bytes.extend_from_slice(name_bytes);
            bytes.extend_from_slice(content);

            // Central directory file header.
            central_directory.extend_from_slice(&0x0201_4b50u32.to_le_bytes()); // signature
            central_directory.extend_from_slice(&VERSION_MADE_BY_UNIX.to_le_bytes());
            central_directory.extend_from_slice(&20u16.to_le_bytes()); // version needed
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // general-purpose flags
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // compression: stored
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // modification time
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // modification date
            central_directory.extend_from_slice(&0u32.to_le_bytes()); // CRC-32 (left zero)
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            central_directory.extend_from_slice(&name_len.to_le_bytes()); // file name length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // comment length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // disk number start
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // internal attributes
            central_directory.extend_from_slice(&(unix_mode << 16).to_le_bytes()); // external
            central_directory.extend_from_slice(&local_header_offset.to_le_bytes());
            central_directory.extend_from_slice(name_bytes);
        }

        let central_directory_offset =
            u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
        let central_directory_size =
            u32::try_from(central_directory.len()).expect("test ZIP should fit u32 sizes");
        bytes.write_all(&central_directory).expect("central dir");

        // End of central directory record.
        bytes.extend_from_slice(&0x0605_4b50u32.to_le_bytes()); // signature
        bytes.extend_from_slice(&0u16.to_le_bytes()); // number of this disk
        bytes.extend_from_slice(&0u16.to_le_bytes()); // disk holding the central directory
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries on this disk
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries in total
        bytes.extend_from_slice(&central_directory_size.to_le_bytes());
        bytes.extend_from_slice(&central_directory_offset.to_le_bytes());
        bytes.extend_from_slice(&0u16.to_le_bytes()); // comment length
        bytes
    }
}
454
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::{ArchiveEnvelopePolicy, NormalizedArchivePath, inspect_zip_bytes};

    /// Policy used by most tests: 1024 archive bytes, 4 entries, 64 expanded bytes.
    fn policy() -> ArchiveEnvelopePolicy {
        ArchiveEnvelopePolicy::new("test archive", 1024, 4, 64)
    }

    /// Build an uncompressed (stored) ZIP holding the given `(path, content)` entries.
    fn zip_with_entries(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut buffer = std::io::Cursor::new(Vec::new());
        {
            let mut writer = zip::ZipWriter::new(&mut buffer);
            let stored = zip::write::SimpleFileOptions::default()
                .compression_method(zip::CompressionMethod::Stored);
            for (path, content) in entries {
                writer.start_file(path, stored).expect("entry");
                writer.write_all(content).expect("content");
            }
            writer.finish().expect("zip");
        }
        buffer.into_inner()
    }

    #[test]
    fn validates_archive_envelope() {
        let bytes = zip_with_entries(&[("dir/file.txt", b"hello")]);
        let envelope = inspect_zip_bytes(&bytes, &policy()).expect("archive should validate");

        let entries = envelope.entries();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path().as_str(), "dir/file.txt");
        assert_eq!(envelope.expanded_size(), 5);
    }

    #[test]
    fn rejects_hostile_archive_entries() {
        // Traversal, absolute, empty-component and dot-component names.
        for name in ["../evil", "/evil", "a//b", "a/./b"] {
            let bytes = zip_with_entries(&[(name, b"x")]);
            let outcome = inspect_zip_bytes(&bytes, &policy());
            assert!(outcome.is_err(), "{name}");
        }

        // Two names that normalize to the same path.
        let duplicate = zip_with_entries(&[("a/b.txt", b"1"), ("a\\b.txt", b"2")]);
        assert!(inspect_zip_bytes(&duplicate, &policy()).is_err());

        // An entry whose Unix mode carries the symlink file type.
        let symlink = super::test_support::raw_stored_zip(vec![("link", b"target", 0o120777)]);
        assert!(inspect_zip_bytes(&symlink, &policy()).is_err());
    }

    #[test]
    fn enforces_archive_limits() {
        // One byte over the 64-byte expanded limit.
        let oversized = zip_with_entries(&[("file.txt", &[b'x'; 65])]);
        assert!(inspect_zip_bytes(&oversized, &policy()).is_err());

        // Five entries against a four-entry limit.
        let too_many = zip_with_entries(&[
            ("a", b"1"),
            ("b", b"1"),
            ("c", b"1"),
            ("d", b"1"),
            ("e", b"1"),
        ]);
        assert!(inspect_zip_bytes(&too_many, &policy()).is_err());

        // Archive larger than an 8-byte compressed limit.
        let tiny_policy = ArchiveEnvelopePolicy::new("test archive", 8, 4, 64);
        let bytes = zip_with_entries(&[("file.txt", b"hello")]);
        assert!(inspect_zip_bytes(&bytes, &tiny_policy).is_err());
    }

    #[test]
    fn normalizes_local_relative_paths() {
        let local = std::path::Path::new("./src/main.rs");
        let path = NormalizedArchivePath::from_relative_path(local, "package path")
            .expect("relative path should normalize");
        assert_eq!(path.as_str(), "src/main.rs");
    }
}
532}