Skip to main content

agentics_contracts/validation/
archive.rs

1//! Shared ZIP archive envelope validation and safe extraction helpers.
2
3use std::collections::HashSet;
4use std::io::{Read, Seek};
5use std::path::{Component, Path};
6
7use agentics_error::{Result, ServiceError};
8
/// Local challenge/archive validation failures before service-boundary mapping.
///
/// Every variant wraps a human-readable detail string that is interpolated
/// into the `Display` text declared by the variant's `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum ChallengeValidationError {
    /// An archive entry path was rejected outright (empty, contains NUL, or
    /// starts with a path separator).
    #[error("archive traversal rejected: {0}")]
    ArchiveTraversal(String),
    /// A manifest failed validation.
    // NOTE(review): not constructed anywhere in the visible part of this file;
    // presumably used by sibling validation modules.
    #[error("invalid manifest: {0}")]
    InvalidManifest(String),
    /// A path contained an unsafe component such as `.`, `..`, an empty
    /// segment, or a root/prefix component.
    #[error("unsafe path rejected: {0}")]
    UnsafePath(String),
    /// A requested target is not supported.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("unsupported target: {0}")]
    UnsupportedTarget(String),
}
21
22impl From<ChallengeValidationError> for ServiceError {
23    fn from(error: ChallengeValidationError) -> Self {
24        ServiceError::Validation(error.to_string())
25    }
26}
27
/// ZIP archive envelope policy for one external contract.
///
/// Bundles the size/count limits and safety switches that an archive must
/// satisfy before it is accepted or extracted.
#[derive(Debug, Clone)]
pub struct ArchiveEnvelopePolicy {
    /// User-facing archive label, used as the subject of error messages.
    label: String,
    /// Maximum compressed archive size in bytes.
    max_archive_bytes: u64,
    /// Maximum number of entries the archive may contain.
    max_entries: usize,
    /// Maximum total expanded size of all entries, in bytes.
    max_expanded_bytes: u64,
    /// Whether entries whose Unix mode marks them as symlinks are rejected.
    /// The only visible constructor (`new`) always sets this to `true`.
    reject_symlinks: bool,
}
37
38impl ArchiveEnvelopePolicy {
39    /// Build a policy with the default hostile-archive safety checks enabled.
40    pub fn new(
41        label: impl Into<String>,
42        max_archive_bytes: u64,
43        max_entries: usize,
44        max_expanded_bytes: u64,
45    ) -> Self {
46        Self {
47            label: label.into(),
48            max_archive_bytes,
49            max_entries,
50            max_expanded_bytes,
51            reject_symlinks: true,
52        }
53    }
54
55    /// Borrow the user-facing archive label.
56    pub fn label(&self) -> &str {
57        &self.label
58    }
59
60    /// Maximum compressed archive size in bytes.
61    pub fn max_archive_bytes(&self) -> u64 {
62        self.max_archive_bytes
63    }
64
65    /// Maximum entry count.
66    pub fn max_entries(&self) -> usize {
67        self.max_entries
68    }
69
70    /// Maximum total expanded entry bytes.
71    pub fn max_expanded_bytes(&self) -> u64 {
72        self.max_expanded_bytes
73    }
74}
75
/// A normalized path inside a ZIP archive.
///
/// The wrapped string is a relative path whose components are joined with
/// `/`; the visible constructors reject empty, `.`, and `..` components.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NormalizedArchivePath(String);
79
80impl NormalizedArchivePath {
81    /// Normalize and validate an archive entry path.
82    pub fn try_new(raw: &str, label: &str) -> Result<Self> {
83        if raw.is_empty() || raw.contains('\0') || raw.starts_with('/') || raw.starts_with('\\') {
84            return Err(ChallengeValidationError::ArchiveTraversal(format!(
85                "{label} contains an unsafe ZIP entry path",
86            ))
87            .into());
88        }
89
90        let trimmed = raw.trim_matches(['/', '\\']);
91        if trimmed.is_empty() {
92            return Err(ChallengeValidationError::ArchiveTraversal(format!(
93                "{label} contains an unsafe ZIP entry path",
94            ))
95            .into());
96        }
97
98        let mut parts = Vec::new();
99        for part in trimmed.split(['/', '\\']) {
100            if part.is_empty() || part == "." || part == ".." {
101                return Err(ChallengeValidationError::UnsafePath(format!(
102                    "{label} contains unsafe path `{raw}`",
103                ))
104                .into());
105            }
106            parts.push(part);
107        }
108
109        Ok(Self(parts.join("/")))
110    }
111
112    /// Normalize a trusted local relative path into archive wire form.
113    pub fn from_relative_path(path: &Path, label: &str) -> Result<Self> {
114        let mut parts = Vec::new();
115        for component in path.components() {
116            match component {
117                Component::Normal(value) => {
118                    let value = value.to_str().ok_or_else(|| {
119                        ServiceError::Validation(format!(
120                            "{label} contains a path that is not valid UTF-8: {}",
121                            path.display()
122                        ))
123                    })?;
124                    parts.push(value);
125                }
126                Component::CurDir => {}
127                Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
128                    return Err(ChallengeValidationError::UnsafePath(format!(
129                        "{label} contains unsafe path `{}`",
130                        path.display(),
131                    ))
132                    .into());
133                }
134            }
135        }
136
137        Self::try_new(&parts.join("/"), label)
138    }
139
140    /// Borrow the canonical ZIP path string.
141    pub fn as_str(&self) -> &str {
142        &self.0
143    }
144
145    /// Borrow as a relative filesystem path for safe joins under a controlled root.
146    pub fn as_path(&self) -> &Path {
147        Path::new(&self.0)
148    }
149}
150
151impl std::fmt::Display for NormalizedArchivePath {
152    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153        f.write_str(self.as_str())
154    }
155}
156
/// Validated metadata for one archive entry.
#[derive(Debug, Clone)]
pub struct ArchiveEnvelopeEntry {
    /// Entry index in the ZIP central directory.
    index: usize,
    /// Normalized relative path of the entry.
    path: NormalizedArchivePath,
    /// Whether the entry is a directory.
    is_dir: bool,
    /// Expanded (uncompressed) size in bytes, as reported by the archive.
    size: u64,
    /// Compressed size in bytes, as reported by the archive.
    compressed_size: u64,
}
166
167impl ArchiveEnvelopeEntry {
168    /// Entry index in the ZIP central directory.
169    pub fn index(&self) -> usize {
170        self.index
171    }
172
173    /// Normalized relative archive path.
174    pub fn path(&self) -> &NormalizedArchivePath {
175        &self.path
176    }
177
178    /// Whether the entry is a directory.
179    pub fn is_dir(&self) -> bool {
180        self.is_dir
181    }
182
183    /// Expanded entry size in bytes.
184    pub fn size(&self) -> u64 {
185        self.size
186    }
187
188    /// Compressed entry size in bytes.
189    pub fn compressed_size(&self) -> u64 {
190        self.compressed_size
191    }
192}
193
/// Validated archive envelope summary.
#[derive(Debug, Clone)]
pub struct ArchiveEnvelope {
    /// User-facing archive label copied from the policy that produced this envelope.
    label: String,
    /// Compressed archive size in bytes.
    archive_size: u64,
    /// Sum of the expanded sizes of all entries, in bytes.
    expanded_size: u64,
    /// Validated entries in central-directory order.
    entries: Vec<ArchiveEnvelopeEntry>,
}
202
203impl ArchiveEnvelope {
204    /// User-facing archive label from the policy that produced this envelope.
205    pub fn label(&self) -> &str {
206        &self.label
207    }
208
209    /// Compressed archive size in bytes.
210    pub fn archive_size(&self) -> u64 {
211        self.archive_size
212    }
213
214    /// Total expanded entry bytes.
215    pub fn expanded_size(&self) -> u64 {
216        self.expanded_size
217    }
218
219    /// Validated archive entries in central-directory order.
220    pub fn entries(&self) -> &[ArchiveEnvelopeEntry] {
221        &self.entries
222    }
223}
224
225/// Validate a ZIP archive already loaded in memory.
226pub fn inspect_zip_bytes(bytes: &[u8], policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
227    let archive_size = u64::try_from(bytes.len())
228        .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
229    ensure_archive_size(archive_size, policy)?;
230    let reader = std::io::Cursor::new(bytes);
231    let mut archive = zip::ZipArchive::new(reader)?;
232    inspect_zip_archive(archive_size, &mut archive, policy)
233}
234
235/// Validate a ZIP archive on disk.
236pub fn inspect_zip_file(path: &Path, policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
237    let archive_size = std::fs::metadata(path)?.len();
238    ensure_archive_size(archive_size, policy)?;
239    let reader = std::fs::File::open(path)?;
240    let mut archive = zip::ZipArchive::new(reader)?;
241    inspect_zip_archive(archive_size, &mut archive, policy)
242}
243
244/// Validate and safely extract a ZIP archive under `target_dir`.
245pub fn extract_zip_file_to_dir(
246    archive_path: &Path,
247    target_dir: &Path,
248    policy: &ArchiveEnvelopePolicy,
249) -> Result<()> {
250    let archive_size = std::fs::metadata(archive_path)?.len();
251    ensure_archive_size(archive_size, policy)?;
252    let reader = std::fs::File::open(archive_path)?;
253    let mut archive = zip::ZipArchive::new(reader)?;
254    let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
255    extract_validated_zip_archive(&mut archive, &envelope, target_dir)
256}
257
258/// Validate and safely extract an in-memory ZIP archive under `target_dir`.
259pub fn extract_zip_bytes_to_dir(
260    bytes: &[u8],
261    target_dir: &Path,
262    policy: &ArchiveEnvelopePolicy,
263) -> Result<()> {
264    let archive_size = u64::try_from(bytes.len())
265        .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
266    ensure_archive_size(archive_size, policy)?;
267    let reader = std::io::Cursor::new(bytes);
268    let mut archive = zip::ZipArchive::new(reader)?;
269    let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
270    extract_validated_zip_archive(&mut archive, &envelope, target_dir)
271}
272
273/// Extract entries that were already validated from the same archive object.
274fn extract_validated_zip_archive<R: Read + Seek>(
275    archive: &mut zip::ZipArchive<R>,
276    envelope: &ArchiveEnvelope,
277    target_dir: &Path,
278) -> Result<()> {
279    for entry in envelope.entries() {
280        let mut file = archive.by_index(entry.index())?;
281        let outpath = target_dir.join(entry.path().as_path());
282
283        if entry.is_dir() {
284            std::fs::create_dir_all(&outpath)?;
285        } else {
286            if outpath.exists() {
287                return Err(ServiceError::Validation(format!(
288                    "{} cannot overwrite existing path `{}`",
289                    envelope.label(),
290                    entry.path()
291                )));
292            }
293            if let Some(parent) = outpath.parent() {
294                std::fs::create_dir_all(parent)?;
295            }
296            let mut outfile = std::fs::OpenOptions::new()
297                .write(true)
298                .create_new(true)
299                .open(&outpath)?;
300            let copied = std::io::copy(&mut file, &mut outfile)?;
301            if copied != entry.size() {
302                return Err(ServiceError::Validation(format!(
303                    "{} entry `{}` extracted {copied} bytes, expected {} bytes",
304                    envelope.label(),
305                    entry.path(),
306                    entry.size()
307                )));
308            }
309        }
310    }
311
312    Ok(())
313}
314
315/// Validate archive size against policy.
316fn ensure_archive_size(archive_size: u64, policy: &ArchiveEnvelopePolicy) -> Result<()> {
317    if archive_size > policy.max_archive_bytes() {
318        return Err(ServiceError::Validation(format!(
319            "{} must be at most {} bytes",
320            policy.label(),
321            policy.max_archive_bytes()
322        )));
323    }
324    Ok(())
325}
326
327/// Inspect a ZIP archive without extracting it.
328fn inspect_zip_archive<R: Read + Seek>(
329    archive_size: u64,
330    archive: &mut zip::ZipArchive<R>,
331    policy: &ArchiveEnvelopePolicy,
332) -> Result<ArchiveEnvelope> {
333    if archive.len() > policy.max_entries() {
334        return Err(ServiceError::Validation(format!(
335            "{} must contain at most {} entries",
336            policy.label(),
337            policy.max_entries()
338        )));
339    }
340
341    let mut expanded_size = 0u64;
342    let mut seen_paths = HashSet::with_capacity(archive.len());
343    let mut entries = Vec::with_capacity(archive.len());
344    for index in 0..archive.len() {
345        let file = archive.by_index(index)?;
346        if policy.reject_symlinks
347            && file
348                .unix_mode()
349                .is_some_and(|mode| mode & 0o170000 == 0o120000)
350        {
351            return Err(ServiceError::Validation(format!(
352                "{} must not contain symlinks",
353                policy.label()
354            )));
355        }
356
357        let path = NormalizedArchivePath::try_new(file.name(), policy.label())?;
358        if !seen_paths.insert(path.clone()) {
359            return Err(ServiceError::Validation(format!(
360                "{} contains duplicate path `{path}`",
361                policy.label()
362            )));
363        }
364
365        expanded_size = expanded_size
366            .checked_add(file.size())
367            .ok_or_else(|| ServiceError::Validation(format!("{} is too large", policy.label())))?;
368        if expanded_size > policy.max_expanded_bytes() {
369            return Err(ServiceError::Validation(format!(
370                "{} must expand to at most {} bytes",
371                policy.label(),
372                policy.max_expanded_bytes()
373            )));
374        }
375
376        entries.push(ArchiveEnvelopeEntry {
377            index,
378            path,
379            is_dir: file.is_dir(),
380            size: file.size(),
381            compressed_size: file.compressed_size(),
382        });
383    }
384
385    Ok(ArchiveEnvelope {
386        label: policy.label().to_string(),
387        archive_size,
388        expanded_size,
389        entries,
390    })
391}
392
/// Test helpers for hand-built ZIP payloads.
#[cfg(test)]
pub(crate) mod test_support {
    /// Signature of a local file header.
    const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x0403_4b50;
    /// Signature of a central directory file header.
    const CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0201_4b50;
    /// Signature of the end-of-central-directory record.
    const END_OF_CENTRAL_DIRECTORY_SIGNATURE: u32 = 0x0605_4b50;
    /// "Version needed to extract": 2.0.
    const VERSION_NEEDED: u16 = 20;
    /// "Version made by": host system 3 (UNIX) in the high byte, spec version
    /// 2.0 in the low byte. The host must be UNIX for the upper 16 bits of the
    /// external attributes to be interpreted as Unix mode bits; a high byte of
    /// 0 would declare an MS-DOS host instead.
    const VERSION_MADE_BY: u16 = (3 << 8) | VERSION_NEEDED;

    /// Build a stored ZIP archive with explicit Unix mode bits.
    ///
    /// Each entry is `(name, content, unix_mode)`. Entries are written
    /// uncompressed, and `unix_mode` is placed in the upper 16 bits of the
    /// central-directory external attributes.
    ///
    /// The CRC-32 fields are written as zero, so the result is presumably
    /// only suitable for metadata inspection, not for reading entry content
    /// through a checksum-verifying reader.
    ///
    /// # Panics
    ///
    /// Panics when the entry count or a name length does not fit in `u16`, or
    /// when a content length or offset does not fit in `u32`.
    pub(crate) fn raw_stored_zip(entries: Vec<(&str, &[u8], u32)>) -> Vec<u8> {
        let mut bytes = Vec::new();
        let mut central_directory = Vec::new();
        let entry_count = u16::try_from(entries.len()).expect("test ZIP entries fit u16");

        for (name, content, unix_mode) in entries {
            let local_header_offset =
                u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
            let name_bytes = name.as_bytes();
            let name_len = u16::try_from(name_bytes.len()).expect("test ZIP names are short");
            let content_len =
                u32::try_from(content.len()).expect("test ZIP content should fit u32");

            // Local file header, followed by the name and the stored content.
            bytes.extend_from_slice(&LOCAL_FILE_HEADER_SIGNATURE.to_le_bytes());
            bytes.extend_from_slice(&VERSION_NEEDED.to_le_bytes());
            bytes.extend_from_slice(&0u16.to_le_bytes()); // general purpose flags
            bytes.extend_from_slice(&0u16.to_le_bytes()); // compression method: stored
            bytes.extend_from_slice(&0u16.to_le_bytes()); // last modified time
            bytes.extend_from_slice(&0u16.to_le_bytes()); // last modified date
            bytes.extend_from_slice(&0u32.to_le_bytes()); // CRC-32 (left as zero)
            bytes.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            bytes.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            bytes.extend_from_slice(&name_len.to_le_bytes());
            bytes.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            bytes.extend_from_slice(name_bytes);
            bytes.extend_from_slice(content);

            // Matching central directory header.
            central_directory.extend_from_slice(&CENTRAL_DIRECTORY_SIGNATURE.to_le_bytes());
            central_directory.extend_from_slice(&VERSION_MADE_BY.to_le_bytes());
            central_directory.extend_from_slice(&VERSION_NEEDED.to_le_bytes());
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // general purpose flags
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // compression method: stored
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // last modified time
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // last modified date
            central_directory.extend_from_slice(&0u32.to_le_bytes()); // CRC-32 (left as zero)
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            central_directory.extend_from_slice(&name_len.to_le_bytes());
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // file comment length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // disk number start
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // internal attributes
            // External attributes: Unix mode lives in the upper 16 bits.
            central_directory.extend_from_slice(&(unix_mode << 16).to_le_bytes());
            central_directory.extend_from_slice(&local_header_offset.to_le_bytes());
            central_directory.extend_from_slice(name_bytes);
        }

        let central_directory_offset =
            u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
        let central_directory_size =
            u32::try_from(central_directory.len()).expect("test ZIP should fit u32 sizes");
        bytes.extend_from_slice(&central_directory);

        // End-of-central-directory record.
        bytes.extend_from_slice(&END_OF_CENTRAL_DIRECTORY_SIGNATURE.to_le_bytes());
        bytes.extend_from_slice(&0u16.to_le_bytes()); // number of this disk
        bytes.extend_from_slice(&0u16.to_le_bytes()); // disk holding the central directory
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries on this disk
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries in total
        bytes.extend_from_slice(&central_directory_size.to_le_bytes());
        bytes.extend_from_slice(&central_directory_offset.to_le_bytes());
        bytes.extend_from_slice(&0u16.to_le_bytes()); // archive comment length
        bytes
    }
}
462
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::{ArchiveEnvelopePolicy, NormalizedArchivePath, inspect_zip_bytes};

    /// Shared policy: 1024 archive bytes, 4 entries, 64 expanded bytes.
    fn policy() -> ArchiveEnvelopePolicy {
        ArchiveEnvelopePolicy::new("test archive", 1024, 4, 64)
    }

    /// Write `(path, content)` pairs into an uncompressed in-memory ZIP.
    fn zip_with_entries(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut buffer = std::io::Cursor::new(Vec::new());
        {
            let mut writer = zip::ZipWriter::new(&mut buffer);
            let stored = zip::write::SimpleFileOptions::default()
                .compression_method(zip::CompressionMethod::Stored);
            for &(path, content) in entries {
                writer.start_file(path, stored).expect("entry");
                writer.write_all(content).expect("content");
            }
            writer.finish().expect("zip");
        }
        buffer.into_inner()
    }

    #[test]
    fn validates_archive_envelope() {
        let archive = zip_with_entries(&[("dir/file.txt", b"hello")]);
        let envelope = inspect_zip_bytes(&archive, &policy()).expect("archive should validate");

        let entries = envelope.entries();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path().as_str(), "dir/file.txt");
        assert_eq!(envelope.expanded_size(), 5);
    }

    #[test]
    fn rejects_hostile_archive_entries() {
        // Traversal, absolute, empty-segment, and current-dir paths.
        let hostile_names = ["../evil", "/evil", "a//b", "a/./b"];
        for name in hostile_names {
            let archive = zip_with_entries(&[(name, b"x")]);
            let outcome = inspect_zip_bytes(&archive, &policy());
            assert!(outcome.is_err(), "{name}");
        }

        // Both names normalize to the same path.
        let duplicate = zip_with_entries(&[("a/b.txt", b"1"), ("a\\b.txt", b"2")]);
        assert!(inspect_zip_bytes(&duplicate, &policy()).is_err());

        // Mode bits mark the entry as a symbolic link.
        let symlink = super::test_support::raw_stored_zip(vec![("link", b"target", 0o120777)]);
        assert!(inspect_zip_bytes(&symlink, &policy()).is_err());
    }

    #[test]
    fn enforces_archive_limits() {
        // One byte over the 64-byte expanded limit.
        let oversized = zip_with_entries(&[("file.txt", &[b'x'; 65])]);
        assert!(inspect_zip_bytes(&oversized, &policy()).is_err());

        // One entry over the 4-entry limit.
        let too_many = zip_with_entries(&[
            ("a", b"1"),
            ("b", b"1"),
            ("c", b"1"),
            ("d", b"1"),
            ("e", b"1"),
        ]);
        assert!(inspect_zip_bytes(&too_many, &policy()).is_err());

        // Archive limit of 8 bytes is smaller than the archive built below.
        let tiny_policy = ArchiveEnvelopePolicy::new("test archive", 8, 4, 64);
        let small_archive = zip_with_entries(&[("file.txt", b"hello")]);
        assert!(inspect_zip_bytes(&small_archive, &tiny_policy).is_err());
    }

    #[test]
    fn normalizes_local_relative_paths() {
        let local = std::path::Path::new("./src/main.rs");
        let normalized = NormalizedArchivePath::from_relative_path(local, "package path")
            .expect("relative path should normalize");
        assert_eq!(normalized.as_str(), "src/main.rs");
    }
}