// agentics_contracts/validation/archive.rs
use std::collections::HashSet;
use std::io::{Read, Seek};
use std::path::{Component, Path};

use agentics_error::{Result, ServiceError};
8
9#[derive(Debug, thiserror::Error)]
11pub enum ChallengeValidationError {
12 #[error("archive traversal rejected: {0}")]
13 ArchiveTraversal(String),
14 #[error("invalid manifest: {0}")]
15 InvalidManifest(String),
16 #[error("unsafe path rejected: {0}")]
17 UnsafePath(String),
18 #[error("unsupported target: {0}")]
19 UnsupportedTarget(String),
20}
21
22impl From<ChallengeValidationError> for ServiceError {
23 fn from(error: ChallengeValidationError) -> Self {
24 ServiceError::Validation(error.to_string())
25 }
26}
27
/// Limits applied to an archive before its contents are trusted.
#[derive(Debug, Clone)]
pub struct ArchiveEnvelopePolicy {
    // Name of the archive as it should appear in error messages.
    label: String,
    // Upper bound on the size of the archive itself.
    max_archive_bytes: u64,
    // Upper bound on the number of entries in the archive.
    max_entries: usize,
    // Upper bound on the summed declared size of all entries.
    max_expanded_bytes: u64,
    // Whether entries whose Unix mode marks them as symlinks are refused.
    reject_symlinks: bool,
}

impl ArchiveEnvelopePolicy {
    /// Builds a policy with the given label and limits.
    ///
    /// Symlink rejection is always enabled for policies built this way.
    pub fn new(
        label: impl Into<String>,
        max_archive_bytes: u64,
        max_entries: usize,
        max_expanded_bytes: u64,
    ) -> Self {
        let label = label.into();
        Self {
            label,
            max_archive_bytes,
            max_entries,
            max_expanded_bytes,
            reject_symlinks: true,
        }
    }

    /// Label used to identify the archive in error messages.
    pub fn label(&self) -> &str {
        self.label.as_str()
    }

    /// Maximum permitted size of the archive, in bytes.
    pub fn max_archive_bytes(&self) -> u64 {
        let Self {
            max_archive_bytes, ..
        } = self;
        *max_archive_bytes
    }

    /// Maximum permitted number of entries.
    pub fn max_entries(&self) -> usize {
        let Self { max_entries, .. } = self;
        *max_entries
    }

    /// Maximum permitted total of the entries' declared sizes, in bytes.
    pub fn max_expanded_bytes(&self) -> u64 {
        let Self {
            max_expanded_bytes, ..
        } = self;
        *max_expanded_bytes
    }
}
75
76#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
78pub struct NormalizedArchivePath(String);
79
80impl NormalizedArchivePath {
81 pub fn try_new(raw: &str, label: &str) -> Result<Self> {
83 if raw.is_empty() || raw.contains('\0') || raw.starts_with('/') || raw.starts_with('\\') {
84 return Err(ChallengeValidationError::ArchiveTraversal(format!(
85 "{label} contains an unsafe ZIP entry path",
86 ))
87 .into());
88 }
89
90 let trimmed = raw.trim_matches(['/', '\\']);
91 if trimmed.is_empty() {
92 return Err(ChallengeValidationError::ArchiveTraversal(format!(
93 "{label} contains an unsafe ZIP entry path",
94 ))
95 .into());
96 }
97
98 let mut parts = Vec::new();
99 for part in trimmed.split(['/', '\\']) {
100 if part.is_empty() || part == "." || part == ".." {
101 return Err(ChallengeValidationError::UnsafePath(format!(
102 "{label} contains unsafe path `{raw}`",
103 ))
104 .into());
105 }
106 parts.push(part);
107 }
108
109 Ok(Self(parts.join("/")))
110 }
111
112 pub fn from_relative_path(path: &Path, label: &str) -> Result<Self> {
114 let mut parts = Vec::new();
115 for component in path.components() {
116 match component {
117 Component::Normal(value) => {
118 let value = value.to_str().ok_or_else(|| {
119 ServiceError::Validation(format!(
120 "{label} contains a path that is not valid UTF-8: {}",
121 path.display()
122 ))
123 })?;
124 parts.push(value);
125 }
126 Component::CurDir => {}
127 Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
128 return Err(ChallengeValidationError::UnsafePath(format!(
129 "{label} contains unsafe path `{}`",
130 path.display(),
131 ))
132 .into());
133 }
134 }
135 }
136
137 Self::try_new(&parts.join("/"), label)
138 }
139
140 pub fn as_str(&self) -> &str {
142 &self.0
143 }
144
145 pub fn as_path(&self) -> &Path {
147 Path::new(&self.0)
148 }
149}
150
151impl std::fmt::Display for NormalizedArchivePath {
152 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153 f.write_str(self.as_str())
154 }
155}
156
157#[derive(Debug, Clone)]
159pub struct ArchiveEnvelopeEntry {
160 index: usize,
161 path: NormalizedArchivePath,
162 is_dir: bool,
163 size: u64,
164 compressed_size: u64,
165}
166
167impl ArchiveEnvelopeEntry {
168 pub fn index(&self) -> usize {
170 self.index
171 }
172
173 pub fn path(&self) -> &NormalizedArchivePath {
175 &self.path
176 }
177
178 pub fn is_dir(&self) -> bool {
180 self.is_dir
181 }
182
183 pub fn size(&self) -> u64 {
185 self.size
186 }
187
188 pub fn compressed_size(&self) -> u64 {
190 self.compressed_size
191 }
192}
193
194#[derive(Debug, Clone)]
196pub struct ArchiveEnvelope {
197 label: String,
198 archive_size: u64,
199 expanded_size: u64,
200 entries: Vec<ArchiveEnvelopeEntry>,
201}
202
203impl ArchiveEnvelope {
204 pub fn label(&self) -> &str {
206 &self.label
207 }
208
209 pub fn archive_size(&self) -> u64 {
211 self.archive_size
212 }
213
214 pub fn expanded_size(&self) -> u64 {
216 self.expanded_size
217 }
218
219 pub fn entries(&self) -> &[ArchiveEnvelopeEntry] {
221 &self.entries
222 }
223}
224
225pub fn inspect_zip_bytes(bytes: &[u8], policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
227 let archive_size = u64::try_from(bytes.len())
228 .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
229 ensure_archive_size(archive_size, policy)?;
230 let reader = std::io::Cursor::new(bytes);
231 let mut archive = zip::ZipArchive::new(reader)?;
232 inspect_zip_archive(archive_size, &mut archive, policy)
233}
234
235pub fn inspect_zip_file(path: &Path, policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
237 let archive_size = std::fs::metadata(path)?.len();
238 ensure_archive_size(archive_size, policy)?;
239 let reader = std::fs::File::open(path)?;
240 let mut archive = zip::ZipArchive::new(reader)?;
241 inspect_zip_archive(archive_size, &mut archive, policy)
242}
243
244pub fn extract_zip_file_to_dir(
246 archive_path: &Path,
247 target_dir: &Path,
248 policy: &ArchiveEnvelopePolicy,
249) -> Result<()> {
250 let archive_size = std::fs::metadata(archive_path)?.len();
251 ensure_archive_size(archive_size, policy)?;
252 let reader = std::fs::File::open(archive_path)?;
253 let mut archive = zip::ZipArchive::new(reader)?;
254 let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
255 extract_validated_zip_archive(&mut archive, &envelope, target_dir)
256}
257
258pub fn extract_zip_bytes_to_dir(
260 bytes: &[u8],
261 target_dir: &Path,
262 policy: &ArchiveEnvelopePolicy,
263) -> Result<()> {
264 let archive_size = u64::try_from(bytes.len())
265 .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
266 ensure_archive_size(archive_size, policy)?;
267 let reader = std::io::Cursor::new(bytes);
268 let mut archive = zip::ZipArchive::new(reader)?;
269 let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
270 extract_validated_zip_archive(&mut archive, &envelope, target_dir)
271}
272
273fn extract_validated_zip_archive<R: Read + Seek>(
275 archive: &mut zip::ZipArchive<R>,
276 envelope: &ArchiveEnvelope,
277 target_dir: &Path,
278) -> Result<()> {
279 for entry in envelope.entries() {
280 let mut file = archive.by_index(entry.index())?;
281 let outpath = target_dir.join(entry.path().as_path());
282
283 if entry.is_dir() {
284 std::fs::create_dir_all(&outpath)?;
285 } else {
286 if outpath.exists() {
287 return Err(ServiceError::Validation(format!(
288 "{} cannot overwrite existing path `{}`",
289 envelope.label(),
290 entry.path()
291 )));
292 }
293 if let Some(parent) = outpath.parent() {
294 std::fs::create_dir_all(parent)?;
295 }
296 let mut outfile = std::fs::OpenOptions::new()
297 .write(true)
298 .create_new(true)
299 .open(&outpath)?;
300 std::io::copy(&mut file, &mut outfile)?;
301 }
302 }
303
304 Ok(())
305}
306
307fn ensure_archive_size(archive_size: u64, policy: &ArchiveEnvelopePolicy) -> Result<()> {
309 if archive_size > policy.max_archive_bytes() {
310 return Err(ServiceError::Validation(format!(
311 "{} must be at most {} bytes",
312 policy.label(),
313 policy.max_archive_bytes()
314 )));
315 }
316 Ok(())
317}
318
319fn inspect_zip_archive<R: Read + Seek>(
321 archive_size: u64,
322 archive: &mut zip::ZipArchive<R>,
323 policy: &ArchiveEnvelopePolicy,
324) -> Result<ArchiveEnvelope> {
325 if archive.len() > policy.max_entries() {
326 return Err(ServiceError::Validation(format!(
327 "{} must contain at most {} entries",
328 policy.label(),
329 policy.max_entries()
330 )));
331 }
332
333 let mut expanded_size = 0u64;
334 let mut seen_paths = HashSet::with_capacity(archive.len());
335 let mut entries = Vec::with_capacity(archive.len());
336 for index in 0..archive.len() {
337 let file = archive.by_index(index)?;
338 if policy.reject_symlinks
339 && file
340 .unix_mode()
341 .is_some_and(|mode| mode & 0o170000 == 0o120000)
342 {
343 return Err(ServiceError::Validation(format!(
344 "{} must not contain symlinks",
345 policy.label()
346 )));
347 }
348
349 let path = NormalizedArchivePath::try_new(file.name(), policy.label())?;
350 if !seen_paths.insert(path.clone()) {
351 return Err(ServiceError::Validation(format!(
352 "{} contains duplicate path `{path}`",
353 policy.label()
354 )));
355 }
356
357 expanded_size = expanded_size
358 .checked_add(file.size())
359 .ok_or_else(|| ServiceError::Validation(format!("{} is too large", policy.label())))?;
360 if expanded_size > policy.max_expanded_bytes() {
361 return Err(ServiceError::Validation(format!(
362 "{} must expand to at most {} bytes",
363 policy.label(),
364 policy.max_expanded_bytes()
365 )));
366 }
367
368 entries.push(ArchiveEnvelopeEntry {
369 index,
370 path,
371 is_dir: file.is_dir(),
372 size: file.size(),
373 compressed_size: file.compressed_size(),
374 });
375 }
376
377 Ok(ArchiveEnvelope {
378 label: policy.label().to_string(),
379 archive_size,
380 expanded_size,
381 entries,
382 })
383}
384
#[cfg(test)]
pub(crate) mod test_support {
    use std::io::Write;

    /// "Version made by" value: host system 3 (UNIX) in the high byte and
    /// specification version 2.0 in the low byte. The UNIX host byte is what
    /// lets readers interpret the upper 16 bits of the external attributes as
    /// a Unix file mode.
    const VERSION_MADE_BY_UNIX: u16 = (3 << 8) | 20;

    /// Hand-assembles a ZIP archive of stored (uncompressed) entries.
    ///
    /// Each tuple is `(name, content, unix_mode)`; `unix_mode` is written into
    /// the upper 16 bits of the entry's external attributes, which allows
    /// fixtures such as symlink entries. The CRC-32 fields are left as zero,
    /// so the result is meant for header-level inspection only.
    ///
    /// # Panics
    ///
    /// Panics when the entry count or a name length does not fit `u16`, or
    /// when an offset or size does not fit `u32`.
    pub(crate) fn raw_stored_zip(entries: Vec<(&str, &[u8], u32)>) -> Vec<u8> {
        let mut bytes = Vec::new();
        let mut central_directory = Vec::new();
        let entry_count = u16::try_from(entries.len()).expect("test ZIP entries fit u16");

        for (name, content, unix_mode) in entries {
            let local_header_offset =
                u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
            let name_bytes = name.as_bytes();
            let name_len = u16::try_from(name_bytes.len()).expect("test ZIP names are short");
            let content_len =
                u32::try_from(content.len()).expect("test ZIP content should fit u32");

            // Local file header.
            bytes.extend_from_slice(&0x0403_4b50u32.to_le_bytes()); // signature
            bytes.extend_from_slice(&20u16.to_le_bytes()); // version needed
            bytes.extend_from_slice(&0u16.to_le_bytes()); // flags
            bytes.extend_from_slice(&0u16.to_le_bytes()); // compression: stored
            bytes.extend_from_slice(&0u16.to_le_bytes()); // modification time
            bytes.extend_from_slice(&0u16.to_le_bytes()); // modification date
            bytes.extend_from_slice(&0u32.to_le_bytes()); // CRC-32
            bytes.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            bytes.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            bytes.extend_from_slice(&name_len.to_le_bytes()); // name length
            bytes.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            bytes.extend_from_slice(name_bytes);
            bytes.extend_from_slice(content);

            // Central directory file header.
            central_directory.extend_from_slice(&0x0201_4b50u32.to_le_bytes()); // signature
            central_directory.extend_from_slice(&VERSION_MADE_BY_UNIX.to_le_bytes());
            central_directory.extend_from_slice(&20u16.to_le_bytes()); // version needed
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // flags
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // compression: stored
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // modification time
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // modification date
            central_directory.extend_from_slice(&0u32.to_le_bytes()); // CRC-32
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            central_directory.extend_from_slice(&name_len.to_le_bytes()); // name length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // comment length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // disk number start
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // internal attributes
            central_directory.extend_from_slice(&(unix_mode << 16).to_le_bytes()); // external attributes
            central_directory.extend_from_slice(&local_header_offset.to_le_bytes());
            central_directory.extend_from_slice(name_bytes);
        }

        let central_directory_offset =
            u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
        let central_directory_size =
            u32::try_from(central_directory.len()).expect("test ZIP should fit u32 sizes");
        bytes.write_all(&central_directory).expect("central dir");

        // End of central directory record.
        bytes.extend_from_slice(&0x0605_4b50u32.to_le_bytes()); // signature
        bytes.extend_from_slice(&0u16.to_le_bytes()); // number of this disk
        bytes.extend_from_slice(&0u16.to_le_bytes()); // disk with central directory
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries on this disk
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // total entries
        bytes.extend_from_slice(&central_directory_size.to_le_bytes());
        bytes.extend_from_slice(&central_directory_offset.to_le_bytes());
        bytes.extend_from_slice(&0u16.to_le_bytes()); // comment length
        bytes
    }
}
454
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::{ArchiveEnvelopePolicy, NormalizedArchivePath, inspect_zip_bytes};

    /// Small policy shared by the tests: 1024-byte archive, 4 entries, 64 expanded bytes.
    fn policy() -> ArchiveEnvelopePolicy {
        ArchiveEnvelopePolicy::new("test archive", 1024, 4, 64)
    }

    /// Builds an in-memory ZIP whose entries are stored without compression.
    fn zip_with_entries(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut buffer = std::io::Cursor::new(Vec::new());
        {
            // Scoped so the writer releases its borrow of `buffer` before it is unwrapped.
            let stored = zip::write::SimpleFileOptions::default()
                .compression_method(zip::CompressionMethod::Stored);
            let mut writer = zip::ZipWriter::new(&mut buffer);
            for &(name, payload) in entries {
                writer.start_file(name, stored).expect("entry");
                writer.write_all(payload).expect("content");
            }
            writer.finish().expect("zip");
        }
        buffer.into_inner()
    }

    #[test]
    fn validates_archive_envelope() {
        let bytes = zip_with_entries(&[("dir/file.txt", b"hello")]);
        let envelope = inspect_zip_bytes(&bytes, &policy()).expect("archive should validate");

        let entries = envelope.entries();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path().as_str(), "dir/file.txt");
        assert_eq!(envelope.expanded_size(), 5);
    }

    #[test]
    fn rejects_hostile_archive_entries() {
        // Traversal, absolute, empty-component, and dot-component names.
        let hostile_names = ["../evil", "/evil", "a//b", "a/./b"];
        for name in hostile_names {
            let bytes = zip_with_entries(&[(name, b"x")]);
            assert!(inspect_zip_bytes(&bytes, &policy()).is_err(), "{name}");
        }

        // The two names differ only in separator, so they normalize to the same path.
        let duplicate = zip_with_entries(&[("a/b.txt", b"1"), ("a\\b.txt", b"2")]);
        assert!(inspect_zip_bytes(&duplicate, &policy()).is_err());

        let symlink = super::test_support::raw_stored_zip(vec![("link", b"target", 0o120777)]);
        assert!(inspect_zip_bytes(&symlink, &policy()).is_err());
    }

    #[test]
    fn enforces_archive_limits() {
        // One byte over the 64-byte expanded-size limit.
        let oversized = zip_with_entries(&[("file.txt", &[b'x'; 65])]);
        assert!(inspect_zip_bytes(&oversized, &policy()).is_err());

        // Five entries against a limit of four.
        let too_many = zip_with_entries(&[
            ("a", b"1"),
            ("b", b"1"),
            ("c", b"1"),
            ("d", b"1"),
            ("e", b"1"),
        ]);
        assert!(inspect_zip_bytes(&too_many, &policy()).is_err());

        // Archive size limit of eight bytes.
        let tiny_policy = ArchiveEnvelopePolicy::new("test archive", 8, 4, 64);
        let bytes = zip_with_entries(&[("file.txt", b"hello")]);
        assert!(inspect_zip_bytes(&bytes, &tiny_policy).is_err());
    }

    #[test]
    fn normalizes_local_relative_paths() {
        let local = std::path::Path::new("./src/main.rs");
        let path = NormalizedArchivePath::from_relative_path(local, "package path")
            .expect("relative path should normalize");
        assert_eq!(path.as_str(), "src/main.rs");
    }
}