1use std::collections::HashSet;
4use std::io::{Read, Seek};
5use std::path::{Component, Path};
6
7use agentics_error::{Result, ServiceError};
8
9#[derive(Debug, thiserror::Error)]
11pub enum ChallengeValidationError {
12 #[error("archive traversal rejected: {0}")]
13 ArchiveTraversal(String),
14 #[error("invalid manifest: {0}")]
15 InvalidManifest(String),
16 #[error("unsafe path rejected: {0}")]
17 UnsafePath(String),
18 #[error("unsupported target: {0}")]
19 UnsupportedTarget(String),
20}
21
22impl From<ChallengeValidationError> for ServiceError {
23 fn from(error: ChallengeValidationError) -> Self {
24 ServiceError::Validation(error.to_string())
25 }
26}
27
/// Limits an archive must satisfy before its contents are inspected or
/// extracted.
#[derive(Debug, Clone)]
pub struct ArchiveEnvelopePolicy {
    /// Name of the archive as it should appear in messages.
    label: String,
    /// Upper bound on the archive's own size, in bytes.
    max_archive_bytes: u64,
    /// Upper bound on the number of entries.
    max_entries: usize,
    /// Upper bound on the summed entry sizes, in bytes.
    max_expanded_bytes: u64,
    /// Whether symlink entries are refused; `new` always sets this to `true`.
    reject_symlinks: bool,
}

impl ArchiveEnvelopePolicy {
    /// Creates a policy with the given label and limits.
    ///
    /// Symlink rejection is always enabled for policies built here.
    pub fn new(
        label: impl Into<String>,
        max_archive_bytes: u64,
        max_entries: usize,
        max_expanded_bytes: u64,
    ) -> Self {
        let label = label.into();
        Self {
            reject_symlinks: true,
            max_expanded_bytes,
            max_entries,
            max_archive_bytes,
            label,
        }
    }

    /// Returns the label used to identify the archive in messages.
    pub fn label(&self) -> &str {
        self.label.as_str()
    }

    /// Returns the maximum permitted archive size in bytes.
    pub fn max_archive_bytes(&self) -> u64 {
        self.max_archive_bytes
    }

    /// Returns the maximum permitted number of entries.
    pub fn max_entries(&self) -> usize {
        self.max_entries
    }

    /// Returns the maximum permitted total of entry sizes in bytes.
    pub fn max_expanded_bytes(&self) -> u64 {
        self.max_expanded_bytes
    }
}
75
76#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
78pub struct NormalizedArchivePath(String);
79
80impl NormalizedArchivePath {
81 pub fn try_new(raw: &str, label: &str) -> Result<Self> {
83 if raw.is_empty() || raw.contains('\0') || raw.starts_with('/') || raw.starts_with('\\') {
84 return Err(ChallengeValidationError::ArchiveTraversal(format!(
85 "{label} contains an unsafe ZIP entry path",
86 ))
87 .into());
88 }
89
90 let trimmed = raw.trim_matches(['/', '\\']);
91 if trimmed.is_empty() {
92 return Err(ChallengeValidationError::ArchiveTraversal(format!(
93 "{label} contains an unsafe ZIP entry path",
94 ))
95 .into());
96 }
97
98 let mut parts = Vec::new();
99 for part in trimmed.split(['/', '\\']) {
100 if part.is_empty() || part == "." || part == ".." {
101 return Err(ChallengeValidationError::UnsafePath(format!(
102 "{label} contains unsafe path `{raw}`",
103 ))
104 .into());
105 }
106 parts.push(part);
107 }
108
109 Ok(Self(parts.join("/")))
110 }
111
112 pub fn from_relative_path(path: &Path, label: &str) -> Result<Self> {
114 let mut parts = Vec::new();
115 for component in path.components() {
116 match component {
117 Component::Normal(value) => {
118 let value = value.to_str().ok_or_else(|| {
119 ServiceError::Validation(format!(
120 "{label} contains a path that is not valid UTF-8: {}",
121 path.display()
122 ))
123 })?;
124 parts.push(value);
125 }
126 Component::CurDir => {}
127 Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
128 return Err(ChallengeValidationError::UnsafePath(format!(
129 "{label} contains unsafe path `{}`",
130 path.display(),
131 ))
132 .into());
133 }
134 }
135 }
136
137 Self::try_new(&parts.join("/"), label)
138 }
139
140 pub fn as_str(&self) -> &str {
142 &self.0
143 }
144
145 pub fn as_path(&self) -> &Path {
147 Path::new(&self.0)
148 }
149}
150
151impl std::fmt::Display for NormalizedArchivePath {
152 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153 f.write_str(self.as_str())
154 }
155}
156
157#[derive(Debug, Clone)]
159pub struct ArchiveEnvelopeEntry {
160 index: usize,
161 path: NormalizedArchivePath,
162 is_dir: bool,
163 size: u64,
164 compressed_size: u64,
165}
166
167impl ArchiveEnvelopeEntry {
168 pub fn index(&self) -> usize {
170 self.index
171 }
172
173 pub fn path(&self) -> &NormalizedArchivePath {
175 &self.path
176 }
177
178 pub fn is_dir(&self) -> bool {
180 self.is_dir
181 }
182
183 pub fn size(&self) -> u64 {
185 self.size
186 }
187
188 pub fn compressed_size(&self) -> u64 {
190 self.compressed_size
191 }
192}
193
194#[derive(Debug, Clone)]
196pub struct ArchiveEnvelope {
197 label: String,
198 archive_size: u64,
199 expanded_size: u64,
200 entries: Vec<ArchiveEnvelopeEntry>,
201}
202
203impl ArchiveEnvelope {
204 pub fn label(&self) -> &str {
206 &self.label
207 }
208
209 pub fn archive_size(&self) -> u64 {
211 self.archive_size
212 }
213
214 pub fn expanded_size(&self) -> u64 {
216 self.expanded_size
217 }
218
219 pub fn entries(&self) -> &[ArchiveEnvelopeEntry] {
221 &self.entries
222 }
223}
224
225pub fn inspect_zip_bytes(bytes: &[u8], policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
227 let archive_size = u64::try_from(bytes.len())
228 .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
229 ensure_archive_size(archive_size, policy)?;
230 let reader = std::io::Cursor::new(bytes);
231 let mut archive = zip::ZipArchive::new(reader)?;
232 inspect_zip_archive(archive_size, &mut archive, policy)
233}
234
235pub fn inspect_zip_file(path: &Path, policy: &ArchiveEnvelopePolicy) -> Result<ArchiveEnvelope> {
237 let archive_size = std::fs::metadata(path)?.len();
238 ensure_archive_size(archive_size, policy)?;
239 let reader = std::fs::File::open(path)?;
240 let mut archive = zip::ZipArchive::new(reader)?;
241 inspect_zip_archive(archive_size, &mut archive, policy)
242}
243
244pub fn extract_zip_file_to_dir(
246 archive_path: &Path,
247 target_dir: &Path,
248 policy: &ArchiveEnvelopePolicy,
249) -> Result<()> {
250 let archive_size = std::fs::metadata(archive_path)?.len();
251 ensure_archive_size(archive_size, policy)?;
252 let reader = std::fs::File::open(archive_path)?;
253 let mut archive = zip::ZipArchive::new(reader)?;
254 let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
255 extract_validated_zip_archive(&mut archive, &envelope, target_dir)
256}
257
258pub fn extract_zip_bytes_to_dir(
260 bytes: &[u8],
261 target_dir: &Path,
262 policy: &ArchiveEnvelopePolicy,
263) -> Result<()> {
264 let archive_size = u64::try_from(bytes.len())
265 .map_err(|_| ServiceError::Validation(format!("{} is too large", policy.label())))?;
266 ensure_archive_size(archive_size, policy)?;
267 let reader = std::io::Cursor::new(bytes);
268 let mut archive = zip::ZipArchive::new(reader)?;
269 let envelope = inspect_zip_archive(archive_size, &mut archive, policy)?;
270 extract_validated_zip_archive(&mut archive, &envelope, target_dir)
271}
272
273fn extract_validated_zip_archive<R: Read + Seek>(
275 archive: &mut zip::ZipArchive<R>,
276 envelope: &ArchiveEnvelope,
277 target_dir: &Path,
278) -> Result<()> {
279 for entry in envelope.entries() {
280 let mut file = archive.by_index(entry.index())?;
281 let outpath = target_dir.join(entry.path().as_path());
282
283 if entry.is_dir() {
284 std::fs::create_dir_all(&outpath)?;
285 } else {
286 if outpath.exists() {
287 return Err(ServiceError::Validation(format!(
288 "{} cannot overwrite existing path `{}`",
289 envelope.label(),
290 entry.path()
291 )));
292 }
293 if let Some(parent) = outpath.parent() {
294 std::fs::create_dir_all(parent)?;
295 }
296 let mut outfile = std::fs::OpenOptions::new()
297 .write(true)
298 .create_new(true)
299 .open(&outpath)?;
300 let copied = std::io::copy(&mut file, &mut outfile)?;
301 if copied != entry.size() {
302 return Err(ServiceError::Validation(format!(
303 "{} entry `{}` extracted {copied} bytes, expected {} bytes",
304 envelope.label(),
305 entry.path(),
306 entry.size()
307 )));
308 }
309 }
310 }
311
312 Ok(())
313}
314
315fn ensure_archive_size(archive_size: u64, policy: &ArchiveEnvelopePolicy) -> Result<()> {
317 if archive_size > policy.max_archive_bytes() {
318 return Err(ServiceError::Validation(format!(
319 "{} must be at most {} bytes",
320 policy.label(),
321 policy.max_archive_bytes()
322 )));
323 }
324 Ok(())
325}
326
327fn inspect_zip_archive<R: Read + Seek>(
329 archive_size: u64,
330 archive: &mut zip::ZipArchive<R>,
331 policy: &ArchiveEnvelopePolicy,
332) -> Result<ArchiveEnvelope> {
333 if archive.len() > policy.max_entries() {
334 return Err(ServiceError::Validation(format!(
335 "{} must contain at most {} entries",
336 policy.label(),
337 policy.max_entries()
338 )));
339 }
340
341 let mut expanded_size = 0u64;
342 let mut seen_paths = HashSet::with_capacity(archive.len());
343 let mut entries = Vec::with_capacity(archive.len());
344 for index in 0..archive.len() {
345 let file = archive.by_index(index)?;
346 if policy.reject_symlinks
347 && file
348 .unix_mode()
349 .is_some_and(|mode| mode & 0o170000 == 0o120000)
350 {
351 return Err(ServiceError::Validation(format!(
352 "{} must not contain symlinks",
353 policy.label()
354 )));
355 }
356
357 let path = NormalizedArchivePath::try_new(file.name(), policy.label())?;
358 if !seen_paths.insert(path.clone()) {
359 return Err(ServiceError::Validation(format!(
360 "{} contains duplicate path `{path}`",
361 policy.label()
362 )));
363 }
364
365 expanded_size = expanded_size
366 .checked_add(file.size())
367 .ok_or_else(|| ServiceError::Validation(format!("{} is too large", policy.label())))?;
368 if expanded_size > policy.max_expanded_bytes() {
369 return Err(ServiceError::Validation(format!(
370 "{} must expand to at most {} bytes",
371 policy.label(),
372 policy.max_expanded_bytes()
373 )));
374 }
375
376 entries.push(ArchiveEnvelopeEntry {
377 index,
378 path,
379 is_dir: file.is_dir(),
380 size: file.size(),
381 compressed_size: file.compressed_size(),
382 });
383 }
384
385 Ok(ArchiveEnvelope {
386 label: policy.label().to_string(),
387 archive_size,
388 expanded_size,
389 entries,
390 })
391}
392
#[cfg(test)]
pub(crate) mod test_support {
    /// Hand-assembles a ZIP archive of stored (uncompressed) entries.
    ///
    /// Each tuple is `(name, content, unix_mode)`. The mode is written into
    /// the upper 16 bits of the central directory's external attributes, which
    /// lets tests craft entries such as symlinks. The CRC-32 fields are left
    /// as zero, so the result is only suitable for code that inspects
    /// metadata.
    ///
    /// # Panics
    ///
    /// Panics when the entry count or a name length does not fit in `u16`, or
    /// when an offset or content length does not fit in `u32`.
    pub(crate) fn raw_stored_zip(entries: Vec<(&str, &[u8], u32)>) -> Vec<u8> {
        // "Version made by": the upper byte names the host system whose
        // conventions the external attributes follow (3 = UNIX), the lower
        // byte the spec version (20 = 2.0). The host must be UNIX for the
        // mode stored below to be interpreted as a Unix mode.
        const VERSION_MADE_BY: u16 = (3 << 8) | 20;
        const VERSION_NEEDED: u16 = 20;

        let mut bytes = Vec::new();
        let mut central_directory = Vec::new();
        let entry_count = u16::try_from(entries.len()).expect("test ZIP entries fit u16");

        for (name, content, unix_mode) in entries {
            let local_header_offset =
                u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
            let name_bytes = name.as_bytes();
            let name_len = u16::try_from(name_bytes.len()).expect("test ZIP names are short");
            let content_len =
                u32::try_from(content.len()).expect("test ZIP content should fit u32");

            // Local file header, followed by the name and the raw content.
            bytes.extend_from_slice(&0x0403_4b50u32.to_le_bytes()); // signature
            bytes.extend_from_slice(&VERSION_NEEDED.to_le_bytes());
            bytes.extend_from_slice(&0u16.to_le_bytes()); // general purpose flags
            bytes.extend_from_slice(&0u16.to_le_bytes()); // compression method: stored
            bytes.extend_from_slice(&0u16.to_le_bytes()); // modification time
            bytes.extend_from_slice(&0u16.to_le_bytes()); // modification date
            bytes.extend_from_slice(&0u32.to_le_bytes()); // CRC-32
            bytes.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            bytes.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            bytes.extend_from_slice(&name_len.to_le_bytes());
            bytes.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            bytes.extend_from_slice(name_bytes);
            bytes.extend_from_slice(content);

            // Matching central directory record.
            central_directory.extend_from_slice(&0x0201_4b50u32.to_le_bytes()); // signature
            central_directory.extend_from_slice(&VERSION_MADE_BY.to_le_bytes());
            central_directory.extend_from_slice(&VERSION_NEEDED.to_le_bytes());
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // general purpose flags
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // compression method
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // modification time
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // modification date
            central_directory.extend_from_slice(&0u32.to_le_bytes()); // CRC-32
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // compressed size
            central_directory.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
            central_directory.extend_from_slice(&name_len.to_le_bytes());
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // extra field length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // comment length
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // disk number start
            central_directory.extend_from_slice(&0u16.to_le_bytes()); // internal attributes
            central_directory.extend_from_slice(&(unix_mode << 16).to_le_bytes()); // external attributes
            central_directory.extend_from_slice(&local_header_offset.to_le_bytes());
            central_directory.extend_from_slice(name_bytes);
        }

        let central_directory_offset =
            u32::try_from(bytes.len()).expect("test ZIP should fit u32 offsets");
        let central_directory_size =
            u32::try_from(central_directory.len()).expect("test ZIP should fit u32 sizes");
        bytes.extend_from_slice(&central_directory);

        // End of central directory record.
        bytes.extend_from_slice(&0x0605_4b50u32.to_le_bytes()); // signature
        bytes.extend_from_slice(&0u16.to_le_bytes()); // number of this disk
        bytes.extend_from_slice(&0u16.to_le_bytes()); // disk holding the central directory
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries on this disk
        bytes.extend_from_slice(&entry_count.to_le_bytes()); // entries in total
        bytes.extend_from_slice(&central_directory_size.to_le_bytes());
        bytes.extend_from_slice(&central_directory_offset.to_le_bytes());
        bytes.extend_from_slice(&0u16.to_le_bytes()); // comment length
        bytes
    }
}
462
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::{ArchiveEnvelopePolicy, NormalizedArchivePath, inspect_zip_bytes};

    /// Policy shared by the tests: 1024 archive bytes, 4 entries, 64 expanded bytes.
    fn policy() -> ArchiveEnvelopePolicy {
        let (max_archive_bytes, max_entries, max_expanded_bytes) = (1024, 4, 64);
        ArchiveEnvelopePolicy::new(
            "test archive",
            max_archive_bytes,
            max_entries,
            max_expanded_bytes,
        )
    }

    /// Builds an in-memory ZIP of stored entries from `(path, content)` pairs.
    fn zip_with_entries(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut buffer = std::io::Cursor::new(Vec::new());
        {
            let stored = zip::write::SimpleFileOptions::default()
                .compression_method(zip::CompressionMethod::Stored);
            let mut writer = zip::ZipWriter::new(&mut buffer);
            for (path, content) in entries {
                writer.start_file(path, stored).expect("entry");
                writer.write_all(content).expect("content");
            }
            writer.finish().expect("zip");
        }
        buffer.into_inner()
    }

    #[test]
    fn validates_archive_envelope() {
        let archive = zip_with_entries(&[("dir/file.txt", b"hello")]);
        let envelope = inspect_zip_bytes(&archive, &policy()).expect("archive should validate");

        let entries = envelope.entries();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path().as_str(), "dir/file.txt");
        assert_eq!(envelope.expanded_size(), 5);
    }

    #[test]
    fn rejects_hostile_archive_entries() {
        // Traversal, absolute, empty-component and dot-component names.
        let hostile_names = ["../evil", "/evil", "a//b", "a/./b"];
        for name in hostile_names {
            let archive = zip_with_entries(&[(name, b"x")]);
            let outcome = inspect_zip_bytes(&archive, &policy());
            assert!(outcome.is_err(), "{name}");
        }

        // The same path spelled with both separator styles.
        let duplicate = zip_with_entries(&[("a/b.txt", b"1"), ("a\\b.txt", b"2")]);
        assert!(inspect_zip_bytes(&duplicate, &policy()).is_err());

        // An entry carrying a symlink mode.
        let symlink = super::test_support::raw_stored_zip(vec![("link", b"target", 0o120777)]);
        assert!(inspect_zip_bytes(&symlink, &policy()).is_err());
    }

    #[test]
    fn enforces_archive_limits() {
        // One byte over the 64-byte expanded-size limit.
        let oversized = zip_with_entries(&[("file.txt", &[b'x'; 65])]);
        assert!(inspect_zip_bytes(&oversized, &policy()).is_err());

        // One entry over the four-entry limit.
        let too_many = zip_with_entries(&[
            ("a", b"1"),
            ("b", b"1"),
            ("c", b"1"),
            ("d", b"1"),
            ("e", b"1"),
        ]);
        assert!(inspect_zip_bytes(&too_many, &policy()).is_err());

        // An archive size limit of eight bytes.
        let tiny_policy = ArchiveEnvelopePolicy::new("test archive", 8, 4, 64);
        let small = zip_with_entries(&[("file.txt", b"hello")]);
        assert!(inspect_zip_bytes(&small, &tiny_policy).is_err());
    }

    #[test]
    fn normalizes_local_relative_paths() {
        let local = std::path::Path::new("./src/main.rs");
        let normalized = NormalizedArchivePath::from_relative_path(local, "package path")
            .expect("relative path should normalize");
        assert_eq!(normalized.as_str(), "src/main.rs");
    }
}
540}