Skip to main content

gravityfile_ops/
archive.rs

1//! Archive operations (extract and compress).
2
3use std::fs::File;
4use std::io::{Read, Write};
5use std::path::{Component, Path, PathBuf};
6
7use thiserror::Error;
8
/// Largest uncompressed-to-compressed ratio tolerated for one entry (1000:1).
/// Entries above this are treated as likely zip bombs.
const MAX_DECOMPRESSION_RATIO: f64 = 1_000.0;

/// Upper bound on the combined size of everything extracted (10 GiB).
const MAX_TOTAL_EXTRACTED_SIZE: u64 = 10 << 30;

/// Upper bound on the number of entries accepted from a TAR archive.
const MAX_ENTRY_COUNT: u64 = 100_000;

/// Upper bound on the size of any single TAR entry (10 GiB).
const MAX_SINGLE_ENTRY_SIZE: u64 = 10 << 30;
21
22/// Error that can occur during archive operations.
23#[derive(Debug, Error)]
24pub enum ArchiveError {
25    #[error("I/O error: {0}")]
26    Io(#[from] std::io::Error),
27
28    #[error("ZIP error: {0}")]
29    Zip(#[from] zip::result::ZipError),
30
31    #[error("Unsupported archive format: {0}")]
32    UnsupportedFormat(String),
33
34    #[error("Archive not found: {0}")]
35    NotFound(PathBuf),
36
37    #[error("Destination already exists: {0}")]
38    DestinationExists(PathBuf),
39
40    #[error("Potential decompression bomb detected: {0}")]
41    DecompressionBomb(String),
42}
43
44/// Result of an archive operation.
45pub type ArchiveResult<T> = Result<T, ArchiveError>;
46
/// Archive container/compression combinations this module understands.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArchiveFormat {
    /// ZIP container (`.zip`).
    Zip,
    /// Uncompressed TAR (`.tar`).
    Tar,
    /// gzip-compressed TAR (`.tar.gz`, `.tgz`).
    TarGz,
    /// bzip2-compressed TAR (`.tar.bz2`, `.tbz2`).
    TarBz2,
    /// xz-compressed TAR (`.tar.xz`, `.txz`).
    TarXz,
}

impl ArchiveFormat {
    /// Guess the format from the suffix of the file name, ignoring case.
    ///
    /// Returns `None` when the path has no final component, when that
    /// component is not valid UTF-8, or when no known suffix matches.
    pub fn from_path(path: &Path) -> Option<Self> {
        // Checked top to bottom; the first matching suffix wins.
        const SUFFIXES: [(&str, ArchiveFormat); 8] = [
            (".zip", ArchiveFormat::Zip),
            (".tar.gz", ArchiveFormat::TarGz),
            (".tgz", ArchiveFormat::TarGz),
            (".tar.bz2", ArchiveFormat::TarBz2),
            (".tbz2", ArchiveFormat::TarBz2),
            (".tar.xz", ArchiveFormat::TarXz),
            (".txz", ArchiveFormat::TarXz),
            (".tar", ArchiveFormat::Tar),
        ];

        let lowered = path.file_name()?.to_str()?.to_lowercase();
        SUFFIXES
            .iter()
            .find(|(suffix, _)| lowered.ends_with(*suffix))
            .map(|&(_, format)| format)
    }

    /// The canonical file-name suffix (leading dot included) for this format.
    pub fn extension(&self) -> &'static str {
        match *self {
            ArchiveFormat::Zip => ".zip",
            ArchiveFormat::Tar => ".tar",
            ArchiveFormat::TarGz => ".tar.gz",
            ArchiveFormat::TarBz2 => ".tar.bz2",
            ArchiveFormat::TarXz => ".tar.xz",
        }
    }
}
88
89/// Extract an archive to a destination directory.
90///
91/// Automatically detects the archive format from the file extension and extracts
92/// all contents to the specified destination directory.
93///
94/// # Arguments
95///
96/// * `archive_path` - Path to the archive file to extract
97/// * `destination` - Directory where contents will be extracted
98///
99/// # Returns
100///
101/// A vector of paths to all extracted files and directories.
102///
103/// # Errors
104///
105/// Returns an error if:
106/// - The archive file doesn't exist ([`ArchiveError::NotFound`])
107/// - The format is not supported ([`ArchiveError::UnsupportedFormat`])
108/// - The archive is corrupted or malformed
109/// - A path traversal attack is detected ([`ArchiveError::Io`])
110/// - A decompression bomb is detected ([`ArchiveError::DecompressionBomb`])
111///
112/// # Security
113///
114/// This function includes multiple protections against malicious archives:
115/// - Path traversal prevention (rejects `..` and absolute paths)
116/// - Symlink attack mitigation (validates canonical paths)
117/// - Decompression bomb detection (ratio and size limits)
118/// - Permission sanitization (strips setuid/setgid bits on Unix)
119///
120/// # Example
121///
122/// ```no_run
123/// use std::path::Path;
124/// use gravityfile_ops::extract_archive;
125///
126/// let extracted = extract_archive(
127///     Path::new("archive.zip"),
128///     Path::new("/tmp/extracted"),
129/// )?;
130/// println!("Extracted {} files", extracted.len());
131/// # Ok::<(), gravityfile_ops::ArchiveError>(())
132/// ```
133pub fn extract_archive(archive_path: &Path, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
134    // LOW-4: use symlink_metadata so we detect a symlink-as-archive-path correctly
135    if std::fs::symlink_metadata(archive_path).is_err() {
136        return Err(ArchiveError::NotFound(archive_path.to_path_buf()));
137    }
138
139    let format = ArchiveFormat::from_path(archive_path).ok_or_else(|| {
140        ArchiveError::UnsupportedFormat(
141            archive_path
142                .extension()
143                .and_then(|e| e.to_str())
144                .unwrap_or("unknown")
145                .to_string(),
146        )
147    })?;
148
149    // Create destination directory if it doesn't exist
150    std::fs::create_dir_all(destination)?;
151
152    match format {
153        ArchiveFormat::Zip => extract_zip(archive_path, destination),
154        ArchiveFormat::Tar => extract_tar(archive_path, destination),
155        ArchiveFormat::TarGz => extract_tar_gz(archive_path, destination),
156        ArchiveFormat::TarBz2 => extract_tar_bz2(archive_path, destination),
157        ArchiveFormat::TarXz => extract_tar_xz(archive_path, destination),
158    }
159}
160
161/// Extract a ZIP archive.
162///
163/// # Security
164/// This function validates all paths to prevent directory traversal attacks.
165/// Paths containing `..` components or absolute paths are rejected.
166/// Setuid/setgid bits are stripped from extracted file permissions.
167/// Decompression bombs are detected via ratio and size limits.
168fn extract_zip(archive_path: &Path, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
169    let file = File::open(archive_path)?;
170    let mut archive = zip::ZipArchive::new(file)?;
171    let mut extracted_files = Vec::new();
172
173    // MED-3: canonicalization is required — return error on failure
174    let canonical_dest = destination.canonicalize().map_err(|e| {
175        std::io::Error::new(
176            std::io::ErrorKind::InvalidInput,
177            format!(
178                "Failed to canonicalize destination '{}': {}",
179                destination.display(),
180                e
181            ),
182        )
183    })?;
184
185    // Security: Check for decompression bombs before extraction
186    let mut total_uncompressed: u64 = 0;
187    for i in 0..archive.len() {
188        if let Ok(entry) = archive.by_index_raw(i) {
189            let compressed = entry.compressed_size();
190            let uncompressed = entry.size();
191
192            // Check individual file ratio
193            if compressed > 0 {
194                let ratio = uncompressed as f64 / compressed as f64;
195                if ratio > MAX_DECOMPRESSION_RATIO {
196                    return Err(ArchiveError::DecompressionBomb(format!(
197                        "File '{}' has suspicious ratio {:.0}:1 (max {:.0}:1)",
198                        entry.name(),
199                        ratio,
200                        MAX_DECOMPRESSION_RATIO
201                    )));
202                }
203            }
204
205            total_uncompressed = total_uncompressed.saturating_add(uncompressed);
206        }
207    }
208
209    // Check total extraction size
210    if total_uncompressed > MAX_TOTAL_EXTRACTED_SIZE {
211        return Err(ArchiveError::DecompressionBomb(format!(
212            "Archive would extract to {} bytes (max {} bytes)",
213            total_uncompressed, MAX_TOTAL_EXTRACTED_SIZE
214        )));
215    }
216
217    // MED-1: Extract in two passes — regular files and directories first,
218    // symlinks last. This prevents a previously-extracted symlink from
219    // redirecting `create_dir_all` outside the extraction root.
220    let mut symlink_indices = Vec::new();
221
222    for i in 0..archive.len() {
223        let mut entry = archive.by_index(i)?;
224        let entry_path = entry.enclosed_name().ok_or_else(|| {
225            std::io::Error::new(
226                std::io::ErrorKind::InvalidData,
227                "Invalid file path in archive",
228            )
229        })?;
230
231        // Security: Reject absolute paths
232        if entry_path.is_absolute() {
233            return Err(ArchiveError::Io(std::io::Error::new(
234                std::io::ErrorKind::InvalidData,
235                format!("Absolute path in archive: {}", entry_path.display()),
236            )));
237        }
238
239        // Security: Reject paths with parent directory components
240        if entry_path
241            .components()
242            .any(|c| matches!(c, std::path::Component::ParentDir))
243        {
244            return Err(ArchiveError::Io(std::io::Error::new(
245                std::io::ErrorKind::InvalidData,
246                format!(
247                    "Path traversal attempt in archive: {}",
248                    entry_path.display()
249                ),
250            )));
251        }
252
253        // Defer symlinks to the second pass.
254        if entry.is_symlink() {
255            symlink_indices.push(i);
256            continue;
257        }
258
259        let outpath = destination.join(&entry_path);
260
261        // Security: Double-check that resolved path stays within destination
262        if let Some(parent) = outpath.parent() {
263            std::fs::create_dir_all(parent)?;
264            let canonical_out = parent.canonicalize().map_err(|e| {
265                std::io::Error::new(
266                    std::io::ErrorKind::InvalidData,
267                    format!(
268                        "Failed to canonicalize output path '{}': {}",
269                        parent.display(),
270                        e
271                    ),
272                )
273            })?;
274            if !canonical_out.starts_with(&canonical_dest) {
275                return Err(ArchiveError::Io(std::io::Error::new(
276                    std::io::ErrorKind::InvalidData,
277                    format!("Path escapes destination: {}", entry_path.display()),
278                )));
279            }
280        }
281
282        if entry.is_dir() {
283            std::fs::create_dir_all(&outpath)?;
284        } else {
285            // Regular file
286            let mut outfile = File::create(&outpath)?;
287            std::io::copy(&mut entry, &mut outfile)?;
288
289            // Set permissions on Unix - strip setuid/setgid bits for security
290            #[cfg(unix)]
291            {
292                use std::os::unix::fs::PermissionsExt;
293                if let Some(mode) = entry.unix_mode() {
294                    let safe_mode = mode & 0o777;
295                    std::fs::set_permissions(&outpath, std::fs::Permissions::from_mode(safe_mode))?;
296                }
297            }
298        }
299
300        extracted_files.push(outpath);
301    }
302
303    // Second pass: extract symlinks after all regular entries are in place.
304    for i in symlink_indices {
305        let mut entry = archive.by_index(i)?;
306        let entry_path = entry.enclosed_name().ok_or_else(|| {
307            std::io::Error::new(
308                std::io::ErrorKind::InvalidData,
309                "Invalid file path in archive",
310            )
311        })?;
312
313        let outpath = destination.join(&entry_path);
314
315        // Read the symlink target from the file content
316        let mut target_bytes = Vec::new();
317        std::io::copy(&mut entry, &mut target_bytes)?;
318        let target_str = String::from_utf8_lossy(&target_bytes);
319        let link_target = Path::new(target_str.trim());
320
321        // Reject absolute symlink targets
322        if link_target.is_absolute() {
323            return Err(ArchiveError::Io(std::io::Error::new(
324                std::io::ErrorKind::InvalidData,
325                format!(
326                    "Symlink with absolute target rejected: {} -> {}",
327                    entry_path.display(),
328                    link_target.display()
329                ),
330            )));
331        }
332
333        // Validate target does not escape destination (always, not only
334        // when ParentDir is present — guards against chained symlinks).
335        let symlink_parent = outpath.parent().unwrap_or(destination);
336        let canonical_parent = symlink_parent
337            .canonicalize()
338            .unwrap_or_else(|_| symlink_parent.to_path_buf());
339        let resolved = canonical_parent.join(link_target);
340        let normalized = resolve_path(&resolved);
341
342        if !normalized.starts_with(&canonical_dest) {
343            return Err(ArchiveError::Io(std::io::Error::new(
344                std::io::ErrorKind::InvalidData,
345                format!(
346                    "Symlink escapes destination: {} -> {}",
347                    entry_path.display(),
348                    link_target.display()
349                ),
350            )));
351        }
352
353        // Create the symlink
354        #[cfg(unix)]
355        {
356            std::os::unix::fs::symlink(link_target, &outpath)?;
357        }
358        #[cfg(windows)]
359        {
360            tracing::warn!(
361                "Skipping symlink {} -> {} (Windows symlinks not supported)",
362                entry_path.display(),
363                link_target.display()
364            );
365        }
366
367        extracted_files.push(outpath);
368    }
369
370    Ok(extracted_files)
371}
372
373/// Extract a plain TAR archive.
374fn extract_tar(archive_path: &Path, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
375    let file = File::open(archive_path)?;
376    extract_tar_from_reader(file, destination)
377}
378
379/// Extract a TAR.GZ archive.
380fn extract_tar_gz(archive_path: &Path, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
381    let file = File::open(archive_path)?;
382    let decoder = flate2::read::GzDecoder::new(file);
383    extract_tar_from_reader(decoder, destination)
384}
385
386/// Extract a TAR.BZ2 archive.
387fn extract_tar_bz2(archive_path: &Path, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
388    let file = File::open(archive_path)?;
389    let decoder = bzip2::read::BzDecoder::new(file);
390    extract_tar_from_reader(decoder, destination)
391}
392
393/// Extract a TAR.XZ archive.
394fn extract_tar_xz(archive_path: &Path, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
395    let file = File::open(archive_path)?;
396    let decoder = xz2::read::XzDecoder::new(file);
397    extract_tar_from_reader(decoder, destination)
398}
399
/// Lexically resolve `.` and `..` in a path without touching the filesystem.
///
/// Root and prefix components are preserved, and `..` never climbs above
/// them (`/../a` becomes `/a`). For a relative path, a `..` with nothing left
/// to cancel is kept (`../a` stays `../a`) rather than silently dropped, so
/// the result never looks more contained than the input.
///
/// Used for symlink target validation, where the path being resolved is
/// normally absolute. Symlinks along the path are *not* followed.
fn resolve_path(path: &Path) -> PathBuf {
    let mut resolved = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                let tail = resolved.components().next_back();
                let cancels_name = matches!(tail, Some(Component::Normal(_)));
                let pinned_at_root =
                    matches!(tail, Some(Component::RootDir | Component::Prefix(_)));
                if cancels_name {
                    resolved.pop();
                } else if !pinned_at_root {
                    // Empty so far, or already a run of `..`: keep climbing.
                    resolved.push("..");
                }
            }
            other => resolved.push(other),
        }
    }
    resolved
}
417
418/// Validate that a TAR symlink target does not escape the destination.
419///
420/// Returns `Err` if the target is absolute (including Windows Prefix), or if normalizing
421/// the resolved path places it outside `canonical_dest`.
422fn validate_tar_symlink_target(
423    link_target: &Path,
424    outpath: &Path,
425    destination: &Path,
426    canonical_dest: &Path,
427) -> ArchiveResult<()> {
428    // MED-4: Reject Prefix (Windows absolute path) as absolute
429    for component in link_target.components() {
430        if matches!(component, Component::Prefix(_)) {
431            return Err(ArchiveError::Io(std::io::Error::new(
432                std::io::ErrorKind::InvalidData,
433                format!(
434                    "Symlink with absolute (prefixed) target rejected: {}",
435                    link_target.display()
436                ),
437            )));
438        }
439    }
440
441    // Reject standard absolute symlink targets
442    if link_target.is_absolute() {
443        return Err(ArchiveError::Io(std::io::Error::new(
444            std::io::ErrorKind::InvalidData,
445            format!(
446                "Symlink with absolute target rejected: {}",
447                link_target.display()
448            ),
449        )));
450    }
451
452    // Always validate the resolved target against canonical_dest,
453    // not only when ParentDir is present. A target like `safe_subdir`
454    // could itself be a previously-extracted symlink pointing outside
455    // the root (chained symlink attack / TOCTOU zip-slip variant).
456    //
457    // Canonicalize the parent directory (which must already exist) to
458    // resolve any filesystem-level symlinks (e.g. /var -> /private/var
459    // on macOS) so the starts_with check against canonical_dest works.
460    let symlink_parent = outpath.parent().unwrap_or(destination);
461    let canonical_parent = symlink_parent
462        .canonicalize()
463        .unwrap_or_else(|_| symlink_parent.to_path_buf());
464    let resolved = canonical_parent.join(link_target);
465    let normalized = resolve_path(&resolved);
466
467    if !normalized.starts_with(canonical_dest) {
468        return Err(ArchiveError::Io(std::io::Error::new(
469            std::io::ErrorKind::InvalidData,
470            format!("Symlink escapes destination: {}", link_target.display()),
471        )));
472    }
473
474    Ok(())
475}
476
477/// Extract a TAR archive from a reader.
478///
479/// # Security
480/// This function validates all paths to prevent directory traversal attacks.
481/// Paths containing `..` components or absolute paths are rejected.
482///
483/// # Bomb protection (CRIT-4)
484/// - Checks `entry.header().size()` against `MAX_SINGLE_ENTRY_SIZE` before unpacking.
485/// - Maintains a cumulative byte counter against `MAX_TOTAL_EXTRACTED_SIZE`.
486/// - Limits entry count to `MAX_ENTRY_COUNT`.
487fn extract_tar_from_reader<R: Read>(reader: R, destination: &Path) -> ArchiveResult<Vec<PathBuf>> {
488    let mut archive = tar::Archive::new(reader);
489    // Security: Don't preserve setuid/setgid bits
490    archive.set_preserve_permissions(false);
491    // Security: Don't restore extended attributes
492    archive.set_unpack_xattrs(false);
493
494    let mut extracted_files = Vec::new();
495
496    // MED-3: canonicalization is required — return error on failure
497    let canonical_dest = destination.canonicalize().map_err(|e| {
498        std::io::Error::new(
499            std::io::ErrorKind::InvalidInput,
500            format!(
501                "Failed to canonicalize destination '{}': {}",
502                destination.display(),
503                e
504            ),
505        )
506    })?;
507
508    // CRIT-4: bomb protection counters
509    let mut entry_count: u64 = 0;
510    let mut total_extracted_bytes: u64 = 0;
511
512    for entry_result in archive.entries()? {
513        let mut entry = entry_result?;
514
515        // CRIT-4: enforce entry count limit
516        entry_count += 1;
517        if entry_count > MAX_ENTRY_COUNT {
518            return Err(ArchiveError::DecompressionBomb(format!(
519                "Archive exceeds maximum entry count of {}",
520                MAX_ENTRY_COUNT
521            )));
522        }
523
524        let entry_path = entry.path()?.into_owned();
525
526        // Security: Reject absolute paths
527        if entry_path.is_absolute() {
528            return Err(ArchiveError::Io(std::io::Error::new(
529                std::io::ErrorKind::InvalidData,
530                format!("Absolute path in archive: {}", entry_path.display()),
531            )));
532        }
533
534        // Security: Reject paths with parent directory components
535        if entry_path
536            .components()
537            .any(|c| matches!(c, Component::ParentDir))
538        {
539            return Err(ArchiveError::Io(std::io::Error::new(
540                std::io::ErrorKind::InvalidData,
541                format!(
542                    "Path traversal attempt in archive: {}",
543                    entry_path.display()
544                ),
545            )));
546        }
547
548        // Pre-flight check: reject obviously oversized entries before
549        // even attempting to unpack (defense in depth alongside the
550        // post-unpack verification below).
551        let declared_size = entry.header().size().unwrap_or(0);
552        if declared_size > MAX_SINGLE_ENTRY_SIZE {
553            return Err(ArchiveError::DecompressionBomb(format!(
554                "Entry '{}' declares size {} bytes (max {} bytes)",
555                entry_path.display(),
556                declared_size,
557                MAX_SINGLE_ENTRY_SIZE
558            )));
559        }
560
561        let outpath = destination.join(&entry_path);
562
563        // Security: Double-check that resolved path stays within destination
564        // (handles symlink attacks where intermediate directories are symlinks)
565        if let Some(parent) = outpath.parent() {
566            std::fs::create_dir_all(parent)?;
567            let canonical_out = parent.canonicalize().map_err(|e| {
568                std::io::Error::new(
569                    std::io::ErrorKind::InvalidData,
570                    format!(
571                        "Failed to canonicalize output path '{}': {}",
572                        parent.display(),
573                        e
574                    ),
575                )
576            })?;
577            if !canonical_out.starts_with(&canonical_dest) {
578                return Err(ArchiveError::Io(std::io::Error::new(
579                    std::io::ErrorKind::InvalidData,
580                    format!("Path escapes destination: {}", entry_path.display()),
581                )));
582            }
583        }
584
585        // Security: Validate symlink targets (MED-4 normalizer used)
586        let entry_type = entry.header().entry_type();
587        if (entry_type.is_symlink() || entry_type.is_hard_link())
588            && let Ok(Some(link_target)) = entry.link_name()
589        {
590            validate_tar_symlink_target(&link_target, &outpath, destination, &canonical_dest)?;
591        }
592
593        entry.unpack(&outpath)?;
594
595        // CRIT-2 fix: count *actual* bytes written to disk rather than
596        // trusting the attacker-controlled declared header size. A crafted
597        // archive can set all header sizes to 0 while encoding arbitrarily
598        // large content, bypassing the pre-flight check above.
599        let actual_size = std::fs::symlink_metadata(&outpath)
600            .map(|m| m.len())
601            .unwrap_or(0);
602
603        if actual_size > MAX_SINGLE_ENTRY_SIZE {
604            // Best-effort cleanup of the oversized entry.
605            let _ = std::fs::remove_file(&outpath);
606            return Err(ArchiveError::DecompressionBomb(format!(
607                "Entry '{}' extracted to {} bytes (max {} bytes)",
608                entry_path.display(),
609                actual_size,
610                MAX_SINGLE_ENTRY_SIZE
611            )));
612        }
613
614        total_extracted_bytes = total_extracted_bytes.saturating_add(actual_size);
615        if total_extracted_bytes > MAX_TOTAL_EXTRACTED_SIZE {
616            // Best-effort cleanup of the entry that pushed us over.
617            let _ = std::fs::remove_file(&outpath);
618            return Err(ArchiveError::DecompressionBomb(format!(
619                "Archive exceeded maximum total extraction size of {} bytes",
620                MAX_TOTAL_EXTRACTED_SIZE
621            )));
622        }
623
624        extracted_files.push(outpath);
625    }
626
627    Ok(extracted_files)
628}
629
630/// Create an archive from a list of files and/or directories.
631///
632/// Creates a new archive containing the specified files and directories.
633/// Directories are added recursively with all their contents.
634///
635/// # Arguments
636///
637/// * `files` - Paths to files and directories to include in the archive
638/// * `archive_path` - Path where the archive will be created
639/// * `format` - The archive format to use
640///
641/// # Errors
642///
643/// Returns an error if:
644/// - The destination already exists ([`ArchiveError::DestinationExists`])
645/// - Any source file cannot be read
646/// - The archive cannot be written
647///
648/// # Note
649///
650/// Symlinks in source directories are currently skipped (not followed or stored).
651///
652/// # Example
653///
654/// ```no_run
655/// use std::path::PathBuf;
656/// use gravityfile_ops::{create_archive, ArchiveFormat};
657///
658/// create_archive(
659///     &[PathBuf::from("src/"), PathBuf::from("Cargo.toml")],
660///     &PathBuf::from("backup.tar.gz"),
661///     ArchiveFormat::TarGz,
662/// )?;
663/// # Ok::<(), gravityfile_ops::ArchiveError>(())
664/// ```
665pub fn create_archive(
666    files: &[PathBuf],
667    archive_path: &Path,
668    format: ArchiveFormat,
669) -> ArchiveResult<()> {
670    if archive_path.exists() {
671        return Err(ArchiveError::DestinationExists(archive_path.to_path_buf()));
672    }
673
674    match format {
675        ArchiveFormat::Zip => create_zip(files, archive_path),
676        ArchiveFormat::Tar => create_tar(files, archive_path),
677        ArchiveFormat::TarGz => create_tar_gz(files, archive_path),
678        ArchiveFormat::TarBz2 => create_tar_bz2(files, archive_path),
679        ArchiveFormat::TarXz => create_tar_xz(files, archive_path),
680    }
681}
682
683/// Create a ZIP archive.
684fn create_zip(files: &[PathBuf], archive_path: &Path) -> ArchiveResult<()> {
685    let file = File::create(archive_path)?;
686    let mut archive = zip::ZipWriter::new(file);
687    let options = zip::write::SimpleFileOptions::default()
688        .compression_method(zip::CompressionMethod::Deflated);
689
690    for path in files {
691        // MED-5: return error when file_name() is None
692        let name = match path.file_name().and_then(|n| n.to_str()) {
693            Some(n) => n.to_owned(),
694            None => {
695                return Err(ArchiveError::Io(std::io::Error::new(
696                    std::io::ErrorKind::InvalidInput,
697                    format!("Path '{}' has no filename component", path.display()),
698                )));
699            }
700        };
701        add_path_to_zip(&mut archive, path, &name, &options)?;
702    }
703
704    archive.finish()?;
705    Ok(())
706}
707
708/// Recursively add a path to a ZIP archive.
709///
710/// Handles regular files, directories, and symlinks.
711/// Uses a visited set to detect and prevent symlink loops.
712fn add_path_to_zip<W: Write + std::io::Seek>(
713    archive: &mut zip::ZipWriter<W>,
714    path: &Path,
715    name: &str,
716    options: &zip::write::SimpleFileOptions,
717) -> ArchiveResult<()> {
718    add_path_to_zip_with_visited(
719        archive,
720        path,
721        name,
722        options,
723        &mut std::collections::HashSet::new(),
724    )
725}
726
727/// Internal implementation with visited tracking for loop detection.
728fn add_path_to_zip_with_visited<W: Write + std::io::Seek>(
729    archive: &mut zip::ZipWriter<W>,
730    path: &Path,
731    name: &str,
732    options: &zip::write::SimpleFileOptions,
733    visited: &mut std::collections::HashSet<PathBuf>,
734) -> ArchiveResult<()> {
735    // Use symlink_metadata to detect symlinks without following them
736    let metadata = match std::fs::symlink_metadata(path) {
737        Ok(m) => m,
738        Err(e) => {
739            // LOW-3: use tracing::warn! instead of eprintln!
740            tracing::warn!("Cannot access {}: {}", path.display(), e);
741            return Ok(());
742        }
743    };
744
745    if metadata.is_symlink() {
746        // Handle symlink
747        let target = match std::fs::read_link(path) {
748            Ok(t) => t,
749            Err(e) => {
750                // LOW-3: use tracing::warn! instead of eprintln!
751                tracing::warn!("Cannot read symlink {}: {}", path.display(), e);
752                return Ok(());
753            }
754        };
755
756        // Store symlink using Unix external attributes
757        #[cfg(unix)]
758        {
759            // Unix symlink mode: S_IFLNK (0o120000) | 0o777
760            let symlink_mode = 0o120777;
761            let unix_options = options.unix_permissions(symlink_mode);
762
763            archive.start_file(name, unix_options)?;
764            // Write the target path as the file content
765            let target_str = target.to_string_lossy();
766            archive.write_all(target_str.as_bytes())?;
767        }
768
769        #[cfg(not(unix))]
770        {
771            // On non-Unix, store symlink as a regular file with the target path
772            archive.start_file(name, *options)?;
773            let target_str = target.to_string_lossy();
774            archive.write_all(target_str.as_bytes())?;
775        }
776    } else if metadata.is_dir() {
777        // Detect symlink loops by checking canonical path
778        let canonical = match path.canonicalize() {
779            Ok(c) => c,
780            Err(_) => path.to_path_buf(),
781        };
782
783        if !visited.insert(canonical.clone()) {
784            // Already visited this directory (symlink loop)
785            // LOW-3: use tracing::warn! instead of eprintln!
786            tracing::warn!("Skipping symlink loop at {}", path.display());
787            return Ok(());
788        }
789
790        // Add directory entry
791        let dir_name = if name.ends_with('/') {
792            name.to_string()
793        } else {
794            format!("{}/", name)
795        };
796        archive.add_directory(&dir_name, *options)?;
797
798        // Recursively add contents
799        for entry in std::fs::read_dir(path)? {
800            let entry = entry?;
801            let entry_path = entry.path();
802            let entry_name = format!("{}{}", dir_name, entry.file_name().to_string_lossy());
803            add_path_to_zip_with_visited(archive, &entry_path, &entry_name, options, visited)?;
804        }
805
806        // Remove from visited when done with this branch
807        visited.remove(&canonical);
808    } else if metadata.is_file() {
809        archive.start_file(name, *options)?;
810        let mut file = File::open(path)?;
811        std::io::copy(&mut file, archive)?;
812    }
813    // Other file types (devices, sockets) are silently skipped
814
815    Ok(())
816}
817
818/// Create a plain TAR archive.
819fn create_tar(files: &[PathBuf], archive_path: &Path) -> ArchiveResult<()> {
820    let file = File::create(archive_path)?;
821    create_tar_to_writer(files, file)
822}
823
824/// Create a TAR.GZ archive.
825fn create_tar_gz(files: &[PathBuf], archive_path: &Path) -> ArchiveResult<()> {
826    let file = File::create(archive_path)?;
827    let encoder = flate2::write::GzEncoder::new(file, flate2::Compression::default());
828    create_tar_to_writer(files, encoder)
829}
830
831/// Create a TAR.BZ2 archive.
832fn create_tar_bz2(files: &[PathBuf], archive_path: &Path) -> ArchiveResult<()> {
833    let file = File::create(archive_path)?;
834    let encoder = bzip2::write::BzEncoder::new(file, bzip2::Compression::default());
835    create_tar_to_writer(files, encoder)
836}
837
838/// Create a TAR.XZ archive.
839fn create_tar_xz(files: &[PathBuf], archive_path: &Path) -> ArchiveResult<()> {
840    let file = File::create(archive_path)?;
841    let encoder = xz2::write::XzEncoder::new(file, 6);
842    create_tar_to_writer(files, encoder)
843}
844
845/// Create a TAR archive to a writer.
846///
847/// Handles regular files, directories, and symlinks.
848/// The `tar::Builder::append_dir_all` method handles symlinks within directories.
849fn create_tar_to_writer<W: Write>(files: &[PathBuf], writer: W) -> ArchiveResult<()> {
850    let mut archive = tar::Builder::new(writer);
851    // Follow symlinks in directory traversal (safer than storing broken symlinks)
852    archive.follow_symlinks(false); // Store symlinks as symlinks, not as their targets
853
854    for path in files {
855        let name = match path.file_name().and_then(|n| n.to_str()) {
856            Some(n) => n.to_owned(),
857            None => {
858                // LOW-3: use tracing::warn! instead of eprintln!
859                tracing::warn!("Skipping path with no filename: {}", path.display());
860                continue;
861            }
862        };
863
864        // Use symlink_metadata to detect type without following symlinks
865        let metadata = match std::fs::symlink_metadata(path) {
866            Ok(m) => m,
867            Err(e) => {
868                // LOW-3: use tracing::warn! instead of eprintln!
869                tracing::warn!("Cannot access {}: {}", path.display(), e);
870                continue;
871            }
872        };
873
874        if metadata.is_symlink() {
875            // Handle top-level symlink explicitly
876            let target = match std::fs::read_link(path) {
877                Ok(t) => t,
878                Err(e) => {
879                    // LOW-3: use tracing::warn! instead of eprintln!
880                    tracing::warn!("Cannot read symlink {}: {}", path.display(), e);
881                    continue;
882                }
883            };
884
885            // Create symlink entry in TAR
886            let mut header = tar::Header::new_gnu();
887            header.set_entry_type(tar::EntryType::Symlink);
888            header.set_size(0);
889            header.set_mode(0o777);
890            header.set_mtime(
891                metadata
892                    .modified()
893                    .ok()
894                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
895                    .map(|d| d.as_secs())
896                    .unwrap_or(0),
897            );
898
899            // Set the link name (symlink target)
900            header.set_link_name(&target)?;
901            header.set_cksum();
902
903            archive.append_data(&mut header, &name, std::io::empty())?;
904        } else if metadata.is_dir() {
905            archive.append_dir_all(&name, path)?;
906        } else if metadata.is_file() {
907            archive.append_path_with_name(path, &name)?;
908        }
909        // Other file types (devices, sockets) are silently skipped
910    }
911
912    archive.finish()?;
913    Ok(())
914}
915
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Returns `true` if any component of `path` is `..`.
    fn has_parent_dir(path: &Path) -> bool {
        path.components()
            .any(|c| matches!(c, std::path::Component::ParentDir))
    }

    /// Creates `<root>/source` holding `test.txt` and `subdir/nested.txt`,
    /// and returns the path of the new `source` directory.
    fn make_source_tree(root: &Path, top_content: &str, nested_content: &str) -> PathBuf {
        let source_dir = root.join("source");
        std::fs::create_dir(&source_dir).unwrap();
        std::fs::write(source_dir.join("test.txt"), top_content).unwrap();
        std::fs::create_dir(source_dir.join("subdir")).unwrap();
        std::fs::write(source_dir.join("subdir/nested.txt"), nested_content).unwrap();
        source_dir
    }

    /// Format detection maps known extensions to formats, case-insensitively.
    #[test]
    fn test_archive_format_detection() {
        let cases = [
            ("test.zip", Some(ArchiveFormat::Zip)),
            ("test.tar.gz", Some(ArchiveFormat::TarGz)),
            ("test.tgz", Some(ArchiveFormat::TarGz)),
            ("test.tar", Some(ArchiveFormat::Tar)),
            ("test.txt", None),
            // `from_path` lowercases the file name before matching.
            ("TEST.ZIP", Some(ArchiveFormat::Zip)),
            ("Backup.TAR.GZ", Some(ArchiveFormat::TarGz)),
            ("Backup.TGZ", Some(ArchiveFormat::TarGz)),
        ];

        for (file_name, expected) in cases {
            assert_eq!(
                ArchiveFormat::from_path(Path::new(file_name)),
                expected,
                "unexpected format for {file_name}"
            );
        }
    }

    /// A directory tree survives a ZIP create/extract round trip.
    #[test]
    fn test_create_and_extract_zip() {
        let temp_dir = TempDir::new().unwrap();
        let source_dir = make_source_tree(temp_dir.path(), "Hello, World!", "Nested content");

        // Create archive
        let archive_path = temp_dir.path().join("test.zip");
        create_archive(
            std::slice::from_ref(&source_dir),
            &archive_path,
            ArchiveFormat::Zip,
        )
        .unwrap();
        assert!(archive_path.exists());

        // Extract archive
        let extract_dir = temp_dir.path().join("extracted");
        let extracted = extract_archive(&archive_path, &extract_dir).unwrap();

        assert!(!extracted.is_empty());
        // Verify something actually landed on disk (same check as the tar.gz test).
        assert!(extract_dir.join("source").exists() || extract_dir.join("test.txt").exists());
    }

    /// A directory tree survives a TAR.GZ create/extract round trip.
    #[test]
    fn test_create_and_extract_tar_gz() {
        let temp_dir = TempDir::new().unwrap();
        let source_dir = make_source_tree(temp_dir.path(), "Hello from tar.gz!", "Nested in tar");

        // Create archive
        let archive_path = temp_dir.path().join("test.tar.gz");
        create_archive(
            std::slice::from_ref(&source_dir),
            &archive_path,
            ArchiveFormat::TarGz,
        )
        .unwrap();
        assert!(archive_path.exists());

        // Extract archive
        let extract_dir = temp_dir.path().join("extracted");
        let extracted = extract_archive(&archive_path, &extract_dir).unwrap();

        assert!(!extracted.is_empty());
        // Verify content was extracted correctly
        assert!(extract_dir.join("source").exists() || extract_dir.join("test.txt").exists());
    }

    /// An absolute entry path is recognisable as unsafe.
    #[test]
    fn test_path_traversal_prevention_absolute_path() {
        let path = Path::new("/etc/passwd");
        assert!(path.is_absolute());
        assert!(path.is_absolute() || has_parent_dir(path));
    }

    /// `..` components are detected wherever they appear in a path.
    #[test]
    fn test_path_traversal_prevention_parent_dir() {
        assert!(has_parent_dir(Path::new("../../../etc/passwd")));
        assert!(has_parent_dir(Path::new("foo/../../../bar")));
        assert!(!has_parent_dir(Path::new("foo/bar/baz.txt")));
    }

    /// An archive with no entries extracts to an empty result.
    #[test]
    fn test_empty_archive_handling() {
        let temp_dir = TempDir::new().unwrap();

        // Create empty zip
        let archive_path = temp_dir.path().join("empty.zip");
        create_archive(&[], &archive_path, ArchiveFormat::Zip).unwrap();

        // Extract should succeed with empty result
        let extract_dir = temp_dir.path().join("extracted");
        let extracted = extract_archive(&archive_path, &extract_dir).unwrap();
        assert!(extracted.is_empty());
    }

    /// Extraction creates a missing (nested) destination directory.
    #[test]
    fn test_archive_destination_not_exists() {
        let temp_dir = TempDir::new().unwrap();

        let test_file = temp_dir.path().join("test.txt");
        std::fs::write(&test_file, "test content").unwrap();

        let archive_path = temp_dir.path().join("test.zip");
        create_archive(&[test_file], &archive_path, ArchiveFormat::Zip).unwrap();

        // Extract to non-existent directory (should create it)
        let extract_dir = temp_dir.path().join("new_dir/nested/deep");
        let extracted = extract_archive(&archive_path, &extract_dir).unwrap();
        assert!(!extracted.is_empty());
        assert!(extract_dir.exists());
    }

    /// Creating an archive over an existing file is rejected.
    #[test]
    fn test_archive_already_exists_error() {
        let temp_dir = TempDir::new().unwrap();

        let test_file = temp_dir.path().join("test.txt");
        std::fs::write(&test_file, "test content").unwrap();

        let archive_path = temp_dir.path().join("test.zip");
        create_archive(
            std::slice::from_ref(&test_file),
            &archive_path,
            ArchiveFormat::Zip,
        )
        .unwrap();

        // Second attempt targets the file that now exists.
        let result = create_archive(&[test_file], &archive_path, ArchiveFormat::Zip);
        assert!(matches!(result, Err(ArchiveError::DestinationExists(_))));
    }

    /// A ZIP produced by `create_archive` passes extraction-time path validation.
    #[test]
    fn test_zip_path_validation_consistency() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        std::fs::write(&test_file, "test").unwrap();

        let archive_path = temp_dir.path().join("test.zip");
        create_archive(&[test_file], &archive_path, ArchiveFormat::Zip).unwrap();

        let extract_dir = temp_dir.path().join("extracted");
        let result = extract_archive(&archive_path, &extract_dir);
        assert!(result.is_ok(), "extract failed: {:?}", result.err());
    }

    /// The decompression-bomb limits stay within sane bounds (checked at compile time).
    #[test]
    fn test_decompression_constants() {
        const {
            assert!(MAX_DECOMPRESSION_RATIO >= 100.0);
            assert!(MAX_DECOMPRESSION_RATIO <= 10000.0);
            assert!(MAX_TOTAL_EXTRACTED_SIZE >= 1024 * 1024 * 1024);
            assert!(MAX_TOTAL_EXTRACTED_SIZE <= 100 * 1024 * 1024 * 1024);
            assert!(MAX_ENTRY_COUNT >= 1_000);
            assert!(MAX_ENTRY_COUNT <= 10_000_000);
            // A single entry can never legitimately exceed the total budget.
            assert!(MAX_SINGLE_ENTRY_SIZE <= MAX_TOTAL_EXTRACTED_SIZE);
        }
    }

    /// Extracted files carry no setuid/setgid/sticky bits.
    #[cfg(unix)]
    #[test]
    fn test_permission_stripping() {
        use std::os::unix::fs::PermissionsExt;

        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        std::fs::write(&test_file, "test content").unwrap();

        // NOTE(review): 0o755 has none of the 0o7000 bits set, so the final
        // assertion only proves extraction does not *add* special bits. To
        // exercise real stripping the source would need e.g. 0o6755 — confirm
        // against the extraction code before tightening.
        let perms = std::fs::Permissions::from_mode(0o755);
        std::fs::set_permissions(&test_file, perms).unwrap();

        let archive_path = temp_dir.path().join("test.zip");
        create_archive(&[test_file], &archive_path, ArchiveFormat::Zip).unwrap();

        let extract_dir = temp_dir.path().join("extracted");
        extract_archive(&archive_path, &extract_dir).unwrap();

        let extracted_file = extract_dir.join("test.txt");
        assert!(extracted_file.exists());

        let mode = std::fs::metadata(&extracted_file)
            .unwrap()
            .permissions()
            .mode();
        assert_eq!(mode & 0o7000, 0);
    }

    /// A directory containing a relative symlink round-trips through TAR.
    #[cfg(unix)]
    #[test]
    fn test_symlink_in_tar_archive() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let source_dir = temp_dir.path().join("source");
        std::fs::create_dir(&source_dir).unwrap();

        std::fs::write(source_dir.join("target.txt"), "target content").unwrap();
        // Relative link that stays inside the archived directory.
        symlink("target.txt", source_dir.join("link.txt")).unwrap();

        let archive_path = temp_dir.path().join("test.tar");
        create_archive(
            std::slice::from_ref(&source_dir),
            &archive_path,
            ArchiveFormat::Tar,
        )
        .unwrap();
        assert!(archive_path.exists());

        let extract_dir = temp_dir.path().join("extracted");
        let result = extract_archive(&archive_path, &extract_dir);
        assert!(result.is_ok(), "extract failed: {:?}", result.err());
    }

    /// Symlink targets can be classified as safe, escaping, or absolute.
    #[test]
    fn test_symlink_validation_paths() {
        assert!(!has_parent_dir(Path::new("subdir/file.txt")));
        assert!(has_parent_dir(Path::new("../../../etc/passwd")));
        assert!(Path::new("/etc/passwd").is_absolute());
    }
}