zesven 1.1.0

A pure Rust implementation of the 7z archive format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
//! Safety and resource limit utilities.
//!
//! This module provides utilities for safe extraction including path validation,
//! resource limit enforcement, and compression bomb protection.

use std::io::{self, Read};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use crate::{ArchivePath, Error, Result};

/// Policy for validating extraction paths.
///
/// This enum controls how strictly paths from archive entries are validated
/// before extraction. The default is `Strict`, which provides the safest
/// behavior by blocking any potential path traversal attacks.
///
/// # Security
///
/// Path traversal attacks occur when an archive contains entries with paths
/// like `../../../etc/passwd` that could escape the intended extraction
/// directory. Always use `Strict` mode when extracting untrusted archives.
///
/// # Examples
///
/// ```rust
/// use zesven::safety::PathSafety;
///
/// // Default: strict validation (recommended for untrusted archives)
/// let policy = PathSafety::default();
/// assert_eq!(policy, PathSafety::Strict);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PathSafety {
    /// Strict validation: block any potential path traversal.
    ///
    /// This is the safest mode and the default. It:
    /// - Rejects paths containing `..` components
    /// - Rejects absolute paths (starting with `/`)
    /// - Verifies that resolved paths stay within the destination directory
    #[default]
    Strict,
    /// Allow relative paths but block absolute paths and traversal.
    ///
    /// This mode is slightly more permissive but still blocks obvious attacks.
    /// Use only when you understand the archive source.
    Relaxed,
    /// Disables all path validation (DANGEROUS - use with extreme caution).
    ///
    /// # Security Warning
    ///
    /// **`PathSafety::Disabled` completely disables all path validation.** This allows:
    ///
    /// - **Absolute paths**: Entries like `/etc/passwd` will be extracted to the
    ///   system root, not relative to your destination directory
    /// - **Path traversal**: Entries like `../../../etc/passwd` will escape the
    ///   extraction directory and overwrite arbitrary files
    /// - **Arbitrary file writes**: A malicious archive can overwrite any file
    ///   the process has permission to write
    ///
    /// # When to Use
    ///
    /// Only use `PathSafety::Disabled` when **all** of these conditions are met:
    ///
    /// 1. You created the archive yourself, OR
    /// 2. The archive comes from a cryptographically verified trusted source
    /// 3. You understand exactly what paths are in the archive
    /// 4. You need to extract paths that would be blocked by `Strict` or `Relaxed`
    ///
    /// # Alternatives
    ///
    /// Consider using `PathSafety::Relaxed` instead, which allows more paths
    /// while still blocking obvious attacks. If you need to extract specific
    /// absolute paths, consider extracting to a temporary directory and moving
    /// files manually.
    ///
    /// # Example of Risk
    ///
    /// ```rust,ignore
    /// // A malicious archive could contain:
    /// // - "/etc/cron.d/malware" -> Installs scheduled malware
    /// // - "../../../home/user/.ssh/authorized_keys" -> Gains SSH access
    /// // - "/usr/local/bin/sudo" -> Replaces system utilities
    ///
    /// // With PathSafety::Disabled, all of these would be extracted!
    /// let options = ExtractOptions::new().path_safety(PathSafety::Disabled);
    /// archive.extract("/tmp/safe_dir", (), &options)?; // NOT SAFE!
    /// ```
    Disabled,
}

/// Validates an extraction path against the given safety policy.
///
/// This function checks that the archive path is safe to extract to the
/// destination directory based on the configured policy.
///
/// # Arguments
///
/// * `archive_path` - The path from the archive entry
/// * `dest_root` - The destination directory for extraction
/// * `policy` - The path safety policy to enforce
/// * `entry_index` - The index of the entry being validated (for error reporting)
///
/// # Returns
///
/// The validated full path to extract to, or an error if validation fails.
pub fn validate_extract_path(
    archive_path: &ArchivePath,
    dest_root: &Path,
    policy: PathSafety,
    entry_index: usize,
) -> Result<PathBuf> {
    let path_str = archive_path.as_str();

    // Check for traversal attempts in the archive path
    for component in path_str.split('/') {
        if component == ".." {
            return Err(Error::PathTraversal {
                entry_index,
                path: path_str.to_string(),
            });
        }
    }

    match policy {
        PathSafety::Strict => {
            // Reject absolute paths
            if path_str.starts_with('/') {
                return Err(Error::PathTraversal {
                    entry_index,
                    path: path_str.to_string(),
                });
            }

            let full_path = dest_root.join(path_str);

            // Verify the resolved path stays within dest_root.
            // The destination root MUST be canonicalizable - propagate IO error if not.
            let canonical_dest = dest_root.canonicalize()?;

            // For the target path, we must verify containment.
            // This check MUST NOT be bypassed - silent bypass is a security vulnerability.
            let canonical_full = if full_path.exists() {
                // Path exists - canonicalize it directly
                full_path.canonicalize()?
            } else {
                // For non-existent paths, canonicalize the deepest existing ancestor
                // and verify the path would stay within bounds.
                let mut ancestor = full_path.as_path();
                let mut components_to_append = Vec::new();

                // Walk up until we find an existing directory
                loop {
                    if ancestor.exists() {
                        break;
                    }
                    if let Some(file_name) = ancestor.file_name() {
                        components_to_append.push(file_name.to_os_string());
                    }
                    match ancestor.parent() {
                        Some(p) if !p.as_os_str().is_empty() => {
                            ancestor = p;
                        }
                        _ => {
                            // No existing ancestor found - this shouldn't happen if
                            // dest_root exists (which it must, since we canonicalized it).
                            // Build path relative to canonical_dest.
                            let relative = full_path.strip_prefix(dest_root).map_err(|_| {
                                Error::PathTraversal {
                                    entry_index,
                                    path: path_str.to_string(),
                                }
                            })?;
                            let mut result = canonical_dest.clone();
                            for component in relative.components() {
                                if let std::path::Component::Normal(c) = component {
                                    result.push(c);
                                }
                            }
                            // Verify containment before returning
                            if !result.starts_with(&canonical_dest) {
                                return Err(Error::PathTraversal {
                                    entry_index,
                                    path: path_str.to_string(),
                                });
                            }
                            return Ok(full_path);
                        }
                    }
                }

                // Canonicalize the existing ancestor
                let canonical_ancestor = ancestor.canonicalize()?;

                // Rebuild the path by appending the non-existent components
                let mut result = canonical_ancestor;
                for component in components_to_append.into_iter().rev() {
                    result.push(component);
                }
                result
            };

            // Final containment check - this MUST NOT be bypassed
            if !canonical_full.starts_with(&canonical_dest) {
                return Err(Error::PathTraversal {
                    entry_index,
                    path: path_str.to_string(),
                });
            }

            Ok(full_path)
        }
        PathSafety::Relaxed => {
            // Allow relative paths, block absolute and traversal
            if path_str.starts_with('/') {
                return Err(Error::PathTraversal {
                    entry_index,
                    path: path_str.to_string(),
                });
            }
            Ok(dest_root.join(path_str))
        }
        PathSafety::Disabled => {
            // No validation (dangerous!)
            Ok(dest_root.join(path_str))
        }
    }
}

/// A reader wrapper that enforces resource limits during extraction.
///
/// This wrapper tracks bytes read and checks against configured limits,
/// providing protection against compression bombs and runaway extractions.
pub struct LimitedReader<R> {
    inner: R,
    /// Maximum bytes this entry can produce.
    max_entry_bytes: u64,
    /// Bytes read from this entry so far.
    bytes_read: u64,
    /// Size of the compressed data (for ratio checking).
    compressed_size: u64,
    /// Maximum compression ratio allowed.
    max_ratio: Option<u32>,
    /// Shared counter for total bytes across all entries.
    total_tracker: Option<Arc<AtomicU64>>,
    /// Maximum total bytes.
    max_total_bytes: u64,
}

impl<R> LimitedReader<R> {
    /// Creates a new limited reader.
    pub fn new(inner: R) -> Self {
        Self {
            inner,
            max_entry_bytes: u64::MAX,
            bytes_read: 0,
            compressed_size: 0,
            max_ratio: None,
            total_tracker: None,
            max_total_bytes: u64::MAX,
        }
    }

    /// Sets the maximum bytes for this entry.
    pub fn max_entry_bytes(mut self, max: u64) -> Self {
        self.max_entry_bytes = max;
        self
    }

    /// Sets the compressed size for ratio checking.
    pub fn compressed_size(mut self, size: u64) -> Self {
        self.compressed_size = size;
        self
    }

    /// Sets the maximum compression ratio.
    pub fn max_ratio(mut self, ratio: u32) -> Self {
        self.max_ratio = Some(ratio);
        self
    }

    /// Sets a shared tracker for total bytes.
    pub fn total_tracker(mut self, tracker: Arc<AtomicU64>, max_total: u64) -> Self {
        self.total_tracker = Some(tracker);
        self.max_total_bytes = max_total;
        self
    }

    /// Returns the number of bytes read so far.
    pub fn bytes_read(&self) -> u64 {
        self.bytes_read
    }

    /// Returns the inner reader.
    pub fn into_inner(self) -> R {
        self.inner
    }
}

impl<R: Read> Read for LimitedReader<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let n = self.inner.read(buf)?;
        if n == 0 {
            return Ok(0);
        }

        self.bytes_read += n as u64;

        // Check entry limit
        if self.bytes_read > self.max_entry_bytes {
            return Err(io::Error::other(Error::ResourceLimitExceeded(format!(
                "Entry size {} exceeds limit {}",
                self.bytes_read, self.max_entry_bytes
            ))));
        }

        // Check ratio limit using multiplication to avoid integer truncation.
        // Instead of: bytes_read / compressed_size > max_ratio (truncates)
        // We check:   bytes_read > max_ratio * compressed_size (exact)
        if let Some(max_ratio) = self.max_ratio {
            if self.compressed_size > 0 {
                // Use saturating_mul to handle potential overflow safely
                let max_allowed = (max_ratio as u64).saturating_mul(self.compressed_size);
                if self.bytes_read > max_allowed {
                    // Calculate actual ratio for error message (truncation OK for display)
                    let actual_ratio = self.bytes_read / self.compressed_size;
                    return Err(io::Error::other(Error::ResourceLimitExceeded(format!(
                        "Compression ratio {}:1 exceeds limit {}:1 (compressed: {}, uncompressed: {})",
                        actual_ratio, max_ratio, self.compressed_size, self.bytes_read
                    ))));
                }
            }
        }

        // Update total tracker
        if let Some(ref tracker) = self.total_tracker {
            let total = tracker.fetch_add(n as u64, Ordering::Relaxed) + n as u64;
            if total > self.max_total_bytes {
                return Err(io::Error::other(Error::ResourceLimitExceeded(format!(
                    "Total extracted size {} exceeds limit {}",
                    total, self.max_total_bytes
                ))));
            }
        }

        Ok(n)
    }
}

impl<R> std::fmt::Debug for LimitedReader<R> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LimitedReader")
            .field("max_entry_bytes", &self.max_entry_bytes)
            .field("bytes_read", &self.bytes_read)
            .field("compressed_size", &self.compressed_size)
            .field("max_ratio", &self.max_ratio)
            .finish_non_exhaustive()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    #[test]
    fn test_validate_strict_normal_path() {
        let archive_path = ArchivePath::new("foo/bar.txt").unwrap();
        let dest = std::env::temp_dir();
        let result = validate_extract_path(&archive_path, &dest, PathSafety::Strict, 0);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), dest.join("foo").join("bar.txt"));
    }

    #[test]
    fn test_validate_strict_rejects_traversal() {
        let archive_path = ArchivePath::new("foo/../bar.txt");
        // ArchivePath should already reject this, but test the function too
        assert!(archive_path.is_err());
    }

    #[test]
    fn test_validate_strict_rejects_absolute() {
        // Can't create ArchivePath with absolute path, so we test the function directly
        // by checking the policy handles paths starting with /
        let dest = std::env::temp_dir();
        // The ArchivePath type prevents absolute paths, so strict mode is safe
        let archive_path = ArchivePath::new("safe/path.txt").unwrap();
        let result = validate_extract_path(&archive_path, &dest, PathSafety::Strict, 0);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_disabled_allows_anything() {
        let archive_path = ArchivePath::new("any/path.txt").unwrap();
        let dest = std::env::temp_dir();
        let result = validate_extract_path(&archive_path, &dest, PathSafety::Disabled, 0);
        assert!(result.is_ok());
    }

    #[test]
    fn test_path_traversal_error_contains_entry_index() {
        // Test that the entry index is correctly reported in path traversal errors
        // We can't directly test this with ArchivePath since it rejects "..",
        // but we verify the error type is correct when reported
        let err = Error::PathTraversal {
            entry_index: 42,
            path: "malicious/path".to_string(),
        };
        let err_str = err.to_string();
        assert!(err_str.contains("42"), "Error should contain entry index");
    }

    #[test]
    fn test_limited_reader_under_limit() {
        let data = vec![0u8; 100];
        let mut reader = LimitedReader::new(Cursor::new(data)).max_entry_bytes(1000);

        let mut buf = Vec::new();
        let result = reader.read_to_end(&mut buf);
        assert!(result.is_ok());
        assert_eq!(buf.len(), 100);
    }

    #[test]
    fn test_limited_reader_exceeds_entry_limit() {
        let data = vec![0u8; 200];
        let mut reader = LimitedReader::new(Cursor::new(data)).max_entry_bytes(100);

        let mut buf = Vec::new();
        let result = reader.read_to_end(&mut buf);
        assert!(result.is_err());
    }

    #[test]
    fn test_limited_reader_ratio_check() {
        // Simulate 10 bytes compressed expanding to 2000 bytes
        let data = vec![0u8; 2000];
        let mut reader = LimitedReader::new(Cursor::new(data))
            .compressed_size(10)
            .max_ratio(100); // Allow 100:1 ratio

        let mut buf = Vec::new();
        let result = reader.read_to_end(&mut buf);
        // Should fail because 2000/10 = 200 > 100
        assert!(result.is_err());
    }

    #[test]
    fn test_limited_reader_total_tracker() {
        let tracker = Arc::new(AtomicU64::new(0));

        // First read: 50 bytes
        let data1 = vec![0u8; 50];
        let mut reader1 =
            LimitedReader::new(Cursor::new(data1)).total_tracker(tracker.clone(), 100);
        let mut buf1 = Vec::new();
        assert!(reader1.read_to_end(&mut buf1).is_ok());

        // Second read: 60 bytes (total would be 110, exceeds 100)
        let data2 = vec![0u8; 60];
        let mut reader2 =
            LimitedReader::new(Cursor::new(data2)).total_tracker(tracker.clone(), 100);
        let mut buf2 = Vec::new();
        assert!(reader2.read_to_end(&mut buf2).is_err());
    }

    #[test]
    fn test_limited_reader_bytes_read() {
        let data = vec![0u8; 50];
        let mut reader = LimitedReader::new(Cursor::new(data));

        let mut buf = [0u8; 20];
        let _ = reader.read(&mut buf).unwrap();
        assert_eq!(reader.bytes_read(), 20);

        let _ = reader.read(&mut buf).unwrap();
        assert_eq!(reader.bytes_read(), 40);
    }

    #[test]
    fn test_limited_reader_ratio_no_truncation() {
        // Test that ratio 1.5:1 is correctly detected with max_ratio=1.
        // Prior to fix, integer division (15/10 = 1) would incorrectly pass.
        // With multiplication check (15 > 1*10), this correctly fails.
        let data = vec![0u8; 15];
        let mut reader = LimitedReader::new(Cursor::new(data))
            .compressed_size(10)
            .max_ratio(1); // Only allow 1:1 ratio

        let mut buf = Vec::new();
        let result = reader.read_to_end(&mut buf);
        // Should fail because 15 > 1*10 (ratio 1.5:1 exceeds 1:1 limit)
        assert!(
            result.is_err(),
            "Ratio 1.5:1 should exceed limit of 1:1 - was truncation bug fixed?"
        );
    }

    #[test]
    fn test_limited_reader_ratio_at_exact_boundary() {
        // Test exact boundary: 100:1 ratio with max_ratio=100 should pass
        let data = vec![0u8; 1000];
        let mut reader = LimitedReader::new(Cursor::new(data))
            .compressed_size(10)
            .max_ratio(100); // Allow exactly 100:1 ratio

        let mut buf = Vec::new();
        let result = reader.read_to_end(&mut buf);
        // Should pass because 1000 == 100*10 (exactly at limit)
        assert!(
            result.is_ok(),
            "Ratio exactly at 100:1 should pass when limit is 100"
        );
    }

    #[test]
    fn test_limited_reader_ratio_one_over_boundary() {
        // Test one byte over boundary should fail
        let data = vec![0u8; 1001];
        let mut reader = LimitedReader::new(Cursor::new(data))
            .compressed_size(10)
            .max_ratio(100); // Allow 100:1 ratio

        let mut buf = Vec::new();
        let result = reader.read_to_end(&mut buf);
        // Should fail because 1001 > 100*10 (one byte over limit)
        assert!(
            result.is_err(),
            "Ratio 100.1:1 should exceed limit of 100:1"
        );
    }
}