Skip to main content

uv_extract/
lib.rs

1use std::{fmt::Display, sync::LazyLock};
2
3pub use error::Error;
4use regex::Regex;
5pub use sync::*;
6use uv_static::EnvVars;
7
8mod error;
9pub mod hash;
10pub mod stream;
11mod sync;
12mod vendor;
13
14static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
15static REPLACEMENT_CHARACTER: &str = "\u{FFFD}";
16
/// Compression methods that we consider supported.
///
/// Our underlying ZIP dependencies may support more.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum CompressionMethod {
    /// Displayed as `stored`.
    Stored,
    /// Displayed as `DEFLATE`.
    Deflated,
    /// Displayed as `zstd`.
    Zstd,
    /// Any method that is not considered well-known; the payload is the name
    /// used when displaying the method.
    // NOTE: This will become `Unsupported(...)` in the future.
    Deprecated(&'static str),
}

impl CompressionMethod {
    /// Returns `true` if this is a well-known compression method that we
    /// expect other ZIP implementations to support.
    pub(crate) fn is_well_known(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here
        // instead of silently defaulting to "not well-known".
        match self {
            Self::Stored | Self::Deflated | Self::Zstd => true,
            Self::Deprecated(_) => false,
        }
    }
}

impl Display for CompressionMethod {
    /// Writes the human-readable name of the compression method.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Stored => "stored",
            Self::Deflated => "DEFLATE",
            Self::Zstd => "zstd",
            Self::Deprecated(name) => *name,
        };
        f.write_str(label)
    }
}
46
47impl From<async_zip::Compression> for CompressionMethod {
48    fn from(value: async_zip::Compression) -> Self {
49        match value {
50            async_zip::Compression::Stored => Self::Stored,
51            async_zip::Compression::Deflate => Self::Deflated,
52            async_zip::Compression::Zstd => Self::Zstd,
53            async_zip::Compression::Bz => Self::Deprecated("bzip2"),
54            async_zip::Compression::Lzma => Self::Deprecated("lzma"),
55            async_zip::Compression::Xz => Self::Deprecated("xz"),
56            _ => Self::Deprecated("unknown"),
57        }
58    }
59}
60
61impl From<zip::CompressionMethod> for CompressionMethod {
62    fn from(value: zip::CompressionMethod) -> Self {
63        match value {
64            zip::CompressionMethod::Stored => Self::Stored,
65            zip::CompressionMethod::Deflated => Self::Deflated,
66            zip::CompressionMethod::Zstd => Self::Zstd,
67            zip::CompressionMethod::Bzip2 => Self::Deprecated("bzip2"),
68            zip::CompressionMethod::Lzma => Self::Deprecated("lzma"),
69            zip::CompressionMethod::Xz => Self::Deprecated("xz"),
70            _ => Self::Deprecated("unknown"),
71        }
72    }
73}
74
75/// Validate that a given filename (e.g. reported by a ZIP archive's
76/// local file entries or central directory entries) is "safe" to use.
77///
78/// "Safe" in this context doesn't refer to directory traversal
79/// risk, but whether we believe that other ZIP implementations
80/// handle the name correctly and consistently.
81///
82/// Specifically, we want to avoid names that:
83///
84/// - Contain *any* non-printable characters
85/// - Are empty
86///
87/// In the future, we may also want to check for names that contain
88/// leading/trailing whitespace, or names that are exceedingly long.
89pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> {
90    if name.is_empty() {
91        return Err(Error::EmptyFilename);
92    }
93
94    match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) {
95        // No replacements mean no control characters.
96        std::borrow::Cow::Borrowed(_) => Ok(()),
97        std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename {
98            filename: sanitized,
99        }),
100    }
101}
102
103/// Returns `true` if ZIP validation is disabled.
104pub(crate) fn insecure_no_validate() -> bool {
105    // TODO(charlie) Parse this in `EnvironmentOptions`.
106    let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
107        return false;
108    };
109    let Some(value) = value.to_str() else {
110        return false;
111    };
112    matches!(
113        value.to_lowercase().as_str(),
114        "y" | "yes" | "t" | "true" | "on" | "1"
115    )
116}
117
#[cfg(test)]
mod tests {
    /// Accepts ordinary names and rejects empty names and names containing
    /// control characters.
    #[test]
    fn test_validate_archive_member_name() {
        let accepted = ["normal.txt", "__init__.py", "fine i guess.py", "🌈.py"];
        let rejected = [
            "",
            "new\nline.py",
            "carriage\rreturn.py",
            "tab\tcharacter.py",
            "null\0byte.py",
            "control\x01code.py",
            "control\x02code.py",
            "control\x03code.py",
            "control\x04code.py",
            "backspace\x08code.py",
            "delete\x7fcode.py",
        ];

        // Names are printed with `{:?}` so that embedded control characters
        // show up escaped in a failure message instead of being invisible.
        for testcase in accepted {
            assert!(
                super::validate_archive_member_name(testcase).is_ok(),
                "testcase should be accepted: {testcase:?}"
            );
        }
        for testcase in rejected {
            assert!(
                super::validate_archive_member_name(testcase).is_err(),
                "testcase should be rejected: {testcase:?}"
            );
        }
    }

    /// The filename carried by the error has the offending character replaced
    /// by U+FFFD rather than echoed back verbatim.
    #[test]
    fn test_unacceptable_filename_error_replaces_control_characters() {
        let err = super::validate_archive_member_name("bad\nname").unwrap_err();
        let super::Error::UnacceptableFilename { filename } = err else {
            panic!("expected UnacceptableFilename error");
        };
        // Written as an escape so the expectation survives encoding
        // round-trips and cannot be mistaken for mojibake.
        assert_eq!(filename, "bad\u{FFFD}name");
    }
}