Skip to main content

uv_extract/
lib.rs

1use std::{fmt::Display, sync::LazyLock};
2
3pub use error::Error;
4use regex::Regex;
5pub use sync::*;
6use uv_static::EnvVars;
7
8mod error;
9pub mod hash;
10pub mod stream;
11mod sync;
12mod vendor;
13
14static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
15static REPLACEMENT_CHARACTER: &str = "\u{FFFD}";
16
/// Compression methods that we consider supported.
///
/// Our underlying ZIP dependencies may support more.
///
/// The standard comparison, cloning and debug-formatting traits are derived so
/// that values can be compared in assertions and included in diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum CompressionMethod {
    /// The entry is stored without compression.
    Stored,
    /// The entry is compressed with DEFLATE.
    Deflated,
    /// The entry is compressed with zstd.
    Zstd,
    /// A method that is still recognized but not considered well-known; the
    /// payload is the human-readable name of the method.
    // NOTE: This will become `Unsupported(...)` in the future.
    Deprecated(&'static str),
}
27
28impl CompressionMethod {
29    /// Returns `true` if this is a well-known compression method that we
30    /// expect other ZIP implementations to support.
31    pub(crate) fn is_well_known(&self) -> bool {
32        matches!(self, Self::Stored | Self::Deflated | Self::Zstd)
33    }
34}
35
36impl Display for CompressionMethod {
37    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38        match self {
39            Self::Stored => write!(f, "stored"),
40            Self::Deflated => write!(f, "DEFLATE"),
41            Self::Zstd => write!(f, "zstd"),
42            Self::Deprecated(name) => write!(f, "{name}"),
43        }
44    }
45}
46
47impl From<async_zip::Compression> for CompressionMethod {
48    fn from(value: async_zip::Compression) -> Self {
49        match value {
50            async_zip::Compression::Stored => Self::Stored,
51            async_zip::Compression::Deflate => Self::Deflated,
52            async_zip::Compression::Zstd => Self::Zstd,
53            async_zip::Compression::Bz => Self::Deprecated("bzip2"),
54            async_zip::Compression::Lzma => Self::Deprecated("lzma"),
55            async_zip::Compression::Xz => Self::Deprecated("xz"),
56            _ => Self::Deprecated("unknown"),
57        }
58    }
59}
60
61/// Validate that a given filename (e.g. reported by a ZIP archive's
62/// local file entries or central directory entries) is "safe" to use.
63///
64/// "Safe" in this context doesn't refer to directory traversal
65/// risk, but whether we believe that other ZIP implementations
66/// handle the name correctly and consistently.
67///
68/// Specifically, we want to avoid names that:
69///
70/// - Contain *any* non-printable characters
71/// - Are empty
72///
73/// In the future, we may also want to check for names that contain
74/// leading/trailing whitespace, or names that are exceedingly long.
75pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> {
76    if name.is_empty() {
77        return Err(Error::EmptyFilename);
78    }
79
80    match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) {
81        // No replacements mean no control characters.
82        std::borrow::Cow::Borrowed(_) => Ok(()),
83        std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename {
84            filename: sanitized,
85        }),
86    }
87}
88
89/// Returns `true` if ZIP validation is disabled.
90pub(crate) fn insecure_no_validate() -> bool {
91    // TODO(charlie) Parse this in `EnvironmentOptions`.
92    let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
93        return false;
94    };
95    let Some(value) = value.to_str() else {
96        return false;
97    };
98    matches!(
99        value.to_lowercase().as_str(),
100        "y" | "yes" | "t" | "true" | "on" | "1"
101    )
102}
103
#[cfg(test)]
mod tests {
    /// Names the validator is expected to accept.
    const ACCEPTED: &[&str] = &["normal.txt", "__init__.py", "fine i guess.py", "🌈.py"];

    /// Names the validator is expected to reject: the empty name, followed by
    /// names that each embed one character the validator should refuse.
    const REJECTED: &[&str] = &[
        "",
        "new\nline.py",
        "carriage\rreturn.py",
        "tab\tcharacter.py",
        "null\0byte.py",
        "control\x01code.py",
        "control\x02code.py",
        "control\x03code.py",
        "control\x04code.py",
        "backspace\x08code.py",
        "delete\x7fcode.py",
    ];

    /// Every accepted name validates and every rejected name fails.
    #[test]
    fn test_validate_archive_member_name() {
        let expectations = ACCEPTED
            .iter()
            .map(|name| (*name, true))
            .chain(REJECTED.iter().map(|name| (*name, false)));

        for (testcase, ok) in expectations {
            let accepted = super::validate_archive_member_name(testcase).is_ok();
            assert_eq!(accepted, ok, "testcase: {testcase}");
        }
    }

    /// The filename carried by the error has the offending character replaced
    /// rather than echoed back verbatim.
    #[test]
    fn test_unacceptable_filename_error_replaces_control_characters() {
        let Err(super::Error::UnacceptableFilename { filename }) =
            super::validate_archive_member_name("bad\nname")
        else {
            panic!("expected UnacceptableFilename error");
        };
        assert_eq!(filename, "bad�name");
    }
}