crev_lib/util/
mod.rs

1use crev_common::sanitize_name_for_fs;
2pub use crev_common::{run_with_shell_cmd, store_str_to_file, store_to_file_with};
3use crev_data::proof;
4use std::borrow::Cow;
5use std::ffi::OsStr;
6use std::io;
7use std::path::{Path, PathBuf};
8
9pub mod git;
10
11pub fn get_documentation_for(content: &impl proof::Content) -> &'static str {
12    match content.kind() {
13        proof::Trust::KIND => include_str!("../../rc/doc/editing-trust.md"),
14        proof::CodeReview::KIND => include_str!("../../rc/doc/editing-code-review.md"),
15        proof::PackageReview::KIND => include_str!("../../rc/doc/editing-package-review.md"),
16        _ => "unknown proof type",
17    }
18}
19
/// Sets the permission bits of `path` to `0o600` (Unix targets only).
///
/// # Errors
///
/// Returns whatever error `std::fs::set_permissions` reports.
#[cfg(target_family = "unix")]
pub fn chmod_path_to_600(path: &Path) -> io::Result<()> {
    use std::fs::{self, Permissions};
    use std::os::unix::fs::PermissionsExt;

    let owner_read_write = Permissions::from_mode(0o600);
    fs::set_permissions(path, owner_read_write)
}
26
/// Counterpart of the Unix `chmod_path_to_600` for every other target family.
///
/// This variant does nothing and always returns `Ok(())`; the path is accepted
/// only so that callers compile unchanged on all platforms.
#[cfg(not(target_family = "unix"))]
pub fn chmod_path_to_600(_path: &Path) -> io::Result<()> {
    // The leading underscore marks the argument as intentionally unused.
    Ok(())
}
31
32pub fn get_recursive_digest_for_paths(
33    root_path: &Path,
34    paths: fnv::FnvHashSet<PathBuf>,
35) -> std::result::Result<crev_data::Digest, crev_recursive_digest::DigestError> {
36    let h = crev_recursive_digest::RecursiveDigest::<crev_common::Blake2b256, _, _>::new()
37        .filter(|entry| {
38            let rel_path = entry
39                .path()
40                .strip_prefix(root_path)
41                .expect("must be prefix");
42            paths.contains(rel_path)
43        })
44        .build();
45
46    let digest_vec = h.get_digest_of(root_path)?;
47    Ok(crev_data::Digest::from_bytes(&digest_vec).unwrap())
48}
49
50pub fn get_recursive_digest_for_dir(
51    root_path: &Path,
52    rel_path_ignore_list: &fnv::FnvHashSet<PathBuf>,
53) -> std::result::Result<Vec<u8>, crev_recursive_digest::DigestError> {
54    let h = crev_recursive_digest::RecursiveDigest::<crev_common::Blake2b256, _, _>::new()
55        .filter(|entry| {
56            let rel_path = entry
57                .path()
58                .strip_prefix(root_path)
59                .expect("must be prefix");
60            !rel_path_ignore_list.contains(rel_path)
61        })
62        .build();
63
64    h.get_digest_of(root_path)
65}
66
67fn mark_dangerous_name(
68    orig_name: &OsStr,
69    parent: &Path,
70    idx: usize,
71    changes: &mut Vec<String>,
72) -> PathBuf {
73    let orig_name = match orig_name.to_str() {
74        Some(s) => s,
75        None => {
76            let name = Path::new(orig_name);
77            let alt =
78                sanitize_name_for_fs(&format!("{} {}", name.display(), idx)).with_extension("CREV");
79            changes.push(format!(
80                "Non-Unicode filename '{}' renamed to '{}'",
81                name.display(),
82                alt.display()
83            ));
84            return alt;
85        }
86    };
87
88    // You don't get to spoof anti-spoofing measure
89    if orig_name.contains(".CREV") || orig_name.contains("-CREV") || orig_name.contains("CREV.") {
90        let alt = sanitize_name_for_fs(orig_name).with_extension("CREV");
91        changes.push(format!(
92            "File '{}' is not from cargo-crev. Renamed to '{}'",
93            orig_name,
94            alt.display()
95        ));
96        return alt;
97    }
98
99    // file-systems may be case-insensitive
100    match orig_name.to_ascii_lowercase().as_str() {
101        "cargo.toml" => {
102            changes
103                .push("Cargo.toml could cause IDEs automatically build dependencies".to_string());
104            PathBuf::from("Cargo.CREV.toml")
105        }
106        ".cargo" => {
107            changes.push(".cargo config can replace linkers, source of dependencies".to_string());
108            PathBuf::from("CREV.cargo")
109        }
110        "config" | "config.toml" if parent.file_name().unwrap() == "cargo" => {
111            changes.push("cargo/config can replace linkers, source of dependencies".to_string());
112            PathBuf::from("config.CREV")
113        }
114        "rust-toolchain" | "rust-toolchain.toml" => {
115            changes
116                .push("rust-toolchain file could unexpectedly replace your compiler".to_string());
117            PathBuf::from(format!("{orig_name}.CREV"))
118        }
119        ".cargo-ok" | ".cargo_vcs_info.json" | ".gitignore" => {
120            // they're safe
121            PathBuf::from(orig_name)
122        }
123        n if n.starts_with('.') => {
124            changes.push(format!("Hidden file: '{orig_name}'"));
125            PathBuf::from(format!("CREV{orig_name}"))
126        }
127        n if n.len() > 250 => {
128            let alt = sanitize_name_for_fs(orig_name).with_extension("CREV");
129            changes.push(format!(
130                "Long file name: '{}' renamed to '{}'",
131                orig_name,
132                alt.display()
133            ));
134            alt
135        }
136        // Are there legit use-cases for Unicode names? Killing it avoids risk of homograph or BIDI spoofing
137        n if n.as_bytes().iter().any(|&c| {
138            c < b' '
139                || c >= 0x7F
140                || matches!(
141                    c,
142                    b'\"' | b'`' | b'$' | b'<' | b'\\' | b'*' | b'?' | b'{' | b'['
143                )
144        }) =>
145        {
146            let alt = sanitize_name_for_fs(orig_name).with_extension("CREV");
147            changes.push(format!(
148                "Name contains metacharacters, unprintables, or non-ASCII: '{}' renamed to '{}'",
149                orig_name,
150                alt.display()
151            ));
152            alt
153        }
154        _ => PathBuf::from(orig_name),
155    }
156}
157
158/// Make a copy of the directory, but skip or rename all files that are potentially dangerous in Cargo projects
159pub fn copy_dir_sanitized(
160    src_dir: &Path,
161    dest_dir: &Path,
162    changes: &mut Vec<String>,
163) -> std::io::Result<()> {
164    for (n, entry) in std::fs::read_dir(src_dir)?.enumerate() {
165        let entry = entry?;
166        let src_path = entry.path();
167        let safe_file_name = mark_dangerous_name(&entry.file_name(), src_dir, n, changes);
168        let dest_path = dest_dir.join(safe_file_name);
169        let ft = entry.file_type()?;
170        if ft.is_symlink() {
171            changes.push(format!(
172                "Symlink not copied. The symlink is in '{}'",
173                src_path.display()
174            ));
175        } else if ft.is_file() {
176            // only obviously non-text files get a pass
177            if is_binary_file_extension(&dest_path) {
178                std::fs::copy(&src_path, &dest_path)?;
179            } else {
180                let input = std::fs::read(&src_path)?;
181                let output = escape_tricky_unicode(&input);
182                if output != input {
183                    changes.push(format!(
184                        "Escaped potentially confusing UTF-8 in '{}'",
185                        src_path.display()
186                    ));
187                }
188                std::fs::write(&dest_path, output)?;
189            }
190        } else {
191            assert!(ft.is_dir());
192            let _ = std::fs::create_dir(&dest_path);
193            copy_dir_sanitized(&src_path, &dest_path, changes)?;
194        }
195    }
196    Ok(())
197}
198
/// Reports whether `path` ends in one of the extensions treated as binary.
///
/// The extension is lower-cased before it is compared against the list below.
/// Paths without an extension, or whose extension is not valid Unicode, are not
/// considered binary.
fn is_binary_file_extension(path: &Path) -> bool {
    const BINARY_EXTENSIONS: [&str; 11] = [
        "bin", "zip", "gz", "xz", "bz2", "jpg", "jpeg", "png", "gif", "exe", "dll",
    ];

    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    let extension = extension.to_lowercase();
    BINARY_EXTENSIONS
        .iter()
        .any(|&known| known == extension.as_str())
}
219
220fn escape_tricky_unicode(input: &[u8]) -> Cow<[u8]> {
221    if input.is_ascii() {
222        return input.into();
223    }
224
225    let mut output = Vec::with_capacity(input.len());
226    for ch in input.utf8_chunks() {
227        output.extend_from_slice(escape_tricky_unicode_str(ch.valid()).as_bytes());
228        output.extend_from_slice(ch.invalid());
229    }
230    output.into()
231}
232
/// Replaces a fixed set of confusing code points with a visible `\u{…}` escape.
///
/// Each listed code point is written as `\u{` + lowercase hex (at least four
/// digits) + `}`; every other character is copied as is. Pure-ASCII input is
/// returned borrowed without allocating.
fn escape_tricky_unicode_str(input: &str) -> Cow<str> {
    use std::fmt::Write;

    if input.is_ascii() {
        return Cow::Borrowed(input);
    }

    let mut escaped = String::with_capacity(input.len());
    for c in input.chars() {
        // https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
        // https://www.unicode.org/L2/L2022/22007r2-avoiding-spoof.pdf
        let is_tricky = matches!(
            c,
            '\u{115F}'..='\u{1160}'
                | '\u{13437}'..='\u{13438}'
                | '\u{1D173}'..='\u{1D17A}'
                | '\u{202A}'..='\u{202E}'
                | '\u{2066}'..='\u{2069}'
                | '\u{206A}'..='\u{206F}'
                | '\u{3164}'
                | '\u{FFA0}'
                | '\u{FFF9}'..='\u{FFFB}'
        );
        if is_tricky {
            // Writing into a `String` cannot fail, so the result is ignored.
            let _ = write!(&mut escaped, "\\u{{{:04x}}}", c as u32);
        } else {
            escaped.push(c);
        }
    }
    Cow::Owned(escaped)
}
257
#[test]
fn escapes_unicode_bidi() {
    // Bidi control characters are escaped; NUL and U+FFFF are passed through.
    let with_bidi = "\u{202A}\u{202B}\u{202C}\u{202D}\u{202E} | \u{2066} | \x00\u{2067} | \u{2068}\u{FFFF} | \u{2069}";
    let expected = "\\u{202a}\\u{202b}\\u{202c}\\u{202d}\\u{202e} | \\u{2066} | \u{0}\\u{2067} | \\u{2068}\u{ffff} | \\u{2069}".as_bytes();
    let escaped = escape_tricky_unicode(with_bidi.as_bytes());
    assert_eq!(expected, &*escaped);

    // Bytes that are not valid UTF-8 must come out exactly as they went in.
    let raw_bytes = &b"ABC\0\0\0\x11\xff \xc0\xfa\xda"[..];
    let passed_through = escape_tricky_unicode(raw_bytes);
    assert_eq!(raw_bytes, &*passed_through);
}