quitters/lib.rs
1#![forbid(unsafe_code)]
2
3//! Obtains the dependency list from a compiled Rust binary by parsing its panic messages.
4//! Recovers both crate names and versions.
5//!
6//! ## Caveats
7//! * If the crate never panics, it will not show up.
8//! The Rust compiler is very good at removing unreachable panics,
9//! so we can only discover around half of all dependencies.
10//! * C code such as `openssl-src` never shows up, because it can't panic.
11//! * Only crates installed from a registry are discovered. Crates from local workspace or git don't show up.
12//!
13//! # Alternatives
14//! [`cargo auditable`](https://crates.io/crates/cargo-auditable) embeds the **complete** dependency information
15//! into binaries, which can then be recovered using [`auditable-info`](https://crates.io/crates/auditable-info).
16//! It should be used instead of `quitters` whenever possible, unless you're specifically after panics.
17
18use std::collections::BTreeSet;
19
20use once_cell::sync::OnceCell;
21use regex::bytes::Regex;
22use semver::Version;
23
24// This regex works surprisingly well. We can even split the crate name and version reliably
25// because crate names publishable on crates.io cannot contain the `.` character,
26// which *must* appear in the version string.
27// Versions like "1" are not valid in Cargo, or under the semver spec.
// The pattern is written with `/` separators; `versions` derives the Windows variant
// from it by replacing every `/` with an escaped backslash.
28const REGEX_STRING: &str =
29 "(?-u)cargo/registry/src/[^/]+/(?P<crate>[0-9A-Za-z_-]+)-(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+[0-9A-Za-z+.-]*)/";
30
31// Compiled regular expressions use interior mutability and may cause contention
32// in heavily multi-threaded workloads. This should not be an issue here
33// because we only use `.captures_iter()`, which acquires the mutable state
34// only once per invocation and for a short amount of time:
35// https://github.com/rust-lang/regex/blob/0d0023e412f7ead27b0809f5d2f95690d0f0eaef/PERFORMANCE.md#using-a-regex-from-multiple-threads
36// This could be refactored into cloning in case it *does* end up being a bottleneck in practice,
37// which would sacrifice ergonomics.
// Each cell is filled lazily by `versions` on first use: `REGEX_UNIX` holds the pattern
// as written, `REGEX_WINDOWS` the backslash-separated variant.
38static REGEX_UNIX: OnceCell<Regex> = OnceCell::new();
39static REGEX_WINDOWS: OnceCell<Regex> = OnceCell::new();
40
41/// Obtains the dependency list from a compiled Rust binary by parsing its panic messages.
42///
43/// ## Caveats
44/// * If the crate never panics, it will not show up.
45/// The Rust compiler is very good at removing unreachable panics,
46/// so we can only discover at around a half of all dependencies.
47/// * C code such as `openssl-src` never shows up, because it can't panic.
48/// * Only crates installed from a registry are discovered. Crates from local workspace or git don't show up.
49///
50/// ## Usage
51/// ```rust,ignore
52/// let file = std::fs::read("target/release/my-program")?;
53/// let versions = quitters::versions(&file);
54/// for (krate, version) in versions.iter() {
55/// println!("{krate} v{version}")
56/// }
57/// ```
58pub fn versions(data: &[u8]) -> BTreeSet<(&str, Version)> {
59 // You might think that just making two functions, versions_unix and versions_windows
60 // and then calling the appropriate function for your platform would be faster,
61 // since \ paths cannot be used on Unix. I briefly thought so!
62 // However, cross-compilation from Windows to Unix would put \ paths into a Unix binary.
63 // So that optimization would miss cross-compiled binaries.
64 // It only gets you a 20% reduction in runtime because the I/O dominates anyway.
65 //
66 // A significant optimization to tackle the I/O problem would be only ever reading things
67 // into the CPU cache as opposed to loading the entire file to memory.
68 // Basically streaming the data. This requires special handling of the start and end,
69 // so either needs a state-machine-based parser like nom or capping the possible match length.
70 // The latter is doable but only makes sense if it turns out that the current approach is too slow.
71 let re = REGEX_UNIX.get_or_init(|| Regex::new(REGEX_STRING).unwrap());
72 let versions = versions_for_regex(data, re);
73 if !versions.is_empty() {
74 versions
75 } else {
76 // Sadly the single-pass RegexSet only lets you check for presence of matches,
77 // and doesn't let you find out where they are.
78 // And using a composite regex like `unix_regex|windows_regex` is as slow as two passes,
79 // so we'll just use two passes. That's what Regex crate documentation recommends, too.
80 let re = REGEX_WINDOWS.get_or_init(|| {
81 let windows_regex = REGEX_STRING.replace('/', "\\\\");
82 Regex::new(&windows_regex).unwrap()
83 });
84 versions_for_regex(data, re)
85 }
86}
87
88fn versions_for_regex<'a>(data: &'a [u8], re: &Regex) -> BTreeSet<(&'a str, Version)> {
89 let mut versions = BTreeSet::new();
90 for c in re.captures_iter(data) {
91 if let Some(parsed) = parse_capture(c) {
92 versions.insert(parsed);
93 }
94 }
95 versions
96}
97
98/// Extracts crate and version from a single regex match
99fn parse_capture(c: regex::bytes::Captures) -> Option<(&str, Version)> {
100 Some((
101 std::str::from_utf8(c.name("crate").unwrap().as_bytes()).ok()?,
102 Version::parse(std::str::from_utf8(c.name("version").unwrap().as_bytes()).ok()?).ok()?,
103 ))
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Two registry paths embedded in one run of bytes must both be found.
    #[test]
    fn two_crates_one_line() {
        let data = b"\x7FELF/cargo/registry/src/github.com-1ecc6299db9ec823/xz2-0.1.6/src/stream.rsunknown return code: lzma data errorNoCheckProgramMemFormatOptionszstd returned null pointer when creating new context/cargo/registry/src/github.com-1ecc6299db9ec823/zstd-safe-5.0.2+zstd.1.5.2/src/lib.rsbad error message from zstdGiven position outside of the buffer bounds.";
        let found = versions(data);
        assert_eq!(found.len(), 2);
    }

    /// Pre-release and build-metadata suffixes must survive extraction intact.
    #[test]
    fn complex_versions() {
        let suffixes = [
            "",
            "+foobar",
            "+Fo0bar",
            "+zstd.1.5.2",
            "-rc",
            "-alpha.1",
            "-alpha.1+zstd.1.5.2",
        ];
        for version_suffix in suffixes {
            let haystack = format!("new context/cargo/registry/src/github.com-1ecc6299db9ec823/zstd-safe-5.0.2{}/src/lib.rsbad error message from zstdGiven position outside of the buffer bounds.", version_suffix);
            let wanted = Version::parse(&format!("5.0.2{}", version_suffix)).unwrap();
            let found = versions(haystack.as_bytes());
            assert!(found.contains(&("zstd-safe", wanted)));
        }
    }

    /// Backslash-separated registry paths are picked up by the Windows fallback.
    #[test]
    fn windows_matching() {
        let data = br"C:\Users\runneradmin\.cargo\registry\src\github.com-1ecc6299db9ec823\rustc-demangle-0.1.21\src\legacy.rs";
        let wanted = ("rustc-demangle", Version::parse("0.1.21").unwrap());
        assert!(versions(data).contains(&wanted))
    }
}
139}