tame_index/
utils.rs

1//! Provides several useful functions for determining the disk location of a
2//! remote registry index
3
4use crate::{Error, InvalidUrl, InvalidUrlError, PathBuf};
5
6pub mod flock;
7#[cfg(feature = "__git")]
8pub mod git;
9
10/// Returns the storage directory (in utf-8) used by Cargo, often known as
11/// `.cargo` or `CARGO_HOME`
12#[inline]
13pub fn cargo_home() -> Result<crate::PathBuf, crate::Error> {
14    Ok(crate::PathBuf::from_path_buf(home::cargo_home()?)?)
15}
16
/// Writes the lowercase hexadecimal form of `input` into `output` and returns
/// the written bytes as a string slice borrowed from `output`
///
/// Each input byte becomes two characters: high nibble first, then low nibble.
///
/// # Panics
///
/// Panics if `output` is not exactly twice as long as `input`
pub(crate) fn encode_hex<'out, const I: usize, const O: usize>(
    input: &[u8; I],
    output: &'out mut [u8; O],
) -> &'out str {
    assert_eq!(I * 2, O);

    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // The assertion above guarantees there is exactly one 2-byte chunk of
    // output for every input byte
    for (pair, &byte) in output.chunks_exact_mut(2).zip(input) {
        pair[0] = HEX_DIGITS[usize::from(byte >> 4)];
        pair[1] = HEX_DIGITS[usize::from(byte & 0x0f)];
    }

    // SAFETY: every byte of `output` was just overwritten with an entry from
    // `HEX_DIGITS`, all of which are ASCII
    #[allow(unsafe_code)]
    unsafe {
        std::str::from_utf8_unchecked(output)
    }
}
38
/// The details for a remote url
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that values can be
/// printed, duplicated and compared, e.g. in assertions or when calling
/// `unwrap_err` on a `Result` that carries a `UrlDir`
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlDir {
    /// The unique directory name for the url
    pub dir_name: String,
    /// The canonical url for the remote url
    pub canonical: String,
}
46
47/// Canonicalizes a `git+` url the same as cargo.
48///
49/// This is similar to cargo's `CanonicalUrl`, which previously was only used for
50/// git+ url's, but since cargo 1.85.0 is now used as part of the hash for all
51/// sources. Note that cargo removes queries and fragments _only_ from git+ URLs
52/// and that happens before canonicalization, so this function does not handle them
53/// specifically as we only care about sparse and git registry URLs
54pub fn canonicalize_url(mut url: &str) -> Result<String, Error> {
55    let scheme_ind = url.find("://").map(|i| i + 3).ok_or_else(|| InvalidUrl {
56        url: url.to_owned(),
57        source: InvalidUrlError::MissingScheme,
58    })?;
59
60    // Could use the Url crate for this, but it's simple enough and we don't
61    // need to deal with every possible url (I hope...)
62    let (host, path_length) = match url[scheme_ind..].find('/') {
63        Some(end) => (
64            &url[scheme_ind..scheme_ind + end],
65            url.len() - (end + scheme_ind),
66        ),
67        None => (&url[scheme_ind..], 0),
68    };
69
70    // trim port
71    let host = host.split(':').next().unwrap();
72
73    if path_length > 1 && url.ends_with('/') {
74        url = &url[..url.len() - 1];
75    }
76
77    if url.ends_with(".git") {
78        url = &url[..url.len() - 4];
79    }
80
81    // cargo special cases github.com for reasons, so do the same
82    Ok(if host == "github.com" {
83        url.to_lowercase()
84    } else {
85        url.to_owned()
86    })
87}
88
89/// Converts a url into a relative path and its canonical form
90///
91/// Cargo uses a small algorithm to create unique directory names for any url
92/// so that they can be located in the same root without clashing
93///
94/// This function currently only supports 2 different URL kinds
95///
96/// * `(?:registry+)?<git registry url>`
97/// * `sparse+<sparse registry url>`
98#[allow(deprecated)]
99pub fn url_to_local_dir(url: &str, stable: bool) -> Result<UrlDir, Error> {
100    use std::hash::{Hash, Hasher, SipHasher};
101
102    // This is extremely irritating, but we need to use usize for the kind, which
103    // impacts the hash calculation, making it different based on pointer size.
104    //
105    // The reason for this is that cargo just uses #[derive(Hash)] for the SourceKind
106    // https://github.com/rust-lang/cargo/blob/88b4b3bcd3bbb66873734d97ae412a6bcf9b75ee/crates/cargo-util-schemas/src/core/source_kind.rs#L4-L5,
107    // which then uses https://doc.rust-lang.org/core/intrinsics/fn.discriminant_value.html
108    // to get the discriminant and add to the hash...and that is pointer width :(
109    //
110    // Note that these are isize instead of usize because contrary to what one
111    // would expect from the automatic discriminant assigned by rustc starting
112    // at 0 and incrementing by 1 each time...it's actually signed, which can
113    // be seen by overriding `Hasher::write_isize` and hashing a discriminant
114    //
115    // This is unfortunately a hard requirement because of https://github.com/rust-lang/rustc-stable-hash/blob/24e9848c89917abca155c8f854118e6d00ad4a30/src/stable_hasher.rs#L263-L299
116    // where it specializes _only_ isize to only write a u8 if the value is less
117    // than 0xff, something that doesn't happen for usize, which of course affects
118    // the calculated hash
119    const GIT_REGISTRY: isize = 2;
120    const SPARSE_REGISTRY: isize = 3;
121
122    // Ensure we have a registry or bare url
123    let (url, scheme_ind, kind) = {
124        let mut scheme_ind = url.find("://").ok_or_else(|| InvalidUrl {
125            url: url.to_owned(),
126            source: InvalidUrlError::MissingScheme,
127        })?;
128
129        let scheme_str = &url[..scheme_ind];
130
131        let (url, kind) = match scheme_str.split_once('+') {
132            Some(("sparse", _)) => (url, SPARSE_REGISTRY),
133            // If there is no scheme modifier, assume git registry, same as cargo
134            None => (url, GIT_REGISTRY),
135            Some(("registry", _)) => {
136                scheme_ind -= 9;
137                (&url[9..], GIT_REGISTRY)
138            }
139            Some((_, _)) => {
140                return Err(InvalidUrl {
141                    url: url.to_owned(),
142                    source: InvalidUrlError::UnknownSchemeModifier,
143                }
144                .into());
145            }
146        };
147
148        (url, scheme_ind + 3, kind)
149    };
150
151    let (dir_name, url) = if stable {
152        let canonical = canonicalize_url(url)?;
153
154        let hash = {
155            let mut hasher = rustc_stable_hash::StableSipHasher128::new();
156            kind.hash(&mut hasher);
157            canonical.hash(&mut hasher);
158            Hasher::finish(&hasher)
159        };
160
161        let mut raw_ident = [0u8; 16];
162        let ident = encode_hex(&hash.to_le_bytes(), &mut raw_ident);
163
164        let dir_name = {
165            let host = match url[scheme_ind..].find('/') {
166                Some(end) => &url[scheme_ind..scheme_ind + end],
167                None => &url[scheme_ind..],
168            };
169
170            // trim port
171            let host = host.split(':').next().unwrap();
172            host.split_once('@').map_or(host, |(_user, host)| host)
173        };
174
175        (format!("{dir_name}-{ident}"), canonical)
176    } else {
177        let hash = {
178            let mut hasher = SipHasher::new();
179            kind.hash(&mut hasher);
180            url.hash(&mut hasher);
181            hasher.finish()
182        };
183        let mut raw_ident = [0u8; 16];
184        let ident = encode_hex(&hash.to_le_bytes(), &mut raw_ident);
185
186        // Could use the Url crate for this, but it's simple enough and we don't
187        // need to deal with every possible url (I hope...)
188        let host = match url[scheme_ind..].find('/') {
189            Some(end) => &url[scheme_ind..scheme_ind + end],
190            None => &url[scheme_ind..],
191        };
192
193        // trim port
194        let host = host.split(':').next().unwrap();
195        let host = host.split_once('@').map_or(host, |(_user, host)| host);
196
197        (format!("{host}-{ident}"), url.to_owned())
198    };
199
200    Ok(UrlDir {
201        dir_name,
202        canonical: url,
203    })
204}
205
206/// Get the disk location of the specified url, as well as its canonical form
207///
208/// If not specified, the root directory is the user's default cargo home
209pub fn get_index_details(
210    url: &str,
211    root: Option<PathBuf>,
212    stable: bool,
213) -> Result<(PathBuf, String), Error> {
214    let url_dir = url_to_local_dir(url, stable)?;
215
216    let mut path = match root {
217        Some(path) => path,
218        None => cargo_home()?,
219    };
220
221    path.push("registry");
222    path.push("index");
223    path.push(url_dir.dir_name);
224
225    Ok((path, url_dir.canonical))
226}
227
228use std::io;
229
230/// Parses the output of `cargo -V` to get the semver
231///
232/// This handles the 2? cases that I am aware of
233///
234/// 1. Official cargo prints `cargo <semver>(?:-<channel>)? (<sha1[..7]> <date>)`
235/// 2. Non-official builds may drop the additional metadata and just print `cargo <semver>`
236#[inline]
237fn parse_cargo_semver(s: &str) -> Result<semver::Version, Error> {
238    let semver = s.trim().split(' ').nth(1).ok_or_else(|| {
239        io::Error::new(
240            io::ErrorKind::InvalidData,
241            "cargo version information was in an invalid format",
242        )
243    })?;
244
245    Ok(semver.parse()?)
246}
247
248/// Retrieves the current version of cargo being used
249pub fn cargo_version(working_dir: Option<&crate::Path>) -> Result<crate::Version, Error> {
250    let mut cargo = std::process::Command::new(
251        std::env::var_os("CARGO")
252            .as_deref()
253            .unwrap_or(std::ffi::OsStr::new("cargo")),
254    );
255
256    cargo.arg("-V");
257
258    if let Some(wd) = working_dir {
259        cargo.current_dir(wd);
260    }
261
262    cargo.stdout(std::process::Stdio::piped());
263
264    let output = cargo.output()?;
265    if !output.status.success() {
266        return Err(io::Error::other("failed to request cargo version information").into());
267    }
268
269    let stdout = String::from_utf8(output.stdout)
270        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
271
272    parse_cargo_semver(&stdout)
273}
274
#[cfg(test)]
mod test {
    use super::{get_index_details, url_to_local_dir};
    use crate::PathBuf;

    /// Resolves `url` against an empty root so the resulting path is relative
    fn details(url: &str, stable: bool) -> (PathBuf, String) {
        get_index_details(url, Some(PathBuf::new()), stable).unwrap()
    }

    /// Non-stable hashing on 64-bit little-endian targets
    #[test]
    #[cfg(all(target_pointer_width = "64", target_endian = "little"))]
    fn matches_cargo() {
        const NON_CRATES_IO_GITHUB: &str = "https://github.com/EmbarkStudios/cargo-test-index";
        const NON_GITHUB_INDEX: &str =
            "https://dl.cloudsmith.io/public/embark/deny/cargo/index.git";

        // (url, expected path); the url is expected to come back untouched
        let cases = [
            (
                crate::CRATES_IO_INDEX,
                "registry/index/github.com-1ecc6299db9ec823",
            ),
            (
                crate::CRATES_IO_HTTP_INDEX,
                "registry/index/index.crates.io-6f17d22bba15001f",
            ),
            (
                NON_CRATES_IO_GITHUB,
                "registry/index/github.com-655148e0a865c9e0",
            ),
            (
                NON_GITHUB_INDEX,
                "registry/index/dl.cloudsmith.io-955e041deb7d37e6",
            ),
        ];

        for (url, expected_path) in cases {
            assert_eq!(
                details(url, false),
                (PathBuf::from(expected_path), url.to_owned())
            );
        }

        // Just verifies that any non git+ or sparse+ url is treated as a git
        // registry for purposes of hashing
        const FAKE_REGISTRY: &str = "https://github.com/RustSec/advisory-db";

        let fake = url_to_local_dir(FAKE_REGISTRY, false).unwrap();
        assert_eq!(fake.dir_name, "github.com-a946fc29ac602819");
    }

    /// Stable hashing, as used since cargo 1.85.0
    #[test]
    fn matches_cargo_1850() {
        // (url, expected path, expected canonical url)
        let cases = [
            (
                crate::CRATES_IO_HTTP_INDEX,
                "registry/index/index.crates.io-1949cf8c6b5b557f",
                crate::CRATES_IO_HTTP_INDEX,
            ),
            (
                crate::CRATES_IO_INDEX,
                "registry/index/github.com-25cdd57fae9f0462",
                crate::CRATES_IO_INDEX,
            ),
            (
                "https://github.com/EmbarkStudios/cargo-test-index",
                "registry/index/github.com-513223c940e0f1e9",
                "https://github.com/embarkstudios/cargo-test-index",
            ),
            (
                "sparse+https://cargo.cloudsmith.io/embark/deny/",
                "registry/index/cargo.cloudsmith.io-2fc1f5411e6e72fd",
                "sparse+https://cargo.cloudsmith.io/embark/deny",
            ),
        ];

        for (url, expected_path, expected_canonical) in cases {
            assert_eq!(
                details(url, true),
                (PathBuf::from(expected_path), expected_canonical.to_owned())
            );
        }
    }

    /// Non-stable hashing on 32-bit little-endian targets
    #[test]
    #[cfg(all(target_pointer_width = "32", target_endian = "little"))]
    fn matches_cargo_32bit() {
        let expected = (
            PathBuf::from("registry/index/index.crates.io-1cd66030c949c28d"),
            crate::CRATES_IO_HTTP_INDEX.to_owned(),
        );

        assert_eq!(details(crate::CRATES_IO_HTTP_INDEX, false), expected);
    }

    /// Runs the real cargo binary and sanity checks the reported version
    #[test]
    fn gets_cargo_version() {
        const MINIMUM: semver::Version = semver::Version::new(1, 70, 0);

        let reported = super::cargo_version(None).unwrap();
        assert!(reported >= MINIMUM);
    }

    /// Covers the known shapes of `cargo -V` output
    #[test]
    fn parses_cargo_semver() {
        use super::parse_cargo_semver as pcs;

        let cases = [
            (
                "cargo 1.71.0 (cfd3bbd8f 2023-06-08)\n",
                semver::Version::new(1, 71, 0),
            ),
            (
                "cargo 1.73.0-nightly (7ac9416d8 2023-07-24)\n",
                "1.73.0-nightly".parse().unwrap(),
            ),
            ("cargo 1.70.0\n", semver::Version::new(1, 70, 0)),
        ];

        for (printed, expected) in cases {
            assert_eq!(pcs(printed).unwrap(), expected);
        }
    }
}