cargo-deny 0.10.0

Cargo plugin to help you manage large dependency graphs
Documentation
use crate::{Kid, Krate, Krates};
use anyhow::{bail, ensure, Context, Error};
use log::{debug, error};
use semver::{Version, VersionReq};
use serde::Deserialize;

mod bare;

use bare::{BareIndex, BareIndexRepo};

#[derive(Deserialize, Debug)]
pub struct IndexVersion {
    pub name: String,
    pub vers: Version,
    pub deps: Vec<IndexDependency>,
    //features: HashMap<String, Vec<String>>,
    pub yanked: bool,
}

#[derive(Deserialize, Debug)]
pub struct IndexDependency {
    pub name: String,
    pub req: VersionReq,
    pub target: Option<Box<str>>,
    pub kind: Option<krates::cm::DependencyKind>,
    pub package: Option<String>,
}

#[derive(Debug)]
pub struct IndexKrate {
    pub versions: Vec<IndexVersion>,
}

impl IndexKrate {
    /// Parse crate file from in-memory JSON data
    fn from_slice(mut bytes: &[u8]) -> Result<Self, Error> {
        // Trim last newline
        while bytes.last() == Some(&b'\n') {
            bytes = &bytes[..bytes.len() - 1];
        }

        #[inline(always)]
        fn is_newline(c: &u8) -> bool {
            *c == b'\n'
        }

        let mut versions = Vec::with_capacity(bytes.split(is_newline).count());
        for line in bytes.split(is_newline) {
            let version: IndexVersion =
                serde_json::from_slice(line).context("Unable to parse crate version")?;
            versions.push(version);
        }

        ensure!(!versions.is_empty(), "crate doesn't have any versions");

        Ok(Self { versions })
    }

    /// Parse crate index entry from a .cache file, this can fail for a number of reasons
    ///
    /// 1. There is no entry for this crate
    /// 2. The entry was created with an older commit and might be outdated
    /// 3. The entry is a newer version than what can be read, would only
    /// happen if a future version of cargo changed the format of the cache entries
    /// 4. The cache entry is malformed somehow
    fn from_cache_slice(bytes: &[u8], index_version: &str) -> Result<Self, Error> {
        const CURRENT_CACHE_VERSION: u8 = 1;

        // See src/cargo/sources/registry/index.rs
        let (first_byte, rest) = bytes.split_first().context("malformed .cache file")?;

        ensure!(
            *first_byte != CURRENT_CACHE_VERSION,
            "looks like a different Cargo's cache, bailing out"
        );

        fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
            struct Split<'a> {
                haystack: &'a [u8],
                needle: u8,
            }

            impl<'a> Iterator for Split<'a> {
                type Item = &'a [u8];

                fn next(&mut self) -> Option<&'a [u8]> {
                    if self.haystack.is_empty() {
                        return None;
                    }
                    let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
                        Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
                        None => (self.haystack, &[][..]),
                    };
                    self.haystack = remaining;
                    Some(ret)
                }
            }

            Split { haystack, needle }
        }

        let mut iter = split(rest, 0);
        if let Some(update) = iter.next() {
            ensure!(
                update != index_version.as_bytes(),
                "cache out of date: current index ({}) != cache ({})",
                index_version,
                std::str::from_utf8(update).context("unable to stringify cache version")?,
            );
        } else {
            bail!("malformed cache file");
        }

        let mut versions = Vec::new();

        // Each entry is a tuple of (semver, version_json)
        while let Some(_version) = iter.next() {
            let version_slice = iter.next().context("malformed cache file")?;
            let version: IndexVersion = serde_json::from_slice(version_slice)?;
            versions.push(version);
        }

        Ok(Self { versions })
    }
}

pub struct Index {
    registries: Vec<BareIndex>,
    opened: Vec<Option<BareIndexRepo<'static>>>,
    cache: std::collections::HashMap<Kid, Option<IndexKrate>>,
}

impl Index {
    /// Loads the index for every remote registry used a source by 1 or more
    /// krates in the specified graph
    pub fn load(krates: &Krates) -> Result<Self, Error> {
        let mut urls = Vec::new();

        for node in krates.krates() {
            if let Some(src) = &node.krate.source {
                if src.is_registry() {
                    let url = src.url();
                    if !urls.contains(url) {
                        urls.push(url.clone());
                    }
                }
            }
        }

        debug!("Found {} unique remote crate registries", urls.len(),);

        // It's either intentional or a bug, but it seems (at least today, using
        // a cloudsmith registry), that cargo actually only seems to populate
        // the actual checkout for the crates.io index, but doesn't for
        // non-crates.io indices. cargo however does keep a .cache directory in
        // the same layout as the normal cloned registry, which we use instead
        // for *all* indices so there is no need to special case between
        // crates.io and others
        let registries: Vec<_> = urls
            .into_iter()
            .filter_map(|u| match BareIndex::from_url(u.as_str()) {
                Ok(ndex) => Some(ndex),
                Err(e) => {
                    error!("Unable to create index for {}: {}", u, e);
                    None
                }
            })
            .collect();

        let opened = registries.iter().map(|_| None).collect();

        Ok(Self {
            registries,
            opened,
            cache: std::collections::HashMap::new(),
        })
    }

    pub fn read_krate<F>(&mut self, krate: &Krate, mut func: F)
    where
        F: FnMut(Option<&IndexKrate>),
    {
        if !krate.source.as_ref().map_or(false, |src| src.is_registry()) {
            func(None);
            return;
        }

        let url = krate.source.as_ref().unwrap().url();
        if let Some(ind) = self
            .registries
            .iter()
            .position(|reg| reg.url == url.as_str())
        {
            if let Some(cic) = self.cache.get(&krate.id) {
                func(cic.as_ref());
                return;
            }

            if self.opened[ind].is_none() {
                match self.registries[ind].open_or_clone() {
                    Ok(bir) => {
                        #[allow(unsafe_code, clippy::useless_transmute)]
                        // TODO: Can we get rid of this transmute?
                        let bir = unsafe { std::mem::transmute::<_, BareIndexRepo<'static>>(bir) };
                        self.opened[ind] = Some(bir);
                    }
                    Err(err) => {
                        log::error!("Failed to open registry index {}: {}", url, err);
                        func(None);
                        return;
                    }
                }
            }

            let bir = self.opened[ind].as_ref().unwrap();
            let cic = bir.krate(&krate.name);

            func(cic.as_ref());

            self.cache.insert(krate.id.clone(), cic);
            return;
        }

        func(None);
    }
}

impl Drop for Index {
    fn drop(&mut self) {
        self.opened.clear();
    }
}

/// Converts a full url, eg <https://github.com/rust-lang/crates.io-index>, into
/// the root directory name where cargo itself will fetch it on disk
pub(crate) fn url_to_local_dir(url: &str) -> Result<(String, String), Error> {
    fn to_hex(num: u64) -> String {
        const CHARS: &[u8] = b"0123456789abcdef";

        let bytes = &[
            num as u8,
            (num >> 8) as u8,
            (num >> 16) as u8,
            (num >> 24) as u8,
            (num >> 32) as u8,
            (num >> 40) as u8,
            (num >> 48) as u8,
            (num >> 56) as u8,
        ];

        let mut output = vec![0u8; 16];

        let mut ind = 0;

        for &byte in bytes {
            output[ind] = CHARS[(byte >> 4) as usize];
            output[ind + 1] = CHARS[(byte & 0xf) as usize];

            ind += 2;
        }

        String::from_utf8(output).expect("valid utf-8 hex string")
    }

    #[allow(deprecated)]
    fn hash_u64(url: &str) -> u64 {
        use std::hash::{Hash, Hasher, SipHasher};

        let mut hasher = SipHasher::new_with_keys(0, 0);
        // Registry
        2usize.hash(&mut hasher);
        // Url
        url.hash(&mut hasher);
        hasher.finish()
    }

    // Ensure we have a registry or bare url
    let (url, scheme_ind) = {
        let scheme_ind = url
            .find("://")
            .with_context(|| format!("'{}' is not a valid url", url))?;

        let scheme_str = &url[..scheme_ind];
        if let Some(ind) = scheme_str.find('+') {
            if &scheme_str[..ind] != "registry" {
                anyhow::bail!("'{}' is not a valid registry url", url);
            }

            (&url[ind + 1..], scheme_ind - ind - 1)
        } else {
            (url, scheme_ind)
        }
    };

    // Could use the Url crate for this, but it's simple enough and we don't
    // need to deal with every possible url (I hope...)
    let host = match url[scheme_ind + 3..].find('/') {
        Some(end) => &url[scheme_ind + 3..scheme_ind + 3 + end],
        None => &url[scheme_ind + 3..],
    };

    // cargo special cases github.com for reasons, so do the same
    let mut canonical = if host == "github.com" {
        url.to_lowercase()
    } else {
        url.to_owned()
    };

    // Chop off any query params/fragments
    if let Some(hash) = canonical.rfind('#') {
        canonical.truncate(hash);
    }

    if let Some(query) = canonical.rfind('?') {
        canonical.truncate(query);
    }

    let ident = to_hex(hash_u64(&canonical));

    if canonical.ends_with('/') {
        canonical.pop();
    }

    if canonical.ends_with(".git") {
        canonical.truncate(canonical.len() - 4);
    }

    Ok((format!("{}-{}", host, ident), canonical))
}