cargo_deny/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub use semver::Version;
4use std::{cmp, collections::BTreeMap, fmt};
5use url::Url;
6
7pub mod advisories;
8pub mod bans;
9pub mod cfg;
10pub mod diag;
11/// Configuration and logic for checking crate licenses
12pub mod licenses;
13pub mod root_cfg;
14pub mod sarif;
15pub mod sources;
16
17#[doc(hidden)]
18pub mod test_utils;
19
20pub use camino::{Utf8Path as Path, Utf8PathBuf as PathBuf};
21pub use cfg::UnvalidatedConfig;
22use krates::cm;
23pub use krates::{DepKind, Kid};
24pub use toml_span::{
25    Deserialize, Error,
26    span::{Span, Spanned},
27};
28
/// The possible lint levels for the various lints. These function similarly
/// to the standard [Rust lint levels](https://doc.rust-lang.org/rustc/lints/levels.html)
///
/// The textual names of the variants are kebab-case (`allow`, `warn`, `deny`),
/// and [`LintLevel::Warn`] is the default level.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Default, strum::VariantNames, strum::VariantArray)]
#[cfg_attr(test, derive(serde::Serialize))]
#[cfg_attr(test, serde(rename_all = "kebab-case"))]
#[strum(serialize_all = "kebab-case")]
pub enum LintLevel {
    /// A debug or info diagnostic _may_ be emitted if the lint is violated
    Allow,
    /// A warning will be emitted if the lint is violated, but the command
    /// will succeed
    #[default]
    Warn,
    /// An error will be emitted if the lint is violated, and the command
    /// will fail with a non-zero exit code
    Deny,
}
46
47#[macro_export]
48macro_rules! enum_deser {
49    ($enum:ty) => {
50        impl<'de> toml_span::Deserialize<'de> for $enum {
51            fn deserialize(
52                value: &mut toml_span::value::Value<'de>,
53            ) -> Result<Self, toml_span::DeserError> {
54                let s = value.take_string(Some(stringify!($enum)))?;
55
56                use strum::{VariantArray, VariantNames};
57
58                let Some(pos) = <$enum as VariantNames>::VARIANTS
59                    .iter()
60                    .position(|v| *v == s.as_ref())
61                else {
62                    return Err(toml_span::Error::from((
63                        toml_span::ErrorKind::UnexpectedValue {
64                            expected: <$enum as VariantNames>::VARIANTS,
65                            value: None,
66                        },
67                        value.span,
68                    ))
69                    .into());
70                };
71
72                Ok(<$enum as VariantArray>::VARIANTS[pos])
73            }
74        }
75    };
76}
77
78enum_deser!(LintLevel);
79
/// The source a crate was retrieved from, as parsed from its source url
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Source {
    /// crates.io, the boolean indicates whether it is a sparse index
    CratesIo(bool),
    /// A remote git patch
    Git {
        /// The kind of git reference (branch, tag or rev) the url specifies
        spec: GitSpec,
        /// The url of the repository. When produced by `Source::from_metadata`
        /// it has been passed through `normalize_git_url`
        url: Url,
        /// The value of the reference named by `spec`, if the url had one
        spec_value: Option<String>,
    },
    /// A remote non-sparse registry index
    Registry(Url),
    /// A remote sparse index
    Sparse(Url),
}
95
96/// The directory name under which crates sourced from the crates.io sparse
97/// registry are placed
98fn crates_io_sparse_dir() -> &'static str {
99    static mut CRATES_IO_SPARSE_DIR: String = String::new();
100    static CRATES_IO_INIT: parking_lot::Once = parking_lot::Once::new();
101
102    #[allow(unsafe_code)]
103    // SAFETY: We're mutating a static, but we only allow one mutation
104    unsafe {
105        CRATES_IO_INIT.call_once(|| {
106            let Ok(version) = tame_index::utils::cargo_version(None) else {
107                return;
108            };
109            let Ok(url_dir) = tame_index::utils::url_to_local_dir(
110                tame_index::CRATES_IO_HTTP_INDEX,
111                version >= semver::Version::new(1, 85, 0),
112            ) else {
113                return;
114            };
115            CRATES_IO_SPARSE_DIR = url_dir.dir_name;
116        });
117
118        #[allow(static_mut_refs)]
119        &CRATES_IO_SPARSE_DIR
120    }
121}
122
123impl Source {
124    pub fn crates_io(is_sparse: bool) -> Self {
125        Self::CratesIo(is_sparse)
126    }
127
128    /// Parses the source url to get its kind
129    ///
130    /// Note that the path is the path to the manifest of the package. This is
131    /// used to determine if the crates.io registry is git or sparse, as, currently,
132    /// cargo always uses the git registry+ url for crates.io, even if it uses the
133    /// sparse registry.
134    ///
135    /// This method therefore assumes that the crates sources are laid out in the
136    /// canonical cargo structure, though it can be rooted somewhere other than
137    /// `CARGO_HOME`
138    fn from_metadata(urls: String, manifest_path: &Path) -> anyhow::Result<Self> {
139        use anyhow::Context as _;
140
141        let (kind, url_str) = urls
142            .split_once('+')
143            .with_context(|| format!("'{urls}' is not a valid crate source"))?;
144
145        match kind {
146            "sparse" => {
147                // This code won't ever be hit in current cargo, but could in the future
148                if urls == tame_index::CRATES_IO_HTTP_INDEX {
149                    Ok(Self::crates_io(true))
150                } else {
151                    Url::parse(&urls)
152                        .map(Self::Sparse)
153                        .context("failed to parse url")
154                }
155            }
156            "registry" => {
157                if url_str == tame_index::CRATES_IO_INDEX {
158                    // registry/src/index.crates.io-6f17d22bba15001f/crate-version/Cargo.toml
159                    let is_sparse = manifest_path.ancestors().nth(2).is_some_and(|dir| {
160                        dir.file_name()
161                            .is_some_and(|dir_name| dir_name == crates_io_sparse_dir())
162                    });
163                    Ok(Self::crates_io(is_sparse))
164                } else {
165                    Url::parse(url_str)
166                        .map(Self::Registry)
167                        .context("failed to parse url")
168                }
169            }
170            "git" => {
171                let mut url = Url::parse(url_str).context("failed to parse url")?;
172                let (spec, spec_value) = normalize_git_url(&mut url);
173
174                Ok(Self::Git {
175                    url,
176                    spec,
177                    spec_value,
178                })
179            }
180            unknown => anyhow::bail!("unknown source spec '{unknown}' for url {urls}"),
181        }
182    }
183
184    #[inline]
185    pub fn is_git(&self) -> bool {
186        matches!(self, Self::Git { .. })
187    }
188
189    #[inline]
190    pub fn git_spec(&self) -> Option<GitSpec> {
191        let Self::Git { spec, .. } = self else {
192            return None;
193        };
194        Some(*spec)
195    }
196
197    #[inline]
198    pub fn is_registry(&self) -> bool {
199        !self.is_git()
200    }
201
202    #[inline]
203    pub fn is_crates_io(&self) -> bool {
204        matches!(self, Self::CratesIo(_))
205    }
206
207    #[inline]
208    pub fn to_rustsec(&self) -> rustsec::package::SourceId {
209        use rustsec::package::SourceId;
210        // TODO: Change this once rustsec supports sparse indices
211        match self {
212            Self::CratesIo(_) => SourceId::default(),
213            Self::Registry(url) => SourceId::for_registry(url).unwrap(),
214            Self::Sparse(sparse) => {
215                // There is currently no way to publicly construct a sparse registry
216                // id other than this method
217                SourceId::from_url(sparse.as_str()).unwrap()
218            }
219            Self::Git { .. } => unreachable!(),
220        }
221    }
222
223    #[inline]
224    pub fn matches_rustsec(&self, sid: Option<&rustsec::package::SourceId>) -> bool {
225        let Some(sid) = sid else {
226            return self.is_crates_io();
227        };
228        if !sid.is_remote_registry() {
229            return false;
230        }
231
232        let (Self::Registry(url) | Self::Sparse(url)) = self else {
233            return false;
234        };
235        sid.url() == url
236    }
237}
238
239impl fmt::Display for Source {
240    #[inline]
241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
242        match self {
243            Self::CratesIo(_) => {
244                write!(f, "registry+{}", tame_index::CRATES_IO_INDEX)
245            }
246            Self::Git { url, .. } => {
247                write!(f, "git+{url}")
248            }
249            Self::Registry(url) => {
250                write!(f, "registry+{url}")
251            }
252            Self::Sparse(url) => {
253                write!(f, "{url}")
254            }
255        }
256    }
257}
258
/// A single package in the crate graph.
///
/// Instances are created from a `cm::Package`, most fields are taken as-is
/// from the package field they correspond to. Note that ordering and equality
/// only take the `id` into account.
#[derive(Debug)]
pub struct Krate {
    /// The name of the package
    pub name: String,
    /// The unique id of the package, converted from the package's id
    pub id: Kid,
    /// The version of the package
    pub version: Version,
    /// Where the package was retrieved from. `None` if the package has no
    /// source, or if its source could not be parsed
    pub source: Option<Source>,
    /// The `authors` of the package
    pub authors: Vec<String>,
    /// The `repository` of the package, if any
    pub repository: Option<String>,
    /// The `description` of the package, if any
    pub description: Option<String>,
    /// Path to the manifest of the package
    pub manifest_path: PathBuf,
    /// The `license` of the package, if any
    pub license: Option<String>,
    /// The `license_file` of the package, if any
    pub license_file: Option<PathBuf>,
    /// The dependencies declared by the package
    pub deps: Vec<cm::Dependency>,
    /// The `features` map of the package
    pub features: BTreeMap<String, Vec<String>>,
    /// The targets of the package
    pub targets: Vec<cm::Target>,
    /// The `publish` setting of the package. See `Krate::is_private` for how
    /// it is interpreted
    pub publish: Option<Vec<String>>,
}
276
/// Test-only default: an unnamed `0.1.0` crate with no source and otherwise
/// empty metadata, meant to be used with struct update syntax
#[cfg(test)]
impl Default for Krate {
    fn default() -> Self {
        // Fields are listed in declaration order
        Self {
            name: String::new(),
            id: Kid::default(),
            version: Version::new(0, 1, 0),
            source: None,
            authors: Vec::new(),
            repository: None,
            description: None,
            manifest_path: PathBuf::new(),
            license: None,
            license_file: None,
            deps: Vec::new(),
            features: BTreeMap::new(),
            targets: Vec::new(),
            publish: None,
        }
    }
}
298
/// Krates are totally ordered, so this always defers to [`Ord::cmp`]
impl PartialOrd for Krate {
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}
304
305impl Ord for Krate {
306    fn cmp(&self, other: &Self) -> cmp::Ordering {
307        self.id.cmp(&other.id)
308    }
309}
310
311impl PartialEq for Krate {
312    fn eq(&self, other: &Self) -> bool {
313        self.id == other.id
314    }
315}
316
317impl Eq for Krate {}
318
319impl krates::KrateDetails for Krate {
320    #[inline]
321    fn name(&self) -> &str {
322        &self.name
323    }
324
325    #[inline]
326    fn version(&self) -> &semver::Version {
327        &self.version
328    }
329}
330
331impl From<cm::Package> for Krate {
332    fn from(pkg: cm::Package) -> Self {
333        let source = pkg.source.and_then(|src| {
334            let url = src.to_string();
335
336            Source::from_metadata(url, &pkg.manifest_path)
337                .map_err(|err| {
338                    log::warn!(
339                        "unable to parse source url for {}:{}: {err}",
340                        pkg.name,
341                        pkg.version
342                    );
343                    err
344                })
345                .ok()
346        });
347
348        Self {
349            name: pkg.name,
350            id: pkg.id.into(),
351            version: pkg.version,
352            authors: pkg.authors,
353            repository: pkg.repository,
354            source,
355            targets: pkg.targets,
356            license: pkg.license,
357            license_file: pkg.license_file,
358            description: pkg.description,
359            manifest_path: pkg.manifest_path,
360            deps: pkg.dependencies,
361            // {
362            //     let mut deps = pkg.dependencies;
363            //     deps.sort_by(|a, b| a.name.cmp(&b.name));
364            //     deps
365            // },
366            features: pkg.features,
367            publish: pkg.publish,
368        }
369    }
370}
371
372impl Krate {
373    /// Returns true if the crate is marked as `publish = false`, or
374    /// it is only published to the specified private registries
375    pub(crate) fn is_private(&self, private_registries: &[&str]) -> bool {
376        self.publish.as_ref().is_some_and(|v| {
377            if v.is_empty() {
378                true
379            } else {
380                v.iter()
381                    .all(|reg| private_registries.contains(&reg.as_str()))
382            }
383        })
384    }
385
386    /// Determines if the specified url matches the source
387    #[inline]
388    pub(crate) fn matches_url(&self, url: &Url, exact: bool) -> bool {
389        let Some(src) = &self.source else {
390            return false;
391        };
392
393        let kurl = match src {
394            Source::CratesIo(_is_sparse) => {
395                // It's irrelevant if it's sparse or not for crates.io, they're the same
396                // index, just different protocols/kinds
397                return url
398                    .as_str()
399                    .ends_with(&tame_index::CRATES_IO_HTTP_INDEX[8..])
400                    || url.as_str().ends_with(&tame_index::CRATES_IO_INDEX[10..]);
401            }
402            Source::Sparse(surl) | Source::Registry(surl) | Source::Git { url: surl, .. } => surl,
403        };
404
405        kurl.host() == url.host()
406            && ((exact && kurl.path() == url.path())
407                || (!exact && kurl.path().starts_with(url.path())))
408    }
409
410    #[inline]
411    pub(crate) fn is_crates_io(&self) -> bool {
412        self.source.as_ref().is_some_and(|src| src.is_crates_io())
413    }
414
415    #[inline]
416    pub(crate) fn is_git_source(&self) -> bool {
417        self.source.as_ref().is_some_and(|src| src.is_git())
418    }
419
420    #[inline]
421    pub(crate) fn is_registry(&self) -> bool {
422        self.source.as_ref().is_some_and(|src| src.is_registry())
423    }
424}
425
426impl fmt::Display for Krate {
427    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
428        write!(f, "{} = {}", self.name, self.version)
429    }
430}
431
/// The `krates` graph type specialized to carry [`Krate`]s
pub type Krates = krates::Krates<Krate>;
433
/// Binary searches the sorted slice for `query`, comparing against the
/// borrowed form of each element.
///
/// Returns `Ok` with the index of a matching element, or `Err` with the index
/// at which `query` could be inserted while keeping the slice sorted. The
/// slice must be sorted by the borrowed form for the result to be meaningful.
#[inline]
pub fn binary_search<T, Q>(s: &[T], query: &Q) -> Result<usize, usize>
where
    T: std::borrow::Borrow<Q>,
    Q: Ord + ?Sized,
{
    s.binary_search_by(|candidate| {
        let key: &Q = candidate.borrow();
        key.cmp(query)
    })
}
442
/// Returns true if the borrowed form of any element of the slice is equal to
/// `query`. The slice is scanned linearly and does not need to be sorted.
#[inline]
pub fn contains<T, Q>(s: &[T], query: &Q) -> bool
where
    T: std::borrow::Borrow<Q>,
    Q: Eq + ?Sized,
{
    for item in s {
        let candidate: &Q = item.borrow();
        if candidate == query {
            return true;
        }
    }

    false
}
451
452#[inline]
453pub fn hash(data: &[u8]) -> u32 {
454    use std::hash::Hasher;
455    // We use the 32-bit hash instead of the 64 even though
456    // it is significantly slower due to the TOML limitation
457    // if only supporting i64
458    let mut xx = twox_hash::XxHash32::default();
459    xx.write(data);
460    xx.finish() as u32
461}
462
/// Common context for the various checks. Some checks require additional
/// information though.
///
/// `T` is the type of the configuration of the check, everything else is
/// borrowed for `'ctx` or copied.
pub struct CheckCtx<'ctx, T> {
    /// The configuration for the check
    pub cfg: T,
    /// The krates graph to check
    pub krates: &'ctx Krates,
    /// The spans for each unique crate in a synthesized "lock file"
    pub krate_spans: &'ctx diag::KrateSpans<'ctx>,
    /// Requests for additional information the check can provide to be
    /// serialized to the diagnostic
    pub serialize_extra: bool,
    /// Allows for ANSI colorization of diagnostic content
    pub colorize: bool,
    /// Log level specified by the user, may be used by checks to determine what
    /// information to emit in diagnostics
    pub log_level: log::LevelFilter,
    /// Files that can show span information in diagnostics
    pub files: &'ctx diag::Files,
}
483
484/// Checks if a version satisfies the specifies the specified version requirement.
485/// If the requirement is `None` then it is also satisfied.
486#[inline]
487pub fn match_req(version: &Version, req: Option<&semver::VersionReq>) -> bool {
488    req.is_none_or(|req| req.matches(version))
489}
490
491#[inline]
492pub fn match_krate(krate: &Krate, pid: &cfg::PackageSpec) -> bool {
493    krate.name == pid.name.value && match_req(&krate.version, pid.version_req.as_ref())
494}
495
496use sources::cfg::GitSpec;
497
498/// Normalizes the URL so that different representations can be compared to each other.
499///
500/// At the moment we just remove a tailing `.git` but there are more possible optimisations.
501///
502/// See <https://github.com/rust-lang/cargo/blob/1f6c6bd5e7bbdf596f7e88e6db347af5268ab113/src/cargo/util/canonical_url.rs#L31-L57>
503/// for what cargo does
504#[inline]
505pub(crate) fn normalize_git_url(url: &mut Url) -> (GitSpec, Option<String>) {
506    const GIT_EXT: &str = ".git";
507
508    let needs_chopping = url.path().ends_with(&GIT_EXT);
509    if needs_chopping {
510        let last = {
511            let last = url.path_segments().unwrap().next_back().unwrap();
512            last[..last.len() - GIT_EXT.len()].to_owned()
513        };
514        url.path_segments_mut().unwrap().pop().push(&last);
515    }
516
517    if url.path().ends_with('/') {
518        url.path_segments_mut().unwrap().pop_if_empty();
519    }
520
521    let mut spec = GitSpec::Any;
522    let mut spec_value = None;
523
524    for (k, v) in url.query_pairs() {
525        spec = match k.as_ref() {
526            "branch" | "ref" => GitSpec::Branch,
527            "tag" => GitSpec::Tag,
528            "rev" => GitSpec::Rev,
529            _ => continue,
530        };
531
532        spec_value = Some(v.into_owned());
533    }
534
535    if url
536        .query_pairs()
537        .any(|(k, v)| k == "branch" && v == "master")
538    {
539        if url.query_pairs().count() == 1 {
540            url.set_query(None);
541        } else {
542            let mut nq = String::new();
543            for (k, v) in url.query_pairs() {
544                if k == "branch" && v == "master" {
545                    continue;
546                }
547
548                use std::fmt::Write;
549                write!(&mut nq, "{k}={v}&").unwrap();
550            }
551
552            // pop trailing &
553            nq.pop();
554            url.set_query(Some(&nq));
555        }
556    }
557
558    (spec, spec_value)
559}
560
561/// Helper function to convert a std `PathBuf` to a camino one
562#[inline]
563#[allow(clippy::disallowed_types)]
564pub fn utf8path(pb: std::path::PathBuf) -> anyhow::Result<PathBuf> {
565    use anyhow::Context;
566    PathBuf::try_from(pb).context("non-utf8 path")
567}
568
569/// Adds the crates.io index with the specified settings to the builder for
570/// feature resolution
571pub fn krates_with_index(
572    kb: &mut krates::Builder,
573    config_root: Option<PathBuf>,
574    cargo_home: Option<PathBuf>,
575) -> anyhow::Result<()> {
576    use anyhow::Context as _;
577    let crates_io = tame_index::IndexUrl::crates_io(config_root, cargo_home.as_deref(), None)
578        .context("unable to determine crates.io url")?;
579
580    let index = tame_index::index::ComboIndexCache::new(
581        tame_index::IndexLocation::new(crates_io).with_root(cargo_home.clone()),
582    )
583    .context("unable to open local crates.io index")?;
584
585    // Note we don't take a lock here ourselves, since we are calling cargo
586    // it will take the lock and only give us results if it gets access, if we
587    // took a look we would deadlock here
588    let lock = tame_index::utils::flock::FileLock::unlocked();
589
590    let index_cache_build = move |krates: std::collections::BTreeSet<String>| {
591        let mut cache = std::collections::BTreeMap::new();
592        for name in krates {
593            let read = || -> Option<krates::index::IndexKrate> {
594                let name = name.as_str().try_into().ok()?;
595                let krate = index.cached_krate(name, &lock).ok()??;
596                let versions = krate
597                    .versions
598                    .into_iter()
599                    .filter_map(|kv| {
600                        // The index (currently) can have both features, and
601                        // features2, the features method gives us an iterator
602                        // over both
603                        kv.version.parse::<semver::Version>().ok().map(|version| {
604                            krates::index::IndexKrateVersion {
605                                version,
606                                features: kv
607                                    .features()
608                                    .map(|(k, v)| (k.clone(), v.clone()))
609                                    .collect(),
610                            }
611                        })
612                    })
613                    .collect();
614
615                Some(krates::index::IndexKrate { versions })
616            };
617
618            let krate = read();
619            cache.insert(name, krate);
620        }
621
622        cache
623    };
624
625    kb.with_crates_io_index(Box::new(index_cache_build));
626
627    Ok(())
628}
629
#[cfg(test)]
mod test {
    use super::{Krate, PathBuf, Source, Url};

    /// A krate sourced from a git repository on `repo1.test.org`, shared by
    /// the url matching tests
    fn repo1_krate() -> Krate {
        let source = Source::from_metadata(
            "git+ssh://git@repo1.test.org/path/test.git".to_owned(),
            &PathBuf::new(),
        )
        .unwrap();

        Krate {
            source: Some(source),
            ..Krate::default()
        }
    }

    #[test]
    fn parses_sources() {
        let empty_dir = super::Path::new("");
        let parse = |src: String| Source::from_metadata(src, empty_dir).unwrap();

        let crates_io_registry = format!("registry+{}", tame_index::CRATES_IO_INDEX);

        let crates_io_git = parse(crates_io_registry.clone());
        let crates_io_sparse = parse(tame_index::CRATES_IO_HTTP_INDEX.to_owned());

        // A registry+ url, but with a manifest located in the sparse directory
        let sparse_manifest = format!(
            "registry/src/{}/cargo-deny-0.69.0/Cargo.toml",
            super::crates_io_sparse_dir(),
        );
        let crates_io_sparse_but_git =
            Source::from_metadata(crates_io_registry, super::Path::new(&sparse_manifest)).unwrap();

        for src in [&crates_io_git, &crates_io_sparse, &crates_io_sparse_but_git] {
            assert!(src.is_registry());
        }
        for src in [&crates_io_git, &crates_io_sparse, &crates_io_sparse_but_git] {
            assert!(src.is_crates_io());
        }

        for registry in [
            "registry+https://my-own-my-precious.com/",
            "sparse+https://my-registry.rs/",
        ] {
            assert!(parse(registry.to_owned()).is_registry());
        }

        let src = parse("git+https://github.com/EmbarkStudios/wasmtime?branch=v6.0.1-profiler#84b8cacceacb585ef53774c3790b2372ba080067".to_owned());

        assert!(src.is_git());
    }

    /// Sanity checks that the crates.io sparse registry still uses the same
    /// local directory. Really this should be doing a cargo invocation, but
    /// meh, we depend on tame-index to stay up to date
    #[test]
    fn validate_crates_io_sparse_dir_name() {
        let cargo_version = tame_index::utils::cargo_version(None).unwrap();
        let stable = cargo_version >= tame_index::Version::new(1, 85, 0);

        let expected =
            tame_index::utils::url_to_local_dir(tame_index::CRATES_IO_HTTP_INDEX, stable)
                .unwrap()
                .dir_name;

        assert_eq!(expected, super::crates_io_sparse_dir());
    }

    #[test]
    fn inexact_match_fails_for_different_hosts() {
        let krate = repo1_krate();
        let url = Url::parse("ssh://git@repo2.test.org:8000").unwrap();

        assert!(!krate.matches_url(&url, false));
    }

    #[test]
    fn inexact_match_passes_for_same_hosts() {
        let krate = repo1_krate();
        let url = Url::parse("ssh://git@repo1.test.org:8000").unwrap();

        assert!(krate.matches_url(&url, false));
    }
}