Skip to main content

cargo_deny/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub use semver::Version;
4use std::{cmp, collections::BTreeMap, fmt};
5use url::Url;
6
7pub mod advisories;
8pub mod bans;
9pub mod cfg;
10pub mod diag;
11/// Configuration and logic for checking crate licenses
12pub mod licenses;
13pub mod root_cfg;
14pub mod sarif;
15pub mod sources;
16
17#[doc(hidden)]
18pub mod test_utils;
19
20pub use camino::{Utf8Path as Path, Utf8PathBuf as PathBuf};
21pub use cfg::UnvalidatedConfig;
22use krates::cm;
23pub use krates::{DepKind, Kid};
24pub use toml_span::{
25    Deserialize, Error,
26    span::{Span, Spanned},
27};
28
29/// The possible lint levels for the various lints. These function similarly
30/// to the standard [Rust lint levels](https://doc.rust-lang.org/rustc/lints/levels.html)
31#[derive(PartialEq, Eq, Clone, Copy, Debug, Default, strum::VariantNames, strum::VariantArray)]
32#[cfg_attr(test, derive(serde::Serialize))]
33#[cfg_attr(test, serde(rename_all = "kebab-case"))]
34#[strum(serialize_all = "kebab-case")]
35pub enum LintLevel {
36    /// A debug or info diagnostic _may_ be emitted if the lint is violated
37    Allow,
38    /// A warning will be emitted if the lint is violated, but the command
39    /// will succeed
40    #[default]
41    Warn,
42    /// An error will be emitted if the lint is violated, and the command
43    /// will fail with a non-zero exit code
44    Deny,
45}
46
47#[macro_export]
48macro_rules! enum_deser {
49    ($enum:ty) => {
50        impl<'de> toml_span::Deserialize<'de> for $enum {
51            fn deserialize(
52                value: &mut toml_span::value::Value<'de>,
53            ) -> Result<Self, toml_span::DeserError> {
54                let s = value.take_string(Some(stringify!($enum)))?;
55
56                use strum::{VariantArray, VariantNames};
57
58                let Some(pos) = <$enum as VariantNames>::VARIANTS
59                    .iter()
60                    .position(|v| *v == s.as_ref())
61                else {
62                    return Err(toml_span::Error::from((
63                        toml_span::ErrorKind::UnexpectedValue {
64                            expected: <$enum as VariantNames>::VARIANTS,
65                            value: None,
66                        },
67                        value.span,
68                    ))
69                    .into());
70                };
71
72                Ok(<$enum as VariantArray>::VARIANTS[pos])
73            }
74        }
75    };
76}
77
78enum_deser!(LintLevel);
79
80#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
81pub enum Source {
82    /// crates.io, the boolean indicates whether it is a sparse index
83    CratesIo(bool),
84    /// A remote git patch
85    Git {
86        spec: GitSpec,
87        url: Url,
88        spec_value: Option<String>,
89    },
90    /// A remote non-sparse registry index
91    Registry(Url),
92    /// A remote sparse index
93    Sparse(Url),
94}
95
96/// The directory name under which crates sourced from the crates.io sparse
97/// registry are placed
98fn crates_io_sparse_dir() -> &'static str {
99    static mut CRATES_IO_SPARSE_DIR: String = String::new();
100    static CRATES_IO_INIT: parking_lot::Once = parking_lot::Once::new();
101
102    #[allow(unsafe_code)]
103    // SAFETY: We're mutating a static, but we only allow one mutation
104    unsafe {
105        CRATES_IO_INIT.call_once(|| {
106            let Ok(version) = tame_index::utils::cargo_version(None) else {
107                return;
108            };
109            let Ok(url_dir) = tame_index::utils::url_to_local_dir(
110                tame_index::CRATES_IO_HTTP_INDEX,
111                version >= semver::Version::new(1, 85, 0),
112            ) else {
113                return;
114            };
115            CRATES_IO_SPARSE_DIR = url_dir.dir_name;
116        });
117
118        #[allow(static_mut_refs)]
119        &CRATES_IO_SPARSE_DIR
120    }
121}
122
123impl Source {
124    pub fn crates_io(is_sparse: bool) -> Self {
125        Self::CratesIo(is_sparse)
126    }
127
128    /// Parses the source url to get its kind
129    ///
130    /// Note that the path is the path to the manifest of the package. This is
131    /// used to determine if the crates.io registry is git or sparse, as, currently,
132    /// cargo always uses the git registry+ url for crates.io, even if it uses the
133    /// sparse registry.
134    ///
135    /// This method therefore assumes that the crates sources are laid out in the
136    /// canonical cargo structure, though it can be rooted somewhere other than
137    /// `CARGO_HOME`
138    fn from_metadata(urls: String, manifest_path: Option<&Path>) -> anyhow::Result<Self> {
139        use anyhow::Context as _;
140
141        let (kind, url_str) = urls
142            .split_once('+')
143            .with_context(|| format!("'{urls}' is not a valid crate source"))?;
144
145        match kind {
146            "sparse" => {
147                // This code won't ever be hit in current cargo, but could in the future
148                if urls == tame_index::CRATES_IO_HTTP_INDEX {
149                    Ok(Self::crates_io(true))
150                } else {
151                    Url::parse(&urls)
152                        .map(Self::Sparse)
153                        .context("failed to parse url")
154                }
155            }
156            "registry" => {
157                if url_str == tame_index::CRATES_IO_INDEX {
158                    // registry/src/index.crates.io-6f17d22bba15001f/crate-version/Cargo.toml
159                    let is_sparse = manifest_path.is_none_or(|mp| {
160                        mp.ancestors().nth(2).is_some_and(|dir| {
161                            dir.file_name()
162                                .is_some_and(|dir_name| dir_name == crates_io_sparse_dir())
163                        })
164                    });
165                    Ok(Self::crates_io(is_sparse))
166                } else {
167                    Url::parse(url_str)
168                        .map(Self::Registry)
169                        .context("failed to parse url")
170                }
171            }
172            "git" => {
173                let mut url = Url::parse(url_str).context("failed to parse url")?;
174                let (spec, spec_value) = normalize_git_url(&mut url);
175
176                Ok(Self::Git {
177                    url,
178                    spec,
179                    spec_value,
180                })
181            }
182            unknown => anyhow::bail!("unknown source spec '{unknown}' for url {urls}"),
183        }
184    }
185
186    #[inline]
187    pub fn is_git(&self) -> bool {
188        matches!(self, Self::Git { .. })
189    }
190
191    #[inline]
192    pub fn git_spec(&self) -> Option<GitSpec> {
193        let Self::Git { spec, .. } = self else {
194            return None;
195        };
196        Some(*spec)
197    }
198
199    #[inline]
200    pub fn is_registry(&self) -> bool {
201        !self.is_git()
202    }
203
204    #[inline]
205    pub fn is_crates_io(&self) -> bool {
206        matches!(self, Self::CratesIo(_))
207    }
208
209    #[inline]
210    pub fn matches_rustsec(&self, sid: Option<&Self>) -> bool {
211        let Some(sid) = sid else {
212            return self.is_crates_io();
213        };
214
215        match (self, sid) {
216            (Self::Registry(a), Self::Registry(b)) | (Self::Sparse(a), Self::Sparse(b)) => a == b,
217            _ => false,
218        }
219    }
220}
221
222impl fmt::Display for Source {
223    #[inline]
224    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
225        match self {
226            Self::CratesIo(_) => {
227                write!(f, "registry+{}", tame_index::CRATES_IO_INDEX)
228            }
229            Self::Git { url, .. } => {
230                write!(f, "git+{url}")
231            }
232            Self::Registry(url) => {
233                write!(f, "registry+{url}")
234            }
235            Self::Sparse(url) => {
236                write!(f, "{url}")
237            }
238        }
239    }
240}
241
242#[derive(Debug)]
243pub struct Krate {
244    pub name: String,
245    pub id: Kid,
246    pub version: Version,
247    pub source: Option<Source>,
248    pub authors: Vec<String>,
249    pub repository: Option<String>,
250    pub description: Option<String>,
251    pub manifest_path: PathBuf,
252    pub license: Option<String>,
253    pub license_file: Option<PathBuf>,
254    pub deps: Vec<cm::Dependency>,
255    pub features: BTreeMap<String, Vec<String>>,
256    pub targets: Vec<cm::Target>,
257    pub publish: Option<Vec<String>>,
258}
259
/// Test-only default: an unnamed `0.1.0` krate with every other field empty
#[cfg(test)]
impl Default for Krate {
    fn default() -> Self {
        Self {
            name: String::new(),
            id: Kid::default(),
            version: Version::new(0, 1, 0),
            source: None,
            authors: Vec::new(),
            repository: None,
            description: None,
            manifest_path: PathBuf::new(),
            license: None,
            license_file: None,
            deps: Vec::new(),
            features: BTreeMap::new(),
            targets: Vec::new(),
            publish: None,
        }
    }
}
281
282impl PartialOrd for Krate {
283    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
284        Some(self.cmp(other))
285    }
286}
287
288impl Ord for Krate {
289    fn cmp(&self, other: &Self) -> cmp::Ordering {
290        self.id.cmp(&other.id)
291    }
292}
293
294impl PartialEq for Krate {
295    fn eq(&self, other: &Self) -> bool {
296        self.id == other.id
297    }
298}
299
300impl Eq for Krate {}
301
302impl krates::KrateDetails for Krate {
303    #[inline]
304    fn name(&self) -> &str {
305        &self.name
306    }
307
308    #[inline]
309    fn version(&self) -> &semver::Version {
310        &self.version
311    }
312}
313
314impl From<cm::Package> for Krate {
315    fn from(pkg: cm::Package) -> Self {
316        let source = pkg.source.and_then(|src| {
317            let url = src.to_string();
318
319            Source::from_metadata(url, Some(&pkg.manifest_path))
320                .map_err(|err| {
321                    log::warn!(
322                        "unable to parse source url for {}:{}: {err}",
323                        pkg.name,
324                        pkg.version
325                    );
326                    err
327                })
328                .ok()
329        });
330
331        Self {
332            name: pkg.name,
333            id: pkg.id.into(),
334            version: pkg.version,
335            authors: pkg.authors,
336            repository: pkg.repository,
337            source,
338            targets: pkg.targets,
339            license: pkg.license,
340            license_file: pkg.license_file,
341            description: pkg.description,
342            manifest_path: pkg.manifest_path,
343            deps: pkg.dependencies,
344            // {
345            //     let mut deps = pkg.dependencies;
346            //     deps.sort_by(|a, b| a.name.cmp(&b.name));
347            //     deps
348            // },
349            features: pkg.features,
350            publish: pkg.publish,
351        }
352    }
353}
354
355impl Krate {
356    /// Returns true if the crate is marked as `publish = false`, or
357    /// it is only published to the specified private registries
358    pub(crate) fn is_private(&self, private_registries: &[&str]) -> bool {
359        self.publish.as_ref().is_some_and(|v| {
360            if v.is_empty() {
361                true
362            } else {
363                v.iter()
364                    .all(|reg| private_registries.contains(&reg.as_str()))
365            }
366        })
367    }
368
369    /// Determines if the specified url matches the source
370    #[inline]
371    pub(crate) fn matches_url(&self, url: &Url, exact: bool) -> bool {
372        let Some(src) = &self.source else {
373            return false;
374        };
375
376        let kurl = match src {
377            Source::CratesIo(_is_sparse) => {
378                // It's irrelevant if it's sparse or not for crates.io, they're the same
379                // index, just different protocols/kinds
380                return url
381                    .as_str()
382                    .ends_with(&tame_index::CRATES_IO_HTTP_INDEX[8..])
383                    || url.as_str().ends_with(&tame_index::CRATES_IO_INDEX[10..]);
384            }
385            Source::Sparse(surl) | Source::Registry(surl) | Source::Git { url: surl, .. } => surl,
386        };
387
388        kurl.host() == url.host()
389            && ((exact && kurl.path() == url.path())
390                || (!exact && kurl.path().starts_with(url.path())))
391    }
392
393    #[inline]
394    pub(crate) fn is_crates_io(&self) -> bool {
395        self.source.as_ref().is_some_and(|src| src.is_crates_io())
396    }
397
398    #[inline]
399    pub(crate) fn is_git_source(&self) -> bool {
400        self.source.as_ref().is_some_and(|src| src.is_git())
401    }
402
403    #[inline]
404    pub(crate) fn is_registry(&self) -> bool {
405        self.source.as_ref().is_some_and(|src| src.is_registry())
406    }
407}
408
409impl fmt::Display for Krate {
410    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
411        write!(f, "{} = {}", self.name, self.version)
412    }
413}
414
415pub type Krates = krates::Krates<Krate>;
416
/// Binary searches a sorted slice for `query`, comparing each element through
/// its borrowed form.
///
/// Returns `Ok(index)` of a matching element, or `Err(index)` with the position
/// where `query` could be inserted to keep the slice sorted. The slice must
/// already be sorted according to the ordering of `Q`.
#[inline]
pub fn binary_search<T, Q>(s: &[T], query: &Q) -> Result<usize, usize>
where
    T: std::borrow::Borrow<Q>,
    Q: Ord + ?Sized,
{
    s.binary_search_by(|probe| {
        let key = <T as std::borrow::Borrow<Q>>::borrow(probe);
        Ord::cmp(key, query)
    })
}
425
/// Linearly scans the slice, returning true if the borrowed form of any
/// element equals `query`.
#[inline]
pub fn contains<T, Q>(s: &[T], query: &Q) -> bool
where
    T: std::borrow::Borrow<Q>,
    Q: Eq + ?Sized,
{
    for item in s {
        if <T as std::borrow::Borrow<Q>>::borrow(item) == query {
            return true;
        }
    }

    false
}
434
435#[inline]
436pub fn hash(data: &[u8]) -> u32 {
437    use std::hash::Hasher;
438    // We use the 32-bit hash instead of the 64 even though
439    // it is significantly slower due to the TOML limitation
440    // if only supporting i64
441    let mut xx = twox_hash::XxHash32::default();
442    xx.write(data);
443    xx.finish() as u32
444}
445
/// Selects whether, and in which format, advisories are serialized
///
/// NOTE(review): the exact effect of each variant is decided by the callers,
/// which are not part of this file — confirm there.
#[derive(Copy, Clone)]
pub enum SerializeAdvisory {
    /// Presumably JSON output
    Json,
    /// Presumably SARIF output
    Sarif,
    /// Presumably no serialization at all
    No,
}
452
453/// Common context for the various checks. Some checks require additional
454/// information though.
455pub struct CheckCtx<'ctx, T> {
456    /// The configuration for the check
457    pub cfg: T,
458    /// The krates graph to check
459    pub krates: &'ctx Krates,
460    /// The spans for each unique crate in a synthesized "lock file"
461    pub krate_spans: &'ctx diag::KrateSpans<'ctx>,
462    /// Allows for ANSI colorization of diagnostic content
463    pub colorize: bool,
464    /// Log level specified by the user, may be used by checks to determine what
465    /// information to emit in diagnostics
466    pub log_level: log::LevelFilter,
467    /// Files that can show span information in diagnostics
468    pub files: &'ctx diag::Files,
469}
470
471/// Checks if a version satisfies the specifies the specified version requirement.
472/// If the requirement is `None` then it is also satisfied.
473#[inline]
474pub fn match_req(version: &Version, req: Option<&semver::VersionReq>) -> bool {
475    req.is_none_or(|req| req.matches(version))
476}
477
478#[inline]
479pub fn match_krate(krate: &Krate, pid: &cfg::PackageSpec) -> bool {
480    krate.name == pid.name.value && match_req(&krate.version, pid.version_req.as_ref())
481}
482
483use sources::cfg::GitSpec;
484
485/// Normalizes the URL so that different representations can be compared to each other.
486///
487/// At the moment we just remove a tailing `.git` but there are more possible optimisations.
488///
489/// See <https://github.com/rust-lang/cargo/blob/1f6c6bd5e7bbdf596f7e88e6db347af5268ab113/src/cargo/util/canonical_url.rs#L31-L57>
490/// for what cargo does
491#[inline]
492pub(crate) fn normalize_git_url(url: &mut Url) -> (GitSpec, Option<String>) {
493    const GIT_EXT: &str = ".git";
494
495    let needs_chopping = url.path().ends_with(&GIT_EXT);
496    if needs_chopping {
497        let last = {
498            let last = url.path_segments().unwrap().next_back().unwrap();
499            last[..last.len() - GIT_EXT.len()].to_owned()
500        };
501        url.path_segments_mut().unwrap().pop().push(&last);
502    }
503
504    if url.path().ends_with('/') {
505        url.path_segments_mut().unwrap().pop_if_empty();
506    }
507
508    let mut spec = GitSpec::Any;
509    let mut spec_value = None;
510
511    for (k, v) in url.query_pairs() {
512        spec = match k.as_ref() {
513            "branch" | "ref" => GitSpec::Branch,
514            "tag" => GitSpec::Tag,
515            "rev" => GitSpec::Rev,
516            _ => continue,
517        };
518
519        spec_value = Some(v.into_owned());
520    }
521
522    if url
523        .query_pairs()
524        .any(|(k, v)| k == "branch" && v == "master")
525    {
526        if url.query_pairs().count() == 1 {
527            url.set_query(None);
528        } else {
529            let mut nq = String::new();
530            for (k, v) in url.query_pairs() {
531                if k == "branch" && v == "master" {
532                    continue;
533                }
534
535                use std::fmt::Write;
536                write!(&mut nq, "{k}={v}&").unwrap();
537            }
538
539            // pop trailing &
540            nq.pop();
541            url.set_query(Some(&nq));
542        }
543    }
544
545    (spec, spec_value)
546}
547
548/// Helper function to convert a std `PathBuf` to a camino one
549#[inline]
550#[allow(clippy::disallowed_types)]
551pub fn utf8path(pb: std::path::PathBuf) -> anyhow::Result<PathBuf> {
552    use anyhow::Context;
553    PathBuf::try_from(pb).context("non-utf8 path")
554}
555
556/// Adds the crates.io index with the specified settings to the builder for
557/// feature resolution
558pub fn krates_with_index(
559    kb: &mut krates::Builder,
560    config_root: Option<PathBuf>,
561    cargo_home: Option<PathBuf>,
562) -> anyhow::Result<()> {
563    use anyhow::Context as _;
564    let crates_io = tame_index::IndexUrl::crates_io(config_root, cargo_home.as_deref(), None)
565        .context("unable to determine crates.io url")?;
566
567    let index = tame_index::index::ComboIndexCache::new(
568        tame_index::IndexLocation::new(crates_io).with_root(cargo_home.clone()),
569    )
570    .context("unable to open local crates.io index")?;
571
572    // Note we don't take a lock here ourselves, since we are calling cargo
573    // it will take the lock and only give us results if it gets access, if we
574    // took a look we would deadlock here
575    let lock = tame_index::utils::flock::FileLock::unlocked();
576
577    let index_cache_build = move |krates: std::collections::BTreeSet<String>| {
578        let mut cache = std::collections::BTreeMap::new();
579        for name in krates {
580            let read = || -> Option<krates::index::IndexKrate> {
581                let name = name.as_str().try_into().ok()?;
582                let krate = index.cached_krate(name, &lock).ok()??;
583                let versions = krate
584                    .versions
585                    .into_iter()
586                    .filter_map(|kv| {
587                        // The index (currently) can have both features, and
588                        // features2, the features method gives us an iterator
589                        // over both
590                        kv.version.parse::<semver::Version>().ok().map(|version| {
591                            krates::index::IndexKrateVersion {
592                                version,
593                                features: kv
594                                    .features()
595                                    .map(|(k, v)| (k.clone(), v.clone()))
596                                    .collect(),
597                            }
598                        })
599                    })
600                    .collect();
601
602                Some(krates::index::IndexKrate { versions })
603            };
604
605            let krate = read();
606            cache.insert(name, krate);
607        }
608
609        cache
610    };
611
612    kb.with_crates_io_index(Box::new(index_cache_build));
613
614    Ok(())
615}
616
#[cfg(test)]
mod test {
    use super::{Krate, Path, Source, Url};

    /// Builds a krate whose only non-default field is a git source hosted on
    /// `repo1.test.org`
    fn repo1_git_krate() -> Krate {
        let source = Source::from_metadata(
            "git+ssh://git@repo1.test.org/path/test.git".to_owned(),
            None,
        )
        .unwrap();

        Krate {
            source: Some(source),
            ..Krate::default()
        }
    }

    /// Every supported source kind parses and is classified correctly
    #[test]
    fn parses_sources() {
        let empty_dir = Some(Path::new(""));
        let crates_io_url = format!("registry+{}", tame_index::CRATES_IO_INDEX);

        let crates_io_git = Source::from_metadata(crates_io_url.clone(), empty_dir).unwrap();
        let crates_io_sparse =
            Source::from_metadata(tame_index::CRATES_IO_HTTP_INDEX.to_owned(), empty_dir).unwrap();
        let crates_io_sparse_but_git = Source::from_metadata(
            crates_io_url,
            Some(Path::new(&format!(
                "registry/src/{}/cargo-deny-0.69.0/Cargo.toml",
                super::crates_io_sparse_dir(),
            ))),
        )
        .unwrap();

        for src in [&crates_io_git, &crates_io_sparse, &crates_io_sparse_but_git] {
            assert!(src.is_registry());
            assert!(src.is_crates_io());
        }

        for other_registry in [
            "registry+https://my-own-my-precious.com/",
            "sparse+https://my-registry.rs/",
        ] {
            assert!(
                Source::from_metadata(other_registry.to_owned(), None)
                    .unwrap()
                    .is_registry()
            );
        }

        let src = Source::from_metadata("git+https://github.com/EmbarkStudios/wasmtime?branch=v6.0.1-profiler#84b8cacceacb585ef53774c3790b2372ba080067".to_owned(), empty_dir).unwrap();

        assert!(src.is_git());
    }

    /// Sanity checks that the crates.io sparse registry still uses the same
    /// local directory. Really this should be doing a cargo invocation, but
    /// meh, we depend on tame-index to stay up to date
    #[test]
    fn validate_crates_io_sparse_dir_name() {
        let stable =
            tame_index::utils::cargo_version(None).unwrap() >= tame_index::Version::new(1, 85, 0);
        let expected =
            tame_index::utils::url_to_local_dir(tame_index::CRATES_IO_HTTP_INDEX, stable)
                .unwrap()
                .dir_name;

        assert_eq!(expected, super::crates_io_sparse_dir());
    }

    /// A url on a different host never matches, even inexactly
    #[test]
    fn inexact_match_fails_for_different_hosts() {
        let krate = repo1_git_krate();
        let url = Url::parse("ssh://git@repo2.test.org:8000").unwrap();

        assert!(!krate.matches_url(&url, false));
    }

    /// A url on the same host matches inexactly, regardless of its port
    #[test]
    fn inexact_match_passes_for_same_hosts() {
        let krate = repo1_git_krate();
        let url = Url::parse("ssh://git@repo1.test.org:8000").unwrap();

        assert!(krate.matches_url(&url, false));
    }
}
711        let url = Url::parse("ssh://git@repo1.test.org:8000").unwrap();
712
713        assert!(krate.matches_url(&url, false));
714    }
715}