1#![doc = include_str!("../README.md")]
2
3use std::borrow::Cow;
4#[doc(hidden)]
5pub use ahash::HashMap;
6#[doc(hidden)]
7pub use semver::Version as SemVer;
8#[doc(hidden)]
9pub use semver::VersionReq;
10#[doc(hidden)]
11pub use unicase::Ascii;
12
13use futures::future::join_all;
14use log::debug;
15use quick_error::quick_error;
16use std::collections::hash_map::Entry;
17use std::fmt;
18use std::time::Duration;
19
/// URL of the `registry.toml` published in the `bholley/cargo-vet` repository.
///
/// Suitable as the argument of [`MiniVet::fetch_registry_from_url`].
pub const DEFAULT_REGISTRY_URL: &str =
    "https://raw.githubusercontent.com/bholley/cargo-vet/main/registry.toml";
21
22quick_error! {
23 #[derive(Debug)]
24 pub enum Error {
25 Req(e: reqwest::Error) {
27 display("Error fetching vet")
28 source(e)
29 from()
30 }
31 Toml(e: Box<toml::de::Error>) {
33 display("Error parsing vet")
34 source(&**e)
35 from(e: toml::de::Error) -> (Box::new(e))
36 }
37 Other(msg: Box<str>) {
38 display("{}", msg)
39 }
40 }
41}
42
43pub struct MiniVet<'client> {
45 client: Cow<'client, reqwest::Client>,
46}
47
48#[derive(Debug, Clone, serde::Deserialize)]
50pub struct AuditsUrl {
51 #[serde(deserialize_with = "maybe_vec")]
52 pub url: Vec<String>,
53}
54
55#[derive(Debug, Clone, serde::Deserialize)]
57pub struct Registry {
58 pub registry: HashMap<String, AuditsUrl>,
59}
60
61#[derive(Debug, Clone, serde::Deserialize)]
63#[serde(untagged)]
64enum StrOrVec {
65 Vec(Vec<String>),
66 Str(String),
67}
68
69impl Default for StrOrVec {
70 fn default() -> Self {
71 Self::Vec(Vec::new())
72 }
73}
74
75impl AsRef<[String]> for StrOrVec {
76 #[inline]
77 fn as_ref(&self) -> &[String] {
78 match self {
79 Self::Str(s) => std::slice::from_ref(s),
80 Self::Vec(v) => v,
81 }
82 }
83}
84
85impl From<StrOrVec> for Vec<String> {
86 #[inline]
87 fn from(s: StrOrVec) -> Vec<String> {
88 match s {
89 StrOrVec::Str(s) => vec![s],
90 StrOrVec::Vec(v) => v,
91 }
92 }
93}
94
95impl AsMut<Vec<String>> for StrOrVec {
96 fn as_mut(&mut self) -> &mut Vec<String> {
97 match self {
98 Self::Str(_) => {
99 let val = std::mem::replace(self, StrOrVec::Str(String::new()));
100 *self = StrOrVec::Vec(vec![match val {
101 StrOrVec::Str(s) => s,
102 _ => unreachable!(),
103 }]);
104 match self {
105 StrOrVec::Vec(v) => v,
106 _ => unreachable!(),
107 }
108 },
109 Self::Vec(v) => v,
110 }
111 }
112}
113
114#[derive(Debug, Clone, serde::Deserialize)]
116#[serde(untagged)]
117pub enum StrOrNum {
118 Str(String),
119 Num(i64),
120}
121
/// Borrowed version reference of the form `VERSION` or `VERSION@git:REV`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct VetVersionRef<'a> {
    /// Everything before the first `@git:` marker (the whole input if there is none).
    pub version: &'a str,
    /// Everything after the first `@git:` marker, when present.
    pub git_rev: Option<&'a str>,
}

impl<'a> VetVersionRef<'a> {
    /// Splits `s` at the first `@git:` marker. No validation is performed on
    /// either part.
    fn new(s: &'a str) -> Self {
        match s.split_once("@git:") {
            Some((version, rev)) => Self { version, git_rev: Some(rev) },
            None => Self { version: s, git_rev: None },
        }
    }
}

impl fmt::Display for VetVersionRef<'_> {
    /// Writes the reference back in the same `VERSION[@git:REV]` form it was parsed from.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.git_rev {
            Some(rev) => write!(f, "{}@git:{}", self.version, rev),
            None => f.write_str(self.version),
        }
    }
}
148
149#[derive(Debug, Clone, serde::Deserialize)]
151#[serde(rename_all = "kebab-case")]
152pub struct Audit {
153 pub version: Option<String>,
155 pub delta: Option<String>,
161
162 pub violation: Option<VersionReq>,
164
165 #[serde(deserialize_with = "maybe_vec")]
167 pub criteria: Vec<String>,
168
169 #[serde(default, deserialize_with = "maybe_vec")]
173 pub who: Vec<String>,
174
175 pub notes: Option<String>,
177
178 #[serde(default, deserialize_with = "maybe_vec")]
180 pub aggregated_from: Vec<String>,
181}
182
183impl Audit {
184 fn review_key_without_criteria(&self, crate_name: &str) -> String {
186 let mut w = self.who.iter().map(|s| s.as_str()).collect::<Vec<_>>();
187 w.sort_unstable();
188 let mut k = w.join("|");
189 k.try_reserve(100 + crate_name.len()).unwrap();
190 k.push_str(crate_name);
191 k.push(';');
192 k.push_str(self.delta.as_deref().unwrap_or_default());
193 k.push_str(self.version.as_deref().unwrap_or_default());
194 k.push('!');
195 k.push_str(&self.violation.as_ref().map(|v| v.to_string()).unwrap_or_default());
196 k.push('+');
197 k.push_str(self.notes.as_deref().unwrap_or_default());
198 k
199 }
200
201 #[must_use]
203 pub fn delta(&self) -> Option<(VetVersionRef<'_>, VetVersionRef<'_>)> {
204 self.delta.as_deref().and_then(|d| {
205 let (from, to) = d.split_once("->")?;
206 Some((VetVersionRef::new(from.trim_end()), VetVersionRef::new(to.trim_start())))
207 })
208 }
209}
210
211#[derive(Debug, Clone, serde::Deserialize)]
213#[serde(rename_all = "kebab-case")]
214pub struct WildcardAudit {
215 #[serde(default, deserialize_with = "maybe_vec")]
216 pub who: Vec<String>,
217
218 #[serde(deserialize_with = "maybe_vec")]
220 pub criteria: Vec<String>,
221
222 pub user_id: Option<StrOrNum>,
224 pub start: String,
226 pub end: String,
228 #[serde(default)]
230 pub renew: bool,
231 pub notes: Option<String>,
232
233 #[serde(default, deserialize_with = "maybe_vec")]
235 pub aggregated_from: Vec<String>,
236}
237
238#[derive(Debug, Clone, serde::Deserialize)]
240#[serde(rename_all = "kebab-case")]
241pub struct Trusted {
242 #[serde(deserialize_with = "maybe_vec")]
243 pub criteria: Vec<String>,
244
245 pub user_id: StrOrNum,
247
248 pub start: String,
250 pub end: String,
252
253 pub notes: Option<String>,
254
255 #[doc(hidden)]
257 #[serde(default, deserialize_with = "maybe_vec")]
258 pub who: Vec<String>,
259
260 #[serde(default, deserialize_with = "maybe_vec")]
262 pub aggregated_from: Vec<String>,
263}
264
265#[derive(Debug, Clone, serde::Deserialize)]
267#[serde(rename_all = "kebab-case")]
268pub struct Criterion {
269 pub description: Option<String>,
270 pub description_url: Option<String>,
271 #[serde(default, deserialize_with = "maybe_vec")]
273 pub implies: Vec<String>,
274
275 #[serde(default, deserialize_with = "maybe_vec")]
277 pub aggregated_from: Vec<String>,
278}
279
280#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, serde::Deserialize)]
282#[serde(transparent)]
283#[repr(transparent)]
284pub struct CrateName(
285 #[serde(deserialize_with = "u")]
286 pub Ascii<String>
287);
288
289impl std::ops::Deref for CrateName {
290 type Target = str;
291 fn deref(&self) -> &str {
292 &self.0
293 }
294}
295
296impl<'a> std::borrow::Borrow<Ascii<String>> for CrateName {
297 fn borrow(&self) -> &Ascii<String> {
298 &self.0
299 }
300}
301
302#[derive(Debug, Clone, serde::Deserialize)]
304#[serde(rename_all = "kebab-case")]
305pub struct AuditsFile {
306 #[serde(default)]
308 pub criteria: HashMap<String, Criterion>,
309
310 #[serde(default)]
312 pub audits: HashMap<CrateName, Vec<Audit>>,
313
314 #[serde(default)]
316 pub wildcard_audits: HashMap<CrateName, Vec<WildcardAudit>>,
317
318 #[serde(default)]
320 pub trusted: HashMap<CrateName, Vec<Trusted>>,
321}
322
323fn u<'de, D>(deserializer: D) -> Result<Ascii<String>, D::Error>
324where
325 D: serde::Deserializer<'de>,
326{
327 let value: String = serde::Deserialize::deserialize(deserializer)?;
328 Ok(Ascii::new(value))
329}
330
331fn maybe_vec<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
332where
333 D: serde::Deserializer<'de>,
334{
335 let value: StrOrVec = serde::Deserialize::deserialize(deserializer)?;
336 Ok(match value {
337 StrOrVec::Str(s) => vec![s],
338 StrOrVec::Vec(v) => v,
339 })
340}
341
342#[derive(Debug, Clone)]
344pub struct AuditSource {
345 pub name: String,
347 pub url: String,
349 pub audits: AuditsFile,
351}
352
353impl AuditSource {
354 #[must_use]
355 pub fn view_url(&self) -> &str {
356 self.url.strip_suffix("?format=TEXT").unwrap_or(&self.url)
358 }
359
360 #[must_use] pub fn host(&self) -> &str {
361 self.url.strip_prefix("https://").unwrap_or(&self.url).split('/').next().unwrap()
362 }
363
364 #[must_use] pub fn github_repo(&self) -> Option<(&str, &str)> {
366 let mut parts = self.url
367 .strip_prefix("https://raw.githubusercontent.com/")?
368 .split('/');
369 Some((parts.next()?, parts.next()?))
370 }
371}
372
373#[derive(Debug, Clone)]
375pub struct Review<'a> {
376 pub crate_name: &'a str,
377 pub src: &'a AuditSource,
378 pub audit: AuditKind<'a>,
379}
380
381impl<'a> Review<'a> {
382 #[must_use]
384 pub fn criteria(&self) -> &[String] {
385 match self.audit {
386 AuditKind::Audit(a) => a.criteria.as_ref(),
387 AuditKind::WildcardAudit(a) => a.criteria.as_ref(),
388 AuditKind::Trusted(a) => a.criteria.as_ref(),
389 }
390 }
391
392 #[must_use]
394 pub fn aggregated_from(&self) -> &[String] {
395 match self.audit {
396 AuditKind::Audit(a) => a.aggregated_from.as_ref(),
397 AuditKind::WildcardAudit(a) => a.aggregated_from.as_ref(),
398 AuditKind::Trusted(a) => a.aggregated_from.as_ref(),
399 }
400 }
401
402 #[must_use]
404 pub fn aggregated_from_host(&self) -> Option<&str> {
405 self.aggregated_from().iter().find_map(|url| {
406 if let Some(gh) = url.strip_prefix("https://raw.githubusercontent.com/") {
407 if let Some((pos, _)) = gh.bytes().enumerate().filter(|&(_, c)| c == b'/').nth(1) {
409 return Some(&gh[..pos]);
410 }
411 }
412 let url = url.strip_prefix("https://").unwrap_or(url);
413 let url = url.split('/').next()?;
414
415 let known_host = match self.src.github_repo() {
416 Some(("google", _)) => Some(".googlesource.com"),
417 Some(("mozilla", _)) => Some(".mozilla.org"),
418 _ => None,
419 };
420 if let Some(h) = known_host {
421 return Some(url.strip_suffix(h).unwrap_or(url));
422 }
423 Some(url)
424 })
425 }
426
427 #[must_use]
429 pub fn who(&self) -> &[String] {
430 match self.audit {
431 AuditKind::Audit(a) => a.who.as_ref(),
432 AuditKind::WildcardAudit(a) => a.who.as_ref(),
433 AuditKind::Trusted(a) => a.who.as_ref(),
434 }
435 }
436
437 #[must_use]
439 pub fn delta(&self) -> Option<(VetVersionRef<'_>, VetVersionRef<'_>)> {
440 if let AuditKind::Audit(a) = self.audit {
441 a.delta()
442 } else { None }
443 }
444
445 #[must_use]
447 pub fn start(&self) -> Option<&str> {
448 match self.audit {
449 AuditKind::Audit(_) => None,
450 AuditKind::WildcardAudit(a) => Some(a.start.as_str()),
451 AuditKind::Trusted(a) => Some(a.start.as_str()),
452 }
453 }
454
455 #[must_use]
457 pub fn end(&self) -> Option<&str> {
458 match self.audit {
459 AuditKind::Audit(_) => None,
460 AuditKind::WildcardAudit(a) => Some(a.end.as_str()),
461 AuditKind::Trusted(a) => Some(a.end.as_str()),
462 }
463 }
464
465 pub fn version(&self) -> Option<VetVersionRef<'_>> {
467 match self.audit {
468 AuditKind::Audit(a) => {
469 a.version.as_deref().map(VetVersionRef::new).or_else(|| a.delta().map(|d| d.1))
470 },
471 AuditKind::WildcardAudit(_) |
472 AuditKind::Trusted(_) => None,
473 }
474 }
475
476 #[must_use]
477 pub fn notes(&self) -> Option<&str> {
478 match self.audit {
479 AuditKind::Audit(a) => a.notes.as_deref(),
480 AuditKind::WildcardAudit(a) => a.notes.as_deref(),
481 AuditKind::Trusted(a) => a.notes.as_deref(),
482 }
483 }
484
485 #[must_use]
487 pub fn violation(&self) -> Option<&VersionReq> {
488 if let AuditKind::Audit(a) = self.audit {
489 a.violation.as_ref()
490 } else { None }
491 }
492
493 #[must_use] pub fn is_wildcard_trust(&self) -> bool {
494 matches!(self.audit, AuditKind::WildcardAudit(_) | AuditKind::Trusted(_))
495 }
496
497 #[must_use]
498 pub fn audit_type_label(&self) -> &str {
499 match self.audit {
500 AuditKind::Audit(a) => if a.violation.is_some() { "Rejected" } else { "Audited" },
501 AuditKind::WildcardAudit(_) => "Self-approved",
502 AuditKind::Trusted(_) => "Trusted without checking",
503 }
504 }
505}
506
507#[derive(Debug, Clone)]
509pub enum AuditKind<'a> {
510 Audit(&'a Audit),
512 WildcardAudit(&'a WildcardAudit),
514 Trusted(&'a Trusted),
516}
517
518#[derive(Debug, Clone)]
520pub struct AuditSources {
521 pub all: Vec<AuditSource>,
522}
523
524impl AuditSources {
525 #[must_use]
527 pub fn for_crate(&self, name: &str) -> Vec<Review<'_>> {
528 let mut out = vec![];
529 let name = &Ascii::new(name.to_string());
530
531 for src in &self.all {
532 if let Some((cname, audits)) = src.audits.audits.get_key_value(name) {
533 for audit in audits {
534 out.push(Review {
535 crate_name: cname.0.as_str(),
536 src,
537 audit: AuditKind::Audit(audit),
538 });
539 }
540 }
541 if let Some((cname, waudits)) = src.audits.wildcard_audits.get_key_value(name) {
542 for audit in waudits {
543 out.push(Review {
544 crate_name: cname.0.as_str(),
545 src,
546 audit: AuditKind::WildcardAudit(audit),
547 });
548 }
549 }
550 }
551
552 if out.is_empty() {
554 for src in &self.all {
555 if let Some((cname, taudits)) = src.audits.trusted.get_key_value(name) {
556 for audit in taudits {
557 out.push(Review {
558 crate_name: cname.0.as_str(),
559 src,
560 audit: AuditKind::Trusted(audit),
561 });
562 }
563 }
564 }
565 }
566 out
567 }
568
569 pub fn normalized(mut self, resolve_crates_io_id: &(dyn Fn(i64) -> Option<String> + Send + Sync)) -> Self {
576 for s in &mut self.all {
577 s.audits.criteria.retain(|k, _| k != "safe-to-deploy" && k != "safe-to-run");
579
580 for c in &mut s.audits.criteria.values_mut() {
581 if let (Some(url), None) = (&c.description_url, &c.description) {
583 if url.starts_with("https://") {
584 c.description = Some(format!("[See URL]({url})"));
585 }
586 }
587 }
588
589 for t in s.audits.trusted.values_mut().flatten() {
591 if let StrOrNum::Num(id) = t.user_id {
592 if let Some(s) = resolve_crates_io_id(id) {
593 t.user_id = StrOrNum::Str(s);
594 }
595 }
596 }
597 for t in &mut s.audits.wildcard_audits.values_mut().flatten() {
598 if let Some(StrOrNum::Num(id)) = t.user_id {
599 if let Some(s) = resolve_crates_io_id(id) {
600 t.user_id = Some(StrOrNum::Str(s));
601 }
602 }
603 }
604
605 for (CrateName(crate_name), audits) in &mut s.audits.audits.iter_mut() {
606 let mut tmp = HashMap::<String, &mut Audit>::default();
607 tmp.reserve(audits.len());
608
609 for a in &mut *audits {
611 match tmp.entry(a.review_key_without_criteria(crate_name)) {
613 Entry::Occupied(mut dupe) => {
614 let dupe = &mut dupe.get_mut().criteria;
615 if dupe.len() + a.criteria.len() > 20 { continue; }
617 for c in a.criteria.drain(..) {
618 if !dupe.contains(&c) {
619 dupe.push(c);
620 }
621 }
622 },
623 Entry::Vacant(e) => {
624 e.insert(a);
625 },
626 }
627 }
628
629 audits.retain(|a| !a.criteria.is_empty());
631
632 let mut has_full_reviews_up_to = audits.iter()
635 .filter(|a| a.delta.is_none() && a.criteria.iter().any(|c| c == "safe-to-deploy"))
636 .filter(|a| a.violation.is_none() && a.delta.is_none() && a.criteria.iter().any(|c| c == "safe-to-deploy"))
638 .filter_map(|a| SemVer::parse(a.version.as_deref()?).ok())
639 .filter(|v| v.pre.is_empty())
640 .max();
641
642 audits.retain_mut(|a| {
644 if let Some((f, t)) = a.delta() {
645 let Ok(f_semver) = SemVer::parse(f.version) else {
646 return false;
647 };
648 let Ok(t_semver) = SemVer::parse(t.version) else {
649 return false;
650 };
651 if t_semver < f_semver {
652 return false;
655 }
656
657 if let Some(max) = &has_full_reviews_up_to {
659 let suitable = f_semver.pre.is_empty() && t_semver.pre.is_empty() && a.violation.is_none();
661 let from_overlaps = f_semver < *max || (f_semver == *max && f.git_rev.is_none()); if suitable && from_overlaps && t_semver > *max && a.criteria.iter().any(|c| c == "safe-to-deploy") {
663 debug!("found base for {f} -> {t} in {max} in {crate_name}@{}", s.name);
664 a.version = Some(t.to_string());
665 a.delta = None;
666 has_full_reviews_up_to = Some(t_semver);
667 }
668 }
669 }
670 true
671 });
672 }
673 }
674 self
675 }
676}
677
678impl MiniVet<'static> {
679 #[must_use]
680 pub fn new() -> Self {
681 Self::new_with_client(Cow::Owned(reqwest::Client::builder()
682 .user_agent("lib.rs/mini-vet")
683 .connect_timeout(Duration::from_secs(4))
684 .timeout(Duration::from_secs(10))
685 .build().unwrap()))
686 }
687}
688
689impl<'client> MiniVet<'client> {
690 #[must_use]
692 pub fn new_with_client(client: Cow<'client, reqwest::Client>) -> Self {
693 Self { client }
694 }
695
696 async fn fetch_text(&self, url: &str) -> Result<String, Error> {
697 Ok(Box::pin(async move {
698 self.client.get(url).send().await?.error_for_status()?.text().await
699 }).await?)
700 }
701
702 async fn fetch_audits_from_url(&self, url: &str) -> Result<AuditsFile, Error> {
703 debug!("fetching vet audit from {url}");
704
705 let data = self.fetch_text(url).await?;
706 Ok(toml::from_str(&data).or_else(|e| {
707 if let Some(debased) = base64::decode(&data).ok().and_then(|d| String::from_utf8(d).ok()) {
708 log::warn!("worked around google vcs mystery base64 for {url}");
709 toml::from_str(&debased)
710 } else {
711 Err(e)
712 }
713 }).map_err(|e| {
714 log::error!("parse error {e} of {url}, data: {:#?}", toml::from_str::<toml::Value>(&data));
715 e
716 })?)
717 }
718
719 pub async fn fetch_registry_from_url(&self, url: &str) -> Result<AuditSources, Error> {
724 debug!("fetching vet registry from {url}");
725 let reg: Registry = {
726 let data = self.fetch_text(url).await?;
727 toml::from_str(&data)?
728 };
729 self.fetch_registry(reg).await
730 }
731
732 pub async fn fetch_registry(&self, reg: Registry) -> Result<AuditSources, Error> {
736 let all = join_all(reg.registry.into_iter().flat_map(|(name, r)| r.url.into_iter().map(move |url| { let name = name.clone(); async move {
737 let audits = self.fetch_audits_from_url(&url).await?;
738 Ok::<_, Error>(AuditSource {
739 name,
740 url,
741 audits,
742 })
743 }}))).await.into_iter().collect::<Result<Vec<_>, _>>()?;
744 Ok(AuditSources {
745 all,
746 })
747 }
748}
749
/// Network smoke test: fetches the default registry and checks that a
/// differently-cased crate name still finds reviews.
#[cfg(test)]
#[tokio::test(flavor = "multi_thread")]
async fn vetload() {
    use std::error::Error;

    let _ = env_logger::try_init();

    // Compile-time check that the fetch future can be sent across threads.
    fn is_send<T: Send>(v: T) -> T {
        v
    }

    let vet = MiniVet::new();
    let v = is_send(vet.fetch_registry_from_url(DEFAULT_REGISTRY_URL))
        .await
        .map_err(|e| {
            log::error!("{:?}", e.source());
            e
        })
        .unwrap();
    assert!(v.all.len() > 4);
    assert!(v.for_crate("Cc").len() >= 5);
}