rustsec_admin/synchronizer.rs
1//! RustSec Advisory DB Synchronizer
2//!
3//! Update the RustSec advisories from external sources.
4//! We use the OSV format as input, as it is the interoperable standard.
5//!
6//! ## GitHub Advisory Database
7//!
8//! Our unique source of external information is the [GitHub Advisory Database](https://github.com/advisories).
9//! Their Rust vulnerabilities have various possible origins:
10//!
//! * Reported directly to GitHub using their built-in security advisories feature
12//! * imported from a CVE, using metadata from [NVD](https://nvd.nist.gov/vuln)
13//! * imported from RustSec.
//!   When importing a RustSec advisory, they assign it a GHSA ID and a CVE ID.
15//!
16//! The data from this database allows us to:
17//!
18//! * Find advisories missing in RustSec
//!   * We want to manually review those before importing them, to ensure
//!     the content matches our standards and processes.
21//! * Add GHSA and CVE aliases to our vulnerabilities.
//!   CVEs are especially important
//!   as they are the most used IDs for vulnerabilities.
24//! * Add missing metadata to our advisories
25//!
26//! GitHub exposes a GraphQL API, but we chose to use their OSV export as a source.
27//!
28//! ## osv.dev
29//!
30//! osv.dev imports from both GitHub Security Advisories and RustSec,
31//! and exposes its advisories through both an HTTP API and ZIP files.
32//!
33//!
34//! Workflow:
35//!
36//! ```text
//!      ┌───────────────────────────────────┐
//!      │                                   │
//! ┌────┴────┐         ┌─────────┐        ┌─▼────┐
//! │ RustSec │─────────▶ OSV.dev ◀────────│ GHSA │
//! └────▲────┘         └────┬────┘        └──────┘
//!      │                   │
//!      └───────────────────┘
44//! ```
45//!
46//! We use the ZIP file export as a source as we need all advisories at once.
47//!
48//! The file containing crates.io vulnerabilities is available with:
49//!
50//! ```shell
51//! gsutil cp gs://osv-vulnerabilities/crates.io/all.zip .
52//! # or
53//! curl -o advisories.zip https://osv-vulnerabilities.storage.googleapis.com/crates.io/all.zip
54//! ```
55//!
56//! ## Sync process
57//!
58//! ### Get aliases for advisories imported from RustSec
59//!
//! We can detect advisories imported from RustSec quite reliably by looking for a reference to the
61//! advisory file in the `advisory-db` repository.
62//! In this case, we can also check if there is only one RustSec advisory to make sure
63//! it is really an alias.
64//!
65//! Then we can add the GHSA id and the CVE id as aliases in the RustSec advisory.
66//!
67//! ## List missing advisories
68//!
69//! When an advisory contains no reference to an existing RustSec advisory, it is likely
70//! missing.
71
72use crate::{
73 error::{Error, ErrorKind},
74 lock::acquire_cargo_package_lock,
75 prelude::*,
76};
77use rustsec::advisory::{Id, IdKind, Parts};
78use rustsec::osv::OsvAdvisory;
79use rustsec::{Advisory, Collection};
80use std::fs::read_to_string;
81use std::iter::FromIterator;
82use std::{
83 fs, iter,
84 path::{Path, PathBuf},
85};
86use tame_index::{index::RemoteGitIndex, KrateName};
87use toml_edit::{value, Document};
88
89/// Advisory synchronizer
90#[allow(dead_code)]
91pub struct Synchronizer {
92 /// Path to the advisory database
93 repo_path: PathBuf,
94
95 /// Loaded crates.io index
96 crates_index: RemoteGitIndex,
97
98 /// Loaded Advisory DB
99 advisory_db: rustsec::Database,
100
101 /// OSV advisories to synchronize from
102 osv: Vec<OsvAdvisory>,
103
104 /// Number of updated advisories
105 updated_advisories: usize,
106
107 /// Missing advisories
108 missing_advisories: Vec<OsvAdvisory>,
109}
110
111impl Synchronizer {
112 /// Create a new synchronizer for the database at the given path
113 pub fn new(repo_path: impl Into<PathBuf>, osv_path: impl Into<PathBuf>) -> Result<Self, Error> {
114 let repo_path = repo_path.into();
115 let cargo_package_lock = acquire_cargo_package_lock()?;
116 let mut crates_index = RemoteGitIndex::new(
117 tame_index::GitIndex::new(tame_index::IndexLocation::new(
118 tame_index::IndexUrl::CratesIoGit,
119 ))?,
120 &cargo_package_lock,
121 )?;
122 crates_index.fetch(&cargo_package_lock)?;
123 let advisory_db = rustsec::Database::open(&repo_path)?;
124
125 let osv = Self::load_osv_export(&osv_path.into())?;
126 status_info!(
127 "Info",
128 "Loaded {} advisories from {}",
129 osv.len(),
130 repo_path.display()
131 );
132
133 Ok(Self {
134 repo_path,
135 crates_index,
136 advisory_db,
137 osv,
138 updated_advisories: 0,
139 missing_advisories: vec![],
140 })
141 }
142
143 /// Borrow the loaded advisory database
144 pub fn advisory_db(&self) -> &rustsec::Database {
145 &self.advisory_db
146 }
147
148 /// Synchronize data
149 pub fn sync(&mut self) -> Result<(usize, Vec<OsvAdvisory>), Error> {
150 // A single OSV advisory could describe a vulnerability affecting several crates
151 // (even if GitHub does not produce such advisories currently).
152 // Additionally, a single RustSec advisory can cover several OSV advisories
153 // depending on the way it was reported.
154 // Therefore, we make as few assumptions as possible here.
155 for osv in self.osv.clone() {
156 if osv.withdrawn() {
157 // Ignore withdrawn advisories from the start
158 continue;
159 }
160
161 // The list of RustSec ids referenced by this OSV advisory,
162 // generally one for a GHSA created from RustSec.
163 // When imported, they can be considered actual aliases.
164 let rustsec_ids_in_osv = osv.rustsec_refs_imported();
165 // The list of crates affected by the advisory, normally one
166 // for a GHSA created from RustSec.
167 let affected_crates = osv.crates();
168
169 // The list of RustSec advisories already having this advisory id as alias
170 let rustsec_ids_alias: Vec<Id> = self
171 .advisory_db
172 .iter()
173 .filter_map(|a| {
174 if a.metadata.aliases.contains(osv.id()) {
175 Some(a.id().clone())
176 } else {
177 None
178 }
179 })
180 .collect();
181
182 // Build the full list of rs aliases
183 let mut rs_aliases = rustsec_ids_in_osv.clone();
184 rs_aliases.extend(rustsec_ids_alias.clone());
185 rs_aliases.sort();
186 rs_aliases.dedup();
187
188 // This advisory does not link to RustSec (i.e., was not imported)
189 // and is not aliased from RustSec. Let's consider importing it.
190 if rs_aliases.is_empty() {
191 for c in affected_crates {
192 let crate_name: KrateName = match c.as_str().try_into() {
193 Ok(k) => k,
194 Err(_e) => {
195 status_info!(
196 "Info",
197 "Crate name {} in {} advisory is invalid, skipping",
198 c,
199 osv.id(),
200 );
201 continue;
202 }
203 };
204
205 if let Ok(Some(_)) = self.crates_index.krate(
206 crate_name,
207 true,
208 &acquire_cargo_package_lock().unwrap(),
209 ) {
210 self.missing_advisories.push(osv.clone());
211 } else {
212 status_info!(
213 "Info",
214 "Unknown crate {} in {} advisory, skipping",
215 c,
216 osv.id()
217 );
218 continue;
219 }
220 }
221 } else {
222 // Update advisories from known links
223 for rs_id in rs_aliases {
224 // ensure all these advisories have up-to-date aliases
225 // missing alias to GHSA
226 let rs_advisory = self
227 .advisory_db
228 .get(&rs_id)
229 .expect("Referenced advisory not in rustsec")
230 .clone();
231
232 // ensure the crate name matches
233 if !affected_crates
234 .iter()
235 .any(|c| c == rs_advisory.metadata.package.as_str())
236 {
237 status_info!(
238 "Info",
239 "Crate names {:?} in {} advisory not matching existing advisory {}, skipping",
240 affected_crates,
241 osv.id(),
242 rs_advisory.id()
243 );
244 continue;
245 }
246
247 self.update_advisory_from_alias(&rs_advisory, &osv)?;
248 }
249 }
250 }
251 Ok((self.updated_advisories, self.missing_advisories.clone()))
252 }
253
254 /// Add missing data to advisory from an external source
255 ///
256 /// For now, only add missing aliases.
257 fn update_advisory_from_alias(
258 &mut self,
259 advisory: &Advisory,
260 external: &OsvAdvisory,
261 ) -> Result<(), Error> {
262 let mut missing_aliases = vec![];
263 let missing_related = vec![];
264 for external_id in external.aliases().iter().chain(iter::once(external.id())) {
265 // Heuristic based on advisory kind
266 match external_id.kind() {
267 IdKind::Cve | IdKind::Ghsa => {
268 if external_id != advisory.id()
269 && !advisory.metadata.aliases.contains(external_id)
270 {
271 missing_aliases.push(external_id.clone());
272 status_info!(
273 "Info",
274 "Adding missing alias {} for {}",
275 external_id,
276 advisory.id()
277 );
278 }
279 }
280 _ => continue,
281 }
282 }
283 if !missing_aliases.is_empty() || !missing_related.is_empty() {
284 self.update_aliases(
285 &self
286 .repo_path
287 .join(Collection::Crates.to_string())
288 .join(advisory.metadata.package.as_str())
289 .join(format!("{}.md", advisory.id())),
290 &missing_aliases,
291 &missing_related,
292 )?;
293 }
294 Ok(())
295 }
296
297 /// Edit advisory file to extend aliases field
298 fn update_aliases(
299 &mut self,
300 advisory_path: &Path,
301 missing_aliases: &[Id],
302 missing_related: &[Id],
303 ) -> Result<(), Error> {
304 let content = read_to_string(advisory_path)?;
305 // First extract toml and markdown content
306 // We can't parse as Advisory as we want to preserve formatting
307 let parts = Parts::parse(&content)?;
308 // Parse toml
309 let mut metadata = parts
310 .front_matter
311 .parse::<Document>()
312 .expect("invalid TOML front matter");
313
314 // Aliases
315 let mut aliases: Vec<String> = metadata["advisory"]
316 .get("aliases")
317 .map(|i| {
318 i.as_array()
319 .unwrap()
320 .into_iter()
321 .map(|v| v.as_str().unwrap().to_string())
322 .collect()
323 })
324 .unwrap_or_else(Vec::new);
325 aliases.extend(missing_aliases.iter().map(|a| a.to_string()));
326 aliases.sort();
327 aliases.dedup();
328 if !aliases.is_empty() {
329 metadata["advisory"]["aliases"] = value(toml_edit::Array::from_iter(aliases.iter()));
330 }
331
332 // Related
333 // FIXME: dedup implementation
334 let mut related: Vec<String> = metadata["advisory"]
335 .get("related")
336 .map(|i| {
337 i.as_array()
338 .unwrap()
339 .into_iter()
340 .map(|v| v.as_str().unwrap().to_string())
341 .collect()
342 })
343 .unwrap_or_else(Vec::new);
344 related.extend(missing_related.iter().map(|a| a.to_string()));
345 related.sort();
346 related.dedup();
347 if !related.is_empty() {
348 metadata["advisory"]["related"] = value(toml_edit::Array::from_iter(related.iter()));
349 }
350
351 let updated = format!("```toml\n{}```\n\n{}", metadata, parts.markdown);
352 fs::write(advisory_path, updated)?;
353 status_info!("Info", "Written {}", advisory_path.display());
354 self.updated_advisories += 1;
355 Ok(())
356 }
357
358 /// Load an OSV advisory from a JSON file
359 fn load_osv_file(path: impl AsRef<Path>) -> Result<OsvAdvisory, Error> {
360 let path = path.as_ref();
361
362 let advisory_data = read_to_string(path)
363 .map_err(|e| format_err!(ErrorKind::Io, "couldn't open {}: {}", path.display(), e))?;
364
365 let advisory: OsvAdvisory = serde_json::from_str(&advisory_data).map_err(|e| {
366 format_err!(ErrorKind::Parse, "error parsing {}: {}", path.display(), e)
367 })?;
368
369 Ok(advisory)
370 }
371
372 /// Load data from an OSV export
373 fn load_osv_export(path: &Path) -> Result<Vec<OsvAdvisory>, Error> {
374 let mut result = vec![];
375 for advisory_entry in fs::read_dir(path).unwrap() {
376 let advisory_path = advisory_entry.unwrap().path();
377 if advisory_path.extension() != Some("json".as_ref()) {
378 // Skip non-JSON files
379 continue;
380 }
381 if advisory_path.to_string_lossy().contains("RUSTSEC-") {
382 // Don't parse advisories already coming from RustSec
383 continue;
384 }
385 let advisory = Self::load_osv_file(advisory_path)?;
386 result.push(advisory)
387 }
388 Ok(result)
389 }
390}