rustsec_admin/
synchronizer.rs

1//! RustSec Advisory DB Synchronizer
2//!
3//! Update the RustSec advisories from external sources.
4//! We use the OSV format as input, as it is the interoperable standard.
5//!
6//! ## GitHub Advisory Database
7//!
8//! Our unique source of external information is the [GitHub Advisory Database](https://github.com/advisories).
9//! Their Rust vulnerabilities have various possible origins:
10//!
//! * Reported directly to GitHub using their built-in security advisories feature
12//! * imported from a CVE, using metadata from [NVD](https://nvd.nist.gov/vuln)
13//! * imported from RustSec.
//!   When importing a RustSec advisory, they assign it GHSA and CVE IDs.
15//!
16//! The data from this database allows us to:
17//!
18//! * Find advisories missing in RustSec
19//!   * We want to manually review those before importing them, to ensure
//!     the content matches our standards and processes.
21//! * Add GHSA and CVE aliases to our vulnerabilities.
//!   CVEs are especially important
//!   as they are the most used IDs for vulnerabilities.
24//! * Add missing metadata to our advisories
25//!
26//! GitHub exposes a GraphQL API, but we chose to use their OSV export as a source.
27//!
28//! ## osv.dev
29//!
30//! osv.dev imports from both GitHub Security Advisories and RustSec,
31//! and exposes its advisories through both an HTTP API and ZIP files.
32//!
33//!
34//! Workflow:
35//!    
36//! ```text                                                     
37//!          ┌───────────────────────────────────┐
38//!          │                                   │
39//!     ┌────┴────┐         ┌─────────┐        ┌─▼────┐
40//!     │ RustSec │─────────▶ OSV.dev ◀────────│ GHSA │
41//!     └────▲────┘         └────┬────┘        └──────┘
42//!          │                   │
43//!          └───────────────────┘
44//! ```
45//!
46//! We use the ZIP file export as a source as we need all advisories at once.
47//!
48//! The file containing crates.io vulnerabilities is available with:
49//!
50//! ```shell
51//! gsutil cp gs://osv-vulnerabilities/crates.io/all.zip .
52//! # or
53//! curl -o advisories.zip https://osv-vulnerabilities.storage.googleapis.com/crates.io/all.zip
54//! ```
55//!
56//! ## Sync process
57//!
58//! ### Get aliases for advisories imported from RustSec
59//!
//! We can detect advisories imported from RustSec quite reliably by looking for a reference to the
61//! advisory file in the `advisory-db` repository.
62//! In this case, we can also check if there is only one RustSec advisory to make sure
63//! it is really an alias.
64//!
65//! Then we can add the GHSA id and the CVE id as aliases in the RustSec advisory.
66//!
//! ### List missing advisories
68//!
69//! When an advisory contains no reference to an existing RustSec advisory, it is likely
70//! missing.
71
72use crate::{
73    error::{Error, ErrorKind},
74    lock::acquire_cargo_package_lock,
75    prelude::*,
76};
77use rustsec::advisory::{Id, IdKind, Parts};
78use rustsec::osv::OsvAdvisory;
79use rustsec::{Advisory, Collection};
80use std::fs::read_to_string;
81use std::iter::FromIterator;
82use std::{
83    fs, iter,
84    path::{Path, PathBuf},
85};
86use tame_index::{index::RemoteGitIndex, KrateName};
87use toml_edit::{value, Document};
88
89/// Advisory synchronizer
90#[allow(dead_code)]
91pub struct Synchronizer {
92    /// Path to the advisory database
93    repo_path: PathBuf,
94
95    /// Loaded crates.io index
96    crates_index: RemoteGitIndex,
97
98    /// Loaded Advisory DB
99    advisory_db: rustsec::Database,
100
101    /// OSV advisories to synchronize from
102    osv: Vec<OsvAdvisory>,
103
104    /// Number of updated advisories
105    updated_advisories: usize,
106
107    /// Missing advisories
108    missing_advisories: Vec<OsvAdvisory>,
109}
110
111impl Synchronizer {
112    /// Create a new synchronizer for the database at the given path
113    pub fn new(repo_path: impl Into<PathBuf>, osv_path: impl Into<PathBuf>) -> Result<Self, Error> {
114        let repo_path = repo_path.into();
115        let cargo_package_lock = acquire_cargo_package_lock()?;
116        let mut crates_index = RemoteGitIndex::new(
117            tame_index::GitIndex::new(tame_index::IndexLocation::new(
118                tame_index::IndexUrl::CratesIoGit,
119            ))?,
120            &cargo_package_lock,
121        )?;
122        crates_index.fetch(&cargo_package_lock)?;
123        let advisory_db = rustsec::Database::open(&repo_path)?;
124
125        let osv = Self::load_osv_export(&osv_path.into())?;
126        status_info!(
127            "Info",
128            "Loaded {} advisories from {}",
129            osv.len(),
130            repo_path.display()
131        );
132
133        Ok(Self {
134            repo_path,
135            crates_index,
136            advisory_db,
137            osv,
138            updated_advisories: 0,
139            missing_advisories: vec![],
140        })
141    }
142
143    /// Borrow the loaded advisory database
144    pub fn advisory_db(&self) -> &rustsec::Database {
145        &self.advisory_db
146    }
147
148    /// Synchronize data
149    pub fn sync(&mut self) -> Result<(usize, Vec<OsvAdvisory>), Error> {
150        // A single OSV advisory could describe a vulnerability affecting several crates
151        // (even if GitHub does not produce such advisories currently).
152        // Additionally, a single RustSec advisory can cover several OSV advisories
153        // depending on the way it was reported.
154        // Therefore, we make as few assumptions as possible here.
155        for osv in self.osv.clone() {
156            if osv.withdrawn() {
157                // Ignore withdrawn advisories from the start
158                continue;
159            }
160
161            // The list of RustSec ids referenced by this OSV advisory,
162            // generally one for a GHSA created from RustSec.
163            // When imported, they can be considered actual aliases.
164            let rustsec_ids_in_osv = osv.rustsec_refs_imported();
165            // The list of crates affected by the advisory, normally one
166            // for a GHSA created from RustSec.
167            let affected_crates = osv.crates();
168
169            // The list of RustSec advisories already having this advisory id as alias
170            let rustsec_ids_alias: Vec<Id> = self
171                .advisory_db
172                .iter()
173                .filter_map(|a| {
174                    if a.metadata.aliases.contains(osv.id()) {
175                        Some(a.id().clone())
176                    } else {
177                        None
178                    }
179                })
180                .collect();
181
182            // Build the full list of rs aliases
183            let mut rs_aliases = rustsec_ids_in_osv.clone();
184            rs_aliases.extend(rustsec_ids_alias.clone());
185            rs_aliases.sort();
186            rs_aliases.dedup();
187
188            // This advisory does not link to RustSec (i.e., was not imported)
189            // and is not aliased from RustSec. Let's consider importing it.
190            if rs_aliases.is_empty() {
191                for c in affected_crates {
192                    let crate_name: KrateName = match c.as_str().try_into() {
193                        Ok(k) => k,
194                        Err(_e) => {
195                            status_info!(
196                                "Info",
197                                "Crate name {} in {} advisory is invalid, skipping",
198                                c,
199                                osv.id(),
200                            );
201                            continue;
202                        }
203                    };
204
205                    if let Ok(Some(_)) = self.crates_index.krate(
206                        crate_name,
207                        true,
208                        &acquire_cargo_package_lock().unwrap(),
209                    ) {
210                        self.missing_advisories.push(osv.clone());
211                    } else {
212                        status_info!(
213                            "Info",
214                            "Unknown crate {} in {} advisory, skipping",
215                            c,
216                            osv.id()
217                        );
218                        continue;
219                    }
220                }
221            } else {
222                // Update advisories from known links
223                for rs_id in rs_aliases {
224                    // ensure all these advisories have up-to-date aliases
225                    // missing alias to GHSA
226                    let rs_advisory = self
227                        .advisory_db
228                        .get(&rs_id)
229                        .expect("Referenced advisory not in rustsec")
230                        .clone();
231
232                    // ensure the crate name matches
233                    if !affected_crates
234                        .iter()
235                        .any(|c| c == rs_advisory.metadata.package.as_str())
236                    {
237                        status_info!(
238                            "Info",
239                            "Crate names {:?} in {} advisory not matching existing advisory {}, skipping",
240                            affected_crates,
241                            osv.id(),
242                            rs_advisory.id()
243                        );
244                        continue;
245                    }
246
247                    self.update_advisory_from_alias(&rs_advisory, &osv)?;
248                }
249            }
250        }
251        Ok((self.updated_advisories, self.missing_advisories.clone()))
252    }
253
254    /// Add missing data to advisory from an external source
255    ///
256    /// For now, only add missing aliases.
257    fn update_advisory_from_alias(
258        &mut self,
259        advisory: &Advisory,
260        external: &OsvAdvisory,
261    ) -> Result<(), Error> {
262        let mut missing_aliases = vec![];
263        let missing_related = vec![];
264        for external_id in external.aliases().iter().chain(iter::once(external.id())) {
265            // Heuristic based on advisory kind
266            match external_id.kind() {
267                IdKind::Cve | IdKind::Ghsa => {
268                    if external_id != advisory.id()
269                        && !advisory.metadata.aliases.contains(external_id)
270                    {
271                        missing_aliases.push(external_id.clone());
272                        status_info!(
273                            "Info",
274                            "Adding missing alias {} for {}",
275                            external_id,
276                            advisory.id()
277                        );
278                    }
279                }
280                _ => continue,
281            }
282        }
283        if !missing_aliases.is_empty() || !missing_related.is_empty() {
284            self.update_aliases(
285                &self
286                    .repo_path
287                    .join(Collection::Crates.to_string())
288                    .join(advisory.metadata.package.as_str())
289                    .join(format!("{}.md", advisory.id())),
290                &missing_aliases,
291                &missing_related,
292            )?;
293        }
294        Ok(())
295    }
296
297    /// Edit advisory file to extend aliases field
298    fn update_aliases(
299        &mut self,
300        advisory_path: &Path,
301        missing_aliases: &[Id],
302        missing_related: &[Id],
303    ) -> Result<(), Error> {
304        let content = read_to_string(advisory_path)?;
305        // First extract toml and markdown content
306        // We can't parse as Advisory as we want to preserve formatting
307        let parts = Parts::parse(&content)?;
308        // Parse toml
309        let mut metadata = parts
310            .front_matter
311            .parse::<Document>()
312            .expect("invalid TOML front matter");
313
314        // Aliases
315        let mut aliases: Vec<String> = metadata["advisory"]
316            .get("aliases")
317            .map(|i| {
318                i.as_array()
319                    .unwrap()
320                    .into_iter()
321                    .map(|v| v.as_str().unwrap().to_string())
322                    .collect()
323            })
324            .unwrap_or_else(Vec::new);
325        aliases.extend(missing_aliases.iter().map(|a| a.to_string()));
326        aliases.sort();
327        aliases.dedup();
328        if !aliases.is_empty() {
329            metadata["advisory"]["aliases"] = value(toml_edit::Array::from_iter(aliases.iter()));
330        }
331
332        // Related
333        // FIXME: dedup implementation
334        let mut related: Vec<String> = metadata["advisory"]
335            .get("related")
336            .map(|i| {
337                i.as_array()
338                    .unwrap()
339                    .into_iter()
340                    .map(|v| v.as_str().unwrap().to_string())
341                    .collect()
342            })
343            .unwrap_or_else(Vec::new);
344        related.extend(missing_related.iter().map(|a| a.to_string()));
345        related.sort();
346        related.dedup();
347        if !related.is_empty() {
348            metadata["advisory"]["related"] = value(toml_edit::Array::from_iter(related.iter()));
349        }
350
351        let updated = format!("```toml\n{}```\n\n{}", metadata, parts.markdown);
352        fs::write(advisory_path, updated)?;
353        status_info!("Info", "Written {}", advisory_path.display());
354        self.updated_advisories += 1;
355        Ok(())
356    }
357
358    /// Load an OSV advisory from a JSON file
359    fn load_osv_file(path: impl AsRef<Path>) -> Result<OsvAdvisory, Error> {
360        let path = path.as_ref();
361
362        let advisory_data = read_to_string(path)
363            .map_err(|e| format_err!(ErrorKind::Io, "couldn't open {}: {}", path.display(), e))?;
364
365        let advisory: OsvAdvisory = serde_json::from_str(&advisory_data).map_err(|e| {
366            format_err!(ErrorKind::Parse, "error parsing {}: {}", path.display(), e)
367        })?;
368
369        Ok(advisory)
370    }
371
372    /// Load data from an OSV export
373    fn load_osv_export(path: &Path) -> Result<Vec<OsvAdvisory>, Error> {
374        let mut result = vec![];
375        for advisory_entry in fs::read_dir(path).unwrap() {
376            let advisory_path = advisory_entry.unwrap().path();
377            if advisory_path.extension() != Some("json".as_ref()) {
378                // Skip non-JSON files
379                continue;
380            }
381            if advisory_path.to_string_lossy().contains("RUSTSEC-") {
382                // Don't parse advisories already coming from RustSec
383                continue;
384            }
385            let advisory = Self::load_osv_file(advisory_path)?;
386            result.push(advisory)
387        }
388        Ok(result)
389    }
390}