pub(crate) mod checksum;
pub mod entry;
mod third_party;
mod toolchain;
pub mod utils;
pub use entry::{CacheEntry, SecondaryIndexes};
use serde::de::DeserializeOwned;
pub use utils::RkyvCowBytes;
use std::collections::BTreeSet;
use std::marker::PhantomData;
use anyhow::Context;
use guppy::PackageId;
use guppy::graph::{PackageGraph, PackageMetadata};
use itertools::Itertools;
use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::params;
use crate::TOOLCHAIN_CRATES;
use crate::crate_data::CrateData;
use crate::indexing::{ExternalReExports, ImportIndex, ImportPath2Id};
use third_party::ThirdPartyCrateCache;
use toolchain::ToolchainCache;
/// The `bincode` configuration (the library's `standard()` preset) shared across the crate.
///
/// In this module it is used to encode and decode the list of package ids
/// stored in the `project2package_id_access_log` table.
pub(crate) static BINCODE_CONFIG: bincode::config::Configuration = bincode::config::standard();
/// An on-disk cache for rustdoc data, backed by a pool of SQLite connections.
///
/// `A` is the type of the annotated items attached to processed cache entries
/// (see `ProcessedCacheEntry::annotated_items`). It is only tracked at the type
/// level: no value of type `A` is stored in the cache handle itself.
#[derive(Debug)]
pub struct RustdocGlobalFsCache<A> {
    /// The output of `cargo --verbose --version` for the toolchain in use,
    /// as computed by `cargo_fingerprint`. It is handed to both sub-caches on
    /// every lookup and insertion.
    cargo_fingerprint: String,
    /// Handles entries keyed by `RustdocCacheKey::ThirdPartyCrate`.
    third_party_cache: ThirdPartyCrateCache,
    /// Handles entries keyed by `RustdocCacheKey::ToolchainCrate`.
    toolchain_cache: ToolchainCache,
    /// Pool of connections to the SQLite database that stores the cache.
    connection_pool: r2d2::Pool<SqliteConnectionManager>,
    /// Ties the cache handle to the annotation type `A` without storing one.
    _annotation: PhantomData<A>,
}
// `Clone` is implemented by hand so that no `A: Clone` bound is required:
// `A` only appears inside a `PhantomData`.
impl<A> Clone for RustdocGlobalFsCache<A> {
    fn clone(&self) -> Self {
        // Destructure exhaustively: adding a field to the struct without
        // handling it here becomes a compile error.
        let Self {
            cargo_fingerprint,
            third_party_cache,
            toolchain_cache,
            connection_pool,
            _annotation: _,
        } = self;
        Self {
            cargo_fingerprint: cargo_fingerprint.clone(),
            third_party_cache: third_party_cache.clone(),
            toolchain_cache: toolchain_cache.clone(),
            connection_pool: connection_pool.clone(),
            _annotation: PhantomData,
        }
    }
}
/// The key used to look up (or store) the documentation of a crate in the cache.
///
/// Use `RustdocCacheKey::new` to build the right variant from a package id.
pub enum RustdocCacheKey<'a> {
    /// A crate that is not one of the `TOOLCHAIN_CRATES`, identified by its
    /// metadata in the package graph.
    ThirdPartyCrate(PackageMetadata<'a>),
    /// One of the `TOOLCHAIN_CRATES`, identified by its name.
    ToolchainCrate(&'a str),
}
impl std::fmt::Debug for RustdocCacheKey<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RustdocCacheKey::ThirdPartyCrate(metadata) => f
.debug_struct("ThirdPartyCrate")
.field("id", &metadata.id())
.field("name", &metadata.name())
.field("version", &metadata.version())
.finish(),
RustdocCacheKey::ToolchainCrate(name) => f
.debug_struct("ToolchainCrate")
.field("name", name)
.finish(),
}
}
}
/// A cache entry as returned by `RustdocGlobalFsCache::get`.
pub enum HydratedCacheEntry<A> {
    /// Only the crate data is available.
    Raw(CrateData),
    /// The crate data comes with its secondary indexes and annotated items.
    Processed(ProcessedCacheEntry<A>),
}
/// A cache entry that carries, next to the crate data, everything required to
/// assemble a `crate::queries::Crate` (see `ProcessedCacheEntry::into_crate`).
pub struct ProcessedCacheEntry<A> {
    /// The id of the package this entry refers to.
    pub package_id: PackageId,
    /// The crate data for the package.
    pub crate_data: CrateData,
    /// Index handed to `crate::queries::Crate::new` as its second argument.
    pub import_path2id: ImportPath2Id,
    /// Index handed to `crate::queries::Crate::new` as its fourth argument.
    pub import_index: ImportIndex,
    /// Index handed to `crate::queries::Crate::new` as its third argument.
    pub external_re_exports: ExternalReExports,
    /// The annotated items stored alongside the crate; returned as-is by `into_crate`.
    pub annotated_items: A,
}
impl<A> ProcessedCacheEntry<A> {
    /// Consume the entry, returning the assembled `crate::queries::Crate`
    /// together with the annotated items that were stored next to it.
    pub fn into_crate(self) -> (crate::queries::Crate, A) {
        let Self {
            package_id,
            crate_data,
            import_path2id,
            import_index,
            external_re_exports,
            annotated_items,
        } = self;
        let core = crate::queries::CrateCore {
            package_id,
            krate: crate_data,
        };
        // Argument order matters: the re-exports come before the import index.
        let krate =
            crate::queries::Crate::new(core, import_path2id, external_re_exports, import_index);
        (krate, annotated_items)
    }
}
impl<'a> RustdocCacheKey<'a> {
    /// Build the cache key for `package_id`.
    ///
    /// Packages whose id is listed in `TOOLCHAIN_CRATES` are keyed by that id;
    /// every other package is keyed by its metadata in `package_graph`.
    ///
    /// # Panics
    ///
    /// Panics if `package_id` is not a toolchain crate and `package_graph`
    /// can't provide metadata for it.
    pub fn new(package_id: &'a PackageId, package_graph: &'a PackageGraph) -> RustdocCacheKey<'a> {
        let repr = package_id.repr();
        if !TOOLCHAIN_CRATES.contains(&repr) {
            let metadata = package_graph.metadata(package_id).unwrap();
            return Self::ThirdPartyCrate(metadata);
        }
        Self::ToolchainCrate(repr)
    }
}
impl<A: DeserializeOwned + Default> RustdocGlobalFsCache<A> {
    /// Open the on-disk cache stored under `cache_dir`, creating it if needed.
    ///
    /// The `cargo` fingerprint for `toolchain_name` is computed on a scoped
    /// background thread while the database and the two sub-caches are being
    /// set up on the calling thread.
    ///
    /// # Errors
    ///
    /// Fails if the database can't be set up, if no connection can be obtained
    /// from the pool, if either sub-cache fails to initialise, or if the
    /// `cargo` fingerprint can't be computed.
    ///
    /// # Panics
    ///
    /// Panics if the thread computing the `cargo` fingerprint panicked.
    #[tracing::instrument(name = "Initialize on-disk rustdoc cache", skip_all)]
    pub fn new(
        cache_fingerprint: &str,
        toolchain_name: &str,
        cache_workspace_package_docs: bool,
        package_graph: &PackageGraph,
        cache_dir: &std::path::Path,
    ) -> Result<Self, anyhow::Error> {
        std::thread::scope(|s| {
            // Kick this off first: it shells out, so it can overlap with the
            // database setup below.
            let fingerprint_task = s.spawn(|| cargo_fingerprint(toolchain_name));

            let connection_pool = Self::setup_database(cache_fingerprint, cache_dir)?;
            let conn = connection_pool.get()?;
            let third_party_cache =
                ThirdPartyCrateCache::new(&conn, cache_workspace_package_docs, package_graph)?;
            let toolchain_cache = ToolchainCache::new(&conn)?;

            let cargo_fingerprint = fingerprint_task
                .join()
                .expect("Failed to compute on `cargo`'s fingerprint")?;

            Ok(Self {
                cargo_fingerprint,
                third_party_cache,
                toolchain_cache,
                connection_pool,
                _annotation: PhantomData,
            })
        })
    }

    /// Look up the entry stored for `cache_key`, dispatching to the sub-cache
    /// that matches the key's variant.
    ///
    /// The result of the sub-cache is returned unchanged; `Ok(None)`
    /// presumably denotes a cache miss (confirm against the sub-caches).
    ///
    /// # Errors
    ///
    /// Fails if no connection can be obtained from the pool or if the
    /// sub-cache lookup fails.
    pub fn get(
        &self,
        cache_key: &RustdocCacheKey,
        package_graph: &PackageGraph,
    ) -> Result<Option<HydratedCacheEntry<A>>, anyhow::Error> {
        let conn = self.connection_pool.get()?;
        let fingerprint = &self.cargo_fingerprint;
        match cache_key {
            RustdocCacheKey::ToolchainCrate(name) => {
                self.toolchain_cache.get::<A>(name, fingerprint, &conn)
            }
            RustdocCacheKey::ThirdPartyCrate(metadata) => {
                self.third_party_cache
                    .get::<A>(metadata, fingerprint, &conn, package_graph)
            }
        }
    }
}
impl<A> RustdocGlobalFsCache<A> {
    /// Store `cache_entry` in the sub-cache that matches the variant of `cache_key`.
    ///
    /// For third-party crates nothing is stored, and `Ok(())` is returned,
    /// when the third-party cache can't derive a key for the package
    /// (i.e. its `cache_key` method returns `None`).
    ///
    /// # Errors
    ///
    /// Fails if no connection can be obtained from the pool or if the
    /// sub-cache insertion fails.
    pub fn insert(
        &self,
        cache_key: &RustdocCacheKey,
        cache_entry: CacheEntry,
        package_graph: &PackageGraph,
    ) -> Result<(), anyhow::Error> {
        let connection = self.connection_pool.get()?;
        match cache_key {
            RustdocCacheKey::ThirdPartyCrate(metadata) => {
                let Some(key) = self.third_party_cache.cache_key(
                    metadata,
                    &self.cargo_fingerprint,
                    package_graph,
                ) else {
                    return Ok(());
                };
                self.third_party_cache.insert(key, &connection, cache_entry)
            }
            RustdocCacheKey::ToolchainCrate(name) => {
                self.toolchain_cache
                    .insert(name, cache_entry, &self.cargo_fingerprint, &connection)
            }
        }
    }

    /// Record `package_ids` as the access log of the project identified by
    /// `project_fingerprint`, replacing any set previously stored for it.
    ///
    /// The ids are stored as a single `bincode`-encoded blob.
    ///
    /// # Errors
    ///
    /// Fails if no connection can be obtained from the pool, if the ids can't
    /// be encoded, or if the SQL statement fails.
    #[tracing::instrument(skip_all, level = "trace")]
    pub fn persist_access_log(
        &self,
        package_ids: &BTreeSet<PackageId>,
        project_fingerprint: &str,
    ) -> Result<(), anyhow::Error> {
        let connection = self.connection_pool.get()?;
        let mut stmt = connection.prepare_cached(
            "INSERT INTO project2package_id_access_log (\n\
             project_fingerprint,\n\
             package_ids\n\
             ) VALUES (?, ?)\n\
             ON CONFLICT(project_fingerprint) DO UPDATE SET package_ids=excluded.package_ids;\n",
        )?;
        stmt.execute(params![
            project_fingerprint,
            bincode::encode_to_vec(
                package_ids.iter().map(|s| s.repr()).collect_vec(),
                BINCODE_CONFIG
            )?
        ])?;
        Ok(())
    }

    /// Retrieve the access log stored for the project identified by
    /// `project_fingerprint`.
    ///
    /// Returns an empty set if nothing has been recorded for that project.
    ///
    /// # Errors
    ///
    /// Fails if no connection can be obtained from the pool, if the query
    /// fails, or if the stored blob can't be decoded.
    #[tracing::instrument(skip_all, level = "trace")]
    pub fn get_access_log(
        &self,
        project_fingerprint: &str,
    ) -> Result<BTreeSet<PackageId>, anyhow::Error> {
        let connection = self.connection_pool.get()?;
        let mut stmt = connection.prepare_cached(
            "SELECT package_ids FROM project2package_id_access_log WHERE project_fingerprint = ?",
        )?;
        let mut rows = stmt.query(params![project_fingerprint])?;
        let Some(row) = rows.next()? else {
            return Ok(BTreeSet::new());
        };
        // Borrow-decode straight from the row's bytes: the `&str`s are only
        // needed long enough to build owned `PackageId`s.
        // `get_ref` is used instead of `get_ref_unwrap` so that a column
        // access failure surfaces as an error rather than a panic.
        let package_ids: Vec<&str> =
            bincode::borrow_decode_from_slice(row.get_ref(0)?.as_bytes()?, BINCODE_CONFIG)?.0;
        Ok(package_ids.into_iter().map(PackageId::new).collect())
    }

    /// Create `cache_dir` if it is missing, then open the SQLite database
    /// `<cache_dir>/<cache_fingerprint>.db` through a connection pool whose
    /// maximum size is the number of CPUs.
    ///
    /// The database is switched to WAL journaling and the
    /// `project2package_id_access_log` table is created if it doesn't exist.
    ///
    /// # Errors
    ///
    /// Fails if the directory can't be created, if the pool can't be built,
    /// or if any of the setup statements fails.
    fn setup_database(
        cache_fingerprint: &str,
        cache_dir: &std::path::Path,
    ) -> Result<r2d2::Pool<SqliteConnectionManager>, anyhow::Error> {
        fs_err::create_dir_all(cache_dir).with_context(|| {
            format!(
                "Failed to create the cache directory at {}",
                cache_dir.to_string_lossy()
            )
        })?;
        let cache_path = cache_dir.join(format!("{cache_fingerprint}.db"));

        /// Applies the per-connection pragmas to every connection in the pool.
        #[derive(Debug)]
        struct SqlitePragmas;

        impl r2d2::CustomizeConnection<rusqlite::Connection, rusqlite::Error> for SqlitePragmas {
            fn on_acquire(&self, conn: &mut rusqlite::Connection) -> Result<(), rusqlite::Error> {
                // Both settings are scoped to a single connection and are not
                // stored in the database file, so they must be applied to each
                // pooled connection rather than to just one of them.
                // `mmap_size` is 250 MiB.
                conn.execute_batch(
                    "PRAGMA mmap_size=262144000;\n\
                     PRAGMA synchronous=NORMAL;",
                )?;
                Ok(())
            }
        }

        let manager = SqliteConnectionManager::file(cache_path);
        let pool = r2d2::Pool::builder()
            .max_size(num_cpus::get() as u32)
            .connection_customizer(Box::new(SqlitePragmas))
            .build(manager)
            .context("Failed to open/create a SQLite database to store the rustdoc cache")?;

        let connection = pool.get()?;
        // The WAL journal mode is persisted in the database file: setting it
        // through a single connection is enough.
        connection.execute_batch("PRAGMA journal_mode=WAL;")?;
        connection.execute(
            "CREATE TABLE IF NOT EXISTS project2package_id_access_log (\n\
             project_fingerprint TEXT NOT NULL,\n\
             package_ids BLOB NOT NULL,\n\
             PRIMARY KEY (project_fingerprint)\n\
             )",
            [],
        )?;
        Ok(pool)
    }
}
/// Compute a fingerprint for the `cargo` binary that ships with `toolchain_name`.
///
/// The fingerprint is the raw standard output of
/// `rustup run <toolchain_name> cargo --verbose --version`.
///
/// # Errors
///
/// Returns an error if `rustup` can't be spawned, if the command exits with a
/// non-zero status, or if its standard output is not valid UTF-8.
/// For the first two cases the top-level message explains how to install the
/// toolchain, while the underlying cause (the I/O error, or the exit status
/// plus the captured standard error) is attached to the error chain.
pub(crate) fn cargo_fingerprint(toolchain_name: &str) -> Result<String, anyhow::Error> {
    let err_msg = || {
        format!(
            "Failed to run `cargo --verbose --version` on `{toolchain_name}`.\n\
             Is the `{toolchain_name}` toolchain installed?\n\
             If not, invoke\n\n\
             rustup toolchain install {toolchain_name} -c rust-docs-json\n\
             to fix it.",
        )
    };
    let mut cmd = std::process::Command::new("rustup");
    cmd.arg("run")
        .arg(toolchain_name)
        .arg("cargo")
        .arg("--verbose")
        .arg("--version");
    let output = cmd.output().with_context(err_msg)?;
    if !output.status.success() {
        // Keep what the command reported instead of discarding it: it becomes
        // the cause of the error, mirroring the spawn-failure path above.
        // The top-level message is the same as before.
        let stderr = String::from_utf8_lossy(&output.stderr);
        let cause = anyhow::anyhow!(
            "`rustup run {toolchain_name} cargo --verbose --version` failed ({}).\nStderr:\n{}",
            output.status,
            stderr.trim_end()
        );
        return Err(cause.context(err_msg()));
    }
    String::from_utf8(output.stdout).with_context(|| {
        format!("An invocation of `cargo --verbose --version` for the `{toolchain_name}` toolchain returned non-UTF8 data as output.")
    })
}