use std::borrow::Cow;
use std::collections::BTreeSet;
use std::sync::Arc;
use ahash::{HashMap, HashSet, HashSetExt};
use elsa::FrozenMap;
use guppy::PackageId;
use guppy::graph::PackageGraph;
use rustdoc_types::Item;
use tracing::Span;
use tracing_log_error::log_error;
use crate::TOOLCHAIN_CRATES;
use crate::cache::{CacheEntry, HydratedCacheEntry, RustdocCacheKey, RustdocGlobalFsCache};
use crate::compute::{CannotGetCrateData, ComputeProgress, compute_crate_docs};
use crate::indexing::CrateIndexer;
use crate::queries::Crate;
use rustdoc_ext::GlobalItemId;
use rayon::iter::IntoParallelRefIterator;
/// An in-memory collection of indexed crate documentation, keyed by package id
/// and backed by an on-disk cache.
///
/// Lookups go through `get_or_compute`/`compute_batch`, which consult the
/// in-memory maps first, then the on-disk cache, and finally fall back to
/// `compute_crate_docs`.
pub struct CrateCollection<I: CrateIndexer> {
/// The package graph supplied at construction time. It is used to build cache
/// keys, to filter package ids and to locate the workspace root.
package_graph: PackageGraph,
/// Indexes crate data (via `index` for raw cache entries and `index_raw` for
/// freshly computed docs), producing a `Crate` and its annotations.
indexer: I,
/// In-memory cache: indexed crates, keyed by package id.
package_id2krate: FrozenMap<PackageId, Box<Crate>>,
/// In-memory cache: the annotations produced by the indexer, keyed by package id.
annotated_items: FrozenMap<PackageId, Box<I::Annotations>>,
/// The on-disk cache used to load and store crate documentation, as well as
/// the access log.
disk_cache: RustdocGlobalFsCache<I::Annotations>,
/// The key under which the access log is read from and persisted to the
/// on-disk cache.
project_fingerprint: String,
/// The package ids that were requested via `get_or_compute`.
/// Only the keys matter: the map is used as a set and is persisted on drop.
access_log: FrozenMap<PackageId, Box<()>>,
/// Forwarded to `compute_crate_docs` whenever docs must be computed.
toolchain_name: String,
/// Forwarded to `compute_crate_docs` whenever docs must be computed.
// NOTE(review): presumably a progress-reporting sink — confirm against `ComputeProgress`.
progress: Box<dyn ComputeProgress + Send + Sync>,
}
impl<I: CrateIndexer> std::fmt::Debug for CrateCollection<I> {
    /// Formats the collection for diagnostics.
    ///
    /// Only the package graph is printed; every other field is elided and the
    /// output is marked as non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("CrateCollection");
        builder.field("package_graph", &self.package_graph);
        builder.finish_non_exhaustive()
    }
}
impl<I: CrateIndexer> CrateCollection<I> {
/// Builds a new collection with empty in-memory caches and an empty access log.
///
/// All arguments are stored as-is; nothing is loaded or computed until one of
/// the lookup methods is invoked.
pub fn new(
    indexer: I,
    toolchain_name: String,
    package_graph: PackageGraph,
    project_fingerprint: String,
    disk_cache: RustdocGlobalFsCache<I::Annotations>,
    progress: Box<dyn ComputeProgress + Send + Sync>,
) -> Self {
    // The three maps always start out empty.
    let package_id2krate = FrozenMap::new();
    let annotated_items = FrozenMap::new();
    let access_log = FrozenMap::new();

    Self {
        package_graph,
        indexer,
        package_id2krate,
        annotated_items,
        disk_cache,
        project_fingerprint,
        access_log,
        toolchain_name,
        progress,
    }
}
/// Returns a reference to the package graph this collection was built with.
pub fn package_graph(&self) -> &PackageGraph {
    let graph = &self.package_graph;
    graph
}
/// Warms up the collection by batch-computing a set of crates.
///
/// The set is made of:
/// - the package ids found in the access log stored in the on-disk cache
///   (an empty set if the log cannot be retrieved; the failure is logged);
/// - the caller-provided `extra_package_ids`;
/// - the crates listed in `TOOLCHAIN_CRATES`.
///
/// Ids coming from the first two sources are dropped if the package graph has
/// no metadata for them. Toolchain crates are added unconditionally.
///
/// # Errors
///
/// Propagates whatever error `compute_batch` returns.
#[tracing::instrument(skip_all, level = "trace")]
pub fn bootstrap<Iter>(&self, extra_package_ids: Iter) -> Result<(), anyhow::Error>
where
    Iter: Iterator<Item = PackageId>,
{
    let previously_accessed = match self.disk_cache.get_access_log(&self.project_fingerprint) {
        Ok(ids) => ids,
        Err(e) => {
            log_error!(
                *e,
                level: tracing::Level::WARN,
                "Failed to retrieve the crate access log from the on-disk cache"
            );
            BTreeSet::new()
        }
    };

    // Only ids known to the package graph survive this step.
    let mut wanted: BTreeSet<_> = previously_accessed
        .into_iter()
        .chain(extra_package_ids)
        .filter(|id| self.package_graph.metadata(id).is_ok())
        .collect();
    // Toolchain crates are appended after the filter, so they are never
    // checked against the package graph.
    wanted.extend(TOOLCHAIN_CRATES.iter().map(|name| PackageId::new(*name)));

    self.compute_batch(wanted.into_iter())
}
/// Converts an entry loaded from the on-disk cache into a `Crate` and its
/// annotations.
///
/// Already-processed entries are unpacked directly via `into_crate`; raw
/// entries are handed to the indexer together with `package_id`.
fn process_cache_entry(
    &self,
    entry: HydratedCacheEntry<I::Annotations>,
    package_id: PackageId,
) -> (Crate, I::Annotations) {
    let raw = match entry {
        // Nothing left to do: the entry already carries the indexed data.
        HydratedCacheEntry::Processed(ready) => return ready.into_crate(),
        HydratedCacheEntry::Raw(raw) => raw,
    };
    let indexed = self.indexer.index(raw, package_id);
    (indexed.krate, indexed.annotations)
}
/// Retrieves (or computes) the documentation for a batch of crates and stores
/// the results in the in-memory cache.
///
/// The steps are:
/// 1. Skip every package id that is already in the in-memory cache.
/// 2. Look up the remaining ids in the on-disk cache, in parallel.
/// 3. Hand whatever is still missing to `compute_crate_docs` as a single batch.
/// 4. Index the freshly computed docs in parallel.
/// 5. Store the indexed crates on disk (best-effort) and in memory.
///
/// # Errors
///
/// Returns an error if `compute_crate_docs` fails.
/// Failures to read from, convert for, or write to the on-disk cache are
/// logged as warnings and never prevent a crate from being stored in memory.
#[tracing::instrument(skip_all, level = "trace")]
pub fn compute_batch<Iter>(&self, package_ids: Iter) -> Result<(), anyhow::Error>
where
    Iter: Iterator<Item = PackageId>,
{
    use rayon::prelude::{IntoParallelIterator, ParallelIterator};

    // Step 1: drop the ids we already have in memory.
    let missing_ids = package_ids
        .filter(|package_id| self.get_crate_by_package_id(package_id).is_none())
        .collect::<BTreeSet<_>>();

    // Step 2: query the on-disk cache in parallel.
    // The closure captures the graph and the cache by reference rather than
    // `self`, and re-enters the current tracing span on each invocation.
    let package_graph = &self.package_graph;
    let cache = &self.disk_cache;
    let tracing_span = Span::current();
    let map_op = move |id: PackageId| {
        tracing_span.in_scope(|| {
            let cache_key = RustdocCacheKey::new(&id, package_graph);
            match cache.get(&cache_key, package_graph) {
                Ok(None) => (id, None),
                Ok(Some(entry)) => (id, Some(entry)),
                Err(e) => {
                    log_error!(
                        *e,
                        level: tracing::Level::WARN,
                        package_id = id.repr(),
                        "Failed to retrieve the documentation from the on-disk cache",
                    );
                    // A broken cache entry is treated as a cache miss.
                    (id, None)
                }
            }
        })
    };

    let mut to_be_computed = vec![];
    for (package_id, cached) in missing_ids.into_par_iter().map(map_op).collect::<Vec<_>>() {
        if let Some(entry) = cached {
            let (krate, annotations) = self.process_cache_entry(entry, package_id.clone());
            self.annotated_items
                .insert(package_id.clone(), Box::new(annotations));
            self.package_id2krate.insert(package_id, Box::new(krate));
            continue;
        }
        to_be_computed.push(package_id);
    }

    // Step 3: compute the docs for everything that is still missing.
    let results = compute_crate_docs(
        &self.toolchain_name,
        &self.package_graph,
        to_be_computed.into_iter(),
        self.package_graph.workspace().root().as_std_path(),
        self.progress.as_ref(),
    )?;

    // Step 4: index the computed docs in parallel.
    let indexer = &self.indexer;
    let package_graph = self.package_graph();
    let indexed_krates = results
        .into_par_iter()
        .map(move |(package_id, krate)| {
            let result = indexer.index_raw(krate, package_id.to_owned());
            (
                package_id,
                result.krate,
                result.annotations,
                result.can_cache_indexes,
            )
        })
        .collect::<Vec<_>>();

    // Step 5a: build the on-disk cache entries in parallel.
    // Crates whose entry cannot be built are simply not cached on disk.
    let mut cache_entries: HashMap<_, _> = indexed_krates
        .par_iter()
        .filter_map(|(package_id, krate, annotations, cache_indexes)| {
            let data = if *cache_indexes {
                CacheEntry::from_crate(krate, annotations)
            } else {
                CacheEntry::from_crate_raw(krate)
            };
            let cache_key = RustdocCacheKey::new(package_id, package_graph);
            match data {
                Ok(v) => Some((package_id, (cache_key, v))),
                Err(e) => {
                    log_error!(
                        *e,
                        level: tracing::Level::WARN,
                        package_id = package_id.repr(),
                        "Failed to convert the computed JSON docs into the format used by the on-disk cache",
                    );
                    None
                }
            }
        })
        .collect();

    // Step 5b: persist the entries to disk, best-effort.
    for (package_id, _, _, _) in &indexed_krates {
        let Some((cache_key, cache_data)) = cache_entries.remove(&package_id) else {
            continue;
        };
        if let Err(e) = self
            .disk_cache
            .insert(&cache_key, cache_data, package_graph)
        {
            log_error!(
                *e,
                level: tracing::Level::WARN,
                package_id = package_id.repr(),
                "Failed to store the computed JSON docs in the on-disk cache",
            );
        }
    }
    // The map borrows from `indexed_krates`: release it before consuming the vector.
    drop(cache_entries);

    // Step 5c: always keep the computed docs in memory, even when on-disk
    // caching failed. Computing them is the expensive part, and dropping them
    // here would force a later `get_or_compute` call to redo the work.
    for (package_id, krate, annotations, _) in indexed_krates {
        self.annotated_items
            .insert(package_id.clone(), Box::new(annotations));
        self.package_id2krate.insert(package_id, Box::new(krate));
    }
    Ok(())
}
/// Returns the indexed documentation for `package_id`, computing it if needed.
///
/// The access is always recorded in the access log first. The lookup then
/// proceeds in three stages:
/// 1. the in-memory cache;
/// 2. the on-disk cache (a read failure is logged and treated as a miss);
/// 3. `compute_crate_docs`, followed by indexing and a best-effort write to
///    the on-disk cache.
///
/// Whatever was loaded or computed is stored in the in-memory cache before
/// returning.
///
/// # Errors
///
/// Returns `CannotGetCrateData` if `compute_crate_docs` fails.
///
/// # Panics
///
/// Panics if `compute_crate_docs` succeeds but its output has no entry for
/// `package_id`.
pub fn get_or_compute(&self, package_id: &PackageId) -> Result<&Crate, CannotGetCrateData> {
    self.access_log.insert(package_id.to_owned(), Box::new(()));

    // Stage 1: in-memory cache.
    if let Some(krate) = self.get_crate_by_package_id(package_id) {
        return Ok(krate);
    }

    // Stage 2: on-disk cache.
    let cache_key = RustdocCacheKey::new(package_id, &self.package_graph);
    let cached = match self.disk_cache.get(&cache_key, &self.package_graph) {
        Ok(hit) => hit,
        Err(e) => {
            log_error!(*e, level: tracing::Level::WARN, package_id = package_id.repr(), "Failed to retrieve the documentation from the on-disk cache");
            None
        }
    };

    let (krate, annotations) = match cached {
        Some(entry) => self.process_cache_entry(entry, package_id.clone()),
        None => {
            // Stage 3: compute from scratch.
            let mut computed = compute_crate_docs(
                &self.toolchain_name,
                &self.package_graph,
                std::iter::once(package_id.to_owned()),
                self.package_graph.workspace().root().as_std_path(),
                self.progress.as_ref(),
            )
            .map_err(|e| CannotGetCrateData {
                package_spec: package_id.to_string(),
                source: Arc::new(e),
            })?;
            let raw_krate = computed.remove(package_id).unwrap();
            let indexed = self.indexer.index_raw(raw_krate, package_id.to_owned());

            // Writing to the on-disk cache is best-effort: failures are only logged.
            let serialized = if indexed.can_cache_indexes {
                CacheEntry::from_crate(&indexed.krate, &indexed.annotations)
            } else {
                CacheEntry::from_crate_raw(&indexed.krate)
            };
            match serialized {
                Err(e) => {
                    log_error!(
                        *e,
                        level: tracing::Level::WARN,
                        package_id = package_id.repr(),
                        "Failed to convert the computed JSON docs into the format used by the on-disk cache",
                    );
                }
                Ok(cache_entry) => {
                    let stored = self
                        .disk_cache
                        .insert(&cache_key, cache_entry, &self.package_graph);
                    if let Err(e) = stored {
                        log_error!(
                            *e,
                            level: tracing::Level::WARN,
                            package_id = package_id.repr(),
                            "Failed to store the computed JSON docs in the on-disk cache",
                        );
                    }
                }
            }
            (indexed.krate, indexed.annotations)
        }
    };

    self.annotated_items
        .insert(package_id.to_owned(), Box::new(annotations));
    self.package_id2krate
        .insert(package_id.to_owned(), Box::new(krate));
    Ok(self.get_crate_by_package_id(package_id).unwrap())
}
/// Looks up a crate in the in-memory cache only.
///
/// Returns `None` if the crate has not been loaded or computed yet; neither
/// the on-disk cache nor `compute_crate_docs` is consulted.
pub fn get_crate_by_package_id(&self, package_id: &PackageId) -> Option<&Crate> {
    let krates = &self.package_id2krate;
    krates.get(package_id)
}
/// Looks up the annotations stored in memory for the given package.
///
/// Returns `None` if nothing has been stored for `package_id` yet.
pub fn get_annotated_items(&self, package_id: &PackageId) -> Option<&I::Annotations> {
    let annotations = &self.annotated_items;
    annotations.get(package_id)
}
/// Retrieves the item identified by `type_id`, loading or computing the docs
/// of the crate it belongs to if necessary.
///
/// # Panics
///
/// Panics if `get_or_compute` fails for the package that `type_id` points to.
pub fn get_item_by_global_type_id(&self, type_id: &GlobalItemId) -> Cow<'_, Item> {
    self.get_or_compute(&type_id.package_id)
        .unwrap()
        .get_item_by_local_type_id(&type_id.rustdoc_item_id)
}
/// Retrieves the canonical path of the item identified by `type_id`, loading
/// or computing the docs of the crate it belongs to if necessary.
///
/// # Errors
///
/// Returns an error if the docs for the crate that `type_id` points to cannot
/// be retrieved, or if the crate cannot provide a canonical path for the item.
pub fn get_canonical_path_by_global_type_id(
    &self,
    type_id: &GlobalItemId,
) -> Result<&[String], anyhow::Error> {
    // Propagate the failure instead of panicking: the signature already has
    // an error channel, and the sibling lookups in this type use `?` as well.
    let krate = self.get_or_compute(&type_id.package_id)?;
    krate.get_canonical_path(type_id)
}
/// Resolves an item id that is local to `used_by_package_id` into its global
/// id and canonical path.
///
/// The item summary found in the using crate yields the id of the crate where
/// the item is defined, plus a path. That path is then looked up in the
/// defining crate to obtain the global item id and, from it, the canonical
/// path.
///
/// `re_exporter_crate_name` is forwarded as a hint when determining the
/// defining package, unless it is equal to the first segment of the summary
/// path, in which case no hint is passed.
///
/// # Errors
///
/// Returns an error if the docs of either crate cannot be retrieved, or if any
/// of the intermediate lookups fails.
pub fn get_canonical_path_by_local_type_id(
    &self,
    used_by_package_id: &PackageId,
    item_id: &rustdoc_types::Id,
    re_exporter_crate_name: Option<&str>,
) -> Result<(GlobalItemId, &[String]), anyhow::Error> {
    let (definition_package_id, path) = {
        let user_krate = self.get_or_compute(used_by_package_id)?;
        let summary = user_krate.get_summary_by_local_type_id(item_id)?;

        // The hint is dropped when it matches the first path segment.
        let first_segment = summary.path.first().map(|s| s.as_str());
        let hint = if first_segment == re_exporter_crate_name {
            None
        } else {
            re_exporter_crate_name
        };

        let owner_id = user_krate.compute_package_id_for_crate_id_with_hint(
            summary.crate_id,
            self,
            hint,
        )?;
        (owner_id, summary.path.clone())
    };

    let type_id = self
        .get_or_compute(&definition_package_id)?
        .get_item_id_by_path(&path, self)??;
    let canonical_path = self.get_canonical_path_by_global_type_id(&type_id)?;
    Ok((type_id.clone(), canonical_path))
}
}
impl<I: CrateIndexer> rustdoc_ir::CanonicalPathResolver for CrateCollection<I> {
    /// Resolves the canonical path of `id` as an owned vector of segments.
    ///
    /// Any error returned by the underlying lookup is discarded and mapped to
    /// `None`.
    fn canonical_path(&self, id: &GlobalItemId) -> Option<Vec<String>> {
        match self.get_canonical_path_by_global_type_id(id) {
            Ok(segments) => Some(segments.to_vec()),
            Err(_) => None,
        }
    }
}
impl<I: CrateIndexer> Drop for CrateCollection<I> {
    /// Persists the access log to the on-disk cache when the collection goes
    /// away.
    ///
    /// The log is taken out of `self` (leaving an empty map behind) and its
    /// keys are stored under the project fingerprint. A failure is logged as a
    /// warning and otherwise ignored.
    fn drop(&mut self) {
        let package_ids = std::mem::take(&mut self.access_log)
            .into_map()
            .into_keys()
            .collect();
        let outcome = self
            .disk_cache
            .persist_access_log(&package_ids, &self.project_fingerprint);
        let Err(e) = outcome else {
            return;
        };
        log_error!(
            *e,
            level: tracing::Level::WARN,
            "Failed to persist the crate access log to the on-disk cache",
        );
    }
}