use crate::sources::CrateProvenance;
use crate::{RustdocData, sources::RustdocVersion};
use anyhow::{Context, Result, anyhow};
use fieldwork::Fieldwork;
use rustdoc_types::FORMAT_VERSION;
use semver::{Version, VersionReq};
use serde::Deserialize;
use trillium_client::{Client, Status};
use trillium_rustls::RustlsConfig;
use trillium_smol::ClientConfig;
use std::path::PathBuf;
/// Subset of the crates.io crate-lookup response body that this client deserializes.
#[derive(Deserialize)]
struct CratesIoResponse {
/// Crate-level metadata, read from the JSON key `crate` (`crate` is a Rust keyword,
/// hence the rename).
#[serde(rename = "crate")]
krate: CrateMetadata,
/// Entries of the `versions` array; `metadata` reduces each to its version number.
// NOTE(review): there is no `#[serde(default)]` here, so a response without a
// `versions` array fails to parse — confirm crates.io always sends one, including
// for `include=default_version`.
versions: Vec<CrateVersion>,
}
/// Crate-level fields taken from the `crate` object of the crates.io response.
#[derive(Deserialize, Debug)]
struct CrateMetadata {
/// Crate name as it appears in the crates.io response.
pub(super) name: String,
/// Default version from the crates.io response; `resolve` prefers it whenever it
/// satisfies the requested version requirement.
pub(super) default_version: Version,
/// Crate description text.
// NOTE(review): typed as a plain `String`, so a JSON `null` would fail
// deserialization — confirm crates.io never sends `null` here.
pub(super) description: String,
}
/// One entry of the `versions` array in the crates.io response; only the version
/// number is kept.
#[derive(Deserialize, Debug)]
struct CrateVersion {
/// Semantic version number, read from the JSON key `num`.
pub(super) num: Version,
}
/// Oldest rustdoc JSON format version that is tried when fetching from docs.rs or
/// probing the local cache; both walk from the client's `format_version` down to this.
const MIN_FORMAT_VERSION: u32 = 55;
/// Client that resolves crate versions through crates.io, downloads rustdoc JSON
/// from docs.rs, and keeps the decompressed JSON in an on-disk cache.
#[derive(Debug, Fieldwork)]
pub struct DocsRsClient {
/// HTTP client shared by the crates.io and docs.rs requests.
http_client: Client,
/// Root directory of the on-disk cache.
// NOTE(review): `#[field(get)]` presumably makes the `Fieldwork` derive emit a
// `cache_dir()` getter — confirm against the fieldwork documentation.
#[field(get)]
cache_dir: PathBuf,
/// Newest rustdoc JSON format version requested; set from
/// `rustdoc_types::FORMAT_VERSION` in `new`.
format_version: u32,
}
/// Result of `DocsRsClient::resolve`: a crate pinned to one concrete version.
#[derive(Debug)]
pub(super) struct ResolvedMetadata {
/// Crate name as returned by crates.io.
pub(super) name: String,
/// The concrete version selected for the requested version requirement.
pub(super) version: Version,
/// Crate description as returned by crates.io.
pub(super) description: String,
}
impl DocsRsClient {
/// Creates a client that caches downloaded rustdoc JSON under `cache_dir`.
///
/// The HTTP client is built from the default rustls configuration with
/// `with_default_pool()` applied, and the newest format version requested is
/// `rustdoc_types::FORMAT_VERSION`. The current implementation never returns `Err`.
pub fn new(cache_dir: PathBuf) -> Result<Self> {
    let tls = RustlsConfig::<ClientConfig>::default();
    Ok(Self {
        http_client: Client::new(tls).with_default_pool(),
        cache_dir,
        format_version: FORMAT_VERSION,
    })
}
/// Resolves `crate_name` plus `version_req` to one concrete version using crates.io.
///
/// Selection rules:
/// * `*` (`VersionReq::STAR`) always selects the crate's default version.
/// * Otherwise the default version is used when it satisfies `version_req`.
/// * Otherwise the highest listed version that satisfies `version_req` is used.
///
/// Returns `Ok(None)` when crates.io reports the crate as not found or when no
/// version satisfies the requirement.
///
/// # Errors
///
/// Propagates any error from the crates.io lookup performed by `metadata`.
pub(super) async fn resolve(
    &self,
    crate_name: &str,
    version_req: &VersionReq,
) -> Result<Option<ResolvedMetadata>> {
    let is_wildcard = *version_req == VersionReq::STAR;

    // The full version list is only needed when a specific requirement must be matched.
    let Some((metadata, versions)) = self.metadata(crate_name, !is_wildcard).await? else {
        return Ok(None);
    };
    let CrateMetadata {
        name,
        default_version,
        description,
    } = metadata;

    // `VersionReq::STAR` never matches pre-release versions, so relying on `matches`
    // alone would resolve a crate whose default version is a pre-release to `None`,
    // even though only the default version was requested from crates.io. Treat `*`
    // as "whatever the default version is".
    let version = if is_wildcard || version_req.matches(&default_version) {
        Some(default_version)
    } else {
        versions
            .into_iter()
            .filter(|candidate| version_req.matches(candidate))
            .max()
    };

    Ok(version.map(|version| ResolvedMetadata {
        name,
        version,
        description,
    }))
}
/// Loads rustdoc data for `crate_name` at `version`, using the cache when possible.
///
/// On a cache miss the docs.rs JSON is requested for each format version from
/// `self.format_version` down to `MIN_FORMAT_VERSION`; the first hit is
/// decompressed, written to the cache and normalized.
///
/// Returns `Ok(None)` when no format version is available on docs.rs or when the
/// downloaded JSON carries no crate version.
///
/// # Errors
///
/// Fails on cache I/O errors, HTTP errors, zstd decoding errors, and JSON parse or
/// normalization errors.
pub async fn get_crate(
    &self,
    crate_name: &str,
    version: &Version,
) -> Result<Option<RustdocData>> {
    log::debug!("DocsRsClient::get_crate('{}', {:?})", crate_name, version);

    if let Some(hit) = self.load_from_cache(crate_name, version).await? {
        return Ok(Some(hit));
    }

    // Walk the supported format versions newest-first and stop at the first download.
    let mut candidates = (MIN_FORMAT_VERSION..=self.format_version).rev();
    let compressed = loop {
        let Some(format_ver) = candidates.next() else {
            return Ok(None);
        };
        log::debug!(
            "Trying to fetch {} version {} with format {}",
            crate_name,
            version,
            format_ver
        );
        let attempt = self
            .fetch_from_docsrs(crate_name, version, format_ver)
            .await?;
        if let Some(body) = attempt {
            break body;
        }
    };

    let json = self.decompress_zstd(&compressed)?;

    // Read only the version header first; it decides where the file is cached.
    let RustdocVersion {
        format_version,
        crate_version,
    } = sonic_rs::serde::from_slice(&json).context("Failed to parse JSON metadata")?;
    let crate_version = match crate_version {
        Some(found) => found,
        None => return Ok(None),
    };
    log::info!("Fetched crate {crate_name}@{crate_version}, format version {format_version}");

    // The cache write happens before normalization, so the raw JSON is what is stored.
    let fs_path = self
        .save_to_cache(crate_name, &crate_version, format_version, &json)
        .await?;
    let crate_data = crate::conversions::load_and_normalize(&json, Some(format_version))
        .context("Failed to normalize rustdoc JSON")?;

    Ok(Some(RustdocData {
        crate_data,
        name: crate_name.to_string(),
        provenance: CrateProvenance::DocsRs,
        fs_path,
        version: Some(crate_version),
        path_to_id: Default::default(),
    }))
}
async fn metadata(
&self,
crate_name: &str,
include_versions: bool,
) -> Result<Option<(CrateMetadata, Vec<Version>)>> {
let include = if include_versions {
"versions"
} else {
"default_version"
};
let url = format!("https://crates.io/api/v1/crates/{crate_name}?include={include}");
log::debug!("Resolving latest version from crates.io: {}", &url);
let conn = self.http_client.get(url).await?;
if let Some(Status::NotFound) = conn.status() {
return Ok(None);
}
let mut conn = conn
.success()
.map_err(|e| anyhow!("Failed to query crates.io: {}", e))?;
let bytes = conn
.response_body()
.read_bytes()
.await
.context("Failed to read crates.io response")?;
let CratesIoResponse { krate, versions } =
sonic_rs::serde::from_slice(&bytes).context("Failed to parse crates.io response")?;
Ok(Some((krate, versions.into_iter().map(|v| v.num).collect())))
}
/// Builds the cache location for one crate version:
/// `<cache_dir>/<source_format_version>/<crate_name>/<version>.json`.
fn cache_path(
    &self,
    crate_name: &str,
    version: &Version,
    source_format_version: u32,
) -> PathBuf {
    let mut location = self.cache_dir.join(source_format_version.to_string());
    location.push(crate_name);
    location.push(format!("{version}.json"));
    location
}
/// Looks for a cached rustdoc JSON file for `crate_name` at `version`.
///
/// Format versions are probed from `self.format_version` down to
/// `MIN_FORMAT_VERSION`; the first file that exists is read, normalized and
/// returned. Returns `Ok(None)` when no cached file exists for any format version.
///
/// The reported `version` is parsed from the crate version stored in the JSON and
/// is `None` if that value is missing or not valid semver.
///
/// # Errors
///
/// Fails when an existing cache file cannot be read or normalized.
async fn load_from_cache(
    &self,
    crate_name: &str,
    version: &Version,
) -> Result<Option<RustdocData>> {
    for source_format in (MIN_FORMAT_VERSION..=self.format_version).rev() {
        let path = self.cache_path(crate_name, version, source_format);
        if !path.exists() {
            continue;
        }
        log::info!(
            "Found cached file with format version {}: {}",
            source_format,
            path.display()
        );

        let start = std::time::Instant::now();
        let json = async_fs::read(&path)
            .await
            .context("Failed to read cached file")?;
        let read_elapsed = start.elapsed();
        log::debug!(
            "⏱️ Read {} ({:.2} MB) in {:?}",
            crate_name,
            json.len() as f64 / 1_000_000.0,
            read_elapsed
        );

        let start = std::time::Instant::now();
        let crate_data = crate::conversions::load_and_normalize(&json, Some(source_format))
            .context("Failed to normalize cached JSON")?;
        let parse_elapsed = start.elapsed();
        log::debug!("⏱️ Parsed {} in {:?}", crate_name, parse_elapsed);

        let version = crate_data
            .crate_version
            .as_ref()
            .and_then(|v| Version::parse(v).ok());

        let data = RustdocData {
            crate_data,
            name: crate_name.to_string(),
            // Cache files are only ever written by `save_to_cache` with data that
            // `get_crate` downloaded from docs.rs, so a cache hit must carry the
            // same provenance as a fresh download.
            provenance: CrateProvenance::DocsRs,
            fs_path: path,
            version,
            path_to_id: Default::default(),
        };
        return Ok(Some(data));
    }
    Ok(None)
}
/// Downloads the zstd-compressed rustdoc JSON for one crate version and format
/// version from docs.rs.
///
/// A single redirect is followed; a `Location` that does not start with `http` is
/// treated as a path on `https://docs.rs`. Returns `Ok(None)` when either the
/// initial request or the redirect target answers 404, so callers can fall back to
/// another format version.
///
/// # Errors
///
/// Fails when a request cannot be sent, the final status is not a success, or the
/// body cannot be read.
async fn fetch_from_docsrs(
    &self,
    crate_name: &str,
    version: &Version,
    format_version: u32,
) -> Result<Option<Vec<u8>>> {
    let url = format!("https://docs.rs/crate/{crate_name}/{version}/json/{format_version}");
    log::debug!("Fetching from docs.rs: {}", url);

    let mut conn = self.http_client.get(url).await?;
    if let Some(Status::NotFound) = conn.status() {
        return Ok(None);
    }

    if let Some(status) = conn.status()
        && status.is_redirection()
        && let Some(location) = conn.response_headers().get("location")
    {
        let location_str = location.to_string();
        let redirect_url = if location_str.starts_with("http") {
            location_str
        } else {
            format!("https://docs.rs{}", location_str)
        };
        log::debug!("Following redirect to: {}", redirect_url);
        conn = self.http_client.get(redirect_url).await?;

        // A missing file behind the redirect means "not available", exactly like a
        // 404 on the first request. Without this check it would surface as a hard
        // error and abort the caller's fallback to older format versions.
        if let Some(Status::NotFound) = conn.status() {
            return Ok(None);
        }
    }

    let mut conn = conn
        .success()
        .map_err(|e| anyhow!("HTTP request failed: {}", e))?;
    let bytes = conn
        .response_body()
        .read_bytes()
        .await
        .context("Failed to read response body")?;
    Ok(Some(bytes))
}
/// Decodes a complete zstd-compressed buffer into a new byte vector.
///
/// # Errors
///
/// Returns the decoder's error with the context "Failed to decompress zstd data".
fn decompress_zstd(&self, compressed: &[u8]) -> Result<Vec<u8>> {
zstd::decode_all(compressed).context("Failed to decompress zstd data")
}
/// Writes decompressed rustdoc JSON to its cache location and returns that path.
///
/// The destination comes from `cache_path`; missing parent directories are created
/// before the file is written.
///
/// # Errors
///
/// Fails when the cache directory cannot be created or the file cannot be written.
async fn save_to_cache(
    &self,
    crate_name: &str,
    version: &Version,
    format_version: u32,
    json: &[u8],
) -> Result<PathBuf> {
    let destination = self.cache_path(crate_name, version, format_version);

    if let Some(directory) = destination.parent() {
        let created = async_fs::create_dir_all(directory).await;
        created.context("Failed to create cache directory")?;
    }

    let written = async_fs::write(&destination, json).await;
    written.context("Failed to write cache file")?;

    log::debug!(
        "Cached to {} (format version {})",
        destination.display(),
        format_version
    );
    Ok(destination)
}
}