use super::provider::LOG_TARGET;
use super::{DocMetricState, DocsData, DocsMetrics};
use crate::Result;
use crate::facts::CrateSpec;
use chrono::{DateTime, Utc};
use ohno::{IntoAppError, app_err};
use regex::Regex;
use std::collections::HashMap;
use std::io::Read;
use std::sync::LazyLock;
/// Matches a bracketed, backtick-wrapped link such as ``[`Name`]``; capture group 1 is the text between the
/// backticks (one or more characters that are neither a backtick nor `]`).
static INTRA_DOC_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`\]]+)`\]").expect("invalid regex"));
/// Matches a span delimited by triple backticks (non-greedy, may cross line boundaries); used to strip fenced code
/// before scanning for links.
static CODE_BLOCK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"```[\s\S]*?```").expect("invalid regex"));
/// Matches a reference definition of the form ``[`Name`]: target``; capture group 1 is the backticked text and
/// capture group 2 is the first run of non-whitespace characters after the colon.
static LINK_REFERENCE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`\]]+)`\]:\s*(\S+)").expect("invalid regex"));
/// Generates the glue needed to support one rustdoc JSON format version.
///
/// `$version` is the format version as a string literal and `$module` is the crate/module providing the matching
/// `Crate`, `Item`, `Id`, `Visibility` and `ItemEnum` types. Each invocation emits:
///
/// * an `ItemLike` implementation for `$module::Item`, and
/// * a `calculate_metrics_v<version>` function that deserializes a `serde_json::Value` into `$module::Crate` and
///   hands its index and root id to `process_crate_items`.
macro_rules! generate_version_support {
    ($version:literal, $module:ident) => {
        // Expose the fields the shared metrics code needs through the version-independent trait.
        impl ItemLike for $module::Item {
            type Id = $module::Id;

            fn name(&self) -> Option<&str> {
                self.name.as_deref()
            }

            fn docs(&self) -> Option<&str> {
                self.docs.as_deref()
            }

            fn links(&self) -> &HashMap<String, Self::Id> {
                &self.links
            }
        }

        pastey::paste! {
            #[doc = $version]
            fn [<calculate_metrics_v $version>](json_value: serde_json::Value, crate_spec: &CrateSpec) -> Result<DocsMetrics> {
                use $module as rustdoc_types;

                log::debug!(target: LOG_TARGET, "Parsing rustdoc JSON v{} for {crate_spec}", $version);

                let parsed: rustdoc_types::Crate = serde_json::from_value(json_value)
                    .into_app_err_with(|| format!("could not parse rustdoc JSON v{} structure for {crate_spec}", $version))?;

                let index_len = parsed.index.len();
                log::debug!(target: LOG_TARGET, "Successfully parsed rustdoc JSON v{} for {crate_spec}, found {index_len} items in index", $version);
                log::debug!(target: LOG_TARGET, "Root item ID for {crate_spec}: {:?}", parsed.root);

                // Version-specific predicates; everything else is handled generically.
                let has_public_visibility =
                    |item: &rustdoc_types::Item| matches!(item.visibility, rustdoc_types::Visibility::Public);
                let is_use_declaration =
                    |item: &rustdoc_types::Item| matches!(item.inner, rustdoc_types::ItemEnum::Use(_));

                let metrics = process_crate_items(
                    &parsed.index,
                    &parsed.root,
                    crate_spec,
                    has_public_visibility,
                    is_use_declaration,
                );
                Ok(metrics)
            }
        }
    };
}
// One invocation per rustdoc JSON format version handled by this file. Each generates a
// `calculate_metrics_v<version>` function and an `ItemLike` impl for that version's `Item` type;
// the `match` in `calculate_docs_metrics` dispatches to these functions by `format_version`.
generate_version_support!("50", rustdoc_types_v50);
generate_version_support!("51", rustdoc_types_v51);
generate_version_support!("52", rustdoc_types_v52);
generate_version_support!("53", rustdoc_types_v53);
generate_version_support!("54", rustdoc_types_v54);
generate_version_support!("55", rustdoc_types_v55);
generate_version_support!("56", rustdoc_types_v56);
generate_version_support!("57", rustdoc_types_v57);
pub fn calculate_docs_metrics(reader: impl Read, crate_spec: &CrateSpec, now: DateTime<Utc>) -> Result<DocsData> {
log::debug!(target: LOG_TARGET, "Parsing rustdoc JSON for {crate_spec}");
let json_value: serde_json::Value =
serde_json::from_reader(reader).into_app_err_with(|| format!("could not parse JSON for {crate_spec}"))?;
let format_version = json_value
.get("format_version")
.and_then(serde_json::Value::as_u64)
.ok_or_else(|| app_err!("rustdoc JSON for {crate_spec} is missing 'format_version' field"))?;
log::debug!(target: LOG_TARGET, "Found rustdoc JSON format version {format_version} for {crate_spec}");
let state = match format_version {
50 => calculate_metrics_v50(json_value, crate_spec).map(DocMetricState::Found)?,
51 => calculate_metrics_v51(json_value, crate_spec).map(DocMetricState::Found)?,
52 => calculate_metrics_v52(json_value, crate_spec).map(DocMetricState::Found)?,
53 => calculate_metrics_v53(json_value, crate_spec).map(DocMetricState::Found)?,
54 => calculate_metrics_v54(json_value, crate_spec).map(DocMetricState::Found)?,
55 => calculate_metrics_v55(json_value, crate_spec).map(DocMetricState::Found)?,
56 => calculate_metrics_v56(json_value, crate_spec).map(DocMetricState::Found)?,
57 => calculate_metrics_v57(json_value, crate_spec).map(DocMetricState::Found)?,
_ => {
log::debug!(target: LOG_TARGET, "Unsupported rustdoc JSON format version {format_version} for {crate_spec}");
DocMetricState::UnknownFormatVersion(format_version)
}
};
Ok(DocsData {
timestamp: now,
metrics: state,
})
}
/// Aggregates documentation metrics over every item in a rustdoc JSON index.
///
/// * `index` - all items of the crate, keyed by id.
/// * `root_id` - id of the crate's root item; used to detect crate-level documentation.
/// * `crate_spec` - the crate being analysed; used for log messages only.
/// * `is_public` - returns `true` for items that are publicly visible.
/// * `is_use_item` - returns `true` for `use` items, which are not counted as API elements.
///
/// Items rejected by `is_public` or accepted by `is_use_item` are only tallied for logging. Every remaining item
/// counts as one public API element; it counts as documented when its docs are present and not blank. For
/// documented items the fenced code examples and broken links found in the docs are added to the totals.
///
/// The coverage percentage is `documented / public * 100`, or `100.0` when there are no public API elements.
fn process_crate_items<Id, Item>(
    index: &HashMap<Id, Item>,
    root_id: &Id,
    crate_spec: &CrateSpec,
    is_public: impl Fn(&Item) -> bool,
    is_use_item: impl Fn(&Item) -> bool,
) -> DocsMetrics
where
    Id: core::fmt::Debug + Eq + core::hash::Hash,
    Item: ItemLike,
{
    let mut number_of_public_api_elements = 0;
    let mut documented_count = 0;
    let mut number_of_examples_in_docs = 0;
    let mut has_crate_level_docs = false;
    let mut broken_doc_links = 0;
    let mut private_items = 0;
    let mut use_items = 0;

    let index_len = index.len();
    log::debug!(target: LOG_TARGET, "Starting to iterate through {index_len} items for {crate_spec}");

    for (id, item) in index {
        if !is_public(item) {
            private_items += 1;
            continue;
        }
        if is_use_item(item) {
            use_items += 1;
            continue;
        }

        number_of_public_api_elements += 1;

        // Blank or whitespace-only docs do not count as documentation.
        let Some(docs) = item.docs().filter(|text| !text.trim().is_empty()) else {
            continue;
        };
        documented_count += 1;

        // An example has an opening and a closing fence line, so two fence lines make one example.
        // Integer division drops an unpaired trailing fence.
        let fences = docs.lines().filter(|line| line.trim_start().starts_with("```")).count();
        number_of_examples_in_docs += fences / 2;

        broken_doc_links += count_broken_links::<Item::Id>(docs, item.links(), item.name());

        // The root item is identified by its id alone. Its name is deliberately not compared with
        // `crate_spec.name()`: rustdoc names the root module after the library target (hyphens become
        // underscores), which may differ from the package name, and that mismatch would hide existing
        // crate-level docs for packages such as `foo-bar`.
        if id == root_id {
            log::debug!(target: LOG_TARGET, "Found crate-level docs for {crate_spec} (root item has documentation)");
            has_crate_level_docs = true;
        }
    }

    log::debug!(target: LOG_TARGET, "Iteration complete for {crate_spec}: processed {index_len} items (private={private_items}, use_items={use_items}, public_api={number_of_public_api_elements})");
    log::debug!(target: LOG_TARGET, "Finished processing items for {crate_spec}: public_api={number_of_public_api_elements}, documented={documented_count}, examples={number_of_examples_in_docs}, broken_links={broken_doc_links}, has_crate_docs={has_crate_level_docs}");

    // A crate with no public API elements has nothing left undocumented, so it is reported as fully covered.
    #[expect(clippy::cast_precision_loss, reason = "loss of precision acceptable for percentage calculation")]
    let doc_coverage_percentage = if number_of_public_api_elements > 0 {
        documented_count as f64 / number_of_public_api_elements as f64 * 100.0
    } else {
        100.0
    };

    log::debug!(target: LOG_TARGET, "Calculated coverage percentage for {crate_spec}: {doc_coverage_percentage}%");

    let metrics = DocsMetrics {
        doc_coverage_percentage,
        public_api_elements: number_of_public_api_elements,
        // `documented_count` is only incremented for items already counted as public, so this cannot underflow.
        undocumented_elements: number_of_public_api_elements - documented_count,
        examples_in_docs: number_of_examples_in_docs as u64,
        has_crate_level_docs,
        broken_doc_links,
    };

    log::debug!(target: LOG_TARGET, "Returning DocsMetrics for {crate_spec}: {metrics:?}");
    metrics
}
/// Counts the backtick-style bracket links in `docs` that cannot be matched against `resolved_links`.
///
/// * `docs` - the documentation text of one item.
/// * `resolved_links` - the item's link map; a link is considered resolved when one of the keys derived from its
///   text (see below) is present.
/// * `_item_name` - currently unused.
///
/// Spans between triple backticks are removed before scanning. Every remaining ``[`text`]`` occurrence is then
/// classified, in this order:
///
/// 1. followed directly by `(` - an inline link, skipped;
/// 2. `text` contains `://` - an external link, skipped;
/// 3. `text` is two bytes or shorter - skipped;
/// 4. a ``[`text`]: target`` reference definition exists whose target contains `://` - the link points at a URL
///    rather than an item, so it is skipped as external (this also covers the definition line itself);
/// 5. resolved - `text`, `text` without a trailing `()`, either of those wrapped in backticks, the label of a
///    following `[label]`, the target of a matching reference definition, or (for `a::b` paths) the last path
///    component or its reference target is a key of `resolved_links`;
/// 6. otherwise the link is counted as broken.
///
/// Returns the number of links that reached step 6.
fn count_broken_links<Id>(docs: &str, resolved_links: &HashMap<String, Id>, _item_name: Option<&str>) -> u64 {
    let mut broken_count = 0;
    let mut skipped_inline = 0;
    let mut skipped_external = 0;
    let mut skipped_short = 0;
    let mut skipped_resolved = 0;

    log::trace!(target: LOG_TARGET, "Checking for broken links. Docs length: {} chars, resolved_links count: {}", docs.len(), resolved_links.len());

    // Links inside fenced code are sample text, not documentation links.
    let docs_without_code_blocks = CODE_BLOCK_REGEX.replace_all(docs, "");
    let docs_to_check = docs_without_code_blocks.as_ref();

    // Collect reference definitions: backticked label -> target.
    let mut link_references = HashMap::new();
    for cap in LINK_REFERENCE_REGEX.captures_iter(docs_to_check) {
        if let (Some(link_text), Some(target)) = (cap.get(1), cap.get(2)) {
            let _ = link_references.insert(link_text.as_str(), target.as_str());
            log::trace!(target: LOG_TARGET, "Found link reference: [`{}`] -> {}", link_text.as_str(), target.as_str());
        }
    }

    for cap in INTRA_DOC_LINK_REGEX.captures_iter(docs_to_check) {
        if let Some(link_text) = cap.get(1) {
            let text = link_text.as_str();
            let match_end = cap.get(0).expect("match exists").end();

            if docs_to_check.get(match_end..).is_some_and(|s| s.starts_with('(')) {
                skipped_inline += 1;
                log::trace!(target: LOG_TARGET, "Skipping inline link: [`{text}`](...)");
                continue;
            }

            // Label of a directly following `[label]`, if any.
            let inline_target = (|| {
                let remainder = docs_to_check.get(match_end..)?.strip_prefix('[')?;
                let end_pos = remainder.find(']')?;
                remainder.get(..end_pos)
            })();

            if text.contains("://") {
                skipped_external += 1;
                log::trace!(target: LOG_TARGET, "Skipping external link: [`{text}`]");
                continue;
            }

            let text_len = text.len();
            if text_len <= 2 {
                skipped_short += 1;
                log::trace!(target: LOG_TARGET, "Skipping short link (len={text_len}): [`{text}`]");
                continue;
            }

            let text_without_parens = text.strip_suffix("()").unwrap_or(text);

            // A reference definition that points at a URL makes this an ordinary external link. Such targets
            // never appear in `resolved_links`, so without this check both the usage and the definition line
            // would be reported as broken.
            let external_reference = link_references
                .get(text)
                .or_else(|| link_references.get(text_without_parens))
                .copied()
                .filter(|target| target.contains("://"));
            if let Some(target) = external_reference {
                skipped_external += 1;
                log::trace!(target: LOG_TARGET, "Skipping link with external reference target: [`{text}`] -> {target}");
                continue;
            }

            let text_with_backticks = format!("`{text}`");
            let text_without_parens_with_backticks = format!("`{text_without_parens}`");

            let is_resolved = resolved_links.contains_key(text)
                || resolved_links.contains_key(text_with_backticks.as_str())
                || resolved_links.contains_key(text_without_parens)
                || resolved_links.contains_key(text_without_parens_with_backticks.as_str())
                || inline_target.is_some_and(|target| resolved_links.contains_key(target))
                || link_references.get(text).is_some_and(|target| resolved_links.contains_key(*target))
                || link_references
                    .get(text_without_parens)
                    .is_some_and(|target| resolved_links.contains_key(*target))
                || (text_without_parens.contains("::") && {
                    // For a path such as `a::b::C`, also accept a match on the final component alone.
                    let last_component = text_without_parens.rsplit("::").next().unwrap_or("");
                    resolved_links.contains_key(last_component)
                        || link_references
                            .get(last_component)
                            .is_some_and(|target| resolved_links.contains_key(*target))
                });

            if is_resolved {
                skipped_resolved += 1;
                log::trace!(target: LOG_TARGET, "Resolved link: [`{text}`]");
                continue;
            }

            broken_count += 1;
            log::trace!(target: LOG_TARGET, "Broken link: [`{text}`]");
        }
    }

    let total_matches = broken_count + skipped_inline + skipped_external + skipped_short + skipped_resolved;
    log::trace!(target: LOG_TARGET, "Link analysis: total_matches={total_matches}, broken={broken_count}, skipped(inline={skipped_inline}, external={skipped_external}, short={skipped_short}, resolved={skipped_resolved})");

    broken_count
}
/// Version-independent view of a rustdoc JSON item.
///
/// Implemented for each supported version's `Item` type by `generate_version_support!`, so that
/// `process_crate_items` can work on any of them.
trait ItemLike {
/// Identifier type stored as the value of the map returned by `links`.
type Id;
/// The item's name, or `None` when it has none.
fn name(&self) -> Option<&str>;
/// The item's documentation text, or `None` when it has none.
fn docs(&self) -> Option<&str>;
/// The item's link map. `count_broken_links` treats the keys as link text and only checks for their presence.
fn links(&self) -> &HashMap<String, Self::Id>;
}