use super::provider::LOG_TARGET;
use super::{DocsData, DocsMetrics};
use crate::Result;
use crate::facts::CrateSpec;
use ohno::{IntoAppError, app_err};
use regex::Regex;
use crate::HashMap;
use std::io::Read;
use std::sync::LazyLock;
/// Matches a bracketed, backtick-quoted link such as ``[`Foo`]``.
/// Capture group 1 is the text between the backticks (no backtick or `]` inside).
static INTRA_DOC_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`\]]+)`\]").expect("invalid regex"));
/// Matches a span delimited by triple backticks, non-greedily and across line breaks.
/// `count_broken_links` uses it to drop code blocks before scanning for links.
static CODE_BLOCK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"```[\s\S]*?```").expect("invalid regex"));
/// Matches a link reference definition of the form ``[`text`]: target``.
/// Capture group 1 is the text between the backticks, group 2 the whitespace-free target.
static LINK_REFERENCE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`\]]+)`\]:\s*(\S+)").expect("invalid regex"));
/// Generates the per-format-version support code for rustdoc JSON.
///
/// Given a version literal and the identifier of the matching `rustdoc_types`
/// module, the expansion contains:
///
/// - an `ItemLike` implementation for that module's `Item` type, exposing its
///   `name`, `docs` and `links` fields;
/// - a `calculate_metrics_v<version>` function that deserializes a
///   `serde_json::Value` into that module's `Crate` type and passes its index
///   and root id to `process_crate_items`.
macro_rules! generate_version_support {
    ($version:literal, $module:ident) => {
        impl ItemLike for $module::Item {
            type Id = $module::Id;

            fn name(&self) -> Option<&str> {
                self.name.as_deref()
            }

            fn docs(&self) -> Option<&str> {
                self.docs.as_deref()
            }

            fn links(&self) -> &std::collections::HashMap<String, Self::Id> {
                &self.links
            }
        }

        pastey::paste! {
            #[doc = $version]
            fn [<calculate_metrics_v $version>](json_value: serde_json::Value, crate_spec: &CrateSpec) -> Result<DocsMetrics> {
                use $module as rustdoc_types;

                log::debug!(target: LOG_TARGET, "Parsing rustdoc JSON v{} for {crate_spec}", $version);

                let parsed = serde_json::from_value::<rustdoc_types::Crate>(json_value)
                    .into_app_err_with(|| format!("parsing rustdoc JSON v{} structure for {crate_spec}", $version))?;

                let index_len = parsed.index.len();
                log::debug!(target: LOG_TARGET, "Successfully parsed rustdoc JSON v{} for {crate_spec}, found {index_len} items in index", $version);
                log::debug!(target: LOG_TARGET, "Root item ID for {crate_spec}: {:?}", parsed.root);

                // The version-specific enums cannot be inspected generically, so the two
                // classification predicates are supplied to the shared walker as closures.
                let is_public = |item: &rustdoc_types::Item| matches!(item.visibility, rustdoc_types::Visibility::Public);
                let is_use_item = |item: &rustdoc_types::Item| matches!(item.inner, rustdoc_types::ItemEnum::Use(_));

                Ok(process_crate_items(&parsed.index, &parsed.root, crate_spec, is_public, is_use_item))
            }
        }
    };
}
// One expansion per supported rustdoc JSON format version (50 through 57).
// Each version listed here has a matching arm in the `match format_version`
// dispatch inside `calculate_docs_metrics`.
generate_version_support!("50", rustdoc_types_v50);
generate_version_support!("51", rustdoc_types_v51);
generate_version_support!("52", rustdoc_types_v52);
generate_version_support!("53", rustdoc_types_v53);
generate_version_support!("54", rustdoc_types_v54);
generate_version_support!("55", rustdoc_types_v55);
generate_version_support!("56", rustdoc_types_v56);
generate_version_support!("57", rustdoc_types_v57);
pub fn calculate_docs_metrics(reader: impl Read, crate_spec: &CrateSpec) -> Result<DocsData> {
log::debug!(target: LOG_TARGET, "Parsing rustdoc JSON for {crate_spec}");
let json_value: serde_json::Value =
serde_json::from_reader(reader).into_app_err_with(|| format!("parsing JSON for {crate_spec}"))?;
let format_version = json_value
.get("format_version")
.and_then(serde_json::Value::as_u64)
.ok_or_else(|| app_err!("rustdoc JSON for {crate_spec} is missing 'format_version' field"))?;
log::debug!(target: LOG_TARGET, "Found rustdoc JSON format version {format_version} for {crate_spec}");
let metrics = match format_version {
50 => calculate_metrics_v50(json_value, crate_spec)?,
51 => calculate_metrics_v51(json_value, crate_spec)?,
52 => calculate_metrics_v52(json_value, crate_spec)?,
53 => calculate_metrics_v53(json_value, crate_spec)?,
54 => calculate_metrics_v54(json_value, crate_spec)?,
55 => calculate_metrics_v55(json_value, crate_spec)?,
56 => calculate_metrics_v56(json_value, crate_spec)?,
57 => calculate_metrics_v57(json_value, crate_spec)?,
_ => {
log::debug!(target: LOG_TARGET, "Unsupported rustdoc JSON format version {format_version} for {crate_spec}");
return Err(app_err!(
"unsupported rustdoc JSON format version {format_version} for {crate_spec}"
));
}
};
Ok(DocsData {
metrics,
})
}
/// Walks every item of a rustdoc JSON index and aggregates documentation metrics.
///
/// * `index` - all items of the crate, keyed by id.
/// * `root_id` - id of the crate's root module.
/// * `crate_spec` - the crate being analysed; only used in log output.
/// * `is_public` - returns `true` for items that are publicly visible.
/// * `is_use_item` - returns `true` for `use` items.
///
/// Items rejected by `is_public` and items accepted by `is_use_item` are skipped.
/// Every remaining item counts as one public API element. An item is
/// "documented" when its docs are present and not blank after trimming.
///
/// The returned coverage percentage is `100.0` when there are no public API
/// elements at all.
fn process_crate_items<Id, Item>(
    index: &std::collections::HashMap<Id, Item>,
    root_id: &Id,
    crate_spec: &CrateSpec,
    is_public: impl Fn(&Item) -> bool,
    is_use_item: impl Fn(&Item) -> bool,
) -> DocsMetrics
where
    Id: core::fmt::Debug + Eq + core::hash::Hash,
    Item: ItemLike,
{
    let mut number_of_public_api_elements = 0;
    let mut documented_count = 0;
    let mut number_of_examples_in_docs = 0;
    let mut has_crate_level_docs = false;
    let mut broken_doc_links = 0;
    let mut private_items = 0;
    let mut use_items = 0;
    let index_len = index.len();

    log::debug!(target: LOG_TARGET, "Starting to iterate through {index_len} items for {crate_spec}");

    for (id, item) in index {
        if !is_public(item) {
            private_items += 1;
            continue;
        }
        if is_use_item(item) {
            use_items += 1;
            continue;
        }

        number_of_public_api_elements += 1;

        // Blank docs are treated the same as missing docs.
        let Some(docs) = item.docs().filter(|text| !text.trim().is_empty()) else {
            continue;
        };
        documented_count += 1;

        // A code example is delimited by an opening and a closing fence line,
        // so two fence lines make one example.
        let fences = docs.lines().filter(|line| line.trim_start().starts_with("```")).count();
        number_of_examples_in_docs += fences / 2;

        broken_doc_links += count_broken_links::<Item::Id>(docs, item.links(), item.name());

        // The root id alone identifies the crate's root module. Its name is the
        // library target name, which may legitimately differ from the package
        // name, so the name is deliberately not compared here.
        if id == root_id {
            log::debug!(target: LOG_TARGET, "Found crate-level docs for {crate_spec} (root item is documented)");
            has_crate_level_docs = true;
        }
    }

    log::debug!(target: LOG_TARGET, "Processed {index_len} items for {crate_spec}: private={private_items}, use_items={use_items}, public_api={number_of_public_api_elements}, documented={documented_count}, examples={number_of_examples_in_docs}, broken_links={broken_doc_links}, has_crate_docs={has_crate_level_docs}");

    #[expect(clippy::cast_precision_loss, reason = "loss of precision acceptable for percentage calculation")]
    let doc_coverage_percentage = if number_of_public_api_elements > 0 {
        documented_count as f64 / number_of_public_api_elements as f64 * 100.0
    } else {
        100.0
    };

    let metrics = DocsMetrics {
        doc_coverage_percentage,
        public_api_elements: number_of_public_api_elements,
        undocumented_elements: number_of_public_api_elements - documented_count,
        examples_in_docs: number_of_examples_in_docs as u64,
        has_crate_level_docs,
        broken_doc_links,
    };

    log::debug!(target: LOG_TARGET, "Returning DocsMetrics for {crate_spec}: {metrics:?}");
    metrics
}
/// Counts backtick-style bracket links in `docs` that do not appear to resolve.
///
/// Triple-backtick code blocks are removed first. Every remaining ``[`text`]``
/// occurrence is then classified. A link is *not* counted as broken when
///
/// - it is immediately followed by `(`, i.e. it is an inline link with an
///   explicit target;
/// - its text, or the target of a matching ``[`text`]: target`` reference
///   definition, contains `://` (an external URL, which is never part of
///   `resolved_links`);
/// - its text is at most two bytes long;
/// - its text (with or without a trailing `()`, bare or wrapped in backticks),
///   the label of a directly following `[label]`, the target of a matching
///   reference definition, or the last `::` segment of its path is a key of
///   `resolved_links`.
///
/// `_item_name` is currently unused.
///
/// Returns the number of links that matched none of the rules above.
fn count_broken_links<Id>(docs: &str, resolved_links: &std::collections::HashMap<String, Id>, _item_name: Option<&str>) -> u64 {
    let mut broken_count = 0;
    let mut skipped_inline = 0;
    let mut skipped_external = 0;
    let mut skipped_short = 0;
    let mut skipped_resolved = 0;

    log::trace!(target: LOG_TARGET, "Checking for broken links. Docs length: {} chars, resolved_links count: {}", docs.len(), resolved_links.len());

    // Links inside code blocks are sample code, not documentation links.
    let docs_without_code_blocks = CODE_BLOCK_REGEX.replace_all(docs, "");
    let docs_to_check = docs_without_code_blocks.as_ref();

    // Collect ``[`text`]: target`` definitions so that usages can be resolved
    // through their declared target.
    let mut link_references = HashMap::default();
    for cap in LINK_REFERENCE_REGEX.captures_iter(docs_to_check) {
        if let (Some(link_text), Some(target)) = (cap.get(1), cap.get(2)) {
            let _ = link_references.insert(link_text.as_str(), target.as_str());
            log::trace!(target: LOG_TARGET, "Found link reference: [`{}`] -> {}", link_text.as_str(), target.as_str());
        }
    }

    for cap in INTRA_DOC_LINK_REGEX.captures_iter(docs_to_check) {
        let Some(link_text) = cap.get(1) else {
            continue;
        };
        let text = link_text.as_str();
        let match_end = cap.get(0).expect("match exists").end();

        if docs_to_check.get(match_end..).is_some_and(|s| s.starts_with('(')) {
            skipped_inline += 1;
            log::trace!(target: LOG_TARGET, "Skipping inline link: [`{text}`](...)");
            continue;
        }

        if text.contains("://") {
            skipped_external += 1;
            log::trace!(target: LOG_TARGET, "Skipping external link: [`{text}`]");
            continue;
        }

        let text_without_parens = text.strip_suffix("()").unwrap_or(text);

        // A reference definition that points at a URL makes this an external
        // link. This also covers the definition line itself, which the link
        // regex matches as well.
        let has_external_reference = [text, text_without_parens]
            .iter()
            .any(|label| link_references.get(*label).is_some_and(|target| target.contains("://")));
        if has_external_reference {
            skipped_external += 1;
            log::trace!(target: LOG_TARGET, "Skipping link with external reference target: [`{text}`]");
            continue;
        }

        let text_len = text.len();
        if text_len <= 2 {
            skipped_short += 1;
            log::trace!(target: LOG_TARGET, "Skipping short link (len={text_len}): [`{text}`]");
            continue;
        }

        // Label of a directly following `[label]`, as in ``[`text`][label]``.
        let inline_target = (|| {
            let remainder = docs_to_check.get(match_end..)?.strip_prefix('[')?;
            let end_pos = remainder.find(']')?;
            remainder.get(..end_pos)
        })();

        let is_resolved = resolved_links.contains_key(text)
            || resolved_links.contains_key(text_without_parens)
            || resolved_links.contains_key(&format!("`{text}`"))
            || resolved_links.contains_key(&format!("`{text_without_parens}`"))
            || inline_target.is_some_and(|target| resolved_links.contains_key(target))
            || link_references.get(text).is_some_and(|target| resolved_links.contains_key(*target))
            || link_references
                .get(text_without_parens)
                .is_some_and(|target| resolved_links.contains_key(*target))
            || (text_without_parens.contains("::") && {
                // Fall back to the final path segment, e.g. `Foo` for `a::b::Foo`.
                let last_component = text_without_parens.rsplit("::").next().unwrap_or("");
                resolved_links.contains_key(last_component)
                    || link_references
                        .get(last_component)
                        .is_some_and(|target| resolved_links.contains_key(*target))
            });

        if is_resolved {
            skipped_resolved += 1;
            log::trace!(target: LOG_TARGET, "Resolved link: [`{text}`]");
            continue;
        }

        broken_count += 1;
        log::trace!(target: LOG_TARGET, "Broken link: [`{text}`]");
    }

    let total_matches = broken_count + skipped_inline + skipped_external + skipped_short + skipped_resolved;
    log::trace!(target: LOG_TARGET, "Link analysis: total_matches={total_matches}, broken={broken_count}, skipped(inline={skipped_inline}, external={skipped_external}, short={skipped_short}, resolved={skipped_resolved})");

    broken_count
}
/// Version-independent view of a rustdoc JSON item.
///
/// Implemented for the `Item` type of every supported `rustdoc_types` version
/// by `generate_version_support!`, so the metric code can be written once.
trait ItemLike {
/// Id type that the item's link map points to.
type Id;
/// The item's name, if it has one.
fn name(&self) -> Option<&str>;
/// The item's documentation text, if any.
fn docs(&self) -> Option<&str>;
/// The item's `links` map from link string to item id.
fn links(&self) -> &std::collections::HashMap<String, Self::Id>;
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::facts::CrateSpec;
    use semver::Version;
    use serde_json::json;
    use std::sync::Arc;

    /// Builds a minimal format-version-57 rustdoc JSON document.
    ///
    /// The root module has id 0, the given name and docs, and lists every id
    /// from `extra_items` as a child; the extra items are added to the index as-is.
    fn make_rustdoc_json(
        root_module_name: &str,
        root_docs: Option<&str>,
        extra_items: &[(u32, serde_json::Value)],
    ) -> serde_json::Value {
        let extra_ids: Vec<u32> = extra_items.iter().map(|(id, _)| *id).collect();

        let root_item = json!({
            "id": 0,
            "crate_id": 0,
            "name": root_module_name,
            "span": null,
            "visibility": "public",
            "docs": root_docs,
            "links": {},
            "attrs": [],
            "deprecation": null,
            "inner": {
                "module": {
                    "is_crate": true,
                    "items": extra_ids,
                    "is_stripped": false
                }
            }
        });

        let mut index = serde_json::Map::new();
        let _ = index.insert("0".into(), root_item);
        for (id, item_json) in extra_items {
            let _ = index.insert(id.to_string(), item_json.clone());
        }

        json!({
            "format_version": 57,
            "root": 0,
            "crate_version": "0.1.0",
            "includes_private": false,
            "index": index,
            "paths": {
                "0": { "crate_id": 0, "path": [root_module_name], "kind": "module" }
            },
            "external_crates": {},
            "target": {
                "triple": "x86_64-unknown-linux-gnu",
                "target_features": []
            }
        })
    }

    /// Builds the index entry for a public, field-less struct with the given id, name and docs.
    fn make_public_struct(id: u32, name: &str, docs: Option<&str>) -> (u32, serde_json::Value) {
        let item = json!({
            "id": id,
            "crate_id": 0,
            "name": name,
            "span": null,
            "visibility": "public",
            "docs": docs,
            "links": {},
            "attrs": [],
            "deprecation": null,
            "inner": {
                "struct": {
                    "kind": { "plain": { "fields": [], "has_stripped_fields": false } },
                    "generics": { "params": [], "where_predicates": [] },
                    "impls": []
                }
            }
        });
        (id, item)
    }

    /// Creates a `CrateSpec` for `name` at version 0.1.0.
    fn crate_spec(name: &str) -> CrateSpec {
        CrateSpec::from_arcs(Arc::from(name), Arc::new(Version::new(0, 1, 0)))
    }

    /// Serializes `json` and runs `calculate_docs_metrics` on it for the crate `crate_name`.
    fn analyze(json: &serde_json::Value, crate_name: &str) -> DocsData {
        let bytes = serde_json::to_vec(json).unwrap();
        calculate_docs_metrics(bytes.as_slice(), &crate_spec(crate_name)).unwrap()
    }

    #[test]
    fn crate_level_docs_detected_for_simple_name() {
        let json = make_rustdoc_json("my_crate", Some("Top-level docs"), &[]);
        let data = analyze(&json, "my_crate");
        assert!(data.metrics.has_crate_level_docs);
    }

    #[test]
    fn crate_level_docs_detected_when_name_has_hyphens() {
        let json = make_rustdoc_json("pin_project_lite", Some("A lightweight pin-project."), &[]);
        let data = analyze(&json, "pin-project-lite");
        assert!(
            data.metrics.has_crate_level_docs,
            "should detect crate-level docs even when crate name has hyphens"
        );
    }

    #[test]
    fn crate_level_docs_false_when_root_has_no_docs() {
        let json = make_rustdoc_json("my_crate", None, &[]);
        let data = analyze(&json, "my_crate");
        assert!(!data.metrics.has_crate_level_docs);
    }

    #[test]
    fn crate_level_docs_false_when_root_docs_are_empty() {
        let json = make_rustdoc_json("my_crate", Some(" "), &[]);
        let data = analyze(&json, "my_crate");
        assert!(!data.metrics.has_crate_level_docs);
    }

    #[test]
    fn coverage_counts_public_items() {
        let items = [
            make_public_struct(1, "Documented", Some("Has docs.")),
            make_public_struct(2, "Undocumented", None),
        ];
        let json = make_rustdoc_json("my_crate", Some("Crate docs"), &items);
        let data = analyze(&json, "my_crate");
        // The root module counts as a public item alongside the two structs.
        assert_eq!(data.metrics.public_api_elements, 3);
        assert_eq!(data.metrics.undocumented_elements, 1);
    }

    #[test]
    fn counts_code_examples_in_docs() {
        let docs_with_two_examples = "Some docs\n\n```rust\nlet x = 1;\n```\n\nMore text\n\n```\nlet y = 2;\n```\n";
        let json = make_rustdoc_json("my_crate", Some(docs_with_two_examples), &[]);
        let data = analyze(&json, "my_crate");
        assert_eq!(data.metrics.examples_in_docs, 2);
    }

    #[test]
    fn full_coverage_when_all_items_documented() {
        let items = [make_public_struct(1, "Foo", Some("Foo docs"))];
        let json = make_rustdoc_json("my_crate", Some("Crate docs"), &items);
        let data = analyze(&json, "my_crate");
        assert!((data.metrics.doc_coverage_percentage - 100.0).abs() < f64::EPSILON);
        assert_eq!(data.metrics.undocumented_elements, 0);
    }
}