cargo-aprz-lib 0.14.0

Internal library for cargo-aprz
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
//! Metrics calculation for rustdoc JSON documentation
//!
//! This module handles parsing rustdoc JSON in various format versions and extracting
//! documentation metrics.

use super::provider::LOG_TARGET;
use super::{DocsData, DocsMetrics};
use crate::Result;
use crate::facts::CrateSpec;
use ohno::{IntoAppError, app_err};
use regex::Regex;
use crate::HashMap;
use std::io::Read;
use std::sync::LazyLock;

/// Pattern to match intra-doc code links of the form `` [`text`] ``.
///
/// Only backtick-enclosed link text is matched, since that is the standard way to
/// reference code items. Capture group 1 holds the text between the backticks; it must
/// be non-empty and cannot contain a backtick or a `]`.
///
/// The regex is compiled once on first use. The pattern is a fixed literal, so the
/// `expect` can only fire if that literal is edited into something invalid.
static INTRA_DOC_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`\]]+)`\]").expect("invalid regex"));

/// Pattern to match fenced code blocks delimited by triple backticks.
///
/// `[\s\S]*?` matches lazily and across newlines, so every match ends at the nearest
/// following fence. An opening fence that is never closed is not matched at all.
static CODE_BLOCK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"```[\s\S]*?```").expect("invalid regex"));

/// Pattern to match reference-style link definitions of the form `` [`text`]: target ``.
///
/// These define aliases where the link text in the docs maps to a different resolution
/// target. Capture group 1 is the link text (without its backticks); capture group 2 is
/// the target, taken as the run of non-whitespace characters that follows the colon and
/// any whitespace after it. The pattern is not anchored to the start of a line.
static LINK_REFERENCE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`\]]+)`\]:\s*(\S+)").expect("invalid regex"));

/// Macro to generate all code needed for a rustdoc JSON format version
///
/// This generates both:
/// 1. The version-specific `calculate_metrics_vN` function
/// 2. The `ItemLike` trait implementation for that version's types
///
/// # Arguments
/// * `$version` - the format version as a string literal (e.g. `"57"`). It is pasted
///   into the generated function name and interpolated into log and error messages.
/// * `$module` - identifier of the crate/module providing that version's `Crate`,
///   `Item`, `Id`, `Visibility` and `ItemEnum` types.
///
/// The generated function deserializes an already-parsed `serde_json::Value` into the
/// version's `Crate` and hands its `index` and `root` to `process_crate_items`.
macro_rules! generate_version_support {
    ($version:literal, $module:ident) => {
        pastey::paste! {
            /// Parse and calculate metrics for rustdoc JSON format version
            #[doc = $version]
            fn [<calculate_metrics_v $version>](json_value: serde_json::Value, crate_spec: &CrateSpec) -> Result<DocsMetrics> {
                // Local alias so the body below reads the same for every version.
                use $module as rustdoc_types;

                log::debug!(target: LOG_TARGET, "Parsing rustdoc JSON v{} for {crate_spec}", $version);
                // Typed deserialization; a schema mismatch surfaces here as an error with context.
                let krate: rustdoc_types::Crate = serde_json::from_value(json_value)
                    .into_app_err_with(|| format!("parsing rustdoc JSON v{} structure for {crate_spec}", $version))?;

                let index_len = krate.index.len();
                log::debug!(target: LOG_TARGET, "Successfully parsed rustdoc JSON v{} for {crate_spec}, found {index_len} items in index", $version);
                log::debug!(target: LOG_TARGET, "Root item ID for {crate_spec}: {:?}", krate.root);

                // The two closures are the only version-specific logic the shared
                // processing needs: "is this item public?" and "is this a `use` item?".
                Ok(process_crate_items(
                    &krate.index,
                    &krate.root,
                    crate_spec,
                    |item| matches!(item.visibility, rustdoc_types::Visibility::Public),
                    |item| matches!(item.inner, rustdoc_types::ItemEnum::Use(_)),
                ))
            }
        }

        // Generate ItemLike trait implementation
        // (each accessor simply borrows the corresponding field of the versioned `Item`)
        impl ItemLike for $module::Item {
            type Id = $module::Id;

            fn name(&self) -> Option<&str> {
                self.name.as_deref()
            }

            fn docs(&self) -> Option<&str> {
                self.docs.as_deref()
            }

            fn links(&self) -> &std::collections::HashMap<String, Self::Id> {
                &self.links
            }
        }
    };
}

// Generate all code for each supported version.
//
// Each invocation produces `calculate_metrics_v<N>` plus an `ItemLike` impl for
// `rustdoc_types_v<N>::Item`. When adding a version here, also add the matching arm to
// the `match format_version` dispatch in `calculate_docs_metrics`; otherwise the new
// function is never called.
generate_version_support!("50", rustdoc_types_v50);
generate_version_support!("51", rustdoc_types_v51);
generate_version_support!("52", rustdoc_types_v52);
generate_version_support!("53", rustdoc_types_v53);
generate_version_support!("54", rustdoc_types_v54);
generate_version_support!("55", rustdoc_types_v55);
generate_version_support!("56", rustdoc_types_v56);
generate_version_support!("57", rustdoc_types_v57);

pub fn calculate_docs_metrics(reader: impl Read, crate_spec: &CrateSpec) -> Result<DocsData> {
    log::debug!(target: LOG_TARGET, "Parsing rustdoc JSON for {crate_spec}");
    let json_value: serde_json::Value =
        serde_json::from_reader(reader).into_app_err_with(|| format!("parsing JSON for {crate_spec}"))?;

    let format_version = json_value
        .get("format_version")
        .and_then(serde_json::Value::as_u64)
        .ok_or_else(|| app_err!("rustdoc JSON for {crate_spec} is missing 'format_version' field"))?;

    log::debug!(target: LOG_TARGET, "Found rustdoc JSON format version {format_version} for {crate_spec}");

    let metrics = match format_version {
        50 => calculate_metrics_v50(json_value, crate_spec)?,
        51 => calculate_metrics_v51(json_value, crate_spec)?,
        52 => calculate_metrics_v52(json_value, crate_spec)?,
        53 => calculate_metrics_v53(json_value, crate_spec)?,
        54 => calculate_metrics_v54(json_value, crate_spec)?,
        55 => calculate_metrics_v55(json_value, crate_spec)?,
        56 => calculate_metrics_v56(json_value, crate_spec)?,
        57 => calculate_metrics_v57(json_value, crate_spec)?,
        _ => {
            log::debug!(target: LOG_TARGET, "Unsupported rustdoc JSON format version {format_version} for {crate_spec}");
            return Err(app_err!(
                "unsupported rustdoc JSON format version {format_version} for {crate_spec}"
            ));
        }
    };

    Ok(DocsData {
        metrics,
    })
}

/// Process crate items and calculate documentation metrics
///
/// This generic function works with items from any rustdoc-types version by accepting
/// closures that check visibility and item type in a version-specific way.
///
/// # Arguments
/// * `index` - the item index of the parsed rustdoc JSON, keyed by item id
/// * `root_id` - id of the crate's root module within `index`
/// * `crate_spec` - the crate being analysed; used for log context only
/// * `is_public` - returns `true` for items that count as public API
/// * `is_use_item` - returns `true` for re-exports (`use` items), which are skipped
///
/// # Returns
/// A `DocsMetrics` where:
/// - `public_api_elements` counts items that are public and not `use` items,
/// - `undocumented_elements` counts those whose docs are absent or blank,
/// - `doc_coverage_percentage` is documented / public * 100, or `100.0` when there are
///   no public elements,
/// - `examples_in_docs` is the number of code-fence lines in documented items, halved
///   per item (each block has an opening and a closing fence),
/// - `broken_doc_links` sums `count_broken_links` over documented items,
/// - `has_crate_level_docs` is `true` when the root item is counted and documented.
///
/// The root module is recognised by `root_id` alone. Its name is the library target
/// name, which Cargo allows to differ from the package name (`[lib] name = "..."`), so
/// additionally requiring the name to equal the package name would report such crates
/// as having no crate-level docs.
fn process_crate_items<Id, Item>(
    index: &std::collections::HashMap<Id, Item>,
    root_id: &Id,
    crate_spec: &CrateSpec,
    is_public: impl Fn(&Item) -> bool,
    is_use_item: impl Fn(&Item) -> bool,
) -> DocsMetrics
where
    Id: core::fmt::Debug + Eq + core::hash::Hash,
    Item: ItemLike,
{
    let mut number_of_public_api_elements = 0;
    let mut documented_count = 0;
    let mut number_of_examples_in_docs = 0;
    let mut has_crate_level_docs = false;
    let mut broken_doc_links = 0;
    let mut private_items = 0;
    let mut use_items = 0;

    let index_len = index.len();
    log::debug!(target: LOG_TARGET, "Starting to iterate through {index_len} items for {crate_spec}");

    for (id, item) in index {
        // Only count public API items
        if !is_public(item) {
            private_items += 1;
            continue;
        }

        // Skip re-exports (Use items) - they inherit docs from the original item
        if is_use_item(item) {
            use_items += 1;
            continue;
        }

        number_of_public_api_elements += 1;

        // Whitespace-only docs count as undocumented.
        let Some(docs) = item.docs().filter(|docs| !docs.trim().is_empty()) else {
            continue;
        };
        documented_count += 1;

        // Each code block contributes an opening and a closing fence line.
        let fences = docs.lines().filter(|line| line.trim_start().starts_with("```")).count();
        number_of_examples_in_docs += fences / 2;

        broken_doc_links += count_broken_links::<Item::Id>(docs, item.links(), item.name());

        // The root id is authoritative; the module name is deliberately not compared
        // against the package name (see the function docs).
        if id == root_id {
            log::debug!(target: LOG_TARGET, "Found crate-level docs for {crate_spec} (root item {id:?})");
            has_crate_level_docs = true;
        }
    }

    log::debug!(target: LOG_TARGET, "Processed {index_len} items for {crate_spec}: private={private_items}, use_items={use_items}, public_api={number_of_public_api_elements}, documented={documented_count}, examples={number_of_examples_in_docs}, broken_links={broken_doc_links}, has_crate_docs={has_crate_level_docs}");

    #[expect(clippy::cast_precision_loss, reason = "loss of precision acceptable for percentage calculation")]
    let doc_coverage_percentage = if number_of_public_api_elements > 0 {
        documented_count as f64 / number_of_public_api_elements as f64 * 100.0
    } else {
        // Nothing to document means nothing is missing.
        100.0
    };

    let metrics = DocsMetrics {
        doc_coverage_percentage,
        public_api_elements: number_of_public_api_elements,
        undocumented_elements: number_of_public_api_elements - documented_count,
        examples_in_docs: number_of_examples_in_docs as u64,
        has_crate_level_docs,
        broken_doc_links,
    };

    log::debug!(target: LOG_TARGET, "Returning DocsMetrics for {crate_spec}: {metrics:?}");
    metrics
}

/// Count intra-doc links in `docs` that were not resolved
///
/// Scans for backtick-enclosed markdown links such as `` [`Type`] `` (the standard way
/// to reference code items) and reports how many of them cannot be matched against
/// `resolved_links`. Fenced code blocks are removed before scanning.
///
/// A link is not counted as broken when any of the following holds:
/// - it is an inline link, `` [`text`](url) ``;
/// - its text contains `://`;
/// - its text is at most two bytes long;
/// - its text, with or without a trailing `()` and with or without surrounding
///   backticks, is a key of `resolved_links`;
/// - it is written `` [`text`][target] `` and `target` is a key of `resolved_links`;
/// - a reference definition such as
///   `` [`anyhow::Error::from_boxed`]: Self::from_boxed `` maps its text (with or
///   without a trailing `()`) to a key of `resolved_links`;
/// - its last `::`-separated component is a key of `resolved_links`, or is mapped to
///   one by a reference definition.
///
/// `_item_name` is currently unused.
fn count_broken_links<Id>(docs: &str, resolved_links: &std::collections::HashMap<String, Id>, _item_name: Option<&str>) -> u64 {
    let mut broken_count = 0;
    let mut skipped_inline = 0;
    let mut skipped_external = 0;
    let mut skipped_short = 0;
    let mut skipped_resolved = 0;

    log::trace!(target: LOG_TARGET, "Checking for broken links. Docs length: {} chars, resolved_links count: {}", docs.len(), resolved_links.len());

    // Example code must not contribute link-like text, so drop fenced blocks up front.
    let stripped = CODE_BLOCK_REGEX.replace_all(docs, "");
    let haystack: &str = &stripped;

    // Collect reference definitions: link text as written -> target it should resolve to.
    let mut reference_targets = HashMap::default();
    for definition in LINK_REFERENCE_REGEX.captures_iter(haystack) {
        let (Some(label), Some(destination)) = (definition.get(1), definition.get(2)) else {
            continue;
        };
        let _ = reference_targets.insert(label.as_str(), destination.as_str());
        log::trace!(target: LOG_TARGET, "Found link reference: [`{}`] -> {}", label.as_str(), destination.as_str());
    }

    // Lookup helpers shared by every link below.
    let in_resolved = |key: &str| resolved_links.contains_key(key);
    let via_reference = |key: &str| reference_targets.get(key).is_some_and(|destination| in_resolved(*destination));

    for link in INTRA_DOC_LINK_REGEX.captures_iter(haystack) {
        let Some(link_text) = link.get(1) else {
            continue;
        };
        let text = link_text.as_str();

        // Whatever follows the closing bracket tells inline and reference links apart.
        let match_end = link.get(0).expect("match exists").end();
        let rest = haystack.get(match_end..);

        // `[`text`](url)` is an ordinary inline link, not an intra-doc link.
        if rest.is_some_and(|tail| tail.starts_with('(')) {
            skipped_inline += 1;
            log::trace!(target: LOG_TARGET, "Skipping inline link: [`{text}`](...)");
            continue;
        }

        // `[`text`][target]`: the target sits in plain square brackets (no backticks).
        let inline_target = rest
            .and_then(|tail| tail.strip_prefix('['))
            .and_then(|tail| tail.split_once(']'))
            .map(|(label, _)| label);

        // Anything carrying a URL scheme is an external link.
        if text.contains("://") {
            skipped_external += 1;
            log::trace!(target: LOG_TARGET, "Skipping external link: [`{text}`]");
            continue;
        }

        // One- and two-byte "links" are most likely false positives.
        let text_len = text.len();
        if text_len <= 2 {
            skipped_short += 1;
            log::trace!(target: LOG_TARGET, "Skipping short link (len={text_len}): [`{text}`]");
            continue;
        }

        // Method references are often written with a trailing `()`.
        let text_without_parens = text.strip_suffix("()").unwrap_or(text);

        // Any single strategy succeeding is enough; the lookups have no side effects.
        let is_resolved = [text, text_without_parens]
            .into_iter()
            .any(|candidate| in_resolved(candidate) || via_reference(candidate))
            || in_resolved(&format!("`{text}`"))
            || in_resolved(&format!("`{text_without_parens}`"))
            || inline_target.is_some_and(|label| in_resolved(label))
            // Last path component, e.g. "Error" from "std::error::Error".
            || text_without_parens
                .rsplit_once("::")
                .is_some_and(|(_, last_component)| in_resolved(last_component) || via_reference(last_component));

        if is_resolved {
            skipped_resolved += 1;
            log::trace!(target: LOG_TARGET, "Resolved link: [`{text}`]");
        } else {
            // Looks like an intra-doc link, yet nothing resolves it.
            broken_count += 1;
            log::trace!(target: LOG_TARGET, "Broken link: [`{text}`]");
        }
    }

    let total_matches = broken_count + skipped_inline + skipped_external + skipped_short + skipped_resolved;
    log::trace!(target: LOG_TARGET, "Link analysis: total_matches={total_matches}, broken={broken_count}, skipped(inline={skipped_inline}, external={skipped_external}, short={skipped_short}, resolved={skipped_resolved})");

    broken_count
}

/// Trait to abstract over different rustdoc-types Item versions
///
/// Implemented for each supported version's `Item` by `generate_version_support!`, so
/// that `process_crate_items` can be written once against this interface.
trait ItemLike {
    /// The item-id type of the same rustdoc-types version.
    type Id;
    /// The item's name, if it has one.
    fn name(&self) -> Option<&str>;
    /// The item's documentation text, if any.
    fn docs(&self) -> Option<&str>;
    /// The item's `links` map: link text to the id of the item it refers to.
    fn links(&self) -> &std::collections::HashMap<String, Self::Id>;
}

// Tests drive the public entry point `calculate_docs_metrics` with hand-built
// rustdoc JSON (format version 57) serialized to bytes.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::facts::CrateSpec;
    use semver::Version;
    use serde_json::json;
    use std::sync::Arc;

    /// Build a minimal valid rustdoc JSON v57 value.
    ///
    /// `root_module_name` is the name of the root module item (rustdoc always uses
    /// underscores here, e.g. `"pin_project_lite"`).  Extra items can be injected
    /// through `extra_items` (id, item-json pairs) — they are automatically added
    /// to the root module's `items` array.
    ///
    /// The root module always has id 0 and is public; `root_docs` becomes its `docs`
    /// field (`None` serializes as JSON `null`).
    fn make_rustdoc_json(
        root_module_name: &str,
        root_docs: Option<&str>,
        extra_items: &[(u32, serde_json::Value)],
    ) -> serde_json::Value {
        // Ids of the extra items, listed as children of the root module below.
        let extra_ids: Vec<u32> = extra_items.iter().map(|(id, _)| *id).collect();

        let mut index = serde_json::Map::new();

        // Root module (id 0)
        let _ = index.insert(
            "0".into(),
            json!({
                "id": 0,
                "crate_id": 0,
                "name": root_module_name,
                "span": null,
                "visibility": "public",
                "docs": root_docs,
                "links": {},
                "attrs": [],
                "deprecation": null,
                "inner": {
                    "module": {
                        "is_crate": true,
                        "items": extra_ids,
                        "is_stripped": false
                    }
                }
            }),
        );

        // The index is keyed by the item id rendered as a string.
        for (id, item_json) in extra_items {
            let _ = index.insert(id.to_string(), item_json.clone());
        }

        json!({
            "format_version": 57,
            "root": 0,
            "crate_version": "0.1.0",
            "includes_private": false,
            "index": index,
            "paths": {
                "0": { "crate_id": 0, "path": [root_module_name], "kind": "module" }
            },
            "external_crates": {},
            "target": {
                "triple": "x86_64-unknown-linux-gnu",
                "target_features": []
            }
        })
    }

    /// Build a public struct item with the given name and optional docs.
    ///
    /// Returns the `(id, item-json)` pair shape that `make_rustdoc_json` accepts as an
    /// extra item. The struct is plain, with no fields, generics or impls.
    fn make_public_struct(id: u32, name: &str, docs: Option<&str>) -> (u32, serde_json::Value) {
        (
            id,
            json!({
                "id": id,
                "crate_id": 0,
                "name": name,
                "span": null,
                "visibility": "public",
                "docs": docs,
                "links": {},
                "attrs": [],
                "deprecation": null,
                "inner": {
                    "struct": {
                        "kind": { "plain": { "fields": [], "has_stripped_fields": false } },
                        "generics": { "params": [], "where_predicates": [] },
                        "impls": []
                    }
                }
            }),
        )
    }

    /// Build a `CrateSpec` for `name` at the fixed version 0.1.0.
    fn crate_spec(name: &str) -> CrateSpec {
        CrateSpec::from_arcs(Arc::from(name), Arc::new(Version::new(0, 1, 0)))
    }

    // -----------------------------------------------------------------------
    // Crate-level docs detection
    // -----------------------------------------------------------------------

    /// A documented root module is reported as crate-level docs.
    #[test]
    fn crate_level_docs_detected_for_simple_name() {
        let json = make_rustdoc_json("my_crate", Some("Top-level docs"), &[]);
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("my_crate")).unwrap();
        assert!(data.metrics.has_crate_level_docs);
    }

    /// A hyphenated crate name still matches the underscored root module name.
    #[test]
    fn crate_level_docs_detected_when_name_has_hyphens() {
        // The CrateSpec uses hyphens (crates.io convention) but rustdoc JSON
        // uses underscores for the root module name.
        let json = make_rustdoc_json("pin_project_lite", Some("A lightweight pin-project."), &[]);
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("pin-project-lite")).unwrap();
        assert!(
            data.metrics.has_crate_level_docs,
            "should detect crate-level docs even when crate name has hyphens"
        );
    }

    /// A root module without a `docs` value does not count as crate-level docs.
    #[test]
    fn crate_level_docs_false_when_root_has_no_docs() {
        let json = make_rustdoc_json("my_crate", None, &[]);
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("my_crate")).unwrap();
        assert!(!data.metrics.has_crate_level_docs);
    }

    /// Whitespace-only root docs are treated the same as missing docs.
    #[test]
    fn crate_level_docs_false_when_root_docs_are_empty() {
        let json = make_rustdoc_json("my_crate", Some("   "), &[]);
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("my_crate")).unwrap();
        assert!(!data.metrics.has_crate_level_docs);
    }

    // -----------------------------------------------------------------------
    // Documentation coverage
    // -----------------------------------------------------------------------

    /// The root module and both structs are counted; only one struct lacks docs.
    #[test]
    fn coverage_counts_public_items() {
        let json = make_rustdoc_json(
            "my_crate",
            Some("Crate docs"),
            &[
                make_public_struct(1, "Documented", Some("Has docs.")),
                make_public_struct(2, "Undocumented", None),
            ],
        );
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("my_crate")).unwrap();
        // 3 public items: root module + 2 structs
        assert_eq!(data.metrics.public_api_elements, 3);
        // 2 documented: root module + "Documented"
        assert_eq!(data.metrics.undocumented_elements, 1);
    }

    // -----------------------------------------------------------------------
    // Examples in docs
    // -----------------------------------------------------------------------

    /// Two fenced blocks (four fence lines) in the root docs count as two examples.
    #[test]
    fn counts_code_examples_in_docs() {
        let docs_with_two_examples = "Some docs\n\n```rust\nlet x = 1;\n```\n\nMore text\n\n```\nlet y = 2;\n```\n";
        let json = make_rustdoc_json("my_crate", Some(docs_with_two_examples), &[]);
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("my_crate")).unwrap();
        assert_eq!(data.metrics.examples_in_docs, 2);
    }

    // -----------------------------------------------------------------------
    // Documentation coverage (continued)
    // -----------------------------------------------------------------------

    /// With every counted item documented, coverage is 100% and nothing is undocumented.
    #[test]
    fn full_coverage_when_all_items_documented() {
        let json = make_rustdoc_json(
            "my_crate",
            Some("Crate docs"),
            &[make_public_struct(1, "Foo", Some("Foo docs"))],
        );
        let reader = serde_json::to_vec(&json).unwrap();

        let data = calculate_docs_metrics(reader.as_slice(), &crate_spec("my_crate")).unwrap();
        assert!((data.metrics.doc_coverage_percentage - 100.0).abs() < f64::EPSILON);
        assert_eq!(data.metrics.undocumented_elements, 0);
    }
}