Skip to main content

sourcey_rustdoc/
extract.rs

1use std::collections::{BTreeMap, HashMap, HashSet};
2
3use rustdoc_types::{Crate, Id, Item as RdItem, ItemEnum, StructKind, VariantKind, Visibility as RdVisibility};
4
5use crate::diagnostics::{self, codes};
6use crate::doctest::extract_doctests;
7use crate::links::{format_id, LinkContext};
8use crate::signature::{lower_generics, lower_signature, render_type};
9use crate::spec::{
10    AssocConstItem, AssocTypeItem, ConstantItem, CrateSpec, Deprecation, Diagnostic, EnumItem,
11    FunctionItem, ImplItem, Item, ItemId, ItemInner, LinkTarget, MacroItem, ModuleSpec,
12    ProcMacroItem, ProcMacroKindKind, SourceLocation, Stability, StabilityLevel, StaticItem,
13    StructFieldItem, StructItem, StructKindKind, TraitAliasItem, TraitItem, TypeAliasItem,
14    TypePath, UnionItem, UseItem, VariantItem, VariantKindKind, Visibility,
15};
16
/// Caller-supplied switches controlling which items `extract_crate` emits
/// and how the resulting crate is named.
///
/// `Default` gives the conservative configuration: private and hidden items
/// are skipped and no crate name hint is set.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtractOptions {
    /// When `true`, items are emitted regardless of their visibility.
    pub include_private: bool,
    /// When `true`, items carrying a `doc(hidden)` attribute are kept.
    pub include_hidden: bool,
    /// Preferred crate name; when empty, the name of the rustdoc root item
    /// is used instead.
    pub crate_name_hint: String,
}
22
23pub struct ExtractResult {
24    pub crate_spec: CrateSpec,
25    pub diagnostics: Vec<Diagnostic>,
26}
27
28pub fn extract_crate(krate: &Crate, opts: &ExtractOptions) -> ExtractResult {
29    let links = LinkContext::new(krate);
30    let mut diagnostics: Vec<Diagnostic> = Vec::new();
31    let mut items: BTreeMap<String, Item> = BTreeMap::new();
32    let mut modules: Vec<ModuleSpec> = Vec::new();
33    let mut emitted: HashSet<ItemId> = HashSet::new();
34
35    for (id, rd_item) in &krate.index {
36        if !should_emit(rd_item, opts) {
37            continue;
38        }
39        let item_id = ItemId(format_id(id));
40        if !emitted.insert(item_id.clone()) {
41            continue;
42        }
43        match &rd_item.inner {
44            ItemEnum::Module(m) => {
45                modules.push(lower_module(id, rd_item, m, krate));
46                // Also surface modules as items so per-module sidebar entries,
47                // search categories, and parent-module pages can link to
48                // their children.
49                if let Some(item) = lower_item(id, rd_item, krate, &links, &mut diagnostics) {
50                    items.insert(item.id.0.clone(), item);
51                }
52            }
53            _ => {
54                if let Some(item) = lower_item(id, rd_item, krate, &links, &mut diagnostics) {
55                    items.insert(item.id.0.clone(), item);
56                }
57            }
58        }
59    }
60
61    let root_module_id = ItemId(format_id(&krate.root));
62    // Prefer the user-supplied crate name (matches Cargo.toml `[package].name`,
63    // hyphens preserved). Fall back to rustdoc's lib target name (underscored)
64    // when no hint is set.
65    let crate_name = if opts.crate_name_hint.is_empty() {
66        krate
67            .index
68            .get(&krate.root)
69            .and_then(|i| i.name.clone())
70            .unwrap_or_default()
71    } else {
72        opts.crate_name_hint.clone()
73    };
74    let crate_version = krate.crate_version.clone();
75
76    let crate_spec = CrateSpec {
77        name: crate_name,
78        version: crate_version,
79        root_module_id,
80        modules,
81        items,
82        external_crates: links.external_crates(),
83        diagnostics: diagnostics.clone(),
84    };
85    if diagnostics.iter().any(|d| d.code == codes::INTRA_DOC_LINK_UNRESOLVED) {
86        // Carry intra-doc-link diagnostics into the crate-level diagnostics
87        // (already in `crate_spec.diagnostics`); we return the same set in the
88        // top-level summary for caller-level visibility.
89    }
90    ExtractResult {
91        crate_spec,
92        diagnostics,
93    }
94}
95
96fn should_emit(item: &RdItem, opts: &ExtractOptions) -> bool {
97    if !opts.include_hidden {
98        for attr in &item.attrs {
99            if let rustdoc_types::Attribute::Other(s) = attr {
100                if s.contains("doc(hidden)") {
101                    return false;
102                }
103            }
104        }
105    }
106    if opts.include_private {
107        return true;
108    }
109    // Without --document-private-items, rustdoc's `index` only contains
110    // items it considers documented (public, or implicitly public via a
111    // public parent). Most items inside impl blocks, trait assoc items,
112    // enum variants, and struct fields carry `Default` visibility but are
113    // still public because their parent is. Trust rustdoc's filtering and
114    // only drop explicit `Crate`/`Restricted` visibility.
115    match &item.visibility {
116        RdVisibility::Public | RdVisibility::Default => true,
117        RdVisibility::Crate | RdVisibility::Restricted { .. } => false,
118    }
119}
120
121fn lower_module(id: &Id, item: &RdItem, m: &rustdoc_types::Module, krate: &Crate) -> ModuleSpec {
122    let path = path_for_id(id, krate);
123    let item_ids: Vec<ItemId> = m.items.iter().map(|i| ItemId(format_id(i))).collect();
124    let sub_module_paths: Vec<Vec<String>> = m
125        .items
126        .iter()
127        .filter_map(|i| krate.index.get(i))
128        .filter(|child| matches!(child.inner, ItemEnum::Module(_)))
129        .map(|child| {
130            let mut p = path.clone();
131            if let Some(name) = &child.name {
132                p.push(name.clone());
133            }
134            p
135        })
136        .collect();
137    ModuleSpec {
138        id: ItemId(format_id(id)),
139        path,
140        docs_markdown: item.docs.clone(),
141        doc_aliases: extract_doc_aliases(item),
142        item_ids,
143        sub_module_paths,
144        source: lower_source(item),
145    }
146}
147
148fn lower_item(
149    id: &Id,
150    rd_item: &RdItem,
151    krate: &Crate,
152    links: &LinkContext<'_>,
153    diagnostics: &mut Vec<Diagnostic>,
154) -> Option<Item> {
155    let item_id = ItemId(format_id(id));
156    let inner = lower_inner(rd_item, krate, links, diagnostics)?;
157    let docs_markdown = rd_item.docs.clone();
158    let doctests = docs_markdown
159        .as_deref()
160        .map(extract_doctests)
161        .unwrap_or_default();
162    let item = Item {
163        id: item_id,
164        name: rd_item.name.clone(),
165        path: path_for_id(id, krate),
166        visibility: lower_visibility(&rd_item.visibility),
167        source: lower_source(rd_item),
168        docs_markdown,
169        doc_aliases: extract_doc_aliases(rd_item),
170        deprecation: rd_item.deprecation.as_ref().map(|d| Deprecation {
171            since: d.since.clone(),
172            note: d.note.clone(),
173        }),
174        stability: extract_stability(rd_item),
175        feature_gates: extract_feature_gates(rd_item),
176        attrs_structured: extract_structured_attrs(rd_item),
177        links: rd_item
178            .links
179            .iter()
180            .map(|(label, id)| {
181                let target = links.resolve_id(id).map(|resolved| {
182                    if resolved.external {
183                        LinkTarget::External {
184                            crate_name: links
185                                .external_crate_index
186                                .get(&resolved.crate_id)
187                                .map(|ec| ec.name.clone())
188                                .unwrap_or_default(),
189                            path: resolved.path,
190                            html_root_url: resolved.html_root_url,
191                        }
192                    } else {
193                        LinkTarget::Internal { id: resolved.id }
194                    }
195                }).unwrap_or(LinkTarget::Internal { id: ItemId(format_id(id)) });
196                (label.clone(), target)
197            })
198            .collect(),
199        inner,
200        doctests,
201    };
202    Some(item)
203}
204
205fn lower_inner(
206    rd_item: &RdItem,
207    krate: &Crate,
208    links: &LinkContext<'_>,
209    diagnostics: &mut Vec<Diagnostic>,
210) -> Option<ItemInner> {
211    let inner = match &rd_item.inner {
212        ItemEnum::Function(f) => ItemInner::Function(FunctionItem {
213            signature: lower_signature(
214                &f.sig,
215                rd_item.name.as_deref().unwrap_or(""),
216                f.header.is_const,
217                f.header.is_async,
218                f.header.is_unsafe,
219                links,
220            ),
221            generics: lower_generics(&f.generics),
222            is_const: f.header.is_const,
223            is_async: f.header.is_async,
224            is_unsafe: f.header.is_unsafe,
225            has_body: f.has_body,
226        }),
227        ItemEnum::Struct(s) => ItemInner::Struct(StructItem {
228            struct_kind: match &s.kind {
229                StructKind::Plain { .. } => StructKindKind::Plain,
230                StructKind::Tuple(_) => StructKindKind::Tuple,
231                StructKind::Unit => StructKindKind::Unit,
232            },
233            generics: lower_generics(&s.generics),
234            fields: extract_struct_fields(&s.kind),
235            has_stripped_fields: matches!(
236                &s.kind,
237                StructKind::Plain {
238                    has_stripped_fields: true,
239                    ..
240                }
241            ),
242            impls: s.impls.iter().map(|i| ItemId(format_id(i))).collect(),
243        }),
244        ItemEnum::Enum(e) => ItemInner::Enum(EnumItem {
245            generics: lower_generics(&e.generics),
246            variants: e.variants.iter().map(|i| ItemId(format_id(i))).collect(),
247            has_stripped_variants: e.has_stripped_variants,
248            impls: e.impls.iter().map(|i| ItemId(format_id(i))).collect(),
249        }),
250        ItemEnum::Variant(v) => ItemInner::Variant(VariantItem {
251            variant_kind: match &v.kind {
252                VariantKind::Plain => VariantKindKind::Plain,
253                VariantKind::Tuple(_) => VariantKindKind::Tuple,
254                VariantKind::Struct { .. } => VariantKindKind::Struct,
255            },
256            discriminant: v.discriminant.as_ref().map(|d| d.expr.clone()),
257        }),
258        ItemEnum::Union(u) => ItemInner::Union(UnionItem {
259            generics: lower_generics(&u.generics),
260            fields: u.fields.iter().map(|i| ItemId(format_id(i))).collect(),
261            has_stripped_fields: u.has_stripped_fields,
262            impls: u.impls.iter().map(|i| ItemId(format_id(i))).collect(),
263        }),
264        ItemEnum::Trait(t) => ItemInner::Trait(TraitItem {
265            is_auto: t.is_auto,
266            is_unsafe: t.is_unsafe,
267            is_dyn_compatible: t.is_dyn_compatible,
268            generics: lower_generics(&t.generics),
269            bounds: t.bounds.iter().map(crate::signature::render_bound).collect(),
270            items: t.items.iter().map(|i| ItemId(format_id(i))).collect(),
271            implementations: t
272                .implementations
273                .iter()
274                .map(|i| ItemId(format_id(i)))
275                .collect(),
276        }),
277        ItemEnum::TraitAlias(a) => ItemInner::TraitAlias(TraitAliasItem {
278            generics: lower_generics(&a.generics),
279            bounds: a.params.iter().map(crate::signature::render_bound).collect(),
280        }),
281        ItemEnum::Impl(i) => ItemInner::Impl(ImplItem {
282            generics: lower_generics(&i.generics),
283            trait_path: i.trait_.as_ref().map(|p| type_path_from_resolved(p, links)),
284            for_type: type_path_for_type(&i.for_, links),
285            items: i.items.iter().map(|x| ItemId(format_id(x))).collect(),
286            is_negative: i.is_negative,
287            is_synthetic: i.is_synthetic,
288            is_blanket: i.blanket_impl.is_some(),
289            provided_trait_methods: i.provided_trait_methods.clone(),
290        }),
291        ItemEnum::TypeAlias(a) => ItemInner::TypeAlias(TypeAliasItem {
292            aliased_type: type_path_for_type(&a.type_, links),
293            generics: lower_generics(&a.generics),
294        }),
295        ItemEnum::Constant { type_, const_ } => ItemInner::Constant(ConstantItem {
296            type_display: render_type(type_),
297            expr: const_.expr.clone(),
298            value: const_.value.clone(),
299            is_literal: const_.is_literal,
300        }),
301        ItemEnum::Static(s) => ItemInner::Static(StaticItem {
302            type_display: render_type(&s.type_),
303            expr: s.expr.clone(),
304            is_mutable: s.is_mutable,
305            is_unsafe: s.is_unsafe,
306        }),
307        ItemEnum::Macro(source) => ItemInner::Macro(MacroItem {
308            source: source.clone(),
309        }),
310        ItemEnum::ProcMacro(pm) => ItemInner::ProcMacro(ProcMacroItem {
311            macro_kind: match pm.kind {
312                rustdoc_types::MacroKind::Bang => ProcMacroKindKind::Bang,
313                rustdoc_types::MacroKind::Attr => ProcMacroKindKind::Attr,
314                rustdoc_types::MacroKind::Derive => ProcMacroKindKind::Derive,
315            },
316            helpers: pm.helpers.clone(),
317        }),
318        ItemEnum::AssocType {
319            generics,
320            bounds,
321            type_,
322        } => ItemInner::AssocType(AssocTypeItem {
323            generics: lower_generics(generics),
324            bounds: bounds.iter().map(crate::signature::render_bound).collect(),
325            default_display: type_.as_ref().map(render_type),
326        }),
327        ItemEnum::AssocConst { type_, value } => ItemInner::AssocConst(AssocConstItem {
328            type_display: render_type(type_),
329            default_display: value.clone(),
330        }),
331        ItemEnum::Use(u) => ItemInner::Use(UseItem {
332            source: u.source.clone(),
333            name: u.name.clone(),
334            target_id: u.id.as_ref().map(|i| ItemId(format_id(i))),
335            is_glob: u.is_glob,
336        }),
337        ItemEnum::StructField(t) => ItemInner::StructField(StructFieldItem {
338            type_display: render_type(t),
339        }),
340        ItemEnum::Module(_) => ItemInner::Module,
341        ItemEnum::Primitive(_) => ItemInner::Primitive,
342        ItemEnum::ExternType => ItemInner::ExternType,
343        ItemEnum::ExternCrate { .. } => return None,
344    };
345    let _ = (krate, links, diagnostics);
346    Some(inner)
347}
348
349fn lower_visibility(v: &RdVisibility) -> Visibility {
350    match v {
351        RdVisibility::Public => Visibility::Public,
352        RdVisibility::Crate => Visibility::Crate,
353        RdVisibility::Restricted { path, .. } => Visibility::Restricted { path: path.clone() },
354        RdVisibility::Default => Visibility::Default,
355    }
356}
357
358fn lower_source(item: &RdItem) -> Option<SourceLocation> {
359    item.span.as_ref().map(|s| SourceLocation {
360        file: s.filename.to_string_lossy().into_owned(),
361        line_start: s.begin.0 as u32,
362        line_end: s.end.0 as u32,
363    })
364}
365
366fn extract_doc_aliases(item: &RdItem) -> Vec<String> {
367    let mut out: Vec<String> = Vec::new();
368    for attr in &item.attrs {
369        if let rustdoc_types::Attribute::Other(s) = attr {
370            if let Some(rest) = s.strip_prefix("#[doc(alias = \"") {
371                if let Some(end) = rest.find("\")]") {
372                    out.push(rest[..end].to_string());
373                }
374            } else if let Some(rest) = s.strip_prefix("#[doc(alias(") {
375                if let Some(end) = rest.find("))]") {
376                    for raw in rest[..end].split(',') {
377                        let trimmed = raw.trim().trim_matches('"');
378                        if !trimmed.is_empty() {
379                            out.push(trimmed.to_string());
380                        }
381                    }
382                }
383            }
384        }
385    }
386    out
387}
388
389fn extract_stability(item: &RdItem) -> Option<Stability> {
390    for attr in &item.attrs {
391        if let rustdoc_types::Attribute::Other(s) = attr {
392            if s.starts_with("#[stable(") {
393                return Some(Stability {
394                    level: StabilityLevel::Stable,
395                    since: extract_attr_string(s, "since"),
396                    feature: extract_attr_string(s, "feature"),
397                    issue: None,
398                });
399            }
400            if s.starts_with("#[unstable(") {
401                return Some(Stability {
402                    level: StabilityLevel::Unstable,
403                    since: None,
404                    feature: extract_attr_string(s, "feature"),
405                    issue: extract_attr_string(s, "issue")
406                        .and_then(|i| i.trim_matches(|c: char| !c.is_ascii_digit()).parse().ok()),
407                });
408            }
409        }
410    }
411    None
412}
413
/// Returns the string value assigned to `key` inside the attribute text `s`,
/// i.e. the `VALUE` of the first `key = "VALUE"` occurrence.
///
/// An occurrence only counts when `key` is not the tail of a longer
/// identifier, so looking up `issue` skips `tracking_issue = "..."`.
/// Returns `None` when no such occurrence exists or when the value's closing
/// quote is missing. Escaped quotes inside the value are not interpreted: the
/// value ends at the first `"`.
fn extract_attr_string(s: &str, key: &str) -> Option<String> {
    let needle = format!("{} = \"", key);
    let mut search_from = 0;
    while let Some(offset) = s[search_from..].find(&needle) {
        let key_start = search_from + offset;
        let value_start = key_start + needle.len();
        // A preceding identifier character means we matched the suffix of a
        // different, longer key. An empty key has no boundary to check.
        let inside_longer_ident = !key.is_empty()
            && matches!(
                s[..key_start].chars().next_back(),
                Some(prev) if prev.is_alphanumeric() || prev == '_'
            );
        if !inside_longer_ident {
            let value = &s[value_start..];
            let end = value.find('"')?;
            return Some(value[..end].to_string());
        }
        search_from = value_start;
    }
    None
}
421
422fn extract_feature_gates(_item: &RdItem) -> Vec<String> {
423    // Phase 1: feature gates are not directly carried on items in rustdoc JSON
424    // until the renderer reconstructs them from cfg attrs. Placeholder so the
425    // field exists in the schema and Phase 3 rendering can fill it in.
426    Vec::new()
427}
428
429fn extract_structured_attrs(item: &RdItem) -> Vec<String> {
430    item.attrs
431        .iter()
432        .map(|attr| match attr {
433            rustdoc_types::Attribute::Other(s) => s.clone(),
434            other => format!("{:?}", other),
435        })
436        .collect()
437}
438
439fn extract_struct_fields(kind: &StructKind) -> Vec<ItemId> {
440    match kind {
441        StructKind::Plain { fields, .. } => fields.iter().map(|i| ItemId(format_id(i))).collect(),
442        StructKind::Tuple(ids) => ids
443            .iter()
444            .filter_map(|maybe| maybe.as_ref().map(|i| ItemId(format_id(i))))
445            .collect(),
446        StructKind::Unit => Vec::new(),
447    }
448}
449
450fn type_path_from_resolved(p: &rustdoc_types::Path, links: &LinkContext<'_>) -> TypePath {
451    let resolved = links.resolve_id(&p.id);
452    TypePath {
453        crate_id: resolved.as_ref().map(|r| r.crate_id).unwrap_or(0),
454        path: resolved.map(|r| r.path).unwrap_or_else(|| vec![p.path.clone()]),
455        display: p.path.clone(),
456        external: false,
457        html_root_url: None,
458    }
459}
460
461fn type_path_for_type(ty: &rustdoc_types::Type, links: &LinkContext<'_>) -> TypePath {
462    if let rustdoc_types::Type::ResolvedPath(p) = ty {
463        return type_path_from_resolved(p, links);
464    }
465    TypePath {
466        crate_id: 0,
467        path: vec![render_type(ty)],
468        display: render_type(ty),
469        external: false,
470        html_root_url: None,
471    }
472}
473
474fn path_for_id(id: &Id, krate: &Crate) -> Vec<String> {
475    if let Some(summary) = krate.paths.get(id) {
476        return summary.path.clone();
477    }
478    if let Some(item) = krate.index.get(id) {
479        if let Some(name) = &item.name {
480            return vec![name.clone()];
481        }
482    }
483    Vec::new()
484}
485
486#[allow(dead_code)]
487fn unused_diagnostics_marker() -> Diagnostic {
488    diagnostics::warning(codes::INTRA_DOC_LINK_UNRESOLVED, "marker")
489}
490
// Keeps the `HashMap` import referenced (so it is not reported as unused)
// even though the immediate use is in helper modules.
492const _: fn(&HashMap<u32, ()>) = |_| {};