crate_paths_cli_core/
parser.rs

1use crate::item::ItemEntry;
2use crate::item_kind::ItemKind;
3use scraper::{Html, Selector};
4use std::collections::HashMap;
5use thiserror::Error;
6
7#[derive(Debug, Error)]
8pub enum ParserError {
9    #[error("failed to parse selector: {0}")]
10    SelectorParse(#[from] scraper::error::SelectorErrorKind<'static>),
11    #[error("empty href : {0}")]
12    EmptyHref(String),
13    #[error(
14        "content is empty. Maybe docs.rs page failed to build the target crate, or the html parser failed."
15    )]
16    EmptyContent,
17}
18
19pub fn parse_html_to_items(
20    crate_name: &str,
21    html_str: &str,
22) -> Result<Vec<ItemEntry>, ParserError> {
23    let document = Html::parse_document(html_str);
24
25    // Look for every <a> under <ul class="all-items">.
26    // Each link’s href looks like "struct.Arg.html" or "builder/struct.Arg.html",
27    // so we can infer ItemKind from the prefix (before the first '.').
28    let selector = Selector::parse("ul.all-items li a").map_err(ParserError::SelectorParse)?;
29
30    let mut items_map: HashMap<String, ItemEntry> = HashMap::new();
31    for a in document.select(&selector) {
32        // "Arg" or "builder::Arg" as link‐text
33        let path = a.text().collect::<Vec<_>>().concat();
34
35        // Href attribute, e.g. "struct.Arg.html" or "builder/struct.Arg.html"
36        let href = a
37            .value()
38            .attr("href")
39            .ok_or(ParserError::EmptyHref(path.clone()))?;
40
41        // Infer the kind from the filename prefix: split on '/', then split on '.'
42        let kind = match href
43            .split('/')
44            .next_back()
45            .and_then(|filename| filename.split('.').next())
46        {
47            Some("struct") => ItemKind::Struct,
48            Some("enum") => ItemKind::Enum,
49            Some("trait") => ItemKind::Trait,
50            Some("traits") => ItemKind::Trait,
51            Some("type") => ItemKind::TypeAlias,
52            Some("constant") => ItemKind::Constant,
53            Some("fn") => ItemKind::Function,
54            Some("macro") => ItemKind::Macro,
55            Some("union") => ItemKind::Union,
56            Some("mod") => ItemKind::Module,
57            Some("static") => ItemKind::Static,
58            Some("derive") => ItemKind::ProcDerive,
59            Some("attr") => ItemKind::ProcAttribute,
60            Some("extern_type") | Some("externtype") => ItemKind::ExternType,
61            // rust libs only
62            Some("primitive") => ItemKind::Primitive,
63            Some("keyword") => ItemKind::Keyword,
64            _ => {
65                eprintln!("Warning: Could not determine ItemKind for href: {}", href);
66                continue;
67            },
68        };
69
70        let item_name = path.split("::").last().unwrap().to_string();
71        let key = format!("{}::{}", crate_name, path);
72
73        // error[E0658]: `let` expressions in this position are unstable
74        #[allow(clippy::collapsible_if)]
75        if let Some(existing_item) = items_map.get_mut(&key) {
76            if !existing_item.kinds().contains(&kind) {
77                let mut new_kinds = existing_item.kinds().clone();
78                new_kinds.push(kind);
79                *existing_item = ItemEntry::new(
80                    existing_item.crate_name().clone(),
81                    existing_item.item_name().clone(),
82                    existing_item.path().clone(),
83                    new_kinds,
84                );
85            }
86        } else {
87            items_map.entry(key).or_insert_with(|| {
88                ItemEntry::new(crate_name.to_owned(), item_name, path, vec![kind])
89            });
90        }
91    }
92
93    let mut items: Vec<ItemEntry> = items_map.into_values().collect();
94    items.sort_by(|a, b| a.path().cmp(b.path()));
95
96    match items.is_empty() {
97        true => Err(ParserError::EmptyContent),
98        false => Ok(items),
99    }
100}