crate_paths_cli_core/
parser.rs1use crate::item::ItemEntry;
2use crate::item_kind::ItemKind;
3use scraper::{Html, Selector};
4use std::collections::HashMap;
5use thiserror::Error;
6
7#[derive(Debug, Error)]
8pub enum ParserError {
9 #[error("failed to parse selector: {0}")]
10 SelectorParse(#[from] scraper::error::SelectorErrorKind<'static>),
11 #[error("empty href : {0}")]
12 EmptyHref(String),
13 #[error(
14 "content is empty. Maybe docs.rs page failed to build the target crate, or the html parser failed."
15 )]
16 EmptyContent,
17}
18
19pub fn parse_html_to_items(
20 crate_name: &str,
21 html_str: &str,
22) -> Result<Vec<ItemEntry>, ParserError> {
23 let document = Html::parse_document(html_str);
24
25 let selector = Selector::parse("ul.all-items li a").map_err(ParserError::SelectorParse)?;
29
30 let mut items_map: HashMap<String, ItemEntry> = HashMap::new();
31 for a in document.select(&selector) {
32 let path = a.text().collect::<Vec<_>>().concat();
34
35 let href = a
37 .value()
38 .attr("href")
39 .ok_or(ParserError::EmptyHref(path.clone()))?;
40
41 let kind = match href
43 .split('/')
44 .next_back()
45 .and_then(|filename| filename.split('.').next())
46 {
47 Some("struct") => ItemKind::Struct,
48 Some("enum") => ItemKind::Enum,
49 Some("trait") => ItemKind::Trait,
50 Some("traits") => ItemKind::Trait,
51 Some("type") => ItemKind::TypeAlias,
52 Some("constant") => ItemKind::Constant,
53 Some("fn") => ItemKind::Function,
54 Some("macro") => ItemKind::Macro,
55 Some("union") => ItemKind::Union,
56 Some("mod") => ItemKind::Module,
57 Some("static") => ItemKind::Static,
58 Some("derive") => ItemKind::ProcDerive,
59 Some("attr") => ItemKind::ProcAttribute,
60 Some("extern_type") | Some("externtype") => ItemKind::ExternType,
61 Some("primitive") => ItemKind::Primitive,
63 Some("keyword") => ItemKind::Keyword,
64 _ => {
65 eprintln!("Warning: Could not determine ItemKind for href: {}", href);
66 continue;
67 },
68 };
69
70 let item_name = path.split("::").last().unwrap().to_string();
71 let key = format!("{}::{}", crate_name, path);
72
73 #[allow(clippy::collapsible_if)]
75 if let Some(existing_item) = items_map.get_mut(&key) {
76 if !existing_item.kinds().contains(&kind) {
77 let mut new_kinds = existing_item.kinds().clone();
78 new_kinds.push(kind);
79 *existing_item = ItemEntry::new(
80 existing_item.crate_name().clone(),
81 existing_item.item_name().clone(),
82 existing_item.path().clone(),
83 new_kinds,
84 );
85 }
86 } else {
87 items_map.entry(key).or_insert_with(|| {
88 ItemEntry::new(crate_name.to_owned(), item_name, path, vec![kind])
89 });
90 }
91 }
92
93 let mut items: Vec<ItemEntry> = items_map.into_values().collect();
94 items.sort_by(|a, b| a.path().cmp(b.path()));
95
96 match items.is_empty() {
97 true => Err(ParserError::EmptyContent),
98 false => Ok(items),
99 }
100}