use std::collections::HashMap;
use scraper::ElementRef;
use crate::Result;
/// Characteristics of a single role, scraped from one `.role` element of the
/// ARIA specification HTML.
///
/// `Option<String>` fields hold the trimmed text of the first matching
/// descendant (or `None` when nothing matches); `Vec<String>` fields hold the
/// text of every matching descendant; `bool` fields tell whether the matching
/// cell reads `True`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScrapedAriaRole {
    /// Role name, taken from `.role-name code`.
    pub name: String,
    /// Text of the `.implicit-values` cell.
    pub implicit_values: Option<String>,
    /// Flag read from the `.role-abstract` cell.
    pub is_abstract: bool,
    /// Text of the `.role-base` cell.
    pub base: Option<String>,
    /// Flag read from the `.role-childpresentational` cell.
    pub are_children_presentational: bool,
    /// Entries matched by `.role-children code`.
    pub children: Vec<String>,
    /// Entries matched by `.role-disallowed code`.
    pub disallowed: Vec<String>,
    /// Entries matched by `.role-inherited code`.
    pub inherited: Vec<String>,
    /// Entries matched by `.role-mustcontain code`.
    pub must_contain: Vec<String>,
    /// Text of the `.role-namefrom` cell.
    pub name_from: Option<String>,
    /// Flag read from the `.role-namerequired` cell.
    pub is_name_required: bool,
    /// Entries matched by `.role-parent code`.
    pub parent: Vec<String>,
    /// Entries matched by `.role-properties code`.
    pub properties: Vec<String>,
    /// Text of the `.role-related` cell.
    pub related: Option<String>,
    /// Entries matched by `.required-properties code`.
    pub required: Vec<String>,
    /// Entries matched by `.role-scope code`.
    pub scope: Vec<String>,
}
/// A single property or state, scraped from one `.property` / `.state`
/// element of the ARIA specification HTML.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScrapedAriaProperty {
    /// Whether the source element carried the `property` class or not.
    pub kind: PropertyKind,
    /// Attribute name, taken from `.property-name code` / `.state-name code`.
    pub name: String,
    /// Text of the `[data-idl="attribute"]` element found in the row of
    /// `#accessibilityroleandproperties-correspondence` that references this
    /// attribute, when such a row exists.
    pub idl_name: Option<String>,
    /// Text of the `dd` paired with this attribute's `dt` inside
    /// `dl#index_state_prop`, when such an entry exists.
    pub description: Option<String>,
    /// Whether the attribute is linked from the `#global_states` list.
    pub is_global: bool,
    /// Entries matched by `.property-applicability code` /
    /// `.state-applicability code`.
    pub applicability: Vec<String>,
    /// Entries matched by `.property-descendants code` /
    /// `.state-descendants code`.
    pub descendants: Vec<String>,
    /// Trimmed text of the `.property-related` / `.state-related` cell.
    pub related: Option<String>,
    /// Trimmed text of the `.property-value` / `.state-value` cell.
    pub value_kind: String,
    /// Text of every `.value-name` descendant.
    pub values: Vec<String>,
}
/// Distinguishes the two flavours of scraped ARIA attributes.
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
pub enum PropertyKind {
    /// The source element carried the `property` class.
    Property,
    /// The source element did not carry the `property` class.
    State,
}
/// One row of the conformance table scraped by `scrape_html_aria`.
///
/// Apart from `id`, `name` and `implicit_roles`, every field is extracted
/// from the row's third cell (`td:nth-child(3)`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScrapedAriaElement {
    /// `id` attribute of the row's `th`.
    pub id: String,
    /// Trimmed text of the row's `th`.
    pub name: String,
    /// Text of each `a[href^="#index-aria-"]` link in the second cell.
    pub implicit_roles: Vec<String>,
    /// Text of each `a[href^="#index-aria-"]` link in the third cell.
    pub allowed_roles: Vec<String>,
    /// Text of each `a[data-cite^="wai-aria-1.2#aria-"]` link.
    pub allowed_properties: Vec<String>,
    /// Trimmed text of the first `a[data-cite="wai-aria-1.2#global_states"]`
    /// link, if any.
    pub global: Option<String>,
    /// Trimmed text of the first `a[href="#att-checked"]` link, if any.
    pub checked: Option<String>,
    /// Text of each `strong` element.
    pub strong: Vec<String>,
    /// Text of each anchor that has neither `href` nor `data-cite`.
    pub links: Vec<String>,
}
/// Parses `spec` as an HTML document and scrapes its ARIA definitions.
///
/// Returns the scraped roles first, followed by the scraped properties and
/// states. Errors from either scraping pass are propagated.
pub fn scrape_aria(spec: String) -> Result<(Vec<ScrapedAriaRole>, Vec<ScrapedAriaProperty>)> {
    let html = scraper::Html::parse_document(&spec);
    Ok((
        scrape_aria_roles(&html)?,
        scrape_aria_properties_and_states(&html)?,
    ))
}
/// Parses `spec` as an HTML document and scrapes its per-element conformance
/// table.
///
/// The table is the first `table` found inside the parent of the
/// `#docconformance` element. Every `tbody tr` row of that table yields one
/// [`ScrapedAriaElement`], in document order.
///
/// # Panics
///
/// Panics when the document does not have the expected shape:
/// - there is no `#docconformance` element, or it has no parent element;
/// - that parent contains no `table`;
/// - a row lacks a `th` with an `id` attribute, or lacks a third cell.
pub fn scrape_html_aria(spec: String) -> Result<Vec<ScrapedAriaElement>> {
    let document = scraper::Html::parse_document(&spec);

    // All selectors are literals, so parsing them cannot fail. They are built
    // once up front instead of once per table row.
    let header_selector = scraper::Selector::parse("#docconformance").unwrap();
    let table_selector = scraper::Selector::parse("table").unwrap();
    let row_selector = scraper::Selector::parse("tbody tr").unwrap();
    let allowances_selector = scraper::Selector::parse("td:nth-child(3)").unwrap();

    let header = document
        .select(&header_selector)
        .next()
        .expect("document has no `#docconformance` element");
    let section = header
        .parent()
        .and_then(ElementRef::wrap)
        .expect("`#docconformance` has no parent element");
    let table = section
        .select(&table_selector)
        .next()
        .expect("no `table` found next to `#docconformance`");

    let mut specs = vec![];
    for row in table.select(&row_selector) {
        let id = extract_id("th", row)
            .expect("conformance table row has no `th` with an `id` attribute")
            .to_owned();
        let name = extract_str("th", row).expect("conformance table row has no `th` cell");
        let implicit_roles = extract_vec("td:nth-child(2) a[href^=\"#index-aria-\"]", row);

        // The third cell lists everything that is allowed on the element.
        let allowances = row
            .select(&allowances_selector)
            .next()
            .expect("conformance table row has no third cell");
        let allowed_roles = extract_vec("a[href^=\"#index-aria-\"]", allowances);
        let allowed_properties = extract_vec("a[data-cite^=\"wai-aria-1.2#aria-\"]", allowances);
        let global = extract_str("a[data-cite=\"wai-aria-1.2#global_states\"]", allowances);
        let checked = extract_str("a[href=\"#att-checked\"]", allowances);
        let strong = extract_vec("strong", allowances);
        let links = extract_vec("a:not([href]):not([data-cite])", allowances);

        specs.push(ScrapedAriaElement {
            id,
            name,
            implicit_roles,
            allowed_roles,
            allowed_properties,
            global,
            checked,
            strong,
            links,
        });
    }
    Ok(specs)
}
/// Collects one [`ScrapedAriaRole`] per `.role` element of `document`.
///
/// Elements without a `.role-name code` descendant are skipped. Every other
/// characteristic is optional and falls back to `None`, `false` or an empty
/// list when the corresponding cell is absent.
fn scrape_aria_roles(document: &scraper::Html) -> Result<Vec<ScrapedAriaRole>> {
    let role_selector = scraper::Selector::parse(".role").unwrap();
    let roles: Vec<ScrapedAriaRole> = document
        .select(&role_selector)
        .filter_map(|role| {
            // A role block without a name is not a role definition.
            let name = extract_str(".role-name code", role)?;
            Some(ScrapedAriaRole {
                name,
                implicit_values: extract_str(".implicit-values", role),
                is_abstract: extract_bool(".role-abstract", role),
                base: extract_str(".role-base", role),
                are_children_presentational: extract_bool(".role-childpresentational", role),
                children: extract_vec(".role-children code", role),
                disallowed: extract_vec(".role-disallowed code", role),
                inherited: extract_vec(".role-inherited code", role),
                must_contain: extract_vec(".role-mustcontain code", role),
                name_from: extract_str(".role-namefrom", role),
                is_name_required: extract_bool(".role-namerequired", role),
                parent: extract_vec(".role-parent code", role),
                properties: extract_vec(".role-properties code", role),
                related: extract_str(".role-related", role),
                required: extract_vec(".required-properties code", role),
                scope: extract_vec(".role-scope code", role),
            })
        })
        .collect();
    Ok(roles)
}
/// Collects one [`ScrapedAriaProperty`] per `.property` / `.state` element of
/// `document`.
///
/// Three lookup tables are built first and then joined by attribute name:
/// - the names linked from the `#global_states` list (drives `is_global`);
/// - the `dt` → `dd` pairs of `dl#index_state_prop` (drives `description`);
/// - the rows of `#accessibilityroleandproperties-correspondence` (drives
///   `idl_name`).
///
/// Elements without a `.property-name code` / `.state-name code` descendant
/// are skipped.
///
/// # Panics
///
/// Panics when a named property or state has no `.property-value` /
/// `.state-value` cell.
fn scrape_aria_properties_and_states(document: &scraper::Html) -> Result<Vec<ScrapedAriaProperty>> {
    // Names of global attributes, taken from the fragment part of each link.
    // Links without an `href` or without a `#` cannot name an attribute and
    // are ignored instead of causing a slicing panic.
    let selector = scraper::Selector::parse("#global_states li a").unwrap();
    let global_properties: std::collections::HashSet<String> = document
        .select(&selector)
        .filter_map(|link| link.value().attr("href"))
        .filter_map(|href| href.split_once('#'))
        .map(|(_, fragment)| fragment.to_owned())
        .collect();

    // `dt` and `dd` elements are paired positionally. Keys are trimmed because
    // they are looked up with names produced by `extract_str`, which trims.
    let dt_selector = scraper::Selector::parse("dl#index_state_prop dt").unwrap();
    let dd_selector = scraper::Selector::parse("dl#index_state_prop dd").unwrap();
    let mut descriptions: HashMap<String, String> = document
        .select(&dt_selector)
        .zip(document.select(&dd_selector))
        .map(|(dt, dd)| {
            let term = dt.text().collect::<String>().trim().to_owned();
            (term, dd.text().collect::<String>())
        })
        .collect();

    // Attribute name -> IDL attribute name; rows lacking either are ignored.
    let selector =
        scraper::Selector::parse("#accessibilityroleandproperties-correspondence tr").unwrap();
    let idl_attribute_names: HashMap<String, String> = document
        .select(&selector)
        .filter_map(|row| {
            let idl = extract_str("[data-idl=\"attribute\"]", row)?;
            let property = extract_str(".property-reference, .state-reference", row)?;
            Some((property, idl))
        })
        .collect();

    let mut specs = vec![];
    let selector = scraper::Selector::parse(".property, .state").unwrap();
    for element in document.select(&selector) {
        let Some(name) = extract_str(".property-name code, .state-name code", element) else {
            continue;
        };
        let idl_name = idl_attribute_names.get(&name).cloned();
        // Descriptions are moved out of the table rather than cloned.
        let description = descriptions.remove(&name);
        let kind = if element.value().classes().any(|class| class == "property") {
            PropertyKind::Property
        } else {
            PropertyKind::State
        };
        let is_global = global_properties.contains(&name);
        let applicability = extract_vec(
            ".property-applicability code, .state-applicability code",
            element,
        );
        let descendants = extract_vec(
            ".property-descendants code, .state-descendants code",
            element,
        );
        let related = extract_str(".property-related, .state-related", element);
        let value_kind = extract_str(".property-value, .state-value", element)
            .unwrap_or_else(|| panic!("`{name}` has no `.property-value` or `.state-value` cell"));
        let values = extract_vec(".value-name", element);
        specs.push(ScrapedAriaProperty {
            kind,
            name,
            idl_name,
            description,
            is_global,
            applicability,
            descendants,
            related,
            value_kind,
            values,
        });
    }
    Ok(specs)
}
/// Returns the `id` attribute of the first descendant of `element` matching
/// `selector`.
///
/// Yields `None` when nothing matches or when the match has no `id`.
///
/// # Panics
///
/// Panics if `selector` is not a valid CSS selector.
fn extract_id<'a>(selector: &str, element: scraper::ElementRef<'a>) -> Option<&'a str> {
    let selector = scraper::Selector::parse(selector).unwrap();
    let first_match = element.select(&selector).next()?;
    first_match.value().attr("id")
}
/// Returns the whitespace-trimmed text content of the first descendant of
/// `element` matching `selector`, or `None` when nothing matches.
///
/// # Panics
///
/// Panics if `selector` is not a valid CSS selector.
fn extract_str(selector: &str, element: scraper::ElementRef) -> Option<String> {
    let selector = scraper::Selector::parse(selector).unwrap();
    let first_match = element.select(&selector).next()?;
    // Concatenate every text node below the match before trimming.
    let content: String = first_match.text().collect();
    Some(content.trim().to_owned())
}
/// Tells whether the first descendant of `element` matching `selector` reads
/// `True`.
///
/// The decision is based on the first non-blank text node of the match,
/// compared after trimming surrounding whitespace, so indented or
/// line-wrapped markup such as `<td>\n  True\n</td>` is recognised. Returns
/// `false` when nothing matches or when the text is anything else.
///
/// # Panics
///
/// Panics if `selector` is not a valid CSS selector.
fn extract_bool(selector: &str, element: scraper::ElementRef) -> bool {
    let selector = scraper::Selector::parse(selector).unwrap();
    let Some(cell) = element.select(&selector).next() else {
        return false;
    };
    // Skip whitespace-only nodes so formatting of the HTML does not matter.
    let first_text = cell.text().map(str::trim).find(|text| !text.is_empty());
    first_text == Some("True")
}
/// Returns the text content of every descendant of `element` matching
/// `selector`, in document order.
///
/// Unlike `extract_str`, the texts are returned untrimmed.
///
/// # Panics
///
/// Panics if `selector` is not a valid CSS selector.
fn extract_vec(selector: &str, element: scraper::ElementRef) -> Vec<String> {
    let selector = scraper::Selector::parse(selector).unwrap();
    let mut texts = Vec::new();
    for matched in element.select(&selector) {
        // Concatenate all text nodes below the match into a single string.
        texts.push(matched.text().collect::<String>());
    }
    texts
}