html_bindgen/scrape/
aria.rs

1use std::collections::HashMap;
2
3use scraper::ElementRef;
4
5use crate::Result;
6
7/// The raw role values extracted from the WAI-ARIA spec
8#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
9pub struct ScrapedAriaRole {
10    pub name: String,
11
12    /// Implicit value for role
13    pub implicit_values: Option<String>,
14    pub is_abstract: bool,
15    /// Base concept
16    pub base: Option<String>,
17    pub are_children_presentational: bool,
18    /// Subclass roles
19    pub children: Vec<String>,
20    /// Prohibited states and properties
21    pub disallowed: Vec<String>,
22    /// Inherited states and properties
23    pub inherited: Vec<String>,
24    /// Allowed accessibility child roles
25    pub must_contain: Vec<String>,
26    pub name_from: Option<String>,
27    pub is_name_required: bool,
28    /// Superclass role
29    pub parent: Vec<String>,
30    /// Supported states and properties
31    pub properties: Vec<String>,
32    /// Related concepts
33    pub related: Option<String>,
34    /// Required states and properties
35    pub required: Vec<String>,
36    /// Required accessibility parent roles
37    pub scope: Vec<String>,
38}
39
40/// The raw property and state values extracted from the WAI-ARIA spec
41#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
42pub struct ScrapedAriaProperty {
43    pub kind: PropertyKind,
44    pub name: String,
45    pub idl_name: Option<String>,
46    pub description: Option<String>,
47    pub is_global: bool,
48
49    /// Used in roles
50    pub applicability: Vec<String>,
51    /// Inherits into roles
52    pub descendants: Vec<String>,
53    /// Related concepts
54    pub related: Option<String>,
55    pub value_kind: String,
56    pub values: Vec<String>,
57}
58
59/// Whether a `ScrapedProperty` is an ARIA property or state
60#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
61pub enum PropertyKind {
62    Property,
63    State,
64}
65
66/// The raw element values extracted from the WAI-ARIA spec
67#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
68pub struct ScrapedAriaElement {
69    pub id: String,
70    pub name: String,
71    pub implicit_roles: Vec<String>,
72    pub allowed_roles: Vec<String>,
73    pub allowed_properties: Vec<String>,
74    pub global: Option<String>,
75    pub checked: Option<String>,
76    pub strong: Vec<String>,
77    pub links: Vec<String>,
78}
79
80/// Parse the W3C WAI-ARIA standards document.
81pub fn scrape_aria(spec: String) -> Result<(Vec<ScrapedAriaRole>, Vec<ScrapedAriaProperty>)> {
82    let document = scraper::Html::parse_document(&spec);
83    let roles = scrape_aria_roles(&document)?;
84    let properties = scrape_aria_properties_and_states(&document)?;
85    Ok((roles, properties))
86}
87
88/// Parse the W3C ARIA in HTML standards document.
89pub fn scrape_html_aria(spec: String) -> Result<Vec<ScrapedAriaElement>> {
90    let document = scraper::Html::parse_document(&spec);
91    let mut specs = vec![];
92
93    let selector = scraper::Selector::parse("#docconformance").unwrap();
94    let header = document.select(&selector).next().unwrap();
95    let section = ElementRef::wrap(header.parent().unwrap()).unwrap();
96    let selector = scraper::Selector::parse("table").unwrap();
97    let table = section.select(&selector).next().unwrap();
98    let selector = scraper::Selector::parse("tbody tr").unwrap();
99    for row in table.select(&selector) {
100        let id = extract_id("th", row).unwrap().to_owned();
101        let element = extract_str("th", row).unwrap();
102        let implicit_roles = extract_vec("td:nth-child(2) a[href^=\"#index-aria-\"]", row);
103
104        let selector = scraper::Selector::parse("td:nth-child(3)").unwrap();
105        let allowances = row.select(&selector).next().unwrap();
106        let allowed_roles = extract_vec("a[href^=\"#index-aria-\"]", allowances);
107        let allowed_properties = extract_vec("a[data-cite^=\"wai-aria-1.2#aria-\"]", allowances);
108        let global = extract_str("a[data-cite=\"wai-aria-1.2#global_states\"]", allowances);
109        let checked = extract_str("a[href=\"#att-checked\"]", allowances);
110        let strong = extract_vec("strong", allowances);
111        let links = extract_vec("a:not([href]):not([data-cite])", allowances);
112
113        specs.push(ScrapedAriaElement {
114            id,
115            name: element,
116            implicit_roles,
117            allowed_roles,
118            allowed_properties,
119            global,
120            checked,
121            strong,
122            links,
123        })
124    }
125    Ok(specs)
126}
127
128/// Scrape the ARIA role definitions
129fn scrape_aria_roles(document: &scraper::Html) -> Result<Vec<ScrapedAriaRole>> {
130    let mut specs = vec![];
131
132    let selector = scraper::Selector::parse(".role").unwrap();
133    for element in document.select(&selector) {
134        let Some(name) = extract_str(".role-name code", element) else {
135            continue;
136        };
137
138        let implicit_values = extract_str(".implicit-values", element);
139        let is_abstract = extract_bool(".role-abstract", element);
140        let base = extract_str(".role-base", element);
141        let are_children_presentational = extract_bool(".role-childpresentational", element);
142        let children = extract_vec(".role-children code", element);
143        let disallowed = extract_vec(".role-disallowed code", element);
144        let inherited = extract_vec(".role-inherited code", element);
145        let must_contain = extract_vec(".role-mustcontain code", element);
146        let name_from = extract_str(".role-namefrom", element);
147        let is_name_required = extract_bool(".role-namerequired", element);
148        let parent = extract_vec(".role-parent code", element);
149        let properties = extract_vec(".role-properties code", element);
150        let related = extract_str(".role-related", element);
151        let required = extract_vec(".required-properties code", element);
152        let scope = extract_vec(".role-scope code", element);
153
154        specs.push(ScrapedAriaRole {
155            name,
156            implicit_values,
157            is_abstract,
158            base,
159            are_children_presentational,
160            children,
161            disallowed,
162            inherited,
163            must_contain,
164            name_from,
165            is_name_required,
166            parent,
167            properties,
168            related,
169            required,
170            scope,
171        })
172    }
173
174    Ok(specs)
175}
176
177/// Scrape the ARIA property and state definitions
178fn scrape_aria_properties_and_states(document: &scraper::Html) -> Result<Vec<ScrapedAriaProperty>> {
179    let mut global_properties = vec![];
180    let selector = scraper::Selector::parse("#global_states li a").unwrap();
181    for element in document.select(&selector) {
182        global_properties.push(element.value().attr("href").unwrap()[1..].to_string());
183    }
184
185    let mut descriptions = HashMap::new();
186    let dt_selector = scraper::Selector::parse("dl#index_state_prop dt").unwrap();
187    let dd_selector = scraper::Selector::parse("dl#index_state_prop dd").unwrap();
188    for (dt, dd) in document
189        .select(&dt_selector)
190        .zip(document.select(&dd_selector))
191    {
192        descriptions.insert(dt.text().collect::<String>(), dd.text().collect::<String>());
193    }
194
195    let mut idl_attribute_names = HashMap::new();
196    let selector =
197        scraper::Selector::parse("#accessibilityroleandproperties-correspondence tr").unwrap();
198    for row in document.select(&selector) {
199        if let Some(idl) = extract_str("[data-idl=\"attribute\"]", row) {
200            if let Some(property) = extract_str(".property-reference, .state-reference", row) {
201                idl_attribute_names.insert(property, idl);
202            }
203        }
204    }
205
206    let mut specs = vec![];
207
208    let selector = scraper::Selector::parse(".property, .state").unwrap();
209    for element in document.select(&selector) {
210        let Some(name) = extract_str(".property-name code, .state-name code", element) else {
211            continue;
212        };
213        let idl_name = idl_attribute_names.get(&name).cloned();
214        let description = descriptions.remove(&name);
215
216        let kind = if element.value().classes().any(|x| x == "property") {
217            PropertyKind::Property
218        } else {
219            PropertyKind::State
220        };
221
222        let is_global = global_properties.contains(&name);
223        let applicability = extract_vec(
224            ".property-applicability code, .state-applicability code",
225            element,
226        );
227        let descendants = extract_vec(
228            ".property-descendants code, .state-descendants code",
229            element,
230        );
231        let related = extract_str(".property-related, .state-related", element);
232        let value_kind = extract_str(".property-value, .state-value", element).unwrap();
233        let values = extract_vec(".value-name", element);
234
235        specs.push(ScrapedAriaProperty {
236            kind,
237            name,
238            idl_name,
239            description,
240            is_global,
241            applicability,
242            descendants,
243            related,
244            value_kind,
245            values,
246        });
247    }
248
249    Ok(specs)
250}
251
252/// Attempt to extract the id attribute of `selector` from `element`.
253fn extract_id<'a>(selector: &str, element: scraper::ElementRef<'a>) -> Option<&'a str> {
254    let selector = scraper::Selector::parse(selector).unwrap();
255    element
256        .select(&selector)
257        .next()
258        .and_then(|el| el.value().attr("id"))
259}
260
261/// Attempt to extract the text content of `selector` from `element`.
262fn extract_str(selector: &str, element: scraper::ElementRef) -> Option<String> {
263    let selector = scraper::Selector::parse(selector).unwrap();
264    element
265        .select(&selector)
266        .next()
267        .map(|el| el.text().collect::<String>().trim().to_owned())
268}
269
270/// Extract a boolean value from `element` using `selector`.
271///
272/// If the selector matches an element and that element's text content is "True" then
273/// return `true`, else return `false`.
274fn extract_bool(selector: &str, element: scraper::ElementRef) -> bool {
275    let selector = scraper::Selector::parse(selector).unwrap();
276    if let Some(el) = element.select(&selector).next() {
277        if el.text().next() == Some("True") {
278            return true;
279        }
280    }
281
282    false
283}
284
285/// Extract a list of `String`s from `element` using `selector`
286fn extract_vec(selector: &str, element: scraper::ElementRef) -> Vec<String> {
287    let selector = scraper::Selector::parse(selector).unwrap();
288    element
289        .select(&selector)
290        .map(|el| el.text().collect())
291        .collect()
292}