1use std::collections::HashMap;
2
3use scraper::ElementRef;
4
5use crate::Result;
6
7#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
9pub struct ScrapedAriaRole {
10 pub name: String,
11
12 pub implicit_values: Option<String>,
14 pub is_abstract: bool,
15 pub base: Option<String>,
17 pub are_children_presentational: bool,
18 pub children: Vec<String>,
20 pub disallowed: Vec<String>,
22 pub inherited: Vec<String>,
24 pub must_contain: Vec<String>,
26 pub name_from: Option<String>,
27 pub is_name_required: bool,
28 pub parent: Vec<String>,
30 pub properties: Vec<String>,
32 pub related: Option<String>,
34 pub required: Vec<String>,
36 pub scope: Vec<String>,
38}
39
40#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
42pub struct ScrapedAriaProperty {
43 pub kind: PropertyKind,
44 pub name: String,
45 pub idl_name: Option<String>,
46 pub description: Option<String>,
47 pub is_global: bool,
48
49 pub applicability: Vec<String>,
51 pub descendants: Vec<String>,
53 pub related: Option<String>,
55 pub value_kind: String,
56 pub values: Vec<String>,
57}
58
59#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
61pub enum PropertyKind {
62 Property,
63 State,
64}
65
66#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
68pub struct ScrapedAriaElement {
69 pub id: String,
70 pub name: String,
71 pub implicit_roles: Vec<String>,
72 pub allowed_roles: Vec<String>,
73 pub allowed_properties: Vec<String>,
74 pub global: Option<String>,
75 pub checked: Option<String>,
76 pub strong: Vec<String>,
77 pub links: Vec<String>,
78}
79
80pub fn scrape_aria(spec: String) -> Result<(Vec<ScrapedAriaRole>, Vec<ScrapedAriaProperty>)> {
82 let document = scraper::Html::parse_document(&spec);
83 let roles = scrape_aria_roles(&document)?;
84 let properties = scrape_aria_properties_and_states(&document)?;
85 Ok((roles, properties))
86}
87
88pub fn scrape_html_aria(spec: String) -> Result<Vec<ScrapedAriaElement>> {
90 let document = scraper::Html::parse_document(&spec);
91 let mut specs = vec![];
92
93 let selector = scraper::Selector::parse("#docconformance").unwrap();
94 let header = document.select(&selector).next().unwrap();
95 let section = ElementRef::wrap(header.parent().unwrap()).unwrap();
96 let selector = scraper::Selector::parse("table").unwrap();
97 let table = section.select(&selector).next().unwrap();
98 let selector = scraper::Selector::parse("tbody tr").unwrap();
99 for row in table.select(&selector) {
100 let id = extract_id("th", row).unwrap().to_owned();
101 let element = extract_str("th", row).unwrap();
102 let implicit_roles = extract_vec("td:nth-child(2) a[href^=\"#index-aria-\"]", row);
103
104 let selector = scraper::Selector::parse("td:nth-child(3)").unwrap();
105 let allowances = row.select(&selector).next().unwrap();
106 let allowed_roles = extract_vec("a[href^=\"#index-aria-\"]", allowances);
107 let allowed_properties = extract_vec("a[data-cite^=\"wai-aria-1.2#aria-\"]", allowances);
108 let global = extract_str("a[data-cite=\"wai-aria-1.2#global_states\"]", allowances);
109 let checked = extract_str("a[href=\"#att-checked\"]", allowances);
110 let strong = extract_vec("strong", allowances);
111 let links = extract_vec("a:not([href]):not([data-cite])", allowances);
112
113 specs.push(ScrapedAriaElement {
114 id,
115 name: element,
116 implicit_roles,
117 allowed_roles,
118 allowed_properties,
119 global,
120 checked,
121 strong,
122 links,
123 })
124 }
125 Ok(specs)
126}
127
128fn scrape_aria_roles(document: &scraper::Html) -> Result<Vec<ScrapedAriaRole>> {
130 let mut specs = vec![];
131
132 let selector = scraper::Selector::parse(".role").unwrap();
133 for element in document.select(&selector) {
134 let Some(name) = extract_str(".role-name code", element) else {
135 continue;
136 };
137
138 let implicit_values = extract_str(".implicit-values", element);
139 let is_abstract = extract_bool(".role-abstract", element);
140 let base = extract_str(".role-base", element);
141 let are_children_presentational = extract_bool(".role-childpresentational", element);
142 let children = extract_vec(".role-children code", element);
143 let disallowed = extract_vec(".role-disallowed code", element);
144 let inherited = extract_vec(".role-inherited code", element);
145 let must_contain = extract_vec(".role-mustcontain code", element);
146 let name_from = extract_str(".role-namefrom", element);
147 let is_name_required = extract_bool(".role-namerequired", element);
148 let parent = extract_vec(".role-parent code", element);
149 let properties = extract_vec(".role-properties code", element);
150 let related = extract_str(".role-related", element);
151 let required = extract_vec(".required-properties code", element);
152 let scope = extract_vec(".role-scope code", element);
153
154 specs.push(ScrapedAriaRole {
155 name,
156 implicit_values,
157 is_abstract,
158 base,
159 are_children_presentational,
160 children,
161 disallowed,
162 inherited,
163 must_contain,
164 name_from,
165 is_name_required,
166 parent,
167 properties,
168 related,
169 required,
170 scope,
171 })
172 }
173
174 Ok(specs)
175}
176
177fn scrape_aria_properties_and_states(document: &scraper::Html) -> Result<Vec<ScrapedAriaProperty>> {
179 let mut global_properties = vec![];
180 let selector = scraper::Selector::parse("#global_states li a").unwrap();
181 for element in document.select(&selector) {
182 global_properties.push(element.value().attr("href").unwrap()[1..].to_string());
183 }
184
185 let mut descriptions = HashMap::new();
186 let dt_selector = scraper::Selector::parse("dl#index_state_prop dt").unwrap();
187 let dd_selector = scraper::Selector::parse("dl#index_state_prop dd").unwrap();
188 for (dt, dd) in document
189 .select(&dt_selector)
190 .zip(document.select(&dd_selector))
191 {
192 descriptions.insert(dt.text().collect::<String>(), dd.text().collect::<String>());
193 }
194
195 let mut idl_attribute_names = HashMap::new();
196 let selector =
197 scraper::Selector::parse("#accessibilityroleandproperties-correspondence tr").unwrap();
198 for row in document.select(&selector) {
199 if let Some(idl) = extract_str("[data-idl=\"attribute\"]", row) {
200 if let Some(property) = extract_str(".property-reference, .state-reference", row) {
201 idl_attribute_names.insert(property, idl);
202 }
203 }
204 }
205
206 let mut specs = vec![];
207
208 let selector = scraper::Selector::parse(".property, .state").unwrap();
209 for element in document.select(&selector) {
210 let Some(name) = extract_str(".property-name code, .state-name code", element) else {
211 continue;
212 };
213 let idl_name = idl_attribute_names.get(&name).cloned();
214 let description = descriptions.remove(&name);
215
216 let kind = if element.value().classes().any(|x| x == "property") {
217 PropertyKind::Property
218 } else {
219 PropertyKind::State
220 };
221
222 let is_global = global_properties.contains(&name);
223 let applicability = extract_vec(
224 ".property-applicability code, .state-applicability code",
225 element,
226 );
227 let descendants = extract_vec(
228 ".property-descendants code, .state-descendants code",
229 element,
230 );
231 let related = extract_str(".property-related, .state-related", element);
232 let value_kind = extract_str(".property-value, .state-value", element).unwrap();
233 let values = extract_vec(".value-name", element);
234
235 specs.push(ScrapedAriaProperty {
236 kind,
237 name,
238 idl_name,
239 description,
240 is_global,
241 applicability,
242 descendants,
243 related,
244 value_kind,
245 values,
246 });
247 }
248
249 Ok(specs)
250}
251
252fn extract_id<'a>(selector: &str, element: scraper::ElementRef<'a>) -> Option<&'a str> {
254 let selector = scraper::Selector::parse(selector).unwrap();
255 element
256 .select(&selector)
257 .next()
258 .and_then(|el| el.value().attr("id"))
259}
260
261fn extract_str(selector: &str, element: scraper::ElementRef) -> Option<String> {
263 let selector = scraper::Selector::parse(selector).unwrap();
264 element
265 .select(&selector)
266 .next()
267 .map(|el| el.text().collect::<String>().trim().to_owned())
268}
269
270fn extract_bool(selector: &str, element: scraper::ElementRef) -> bool {
275 let selector = scraper::Selector::parse(selector).unwrap();
276 if let Some(el) = element.select(&selector).next() {
277 if el.text().next() == Some("True") {
278 return true;
279 }
280 }
281
282 false
283}
284
285fn extract_vec(selector: &str, element: scraper::ElementRef) -> Vec<String> {
287 let selector = scraper::Selector::parse(selector).unwrap();
288 element
289 .select(&selector)
290 .map(|el| el.text().collect())
291 .collect()
292}