html_bindgen/parse/elements/
mod.rs1use crate::scrape::ScrapedElement;
2use crate::Result;
3use convert_case::{Case, Casing};
4use serde::{Deserialize, Serialize};
5
6use super::{Attribute, AttributeType, ParsedCategory, ParsedRelationship};
7use categories::parse_categories;
8
9mod categories;
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ParsedElement {
14 pub tag_name: String,
15 pub struct_name: String,
16 pub submodule_name: String,
17 pub mdn_link: String,
18 pub has_global_attributes: bool,
19 pub has_closing_tag: bool,
20 pub attributes: Vec<Attribute>,
21 pub dom_interface: String,
22 pub content_categories: Vec<ParsedCategory>,
23 pub permitted_content: Vec<ParsedRelationship>,
24 pub permitted_parents: Vec<ParsedRelationship>,
25}
26
27pub fn parse_elements(
28 scraped_iter: impl Iterator<Item = Result<ScrapedElement>>,
29) -> Result<Vec<ParsedElement>> {
30 let mut scraped = vec![];
31 for el in scraped_iter {
32 let el = el?;
33 scraped.push(el);
34 }
35
36 let mut tag_names = vec![];
37 for scraped in scraped.iter().cloned() {
38 tag_names.push(scraped.tag_name.to_owned());
40 }
41 let mut output = vec![];
42 for scraped in scraped {
43 let tag_name = scraped.tag_name;
44 let struct_name = parse_struct_name(&tag_name);
45 let (has_global_attributes, attributes) = parse_attrs(scraped.content_attributes);
46 let dom_interface = parse_dom_interface(&scraped.dom_interface);
47 let mut permitted_parents = parse_relationships(&scraped.contexts, &tag_names);
48 append_super_categories(&mut permitted_parents);
49 output.push(ParsedElement {
50 struct_name,
51 dom_interface,
52 has_closing_tag: parse_tags(scraped.tag_omission),
53 attributes,
54 has_global_attributes,
55 submodule_name: parse_kinds(scraped.submodule_name),
56 mdn_link: parse_mdn_link(&tag_name),
57 content_categories: parse_content_categories(&scraped.categories),
58 permitted_content: parse_relationships(&scraped.content_model, &tag_names),
59 permitted_parents,
60 tag_name,
61 });
62 }
63 Ok(output)
64}
65
/// Returns the MDN element URL for `tag_name` (the tag name is appended to the
/// fixed base URL without any escaping).
fn parse_mdn_link(tag_name: &str) -> String {
    const MDN_ELEMENT_BASE: &str = "https://developer.mozilla.org/en-US/docs/Web/HTML/Element/";
    [MDN_ELEMENT_BASE, tag_name].concat()
}
69
70pub fn parse_struct_name(tag_name: &str) -> String {
71 match tag_name {
72 "a" => "Anchor".to_owned(),
73 "abbr" => "Abbreviation".to_owned(),
74 "area" => "ImageMapArea".to_owned(),
75 "b" => "Bold".to_owned(),
76 "bdi" => "BidirectionalIsolate".to_owned(),
77 "bdo" => "BidirectionalTextOverride".to_owned(),
78 "bgsound" => "BackgroundSound".to_owned(),
79 "blockquote" => "BlockQuote".to_owned(),
80 "br" => "LineBreak".to_owned(),
81 "col" => "TableColumn".to_owned(),
82 "colgroup" => "TableColumnGroup".to_owned(),
83 "datalist" => "DataList".to_owned(),
84 "dd" => "DescriptionDetails".to_owned(),
85 "del" => "DeletedText".to_owned(),
86 "dfn" => "Definition".to_owned(),
87 "dir" => "Directory".to_owned(),
88 "div" => "Division".to_owned(),
89 "dl" => "DescriptionList".to_owned(),
90 "dt" => "DescriptionTerm".to_owned(),
91 "em" => "Emphasis".to_owned(),
92 "figcaption" => "FigureCaption".to_owned(),
93 "hgroup" => "HeadingGroup".to_owned(),
94 "h1" => "Heading1".to_owned(),
95 "h2" => "Heading2".to_owned(),
96 "h3" => "Heading3".to_owned(),
97 "h4" => "Heading4".to_owned(),
98 "h5" => "Heading5".to_owned(),
99 "h6" => "Heading6".to_owned(),
100 "hr" => "ThematicBreak".to_owned(),
101 "i" => "Italic".to_owned(),
102 "img" => "Image".to_owned(),
103 "ins" => "InsertedText".to_owned(),
104 "kbd" => "KeyboardInput".to_owned(),
105 "li" => "ListItem".to_owned(),
106 "map" => "ImageMap".to_owned(),
107 "mark" => "MarkText".to_owned(),
108 "nav" => "Navigation".to_owned(),
109 "nobr" => "NonBreakingText".to_owned(),
110 "noscript" => "NoScript".to_owned(),
111 "ol" => "OrderedList".to_owned(),
112 "optgroup" => "OptionGroup".to_owned(),
113 "p" => "Paragraph".to_owned(),
114 "pre" => "PreformattedText".to_owned(),
115 "q" => "Quotation".to_owned(),
116 "rp" => "RubyFallbackParenthesis".to_owned(),
117 "rt" => "RubyText".to_owned(),
118 "rtc" => "RubyTextContainer".to_owned(),
119 "ruby" => "RubyAnnotation".to_owned(),
120 "s" => "StrikeThrough".to_owned(),
121 "samp" => "SampleOutput".to_owned(),
122 "small" => "SideComment".to_owned(),
123 "source" => "MediaSource".to_owned(),
124 "span" => "Span".to_owned(),
125 "sub" => "SubScript".to_owned(),
126 "sup" => "SuperScript".to_owned(),
127 "textarea" => "TextArea".to_owned(),
128 "tbody" => "TableBody".to_owned(),
129 "td" => "TableCell".to_owned(),
130 "tfoot" => "TableFoot".to_owned(),
131 "th" => "TableHeader".to_owned(),
132 "thead" => "TableHead".to_owned(),
133 "tr" => "TableRow".to_owned(),
134 "track" => "TextTrack".to_owned(),
135 "tt" => "TeletypeText".to_owned(),
136 "u" => "Underline".to_owned(),
137 "ul" => "UnorderedList".to_owned(),
138 "var" => "Variable".to_owned(),
139 "wbr" => "LineBreakOpportunity".to_owned(),
140 other => other.to_case(Case::UpperCamel),
141 }
142}
143
/// Decides whether an element has a closing tag from its tag-omission text.
///
/// The fragments are concatenated without a separator; the result is `false`
/// only when the concatenation is exactly `No end tag.`. Every other text,
/// including the empty one, yields `true`.
fn parse_tags(input: Vec<String>) -> bool {
    let omission_text = input.concat();
    omission_text != "No end tag."
}
155
156fn parse_attrs(content_attributes: Vec<String>) -> (bool, Vec<Attribute>) {
157 let mut has_global_attributes = false;
158 let mut output = vec![];
159 for s in content_attributes {
160 if s == "Global attributes" {
161 has_global_attributes = true;
162 continue;
163 } else if !s.contains("—") {
164 continue;
165 }
166 let mut iter = s.split("—");
167 let name = iter.next().unwrap().trim().to_owned();
168 let description = iter.next().unwrap().trim().to_owned();
169
170 let (name, description) = match name.as_str() {
173 "If the element is not a child of an ul or menu element: value" => {
174 ("value".to_owned(), format!("{description}. Only if the element is not a child of an `ul` or `menu` element."))
175 }
176 _ => if let Some((name, condition)) = name.split_once(" ") {
177 (name.to_owned(), format!("{description} {condition}"))
178 } else {
179 (name, description)
180 }
181 };
182
183 let field_name = super::normalize_field_name(&name);
185
186 output.push(Attribute {
187 ty: AttributeType::String,
188 name,
189 description,
190 field_name,
191 });
192 }
193 (has_global_attributes, output)
194}
195
/// Translates a scraped submodule name into the short submodule name.
///
/// # Panics
///
/// Panics with `unknown category: …` when `kind` is not in the table below.
fn parse_kinds(kind: String) -> String {
    const SUBMODULES: [(&str, &str); 11] = [
        ("the-root-element", "root"),
        ("interactive-elements", "interactive"),
        ("grouping-content", "text"),
        ("text-level-semantics", "text"),
        ("document-metadata", "metadata"),
        ("embedded-content", "embedded"),
        ("forms", "forms"),
        ("tables", "tables"),
        ("sections", "sections"),
        ("edits", "edits"),
        ("scripting-3", "scripting"),
    ];

    match SUBMODULES
        .iter()
        .find(|(section, _)| *section == kind.as_str())
    {
        Some((_, submodule)) => (*submodule).to_owned(),
        None => panic!("unknown category: {kind}"),
    }
}
213
214fn parse_content_categories(categories: &[String]) -> Vec<ParsedCategory> {
215 let mut cat_output = vec![];
216 for line in categories {
217 for line in parse_categories(line.as_str()) {
218 match line.as_str() {
219 "metadata" => cat_output.push(ParsedCategory::Metadata),
220 "flow" => cat_output.push(ParsedCategory::Flow),
221 "sectioning" => cat_output.push(ParsedCategory::Sectioning),
222 "heading" => cat_output.push(ParsedCategory::Heading),
223 "phrasing" => cat_output.push(ParsedCategory::Phrasing),
224 "embedded" => cat_output.push(ParsedCategory::Embedded),
225 "interactive" => cat_output.push(ParsedCategory::Interactive),
226 "palpable" => cat_output.push(ParsedCategory::Palpable),
227 "transparent" => cat_output.push(ParsedCategory::Transparent),
228 "script-supporting" => cat_output.push(ParsedCategory::ScriptSupporting),
229 other => eprintln!("unknown content kind: {other}"),
230 }
231 }
232 }
233
234 cat_output.dedup();
235 cat_output.sort();
236 cat_output
237}
238
239fn parse_relationships(categories: &[String], tag_names: &[String]) -> Vec<ParsedRelationship> {
240 let mut cat_output = vec![];
241 for line in categories {
242 for line in parse_categories(line.as_str()) {
243 match line.as_str() {
244 "metadata" => cat_output.push(ParsedCategory::Metadata.into()),
245 "flow" => cat_output.push(ParsedCategory::Flow.into()),
246 "sectioning" => cat_output.push(ParsedCategory::Sectioning.into()),
247 "heading" => cat_output.push(ParsedCategory::Heading.into()),
248 "phrasing" => cat_output.push(ParsedCategory::Phrasing.into()),
249 "embedded" => cat_output.push(ParsedCategory::Embedded.into()),
250 "interactive" => cat_output.push(ParsedCategory::Interactive.into()),
251 "palpable" => cat_output.push(ParsedCategory::Palpable.into()),
252 "transparent" => cat_output.push(ParsedCategory::Transparent.into()),
253 "script-supporting" => cat_output.push(ParsedCategory::ScriptSupporting.into()),
254 tag_name => {
255 if tag_names.contains(&tag_name.to_owned()) {
256 cat_output.push(ParsedRelationship::Element(parse_struct_name(tag_name)));
257 } else if tag_name == "text" {
258 cat_output.push(ParsedRelationship::Element(parse_struct_name("Text")));
259 } else {
260 eprintln!("unknown tag name: {tag_name}");
261 }
262 }
263 }
264 }
265 }
266
267 cat_output.sort();
268 cat_output.dedup();
269 cat_output
270}
271
272fn append_super_categories(cat_output: &mut Vec<ParsedRelationship>) {
273 let mut additional_output = vec![];
274
275 for relationship in &*cat_output {
276 if let ParsedRelationship::Category(category) = relationship {
277 match category {
278 ParsedCategory::Heading
279 | ParsedCategory::Sectioning
280 | ParsedCategory::Phrasing
281 | ParsedCategory::Interactive => {
282 additional_output.push(ParsedCategory::Flow.into());
283 }
284 ParsedCategory::Embedded => {
285 additional_output.push(ParsedCategory::Phrasing.into());
286 additional_output.push(ParsedCategory::Flow.into());
287 }
288 ParsedCategory::Transparent => {
289 additional_output.push(ParsedCategory::Flow.into());
291 }
292 _ => continue,
293 }
294 }
295 }
296
297 cat_output.append(&mut additional_output);
298 cat_output.sort();
299 cat_output.dedup();
300}
301
302fn parse_dom_interface(lines: &[String]) -> String {
304 let line = lines.get(0).as_deref().unwrap().clone();
305
306 if line.starts_with("Uses") {
307 let line = line.strip_prefix("Uses").unwrap();
308 let line = line.strip_suffix(".").unwrap();
309 line.trim().to_owned()
310 } else if line.starts_with("Use") {
311 let line = line.strip_prefix("Use").unwrap();
312 let line = line.strip_suffix(".").unwrap();
313 line.trim().to_owned()
314 } else {
315 crate::utils::extract_webidl_name(&line).unwrap()
316 }
317}