html_bindgen/parse/elements/
mod.rs

1use crate::scrape::ScrapedElement;
2use crate::Result;
3use convert_case::{Case, Casing};
4use serde::{Deserialize, Serialize};
5
6use super::{Attribute, AttributeType, ParsedCategory, ParsedRelationship};
7use categories::parse_categories;
8
9mod categories;
10
11/// The parsed values converted from the raw spec
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ParsedElement {
14    pub tag_name: String,
15    pub struct_name: String,
16    pub submodule_name: String,
17    pub mdn_link: String,
18    pub has_global_attributes: bool,
19    pub has_closing_tag: bool,
20    pub attributes: Vec<Attribute>,
21    pub dom_interface: String,
22    pub content_categories: Vec<ParsedCategory>,
23    pub permitted_content: Vec<ParsedRelationship>,
24    pub permitted_parents: Vec<ParsedRelationship>,
25}
26
27pub fn parse_elements(
28    scraped_iter: impl Iterator<Item = Result<ScrapedElement>>,
29) -> Result<Vec<ParsedElement>> {
30    let mut scraped = vec![];
31    for el in scraped_iter {
32        let el = el?;
33        scraped.push(el);
34    }
35
36    let mut tag_names = vec![];
37    for scraped in scraped.iter().cloned() {
38        // let struct_name = parse_struct_name(&scraped.tag_name);
39        tag_names.push(scraped.tag_name.to_owned());
40    }
41    let mut output = vec![];
42    for scraped in scraped {
43        let tag_name = scraped.tag_name;
44        let struct_name = parse_struct_name(&tag_name);
45        let (has_global_attributes, attributes) = parse_attrs(scraped.content_attributes);
46        let dom_interface = parse_dom_interface(&scraped.dom_interface);
47        let mut permitted_parents = parse_relationships(&scraped.contexts, &tag_names);
48        append_super_categories(&mut permitted_parents);
49        output.push(ParsedElement {
50            struct_name,
51            dom_interface,
52            has_closing_tag: parse_tags(scraped.tag_omission),
53            attributes,
54            has_global_attributes,
55            submodule_name: parse_kinds(scraped.submodule_name),
56            mdn_link: parse_mdn_link(&tag_name),
57            content_categories: parse_content_categories(&scraped.categories),
58            permitted_content: parse_relationships(&scraped.content_model, &tag_names),
59            permitted_parents,
60            tag_name,
61        });
62    }
63    Ok(output)
64}
65
/// Build the MDN reference URL for the element with the given tag name.
///
/// The tag name is appended verbatim to the MDN element base URL.
fn parse_mdn_link(tag_name: &str) -> String {
    const MDN_ELEMENT_BASE: &str = "https://developer.mozilla.org/en-US/docs/Web/HTML/Element/";
    [MDN_ELEMENT_BASE, tag_name].concat()
}
69
70pub fn parse_struct_name(tag_name: &str) -> String {
71    match tag_name {
72        "a" => "Anchor".to_owned(),
73        "abbr" => "Abbreviation".to_owned(),
74        "area" => "ImageMapArea".to_owned(),
75        "b" => "Bold".to_owned(),
76        "bdi" => "BidirectionalIsolate".to_owned(),
77        "bdo" => "BidirectionalTextOverride".to_owned(),
78        "bgsound" => "BackgroundSound".to_owned(),
79        "blockquote" => "BlockQuote".to_owned(),
80        "br" => "LineBreak".to_owned(),
81        "col" => "TableColumn".to_owned(),
82        "colgroup" => "TableColumnGroup".to_owned(),
83        "datalist" => "DataList".to_owned(),
84        "dd" => "DescriptionDetails".to_owned(),
85        "del" => "DeletedText".to_owned(),
86        "dfn" => "Definition".to_owned(),
87        "dir" => "Directory".to_owned(),
88        "div" => "Division".to_owned(),
89        "dl" => "DescriptionList".to_owned(),
90        "dt" => "DescriptionTerm".to_owned(),
91        "em" => "Emphasis".to_owned(),
92        "figcaption" => "FigureCaption".to_owned(),
93        "hgroup" => "HeadingGroup".to_owned(),
94        "h1" => "Heading1".to_owned(),
95        "h2" => "Heading2".to_owned(),
96        "h3" => "Heading3".to_owned(),
97        "h4" => "Heading4".to_owned(),
98        "h5" => "Heading5".to_owned(),
99        "h6" => "Heading6".to_owned(),
100        "hr" => "ThematicBreak".to_owned(),
101        "i" => "Italic".to_owned(),
102        "img" => "Image".to_owned(),
103        "ins" => "InsertedText".to_owned(),
104        "kbd" => "KeyboardInput".to_owned(),
105        "li" => "ListItem".to_owned(),
106        "map" => "ImageMap".to_owned(),
107        "mark" => "MarkText".to_owned(),
108        "nav" => "Navigation".to_owned(),
109        "nobr" => "NonBreakingText".to_owned(),
110        "noscript" => "NoScript".to_owned(),
111        "ol" => "OrderedList".to_owned(),
112        "optgroup" => "OptionGroup".to_owned(),
113        "p" => "Paragraph".to_owned(),
114        "pre" => "PreformattedText".to_owned(),
115        "q" => "Quotation".to_owned(),
116        "rp" => "RubyFallbackParenthesis".to_owned(),
117        "rt" => "RubyText".to_owned(),
118        "rtc" => "RubyTextContainer".to_owned(),
119        "ruby" => "RubyAnnotation".to_owned(),
120        "s" => "StrikeThrough".to_owned(),
121        "samp" => "SampleOutput".to_owned(),
122        "small" => "SideComment".to_owned(),
123        "source" => "MediaSource".to_owned(),
124        "span" => "Span".to_owned(),
125        "sub" => "SubScript".to_owned(),
126        "sup" => "SuperScript".to_owned(),
127        "textarea" => "TextArea".to_owned(),
128        "tbody" => "TableBody".to_owned(),
129        "td" => "TableCell".to_owned(),
130        "tfoot" => "TableFoot".to_owned(),
131        "th" => "TableHeader".to_owned(),
132        "thead" => "TableHead".to_owned(),
133        "tr" => "TableRow".to_owned(),
134        "track" => "TextTrack".to_owned(),
135        "tt" => "TeletypeText".to_owned(),
136        "u" => "Underline".to_owned(),
137        "ul" => "UnorderedList".to_owned(),
138        "var" => "Variable".to_owned(),
139        "wbr" => "LineBreakOpportunity".to_owned(),
140        other => other.to_case(Case::UpperCamel),
141    }
142}
143
/// Decide whether an element has a closing tag, based on its tag omission text.
///
/// The fragments in `input` are concatenated without a separator; the element
/// is reported as having no closing tag only when the result is exactly
/// "No end tag.".
fn parse_tags(input: Vec<String>) -> bool {
    // NOTE: There are a bunch of conditional cases which allow omitting end tags
    // but for the sake of convenience we just don't bother with any of those.
    // That's mostly important for parsers, which we're not defining here.
    // Every text other than "No end tag." (including empty text and
    // "Neither tag is omissible.") therefore counts as having a closing tag.
    input.concat() != "No end tag."
}
155
156fn parse_attrs(content_attributes: Vec<String>) -> (bool, Vec<Attribute>) {
157    let mut has_global_attributes = false;
158    let mut output = vec![];
159    for s in content_attributes {
160        if s == "Global attributes" {
161            has_global_attributes = true;
162            continue;
163        } else if !s.contains("—") {
164            continue;
165        }
166        let mut iter = s.split("—");
167        let name = iter.next().unwrap().trim().to_owned();
168        let description = iter.next().unwrap().trim().to_owned();
169
170        // Add conditional attributes and document their conditionality.
171        // This probably won't be the final way this is done as this loses some of the type-safety guarantees.
172        let (name, description) = match name.as_str() {
173            "If the element is not a child of an ul or menu element: value" => {
174                ("value".to_owned(), format!("{description}. Only if the element is not a child of an `ul` or `menu` element."))
175            }
176            _ => if let Some((name, condition)) = name.split_once(" ") {
177                (name.to_owned(), format!("{description} {condition}"))
178            } else {
179                (name, description)
180            }
181        };
182
183        // Rename attributes which are labeled after keywords
184        let field_name = super::normalize_field_name(&name);
185
186        output.push(Attribute {
187            ty: AttributeType::String,
188            name,
189            description,
190            field_name,
191        });
192    }
193    (has_global_attributes, output)
194}
195
/// Translate a scraped spec section identifier into a short submodule name.
///
/// # Panics
///
/// Panics when `kind` is not one of the known section identifiers.
fn parse_kinds(kind: String) -> String {
    // (section identifier, submodule name)
    const SUBMODULES: [(&str, &str); 11] = [
        ("the-root-element", "root"),
        ("interactive-elements", "interactive"),
        ("grouping-content", "text"),
        ("text-level-semantics", "text"),
        ("document-metadata", "metadata"),
        ("embedded-content", "embedded"),
        ("forms", "forms"),
        ("tables", "tables"),
        ("sections", "sections"),
        ("edits", "edits"),
        ("scripting-3", "scripting"),
    ];

    let other = kind.as_str();
    match SUBMODULES.iter().find(|(section, _)| *section == other) {
        Some((_, submodule)) => (*submodule).to_owned(),
        None => panic!("unknown category: {other}"),
    }
}
213
214fn parse_content_categories(categories: &[String]) -> Vec<ParsedCategory> {
215    let mut cat_output = vec![];
216    for line in categories {
217        for line in parse_categories(line.as_str()) {
218            match line.as_str() {
219                "metadata" => cat_output.push(ParsedCategory::Metadata),
220                "flow" => cat_output.push(ParsedCategory::Flow),
221                "sectioning" => cat_output.push(ParsedCategory::Sectioning),
222                "heading" => cat_output.push(ParsedCategory::Heading),
223                "phrasing" => cat_output.push(ParsedCategory::Phrasing),
224                "embedded" => cat_output.push(ParsedCategory::Embedded),
225                "interactive" => cat_output.push(ParsedCategory::Interactive),
226                "palpable" => cat_output.push(ParsedCategory::Palpable),
227                "transparent" => cat_output.push(ParsedCategory::Transparent),
228                "script-supporting" => cat_output.push(ParsedCategory::ScriptSupporting),
229                other => eprintln!("unknown content kind: {other}"),
230            }
231        }
232    }
233
234    cat_output.dedup();
235    cat_output.sort();
236    cat_output
237}
238
239fn parse_relationships(categories: &[String], tag_names: &[String]) -> Vec<ParsedRelationship> {
240    let mut cat_output = vec![];
241    for line in categories {
242        for line in parse_categories(line.as_str()) {
243            match line.as_str() {
244                "metadata" => cat_output.push(ParsedCategory::Metadata.into()),
245                "flow" => cat_output.push(ParsedCategory::Flow.into()),
246                "sectioning" => cat_output.push(ParsedCategory::Sectioning.into()),
247                "heading" => cat_output.push(ParsedCategory::Heading.into()),
248                "phrasing" => cat_output.push(ParsedCategory::Phrasing.into()),
249                "embedded" => cat_output.push(ParsedCategory::Embedded.into()),
250                "interactive" => cat_output.push(ParsedCategory::Interactive.into()),
251                "palpable" => cat_output.push(ParsedCategory::Palpable.into()),
252                "transparent" => cat_output.push(ParsedCategory::Transparent.into()),
253                "script-supporting" => cat_output.push(ParsedCategory::ScriptSupporting.into()),
254                tag_name => {
255                    if tag_names.contains(&tag_name.to_owned()) {
256                        cat_output.push(ParsedRelationship::Element(parse_struct_name(tag_name)));
257                    } else if tag_name == "text" {
258                        cat_output.push(ParsedRelationship::Element(parse_struct_name("Text")));
259                    } else {
260                        eprintln!("unknown tag name: {tag_name}");
261                    }
262                }
263            }
264        }
265    }
266
267    cat_output.sort();
268    cat_output.dedup();
269    cat_output
270}
271
272fn append_super_categories(cat_output: &mut Vec<ParsedRelationship>) {
273    let mut additional_output = vec![];
274
275    for relationship in &*cat_output {
276        if let ParsedRelationship::Category(category) = relationship {
277            match category {
278                ParsedCategory::Heading
279                | ParsedCategory::Sectioning
280                | ParsedCategory::Phrasing
281                | ParsedCategory::Interactive => {
282                    additional_output.push(ParsedCategory::Flow.into());
283                }
284                ParsedCategory::Embedded => {
285                    additional_output.push(ParsedCategory::Phrasing.into());
286                    additional_output.push(ParsedCategory::Flow.into());
287                }
288                ParsedCategory::Transparent => {
289                    // NOTE(yosh): Sure, why not.
290                    additional_output.push(ParsedCategory::Flow.into());
291                }
292                _ => continue,
293            }
294        }
295    }
296
297    cat_output.append(&mut additional_output);
298    cat_output.sort();
299    cat_output.dedup();
300}
301
302/// Find out which WebIDL interface this element relies on.
303fn parse_dom_interface(lines: &[String]) -> String {
304    let line = lines.get(0).as_deref().unwrap().clone();
305
306    if line.starts_with("Uses") {
307        let line = line.strip_prefix("Uses").unwrap();
308        let line = line.strip_suffix(".").unwrap();
309        line.trim().to_owned()
310    } else if line.starts_with("Use") {
311        let line = line.strip_prefix("Use").unwrap();
312        let line = line.strip_suffix(".").unwrap();
313        line.trim().to_owned()
314    } else {
315        crate::utils::extract_webidl_name(&line).unwrap()
316    }
317}