html_bindgen/merge/
mod.rs

1//! Unify all the parsed sources into a single, final source.
2
3use std::collections::{BTreeSet, HashMap};
4
5use crate::parse::{
6    Attribute, AttributeType, ParsedAriaElement, ParsedAriaProperty, ParsedAriaRole,
7    ParsedCategory, ParsedElement, ParsedInterface, ParsedRelationship,
8};
9use crate::Result;
10use serde::{Deserialize, Serialize};
11
12/// The final source of truth we used to generate code from.
13///
14/// Created by combining all of the previously parsed data.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct MergedElement {
17    pub tag_name: String,
18    pub struct_name: String,
19    pub submodule_name: String,
20    pub mdn_link: String,
21    pub has_global_attributes: bool,
22    pub has_closing_tag: bool,
23    pub attributes: Vec<Attribute>,
24    pub dom_interface: String,
25    pub content_categories: Vec<MergedCategory>,
26    pub permitted_child_elements: Vec<String>,
27}
28
29/// Each element in HTML falls into zero or more categories that group elements
30/// with similar characteristics together.
31///
32/// Unlike `ParsedCategory`, this can no longer hold any child elements.
33#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
34pub enum MergedCategory {
35    Metadata,
36    Flow,
37    Sectioning,
38    Heading,
39    Phrasing,
40    Embedded,
41    Interactive,
42    Palpable,
43    ScriptSupporting,
44    Transparent,
45}
46
47impl From<ParsedCategory> for MergedCategory {
48    fn from(value: ParsedCategory) -> Self {
49        match value {
50            ParsedCategory::Metadata => Self::Metadata,
51            ParsedCategory::Flow => Self::Flow,
52            ParsedCategory::Sectioning => Self::Sectioning,
53            ParsedCategory::Heading => Self::Heading,
54            ParsedCategory::Phrasing => Self::Phrasing,
55            ParsedCategory::Embedded => Self::Embedded,
56            ParsedCategory::Interactive => Self::Interactive,
57            ParsedCategory::Palpable => Self::Palpable,
58            ParsedCategory::ScriptSupporting => Self::ScriptSupporting,
59            ParsedCategory::Transparent => Self::Transparent,
60        }
61    }
62}
63
64pub fn merge(
65    parsed_elements: impl Iterator<Item = Result<ParsedElement>>,
66    parsed_interfaces: impl Iterator<Item = Result<ParsedInterface>>,
67    parsed_aria_elements: impl Iterator<Item = Result<ParsedAriaElement>>,
68    parsed_aria_roles: impl Iterator<Item = Result<ParsedAriaRole>>,
69    parsed_aria_properties: impl Iterator<Item = Result<ParsedAriaProperty>>,
70) -> Result<Vec<MergedElement>> {
71    let mut elements = HashMap::new();
72    for el in parsed_elements {
73        let el = el?;
74        let key = el.struct_name.clone();
75        elements.insert(key, el);
76    }
77
78    let mut interfaces = HashMap::new();
79    for interface in parsed_interfaces {
80        let interface = interface?;
81        let key = interface.name.clone();
82        interfaces.insert(key, interface);
83    }
84
85    let aria_elements = parsed_aria_elements
86        .map(|x| x.map(|y| (y.tag_name.clone(), y)))
87        .collect::<Result<HashMap<_, _>>>()?;
88    let aria_roles = parsed_aria_roles
89        .map(|x| x.map(|y| (y.name.clone(), y)))
90        .collect::<Result<HashMap<_, _>>>()?;
91    let aria_properties = parsed_aria_properties
92        .map(|x| x.map(|y| (y.name.clone(), y)))
93        .collect::<Result<HashMap<_, _>>>()?;
94
95    let by_content_type = categorize_elements(&elements);
96    let mut children_map = children_per_element(&elements, &by_content_type);
97    insert_text_content(&elements, &mut children_map);
98    let attributes_map = merge_attributes(
99        &elements,
100        &interfaces,
101        &aria_elements,
102        &aria_roles,
103        &aria_properties,
104    );
105
106    let mut output = vec![];
107    for (_, el) in elements.into_iter() {
108        let mut permitted_child_elements = children_map.get(&el.struct_name).unwrap().clone();
109        permitted_child_elements.dedup();
110        let attributes = attributes_map.get(&el.struct_name).unwrap().clone();
111        output.push(MergedElement {
112            tag_name: el.tag_name,
113            struct_name: el.struct_name,
114            submodule_name: el.submodule_name,
115            mdn_link: el.mdn_link,
116            has_global_attributes: el.has_global_attributes,
117            has_closing_tag: el.has_closing_tag,
118            dom_interface: el.dom_interface,
119            content_categories: convert_parsed_categories(&el.content_categories),
120            attributes,
121            permitted_child_elements,
122        })
123    }
124    Ok(output)
125}
126
127/// In order to correctly handle `PhrasingContent` we add one more item to the
128/// mix: `Text`, which in later stages we'll replace with a Rust string type.
129fn insert_text_content(
130    elements: &HashMap<String, ParsedElement>,
131    children_map: &mut HashMap<String, Vec<String>>,
132) {
133    for (_, parent_el) in elements {
134        let has_phrasing = parent_el
135            .permitted_content
136            .contains(&ParsedRelationship::Category(ParsedCategory::Phrasing));
137        let has_transparent = parent_el
138            .permitted_content
139            .contains(&ParsedRelationship::Category(ParsedCategory::Transparent));
140        let has_flow = parent_el
141            .permitted_content
142            .contains(&ParsedRelationship::Category(ParsedCategory::Flow));
143
144        // NOTE: `script` is a one-off, see: https://github.com/yoshuawuyts/html/issues/41
145        if has_phrasing || has_flow || has_transparent || parent_el.tag_name == "script" {
146            let vec = children_map.get_mut(&parent_el.struct_name).unwrap();
147            vec.push("Text".to_owned());
148            vec.dedup();
149            vec.sort();
150        }
151    }
152}
153
154/// Create a hashmap that keeps track of which elements
155/// belong to which content type.
156///
157/// Also extract the individual elements related to each content type, and keep
158/// them for later insertion into the parent types.
159fn categorize_elements(
160    elements: &HashMap<String, ParsedElement>,
161) -> HashMap<ParsedCategory, Vec<String>> {
162    let mut output = HashMap::new();
163    for (name, el) in elements {
164        for cat in &el.content_categories {
165            let vec: &mut Vec<_> = output.entry(cat.clone()).or_default();
166            vec.push(name.clone());
167        }
168    }
169
170    // make sure we add the "transparent" and "script-supporting" content categories
171    let _ = output.entry(ParsedCategory::Transparent).or_default();
172    let _ = output.entry(ParsedCategory::ScriptSupporting).or_default();
173
174    output
175}
176
177/// Which child elements belong to the parent element?
178/// We decide which element is a valid child element in the following way:
179///  1. We look at an element's `permitted_child_elements` field, to figure out
180///     which categories can be taken as child elements.
181///  2. Then we look at each element in the category, and see whether it can take
182///     the parent element as its parent.
183///  3. If the parent can have the child, and the child can have the parent, we
184///     add it to the parent's child list.
185fn children_per_element(
186    elements: &HashMap<String, ParsedElement>,
187    by_content_type: &HashMap<ParsedCategory, Vec<String>>,
188) -> HashMap<String, Vec<String>> {
189    // Because not all elements will have children,
190    // we create empty lists for all elements first.
191    let mut output = elements
192        .iter()
193        .map(|(name, _)| (name.clone(), vec![]))
194        .collect::<HashMap<_, _>>();
195
196    // First we start by iterating over all elements.
197    for (_, parent_el) in elements {
198        // Then we take a look at which elements they can take as children.
199        for child_relationship in &parent_el.permitted_content {
200            match child_relationship {
201                ParsedRelationship::Element(child_el_name) => {
202                    // Check that the child can have the current element as a
203                    // parent.
204                    if child_el_name != "Text" {
205                        let child_el = elements.get(child_el_name).unwrap();
206                        if !child_can_have_parent(child_el, parent_el) {
207                            continue;
208                        }
209                    }
210                    output
211                        .get_mut(&parent_el.struct_name)
212                        .unwrap()
213                        .push(child_el_name.to_owned());
214                }
215                ParsedRelationship::Category(child_category) => {
216                    // If the content type is transparent, then all children match
217                    // so we just add all of them.
218                    //
219                    // FIXME: this is not actually correct, but we're just going
220                    // with it like this for now.
221                    if let ParsedCategory::Transparent = child_category {
222                        for (_, child_el) in elements.iter() {
223                            output
224                                .get_mut(&parent_el.struct_name)
225                                .unwrap()
226                                .push(child_el.struct_name.to_owned());
227                        }
228                        continue;
229                    }
230
231                    // Otherwise look at the content type, find all children for that
232                    // type and then intsert those.
233                    for child_el_name in by_content_type.get(&child_category).unwrap() {
234                        let child_el = elements.get(child_el_name).unwrap();
235                        if child_can_have_parent(child_el, parent_el) {
236                            output
237                                .get_mut(&parent_el.struct_name)
238                                .unwrap()
239                                .push(child_el.struct_name.to_owned());
240                        }
241                    }
242                }
243            }
244        }
245    }
246
247    // Check whether
248    fn child_can_have_parent(child: &ParsedElement, proposed_parent: &ParsedElement) -> bool {
249        // If the parent allows "transparent" content, then all children are valid.
250        // The spec will often add more contstraints, but we don't yet have type
251        // states so we ignore them for now.
252        if proposed_parent
253            .permitted_content
254            .contains(&ParsedRelationship::Category(ParsedCategory::Transparent))
255        {
256            return true;
257        }
258
259        // Check whether the child can have the parent element as a parent.
260        for parent_relationship in &child.permitted_parents {
261            match parent_relationship {
262                ParsedRelationship::Element(parent_el_name) => {
263                    if parent_el_name == &proposed_parent.struct_name {
264                        return true;
265                    }
266                }
267                ParsedRelationship::Category(parent_category) => {
268                    if proposed_parent
269                        .permitted_content
270                        .contains(&(*parent_category).clone().into())
271                    {
272                        return true;
273                    }
274                }
275            }
276        }
277        false
278    }
279
280    // Some elements belong to more than one category, so they can end
281    // up in the list more than once. This makes sure that the list
282    // is always in order, and only contains unique elements.
283    output.iter_mut().for_each(|(_, value)| {
284        value.dedup();
285        value.sort()
286    });
287
288    output
289}
290
291/// Merge WebIDL attributes into the regular attributes list.
292fn merge_attributes(
293    elements: &HashMap<String, ParsedElement>,
294    interfaces: &HashMap<String, ParsedInterface>,
295    aria_elements: &HashMap<String, ParsedAriaElement>,
296    aria_roles: &HashMap<String, ParsedAriaRole>,
297    aria_properties: &HashMap<String, ParsedAriaProperty>,
298) -> HashMap<String, Vec<Attribute>> {
299    let mut output = elements
300        .iter()
301        .map(|(name, _)| (name.clone(), vec![]))
302        .collect::<HashMap<_, _>>();
303
304    let interface_map = interfaces
305        .iter()
306        .map(|(name, interface)| {
307            let map = interface
308                .attributes
309                .iter()
310                .map(|attr| (attr.name.to_lowercase().clone(), attr.clone()))
311                .collect::<HashMap<String, Attribute>>();
312            (name.clone(), map)
313        })
314        .collect::<HashMap<String, _>>();
315
316    // From https://www.w3.org/TR/role-attribute/
317    let role_attr = Attribute {
318        name: "role".to_owned(),
319        description:
320            "Describes the role(s) the current element plays in the context of the document."
321                .to_owned(),
322        field_name: "role".to_owned(),
323        ty: AttributeType::String,
324    };
325
326    for el in elements.values() {
327        let vec = output.entry(el.struct_name.clone()).or_default();
328        match interface_map.get(&el.dom_interface) {
329            Some(interface) => {
330                for attr in &el.attributes {
331                    let attr = match interface.get(&attr.name) {
332                        Some(other) => Attribute {
333                            name: attr.name.clone(),
334                            description: attr.description.clone(),
335                            field_name: other.field_name.clone(),
336                            ty: other.ty.clone(),
337                        },
338                        None => attr.clone(),
339                    };
340                    vec.push(attr);
341                }
342            }
343            None => {
344                vec.extend(el.attributes.iter().cloned());
345            }
346        };
347
348        if let Some(aria_el) = aria_elements.get(&el.tag_name) {
349            if !aria_el.no_role || !aria_el.allowed_roles.is_empty() {
350                vec.push(role_attr.clone());
351            }
352
353            let mut properties = if aria_el.no_aria_attributes {
354                aria_el.allowed_aria_attributes.clone()
355            } else {
356                let mut properties = if aria_el.any_role {
357                    aria_roles.values().fold(BTreeSet::new(), |mut set, role| {
358                        set.extend(role.allowed_properties.iter().cloned());
359                        set
360                    })
361                } else {
362                    let mut properties = BTreeSet::new();
363                    for role in &aria_el.allowed_roles {
364                        if let Some(role) = aria_roles.get(role) {
365                            properties.extend(role.allowed_properties.iter().cloned());
366                        }
367                    }
368                    properties
369                };
370
371                properties.extend(aria_el.allowed_aria_attributes.iter().cloned());
372
373                if aria_el.global_aria_attributes {
374                    properties.extend(
375                        aria_properties
376                            .values()
377                            .filter(|x| x.is_global)
378                            .map(|x| x.name.clone()),
379                    );
380                }
381
382                properties
383            };
384
385            for p in &aria_el.prohibited_aria_attributes {
386                properties.remove(p);
387            }
388
389            vec.extend(
390                properties
391                    .into_iter()
392                    .filter_map(|x| aria_properties.get(&x).cloned())
393                    .map(Attribute::from),
394            );
395        }
396    }
397    for (_, vec) in output.iter_mut() {
398        vec.dedup();
399    }
400    output
401}
402
403/// Take a list of parsed categories and output a list of merged categories + a
404/// list of child elements.
405fn convert_parsed_categories(categories: &[ParsedCategory]) -> Vec<MergedCategory> {
406    categories.into_iter().cloned().map(Into::into).collect()
407}