#!/usr/bin/env rust-script
use heck::{AsKebabCase, ToSnakeCase, ToUpperCamelCase};
use itertools::Itertools;
use scraper::{Element, ElementRef, Html, Selector};
use std::{collections::BTreeMap, io::Write};
/// Generates `src/lib.rs` from the MDN HTML element reference.
///
/// The element index page is downloaded first, then both flavours (borrowed and owned) of the
/// global attribute table. After the fixed preamble, a borrowed and an owned struct are written
/// for every element linked from the index, for `H1` to `H6`, and for the catch-all `Unknown`
/// element. Finally the `Element` / `ElementOwned` enums are emitted, the buffer is saved to
/// `src/lib.rs` and `rustfmt` is run on it.
///
/// Panics if a request, a selector parse, the file write or spawning `rustfmt` fails. The exit
/// status of `rustfmt` itself is not inspected.
fn main() {
    // Fixed header of the generated crate, copied verbatim to the top of the output.
    const PREAMBLE: &str =
"// generated by gen.rs + rustfmt - not in a build.rs because HTML tags don't change too often
//! An auto-generated crate containing all HTML tags and their attributes.
//! This crate is generated from the [MDN HTML element reference](https://developer.mozilla.org/en-US/docs/Web/HTML/Element).
//!
//! The `<element>Owned` variants are the same as the `<element>` variants, but without lifetimes.
#![no_std]
#[cfg(feature = \"alloc\")]
extern crate alloc;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum AttributeValue<'life> {
Str(&'life str),
Bool(bool),
}
impl<'life> core::convert::From<&'life str> for AttributeValue<'life> {
fn from(s: &'life str) -> Self {
AttributeValue::Str(s)
}
}
impl<'life> core::convert::From<bool> for AttributeValue<'life> {
fn from(b: bool) -> Self {
AttributeValue::Bool(b)
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg(feature = \"alloc\")]
pub enum AttributeValueOwned {
#[cfg(not(feature = \"tendril\"))]
Str(alloc::string::String),
#[cfg(feature = \"tendril\")]
Str(tendril::Tendril<tendril::fmt::UTF8>),
Bool(bool),
}
#[cfg(all(feature = \"alloc\", not(feature = \"tendril\")))]
impl core::convert::From<alloc::string::String> for AttributeValueOwned {
fn from(s: alloc::string::String) -> Self {
AttributeValueOwned::Str(s)
}
}
#[cfg(all(feature = \"alloc\", feature = \"tendril\"))]
impl core::convert::From<tendril::Tendril<tendril::fmt::UTF8>> for AttributeValueOwned {
fn from(s: tendril::Tendril<tendril::fmt::UTF8>) -> Self {
AttributeValueOwned::Str(s)
}
}
#[cfg(feature = \"alloc\")]
impl core::convert::From<bool> for AttributeValueOwned {
fn from(b: bool) -> Self {
AttributeValueOwned::Bool(b)
}
} ";

    let agent = ureq::agent();
    let index_html = agent
        .get("https://developer.mozilla.org/en-US/docs/Web/HTML/Element")
        .call()
        .unwrap()
        .into_string()
        .unwrap();
    let document = Html::parse_document(&index_html);
    let link_selector =
        Selector::parse("td:first-child > a[href^='/en-US/docs/Web/HTML/Element/']:only-child")
            .unwrap();

    // (struct name, deprecated) for every element written so far; feeds the enum generation.
    let mut elems = Vec::new();
    let global_attrs = BTreeMap::from_iter(get_global_attrs(false));
    let owned_global_attrs = BTreeMap::from_iter(get_global_attrs(true));
    let mut buf = PREAMBLE.as_bytes().to_vec();

    // One struct pair per element page linked from the index table.
    for anchor in document.select(&link_selector) {
        let href = anchor.value().attr("href").unwrap();
        let url = format!("https://developer.mozilla.org{href}");
        let label = anchor.text().next().unwrap();
        // Drop the first and last byte of the link text (presumably the `<` / `>` around the tag).
        let name = label[1..label.len() - 1].to_upper_camel_case();
        get_and_write_elem(
            &agent,
            &url,
            name,
            global_attrs.clone(),
            owned_global_attrs.clone(),
            &mut elems,
            &mut buf,
        );
    }

    // The six heading elements are all generated from one shared page.
    for heading in ["H1", "H2", "H3", "H4", "H5", "H6"] {
        get_and_write_elem(
            &agent,
            "https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Heading_Elements",
            heading.to_string(),
            global_attrs.clone(),
            owned_global_attrs.clone(),
            &mut elems,
            &mut buf,
        );
    }

    // `Unknown`: the global attributes plus a `tag_name` field, borrowed flavour first.
    let unknown_doc = "/// An unknown element.";
    let tag_name_doc = "The tag name of the element.";
    for (owned, base_attrs, value_ty) in [
        (false, &global_attrs, "AttributeValue<'life>"),
        (true, &owned_global_attrs, "AttributeValueOwned"),
    ] {
        let mut attrs = base_attrs.clone();
        attrs.insert(
            "tag_name".to_string(),
            (tag_name_doc.to_string(), value_ty.to_string(), false),
        );
        write_elem(
            unknown_doc.to_string(),
            "Unknown".to_string(),
            &attrs,
            false,
            owned,
            &mut buf,
        );
    }
    elems.push(("Unknown".to_string(), false));

    write_elem_enum(&elems, &global_attrs, false, &mut buf);
    write_elem_enum(&elems, &owned_global_attrs, true, &mut buf);

    std::fs::write("src/lib.rs", buf).unwrap();
    std::process::Command::new("rustfmt")
        .arg("src/lib.rs")
        .status()
        .unwrap();
}
/// Downloads the MDN "Global attributes" page and returns its attributes.
///
/// The first two `<dl>` lists of the page are converted with `dl_to_attrs`, then a synthetic
/// `extra` entry (a map for otherwise unknown attributes) is appended. Each entry is
/// `(field name, (doc text, field type, special))`; `owned` selects the owned or the borrowed
/// type names.
///
/// Panics if the request fails or the page has fewer than two `<dl>` elements.
fn get_global_attrs(owned: bool) -> Vec<(String, (String, String, bool))> {
    let page = ureq::agent()
        .get("https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes")
        .call()
        .unwrap()
        .into_string()
        .unwrap();
    let document = Html::parse_document(&page);
    let dl_selector = Selector::parse("dl").unwrap();
    let mut lists = document.select(&dl_selector);

    let mut attrs = dl_to_attrs(lists.next().unwrap(), owned);
    attrs.extend(dl_to_attrs(lists.next().unwrap(), owned));

    let extra_ty = if owned {
        "alloc::collections::BTreeMap<alloc::string::String, AttributeValueOwned>"
    } else {
        "alloc::collections::BTreeMap<&'life str, AttributeValue<'life>>"
    };
    // The trailing `\n\` keeps the newline and skips this file's indentation on the next line.
    let extra_doc = "/// Extra attributes of the element.\n\
        /// This is a map of attribute names to their values, and the attribute names are in lowercase.";
    attrs.push((
        "extra".to_string(),
        (extra_doc.to_string(), extra_ty.to_string(), true),
    ));
    attrs
}
/// Returns the attributes listed in the first `.section-content > dl` of an element page.
///
/// Yields an empty vector when the page contains no such list. `owned` is forwarded to
/// `dl_to_attrs` to pick the owned or borrowed type names.
fn get_attrs(document: &Html, owned: bool) -> Vec<(String, (String, String, bool))> {
    let dl_selector = Selector::parse(".section-content > dl").unwrap();
    let first_dl = document.select(&dl_selector).next();
    first_dl
        .map(|dl| dl_to_attrs(dl, owned))
        .unwrap_or_default()
}
/// Builds the `///` doc comment for an element from its MDN page.
///
/// The inner HTML of every `.main-page-content > .section-content > p` match is used; when there
/// is none, the `.section-content` of the `summary` section is used instead. Fragments are joined
/// by blank lines, `<br>` becomes a blank line, and every line is prefixed with `/// `. A
/// "More information" link to `url` closes the comment.
fn get_mdn_doc(document: &Html, url: &str) -> String {
    // Inner HTML of every element matching `css`, in document order.
    let inner_html_of = |css: &str| -> Vec<String> {
        let selector = Selector::parse(css).unwrap();
        let fragments: Vec<String> = document
            .select(&selector)
            .map(|element| element.inner_html())
            .collect();
        fragments
    };

    let mut fragments = inner_html_of(".main-page-content > .section-content > p");
    if fragments.is_empty() {
        fragments = inner_html_of(
            ".main-page-content > section[aria-labelledby='summary'] > .section-content",
        );
    }

    let body = fragments
        .join("\n\n")
        .replace("<br>", "\n\n")
        .replace('\n', "\n/// ");
    format!("/// {body}\n///\n/// More information: <{url}>")
}
/// Converts a `<dl>` definition list into attribute descriptions.
///
/// For every direct `<dt>` child, the first text node is the attribute name (converted to
/// snake_case) and the inner HTML of the next sibling element is the doc text, with `<br>` turned
/// into a blank line and each newline followed by `/// `. The `data` entry is marked special and
/// typed as a map; every other entry is a plain attribute value. Names that collide with Rust
/// keywords get a trailing underscore.
///
/// Panics if a `<dt>` has no text node or no following sibling element.
fn dl_to_attrs(dl: ElementRef, owned: bool) -> Vec<(String, (String, String, bool))> {
    let (map_ty, value_ty) = if owned {
        (
            "alloc::collections::BTreeMap<alloc::string::String, AttributeValueOwned>",
            "AttributeValueOwned",
        )
    } else {
        (
            "alloc::collections::BTreeMap<&'life str, AttributeValue<'life>>",
            "AttributeValue<'life>",
        )
    };

    dl.children()
        .filter_map(ElementRef::wrap)
        .filter(|child| child.value().name() == "dt")
        .map(|dt| {
            let raw_name = dt.text().next().unwrap();
            let desc = dt
                .next_sibling_element()
                .unwrap()
                .inner_html()
                .replace("<br>", "\n\n")
                .replace('\n', "\n/// ");

            let field = raw_name.to_snake_case();
            let special = field == "data";
            let ty = if special { map_ty } else { value_ty };

            // Keyword-named attributes cannot be used as field identifiers as-is.
            let field = if matches!(field.as_str(), "type" | "loop" | "async" | "for" | "as") {
                format!("{field}_")
            } else {
                field
            };
            (field, (desc, ty.to_string(), special))
        })
        .collect()
}
/// Appends the generated struct and its inherent `impl` for one element to `buf`.
///
/// * `doc` - ready-made doc-comment lines placed above the struct.
/// * `name` - struct name; `Owned` is appended when `owned` is set. Its kebab-case form is the
///   string returned by the generated `tag()`.
/// * `attrs` - field name -> `(doc text, field type, special)`. Special fields are emitted with
///   their type as-is behind `#[cfg(feature = "alloc")]`; all others are wrapped in `Option` and
///   get a match arm in the generated `set_attr`.
/// * `deprecated` - adds `#[deprecated]` to the struct.
/// * `owned` - emits the lifetime-free flavour, gated on the `alloc` feature.
/// * `buf` - output buffer the source text is appended to.
///
/// Panics if writing to `buf` fails.
fn write_elem(
    doc: String,
    name: String,
    attrs: &BTreeMap<String, (String, String, bool)>,
    deprecated: bool,
    owned: bool,
    buf: &mut Vec<u8>,
) {
    // Struct fields. The separator supplies the `/// ` prefix for each following field's doc.
    let fields = attrs
        .iter()
        .format_with(",\n/// ", |(name, (desc, ty, special)), f| f(&format_args!(
"{desc}
{}
pub {name}: {}",
            if *special {
                "#[cfg(feature = \"alloc\")]"
            } else {
                ""
            },
            if *special {
                ty.to_string()
            } else {
                format!("core::option::Option<{}>", ty)
            },
        )));

    // `set_attr` match arms for the non-special fields.
    let arms = attrs
        .iter()
        .filter(|(_, (_, _, special))| !special)
        .format_with(",\n", |(name, _), f| {
            // Keyword-named attributes are stored in fields with a trailing `_` (`type_`,
            // `for_`, ...). Match on the attribute name without that escape, otherwise
            // `set_attr("type", ..)` would land in `extra` instead of the dedicated field.
            // NOTE(review): field names are snake_case; if any source attribute name was
            // hyphenated it is still matched by its snake_case form here - confirm.
            let attr = name.strip_suffix('_').unwrap_or(name.as_str());
            f(&format_args!(
                "\"{attr}\" => self.{name} = Some(value.into())",
            ))
        });

    writeln!(
        buf,
"{0}
{1}
{2}
#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct {name}{4}{3} {{
{5}
}}
#[allow(deprecated)]
{2}
impl{3} {name}{4}{3} {{
/// Get the tag name of the element.
/// This is the same as the name of the struct, in kebab-case.
pub fn tag() -> &'static str {{
\"{kebab}\"
}}
/// Sets an attribute of the element.
/// This sets the attribute of the struct. If the attribute is not a known attribute, it is added to the `extra` map.
/// If the `alloc` feature is disabled, this function will silently fail.
///
/// # Note
/// This only works when the attribute is lowercase.
pub fn set_attr(&mut self, name: &{6} str, value: {7}) {{
match name {{
{8},
#[cfg(feature = \"alloc\")]
_ => {{
#[allow(clippy::useless_conversion)]
self.extra.insert(name.into(), value.into());
}}
#[cfg(not(feature = \"alloc\"))]
_ => {{}}
}}
}}
}}",
        doc,
        if deprecated { "#[deprecated]" } else { "" },
        if owned {
            "#[cfg(feature = \"alloc\")]"
        } else {
            ""
        },
        if owned { "" } else { "<'life>" },
        if owned { "Owned" } else { "" },
        fields,
        if owned { "" } else { " 'life" },
        if owned {
            "impl core::convert::Into<AttributeValueOwned>"
        } else {
            "impl core::convert::Into<AttributeValue<'life>>"
        },
        arms,
        kebab = AsKebabCase(&name),
    )
    .unwrap();
}
/// Appends the `Element` (borrowed) or `ElementOwned` (owned) enum and its impls to `buf`.
///
/// * `elems` - `(struct name, deprecated)` pairs; each becomes one enum variant wrapping the
///   struct of the same name (with the `Owned` suffix when `owned` is set).
/// * `global_attrs` - field name -> `(doc text, field type, special)`; used to generate the
///   per-attribute accessors on the enum.
/// * `owned` - selects the owned flavour, which is gated on `#[cfg(feature = "alloc")]` and
///   carries no `'life` lifetime.
/// * `buf` - output buffer the generated source text is appended to.
///
/// Panics if any write to `buf` fails.
fn write_elem_enum(
elems: &Vec<(String, bool)>,
global_attrs: &BTreeMap<String, (String, String, bool)>,
owned: bool,
buf: &mut Vec<u8>,
) {
// 1) The enum definition: one variant per element, `#[deprecated]` where flagged.
writeln!(
buf,
"#[allow(deprecated)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
{}
pub enum Element{} {{
{}
}}",
if owned {
"#[cfg(feature = \"alloc\")]"
} else {
""
},
if owned { "Owned" } else { "<'life>" },
elems
.iter()
.format_with(",\n", |(e, dep), f| f(&format_args!(
"{} {e}({e}{})",
if *dep { "#[deprecated]" } else { "" },
if owned { "Owned" } else { "<'life>" },
)))
)
.unwrap();
// 2) Inherent impl with `from_tag` (kebab-case tag -> default variant, falling back to
//    `Self::default()`), `tag` and `set_attr`, the latter two dispatching per variant.
writeln!(
buf,
"#[allow(deprecated)]
{0}
impl{1} Element{2}{1} {{
/// Gets an element from a lowercase tag name.
pub fn from_tag(tag: &str) -> Self {{
match tag {{
{3},
_ => Self::default(),
}}
}}
/// Gets the tag name of the element.
pub fn tag(&self) -> &'static str {{
match self {{
{4},
}}
}}
/// Sets an attribute of the element.
/// This sets the attribute of the struct. If the attribute is not a known attribute, it is added to the `extra` map.
/// If the `alloc` feature is disabled, this function will silently fail.
///
/// # Note
/// This only works when the attribute is lowercase.
pub fn set_attr(&mut self, name: &{5} str, value: {6}) {{
match self {{
{7},
}}
}}
}}",
if owned {
"#[cfg(feature = \"alloc\")]"
} else {
""
},
if owned { "" } else { "<'life>" },
if owned { "Owned" } else { "" },
elems.iter().format_with(",\n", |(e, _), f| f(&format_args!(
"\"{}\" => Self::{e}({e}{}::default())",
AsKebabCase(e),
if owned { "Owned" } else { "" },
))),
elems.iter().format_with(",\n", |(e, _), f| f(&format_args!(
"Self::{e}(_) => {e}::tag()",
))),
if owned { "" } else { " 'life" },
if owned {
"impl core::convert::Into<AttributeValueOwned>"
} else {
"impl core::convert::Into<AttributeValue<'life>>"
},
elems.iter().format_with(",\n", |(e, _), f| f(&format_args!(
"Self::{e}(e) => e.set_attr(name, value)",
))),
)
.unwrap();
// 3) Second inherent impl: a getter for every global attribute and a `set_<name>` setter for
//    every non-special one. Special attributes are returned by reference and gated on `alloc`;
//    in the owned flavour the non-special getters return `Option<&T>` via `.as_ref()`.
writeln!(
buf,
"#[allow(deprecated)]
{0}
impl{1} Element{2}{1} {{
{3}
{4}
}}",
if owned {
"#[cfg(feature = \"alloc\")]"
} else {
""
},
if owned { "" } else { "<'life>" },
if owned { "Owned" } else { "" },
global_attrs
.iter()
.format_with("\n", |(name, (desc, ty, special)), f| f(&format_args!(
"{desc}
{}
pub fn {name}(&self) -> {}{}{}{} {{
match self {{
{}
}}
}}",
if *special || owned {
"#[cfg(feature = \"alloc\")]"
} else {
""
},
if *special {
""
} else {
"core::option::Option<"
},
if *special || owned { "&" } else { "" },
ty,
if *special { "" } else { ">" },
elems.iter().format_with(",", |(e, _), f| f(&format_args!(
"Self::{e}(e) => {}e.{name}{}",
if *special { "&" } else { "" },
if !*special && owned { ".as_ref()" } else { "" }
)))
))),
global_attrs
.iter()
.filter(|(_, (_, _, special))| !special)
.format_with("\n", |(name, (desc, ty, special)), f| f(&format_args!(
"{desc}
{}
pub fn set_{name}(&mut self, val: {ty}) {{
match self {{
{}
}};
}}",
if *special || owned {
"#[cfg(feature = \"alloc\")]"
} else {
""
},
elems.iter().format_with(",", |(e, _), f| f(&format_args!(
"Self::{e}(e) => e.{name}.replace(val)",
)))
)))
)
.unwrap();
// 4) `Default` impl: the default element is the `Unknown` variant.
writeln!(
buf,
"#[allow(deprecated)]
{0}
impl{1} Default for Element{2}{1} {{
fn default() -> Self {{
Self::Unknown(Unknown{2}::default())
}}
}}",
if owned {
"#[cfg(feature = \"alloc\")]"
} else {
""
},
if owned { "" } else { "<'life>" },
if owned { "Owned" } else { "" },
)
.unwrap();
}
/// Fetches one MDN element page and appends the borrowed and owned structs for it to `buf`.
///
/// * `agent` - HTTP agent used for the request.
/// * `url` - address of the element's MDN page.
/// * `name` - struct name of the element (without the `Owned` suffix).
/// * `global_attrs` / `owned_global_attrs` - global attribute maps for the borrowed and owned
///   flavour; the attributes found on the page are merged into them, replacing entries that share
///   a name.
/// * `elems` - receives `(name, deprecated)` for the element.
/// * `buf` - output buffer the generated source text is appended to.
///
/// The element counts as deprecated when the page has a `.notecard.deprecated` directly inside
/// `.main-page-content > .section-content`.
///
/// Panics, naming `url`, if the page cannot be fetched or its body cannot be read.
fn get_and_write_elem(
    agent: &ureq::Agent,
    url: &str,
    name: String,
    global_attrs: BTreeMap<String, (String, String, bool)>,
    owned_global_attrs: BTreeMap<String, (String, String, bool)>,
    elems: &mut Vec<(String, bool)>,
    buf: &mut Vec<u8>,
) {
    let resp = agent
        .get(url)
        .call()
        .unwrap_or_else(|e| panic!("failed to fetch {url}: {e:?}"));
    let html = resp
        .into_string()
        .unwrap_or_else(|e| panic!("failed to read body of {url}: {e:?}"));
    let document = Html::parse_document(&html);

    let deprecated_selector =
        Selector::parse(".main-page-content > .section-content > .notecard.deprecated").unwrap();
    let deprecated = document.select(&deprecated_selector).next().is_some();
    elems.push((name.clone(), deprecated));

    // The doc comment is identical for both flavours, so extract it from the page only once.
    let doc = get_mdn_doc(&document, url);

    let mut attrs = global_attrs;
    attrs.extend(get_attrs(&document, false));
    write_elem(doc.clone(), name.clone(), &attrs, deprecated, false, buf);

    let mut attrs = owned_global_attrs;
    attrs.extend(get_attrs(&document, true));
    write_elem(doc, name, &attrs, deprecated, true, buf);
}