#!/usr/bin/env rust-script
use heck::{AsKebabCase, ToSnakeCase, ToUpperCamelCase};
use itertools::Itertools;
use scraper::{Element, ElementRef, Html, Selector};
use std::{collections::BTreeMap, io::Write};
/// Scrapes the MDN HTML element reference and regenerates `src/lib.rs`.
///
/// Steps, in order:
/// 1. Download the element index page and collect every link matched by the
///    index selector.
/// 2. Download the global-attributes page via `get_global_attrs`.
/// 3. For each linked element page, emit one `pub struct` whose fields are the
///    global attributes extended (and overridden by name) with the attributes
///    returned by `get_attrs` for that page.
/// 4. Emit the `Element` enum, its `from_tag` constructor and one getter per
///    global attribute.
/// 5. Write the result to `src/lib.rs` and run `rustfmt` on it.
///
/// # Panics
///
/// Panics on any HTTP failure, on a link without `href` or text, on a write
/// failure, and if `rustfmt` cannot be spawned. A `rustfmt` run that exits
/// unsuccessfully only produces a warning on stderr.
fn main() {
    let agent = ureq::agent();
    let index_html = agent
        .get("https://developer.mozilla.org/en-US/docs/Web/HTML/Element")
        .call()
        .unwrap()
        .into_string()
        .unwrap();
    let index = Html::parse_document(&index_html);
    let link_selector =
        Selector::parse("td:first-child > a[href^='/en-US/docs/Web/HTML/Element/']:only-child")
            .unwrap();
    // Loop-invariant, so it is compiled once instead of once per element page.
    let deprecated_selector =
        Selector::parse(".main-page-content > .section-content > .notecard.deprecated").unwrap();
    // (struct name, is deprecated) for every element, in index-page order.
    let mut elems = Vec::new();
    let global_attrs = BTreeMap::from_iter(get_global_attrs());
    // The header deliberately has no trailing newline: the first item written
    // below starts with a `///` doc comment, which is valid right after `;`.
    let mut buf = String::from(
        "// generated by gen.rs + rustfmt - not in a build.rs because HTML tags don't change too often
#![no_std]
#[cfg(feature = \"alloc\")]
extern crate alloc;",
    )
    .into_bytes();

    for e in index.select(&link_selector) {
        let url = format!(
            "https://developer.mozilla.org{}",
            e.value().attr("href").unwrap()
        );
        // Link text looks like `<tag>`; drop the angle brackets when present
        // instead of slicing off the first and last byte unconditionally.
        let label = e.text().next().unwrap();
        let tag = label.strip_prefix('<').unwrap_or(label);
        let tag = tag.strip_suffix('>').unwrap_or(tag);
        let name = tag.to_upper_camel_case();

        let page_html = agent.get(&url).call().unwrap().into_string().unwrap();
        let document = Html::parse_document(&page_html);
        let deprecated = document.select(&deprecated_selector).next().is_some();

        // Element-specific attributes replace global ones with the same name.
        let mut attrs = global_attrs.clone();
        attrs.extend(get_attrs(&document));
        writeln!(
            buf,
            "{}
{}
#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct {name}<'life> {{
{}
}}",
            get_mdn_doc(&document, &url),
            if deprecated { "#[deprecated]" } else { "" },
            attrs
                .iter()
                .format_with(",\n/// ", |(name, (desc, ty, alloc)), f| f(&format_args!(
                    "{desc}
{}
pub {name}: core::option::Option<{ty}>",
                    if *alloc {
                        "#[cfg(feature = \"alloc\")]"
                    } else {
                        ""
                    },
                ))),
        )
        .unwrap();
        // Recorded after the struct is written so `name` can be moved, not cloned.
        elems.push((name, deprecated));
    }

    // One enum variant per element, wrapping the struct of the same name.
    writeln!(
        buf,
        "#[allow(deprecated)]
pub enum Element<'life> {{
{}
}}",
        elems
            .iter()
            .format_with(",\n", |(e, dep), f| f(&format_args!(
                "{} {e}({e}<'life>)",
                if *dep { "#[deprecated]" } else { "" },
            )))
    )
    .unwrap();

    // `from_tag`: maps the kebab-cased struct name to a default-initialised variant.
    writeln!(
        buf,
        "#[allow(deprecated)]
impl<'life> Element<'life> {{
/// Gets an element from a lowercase tag name.
pub fn from_tag(tag: &str) -> core::option::Option<Self> {{
match tag {{
{},
_ => None,
}}
}}
}}",
        elems.iter().format_with(",", |(e, _), f| f(&format_args!(
            "\"{}\" => Some(Self::{e}({e}::default()))",
            AsKebabCase(e)
        )))
    )
    .unwrap();

    // One getter per global attribute, dispatching over every variant.
    // Attributes flagged `alloc` are returned by reference via `.as_ref()`.
    writeln!(
        buf,
        "#[allow(deprecated)]
impl<'life> Element<'life> {{
{}
}}",
        global_attrs
            .iter()
            .format_with("\n", |(name, (desc, ty, alloc)), f| f(&format_args!(
                "{desc}
{}
pub fn {name}(&self) -> core::option::Option<{}{ty}> {{
match self {{
{}
}}
}}",
                if *alloc {
                    "#[cfg(feature = \"alloc\")]"
                } else {
                    ""
                },
                if *alloc { "&" } else { "" },
                elems.iter().format_with(",", |(e, _), f| f(&format_args!(
                    "Self::{e}(e) => e.{name}{}",
                    if *alloc { ".as_ref()" } else { "" }
                )))
            )))
    )
    .unwrap();

    std::fs::write("src/lib.rs", buf).unwrap();
    let status = std::process::Command::new("rustfmt")
        .arg("src/lib.rs")
        .status()
        .unwrap();
    // Surface a formatter failure (e.g. the generated code did not parse)
    // instead of silently discarding the exit status.
    if !status.success() {
        eprintln!("warning: rustfmt exited with {status}; src/lib.rs may be unformatted");
    }
}
/// Downloads the MDN "Global attributes" page and converts the first `<dl>`
/// found in it into attribute entries via `dl_to_attrs`.
///
/// Panics if the request fails, the body cannot be read as a string, or the
/// page contains no `<dl>` element.
fn get_global_attrs() -> Vec<(String, (String, String, bool))> {
    let page = ureq::agent()
        .get("https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes")
        .call()
        .unwrap()
        .into_string()
        .unwrap();
    let parsed = Html::parse_document(&page);
    let dl_selector = Selector::parse("dl").unwrap();
    let first_list = parsed.select(&dl_selector).next().unwrap();
    dl_to_attrs(first_list)
}
fn get_attrs(document: &Html) -> Vec<(String, (String, String, bool))> {
let selector = Selector::parse(".section-content > dl").unwrap();
if let Some(dl) = document.select(&selector).next() {
dl_to_attrs(dl)
} else {
Vec::new()
}
}
/// Builds the `///` doc comment placed above a generated element struct.
///
/// The text is the inner HTML of every `<p>` directly under
/// `.main-page-content > .section-content`; if there is none, the inner HTML of
/// the `summary` section's `.section-content` is used instead. Pieces are
/// joined with blank lines, `<br>` becomes a blank line, and every line after
/// the first is prefixed with `/// `. A link to `url` is appended.
fn get_mdn_doc(document: &Html, url: &str) -> String {
    let paragraph_selector =
        Selector::parse(".main-page-content > .section-content > p").unwrap();
    let mut parts: Vec<String> = document
        .select(&paragraph_selector)
        .map(|node| node.inner_html())
        .collect();

    if parts.is_empty() {
        // Fallback for pages whose intro lives in a dedicated summary section.
        let summary_selector = Selector::parse(
            ".main-page-content > section[aria-labelledby='summary'] > .section-content",
        )
        .unwrap();
        parts = document
            .select(&summary_selector)
            .map(|node| node.inner_html())
            .collect();
    }

    let joined = parts.join("\n\n");
    let with_breaks = joined.replace("<br>", "\n\n");
    let body = with_breaks.replace('\n', "\n/// ");
    format!("/// {}\n///\n/// More information: <{url}>", body)
}
/// Converts the `<dt>` children of a `<dl>` into `(field name, (doc, type, alloc))`
/// entries.
///
/// * The field name is the first text node of the `<dt>` in snake case, with a
///   trailing `_` appended for `type`, `loop`, `async`, `for` and `as`.
/// * The doc text is the inner HTML of the element following the `<dt>`, with
///   `<br>` turned into a blank line and `/// ` inserted after every newline.
/// * The type is a `BTreeMap` for `data` (the only entry flagged `alloc`),
///   `bool` for a fixed list of attribute names, and `&'life str` otherwise.
///
/// Panics if a `<dt>` has no text or no following sibling element.
fn dl_to_attrs(dl: ElementRef) -> Vec<(String, (String, String, bool))> {
    dl.children()
        .filter_map(ElementRef::wrap)
        .filter(|child| child.value().name() == "dt")
        .map(|term| {
            let label = term.text().next().unwrap();
            let following = term.next_sibling_element().unwrap();
            let desc = following
                .inner_html()
                .replace("<br>", "\n\n")
                .replace('\n', "\n/// ");
            let name = label.to_snake_case();

            let (ty, alloc) = match name.as_str() {
                "data" => ("alloc::collections::BTreeMap<&'life str, &'life str>", true),
                "autofocus" | "checked" | "disabled" | "multiple" | "readonly" | "required"
                | "selected" | "novalidate" | "formnovalidate" | "hidden" => ("bool", false),
                _ => ("&'life str", false),
            };

            // These names cannot be used as field identifiers, so they get a suffix.
            let needs_suffix = matches!(name.as_str(), "type" | "loop" | "async" | "for" | "as");
            let field = if needs_suffix {
                format!("{name}_")
            } else {
                name
            };

            (field, (desc, ty.to_string(), alloc))
        })
        .collect()
}