html2maud/
lib.rs

1use regex::Regex;
2use std::collections::HashMap;
3use std::fmt::Write;
4use tl::*;
5
6fn format_empty_blocks(s: &str) -> String {
7    let re = Regex::new(r"(?m)\{\s*\}").unwrap();
8    re.replace_all(s, "{}").into_owned()
9}
10
11fn convert_hash_id_to_id_attribute(input: &str) -> String {
12    let re = Regex::new(r"#([a-zA-Z0-9_-]+)").unwrap();
13    let mut id_counts = HashMap::new();
14
15    // Count occurrences of each #id
16    for caps in re.captures_iter(input) {
17        let id = caps.get(1).unwrap().as_str();
18        *id_counts.entry(id.to_string()).or_insert(0) += 1;
19    }
20
21    // Collect the keys into a vector to avoid borrowing issues
22    let ids: Vec<String> = id_counts.keys().cloned().collect();
23
24    // Additionally, count occurrences of id in other contexts
25    for id in ids {
26        let re_id = Regex::new(&format!(r#"[^\#]{}[^\-]"#, id)).unwrap();
27        for _ in re_id.find_iter(input) {
28            *id_counts.get_mut(&id).unwrap() += 1;
29        }
30    }
31
32    // Replace #id with id="id" only if id occurs more than once
33    let result = re.replace_all(input, |caps: &regex::Captures| {
34        let id = caps.get(1).unwrap().as_str();
35        if id_counts.get(id).unwrap_or(&0) > &1 {
36            format!(r#"id="{}""#, id)
37        } else {
38            format!("#{}", id)
39        }
40    });
41
42    result.into_owned()
43}
44
45fn remove_empty_lines(s: &str) -> String {
46    let lines = s.lines();
47    let non_empty_lines: Vec<&str> = lines.filter(|line| line.trim().len() > 0).collect();
48    convert_hash_id_to_id_attribute(&format_empty_blocks(&non_empty_lines.join("\n")))
49}
50
51pub fn html2maud(html: &str) -> String {
52    let mut maud_template = String::new();
53
54    let dom = tl::parse(html, tl::ParserOptions::default()).unwrap();
55    let parser = dom.parser();
56
57    fn spaces(count: usize) -> String {
58        return "    ".repeat(count).as_str().to_owned();
59    }
60
61    fn handle_tag(tag: &HTMLTag, parser: &Parser, maud_template: &mut String, indent: usize) {
62        let tag_name = tag.name().as_utf8_str();
63
64        let use_semicolon = match tag_name.as_ref().to_string().as_str() {
65            "meta" | "link" | "br" | "img" | "input" | "hr" | "col" | "area" | "base" | "wbr"
66            | "track" | "param" => true,
67            _ => false,
68        };
69
70        write!(maud_template, "{}{}", spaces(indent), &tag_name).unwrap();
71
72        match tag.attributes().class_iter() {
73            None => {}
74            Some(classes) => {
75                write!(maud_template, ".\"").unwrap();
76                let mut class_strings = Vec::new();
77                for class in classes {
78                    let escaped_class = if class.contains("-") {
79                        format!("{}", &class)
80                    } else {
81                        class.to_owned()
82                    };
83                    class_strings.push(escaped_class);
84                }
85                let classes_str = class_strings.join(" ");
86                write!(maud_template, "{}\"", classes_str).unwrap();
87            }
88        }
89
90        let id = tag.attributes().id().map(|x| x.as_utf8_str());
91        match &id {
92            Option::Some(x) => {
93                let escaped_id = if x.contains("-") {
94                    /* format!("\"{}\"", &x) */
95                    format!("{}", &x)
96                } else {
97                    x.to_string()
98                };
99                //write!(maud_template, " id=\"{}\"", &escaped_id).unwrap();
100                write!(maud_template, " #{}", &escaped_id).unwrap();
101            }
102            Option::None => {}
103        }
104
105        for (key, value_opt) in tag.attributes().iter() {
106            if !(key.eq("id") || key.eq("class")) {
107                write!(maud_template, " {}", key).unwrap();
108                match value_opt {
109                    None => {}
110                    Some(value) => write!(maud_template, "=\"{}\"", value).unwrap(),
111                }
112            }
113        }
114
115        if !use_semicolon {
116            write!(maud_template, " {{\n").unwrap();
117        } else {
118            write!(maud_template, ";\n").unwrap();
119        }
120
121        let children = tag.children();
122        let nodes = children.top().as_slice();
123        let mut first_node = true;
124        for child_node in nodes {
125            if first_node {
126                first_node = false;
127            } else {
128                write!(maud_template, "\n").unwrap();
129            }
130            handle_node(child_node.get(parser), parser, maud_template, indent + 1);
131        }
132
133        if !use_semicolon {
134            write!(maud_template, "{}}}\n", spaces(indent)).unwrap();
135        }
136    }
137
138    fn handle_node(
139        node_opt: Option<&Node>,
140        parser: &Parser,
141        maud_template: &mut String,
142        indent: usize,
143    ) {
144        match node_opt {
145            None => {}
146            Some(node) => match node {
147                Node::Tag(tag) => handle_tag(tag, parser, maud_template, indent),
148                Node::Comment(_) => {}
149                Node::Raw(raw) => {
150                    let text = raw.as_utf8_str();
151                    let trimmed_text = text.trim();
152                    if !trimmed_text.is_empty() {
153                        write!(
154                            maud_template,
155                            "{}\"{}\"\n",
156                            spaces(indent),
157                            trimmed_text.replace("\"", "\\\"")
158                        )
159                        .unwrap();
160                    }
161                }
162            },
163        }
164    }
165
166    write!(maud_template, "html! {{\n").unwrap();
167    for node_handle in dom.children() {
168        handle_node(node_handle.get(parser), parser, &mut maud_template, 1);
169    }
170    write!(maud_template, "\n}}\n").unwrap();
171
172    remove_empty_lines(&maud_template)
173}