subscript_compiler/frontend/pass/
html_normalize.rs

1//! AST HTML canonicalization. 
2//! 
3//! This should really be implemented using the HTML AST; but it’s easier using
4//! the frontend AST instead, since we don’t need to replicate AST utilities.
5
6use std::iter::FromIterator;
7use std::collections::{HashSet, HashMap};
8use std::rc::Rc;
9use std::cell::RefCell;
10use std::borrow::Cow;
11use std::convert::TryFrom;
12use either::Either;
13use crate::frontend::data::*;
14use crate::frontend::ast::*;
15
16///////////////////////////////////////////////////////////////////////////////
17// TABLE OF CONTENTS
18///////////////////////////////////////////////////////////////////////////////
19
20fn generate_toc_heading_id_from_child_nodes<'a>(children: &Vec<Node<'a>>) -> String {
21    use pct_str::PctStr;
22    let contents = children.iter()
23        .map(Node::to_string)
24        .map(|x| {
25            x   .replace("-", "--")
26                .replace(" ", "-")
27        })
28        .collect::<Vec<_>>()
29        .join("");
30    let pct_str = PctStr::new(&contents).unwrap();
31    pct_str.as_str().to_owned()
32}
33
34
35fn get_headings<'a>(node: &Node<'a>) -> Vec<(Vec<String>, String, String, String)> {
36    let headings = Rc::new(RefCell::new(Vec::new()));
37    let f = {
38        let headings = headings.clone();
39        move |env: NodeEnvironment<'a>, node: Node<'a>| {
40            match &node {
41                Node::Tag(tag) if tag.is_heading_node() => {
42                    let id = generate_toc_heading_id_from_child_nodes(&tag.children);
43                    let parents = env.parents
44                        .into_iter()
45                        .map(|x| x.to_string())
46                        .collect::<Vec<_>>()
47                        .clone();
48                    let name = tag.name.data.to_string();
49                    let text = tag.children
50                        .iter()
51                        .flat_map(|x| x.clone().unblock())
52                        .map(|x| x.to_string())
53                        .collect::<Vec<_>>()
54                        .join("");
55                    headings.borrow_mut().push((parents, name, text, id));
56                }
57                _ => ()
58            }
59            node
60        }
61    };
62    let _ = node.clone().transform(
63        NodeEnvironment::default(),
64        Rc::new(f),
65    );
66    let info = headings
67        .clone()
68        .borrow()
69        .iter()
70        .map(|x| {
71            x.clone()
72        })
73        .collect::<Vec<_>>();
74    info
75}
76
77pub(crate) fn generate_table_of_contents_tree<'a>(input: &Node<'a>) -> Node<'a> {
78    let children = get_headings(input)
79        .into_iter()
80        .map(|(parents, ty, contents, id)| {
81            let mut a = Tag::new(
82                Ann::unannotated("a"),
83                vec![Node::unannotated_string(contents)]
84            );
85            a.insert_unannotated_parameter(
86                &format!("href=#{}", id)
87            );
88            let a = Node::Tag(a);
89            let mut li = Tag::new(
90                Ann::unannotated("li"),
91                vec![a]
92            );
93            li.insert_unannotated_parameter(
94                &format!("type={}", ty)
95            );
96            let li = Node::Tag(li);
97            li
98        })
99        .collect::<Vec<_>>();
100    let mut tag = Tag::new(
101        Ann::unannotated("ul"),
102        children
103    );
104    tag.insert_unannotated_parameter("id=toc");
105    let node = Node::Tag(tag);
106    node
107}
108
109pub fn annotate_heading_nodes<'a>(input: Node<'a>) -> Node<'a> {
110    let f = |env: NodeEnvironment, node: Node<'a>| -> Node<'a> {
111        match node {
112            Node::Tag(mut tag) if tag.is_heading_node() => {
113                let id = generate_toc_heading_id_from_child_nodes(&tag.children);
114                tag.insert_unannotated_parameter(
115                    &format!("id={}", id)
116                );
117                Node::Tag(tag)
118            }
119            x => x,
120        }
121    };
122    input.transform(NodeEnvironment::default(), Rc::new(f))
123}
124
125
126///////////////////////////////////////////////////////////////////////////////
127// WHERE TAG PROCESSING
128///////////////////////////////////////////////////////////////////////////////
129
130/// This is where we expand the patterns defined in `\!where` tags.
131fn match_and_apply_rewrite_rule<'a>(
132    pattern: Vec<Node<'a>>,
133    target: Vec<Node<'a>>,
134    children: Vec<Node<'a>>,
135) -> Vec<Node<'a>> {
136    let mut left: Vec<Node<'a>> = Vec::<Node>::new();
137    let mut current = children;
138    while current.len() > 0 && current.len() >= pattern.len() {
139        let matches = current
140            .iter()
141            .zip(pattern.iter())
142            .all(|(x, y)| x.syntactically_equal(y));
143        if matches {
144            // ADD NEW PATTENR TO LEFT
145            left.extend(target.clone());
146            let _ = current
147                .drain(..pattern.len())
148                .collect::<Vec<_>>();
149            continue;
150        }
151        left.push(current.remove(0));
152    }
153    left.extend(current);
154    left
155}
156
157
158///////////////////////////////////////////////////////////////////////////////
159// AST-TO-AST PASSES
160///////////////////////////////////////////////////////////////////////////////
161
162/// All compiler passes for same scope children.
163fn child_list_passes<'a>(children: Vec<Node<'a>>) -> Vec<Node<'a>> {
164    // APPLY AFTER REMOVING ALL TOKENS
165    fn merge_text_content<'a>(xs: Vec<Node<'a>>) -> Vec<Node<'a>> {
166        let mut results = Vec::new();
167        for current in xs.into_iter() {
168            let left = results
169                .last_mut()
170                .and_then(Node::unwrap_string_mut);
171            if let Some(left) = left {
172                if let Some(txt) = current.unwrap_string() {
173                    *left = Ann::unannotated(left.data.to_owned() + txt.data.to_owned());
174                    continue;
175                }
176            }
177            results.push(current);
178        }
179        results
180    }
181    fn block_passes<'a>(xs: Vec<Node<'a>>) -> Vec<Node<'a>> {
182        /// Put all 'block passes' here
183        merge_text_content(xs)
184    }
185    let node = Node::new_fragment(children);
186    let node = node.transform_children(Rc::new(block_passes));
187    node.into_fragment()
188}
189
190/// All node to node passes.
191fn node_passes<'a>(node: Node<'a>) -> Node<'a> {
192    fn apply_rewrite_rules<'a>(tag: Tag<'a>) -> Tag<'a> {
193        let mut children = tag.children;
194        for RewriteRule{from, to} in tag.rewrite_rules {
195            let from = from.unwrap_curly_brace();
196            let to = to.unwrap_curly_brace();
197            match (from, to) {
198                (Some(from), Some(to)) => {
199                    children = match_and_apply_rewrite_rule(
200                        from.clone(),
201                        to.clone(),
202                        children,
203                    );
204                }
205                _ => ()
206            }
207        }
208        Tag {
209            name: tag.name,
210            parameters: tag.parameters,
211            children,
212            rewrite_rules: Vec::new(),
213        }
214    }
215    fn process_tags<'a>(env: NodeEnvironment, mut tag: Tag<'a>) -> Tag<'a> {
216        let name: &str = &(tag.name.data);
217        // DON'T DO THIS IN A MATH ENV
218        if env.is_default_env() {
219            // Apply this after any multi-argument specific tag processing.
220            // Because e.g. `\h1{hello }{world}` will become `<h1>hello world</h1>`.
221            // Really only a problem if we happen to be converting to LaTeX.
222            tag.children = tag.children
223                .into_iter()
224                .flat_map(Node::unblock)
225                .collect();
226        }
227        // REWRITE SUBSCRIPT TAGS INTO VALID HTML
228        if name == "note" {
229            tag.name = Ann::unannotated(Cow::Borrowed("div"));
230            tag.insert_unannotated_parameter("macro=note");
231        }
232        else if name == "img" {
233            let value = tag
234                .get_parameter("width")
235                .map(|x| x.data)
236                .and_then(|x| {
237                    let x: &str = &x;
238                    let x = x.split_once("=").map(|x| x.1);
239                    if let Some(x) = x {
240                        return x.parse::<f32>().ok()
241                    }
242                    None
243                });
244            if let Some(width) = value {
245                tag.insert_unannotated_parameter(&format!(
246                    "style='width:{}px;'",
247                    width
248                ));
249            } else {
250                println!(
251                    "[WARNING!] invalid width tag; given {:?}",
252                    tag.get_parameter("width")
253                );
254            }
255        }
256        else if name == "layout" {
257            tag.name = Ann::unannotated(Cow::Borrowed("div"));
258            tag.insert_unannotated_parameter("macro=layout");
259        }
260        tag
261    }
262    let f = |env: NodeEnvironment, node: Node<'a>| -> Node<'a> {
263        match node {
264            Node::Tag(tag) => {
265                let tag = apply_rewrite_rules(tag);
266                let tag = process_tags(env, tag);
267                Node::Tag(tag)
268            }
269            node @ Node::Enclosure(_) => node,
270            node @ Node::String(_) => node,
271            node @ Node::Ident(_) => node,
272            node @ Node::InvalidToken(_) => node,
273        }
274    };
275    node.transform(NodeEnvironment::default(), Rc::new(f))
276}
277
278
279///////////////////////////////////////////////////////////////////////////////
280// AST TO CODEGEN
281///////////////////////////////////////////////////////////////////////////////
282
283/// Internal
284pub fn html_canonicalization<'a>(nodes: Vec<Node<'a>>) -> Vec<Node<'a>> {
285    fn passes<'a>(children: Vec<Node<'a>>) -> Vec<Node<'a>> {
286        let children = children
287            .into_iter()
288            .map(node_passes)
289            .collect();
290        child_list_passes(children)
291    }
292    let result = passes(nodes);
293    let result = result
294        .into_iter()
295        .map(crate::frontend::pass::math::latex_pass)
296        .collect::<Vec<_>>();
297    result
298}
299