subscript_compiler/frontend/
data.rs

1//! Common data types used throughout the compiler.
2use std::borrow::Cow;
3use std::collections::{HashSet, VecDeque, LinkedList};
4use std::iter::FromIterator;
5use lazy_static::lazy_static;
6
7
/// Tag name used for inline math (going by the constant's name; its use sites
/// are outside this view).
pub static INLINE_MATH_TAG: &str = "[inline-math]";

/// Tag names classified as block-level math.
pub static BLOCK_MATH_TAGS: &[&str] = &["equation"];
12
13
14lazy_static! {
15    pub static ref HEADING_TAG_NAMES: HashSet<&'static str> = HashSet::from_iter(vec![
16        "h1",
17        "h2",
18        "h3",
19        "h4",
20        "h5",
21        "h6",
22    ]);
23}
24
/// Tag names listed as Subscript's own tags (going by the constant's name;
/// how the list is consumed is outside this view).
pub static ALL_SUBSCRIPT_TAGS: &[&str] = &["note", "layout", "equation"];
30
/// Allow-list of HTML tag names.
///
/// Every heading level (`h1`–`h6`) is listed, matching `HEADING_TAG_NAMES`,
/// and each name appears exactly once.
///
/// NOTE(review): `main` and `nav` are absent from the list; that is left
/// unchanged here because it is unclear whether the omission is deliberate.
pub static ALLOWED_HTML_TAGS: &[&str] = &[
    // Content sectioning
    "address",
    "article",
    "aside",
    "footer",
    "header",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "section",
    // Text content
    "blockquote",
    "dd",
    "div",
    "dl",
    "dt",
    "figcaption",
    "figure",
    "hr",
    "li",
    "ol",
    "p",
    "pre",
    "ul",
    // Inline text semantics
    "a",
    "abbr",
    "b",
    "bdi",
    "bdo",
    "br",
    "cite",
    "code",
    "data",
    "dfn",
    "em",
    "i",
    "kbd",
    "mark",
    "q",
    "rb",
    "rp",
    "rt",
    "rtc",
    "ruby",
    "s",
    "samp",
    "small",
    "span",
    "strong",
    "sub",
    "sup",
    "time",
    "u",
    "var",
    "wbr",
    // Image and multimedia
    "area",
    "audio",
    "img",
    "map",
    "track",
    "video",
    // Embedded content
    "embed",
    "iframe",
    "object",
    "param",
    "picture",
    "portal",
    "source",
    // SVG and MathML
    "svg",
    "math",
    // Scripting
    "canvas",
    "noscript",
    "script",
    // Edits
    "del",
    "ins",
    // Tables
    "caption",
    "col",
    "colgroup",
    "table",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "tr",
    // Forms
    "button",
    "datalist",
    "fieldset",
    "form",
    "input",
    "label",
    "legend",
    "meter",
    "optgroup",
    "option",
    "output",
    "progress",
    "select",
    "textarea",
    // Interactive elements
    "details",
    "dialog",
    "menu",
    "summary",
    // Web components
    "slot",
    "template",
    // Legacy elements (`rb` and `rtc` are already listed above, so they are
    // not repeated here)
    "acronym",
    "applet",
    "basefont",
    "bgsound",
    "big",
    "blink",
    "center",
    "content",
    "dir",
    "font",
    "frame",
    "frameset",
    "hgroup",
    "image",
    "keygen",
    "marquee",
    "menuitem",
    "nobr",
    "noembed",
    "noframes",
    "plaintext",
    "shadow",
    "spacer",
    "strike",
    "tt",
];
161
162
163///////////////////////////////////////////////////////////////////////////////
164// LAYOUT
165///////////////////////////////////////////////////////////////////////////////
166
/// Layout kind: either [`LayoutKind::Block`] or [`LayoutKind::Inline`].
#[derive(Clone, Debug)]
pub enum LayoutKind {
    /// Block layout.
    Block,
    /// Inline layout.
    Inline,
}
172
173
174///////////////////////////////////////////////////////////////////////////////
175// STRING DATA TYPES
176///////////////////////////////////////////////////////////////////////////////
177
/// A string that is either borrowed for `'a` or owned; an alias for `Cow<'a, str>`.
pub type Atom<'a> = Cow<'a, str>;
179
/// A piece of text that is either borrowed for `'a` or owned.
///
/// Equality and hashing are both derived from the wrapped `Cow<str>`, which
/// compares and hashes the string contents. A borrowed and an owned `Text`
/// holding the same characters are therefore equal and hash identically.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct Text<'a>(pub Cow<'a, str>);

impl<'a> Text<'a> {
    /// Wraps a borrowed string slice without copying it.
    pub fn new(value: &'a str) -> Self {
        Text(Cow::Borrowed(value))
    }

    /// Wraps an owned `String`, taking over its buffer.
    pub fn from_string(value: String) -> Self {
        Text(Cow::Owned(value))
    }

    /// Returns the length of the text in bytes (not in characters).
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Consumes both values and returns `self` followed by `other`.
    pub fn append(self, other: Text<'a>) -> Self {
        let Text(head) = self;
        let Text(tail) = other;
        Text(head + tail)
    }
}

impl<'a> std::fmt::Display for Text<'a> {
    /// Writes the text contents verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl<'a> PartialEq<str> for Text<'a> {
    /// Compares the text contents against a plain string slice.
    fn eq(&self, other: &str) -> bool {
        &*self.0 == other
    }
}
217
218
219
/// The strings that `is_token` treats as tokens.
pub static TOKEN_SET: &[&str] = &[
    "\\", "[", "]", "{", "}", "(", ")", "=>", "_", "^",
];
221
/// Returns the closing delimiter that pairs with the opening delimiter `begin_kind`.
///
/// The result is always a string literal, so it is `'static` and does not
/// borrow from the argument.
///
/// # Panics
///
/// Panics if `begin_kind` is not one of `"{"`, `"["`, or `"("`.
fn get_end_kind_for(begin_kind: &str) -> &'static str {
    match begin_kind {
        "{" => "}",
        "[" => "]",
        "(" => ")",
        // Callers are expected to pass only opening delimiters; name the
        // offending input so a violation is easy to diagnose.
        other => unreachable!("not an opening delimiter: {:?}", other),
    }
}
230
/// Returns the opening delimiter that pairs with the closing delimiter `end_kind`.
///
/// The result is always a string literal, so it is `'static` and does not
/// borrow from the argument.
///
/// # Panics
///
/// Panics if `end_kind` is not one of `"}"`, `"]"`, or `")"`.
fn get_begin_kind_for(end_kind: &str) -> &'static str {
    match end_kind {
        "}" => "{",
        "]" => "[",
        ")" => "(",
        // Callers are expected to pass only closing delimiters; name the
        // offending input so a violation is easy to diagnose.
        other => unreachable!("not a closing delimiter: {:?}", other),
    }
}
239
240pub fn is_token<'a>(value: &'a str) -> bool {
241    for tk in TOKEN_SET {
242        if *tk == value {
243            return true;
244        }
245    }
246    false
247}
248
249
250///////////////////////////////////////////////////////////////////////////////
251// COMMON AST RELATED DATA TYPES
252///////////////////////////////////////////////////////////////////////////////
253
/// A pair of values of the same type describing a rewrite from `from` to `to`.
#[derive(Clone, Debug, PartialEq)]
pub struct RewriteRule<T> {
    /// Source side of the rule.
    pub from: T,
    /// Target side of the rule.
    pub to: T,
}
259
/// Newtype around a `Vec<T>` of items, named for curly-brace delimiters.
#[derive(Clone, Debug)]
pub struct CurlyBrace<T>(pub Vec<T>);
262
/// Newtype around a `Vec<T>` of items, named for square-bracket delimiters.
#[derive(Clone, Debug)]
pub struct SquareParen<T>(pub Vec<T>);
265
266#[derive(Debug, Clone, PartialEq)]
267pub enum EnclosureKind<'a> {
268    CurlyBrace,
269    SquareParen,
270    Parens,
271    /// Intenral - akin to HTML fragment which is just a list of nodes.
272    Fragment,
273    Error {
274        open: Atom<'a>,
275        close: Option<Atom<'a>>,
276    },
277}
278
279impl<'a> EnclosureKind<'a> {
280    pub fn new(open: Atom<'a>, close: Atom<'a>) -> EnclosureKind<'a> {
281        EnclosureKind::parse(open, Some(close))
282    }
283    pub fn parse(open: Atom<'a>, close: Option<Atom<'a>>) -> EnclosureKind<'a> {
284        let open_str: &str = &open;
285        match (open_str, close.as_ref()) {
286            ("{", Some(x)) if x == "}" => EnclosureKind::CurlyBrace,
287            ("[", Some(x)) if x == "]" => EnclosureKind::SquareParen,
288            ("(", Some(x)) if x == ")" => EnclosureKind::Parens,
289            (_, _) => EnclosureKind::Error {open, close},
290        }
291    }
292}
293
294#[derive(Debug, Clone, PartialEq)]
295pub struct Enclosure<'a, T> {
296    pub kind: EnclosureKind<'a>,
297    pub children: Vec<T>,
298}
299
300impl<'a, T> Enclosure<'a, T> {
301    pub fn is_curly_brace(&self) -> bool {
302        match self.kind {
303            EnclosureKind::CurlyBrace => true,
304            _ => false,
305        }
306    }
307    pub fn is_square_parens(&self) -> bool {
308        match self.kind {
309            EnclosureKind::SquareParen => true,
310            _ => false,
311        }
312    }
313    pub fn is_parens(&self) -> bool {
314        match self.kind {
315            EnclosureKind::Parens => true,
316            _ => false,
317        }
318    }
319    pub fn is_error(&self) -> bool {
320        match self.kind {
321            EnclosureKind::Error{..} => true,
322            _ => false,
323        }
324    }
325    pub fn is_fragment(&self) -> bool {
326        match self.kind {
327            EnclosureKind::Fragment{..} => true,
328            _ => false,
329        }
330    }
331}
332
333impl<'a, T> Enclosure<'a, T> {
334    pub fn new_curly_brace(children: Vec<T>) -> Self {
335        Enclosure {
336            kind: EnclosureKind::CurlyBrace,
337            children
338        }
339    }
340    pub fn new_curly_brace_(child: T) -> Self {
341        Enclosure {
342            kind: EnclosureKind::CurlyBrace,
343            children: vec![child]
344        }
345    }
346}