tex2typst_rs/
typst_writer.rs

1use crate::definitions::{TypstNode, TypstNodeData, TypstNodeType, TypstToken, TypstTokenType};
2use regex::Regex;
3use std::sync::LazyLock;
4
5static TYPST_LEFT_PARENTHESIS: LazyLock<TypstToken> = LazyLock::new(|| TypstToken {
6    token_type: TypstTokenType::Element,
7    value: "(".to_string(),
8});
9
10static TYPST_RIGHT_PARENTHESIS: LazyLock<TypstToken> = LazyLock::new(|| TypstToken {
11    token_type: TypstTokenType::Element,
12    value: ")".to_string(),
13});
14
15static TYPST_COMMA: LazyLock<TypstToken> = LazyLock::new(|| TypstToken {
16    token_type: TypstTokenType::Element,
17    value: ",".to_string(),
18});
19
20static TYPST_NEWLINE: LazyLock<TypstToken> = LazyLock::new(|| TypstToken {
21    token_type: TypstTokenType::Symbol,
22    value: "\n".to_string(),
23});
24
25pub struct TypstWriter {
26    pub buffer: String,
27    pub queue: Vec<TypstToken>,
28    inside_function_depth: usize,
29}
30
31impl TypstWriter {
32    pub fn new() -> Self {
33        TypstWriter {
34            buffer: String::new(),
35            queue: Vec::new(),
36            inside_function_depth: 0,
37        }
38    }
39
40    fn write_buffer(&mut self, token: &TypstToken) {
41        let new_str = token.to_string();
42
43        if new_str.is_empty() {
44            return;
45        }
46
47        let mut no_need_space = false;
48        // putting the first token in clause
49        no_need_space |= self.buffer.ends_with(&['(', '[', '|']) && new_str.starts_with(char::is_alphanumeric);
50        // closing a clause
51        no_need_space |= new_str.starts_with(&[')', '}', ']', '|']);
52        // putting the opening '(' for a function
53        no_need_space |= !self.buffer.ends_with('=') && new_str.starts_with('(');
54        // putting punctuation
55        no_need_space |= new_str.starts_with(&['_', '^', ',', ';', '!']);
56        // putting a prime
57        no_need_space |= new_str == "'";
58        // continue a number
59        no_need_space |= self.buffer.ends_with(char::is_numeric) && new_str.starts_with(char::is_numeric);
60        // leading sign. e.g. produce "+1" instead of " +1"
61        no_need_space |= self.buffer.ends_with(&['(', '[', '{']) && new_str.starts_with(&['-', '+'])
62            || self.buffer == "-"
63            || self.buffer == "+";
64        // new line
65        no_need_space |= new_str.starts_with('\n');
66        // buffer is empty
67        no_need_space |= self.buffer.is_empty();
68        // str is starting with a space itself
69        no_need_space |= new_str.starts_with(char::is_whitespace);
70        // "&=" instead of "& ="
71        no_need_space |= self.buffer.ends_with('&') && new_str == "=";
72        // before or after a slash e.g. "a/b" instead of "a / b"
73        no_need_space |= self.buffer.ends_with('/') || new_str.starts_with('/');
74        // other cases
75        no_need_space |= self.buffer.ends_with(&[' ', '_', '^', '{', '(']);
76
77        if !no_need_space {
78            self.buffer.push(' ');
79        }
80
81        self.buffer.push_str(&new_str);
82    }
83
84    // Serialize a tree of TypstNode into a list of TypstToken
85    pub fn serialize(&mut self, node: &TypstNode) -> Result<(), String> {
86        use TypstNodeType as N;
87        use TypstTokenType as T;
88        match node.node_type {
89            N::Empty => Ok(()),
90            N::Atom => {
91                if node.content == "," && self.inside_function_depth > 0 {
92                    self.queue.push(TypstToken::new(T::Symbol, "comma".to_string()));
93                } else {
94                    self.queue.push(TypstToken::new(T::Element, node.content.clone()));
95                }
96                Ok(())
97            }
98            N::Symbol => {
99                self.queue.push(TypstToken::new(T::Symbol, node.content.clone()));
100                Ok(())
101            }
102            N::Text => {
103                self.queue.push(TypstToken::new(T::Text, node.content.clone()));
104                Ok(())
105            }
106            N::Comment => {
107                self.queue.push(TypstToken::new(T::Comment, node.content.clone()));
108                Ok(())
109            }
110            N::Whitespace => {
111                for c in node.content.chars() {
112                    if c == ' ' {
113                    } else if c == '\n' {
114                        self.queue.push(TypstToken::new(T::Symbol, c.to_string()));
115                    } else {
116                        return Err(format!("Unexpected whitespace character: {}", c));
117                    }
118                }
119                Ok(())
120            }
121            N::NoBreakSpace => {
122                self.queue.push(TypstToken::new(T::Symbol, "space.nobreak".to_string()));
123                Ok(())
124            }
125            N::Group => {
126                if let Some(args) = &node.args {
127                    for item in args {
128                        self.serialize(item)?;
129                    }
130                }
131                Ok(())
132            }
133            N::Supsub => {
134                if let TypstNodeData::Supsub(data) = node.data.as_ref().unwrap().as_ref() {
135                    self.append_with_brackets_if_needed(&data.base)?;
136
137                    let mut trailing_space_needed = false;
138                    let has_prime = data
139                        .sup
140                        .as_ref()
141                        .map_or(false, |sup| sup.node_type == N::Atom && sup.content == "'");
142                    if has_prime {
143                        self.queue.push(TypstToken::new(T::Element, "'".to_string()));
144                        trailing_space_needed = false;
145                    }
146                    if let Some(sub) = &data.sub {
147                        self.queue.push(TypstToken::new(T::Element, "_".to_string()));
148                        trailing_space_needed = self.append_with_brackets_if_needed(sub)?;
149                    }
150                    if let Some(sup) = &data.sup {
151                        if !has_prime {
152                            self.queue.push(TypstToken::new(T::Element, "^".to_string()));
153                            trailing_space_needed = self.append_with_brackets_if_needed(sup)?;
154                        }
155                    }
156                    if trailing_space_needed {
157                        self.queue.push(TypstToken::new(T::Control, " ".to_string()));
158                    }
159                }
160                Ok(())
161            }
162            N::FuncCall => {
163                self.queue.push(TypstToken::new(T::Symbol, node.content.clone()));
164                self.inside_function_depth += 1;
165                self.queue.push(TYPST_LEFT_PARENTHESIS.clone());
166                if let Some(args) = &node.args {
167                    for (i, arg) in args.iter().enumerate() {
168                        self.serialize(arg)?;
169                        if i < args.len() - 1 {
170                            self.queue.push(TypstToken::new(T::Element, ",".to_string()));
171                        }
172                    }
173                }
174                if let Some(options) = &node.options {
175                    for (key, value) in options {
176                        self.queue
177                            .push(TypstToken::new(T::Symbol, format!(", {}: {}", key, value)));
178                    }
179                }
180                self.queue.push(TYPST_RIGHT_PARENTHESIS.clone());
181                self.inside_function_depth -= 1;
182                Ok(())
183            }
184            N::Fraction => {
185                let num = &node.args.as_ref().unwrap()[0];
186                let den = &node.args.as_ref().unwrap()[1];
187                self.smart_parenthesis(num)?;
188                self.queue.push(TypstToken::new(T::Symbol, "/".to_string()));
189                self.smart_parenthesis(den)?;
190                Ok(())
191            }
192            N::Align => {
193                if let TypstNodeData::Array(matrix) = node.data.as_ref().unwrap().as_ref() {
194                    for (i, row) in matrix.iter().enumerate() {
195                        for (j, cell) in row.iter().enumerate() {
196                            if j > 0 {
197                                self.queue.push(TypstToken::new(T::Element, "&".to_string()));
198                            }
199                            self.serialize(cell)?;
200                        }
201                        if i < matrix.len() - 1 {
202                            self.queue.push(TypstToken::new(T::Symbol, "\\".to_string()));
203                        }
204                    }
205                }
206                Ok(())
207            }
208            N::Matrix => {
209                if let TypstNodeData::Array(matrix) = node.data.as_ref().unwrap().as_ref() {
210                    self.queue.push(TypstToken::new(T::Symbol, "mat".to_string()));
211                    self.inside_function_depth += 1;
212                    self.queue.push(TYPST_LEFT_PARENTHESIS.clone());
213                    if let Some(options) = &node.options {
214                        for (key, value) in options {
215                            self.queue
216                                .push(TypstToken::new(T::Symbol, format!("{}: {}, ", key, value)));
217                        }
218                    }
219                    for (i, row) in matrix.iter().enumerate() {
220                        for (j, cell) in row.iter().enumerate() {
221                            self.serialize(cell)?;
222                            if j < row.len() - 1 {
223                                self.queue.push(TypstToken::new(T::Element, ",".to_string()));
224                            } else if i < matrix.len() - 1 {
225                                self.queue.push(TypstToken::new(T::Element, ";".to_string()));
226                            }
227                        }
228                    }
229                    self.queue.push(TYPST_RIGHT_PARENTHESIS.clone());
230                    self.inside_function_depth -= 1;
231                }
232                Ok(())
233            }
234            N::Unknown => {
235                self.queue.push(TypstToken::new(T::Symbol, node.content.clone()));
236                Ok(())
237            }
238        }
239    }
240
241    fn smart_parenthesis(&mut self, node: &TypstNode) -> Result<(), String> {
242        if node.node_type == TypstNodeType::Group {
243            self.queue.push(TYPST_LEFT_PARENTHESIS.clone());
244            self.serialize(node)?;
245            self.queue.push(TYPST_RIGHT_PARENTHESIS.clone());
246        } else {
247            self.serialize(node)?;
248        }
249        Ok(())
250    }
251
252    fn append_with_brackets_if_needed(&mut self, node: &TypstNode) -> Result<bool, String> {
253        let mut need_to_wrap = matches!(
254            node.node_type,
255            TypstNodeType::Group | TypstNodeType::Supsub | TypstNodeType::Empty
256        );
257
258        if node.node_type == TypstNodeType::Group {
259            if let Some(args) = &node.args {
260                let first = &args[0];
261                let last = &args[args.len() - 1];
262                if is_delimiter(first) && is_delimiter(last) {
263                    need_to_wrap = false;
264                }
265            }
266        }
267
268        if need_to_wrap {
269            self.queue.push(TYPST_LEFT_PARENTHESIS.clone());
270            self.serialize(node)?;
271            self.queue.push(TYPST_RIGHT_PARENTHESIS.clone());
272        } else {
273            self.serialize(node)?;
274        }
275
276        Ok(!need_to_wrap)
277    }
278
279    fn flush_queue(&mut self) {
280        let soft_space = TypstToken::new(TypstTokenType::Control, " ".to_string());
281
282        // delete soft spaces if they are not needed
283        let queue_len = self.queue.len();
284        for i in 0..queue_len {
285            if self.queue[i].eq(&soft_space) {
286                if i == queue_len - 1 {
287                    self.queue[i].value = "".to_string();
288                } else {
289                    let next_is_end = self.queue[i + 1] == *TYPST_RIGHT_PARENTHESIS
290                        || self.queue[i + 1] == *TYPST_COMMA
291                        || self.queue[i + 1] == *TYPST_NEWLINE;
292                    if next_is_end {
293                        self.queue[i].value = "".to_string();
294                    }
295                }
296            }
297        }
298
299        let queue = std::mem::take(&mut self.queue);
300        for token in queue {
301            self.write_buffer(&token);
302        }
303
304        self.queue.clear();
305    }
306
307    pub fn replace_with_shorthand(&mut self, shorthand_list: &Vec<SymbolShorthand>) {
308        for token in self.queue.iter_mut() {
309            for shorthand in shorthand_list {
310                if token.value == shorthand.original {
311                    token.value = shorthand.shorthand.clone();
312                }
313            }
314        }
315    }
316
317    pub fn finalize(&mut self) -> Result<String, String> {
318        self.flush_queue();
319
320        let smart_floor_pass = |input: &str| -> String {
321            let re = Regex::new(r"floor\.l\s*(.*?)\s*floor\.r").unwrap();
322            let mut res = re.replace_all(input, "floor($1)").to_string();
323            res = res.replace("floor()", "floor(\"\")");
324            res
325        };
326
327        let smart_ceil_pass = |input: &str| -> String {
328            let re = Regex::new(r"ceil\.l\s*(.*?)\s*ceil\.r").unwrap();
329            let mut res = re.replace_all(input, "ceil($1)").to_string();
330            res = res.replace("ceil()", "ceil(\"\")");
331            res
332        };
333
334        let smart_round_pass = |input: &str| -> String {
335            let re = Regex::new(r"floor\.l\s*(.*?)\s*ceil\.r").unwrap();
336            let mut res = re.replace_all(input, "round($1)").to_string();
337            res = res.replace("round()", "round(\"\")");
338            res
339        };
340
341        let all_passes = [smart_floor_pass, smart_ceil_pass, smart_round_pass];
342        for pass in &all_passes {
343            self.buffer = pass(&self.buffer);
344        }
345
346        Ok(self.buffer.clone())
347    }
348}
349
350fn is_delimiter(c: &TypstNode) -> bool {
351    matches!(c.node_type, TypstNodeType::Atom)
352        && ["(", ")", "[", "]", "{", "}", "|", "⌊", "⌋", "⌈", "⌉"].contains(&c.content.as_str())
353}
354
/// One replacement rule for `TypstWriter::replace_with_shorthand`: a token
/// whose value equals `original` is rewritten to `shorthand`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolShorthand {
    /// Token value to look for.
    pub original: String,
    /// Value written in place of `original`.
    pub shorthand: String,
}