tylax 0.3.5

Tylax - High-performance bidirectional LaTeX ↔ Typst converter
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
//! Utility functions for LaTeX to Typst conversion
//!
//! This module contains pure utility functions that don't depend on converter state.

use mitex_parser::syntax::{SyntaxElement, SyntaxKind, SyntaxNode};

// =============================================================================
// Text Processing Utilities
// =============================================================================

/// Make a label name usable as a Typst label.
///
/// Every colon, space and underscore in `label` is replaced with a hyphen;
/// all other characters are copied through unchanged.
pub fn sanitize_label(label: &str) -> String {
    label
        .chars()
        .map(|c| match c {
            ':' | ' ' | '_' => '-',
            other => other,
        })
        .collect()
}

/// Render `num` as an upper-case Roman numeral.
///
/// Zero has no Roman representation, so it is returned as the string `"0"`.
/// Values of 4000 and above are written with repeated `M`s.
pub fn to_roman_numeral(num: usize) -> String {
    // Descending table including the subtractive pairs (CM, CD, XC, ...).
    const NUMERALS: [(usize, &str); 13] = [
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    ];

    if num == 0 {
        return String::from("0");
    }

    let mut remaining = num;
    let mut roman = String::new();

    for &(value, glyph) in NUMERALS.iter() {
        // Greedily emit this glyph as many times as it fits.
        let repeats = remaining / value;
        for _ in 0..repeats {
            roman.push_str(glyph);
        }
        remaining -= repeats * value;
    }

    roman
}

// =============================================================================
// Command Protection/Restoration
// =============================================================================

/// Shield selected zero-argument commands from the parser.
///
/// Each listed command (`\today` and the TeX-family logo commands) is replaced
/// by an upper-case tag wrapped in the Private Use Area characters U+E000 and
/// U+E001, so it can be recognised again after conversion. The replacements
/// are plain substring substitutions applied in the order of the table.
pub fn protect_zero_arg_commands(input: &str) -> String {
    const PLACEHOLDERS: [(&str, &str); 7] = [
        ("\\today", "\u{E000}TODAY\u{E001}"),
        ("\\LaTeX", "\u{E000}LATEX\u{E001}"),
        ("\\TeX", "\u{E000}TEX\u{E001}"),
        ("\\XeTeX", "\u{E000}XETEX\u{E001}"),
        ("\\LuaTeX", "\u{E000}LUATEX\u{E001}"),
        ("\\pdfTeX", "\u{E000}PDFTEX\u{E001}"),
        ("\\BibTeX", "\u{E000}BIBTEX\u{E001}"),
    ];

    PLACEHOLDERS
        .iter()
        .fold(input.to_string(), |text, &(command, placeholder)| {
            text.replace(command, placeholder)
        })
}

/// Replace the U+E000…U+E001 placeholder tags with their Typst output.
///
/// The `TODAY` tag becomes `#datetime.today().display()`; the logo tags become
/// their plain-text names (`LaTeX`, `TeX`, `XeTeX`, `LuaTeX`, `pdfTeX`,
/// `BibTeX`). Text without placeholders is returned unchanged.
pub fn restore_protected_commands(input: &str) -> String {
    const RESTORATIONS: [(&str, &str); 7] = [
        ("\u{E000}TODAY\u{E001}", "#datetime.today().display()"),
        ("\u{E000}LATEX\u{E001}", "LaTeX"),
        ("\u{E000}TEX\u{E001}", "TeX"),
        ("\u{E000}XETEX\u{E001}", "XeTeX"),
        ("\u{E000}LUATEX\u{E001}", "LuaTeX"),
        ("\u{E000}PDFTEX\u{E001}", "pdfTeX"),
        ("\u{E000}BIBTEX\u{E001}", "BibTeX"),
    ];

    RESTORATIONS
        .iter()
        .fold(input.to_string(), |text, &(placeholder, output)| {
            text.replace(placeholder, output)
        })
}

// =============================================================================
// Whitespace Cleaning
// =============================================================================

/// Normalize whitespace in converted output.
///
/// Outside of fenced code blocks this:
/// - strips trailing whitespace from every line,
/// - collapses runs of blank lines down to a single blank line,
/// - drops blank lines at the very start of the text.
///
/// Lines between ``` fences (and the fence lines themselves) are copied
/// verbatim. A fence is any line that starts with ``` once trailing whitespace
/// is removed, so an indented fence is not recognised.
///
/// Finally all trailing whitespace is removed and, unless the result is empty,
/// exactly one terminating newline is appended.
pub fn clean_whitespace(input: &str) -> String {
    let mut cleaned = String::new();
    // Number of newlines counted since the last line with content.
    let mut newline_run = 0;
    let mut inside_fence = false;

    for raw_line in input.lines() {
        let stripped = raw_line.trim_end();
        let is_fence = stripped.starts_with("```");

        if is_fence {
            inside_fence = !inside_fence;
        }

        if is_fence || inside_fence {
            // Fence delimiters and fenced content are emitted untouched.
            cleaned.push_str(raw_line);
            cleaned.push('\n');
            if is_fence {
                newline_run = 1;
            }
        } else if stripped.is_empty() {
            newline_run += 1;
            // Two newlines in a row make one blank line; anything beyond is dropped.
            if newline_run <= 2 {
                cleaned.push('\n');
            }
        } else {
            cleaned.push_str(stripped);
            cleaned.push('\n');
            newline_run = 1;
        }
    }

    let body = cleaned.trim_start_matches('\n').trim_end();
    if body.is_empty() {
        String::new()
    } else {
        let mut finished = String::with_capacity(body.len() + 1);
        finished.push_str(body);
        finished.push('\n');
        finished
    }
}

// =============================================================================
// AST Text Extraction
// =============================================================================

/// Concatenate the text of every token beneath `node`, dropping delimiters.
///
/// The tree is walked recursively in document order. Brace and bracket tokens
/// (`TokenLBrace`, `TokenRBrace`, `TokenLBracket`, `TokenRBracket`) are omitted
/// at every depth; all other token text is kept as-is. Intended for math and
/// other simple content where grouping delimiters are not wanted.
pub fn extract_node_text(node: &SyntaxNode) -> String {
    let mut collected = String::new();
    for element in node.children_with_tokens() {
        match element {
            SyntaxElement::Node(inner) => {
                collected.push_str(&extract_node_text(&inner));
            }
            SyntaxElement::Token(tok) => {
                let is_delimiter = matches!(
                    tok.kind(),
                    SyntaxKind::TokenLBrace
                        | SyntaxKind::TokenRBrace
                        | SyntaxKind::TokenLBracket
                        | SyntaxKind::TokenRBracket
                );
                if !is_delimiter {
                    collected.push_str(tok.text());
                }
            }
        }
    }
    collected
}

/// Concatenate the text of every token beneath `node`, delimiters included.
///
/// The tree is walked recursively in document order and no token is skipped,
/// so braces and brackets are preserved. Use this for text content that still
/// contains commands with arguments.
pub fn extract_node_text_with_braces(node: &SyntaxNode) -> String {
    node.children_with_tokens()
        .map(|element| match element {
            SyntaxElement::Token(tok) => tok.text().to_string(),
            SyntaxElement::Node(inner) => extract_node_text_with_braces(&inner),
        })
        .collect()
}

/// Extract the trimmed text of an argument node without any delimiters.
///
/// Brace and bracket tokens that are direct children of `node` are skipped.
/// Child nodes are flattened with [`extract_node_text`], which also drops
/// nested braces and brackets; remaining direct tokens are copied verbatim.
/// Leading and trailing whitespace is removed from the result.
pub fn extract_arg_content(node: &SyntaxNode) -> String {
    let mut buffer = String::new();
    for element in node.children_with_tokens() {
        let is_delimiter = matches!(
            element.kind(),
            SyntaxKind::TokenLBrace
                | SyntaxKind::TokenRBrace
                | SyntaxKind::TokenLBracket
                | SyntaxKind::TokenRBracket
        );
        if is_delimiter {
            continue;
        }

        let is_group = matches!(
            element.kind(),
            SyntaxKind::ItemCurly | SyntaxKind::ItemBracket
        );
        match &element {
            SyntaxElement::Node(inner) => buffer.push_str(&extract_node_text(inner)),
            // Group kinds only contribute through their node form.
            SyntaxElement::Token(tok) => {
                if !is_group {
                    buffer.push_str(tok.text());
                }
            }
        }
    }
    buffer.trim().to_string()
}

/// Extract the trimmed text of an argument node, keeping inner delimiters.
///
/// Only the outermost layer of braces/brackets is removed:
/// - brace and bracket tokens that are direct children of `node` are skipped;
/// - `ItemCurly` / `ItemBracket` child nodes contribute their inner content via
///   [`extract_curly_inner_content`], i.e. without their own delimiters;
/// - any other child node is copied with [`extract_node_text_with_braces`],
///   so delimiters nested deeper are preserved.
///
/// Leading and trailing whitespace is removed from the result.
pub fn extract_arg_content_with_braces(node: &SyntaxNode) -> String {
    let mut buffer = String::new();
    for element in node.children_with_tokens() {
        let is_delimiter = matches!(
            element.kind(),
            SyntaxKind::TokenLBrace
                | SyntaxKind::TokenRBrace
                | SyntaxKind::TokenLBracket
                | SyntaxKind::TokenRBracket
        );
        if is_delimiter {
            continue;
        }

        let is_group = matches!(
            element.kind(),
            SyntaxKind::ItemCurly | SyntaxKind::ItemBracket
        );
        match &element {
            SyntaxElement::Node(inner) => {
                if is_group {
                    // Unwrap the group: drop its own delimiters, keep what is inside.
                    buffer.push_str(&extract_curly_inner_content(inner));
                } else {
                    buffer.push_str(&extract_node_text_with_braces(inner));
                }
            }
            // Group kinds only contribute through their node form.
            SyntaxElement::Token(tok) => {
                if !is_group {
                    buffer.push_str(tok.text());
                }
            }
        }
    }
    buffer.trim().to_string()
}

/// Return the content of a curly/bracket group without its own delimiters.
///
/// Brace and bracket tokens that are direct children of `node` are dropped.
/// Other direct tokens are copied verbatim and child nodes are copied with
/// [`extract_node_text_with_braces`], so delimiters of nested groups survive.
/// The result is not trimmed.
pub fn extract_curly_inner_content(node: &SyntaxNode) -> String {
    node.children_with_tokens()
        .filter(|element| {
            !matches!(
                element.kind(),
                SyntaxKind::TokenLBrace
                    | SyntaxKind::TokenRBrace
                    | SyntaxKind::TokenLBracket
                    | SyntaxKind::TokenRBracket
            )
        })
        .map(|element| match element {
            SyntaxElement::Token(tok) => tok.text().to_string(),
            SyntaxElement::Node(inner) => extract_node_text_with_braces(&inner),
        })
        .collect()
}

pub fn contains_top_level_separator(text: &str, separator: char) -> bool {
    let mut paren_depth = 0usize;
    let mut bracket_depth = 0usize;
    let mut brace_depth = 0usize;

    for ch in text.chars() {
        match ch {
            '(' => paren_depth += 1,
            ')' if paren_depth > 0 => paren_depth -= 1,
            '[' => bracket_depth += 1,
            ']' if bracket_depth > 0 => bracket_depth -= 1,
            '{' => brace_depth += 1,
            '}' if brace_depth > 0 => brace_depth -= 1,
            _ => {}
        }

        if ch == separator && paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 {
            return true;
        }
    }

    false
}

// =============================================================================
// Caption Text Conversion
// =============================================================================

/// Convert caption/title/author text that may contain inline math and formatting commands.
///
/// The input is scanned character by character:
/// - `$...$` spans are converted with `super::latex_math_to_typst` and re-wrapped in `$`.
///   An unterminated span runs to the end of the input.
/// - Control words (`\` followed by ASCII letters) are mapped to Typst markup, e.g.
///   `\textbf{x}` -> `*x*`, `\emph{x}` -> `_x_`, `\texttt{x}` -> `` `x` ``,
///   `\textsc{x}` -> `#smallcaps[x]`. A braced argument is only read for commands accepted
///   by `crate::data::symbols::is_caption_text_command`; the argument is converted
///   recursively (except for `\texttt`, which is copied raw). Unknown commands are dropped,
///   keeping only their converted argument if one was read.
/// - Escaped symbols are consumed together with their backslash: `\&` -> `&`, `\%` -> `%`,
///   `\{` -> `{`, `\}` -> `}`, while `\_`, `\#` and `\$` stay backslash-escaped because the
///   bare characters are active in Typst markup, and `\\` becomes a Typst line break (`\ `).
/// - A backslash followed by any other non-letter is dropped and the following character is
///   emitted verbatim.
/// - Every other character is copied unchanged.
///
/// Returns the converted Typst markup.
pub fn convert_caption_text(text: &str) -> String {
    /// Skip spaces, then read one `{...}` group and return its content without the outer
    /// braces. Nested braces are kept. Returns `None` (spaces stay consumed) when no `{` follows.
    fn read_braced_arg(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
        while chars.next_if_eq(&' ').is_some() {}
        chars.next_if_eq(&'{')?;

        let mut content = String::new();
        let mut depth = 1usize;
        for c in chars.by_ref() {
            match c {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        // Matching close brace of the argument: not part of the content.
                        break;
                    }
                }
                _ => {}
            }
            content.push(c);
        }
        Some(content)
    }

    /// Append `open`, the recursively converted argument (if any), then `close`.
    fn push_wrapped(out: &mut String, open: &str, arg: Option<String>, close: &str) {
        out.push_str(open);
        if let Some(content) = arg {
            out.push_str(&convert_caption_text(&content));
        }
        out.push_str(close);
    }

    let mut result = String::new();
    let mut chars = text.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '$' {
            // Collect math content up to (and consuming) the closing `$`.
            let mut math_content = String::new();
            for next in chars.by_ref() {
                if next == '$' {
                    break;
                }
                math_content.push(next);
            }
            let converted = super::latex_math_to_typst(&math_content);
            result.push('$');
            result.push_str(&converted);
            result.push('$');
            continue;
        }

        if ch != '\\' {
            result.push(ch);
            continue;
        }

        // Control word: the longest run of ASCII letters after the backslash.
        let mut cmd = String::new();
        while let Some(letter) = chars.next_if(|c| c.is_ascii_alphabetic()) {
            cmd.push(letter);
        }

        if cmd.is_empty() {
            // Control symbol: the name is the single non-letter character that follows.
            // It must be consumed here; otherwise the main loop would see it as ordinary
            // text (e.g. `\$` would open a math span, `\_` would start emphasis).
            let escaped = match chars.peek().copied() {
                Some('&') => Some("&"),
                Some('%') => Some("%"),
                Some('_') => Some("\\_"), // _ needs escaping in Typst markup
                Some('#') => Some("\\#"), // # needs escaping in Typst
                Some('$') => Some("\\$"), // $ needs escaping in Typst
                Some('{') => Some("{"),
                Some('}') => Some("}"),
                Some('\\') => Some("\\ "), // line break
                _ => None,
            };
            if let Some(escaped) = escaped {
                chars.next();
                result.push_str(escaped);
                continue;
            }
            // Any other character is left in place: the backslash is dropped and the
            // character itself is emitted by the next loop iteration.
        }

        // Only commands known to take a braced argument get one read.
        let arg_content = if crate::data::symbols::is_caption_text_command(&cmd) {
            read_braced_arg(&mut chars)
        } else {
            None
        };

        // Convert common text-mode commands
        match cmd.as_str() {
            "textbf" | "bf" => push_wrapped(&mut result, "*", arg_content, "*"),
            "textit" | "it" | "emph" => push_wrapped(&mut result, "_", arg_content, "_"),
            "texttt" => {
                result.push('`');
                if let Some(content) = arg_content {
                    result.push_str(&content); // Don't recurse for monospace
                }
                result.push('`');
            }
            "textsc" => push_wrapped(&mut result, "#smallcaps[", arg_content, "]"),
            "underline" => push_wrapped(&mut result, "#underline[", arg_content, "]"),
            // Just include the content
            "textrm" | "text" | "mbox" | "hbox" => push_wrapped(&mut result, "", arg_content, ""),
            "textsf" => push_wrapped(
                &mut result,
                "#text(font: \"sans-serif\")[",
                arg_content,
                "]",
            ),
            // Date/time commands
            "today" => result.push_str("#datetime.today().display()"),

            // LaTeX logo commands are written out as plain text
            "LaTeX" | "TeX" | "XeTeX" | "LuaTeX" | "pdfTeX" | "BibTeX" => result.push_str(&cmd),

            "" => {
                // Lone backslash before an unrecognised non-letter: drop the backslash only.
            }
            // Unknown commands: never emit raw LaTeX; keep the argument content if present.
            _ => push_wrapped(&mut result, "", arg_content, ""),
        }
    }

    result
}