Skip to main content

panache_parser/parser/inlines/
latex.rs

1//! LaTeX command and environment parsing.
2//!
3//! Supports the `raw_tex` extension which preserves LaTeX commands and environments.
4//!
5//! Inline LaTeX commands: \cite{ref}, \textbf{text}, etc.
6//! Block LaTeX environments: \begin{tabular}...\end{tabular}
7
8use crate::syntax::SyntaxKind;
9use rowan::GreenNodeBuilder;
10
11/// Try to parse an inline LaTeX command starting at the given position.
12/// Returns the number of **bytes** consumed if successful, or None.
13///
14/// LaTeX command pattern: \commandname[optional]{required}
15/// - Starts with backslash
16/// - Command name: letters only (a-zA-Z)
17/// - Optional arguments in square brackets: [...]
18/// - Required arguments in curly braces: {...}
19pub(crate) fn try_parse_latex_command(text: &str) -> Option<usize> {
20    let bytes = text.as_bytes();
21
22    if bytes.is_empty() || bytes[0] != b'\\' {
23        return None;
24    }
25
26    // Check for escaped backslash (\\) - this is a hard line break, not LaTeX
27    if bytes.len() > 1 && bytes[1] == b'\\' {
28        return None;
29    }
30
31    let mut pos = 1; // Skip initial backslash
32
33    // Parse command name (letters only)
34    let command_start = pos;
35    while pos < bytes.len() && bytes[pos].is_ascii_alphabetic() {
36        pos += 1;
37    }
38
39    // Must have at least one letter in command name
40    if pos == command_start {
41        return None;
42    }
43
44    // Parse optional and required arguments
45    while pos < bytes.len() {
46        match bytes[pos] {
47            b'[' => {
48                // Optional argument in square brackets
49                pos = skip_bracketed_arg(text, pos)?;
50            }
51            b'{' => {
52                // Required argument in curly braces
53                pos = skip_braced_arg(text, pos)?;
54            }
55            _ => {
56                // End of command
57                break;
58            }
59        }
60    }
61
62    // Must have consumed at least the backslash and command name
63    if pos > 1 { Some(pos) } else { None }
64}
65
/// Walk past one `[...]` optional argument, allowing brackets to nest.
///
/// `start` is the byte offset of the opening `[` within `text`. On success the
/// result is the byte offset just past the matching `]`. `None` is returned when
/// `start` is out of range, does not point at `[`, or the input ends before the
/// brackets balance.
fn skip_bracketed_arg(text: &str, start: usize) -> Option<usize> {
    let raw = text.as_bytes();
    if *raw.get(start)? != b'[' {
        return None;
    }

    let mut cursor = start + 1;
    let mut open = 1;
    loop {
        // Running off the end while brackets are still open means failure.
        let step = match *raw.get(cursor)? {
            // A backslash hides the byte after it, so an escaped bracket does
            // not affect nesting. A trailing backslash is an ordinary byte.
            b'\\' if cursor + 1 < raw.len() => 2,
            b'[' => {
                open += 1;
                1
            }
            b']' => {
                open -= 1;
                1
            }
            _ => 1,
        };
        cursor += step;

        if open == 0 {
            return Some(cursor);
        }
    }
}
94
/// Walk past one `{...}` required argument, allowing braces to nest.
///
/// `start` is the byte offset of the opening `{` within `text`. On success the
/// result is the byte offset just past the matching `}`. `None` is returned when
/// `start` is out of range, does not point at `{`, or the input ends before the
/// braces balance.
fn skip_braced_arg(text: &str, start: usize) -> Option<usize> {
    let raw = text.as_bytes();
    match raw.get(start) {
        Some(b'{') => {}
        _ => return None,
    }

    let end = raw.len();
    let mut nesting = 1;
    let mut idx = start + 1;
    while idx < end {
        let current = raw[idx];

        // A backslash hides the byte after it, so an escaped brace does not
        // affect nesting. A trailing backslash is treated as an ordinary byte.
        if current == b'\\' && idx + 1 < end {
            idx += 2;
            continue;
        }

        if current == b'{' {
            nesting += 1;
        } else if current == b'}' {
            nesting -= 1;
            if nesting == 0 {
                // Report the offset just past the closing brace.
                return Some(idx + 1);
            }
        }
        idx += 1;
    }

    // Input ended with at least one brace still open.
    None
}
123
124/// Parse a LaTeX command and add it to the builder.
125pub(crate) fn parse_latex_command(builder: &mut GreenNodeBuilder, text: &str, len: usize) {
126    builder.start_node(SyntaxKind::LATEX_COMMAND.into());
127    builder.token(SyntaxKind::TEXT.into(), &text[..len]);
128    builder.finish_node();
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    // A command with a single braced argument is consumed in full.
    #[test]
    fn test_simple_command() {
        for input in [r"\cite{ref}", r"\textbf{bold}"] {
            assert_eq!(try_parse_latex_command(input), Some(input.len()));
        }
    }

    // An optional `[...]` argument followed by a `{...}` argument.
    #[test]
    fn test_command_with_optional_arg() {
        let input = r"\includegraphics[width=5cm]{file.png}";
        assert_eq!(try_parse_latex_command(input), Some(input.len()));
    }

    // Several braced arguments in a row, one of them containing a backslash.
    #[test]
    fn test_multiple_arguments() {
        let input = r"\newcommand{\foo}{bar}";
        assert_eq!(try_parse_latex_command(input), Some(input.len()));
    }

    // Inner braces must balance before the argument is considered closed.
    #[test]
    fn test_nested_braces() {
        let input = r"\command{text with {nested} braces}";
        assert_eq!(try_parse_latex_command(input), Some(input.len()));
    }

    // A bare command is valid; the trailing space is not part of it.
    #[test]
    fn test_no_arguments() {
        let consumed = try_parse_latex_command(r"\LaTeX ");
        assert_eq!(consumed, Some(r"\LaTeX".len()));
    }

    // `\\` is a line break, not LaTeX.
    #[test]
    fn test_escaped_backslash() {
        assert!(try_parse_latex_command(r"\\").is_none());
    }

    // Inputs that must not be recognised as commands.
    #[test]
    fn test_not_latex() {
        // Digits are not allowed in a command name.
        assert!(try_parse_latex_command(r"\123").is_none());
        // A backslash followed by a space has no command name.
        assert!(try_parse_latex_command(r"\ ").is_none());
        // No leading backslash at all.
        assert!(try_parse_latex_command("no backslash").is_none());
    }

    // A braced argument that never closes rejects the whole command.
    #[test]
    fn test_unclosed_braces() {
        assert!(try_parse_latex_command(r"\cite{ref").is_none());
    }

    // A bracketed argument that never closes rejects the whole command.
    #[test]
    fn test_unclosed_brackets() {
        assert!(try_parse_latex_command(r"\command[opt").is_none());
    }
}