1use crate::SourceSpan;
10use crate::lexer::{Token, TokenKind, TokenStream};
11
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
17pub enum CommandEvent<'src> {
18 Command {
21 name: &'src str,
23 span: SourceSpan,
25 },
26 EnvironmentEnter {
28 name: &'src str,
30 span: SourceSpan,
32 },
33 EnvironmentExit {
35 name: &'src str,
37 span: SourceSpan,
39 },
40 TextModeEnter {
43 span: SourceSpan,
45 },
46 TextModeExit {
48 span: SourceSpan,
50 },
51}
52
53#[must_use]
61pub fn inspect_math_body(source: &str) -> Vec<CommandEvent<'_>> {
62 let stream = TokenStream::new(source);
63 let tokens = stream.tokens();
64 let mut events = Vec::new();
65 let mut text_stack: Vec<usize> = Vec::new();
66 let mut env_stack: Vec<&str> = Vec::new();
67 let mut depth: usize = 0;
68
69 let mut index = 0;
70 while let Some(token) = tokens.get(index) {
71 match token.kind() {
72 TokenKind::CommandWord(raw) => {
73 let name = raw.strip_prefix('\\').unwrap_or(raw);
74 let next_index = index.saturating_add(1);
75 if name == "begin" {
76 if let Some((env_name, group_end_index, end_span)) = read_braced_name(source, tokens, next_index) {
77 let span = SourceSpan::new(token.span().start(), end_span.end());
78 events.push(CommandEvent::EnvironmentEnter { name: env_name, span });
79 env_stack.push(env_name);
80 index = group_end_index.saturating_add(1);
81 continue;
82 }
83 events.push(CommandEvent::Command {
84 name,
85 span: token.span(),
86 });
87 } else if name == "end" {
88 if let Some((env_name, group_end_index, end_span)) = read_braced_name(source, tokens, next_index) {
89 let span = SourceSpan::new(token.span().start(), end_span.end());
90 if env_stack.last() == Some(&env_name) {
91 env_stack.pop();
92 }
93 events.push(CommandEvent::EnvironmentExit { name: env_name, span });
94 index = group_end_index.saturating_add(1);
95 continue;
96 }
97 events.push(CommandEvent::Command {
98 name,
99 span: token.span(),
100 });
101 } else if is_text_mode_command(name) {
102 events.push(CommandEvent::Command {
103 name,
104 span: token.span(),
105 });
106 if let Some(open_index) = skip_trivia(tokens, next_index)
107 && let Some(open_token) = tokens.get(open_index)
108 && matches!(open_token.kind(), TokenKind::LeftBrace)
109 {
110 events.push(CommandEvent::TextModeEnter {
111 span: open_token.span(),
112 });
113 text_stack.push(depth.saturating_add(1));
114 }
115 } else {
116 events.push(CommandEvent::Command {
117 name,
118 span: token.span(),
119 });
120 }
121 }
122 TokenKind::LeftBrace => {
123 depth = depth.saturating_add(1);
124 }
125 TokenKind::RightBrace => {
126 if text_stack.last() == Some(&depth) {
127 text_stack.pop();
128 events.push(CommandEvent::TextModeExit { span: token.span() });
129 }
130 depth = depth.saturating_sub(1);
131 }
132 TokenKind::ControlSymbol(_)
133 | TokenKind::LeftBracket
134 | TokenKind::RightBracket
135 | TokenKind::LeftParen
136 | TokenKind::RightParen
137 | TokenKind::Superscript
138 | TokenKind::Subscript
139 | TokenKind::Alignment
140 | TokenKind::RowSeparator
141 | TokenKind::Comment(_)
142 | TokenKind::Whitespace(_)
143 | TokenKind::Number(_)
144 | TokenKind::Identifier(_)
145 | TokenKind::Punctuation(_)
146 | TokenKind::UnicodeSymbol(_)
147 | TokenKind::Error
148 | TokenKind::Eof => {}
149 }
150 index = index.saturating_add(1);
151 }
152
153 events
154}
155
156fn skip_trivia(tokens: &[Token<'_>], start: usize) -> Option<usize> {
157 let mut index = start;
158 while let Some(token) = tokens.get(index) {
159 if matches!(token.kind(), TokenKind::Whitespace(_) | TokenKind::Comment(_)) {
160 index = index.saturating_add(1);
161 continue;
162 }
163 if matches!(token.kind(), TokenKind::Eof) {
164 return None;
165 }
166 return Some(index);
167 }
168 None
169}
170
171fn read_braced_name<'src>(
177 source: &'src str,
178 tokens: &[Token<'src>],
179 start: usize,
180) -> Option<(&'src str, usize, SourceSpan)> {
181 let open_index = skip_trivia(tokens, start)?;
182 let open_token = tokens.get(open_index)?;
183 if !matches!(open_token.kind(), TokenKind::LeftBrace) {
184 return None;
185 }
186 let content_start = open_token.span().end();
187 let mut cursor = open_index.saturating_add(1);
188 while let Some(token) = tokens.get(cursor) {
189 if matches!(token.kind(), TokenKind::RightBrace) {
190 let close_span = token.span();
191 let content_end = close_span.start();
192 let raw = source.get(content_start..content_end)?;
193 let trimmed = raw.trim();
194 if trimmed.is_empty() {
195 return None;
196 }
197 let offset = raw.find(trimmed).unwrap_or(0);
198 let start_offset = content_start.saturating_add(offset);
199 let end_offset = start_offset.saturating_add(trimmed.len());
200 let borrowed = source.get(start_offset..end_offset)?;
201 return Some((borrowed, cursor, close_span));
202 }
203 if matches!(token.kind(), TokenKind::Eof) {
204 return None;
205 }
206 cursor = cursor.saturating_add(1);
207 }
208 None
209}
210
/// Reports whether `name` (given without a leading backslash) is one of the commands whose
/// braced argument is tracked as a text-mode group.
fn is_text_mode_command(name: &str) -> bool {
    const TEXT_MODE_COMMANDS: [&str; 9] = [
        "text",
        "textrm",
        "textbf",
        "textit",
        "textsf",
        "texttt",
        "textnormal",
        "mbox",
        "hbox",
    ];
    TEXT_MODE_COMMANDS.iter().any(|&command| command == name)
}
217
#[cfg(test)]
mod tests {
    #![allow(
        clippy::indexing_slicing,
        clippy::panic,
        clippy::unwrap_used,
        reason = "tests assert event shape and span text against known inputs"
    )]

    use super::*;

    /// Renders one event as a short label so event sequences can be compared as strings.
    fn label(event: &CommandEvent<'_>) -> String {
        match *event {
            CommandEvent::Command { name, .. } => format!("cmd:{name}"),
            CommandEvent::EnvironmentEnter { name, .. } => format!("env+:{name}"),
            CommandEvent::EnvironmentExit { name, .. } => format!("env-:{name}"),
            CommandEvent::TextModeEnter { .. } => String::from("text+"),
            CommandEvent::TextModeExit { .. } => String::from("text-"),
        }
    }

    /// Inspects `source` and returns the labels of all reported events, in order.
    fn labels_for(source: &str) -> Vec<String> {
        inspect_math_body(source).iter().map(label).collect()
    }

    #[test]
    fn enumerates_top_level_commands_with_spans() {
        let source = r"\alpha + \beta";
        let events = inspect_math_body(source);
        let rendered: Vec<String> = events.iter().map(label).collect();
        assert_eq!(rendered, ["cmd:alpha", "cmd:beta"]);
        let CommandEvent::Command { span, .. } = events[0] else {
            panic!("expected command event");
        };
        assert_eq!(&source[span.as_range()], r"\alpha");
    }

    #[test]
    fn pairs_begin_and_end_for_environments() {
        assert_eq!(
            labels_for(r"\begin{matrix}a & b\end{matrix}"),
            ["env+:matrix", "env-:matrix"],
        );
    }

    #[test]
    fn captures_starred_environment_names() {
        assert_eq!(
            labels_for(r"\begin{align*}x\end{align*}"),
            ["env+:align*", "env-:align*"],
        );
    }

    #[test]
    fn enters_and_exits_text_mode_on_text_command() {
        assert_eq!(
            labels_for(r"\text{hello \alpha}"),
            ["cmd:text", "text+", "cmd:alpha", "text-"],
        );
    }

    #[test]
    fn pairs_nested_text_with_outer_brace_groups() {
        assert_eq!(
            labels_for(r"{x \text{y \alpha} z}"),
            ["cmd:text", "text+", "cmd:alpha", "text-"],
        );
    }

    #[test]
    fn surfaces_commands_the_parser_rejects() {
        assert_eq!(
            labels_for(r"\xrightarrow{f} \ce{H2O}"),
            ["cmd:xrightarrow", "cmd:ce"],
        );
    }

    #[test]
    fn falls_back_to_command_when_begin_has_no_argument() {
        assert_eq!(labels_for(r"\begin"), ["cmd:begin"]);
    }

    #[test]
    fn does_not_emit_text_events_inside_unrelated_groups() {
        assert!(labels_for(r"{a + b}").is_empty());
    }
}