1use crate::event::{DelimiterType, Dimension, DimensionUnit, Glue, GroupingKind, Line};
2
3use super::{
4 tables::{primitive_color, token_to_delim},
5 Argument, CharToken, ErrorKind, InnerResult, Token,
6};
7
8pub fn definition<'a>(input: &mut &'a str) -> InnerResult<(&'a str, &'a str, &'a str)> {
14 let control_sequence = control_sequence(input)?;
15 let (parameter_text, rest) = input.split_once('{').ok_or(ErrorKind::MissingExpansion)?;
16
17 if let Some(idx) = parameter_text.find(|c: char| c == '%' || c == '}') {
18 return Err(if parameter_text.as_bytes()[idx] == b'%' {
19 ErrorKind::CommentInParamText
20 } else {
21 ErrorKind::BracesInParamText
22 });
23 }
24
25 *input = rest;
26 let replacement_text = group_content(input, GroupingKind::Normal)?;
27
28 Ok((control_sequence, parameter_text, replacement_text))
29}
30
31pub fn argument<'a>(input: &mut &'a str) -> InnerResult<Argument<'a>> {
33 if let Some(rest) = input.trim_start().strip_prefix('{') {
34 *input = rest;
35 let content = group_content(input, GroupingKind::Normal)?;
36 Ok(Argument::Group(content))
37 } else {
38 Ok(Argument::Token(token(input)?))
39 }
40}
41
42pub fn optional_argument<'a>(input: &mut &'a str) -> Option<&'a str> {
43 if let Some(rest) = input.trim_start().strip_prefix('[') {
44 *input = rest;
45 let content = group_content(input, GroupingKind::OptionalArgument).ok()?;
46 Some(content)
47 } else {
48 None
49 }
50}
51
52pub fn brace_argument<'a>(input: &mut &'a str) -> InnerResult<&'a str> {
53 if let Some(rest) = input.trim_start().strip_prefix('{') {
54 *input = rest;
55 group_content(input, GroupingKind::Normal)
56 } else {
57 Err(ErrorKind::GroupArgument)
58 }
59}
60
/// Extracts the content of a group whose opening delimiter has already been consumed.
///
/// Scans `input` for the closing delimiter of `grouping_kind` that balances the
/// already-consumed opening one. Nested groups of the same kind are skipped, a
/// delimiter directly preceded by an unescaped backslash is ignored, and `%` comments
/// are skipped up to the next newline.
///
/// On success, returns the text before the matching closing delimiter and advances
/// `input` past that delimiter.
///
/// # Errors
/// Returns [`ErrorKind::UnbalancedGroup`] when the input ends before the closing
/// delimiter is found (`input` is emptied in that case), or when a comment is not
/// terminated by a newline.
pub fn group_content<'a>(input: &mut &'a str, grouping_kind: GroupingKind) -> InnerResult<&'a str> {
    let start = grouping_kind.opening_str();
    let end = grouping_kind.closing_str();
    // `escaped` is true when the previous byte was a backslash that was not itself escaped.
    let mut escaped = false;
    // Byte offset of the scan position within `input`.
    let mut index = 0;
    // Number of nested groups opened inside this one and not yet closed.
    let mut depth = 0u32;
    let bytes = input.as_bytes();
    // Stop only at an unescaped closing delimiter found at nesting depth zero.
    while escaped || depth > 0 || !bytes[index..].starts_with(end.as_bytes()) {
        // Not enough input left to contain a closing delimiter: the group is unbalanced.
        if index + end.len() > input.len() {
            *input = &input[input.len()..];
            return Err(ErrorKind::UnbalancedGroup(Some(grouping_kind)));
        }
        if !escaped && bytes[index..].starts_with(start.as_bytes()) {
            depth += 1;
            index += start.len();
            continue;
        }
        if !escaped && bytes[index..].starts_with(end.as_bytes()) {
            // A closing delimiter at depth zero terminates the group.
            if depth.checked_sub(1).is_none() {
                break;
            }
            depth -= 1;
            index += end.len();
            continue;
        }
        match bytes[index] {
            // A backslash escapes the next byte, unless it is itself escaped.
            b'\\' => escaped = !escaped,
            b'%' if !escaped => {
                // Jump to the newline ending the comment; the `index += 1` below steps over it.
                // NOTE(review): unlike the end-of-input error above, this path does not
                // empty `input` before returning -- confirm whether callers rely on that.
                let Some(rest_pos) = bytes[index..].iter().position(|&c| c == b'\n') else {
                    return Err(ErrorKind::UnbalancedGroup(Some(grouping_kind)));
                };
                index += rest_pos;
            }
            _ => escaped = false,
        }
        index += 1;
    }
    let (argument, rest) = input.split_at(index);
    // Skip the closing delimiter itself.
    *input = &rest[end.len()..];
    Ok(argument)
}
106
107pub fn content_with_suffix<'a>(input: &mut &'a str, suffix: &str) -> InnerResult<&'a str> {
109 let mut escaped = false;
110 let mut index = 0;
111 let bytes = input.as_bytes();
112 while escaped || !bytes[index..].starts_with(suffix.as_bytes()) {
113 if index + suffix.len() > input.len() {
114 *input = &input[input.len()..];
115 return Err(ErrorKind::MacroSuffixNotFound);
116 }
117 match bytes[index] {
118 b'\\' => escaped = !escaped,
119 b'%' if !escaped => {
120 let rest_pos = bytes[index..]
121 .iter()
122 .position(|&c| c == b'\n')
123 .unwrap_or(bytes.len());
124 index += rest_pos;
125 }
126 b'{' if !escaped => {
127 let content = group_content(&mut &input[index + 1..], GroupingKind::Normal)?;
128 index += content.len() + 1;
129 }
130 _ => escaped = false,
131 }
132 index += 1;
133 }
134 let (argument, rest) = input.split_at(index);
135 *input = &rest[suffix.len()..];
136 Ok(argument)
137}
138
139pub fn delimiter(input: &mut &str) -> InnerResult<(char, DelimiterType)> {
144 let maybe_delim = token(input)?;
145 token_to_delim(maybe_delim).ok_or(ErrorKind::Delimiter)
146}
147
148pub fn futurelet_assignment<'a>(input: &mut &'a str) -> InnerResult<(&'a str, Token<'a>, &'a str)> {
153 let control_sequence = control_sequence(input)?;
154
155 let input_with_tokens = *input;
156
157 let _ = token(input)?;
158 let token = token(input)?;
159 Ok((control_sequence, token, input_with_tokens))
160}
161
162pub fn let_assignment<'a>(input: &mut &'a str) -> InnerResult<(&'a str, Token<'a>)> {
166 let control_sequence = control_sequence(input)?;
167 if let Some(s) = input.trim_start().strip_prefix('=') {
168 *input = s;
169 }
170 let token = token(input)?;
171 Ok((control_sequence, token))
172}
173
174pub fn control_sequence<'a>(input: &mut &'a str) -> InnerResult<&'a str> {
176 if let Some(rest) = input.strip_prefix('\\') {
177 *input = rest;
178 rhs_control_sequence(input)
179 } else {
180 input
181 .chars()
182 .next()
183 .map_or(Err(ErrorKind::EmptyControlSequence), |_| {
184 Err(ErrorKind::ControlSequence)
185 })
186 }
187}
188
/// Consumes any run of `\limits` / `\nolimits` modifiers (each optionally preceded by
/// whitespace) and reports the last one seen.
///
/// Returns `Some(true)` when the last modifier was `\limits`, `Some(false)` when it was
/// `\nolimits`, and `None` (leaving `input` untouched) when there is no modifier.
///
/// A modifier is only recognised as a complete control word: `\limitsx` is the control
/// word `limitsx`, not `\limits` followed by `x`, which matches how
/// `rhs_control_sequence` delimits control words.
pub fn limit_modifiers(input: &mut &str) -> Option<bool> {
    // Strips the control word `word` from the front of `s`, unless it is merely the
    // beginning of a longer control word.
    fn strip_control_word<'a>(s: &'a str, word: &str) -> Option<&'a str> {
        s.strip_prefix(word)
            .filter(|rest| !rest.starts_with(|c: char| c.is_ascii_alphabetic()))
    }

    let mut output = None;
    loop {
        let trimmed = input.trim_start();
        let (rest, limits) = if let Some(rest) = strip_control_word(trimmed, r"\limits") {
            (rest, true)
        } else if let Some(rest) = strip_control_word(trimmed, r"\nolimits") {
            (rest, false)
        } else {
            break;
        };
        *input = rest;
        output = Some(limits);
    }
    output
}
207
208pub fn rhs_control_sequence<'a>(input: &mut &'a str) -> InnerResult<&'a str> {
212 if input.is_empty() {
213 return Err(ErrorKind::EmptyControlSequence);
214 }
215
216 let len = input
217 .chars()
218 .take_while(|c| c.is_ascii_alphabetic())
219 .count()
220 .max(1);
221
222 let (control_sequence, rest) = input.split_at(len);
223 *input = rest.trim_start();
224 Ok(control_sequence)
225}
226
227pub fn glue(input: &mut &str) -> InnerResult<Glue> {
229 let mut dimen = (dimension(input)?, None, None);
230 if let Some(s) = input.trim_start().strip_prefix("plus") {
231 *input = s;
232 dimen.1 = Some(dimension(input)?);
233 }
234 if let Some(s) = input.trim_start().strip_prefix("minus") {
235 *input = s;
236 dimen.2 = Some(dimension(input)?);
237 }
238 Ok(dimen)
239}
240
241pub fn dimension(input: &mut &str) -> InnerResult<Dimension> {
243 let number = floating_point(input)?;
244 let unit = dimension_unit(input)?;
245 Ok(Dimension::new(number, unit))
246}
247
248pub fn dimension_unit(input: &mut &str) -> InnerResult<DimensionUnit> {
250 *input = input.trim_start();
251 let unit = input.get(0..2).ok_or(ErrorKind::DimensionUnit)?;
252 let unit = match unit {
253 "em" => DimensionUnit::Em,
254 "ex" => DimensionUnit::Ex,
255 "pt" => DimensionUnit::Pt,
256 "pc" => DimensionUnit::Pc,
257 "in" => DimensionUnit::In,
258 "bp" => DimensionUnit::Bp,
259 "cm" => DimensionUnit::Cm,
260 "mm" => DimensionUnit::Mm,
261 "dd" => DimensionUnit::Dd,
262 "cc" => DimensionUnit::Cc,
263 "sp" => DimensionUnit::Sp,
264 "mu" => DimensionUnit::Mu,
265 _ => return Err(ErrorKind::DimensionUnit),
266 };
267
268 *input = &input[2..];
269 one_optional_space(input);
270
271 Ok(unit)
272}
273
274#[allow(dead_code)]
278pub fn integer(input: &mut &str) -> InnerResult<isize> {
279 let signum = signs(input)?;
280
281 let unsigned_int = unsigned_integer(input)?;
282
283 Ok(unsigned_int as isize * signum)
284}
285
286pub fn unsigned_integer(input: &mut &str) -> InnerResult<usize> {
287 let next_char = input.chars().next().ok_or(ErrorKind::Number)?;
289 if next_char.is_ascii_digit() {
290 return Ok(decimal(input));
291 }
292 *input = &input[1..];
293 match next_char {
294 '`' => {
295 let mut next_byte = *input.as_bytes().first().ok_or(ErrorKind::Number)?;
296 if next_byte == b'\\' {
297 *input = &input[1..];
298 next_byte = *input.as_bytes().first().ok_or(ErrorKind::Number)?;
299 }
300 if next_byte.is_ascii() {
301 *input = &input[1..];
302 Ok(next_byte as usize)
303 } else {
304 Err(ErrorKind::CharacterNumber)
305 }
306 }
307 '\'' => Ok(octal(input)),
308 '"' => Ok(hexadecimal(input)),
309 _ => Err(ErrorKind::Number),
310 }
311}
312
313pub fn signs(input: &mut &str) -> InnerResult<isize> {
315 let mut minus_count = 0;
316 *input = input
317 .trim_start_matches(|c: char| {
318 if c == '-' {
319 minus_count += 1;
320 true
321 } else {
322 c == '+' || c.is_whitespace()
323 }
324 })
325 .trim_start();
326 Ok(if minus_count % 2 == 0 { 1 } else { -1 })
327}
328
329pub fn hexadecimal(input: &mut &str) -> usize {
331 let mut number = 0;
332 *input = input.trim_start_matches(|c: char| {
333 if c.is_ascii_alphanumeric() && c < 'G' {
334 number =
335 number * 16 + c.to_digit(16).expect("the character is a valid hex digit") as usize;
336 true
337 } else {
338 false
339 }
340 });
341 one_optional_space(input);
342
343 number
344}
345
346pub fn floating_point(input: &mut &str) -> InnerResult<f32> {
348 let signum = signs(input)?;
349
350 let mut number = 0.;
351 *input = input.trim_start_matches(|c: char| {
352 if c.is_ascii_digit() {
353 number = number * 10. + (c as u8 - b'0') as f32;
354 true
355 } else {
356 false
357 }
358 });
359
360 if let Some(stripped_decimal_point) = input.strip_prefix(|c| c == '.' || c == ',') {
361 let mut decimal = 0.;
362 let mut decimal_divisor = 1.;
363 *input = stripped_decimal_point.trim_start_matches(|c: char| {
364 if c.is_ascii_digit() {
365 decimal = decimal * 10. + (c as u8 - b'0') as f32;
366 decimal_divisor *= 10.;
367 true
368 } else {
369 false
370 }
371 });
372 number += decimal / decimal_divisor;
373 };
374
375 Ok(signum as f32 * number)
376}
377
378pub fn decimal(input: &mut &str) -> usize {
380 let mut number = 0;
381 *input = input.trim_start_matches(|c: char| {
382 if c.is_ascii_digit() {
383 number = number * 10 + (c as u8 - b'0') as usize;
384 true
385 } else {
386 false
387 }
388 });
389 one_optional_space(input);
390
391 number
392}
393
394pub fn octal(input: &mut &str) -> usize {
396 let mut number = 0;
397 *input = input.trim_start_matches(|c: char| {
398 if c.is_ascii_digit() {
399 number = number * 8 + (c as u8 - b'0') as usize;
400 true
401 } else {
402 false
403 }
404 });
405 one_optional_space(input);
406
407 number
408}
409
/// Skips at most one whitespace character at the start of `input`.
///
/// Returns `true` when a whitespace character was consumed. Whitespace is determined by
/// [`char::is_whitespace`], which includes multi-byte characters; the whole character is
/// skipped regardless of its encoded length.
pub fn one_optional_space(input: &mut &str) -> bool {
    let mut chars = input.chars();
    if chars.next().is_some_and(char::is_whitespace) {
        // `as_str` is the remainder after the consumed character, which is always on a
        // character boundary (a fixed one-byte offset is not).
        *input = chars.as_str();
        true
    } else {
        false
    }
}
420
421pub fn token<'a>(input: &mut &'a str) -> InnerResult<Token<'a>> {
425 *input = input.trim_start();
426 match input.chars().next() {
427 Some('\\') => {
428 *input = &input[1..];
429 Ok(Token::ControlSequence(rhs_control_sequence(input)?))
430 }
431 Some('%') => {
432 let (_, rest) = input
433 .split_once('\n')
434 .unwrap_or(("", &input[input.len()..]));
435 *input = rest;
436 token(input)
437 }
438 Some(c) => {
439 let context = *input;
440 *input = input.split_at(c.len_utf8()).1;
441 Ok(Token::Character(CharToken::from_str(context)))
442 }
443 None => Err(ErrorKind::Token),
444 }
445}
446
447pub fn color(color: &str) -> Option<(u8, u8, u8)> {
448 match color.strip_prefix('#') {
449 Some(color) if color.len() == 6 => {
450 let r = u8::from_str_radix(&color[..2], 16).ok()?;
451 let g = u8::from_str_radix(&color[2..4], 16).ok()?;
452 let b = u8::from_str_radix(&color[4..], 16).ok()?;
453 Some((r, g, b))
454 }
455 None => primitive_color(color),
456 _ => None,
457 }
458}
459
460pub fn horizontal_lines(content: &mut &str) -> Box<[Line]> {
461 let mut horizontal_lines = Vec::new();
462 while let Some((rest, line)) = content
463 .trim_start()
464 .strip_prefix("\\hline")
465 .map(|rest| (rest, Line::Solid))
466 .or_else(|| {
467 content
468 .trim_start()
469 .strip_prefix("\\hdashline")
470 .map(|rest| (rest, Line::Dashed))
471 })
472 {
473 horizontal_lines.push(line);
474 *content = rest;
475 }
476
477 horizontal_lines.into()
478}
479
// Unit tests for the lexing helpers of this module.
#[cfg(test)]
mod tests {
    use crate::{
        event::{Dimension, DimensionUnit, GroupingKind},
        parser::{lex, Token},
    };

    // An odd number of `-` signs yields -1; signs and surrounding whitespace are consumed.
    #[test]
    fn signs() {
        let mut input = " + +- \\test";
        assert_eq!(lex::signs(&mut input).unwrap(), -1);
        assert_eq!(input, "\\test");
    }

    // Without any sign the result is 1 and the input is left untouched.
    #[test]
    fn no_signs() {
        let mut input = "\\mycommand";
        assert_eq!(lex::signs(&mut input).unwrap(), 1);
        assert_eq!(input, "\\mycommand");
    }

    // A definition whose parameter text mixes literal characters with parameters, and
    // whose replacement text contains a nested group.
    #[test]
    fn definition_texbook() {
        let mut input = "\\cs AB#1#2C$#3\\$ {#3{ab#1}#1 c##\\x #2}";

        let (cs, param, repl) = lex::definition(&mut input).unwrap();
        assert_eq!(cs, "cs");
        assert_eq!(param, "AB#1#2C$#3\\$ ");
        assert_eq!(repl, "#3{ab#1}#1 c##\\x #2");
        assert_eq!(input, "");
    }

    // Escaped braces inside the replacement text must not affect group balancing, and the
    // text after the closing brace must remain in the input.
    #[test]
    fn complex_definition() {
        let mut input = r"\foo #1\test#2#{##\####2#2 \{{\}} \{\{\{} 5 + 5 = 10";
        let (cs, param, repl) = lex::definition(&mut input).unwrap();

        assert_eq!(cs, "foo");
        assert_eq!(param, r"#1\test#2#");
        assert_eq!(repl, r"##\####2#2 \{{\}} \{\{\{");
        assert_eq!(input, " 5 + 5 = 10");
    }

    // The optional `=` and the whitespace around it are skipped.
    #[test]
    fn let_assignment() {
        let mut input = r"\foo = \bar";
        let (cs, token) = lex::let_assignment(&mut input).unwrap();

        assert_eq!(cs, "foo");
        assert_eq!(token, Token::ControlSequence("bar"));
        assert_eq!(input, "");
    }

    // The returned token is the second one after the control sequence, while the returned
    // rest still contains both tokens.
    #[test]
    fn futurelet_assignment() {
        let mut input = r"\foo\bar\baz blah";
        let (cs, token, rest) = lex::futurelet_assignment(&mut input).unwrap();

        assert_eq!(cs, "foo");
        assert_eq!(token, Token::ControlSequence("baz"));
        assert_eq!(rest, r"\bar\baz blah");
    }

    // A plain number directly followed by its unit.
    #[test]
    fn dimension() {
        let mut input = "1.2pt";
        let dim = lex::dimension(&mut input).unwrap();

        assert_eq!(dim, Dimension::new(1.2, DimensionUnit::Pt));
        assert_eq!(input, "");
    }

    // Glue with both a `plus` and a `minus` component; parsing stops before unrelated text.
    #[test]
    fn complex_glue() {
        let mut input = "1.2 pt plus 3.4pt minus 5.6pt nope";
        let glue = lex::glue(&mut input).unwrap();

        assert_eq!(
            glue,
            (
                Dimension::new(1.2, DimensionUnit::Pt),
                Some(Dimension::new(3.4, DimensionUnit::Pt)),
                Some(Dimension::new(5.6, DimensionUnit::Pt))
            )
        );
        assert_eq!(input, "nope");
    }

    // In order: decimal, hexadecimal (`"`), octal (`'`), character code (backtick), and a
    // floating point number without an integral part.
    #[test]
    fn numbers() {
        let mut input = "123 -\"AEF24 --'3475 `\\a -.47";
        assert_eq!(lex::integer(&mut input).unwrap(), 123);
        assert_eq!(lex::integer(&mut input).unwrap(), -716580);
        assert_eq!(lex::integer(&mut input).unwrap(), 1853);
        assert_eq!(lex::integer(&mut input).unwrap(), 97);
        assert_eq!(lex::floating_point(&mut input).unwrap(), -0.47);
        assert_eq!(input, "");
    }

    // Nested groups are skipped; only the final, balancing brace ends the content.
    #[test]
    fn group_content() {
        let mut input =
            "this { { is a test } to see if { the content parsing { of this } } } works }";
        let content = lex::group_content(&mut input, GroupingKind::Normal).unwrap();
        assert_eq!(
            content,
            "this { { is a test } to see if { the content parsing { of this } } } works "
        );
    }
}