Skip to main content

carta_readers/
native.rs

1//! Native reader: parses the document model's printed textual form back into the model.
2//!
3//! The native format is the human-readable rendering of the AST — constructor names applied to
4//! their arguments, with strings, tuples, lists, and records written in a small constructor-
//! application value syntax (`Para [ Str "x" ]`, `("id", ["class"], [("k","v")])`). Parsing has two
//! stages: a lexer (`tokenize`) splits the source into `Token`s, and a recursive-descent `Parser`
//! consumes them type-directedly — each AST shape has a dedicated method, so the same `(…, …, …)`
//! tuple is read as the type the position calls for.
10//!
11//! Top level accepts, in order of preference, a whole document (`Pandoc <meta> <blocks>`), a block
12//! list, a single block, an inline list, or a single inline; the last three are wrapped to form a
13//! document (a lone inline or inline list becomes a `Plain` block).
14
15use std::collections::BTreeMap;
16
17use carta_ast::{
18    Alignment, Attr, Block, Caption, Cell, Citation, CitationMode, ColSpec, ColWidth, Document,
19    Format, Inline, ListAttributes, ListNumberDelim, ListNumberStyle, MathType, MetaValue,
20    QuoteType, Row, Table, TableBody, TableFoot, TableHead, Target,
21};
22use carta_core::{Error, Reader, ReaderOptions, Result};
23
/// Parses the document model's printed textual form into the document model.
///
/// The reader is a stateless unit struct; its `Reader::read` implementation tokenizes the input
/// and runs a fresh `Parser` over the tokens on every call.
#[derive(Debug, Default, Clone, Copy)]
pub struct NativeReader;
27
28impl Reader for NativeReader {
29    fn read(&self, input: &str, _options: &ReaderOptions) -> Result<Document> {
30        let tokens = tokenize(input)?;
31        let mut parser = Parser { tokens, pos: 0 };
32        let document = parser.parse_document()?;
33        if parser.pos != parser.tokens.len() {
34            return Err(syntax_error("unexpected trailing input"));
35        }
36        Ok(document)
37    }
38}
39
40fn syntax_error(message: impl Into<String>) -> Error {
41    Error::Io(std::io::Error::new(
42        std::io::ErrorKind::InvalidData,
43        message.into(),
44    ))
45}
46
/// One lexical unit of the native textual form, as produced by `tokenize`.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `,`
    Comma,
    /// `=`
    Equals,
    /// An identifier: starts with an alphabetic character or `_` and continues with alphanumeric
    /// characters, `_`, or `'`.
    Ident(String),
    /// The contents of a double-quoted string literal with its escape sequences already decoded.
    Str(String),
    /// The raw text of a numeric literal (optional `-`, digits, optional fraction and exponent);
    /// the parser converts it to a number later.
    Num(String),
}
61
62fn tokenize(input: &str) -> Result<Vec<Token>> {
63    let chars: Vec<char> = input.chars().collect();
64    let mut pos = 0;
65    let mut tokens = Vec::new();
66    while let Some(&c) = chars.get(pos) {
67        if c.is_whitespace() {
68            pos += 1;
69            continue;
70        }
71        match c {
72            '(' => push_punct(&mut tokens, &mut pos, Token::LParen),
73            ')' => push_punct(&mut tokens, &mut pos, Token::RParen),
74            '[' => push_punct(&mut tokens, &mut pos, Token::LBracket),
75            ']' => push_punct(&mut tokens, &mut pos, Token::RBracket),
76            '{' => push_punct(&mut tokens, &mut pos, Token::LBrace),
77            '}' => push_punct(&mut tokens, &mut pos, Token::RBrace),
78            ',' => push_punct(&mut tokens, &mut pos, Token::Comma),
79            '=' => push_punct(&mut tokens, &mut pos, Token::Equals),
80            '"' => {
81                let (text, next) = lex_string(&chars, pos)?;
82                tokens.push(Token::Str(text));
83                pos = next;
84            }
85            '-' => {
86                let (number, next) = lex_number(&chars, pos)?;
87                tokens.push(Token::Num(number));
88                pos = next;
89            }
90            _ if c.is_ascii_digit() => {
91                let (number, next) = lex_number(&chars, pos)?;
92                tokens.push(Token::Num(number));
93                pos = next;
94            }
95            _ if c.is_alphabetic() || c == '_' => {
96                let (ident, next) = lex_ident(&chars, pos);
97                tokens.push(Token::Ident(ident));
98                pos = next;
99            }
100            _ => return Err(syntax_error(format!("unexpected character '{c}'"))),
101        }
102    }
103    Ok(tokens)
104}
105
106fn push_punct(tokens: &mut Vec<Token>, pos: &mut usize, token: Token) {
107    tokens.push(token);
108    *pos += 1;
109}
110
/// Reads the longest run of identifier characters (alphanumeric, `_`, or `'`) beginning at
/// `start`, returning the run and the index just past it. The run is empty when `start` is not
/// on an identifier character or lies beyond the input.
fn lex_ident(chars: &[char], start: usize) -> (String, usize) {
    let ident: String = chars
        .get(start..)
        .unwrap_or(&[])
        .iter()
        .copied()
        .take_while(|&c| c.is_alphanumeric() || c == '_' || c == '\'')
        .collect();
    let end = start + ident.chars().count();
    (ident, end)
}
124
125fn lex_number(chars: &[char], start: usize) -> Result<(String, usize)> {
126    let mut pos = start;
127    let mut number = String::new();
128    if chars.get(pos) == Some(&'-') {
129        number.push('-');
130        pos += 1;
131    }
132    let digits_start = pos;
133    pos = consume_digits(chars, pos, &mut number);
134    if pos == digits_start {
135        return Err(syntax_error("expected a digit"));
136    }
137    if chars.get(pos) == Some(&'.') {
138        number.push('.');
139        pos += 1;
140        pos = consume_digits(chars, pos, &mut number);
141    }
142    if matches!(chars.get(pos), Some('e' | 'E')) {
143        if let Some(&exp) = chars.get(pos) {
144            number.push(exp);
145        }
146        pos += 1;
147        if matches!(chars.get(pos), Some('+' | '-')) {
148            if let Some(&sign) = chars.get(pos) {
149                number.push(sign);
150            }
151            pos += 1;
152        }
153        pos = consume_digits(chars, pos, &mut number);
154    }
155    Ok((number, pos))
156}
157
/// Appends the run of ASCII digits beginning at `start` to `out` and returns the index just past
/// the run (`start` itself when there is no digit there).
fn consume_digits(chars: &[char], start: usize, out: &mut String) -> usize {
    let run = chars
        .get(start..)
        .unwrap_or(&[])
        .iter()
        .take_while(|c| c.is_ascii_digit());
    let mut end = start;
    for &digit in run {
        out.push(digit);
        end += 1;
    }
    end
}
170
/// ASCII control-code mnemonics as emitted for non-printable characters (`\ESC`, `\SOH`, …),
/// longest first so maximal-munch matching prefers `SOH` over `SO`.
///
/// Each entry pairs a mnemonic with its character code. `lex_mnemonic_escape` scans the table
/// front to back and takes the first entry that matches, which is why every three-letter name
/// must stay ahead of the two-letter names.
const CONTROL_MNEMONICS: &[(&str, u32)] = &[
    // Three-letter mnemonics.
    ("NUL", 0),
    ("SOH", 1),
    ("STX", 2),
    ("ETX", 3),
    ("EOT", 4),
    ("ENQ", 5),
    ("ACK", 6),
    ("BEL", 7),
    ("DLE", 16),
    ("DC1", 17),
    ("DC2", 18),
    ("DC3", 19),
    ("DC4", 20),
    ("NAK", 21),
    ("SYN", 22),
    ("ETB", 23),
    ("CAN", 24),
    ("SUB", 26),
    ("ESC", 27),
    ("DEL", 127),
    // Two-letter mnemonics.
    ("BS", 8),
    ("HT", 9),
    ("LF", 10),
    ("VT", 11),
    ("FF", 12),
    ("CR", 13),
    ("SO", 14),
    ("SI", 15),
    ("EM", 25),
    ("FS", 28),
    ("GS", 29),
    ("RS", 30),
    ("US", 31),
    ("SP", 32),
];
209
210fn lex_string(chars: &[char], start: usize) -> Result<(String, usize)> {
211    let mut pos = start + 1;
212    let mut text = String::new();
213    loop {
214        match chars.get(pos) {
215            None => return Err(syntax_error("unterminated string literal")),
216            Some('"') => return Ok((text, pos + 1)),
217            Some('\\') => pos = lex_escape(chars, pos, &mut text)?,
218            Some(&c) => {
219                text.push(c);
220                pos += 1;
221            }
222        }
223    }
224}
225
226/// Decodes one escape sequence starting at the backslash at `pos`, appending its character (if
227/// any) to `text`, and returns the index just past the sequence.
228fn lex_escape(chars: &[char], pos: usize, text: &mut String) -> Result<usize> {
229    let escaped = chars
230        .get(pos + 1)
231        .copied()
232        .ok_or_else(|| syntax_error("dangling escape at end of string"))?;
233    match escaped {
234        'n' => Ok(push_char(text, '\n', pos + 2)),
235        't' => Ok(push_char(text, '\t', pos + 2)),
236        'r' => Ok(push_char(text, '\r', pos + 2)),
237        'f' => Ok(push_char(text, '\u{0C}', pos + 2)),
238        'v' => Ok(push_char(text, '\u{0B}', pos + 2)),
239        'a' => Ok(push_char(text, '\u{07}', pos + 2)),
240        'b' => Ok(push_char(text, '\u{08}', pos + 2)),
241        '\\' => Ok(push_char(text, '\\', pos + 2)),
242        '"' => Ok(push_char(text, '"', pos + 2)),
243        '\'' => Ok(push_char(text, '\'', pos + 2)),
244        '&' => Ok(pos + 2),
245        '^' => {
246            let control = chars
247                .get(pos + 2)
248                .copied()
249                .ok_or_else(|| syntax_error("dangling control escape"))?;
250            let code = (control as u32)
251                .checked_sub(64)
252                .ok_or_else(|| syntax_error("invalid control escape"))?;
253            Ok(push_char(text, code_to_char(code)?, pos + 3))
254        }
255        'x' => lex_radix_escape(chars, pos + 2, 16, text),
256        'o' => lex_radix_escape(chars, pos + 2, 8, text),
257        d if d.is_ascii_digit() => lex_decimal_escape(chars, pos + 1, text),
258        w if w.is_whitespace() => lex_gap(chars, pos + 1),
259        u if u.is_ascii_uppercase() => lex_mnemonic_escape(chars, pos + 1, text),
260        other => Err(syntax_error(format!("unknown string escape '\\{other}'"))),
261    }
262}
263
/// Appends `c` to `text` and hands back `next` unchanged, so a caller can push a decoded
/// character and yield its follow-on index in one expression.
fn push_char(text: &mut String, c: char, next: usize) -> usize {
    // `Option<char>` iterates over its single character.
    text.extend(Some(c));
    next
}
268
269fn code_to_char(code: u32) -> Result<char> {
270    char::from_u32(code).ok_or_else(|| syntax_error(format!("invalid character code {code}")))
271}
272
273fn lex_decimal_escape(chars: &[char], start: usize, text: &mut String) -> Result<usize> {
274    let mut pos = start;
275    let mut code: u32 = 0;
276    while let Some(&c) = chars.get(pos) {
277        if let Some(digit) = c.to_digit(10) {
278            code = code
279                .checked_mul(10)
280                .and_then(|value| value.checked_add(digit))
281                .ok_or_else(|| syntax_error("character code out of range"))?;
282            pos += 1;
283        } else {
284            break;
285        }
286    }
287    Ok(push_char(text, code_to_char(code)?, pos))
288}
289
290fn lex_radix_escape(chars: &[char], start: usize, radix: u32, text: &mut String) -> Result<usize> {
291    let mut pos = start;
292    let mut code: u32 = 0;
293    let mut seen = false;
294    while let Some(&c) = chars.get(pos) {
295        if let Some(digit) = c.to_digit(radix) {
296            code = code
297                .checked_mul(radix)
298                .and_then(|value| value.checked_add(digit))
299                .ok_or_else(|| syntax_error("character code out of range"))?;
300            seen = true;
301            pos += 1;
302        } else {
303            break;
304        }
305    }
306    if !seen {
307        return Err(syntax_error("empty numeric escape"));
308    }
309    Ok(push_char(text, code_to_char(code)?, pos))
310}
311
312fn lex_mnemonic_escape(chars: &[char], start: usize, text: &mut String) -> Result<usize> {
313    for &(name, code) in CONTROL_MNEMONICS {
314        if mnemonic_matches(chars, start, name) {
315            return Ok(push_char(text, code_to_char(code)?, start + name.len()));
316        }
317    }
318    Err(syntax_error("unknown control-code escape"))
319}
320
/// Reports whether the characters of `name` appear in `chars` consecutively from `start`.
/// An empty `name` always matches; running past the end of `chars` never does.
fn mnemonic_matches(chars: &[char], start: usize, name: &str) -> bool {
    let mut cursor = start;
    for expected in name.chars() {
        if chars.get(cursor) != Some(&expected) {
            return false;
        }
        cursor += 1;
    }
    true
}
326
327/// A string gap (`\<whitespace>\`) carries no character; skip the whitespace run and its closing
328/// backslash.
329fn lex_gap(chars: &[char], start: usize) -> Result<usize> {
330    let mut pos = start;
331    while let Some(&c) = chars.get(pos) {
332        if c.is_whitespace() {
333            pos += 1;
334        } else {
335            break;
336        }
337    }
338    if chars.get(pos) == Some(&'\\') {
339        Ok(pos + 1)
340    } else {
341        Err(syntax_error("unterminated string gap"))
342    }
343}
344
/// Recursive-descent parser state: the token stream and a cursor into it.
struct Parser {
    /// The tokens to parse. `take` overwrites a consumed slot with a placeholder token.
    tokens: Vec<Token>,
    /// Index of the next token to read; equal to `tokens.len()` once everything is consumed.
    pos: usize,
}
349
/// Defines a parser method that reads a constructor name and maps it to a value of `$ty`. Each arm
/// pairs a constructor name with the expression it produces (which may itself consume further tokens,
/// as a constructor carrying a payload does); an unrecognized name is a syntax error naming `$label`.
///
/// Invocation shape:
///
/// ```text
/// parse_constructor! {
///     method_name -> ReturnType, "label used in the error" {
///         "ConstructorName" => value_expression,
///     }
/// }
/// ```
macro_rules! parse_constructor {
    (
        $method:ident -> $ty:ty, $label:literal {
            $( $tag:literal => $value:expr ),* $(,)?
        }
    ) => {
        fn $method(&mut self) -> Result<$ty> {
            match self.constructor()?.as_str() {
                $( $tag => Ok($value), )*
                // The error message is assembled at compile time as "unknown <label> '{}'".
                other => {
                    Err(syntax_error(format!(concat!("unknown ", $label, " '{}'"), other)))
                }
            }
        }
    };
}
369
370impl Parser {
371    fn peek(&self) -> Option<&Token> {
372        self.tokens.get(self.pos)
373    }
374
375    fn peek_ident(&self) -> Option<&str> {
376        match self.peek() {
377            Some(Token::Ident(name)) => Some(name.as_str()),
378            _ => None,
379        }
380    }
381
382    fn advance(&mut self) -> Result<Token> {
383        match self.tokens.get(self.pos) {
384            Some(token) => {
385                let token = token.clone();
386                self.pos += 1;
387                Ok(token)
388            }
389            None => Err(syntax_error("unexpected end of input")),
390        }
391    }
392
393    /// Consume the current token by moving it out of the stream, leaving a cheap placeholder behind.
394    /// Used where the token owns heap data (idents, strings, numbers) that the caller takes ownership
395    /// of; consumed positions are never revisited.
396    fn take(&mut self) -> Result<Token> {
397        match self.tokens.get_mut(self.pos) {
398            Some(slot) => {
399                let token = std::mem::replace(slot, Token::Comma);
400                self.pos += 1;
401                Ok(token)
402            }
403            None => Err(syntax_error("unexpected end of input")),
404        }
405    }
406
407    fn eat(&mut self, expected: &Token) -> Result<()> {
408        match self.tokens.get(self.pos) {
409            Some(found) if found == expected => {
410                self.pos += 1;
411                Ok(())
412            }
413            Some(found) => Err(syntax_error(format!(
414                "expected {expected:?}, found {found:?}"
415            ))),
416            None => Err(syntax_error("unexpected end of input")),
417        }
418    }
419
420    fn eat_ident(&mut self, name: &str) -> Result<()> {
421        match self.tokens.get(self.pos) {
422            Some(Token::Ident(found)) if found == name => {
423                self.pos += 1;
424                Ok(())
425            }
426            Some(found) => Err(syntax_error(format!("expected '{name}', found {found:?}"))),
427            None => Err(syntax_error("unexpected end of input")),
428        }
429    }
430
431    fn constructor(&mut self) -> Result<String> {
432        match self.take()? {
433            Token::Ident(name) => Ok(name),
434            found => Err(syntax_error(format!(
435                "expected a constructor, found {found:?}"
436            ))),
437        }
438    }
439
440    fn open_paren(&mut self) -> bool {
441        if self.peek() == Some(&Token::LParen) {
442            self.pos += 1;
443            true
444        } else {
445            false
446        }
447    }
448
449    fn close_if(&mut self, opened: bool) -> Result<()> {
450        if opened {
451            self.eat(&Token::RParen)
452        } else {
453            Ok(())
454        }
455    }
456
457    fn parse_list<T>(&mut self, element: fn(&mut Self) -> Result<T>) -> Result<Vec<T>> {
458        self.eat(&Token::LBracket)?;
459        let mut items = Vec::new();
460        if self.peek() == Some(&Token::RBracket) {
461            self.pos += 1;
462            return Ok(items);
463        }
464        loop {
465            items.push(element(self)?);
466            match self.advance()? {
467                Token::Comma => {}
468                Token::RBracket => break,
469                found => {
470                    return Err(syntax_error(format!(
471                        "expected ',' or ']', found {found:?}"
472                    )));
473                }
474            }
475        }
476        Ok(items)
477    }
478
479    fn parse_string(&mut self) -> Result<String> {
480        match self.take()? {
481            Token::Str(text) => Ok(text),
482            found => Err(syntax_error(format!("expected a string, found {found:?}"))),
483        }
484    }
485
486    fn parse_i32(&mut self) -> Result<i32> {
487        let opened = self.open_paren();
488        let value = match self.take()? {
489            Token::Num(number) => number
490                .parse::<i32>()
491                .map_err(|error| syntax_error(format!("invalid integer '{number}': {error}")))?,
492            found => {
493                return Err(syntax_error(format!(
494                    "expected an integer, found {found:?}"
495                )));
496            }
497        };
498        self.close_if(opened)?;
499        Ok(value)
500    }
501
502    fn parse_f64(&mut self) -> Result<f64> {
503        let opened = self.open_paren();
504        let value = match self.take()? {
505            Token::Num(number) => number
506                .parse::<f64>()
507                .map_err(|error| syntax_error(format!("invalid number '{number}': {error}")))?,
508            found => return Err(syntax_error(format!("expected a number, found {found:?}"))),
509        };
510        self.close_if(opened)?;
511        Ok(value)
512    }
513
514    fn parse_document(&mut self) -> Result<Document> {
515        if self.peek_ident() == Some("Pandoc") {
516            self.pos += 1;
517            let meta = self.parse_meta()?;
518            let blocks = self.parse_block_list()?;
519            return Ok(Document {
520                meta: meta.into_iter().map(|(k, v)| (k.into(), v)).collect(),
521                blocks,
522                ..Default::default()
523            });
524        }
525        if self.peek() == Some(&Token::LBracket) {
526            let blocks = match self.tokens.get(self.pos + 1) {
527                Some(Token::RBracket) => self.parse_block_list()?,
528                Some(Token::Ident(name)) if is_block_tag(name) => self.parse_block_list()?,
529                Some(Token::Ident(name)) if is_inline_tag(name) => {
530                    vec![Block::Plain(self.parse_inline_list()?)]
531                }
532                _ => return Err(syntax_error("unrecognized list element")),
533            };
534            return Ok(Document {
535                blocks,
536                ..Default::default()
537            });
538        }
539        match self.peek_ident() {
540            Some(name) if is_block_tag(name) => {
541                let block = self.parse_block()?;
542                Ok(Document {
543                    blocks: vec![block],
544                    ..Default::default()
545                })
546            }
547            Some(name) if is_inline_tag(name) => {
548                let inline = self.parse_inline()?;
549                Ok(Document {
550                    blocks: vec![Block::Plain(vec![inline])],
551                    ..Default::default()
552                })
553            }
554            _ => Err(syntax_error("input is not a recognized native document")),
555        }
556    }
557
558    fn parse_meta(&mut self) -> Result<BTreeMap<String, MetaValue>> {
559        let opened = self.open_paren();
560        self.eat_ident("Meta")?;
561        self.eat(&Token::LBrace)?;
562        self.eat_ident("unMeta")?;
563        self.eat(&Token::Equals)?;
564        let map = self.parse_from_list()?;
565        self.eat(&Token::RBrace)?;
566        self.close_if(opened)?;
567        Ok(map)
568    }
569
570    fn parse_from_list(&mut self) -> Result<BTreeMap<String, MetaValue>> {
571        let opened = self.open_paren();
572        self.eat_ident("fromList")?;
573        let pairs = self.parse_list(Self::parse_meta_pair)?;
574        self.close_if(opened)?;
575        Ok(pairs.into_iter().collect())
576    }
577
578    fn parse_meta_pair(&mut self) -> Result<(String, MetaValue)> {
579        self.eat(&Token::LParen)?;
580        let key = self.parse_string()?;
581        self.eat(&Token::Comma)?;
582        let value = self.parse_meta_value()?;
583        self.eat(&Token::RParen)?;
584        Ok((key, value))
585    }
586
587    fn parse_meta_value(&mut self) -> Result<MetaValue> {
588        let name = self.constructor()?;
589        match name.as_str() {
590            "MetaMap" => Ok(MetaValue::MetaMap(
591                self.parse_from_list()?
592                    .into_iter()
593                    .map(|(k, v)| (k.into(), v))
594                    .collect(),
595            )),
596            "MetaList" => Ok(MetaValue::MetaList(
597                self.parse_list(Self::parse_meta_value)?,
598            )),
599            "MetaBool" => Ok(MetaValue::MetaBool(self.parse_bool()?)),
600            "MetaString" => Ok(MetaValue::MetaString(self.parse_string()?.into())),
601            "MetaInlines" => Ok(MetaValue::MetaInlines(self.parse_inline_list()?)),
602            "MetaBlocks" => Ok(MetaValue::MetaBlocks(self.parse_block_list()?)),
603            other => Err(syntax_error(format!("unknown metadata value '{other}'"))),
604        }
605    }
606
607    fn parse_bool(&mut self) -> Result<bool> {
608        match self.constructor()?.as_str() {
609            "True" => Ok(true),
610            "False" => Ok(false),
611            other => Err(syntax_error(format!("expected a boolean, found '{other}'"))),
612        }
613    }
614
615    fn parse_block_list(&mut self) -> Result<Vec<Block>> {
616        self.parse_list(Self::parse_block)
617    }
618
619    fn parse_inline_list(&mut self) -> Result<Vec<Inline>> {
620        self.parse_list(Self::parse_inline)
621    }
622
623    fn parse_block(&mut self) -> Result<Block> {
624        let name = self.constructor()?;
625        match name.as_str() {
626            "Plain" => Ok(Block::Plain(self.parse_inline_list()?)),
627            "Para" => Ok(Block::Para(self.parse_inline_list()?)),
628            "LineBlock" => Ok(Block::LineBlock(self.parse_list(Self::parse_inline_list)?)),
629            "CodeBlock" => {
630                let attr = self.parse_attr()?;
631                let text = self.parse_string()?;
632                Ok(Block::CodeBlock(Box::new(attr), text.into()))
633            }
634            "RawBlock" => {
635                let format = self.parse_format()?;
636                let text = self.parse_string()?;
637                Ok(Block::RawBlock(format, text.into()))
638            }
639            "BlockQuote" => Ok(Block::BlockQuote(self.parse_block_list()?)),
640            "OrderedList" => {
641                let attributes = self.parse_list_attributes()?;
642                let items = self.parse_list(Self::parse_block_list)?;
643                Ok(Block::OrderedList(attributes, items))
644            }
645            "BulletList" => Ok(Block::BulletList(self.parse_list(Self::parse_block_list)?)),
646            "DefinitionList" => Ok(Block::DefinitionList(
647                self.parse_list(Self::parse_definition_item)?,
648            )),
649            "Header" => {
650                let level = self.parse_i32()?;
651                let attr = self.parse_attr()?;
652                let inlines = self.parse_inline_list()?;
653                Ok(Block::Header(level, Box::new(attr), inlines))
654            }
655            "HorizontalRule" => Ok(Block::HorizontalRule),
656            "Table" => Ok(Block::Table(Box::new(self.parse_table()?))),
657            "Figure" => {
658                let attr = self.parse_attr()?;
659                let caption = self.parse_caption()?;
660                let blocks = self.parse_block_list()?;
661                Ok(Block::Figure(Box::new(attr), Box::new(caption), blocks))
662            }
663            "Div" => {
664                let attr = self.parse_attr()?;
665                let blocks = self.parse_block_list()?;
666                Ok(Block::Div(Box::new(attr), blocks))
667            }
668            other => Err(syntax_error(format!("unknown block '{other}'"))),
669        }
670    }
671
672    fn parse_inline(&mut self) -> Result<Inline> {
673        let name = self.constructor()?;
674        match name.as_str() {
675            "Str" => Ok(Inline::Str(self.parse_string()?.into())),
676            "Emph" => Ok(Inline::Emph(self.parse_inline_list()?)),
677            "Underline" => Ok(Inline::Underline(self.parse_inline_list()?)),
678            "Strong" => Ok(Inline::Strong(self.parse_inline_list()?)),
679            "Strikeout" => Ok(Inline::Strikeout(self.parse_inline_list()?)),
680            "Superscript" => Ok(Inline::Superscript(self.parse_inline_list()?)),
681            "Subscript" => Ok(Inline::Subscript(self.parse_inline_list()?)),
682            "SmallCaps" => Ok(Inline::SmallCaps(self.parse_inline_list()?)),
683            "Quoted" => {
684                let quote = self.parse_quote_type()?;
685                let inlines = self.parse_inline_list()?;
686                Ok(Inline::Quoted(quote, inlines))
687            }
688            "Cite" => {
689                let citations = self.parse_list(Self::parse_citation)?;
690                let inlines = self.parse_inline_list()?;
691                Ok(Inline::Cite(citations, inlines))
692            }
693            "Code" => {
694                let attr = self.parse_attr()?;
695                let text = self.parse_string()?;
696                Ok(Inline::Code(Box::new(attr), text.into()))
697            }
698            "Space" => Ok(Inline::Space),
699            "SoftBreak" => Ok(Inline::SoftBreak),
700            "LineBreak" => Ok(Inline::LineBreak),
701            "Math" => {
702                let math = self.parse_math_type()?;
703                let text = self.parse_string()?;
704                Ok(Inline::Math(math, text.into()))
705            }
706            "RawInline" => {
707                let format = self.parse_format()?;
708                let text = self.parse_string()?;
709                Ok(Inline::RawInline(format, text.into()))
710            }
711            "Link" => {
712                let attr = self.parse_attr()?;
713                let inlines = self.parse_inline_list()?;
714                let target = self.parse_target()?;
715                Ok(Inline::Link(Box::new(attr), inlines, Box::new(target)))
716            }
717            "Image" => {
718                let attr = self.parse_attr()?;
719                let inlines = self.parse_inline_list()?;
720                let target = self.parse_target()?;
721                Ok(Inline::Image(Box::new(attr), inlines, Box::new(target)))
722            }
723            "Note" => Ok(Inline::Note(self.parse_block_list()?)),
724            "Span" => {
725                let attr = self.parse_attr()?;
726                let inlines = self.parse_inline_list()?;
727                Ok(Inline::Span(Box::new(attr), inlines))
728            }
729            other => Err(syntax_error(format!("unknown inline '{other}'"))),
730        }
731    }
732
733    fn parse_attr(&mut self) -> Result<Attr> {
734        self.eat(&Token::LParen)?;
735        let id = self.parse_string()?;
736        self.eat(&Token::Comma)?;
737        let classes = self.parse_list(Self::parse_string)?;
738        self.eat(&Token::Comma)?;
739        let attributes = self.parse_list(Self::parse_string_pair)?;
740        self.eat(&Token::RParen)?;
741        Ok(Attr {
742            id: id.into(),
743            classes: classes.into_iter().map(Into::into).collect(),
744            attributes: attributes
745                .into_iter()
746                .map(|(k, v)| (k.into(), v.into()))
747                .collect(),
748        })
749    }
750
751    fn parse_string_pair(&mut self) -> Result<(String, String)> {
752        self.eat(&Token::LParen)?;
753        let key = self.parse_string()?;
754        self.eat(&Token::Comma)?;
755        let value = self.parse_string()?;
756        self.eat(&Token::RParen)?;
757        Ok((key, value))
758    }
759
760    fn parse_target(&mut self) -> Result<Target> {
761        self.eat(&Token::LParen)?;
762        let url = self.parse_string()?;
763        self.eat(&Token::Comma)?;
764        let title = self.parse_string()?;
765        self.eat(&Token::RParen)?;
766        Ok(Target {
767            url: url.into(),
768            title: title.into(),
769        })
770    }
771
772    fn parse_format(&mut self) -> Result<Format> {
773        let opened = self.open_paren();
774        self.eat_ident("Format")?;
775        let name = self.parse_string()?;
776        self.close_if(opened)?;
777        Ok(Format(name.into()))
778    }
779
780    fn parse_list_attributes(&mut self) -> Result<ListAttributes> {
781        self.eat(&Token::LParen)?;
782        let start = self.parse_i32()?;
783        self.eat(&Token::Comma)?;
784        let style = self.parse_list_number_style()?;
785        self.eat(&Token::Comma)?;
786        let delim = self.parse_list_number_delim()?;
787        self.eat(&Token::RParen)?;
788        Ok(ListAttributes {
789            start,
790            style,
791            delim,
792        })
793    }
794
795    fn parse_definition_item(&mut self) -> Result<(Vec<Inline>, Vec<Vec<Block>>)> {
796        self.eat(&Token::LParen)?;
797        let term = self.parse_inline_list()?;
798        self.eat(&Token::Comma)?;
799        let definitions = self.parse_list(Self::parse_block_list)?;
800        self.eat(&Token::RParen)?;
801        Ok((term, definitions))
802    }
803
    // Generates `parse_quote_type`: reads one constructor name and maps it to a `QuoteType`.
    // Any other name is reported as an unknown "quote type".
    parse_constructor! {
        parse_quote_type -> QuoteType, "quote type" {
            "SingleQuote" => QuoteType::SingleQuote,
            "DoubleQuote" => QuoteType::DoubleQuote,
        }
    }
810
    // Generates `parse_math_type`: reads one constructor name and maps it to a `MathType`.
    // Any other name is reported as an unknown "math type".
    parse_constructor! {
        parse_math_type -> MathType, "math type" {
            "InlineMath" => MathType::InlineMath,
            "DisplayMath" => MathType::DisplayMath,
        }
    }
817
    // Generates `parse_list_number_style`: reads one constructor name and maps it to a
    // `ListNumberStyle`. Any other name is reported as an unknown "list number style".
    parse_constructor! {
        parse_list_number_style -> ListNumberStyle, "list number style" {
            "DefaultStyle" => ListNumberStyle::DefaultStyle,
            "Example" => ListNumberStyle::Example,
            "Decimal" => ListNumberStyle::Decimal,
            "LowerRoman" => ListNumberStyle::LowerRoman,
            "UpperRoman" => ListNumberStyle::UpperRoman,
            "LowerAlpha" => ListNumberStyle::LowerAlpha,
            "UpperAlpha" => ListNumberStyle::UpperAlpha,
        }
    }
829
    // Generates `parse_list_number_delim`: reads one constructor name and maps it to a
    // `ListNumberDelim`. Any other name is reported as an unknown "list number delimiter".
    parse_constructor! {
        parse_list_number_delim -> ListNumberDelim, "list number delimiter" {
            "DefaultDelim" => ListNumberDelim::DefaultDelim,
            "Period" => ListNumberDelim::Period,
            "OneParen" => ListNumberDelim::OneParen,
            "TwoParens" => ListNumberDelim::TwoParens,
        }
    }
838
    // Generates `parse_citation_mode`: reads one constructor name and maps it to a
    // `CitationMode`. Any other name is reported as an unknown "citation mode".
    parse_constructor! {
        parse_citation_mode -> CitationMode, "citation mode" {
            "AuthorInText" => CitationMode::AuthorInText,
            "SuppressAuthor" => CitationMode::SuppressAuthor,
            "NormalCitation" => CitationMode::NormalCitation,
        }
    }
846
    // Generates `parse_alignment`: reads one constructor name and maps it to an `Alignment`.
    // Any other name is reported as an unknown "alignment".
    parse_constructor! {
        parse_alignment -> Alignment, "alignment" {
            "AlignLeft" => Alignment::AlignLeft,
            "AlignRight" => Alignment::AlignRight,
            "AlignCenter" => Alignment::AlignCenter,
            "AlignDefault" => Alignment::AlignDefault,
        }
    }
855
856    fn parse_col_width(&mut self) -> Result<ColWidth> {
857        match self.constructor()?.as_str() {
858            "ColWidthDefault" => Ok(ColWidth::ColWidthDefault),
859            "ColWidth" => Ok(ColWidth::ColWidth(self.parse_f64()?)),
860            other => Err(syntax_error(format!("unknown column width '{other}'"))),
861        }
862    }
863
864    fn parse_col_spec(&mut self) -> Result<ColSpec> {
865        self.eat(&Token::LParen)?;
866        let align = self.parse_alignment()?;
867        self.eat(&Token::Comma)?;
868        let width = self.parse_col_width()?;
869        self.eat(&Token::RParen)?;
870        Ok(ColSpec { align, width })
871    }
872
873    fn parse_citation(&mut self) -> Result<Citation> {
874        let opened = self.open_paren();
875        self.eat_ident("Citation")?;
876        self.eat(&Token::LBrace)?;
877        let mut citation = Citation {
878            id: carta_ast::Text::default(),
879            prefix: Vec::new(),
880            suffix: Vec::new(),
881            mode: CitationMode::NormalCitation,
882            note_num: 0,
883            hash: 0,
884        };
885        loop {
886            let field = self.constructor()?;
887            self.eat(&Token::Equals)?;
888            match field.as_str() {
889                "citationId" => citation.id = self.parse_string()?.into(),
890                "citationPrefix" => citation.prefix = self.parse_inline_list()?,
891                "citationSuffix" => citation.suffix = self.parse_inline_list()?,
892                "citationMode" => citation.mode = self.parse_citation_mode()?,
893                "citationNoteNum" => citation.note_num = self.parse_i32()?,
894                "citationHash" => citation.hash = self.parse_i32()?,
895                other => return Err(syntax_error(format!("unknown citation field '{other}'"))),
896            }
897            match self.advance()? {
898                Token::Comma => {}
899                Token::RBrace => break,
900                found => {
901                    return Err(syntax_error(format!(
902                        "expected ',' or '}}', found {found:?}"
903                    )));
904                }
905            }
906        }
907        self.close_if(opened)?;
908        Ok(citation)
909    }
910
911    fn parse_caption(&mut self) -> Result<Caption> {
912        let opened = self.open_paren();
913        self.eat_ident("Caption")?;
914        let short = self.parse_maybe_inlines()?;
915        let long = self.parse_block_list()?;
916        self.close_if(opened)?;
917        Ok(Caption { short, long })
918    }
919
920    fn parse_maybe_inlines(&mut self) -> Result<Option<Vec<Inline>>> {
921        let opened = self.open_paren();
922        let result = if self.peek_ident() == Some("Nothing") {
923            self.pos += 1;
924            None
925        } else {
926            self.eat_ident("Just")?;
927            Some(self.parse_inline_list()?)
928        };
929        self.close_if(opened)?;
930        Ok(result)
931    }
932
933    fn parse_table(&mut self) -> Result<Table> {
934        let attr = self.parse_attr()?;
935        let caption = self.parse_caption()?;
936        let col_specs = self.parse_list(Self::parse_col_spec)?;
937        let head = self.parse_table_head()?;
938        let bodies = self.parse_list(Self::parse_table_body)?;
939        let foot = self.parse_table_foot()?;
940        Ok(Table {
941            attr,
942            caption,
943            col_specs,
944            head,
945            bodies,
946            foot,
947        })
948    }
949
950    fn parse_table_head(&mut self) -> Result<TableHead> {
951        let opened = self.open_paren();
952        self.eat_ident("TableHead")?;
953        let attr = self.parse_attr()?;
954        let rows = self.parse_list(Self::parse_row)?;
955        self.close_if(opened)?;
956        Ok(TableHead { attr, rows })
957    }
958
959    fn parse_table_foot(&mut self) -> Result<TableFoot> {
960        let opened = self.open_paren();
961        self.eat_ident("TableFoot")?;
962        let attr = self.parse_attr()?;
963        let rows = self.parse_list(Self::parse_row)?;
964        self.close_if(opened)?;
965        Ok(TableFoot { attr, rows })
966    }
967
968    fn parse_table_body(&mut self) -> Result<TableBody> {
969        let opened = self.open_paren();
970        self.eat_ident("TableBody")?;
971        let attr = self.parse_attr()?;
972        let row_head_columns = self.parse_int_newtype("RowHeadColumns")?;
973        let head = self.parse_list(Self::parse_row)?;
974        let body = self.parse_list(Self::parse_row)?;
975        self.close_if(opened)?;
976        Ok(TableBody {
977            attr,
978            row_head_columns,
979            head,
980            body,
981        })
982    }
983
984    fn parse_row(&mut self) -> Result<Row> {
985        let opened = self.open_paren();
986        self.eat_ident("Row")?;
987        let attr = self.parse_attr()?;
988        let cells = self.parse_list(Self::parse_cell)?;
989        self.close_if(opened)?;
990        Ok(Row { attr, cells })
991    }
992
993    fn parse_cell(&mut self) -> Result<Cell> {
994        let opened = self.open_paren();
995        self.eat_ident("Cell")?;
996        let attr = self.parse_attr()?;
997        let align = self.parse_alignment()?;
998        let row_span = self.parse_int_newtype("RowSpan")?;
999        let col_span = self.parse_int_newtype("ColSpan")?;
1000        let content = self.parse_block_list()?;
1001        self.close_if(opened)?;
1002        Ok(Cell {
1003            attr,
1004            align,
1005            row_span,
1006            col_span,
1007            content,
1008        })
1009    }
1010
1011    fn parse_int_newtype(&mut self, name: &str) -> Result<i32> {
1012        let opened = self.open_paren();
1013        self.eat_ident(name)?;
1014        let value = self.parse_i32()?;
1015        self.close_if(opened)?;
1016        Ok(value)
1017    }
1018}
1019
1020fn is_block_tag(name: &str) -> bool {
1021    carta_ast::BLOCK_TAGS.contains(&name)
1022}
1023
1024fn is_inline_tag(name: &str) -> bool {
1025    carta_ast::INLINE_TAGS.contains(&name)
1026}
1027
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads `input` with the native reader under default options, panicking if it fails.
    fn parse(input: &str) -> Document {
        NativeReader
            .read(input, &ReaderOptions::default())
            .expect("native input should parse")
    }

    /// Reads `input`, which must be rejected, and returns the error rendered as a string.
    fn parse_err(input: &str) -> String {
        NativeReader
            .read(input, &ReaderOptions::default())
            .expect_err("native input should fail")
            .to_string()
    }

    /// Parses `input` and returns its sole block.
    ///
    /// Panics unless the document holds exactly one block, so a test cannot pass by accident
    /// when the reader produces extra blocks after the one under inspection.
    fn only_block(input: &str) -> Block {
        let Document { blocks, .. } = parse(input);
        let mut blocks = blocks.into_iter();
        match (blocks.next(), blocks.next()) {
            (Some(block), None) => block,
            _ => panic!("expected a single block"),
        }
    }

    /// Builds an `Inline::Str` holding `text`.
    fn str_inline(text: &str) -> Inline {
        Inline::Str(text.to_string().into())
    }

    #[test]
    fn parses_full_document_with_meta() {
        let document = parse(
            r#"Pandoc (Meta {unMeta = fromList [("title", MetaInlines [Str "Hi"])]}) [Para [Str "Body"]]"#,
        );
        assert_eq!(
            document.meta.get("title"),
            Some(&MetaValue::MetaInlines(vec![str_inline("Hi")]))
        );
        assert_eq!(document.blocks, vec![Block::Para(vec![str_inline("Body")])]);
    }

    #[test]
    fn parses_every_meta_value_shape() {
        let document = parse(
            r#"Pandoc (Meta {unMeta = fromList [("m", MetaMap (fromList [("k", MetaString "v")])), ("l", MetaList [MetaBool True, MetaBool False]), ("b", MetaBlocks [Plain [Str "p"]])]}) []"#,
        );
        assert_eq!(
            document.meta.get("m"),
            Some(&MetaValue::MetaMap(
                [(
                    "k".to_string().into(),
                    MetaValue::MetaString("v".to_string().into())
                )]
                .into_iter()
                .collect()
            ))
        );
        assert_eq!(
            document.meta.get("l"),
            Some(&MetaValue::MetaList(vec![
                MetaValue::MetaBool(true),
                MetaValue::MetaBool(false)
            ]))
        );
        assert_eq!(
            document.meta.get("b"),
            Some(&MetaValue::MetaBlocks(vec![Block::Plain(vec![
                str_inline("p")
            ])]))
        );
    }

    #[test]
    fn bare_block_list_is_wrapped_into_document() {
        let document = parse(r#"[Para [Str "a"], HorizontalRule]"#);
        assert_eq!(
            document.blocks,
            vec![Block::Para(vec![str_inline("a")]), Block::HorizontalRule]
        );
    }

    #[test]
    fn empty_list_is_an_empty_document() {
        assert_eq!(parse("[]").blocks, vec![]);
    }

    #[test]
    fn bare_inline_list_becomes_a_plain_block() {
        let document = parse(r#"[Str "a", Space, Str "b"]"#);
        assert_eq!(
            document.blocks,
            vec![Block::Plain(vec![
                str_inline("a"),
                Inline::Space,
                str_inline("b")
            ])]
        );
    }

    #[test]
    fn single_block_is_wrapped() {
        assert_eq!(only_block("HorizontalRule"), Block::HorizontalRule);
    }

    #[test]
    fn single_inline_becomes_a_plain_block() {
        assert_eq!(
            only_block(r#"Str "lonely""#),
            Block::Plain(vec![str_inline("lonely")])
        );
    }

    #[test]
    fn parses_code_block_with_attr() {
        assert_eq!(
            only_block(r#"CodeBlock ("i", ["rust", "numberLines"], [("k", "v")]) "let x = 1;""#),
            Block::CodeBlock(
                Box::new(Attr {
                    id: "i".to_string().into(),
                    classes: vec!["rust".to_string().into(), "numberLines".to_string().into()],
                    attributes: vec![("k".to_string().into(), "v".to_string().into())],
                }),
                "let x = 1;".to_string().into()
            )
        );
    }

    #[test]
    fn parses_raw_block_with_format_in_parens() {
        assert_eq!(
            only_block(r#"RawBlock (Format "html") "<hr>""#),
            Block::RawBlock(Format("html".to_string().into()), "<hr>".to_string().into())
        );
    }

    #[test]
    fn parses_line_block() {
        assert_eq!(
            only_block(r#"LineBlock [[Str "one"], [Str "two"]]"#),
            Block::LineBlock(vec![vec![str_inline("one")], vec![str_inline("two")]])
        );
    }

    #[test]
    fn parses_ordered_list_attributes() {
        assert_eq!(
            only_block(r#"OrderedList (3, UpperRoman, TwoParens) [[Plain [Str "x"]]]"#),
            Block::OrderedList(
                ListAttributes {
                    start: 3,
                    style: ListNumberStyle::UpperRoman,
                    delim: ListNumberDelim::TwoParens,
                },
                vec![vec![Block::Plain(vec![str_inline("x")])]]
            )
        );
    }

    #[test]
    fn parses_definition_list() {
        assert_eq!(
            only_block(r#"DefinitionList [([Str "term"], [[Plain [Str "def"]]])]"#),
            Block::DefinitionList(vec![(
                vec![str_inline("term")],
                vec![vec![Block::Plain(vec![str_inline("def")])]]
            )])
        );
    }

    #[test]
    fn parses_header_with_level_and_attr() {
        assert_eq!(
            only_block(r#"Header 2 ("h", [], []) [Str "Title"]"#),
            Block::Header(
                2,
                Box::new(Attr {
                    id: "h".to_string().into(),
                    classes: vec![],
                    attributes: vec![],
                }),
                vec![str_inline("Title")]
            )
        );
    }

    #[test]
    fn parses_div_and_blockquote() {
        assert_eq!(
            only_block(r#"Div ("d", [], []) [BlockQuote [Para [Str "q"]]]"#),
            Block::Div(
                Box::new(Attr {
                    id: "d".to_string().into(),
                    classes: vec![],
                    attributes: vec![],
                }),
                vec![Block::BlockQuote(vec![Block::Para(vec![str_inline("q")])])]
            )
        );
    }

    #[test]
    fn parses_figure_with_caption() {
        let block = only_block(
            r#"Figure ("f", [], []) (Caption Nothing [Plain [Str "cap"]]) [Para [Str "body"]]"#,
        );
        let Block::Figure(attr, caption, blocks) = block else {
            panic!("expected a figure");
        };
        assert_eq!(attr.id, "f");
        assert_eq!(caption.short, None);
        assert_eq!(caption.long, vec![Block::Plain(vec![str_inline("cap")])]);
        assert_eq!(blocks, vec![Block::Para(vec![str_inline("body")])]);
    }

    #[test]
    fn parses_caption_with_short_inlines() {
        let block =
            only_block(r#"Figure ("", [], []) (Caption (Just [Str "s"]) [Plain [Str "l"]]) []"#);
        let Block::Figure(_, caption, _) = block else {
            panic!("expected a figure");
        };
        assert_eq!(caption.short, Some(vec![str_inline("s")]));
    }

    #[test]
    fn parses_every_inline_constructor() {
        let block = only_block(
            r#"Para [Emph [Str "e"], Underline [Str "u"], Strong [Str "s"], Strikeout [Str "k"], Superscript [Str "p"], Subscript [Str "b"], SmallCaps [Str "c"], Space, SoftBreak, LineBreak]"#,
        );
        assert_eq!(
            block,
            Block::Para(vec![
                Inline::Emph(vec![str_inline("e")]),
                Inline::Underline(vec![str_inline("u")]),
                Inline::Strong(vec![str_inline("s")]),
                Inline::Strikeout(vec![str_inline("k")]),
                Inline::Superscript(vec![str_inline("p")]),
                Inline::Subscript(vec![str_inline("b")]),
                Inline::SmallCaps(vec![str_inline("c")]),
                Inline::Space,
                Inline::SoftBreak,
                Inline::LineBreak,
            ])
        );
    }

    #[test]
    fn parses_quoted_math_and_code_inlines() {
        let block = only_block(
            r#"Para [Quoted DoubleQuote [Str "q"], Math InlineMath "x^2", Code ("", [], []) "f()"]"#,
        );
        assert_eq!(
            block,
            Block::Para(vec![
                Inline::Quoted(QuoteType::DoubleQuote, vec![str_inline("q")]),
                Inline::Math(MathType::InlineMath, "x^2".to_string().into()),
                Inline::Code(Box::default(), "f()".to_string().into()),
            ])
        );
    }

    #[test]
    fn parses_link_image_span_and_note() {
        let block = only_block(
            r#"Para [Link ("", [], []) [Str "t"] ("/u", "ti"), Image ("", [], []) [Str "alt"] ("/i", ""), Span ("sp", [], []) [Str "s"], Note [Para [Str "n"]]]"#,
        );
        assert_eq!(
            block,
            Block::Para(vec![
                Inline::Link(
                    Box::default(),
                    vec![str_inline("t")],
                    Box::new(Target {
                        url: "/u".to_string().into(),
                        title: "ti".to_string().into()
                    })
                ),
                Inline::Image(
                    Box::default(),
                    vec![str_inline("alt")],
                    Box::new(Target {
                        url: "/i".to_string().into(),
                        title: carta_ast::Text::default()
                    })
                ),
                Inline::Span(
                    Box::new(Attr {
                        id: "sp".to_string().into(),
                        classes: vec![],
                        attributes: vec![],
                    }),
                    vec![str_inline("s")]
                ),
                Inline::Note(vec![Block::Para(vec![str_inline("n")])]),
            ])
        );
    }

    // Named for what the input actually contains: the format is written in parentheses, the
    // same shape the raw-block test uses.
    #[test]
    fn parses_raw_inline_with_format_in_parens() {
        let block = only_block(r#"Para [RawInline (Format "tex") "\\hi"]"#);
        assert_eq!(
            block,
            Block::Para(vec![Inline::RawInline(
                Format("tex".to_string().into()),
                "\\hi".to_string().into()
            )])
        );
    }

    #[test]
    fn parses_cite_with_all_fields() {
        // Every field carries a value different from the parser's starting value, so a field
        // that was silently skipped would fail its assertion below.
        let block = only_block(
            r#"Para [Cite [Citation {citationId = "x", citationPrefix = [Str "see"], citationSuffix = [Str "p1"], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 7}] [Str "[@x]"]]"#,
        );
        let Block::Para(inlines) = block else {
            panic!("expected a paragraph");
        };
        let citation = match inlines.first() {
            Some(Inline::Cite(citations, _)) => citations.first().cloned(),
            _ => None,
        };
        let citation = citation.expect("a citation");
        assert_eq!(citation.id, "x");
        assert_eq!(citation.prefix, vec![str_inline("see")]);
        assert_eq!(citation.suffix, vec![str_inline("p1")]);
        assert_eq!(citation.mode, CitationMode::AuthorInText);
        assert_eq!(citation.note_num, 2);
        assert_eq!(citation.hash, 7);
    }

    #[test]
    fn parses_table_with_head_body_and_foot() {
        let input = r#"Table ("", [], []) (Caption Nothing [])
            [(AlignDefault, ColWidthDefault), (AlignRight, ColWidth 0.5)]
            (TableHead ("", [], []) [Row ("", [], []) [Cell ("", [], []) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "H"]]]])
            [TableBody ("", [], []) (RowHeadColumns 0) [] [Row ("", [], []) [Cell ("", [], []) AlignLeft (RowSpan 1) (ColSpan 1) [Plain [Str "B"]]]]]
            (TableFoot ("", [], []) [])"#;
        let block = only_block(input);
        let Block::Table(table) = block else {
            panic!("expected a table");
        };
        assert_eq!(table.col_specs.len(), 2);
        assert_eq!(
            table.col_specs.last().map(|spec| spec.width.clone()),
            Some(ColWidth::ColWidth(0.5))
        );
        assert_eq!(table.head.rows.len(), 1);
        assert_eq!(table.bodies.len(), 1);
        assert_eq!(table.foot.rows.len(), 0);
    }

    #[test]
    fn decodes_simple_string_escapes() {
        let block = only_block(r#"Para [Str "a\nb\tc\rd\\e\"f"]"#);
        assert_eq!(block, Block::Para(vec![str_inline("a\nb\tc\rd\\e\"f")]));
    }

    #[test]
    fn decodes_control_and_numeric_escapes() {
        // \f \v \a \b control bytes, an empty \& separator, decimal, hex, and octal escapes.
        let block = only_block(r#"Para [Str "\f\v\a\b\&\65\x41\o101"]"#);
        assert_eq!(
            block,
            Block::Para(vec![str_inline("\u{0C}\u{0B}\u{07}\u{08}AAA")])
        );
    }

    #[test]
    fn decodes_caret_and_mnemonic_control_escapes() {
        // \^A is control-A (U+0001); \ESC and \NUL are mnemonic control codes.
        let block = only_block(r#"Para [Str "\^A\ESC\NUL"]"#);
        assert_eq!(block, Block::Para(vec![str_inline("\u{01}\u{1B}\u{00}")]));
    }

    #[test]
    fn decodes_string_gap() {
        let block = only_block("Para [Str \"a\\   \\b\"]");
        assert_eq!(block, Block::Para(vec![str_inline("ab")]));
    }

    #[test]
    fn parses_negative_and_floating_numbers() {
        assert_eq!(
            only_block(r"OrderedList (-2, Decimal, Period) []"),
            Block::OrderedList(
                ListAttributes {
                    start: -2,
                    style: ListNumberStyle::Decimal,
                    delim: ListNumberDelim::Period,
                },
                vec![]
            )
        );
        let block = only_block(
            r#"Table ("", [], []) (Caption Nothing []) [(AlignDefault, ColWidth 1.5e-1)] (TableHead ("", [], []) []) [] (TableFoot ("", [], []) [])"#,
        );
        let Block::Table(table) = block else {
            panic!("expected a table");
        };
        assert_eq!(
            table.col_specs.first().map(|spec| spec.width.clone()),
            Some(ColWidth::ColWidth(0.15))
        );
    }

    #[test]
    fn rejects_unterminated_string() {
        assert!(parse_err(r#"Para [Str "oops]"#).contains("unterminated string"));
    }

    #[test]
    fn rejects_unexpected_character() {
        assert!(parse_err("Para [Str @]").contains("unexpected character"));
    }

    #[test]
    fn rejects_unknown_constructor() {
        assert!(parse_err("Bogus []").contains("not a recognized native document"));
    }

    // Named for the error it asserts: `Wat` sits in the inline list of a `Para`.
    #[test]
    fn rejects_unknown_inline_in_list() {
        assert!(parse_err("Para [Wat]").contains("unknown inline"));
    }

    #[test]
    fn rejects_trailing_input() {
        assert!(parse_err("HorizontalRule HorizontalRule").contains("trailing input"));
    }

    #[test]
    fn rejects_unknown_escape() {
        assert!(parse_err(r#"Para [Str "\q"]"#).contains("unknown string escape"));
    }
}