Skip to main content

brief/
parser.rs

1use crate::ast::{Block, CodeAttrs, Document, Inline, ListItem, Row, ShortArgs, TaskState};
2use crate::diag::{Code, Diagnostic};
3use crate::inline::{parse_args, parse_inline};
4use crate::span::{SourceMap, Span};
5use crate::token::{Token, TokenKind};
6
7pub fn parse(tokens: Vec<Token>, src: &SourceMap) -> (Document, Vec<Diagnostic>) {
8    let (metadata, fm_consumed, fm_diags) = parse_frontmatter(&tokens, src);
9    let mut p = Parser {
10        _src: src,
11        toks: tokens,
12        pos: fm_consumed,
13        diags: fm_diags,
14    };
15    let mut blocks = p.parse_blocks(0, None);
16    // Anything left after a top-level parse must be a stray `@end`.
17    while !p.at_eof() {
18        let span = p.peek().span;
19        match &p.peek().kind {
20            TokenKind::Eof => break,
21            TokenKind::Blank => {
22                p.pos += 1;
23            }
24            TokenKind::Line(s) if s.trim() == "@end" => {
25                p.diags.push(Diagnostic::new(Code::StrayEnd, span));
26                p.pos += 1;
27            }
28            _ => {
29                blocks.append(&mut p.parse_blocks(0, None));
30            }
31        }
32    }
33    (
34        Document {
35            blocks,
36            metadata,
37            resolved_refs: Default::default(),
38        },
39        p.diags,
40    )
41}
42
43fn parse_frontmatter(
44    toks: &[Token],
45    src: &SourceMap,
46) -> (Option<toml::Table>, usize, Vec<Diagnostic>) {
47    // Returns (metadata, tokens_consumed, diagnostics).
48    //
49    // Frontmatter must be the very first content. We detect it by checking
50    // that the first token is `Line("+++")` at byte offset 0 with indent 0.
51    // If anything else comes first (a Blank, a comment, an indented `+++`,
52    // or a non-`+++` line), there is no frontmatter and we return
53    // (None, 0, empty).
54    if toks.is_empty() {
55        return (None, 0, Vec::new());
56    }
57    let first = &toks[0];
58    let opens = match &first.kind {
59        TokenKind::Line(s) => s == "+++" && first.indent == 0 && first.span.start == 0,
60        _ => false,
61    };
62    if !opens {
63        return (None, 0, Vec::new());
64    }
65
66    // Body starts at the first byte after `+++\n` (or `+++\r\n`).
67    // The lexer's next token's `span.start` is exactly that byte.
68    let mut idx = 1usize;
69    let body_start = toks
70        .get(idx)
71        .map(|t| t.span.start as usize)
72        .unwrap_or(src.source.len());
73
74    let mut diags = Vec::new();
75    while idx < toks.len() {
76        match &toks[idx].kind {
77            TokenKind::Eof => {
78                diags.push(
79                    Diagnostic::new(Code::UnterminatedFrontmatter, first.span)
80                        .label("frontmatter opened with `+++` is never closed"),
81                );
82                return (None, idx, diags);
83            }
84            TokenKind::Line(s) if s == "+++" && toks[idx].indent == 0 => {
85                let close = &toks[idx];
86                let body_end = close.span.start as usize;
87                let body = &src.source[body_start..body_end];
88                idx += 1; // consume the closing `+++`
89                match toml::from_str::<toml::Table>(body) {
90                    Ok(t) => return (Some(t), idx, diags),
91                    Err(e) => {
92                        let (off, len) = match e.span() {
93                            Some(r) => (body_start + r.start, r.end - r.start),
94                            None => (body_start, body.len()),
95                        };
96                        // `.max(1)` keeps the diagnostic caret renderable; a
97                        // zero-length span produces no caret in the error UI.
98                        let span = Span::new(off, len.max(1));
99                        diags.push(
100                            Diagnostic::new(Code::FrontmatterToml, span).label(e.to_string()),
101                        );
102                        return (None, idx, diags);
103                    }
104                }
105            }
106            _ => {
107                idx += 1;
108            }
109        }
110    }
111    diags.push(
112        Diagnostic::new(Code::UnterminatedFrontmatter, first.span)
113            .label("frontmatter opened with `+++` is never closed"),
114    );
115    (None, idx, diags)
116}
117
118struct Parser<'a> {
119    _src: &'a SourceMap,
120    toks: Vec<Token>,
121    pos: usize,
122    diags: Vec<Diagnostic>,
123}
124
125impl<'a> Parser<'a> {
126    fn peek(&self) -> &Token {
127        &self.toks[self.pos]
128    }
129    fn at_eof(&self) -> bool {
130        matches!(self.peek().kind, TokenKind::Eof)
131    }
132
133    fn parse_blocks(&mut self, base_indent: u16, end_at_indent_below: Option<u16>) -> Vec<Block> {
134        let mut out = Vec::new();
135        loop {
136            if self.at_eof() {
137                break;
138            }
139            match &self.peek().kind {
140                TokenKind::Eof => break,
141                TokenKind::Blank => {
142                    self.pos += 1;
143                    continue;
144                }
145                TokenKind::Line(_) => {
146                    let indent = self.peek().indent;
147                    if indent < base_indent {
148                        break;
149                    }
150                    if let Some(min) = end_at_indent_below {
151                        if indent < min {
152                            break;
153                        }
154                    }
155                    let line = if let TokenKind::Line(s) = &self.peek().kind {
156                        s.clone()
157                    } else {
158                        unreachable!()
159                    };
160                    let trimmed = line[indent as usize..].to_string();
161
162                    // `@end` is always a terminator for the parent block-shortcode;
163                    // stop here so the caller can consume it. Stray @ends are
164                    // surfaced by the top-level driver in `parse()`.
165                    if trimmed.trim() == "@end" {
166                        break;
167                    }
168                    if trimmed.starts_with("//") {
169                        self.pos += 1;
170                        continue;
171                    }
172                    if trimmed.starts_with("/*") {
173                        self.consume_block_comment(&trimmed);
174                        continue;
175                    }
176                    if let Some(b) = self.try_block_at(&trimmed, indent) {
177                        out.push(b);
178                    } else {
179                        out.push(self.parse_paragraph(indent));
180                    }
181                }
182            }
183        }
184        out
185    }
186
187    fn try_block_at(&mut self, trimmed: &str, indent: u16) -> Option<Block> {
188        if trimmed.starts_with('#') {
189            return Some(self.parse_heading());
190        }
191        if trimmed == "---" {
192            return Some(self.parse_hr());
193        }
194        if trimmed.starts_with("```") {
195            return Some(self.parse_code_fence());
196        }
197        if trimmed.starts_with("- ") {
198            return Some(self.parse_unordered_list(indent));
199        }
200        if leading_ordered_marker(trimmed).is_some() {
201            return Some(self.parse_ordered_list(indent));
202        }
203        if trimmed.starts_with('>') {
204            return Some(self.parse_blockquote(indent));
205        }
206        if trimmed == "@t" || trimmed.starts_with("@t ") || trimmed.starts_with("@t(") {
207            return Some(self.parse_table(indent));
208        }
209        if trimmed == "@dl" || trimmed.starts_with("@dl ") || trimmed.starts_with("@dl(") {
210            return Some(self.parse_definition_list(indent));
211        }
212        if trimmed.starts_with('@') {
213            return self.parse_block_shortcode_or_inline(indent);
214        }
215        if trimmed.starts_with('|') {
216            let span = self.peek().span;
217            self.diags.push(
218                Diagnostic::new(Code::StrayContent, span)
219                    .label("`|` only appears inside a `@t` table"),
220            );
221            self.pos += 1;
222            return Some(Block::Paragraph {
223                content: vec![],
224                span,
225            });
226        }
227        None
228    }
229
230    fn consume_block_comment(&mut self, trimmed: &str) {
231        if trimmed.ends_with("*/") && trimmed.len() >= 4 {
232            self.pos += 1;
233            return;
234        }
235        self.pos += 1;
236        loop {
237            match &self.peek().kind {
238                TokenKind::Eof => {
239                    self.diags.push(
240                        Diagnostic::new(Code::UnterminatedBlock, self.peek().span)
241                            .label("unterminated /* */ comment"),
242                    );
243                    return;
244                }
245                TokenKind::Blank => {
246                    self.pos += 1;
247                }
248                TokenKind::Line(s) => {
249                    let s = s.clone();
250                    self.pos += 1;
251                    if s.trim_end().ends_with("*/") {
252                        return;
253                    }
254                }
255            }
256        }
257    }
258
259    fn parse_heading(&mut self) -> Block {
260        let tok = self.peek().clone();
261        let line = if let TokenKind::Line(ref s) = tok.kind {
262            s.clone()
263        } else {
264            unreachable!()
265        };
266        self.pos += 1;
267        let indent = tok.indent as usize;
268        let s = &line[indent..];
269        let mut level = 0u8;
270        let bytes = s.as_bytes();
271        while (level as usize) < bytes.len() && bytes[level as usize] == b'#' {
272            level += 1;
273            if level > 6 {
274                break;
275            }
276        }
277        let mut hash_count = level as usize;
278        while hash_count < bytes.len() && bytes[hash_count] == b'#' {
279            hash_count += 1;
280        }
281        if hash_count > 6 {
282            let span = Span::new(tok.span.start as usize + indent, hash_count);
283            self.diags.push(
284                Diagnostic::new(Code::HeadingTooDeep, span)
285                    .label("Brief supports heading levels 1-6 only"),
286            );
287            return Block::Paragraph {
288                content: vec![],
289                span: tok.span,
290            };
291        }
292        if bytes.get(level as usize) != Some(&b' ') {
293            self.diags.push(
294                Diagnostic::new(Code::HeadingNoSpace, tok.span)
295                    .help("write `# heading` with exactly one space after the `#`s"),
296            );
297            return Block::Paragraph {
298                content: vec![],
299                span: tok.span,
300            };
301        }
302        if bytes.get(level as usize + 1) == Some(&b' ') {
303            self.diags.push(
304                Diagnostic::new(Code::HeadingNoSpace, tok.span)
305                    .label("multiple spaces after heading marker"),
306            );
307        }
308        let text_offset = indent + level as usize + 1;
309        let raw_text = &line[text_offset..];
310
311        // Anchor detection: look for a trailing `{#name}` block.
312        // Only triggers when the line ends with `}` and contains `{#`.
313        let (heading_text, anchor) = parse_heading_anchor(
314            raw_text,
315            tok.span.start + text_offset as u32,
316            &mut self.diags,
317        );
318
319        let (content, idiags) = parse_inline(heading_text, tok.span.start + text_offset as u32);
320        self.diags.extend(idiags);
321        Block::Heading {
322            level,
323            content,
324            anchor,
325            span: tok.span,
326        }
327    }
328
329    fn parse_paragraph(&mut self, indent: u16) -> Block {
330        let first = self.peek().clone();
331        let mut span = first.span;
332        let mut text = String::new();
333        let mut hard_break_indices: Vec<usize> = Vec::new();
334        let mut first_line = true;
335        loop {
336            match &self.peek().kind {
337                TokenKind::Line(s) => {
338                    let tok_indent = self.peek().indent;
339                    if tok_indent != indent {
340                        break;
341                    }
342                    let trimmed = &s[indent as usize..];
343                    // The first paragraph line is always consumed: the dispatcher
344                    // already decided this isn't a block. Subsequent continuation
345                    // lines stop at any block-starting sigil.
346                    if !first_line && leading_block_sigil(trimmed) {
347                        break;
348                    }
349                    first_line = false;
350                    if !text.is_empty() {
351                        text.push(' ');
352                    }
353                    let mut line_text = trimmed.to_string();
354                    let hard = line_text.ends_with('\\');
355                    if hard {
356                        line_text.pop();
357                        hard_break_indices.push(text.len() + line_text.len());
358                    }
359                    text.push_str(&line_text);
360                    span = span.join(self.peek().span);
361                    self.pos += 1;
362                }
363                _ => break,
364            }
365        }
366        let mut content: Vec<Inline> = Vec::new();
367        let mut cursor = 0usize;
368        let base = first.span.start + first.indent as u32;
369        for hb in &hard_break_indices {
370            let chunk = &text[cursor..*hb];
371            let (mut inl, d) = parse_inline(chunk, base + cursor as u32);
372            self.diags.extend(d);
373            content.append(&mut inl);
374            content.push(Inline::HardBreak {
375                span: Span::new(base as usize + *hb, 1),
376            });
377            cursor = *hb;
378        }
379        let chunk = &text[cursor..];
380        let (mut inl, d) = parse_inline(chunk, base + cursor as u32);
381        self.diags.extend(d);
382        content.append(&mut inl);
383        Block::Paragraph { content, span }
384    }
385
386    fn parse_hr(&mut self) -> Block {
387        let tok = self.peek().clone();
388        self.pos += 1;
389        Block::HorizontalRule { span: tok.span }
390    }
391
392    fn parse_code_fence(&mut self) -> Block {
393        let open = self.peek().clone();
394        let line = if let TokenKind::Line(ref s) = open.kind {
395            s.clone()
396        } else {
397            unreachable!()
398        };
399        self.pos += 1;
400        let indent = open.indent as usize;
401        let after = &line[indent + 3..];
402        if after.starts_with('`') {
403            self.diags.push(
404                Diagnostic::new(Code::UnterminatedFence, open.span)
405                    .label("opening fence must be exactly three backticks"),
406            );
407        }
408        let info_offset = open.span.start as usize + indent + 3;
409        let (lang, attrs) = parse_fence_info(after, info_offset as u32, open.span, &mut self.diags);
410        let mut body = String::new();
411        let mut span = open.span;
412        loop {
413            match &self.peek().kind {
414                TokenKind::Eof => {
415                    self.diags.push(
416                        Diagnostic::new(Code::UnterminatedFence, open.span)
417                            .label("fence opened here is never closed"),
418                    );
419                    break;
420                }
421                TokenKind::Blank => {
422                    body.push('\n');
423                    span = span.join(self.peek().span);
424                    self.pos += 1;
425                }
426                TokenKind::Line(s) => {
427                    if s.trim() == "```" {
428                        span = span.join(self.peek().span);
429                        self.pos += 1;
430                        break;
431                    }
432                    body.push_str(s);
433                    body.push('\n');
434                    span = span.join(self.peek().span);
435                    self.pos += 1;
436                }
437            }
438        }
439        if body.ends_with('\n') {
440            body.pop();
441        }
442        Block::CodeBlock {
443            lang,
444            body,
445            attrs,
446            span,
447        }
448    }
449
450    fn parse_unordered_list(&mut self, indent: u16) -> Block {
451        let start_span = self.peek().span;
452        let mut items: Vec<ListItem> = Vec::new();
453        loop {
454            if self.at_eof() {
455                break;
456            }
457            let tok = self.peek().clone();
458            let line = if let TokenKind::Line(ref s) = tok.kind {
459                s.clone()
460            } else {
461                break;
462            };
463            if tok.indent != indent {
464                break;
465            }
466            let trimmed = &line[indent as usize..];
467            if !trimmed.starts_with("- ") {
468                break;
469            }
470            let after_marker = &trimmed[2..];
471            // Task-list modifier: exactly `[x] ` (Done) or `[ ] ` (Todo) at
472            // the start of item content. Lowercase `x` only; one space; one
473            // marker length only. Anything else is plain inline content.
474            let (task, item_text, content_offset) =
475                if let Some(rest) = after_marker.strip_prefix("[x] ") {
476                    (Some(TaskState::Done), rest, 4u32)
477                } else if let Some(rest) = after_marker.strip_prefix("[ ] ") {
478                    (Some(TaskState::Todo), rest, 4u32)
479                } else {
480                    (None, after_marker, 0u32)
481                };
482            let (content, d) = parse_inline(
483                item_text,
484                tok.span.start + indent as u32 + 2 + content_offset,
485            );
486            self.diags.extend(d);
487            self.pos += 1;
488            let mut children: Vec<Block> = Vec::new();
489            self.skip_blanks();
490            if let TokenKind::Line(_) = &self.peek().kind {
491                if self.peek().indent >= indent + 2 {
492                    children = self.parse_blocks(indent + 2, Some(indent + 2));
493                }
494            }
495            items.push(ListItem {
496                content,
497                children,
498                task,
499                span: tok.span,
500            });
501        }
502        let span = items.iter().fold(start_span, |a, it| a.join(it.span));
503        Block::List {
504            ordered: false,
505            items,
506            span,
507        }
508    }
509
510    fn parse_ordered_list(&mut self, indent: u16) -> Block {
511        let start_span = self.peek().span;
512        let mut items: Vec<ListItem> = Vec::new();
513        let mut expected: u32 = 1;
514        loop {
515            if self.at_eof() {
516                break;
517            }
518            let tok = self.peek().clone();
519            let line = if let TokenKind::Line(ref s) = tok.kind {
520                s.clone()
521            } else {
522                break;
523            };
524            if tok.indent != indent {
525                break;
526            }
527            let trimmed = &line[indent as usize..];
528            let Some((num, marker_len)) = leading_ordered_marker(trimmed) else {
529                break;
530            };
531            if num != expected {
532                let span = Span::new(tok.span.start as usize + indent as usize, marker_len);
533                self.diags.push(
534                    Diagnostic::new(Code::OrderedListSequence, span)
535                        .label(format!("got `{}.`, expected `{}.`", num, expected))
536                        .help("ordered lists must number sequentially starting from 1"),
537                );
538            }
539            expected = expected.saturating_add(1);
540            let after_marker = &trimmed[marker_len..];
541            // Task-list modifier: exactly `[x] ` (Done) or `[ ] ` (Todo) at
542            // the start of item content. Lowercase `x` only; one space; one
543            // marker length only. Anything else is plain inline content.
544            let (task, item_text, content_offset) =
545                if let Some(rest) = after_marker.strip_prefix("[x] ") {
546                    (Some(TaskState::Done), rest, 4u32)
547                } else if let Some(rest) = after_marker.strip_prefix("[ ] ") {
548                    (Some(TaskState::Todo), rest, 4u32)
549                } else {
550                    (None, after_marker, 0u32)
551                };
552            let (content, d) = parse_inline(
553                item_text,
554                tok.span.start + indent as u32 + marker_len as u32 + content_offset,
555            );
556            self.diags.extend(d);
557            self.pos += 1;
558            let mut children: Vec<Block> = Vec::new();
559            self.skip_blanks();
560            if let TokenKind::Line(_) = &self.peek().kind {
561                if self.peek().indent >= indent + 2 {
562                    children = self.parse_blocks(indent + 2, Some(indent + 2));
563                }
564            }
565            items.push(ListItem {
566                content,
567                children,
568                task,
569                span: tok.span,
570            });
571        }
572        let span = items.iter().fold(start_span, |a, it| a.join(it.span));
573        Block::List {
574            ordered: true,
575            items,
576            span,
577        }
578    }
579
580    fn parse_blockquote(&mut self, indent: u16) -> Block {
581        let mut lines: Vec<(u8, String, Span)> = Vec::new();
582        let start = self.peek().span;
583        loop {
584            if self.at_eof() {
585                break;
586            }
587            let tok = self.peek().clone();
588            let line = if let TokenKind::Line(ref s) = tok.kind {
589                s.clone()
590            } else {
591                break;
592            };
593            if tok.indent != indent {
594                break;
595            }
596            let trimmed = &line[indent as usize..];
597            let mut depth: u8 = 0;
598            let mut idx = 0usize;
599            let bytes = trimmed.as_bytes();
600            while idx < bytes.len() && bytes[idx] == b'>' {
601                depth += 1;
602                idx += 1;
603            }
604            if depth == 0 {
605                break;
606            }
607            if bytes.get(idx) != Some(&b' ') {
608                self.diags.push(
609                    Diagnostic::new(Code::BadBlockquote, tok.span)
610                        .label("expected one space after `>`"),
611                );
612                self.pos += 1;
613                break;
614            }
615            let body = trimmed[idx + 1..].to_string();
616            lines.push((depth, body, tok.span));
617            self.pos += 1;
618        }
619        let (children, span) = build_blockquote(&lines, 1);
620        Block::Blockquote {
621            children,
622            span: if span == Span::DUMMY { start } else { span },
623        }
624    }
625
626    fn parse_table(&mut self, indent: u16) -> Block {
627        let directive = self.peek().clone();
628        let line = if let TokenKind::Line(ref s) = directive.kind {
629            s.clone()
630        } else {
631            unreachable!()
632        };
633        self.pos += 1;
634        let trimmed = &line[indent as usize..];
635        let mut cursor = 2usize;
636        let args = if trimmed.as_bytes().get(cursor) == Some(&b'(') {
637            match parse_args(trimmed, &mut cursor) {
638                Ok(a) => a,
639                Err(d) => {
640                    self.diags.push(d);
641                    ShortArgs::default()
642                }
643            }
644        } else {
645            ShortArgs::default()
646        };
647        let mut rows: Vec<Row> = Vec::new();
648        loop {
649            if self.at_eof() {
650                break;
651            }
652            let tok = self.peek().clone();
653            let row_line = if let TokenKind::Line(ref s) = tok.kind {
654                s.clone()
655            } else {
656                break;
657            };
658            let trimmed = row_line.trim_start();
659            if !trimmed.starts_with('|') {
660                break;
661            }
662            let split = split_cells(trimmed);
663            if let Some(rel) = split.unclosed_backtick_at {
664                // `rel` is relative to `trimmed`. The token spans `row_line`, which
665                // includes leading whitespace; account for that when computing the
666                // absolute offset into the SourceMap.
667                let leading_ws = row_line.len() - trimmed.len();
668                debug_assert!(
669                    rel < trimmed.len(),
670                    "unclosed_backtick_at {} out of bounds for trimmed (len {})",
671                    rel,
672                    trimmed.len()
673                );
674                let abs = tok.span.start as usize + leading_ws + rel;
675                self.diags.push(
676                    Diagnostic::new(Code::UnterminatedCode, Span::new(abs, 1))
677                        .label("inline code span never closed inside a table row"),
678                );
679                self.pos += 1;
680                continue;
681            }
682            let cells = split.cells;
683            let mut parsed_cells: Vec<Vec<Inline>> = Vec::new();
684            for c in cells {
685                let (inl, d) = parse_inline(c.trim(), tok.span.start);
686                self.diags.extend(d);
687                parsed_cells.push(inl);
688            }
689            rows.push(Row {
690                cells: parsed_cells,
691                span: tok.span,
692            });
693            self.pos += 1;
694        }
695        if rows.is_empty() {
696            self.diags.push(
697                Diagnostic::new(Code::StrayContent, directive.span)
698                    .label("`@t` must be followed by at least a header row"),
699            );
700            return Block::Paragraph {
701                content: vec![],
702                span: directive.span,
703            };
704        }
705        let header = rows.remove(0);
706        let cols = header.cells.len();
707        for r in &rows {
708            if r.cells.len() != cols {
709                self.diags.push(
710                    Diagnostic::new(Code::TableColumnMismatch, r.span).label(format!(
711                        "table row has {} cells, expected {}",
712                        r.cells.len(),
713                        cols
714                    )),
715                );
716            }
717        }
718        if let Some(crate::shortcode::ArgValue::Array(a)) = args.keyword.get("align") {
719            if a.len() != cols {
720                self.diags.push(
721                    Diagnostic::new(Code::AlignArrayLength, directive.span).label(format!(
722                        "`align` has {} entries but table has {} columns",
723                        a.len(),
724                        cols
725                    )),
726                );
727            }
728        }
729        let span = rows
730            .iter()
731            .fold(directive.span.join(header.span), |a, r| a.join(r.span));
732        Block::Table {
733            args,
734            header,
735            rows,
736            span,
737        }
738    }
739
740    fn parse_definition_list(&mut self, indent: u16) -> Block {
741        use crate::ast::DefinitionItem;
742        let directive = self.peek().clone();
743        let line = if let TokenKind::Line(ref s) = directive.kind {
744            s.clone()
745        } else {
746            unreachable!()
747        };
748        self.pos += 1;
749        let trimmed = &line[indent as usize..];
750        let mut cursor = 3usize;
751        let args = if trimmed.as_bytes().get(cursor) == Some(&b'(') {
752            match parse_args(trimmed, &mut cursor) {
753                Ok(a) => a,
754                Err(d) => {
755                    self.diags.push(d);
756                    ShortArgs::default()
757                }
758            }
759        } else {
760            ShortArgs::default()
761        };
762
763        let mut items: Vec<DefinitionItem> = Vec::new();
764        let mut pending_term: Option<(Vec<Inline>, Span)> = None;
765        // (text accumulator, base offset of first line, span covering the
766        // definition's lines).
767        let mut pending_def: Option<(String, u32, Span)> = None;
768        let cont_indent = indent + 2;
769        let mut end_span = directive.span;
770
771        // Closes any open definition into items, paired with the pending term.
772        let finalize_def = |items: &mut Vec<DefinitionItem>,
773                            pending_term: &mut Option<(Vec<Inline>, Span)>,
774                            pending_def: &mut Option<(String, u32, Span)>,
775                            diags: &mut Vec<Diagnostic>| {
776            if let Some((text, base, span)) = pending_def.take() {
777                let term_pair = pending_term.take();
778                let (def_inl, dd) = parse_inline(&text, base);
779                diags.extend(dd);
780                if let Some((term, t_span)) = term_pair {
781                    let pair_span = t_span.join(span);
782                    items.push(DefinitionItem {
783                        term,
784                        definition: def_inl,
785                        span: pair_span,
786                    });
787                } else {
788                    // Should not happen — a definition without a term is
789                    // caught at the `: ` line itself.
790                }
791            }
792        };
793
794        loop {
795            if self.at_eof() {
796                self.diags.push(
797                    Diagnostic::new(Code::UnterminatedBlock, directive.span)
798                        .label("`@dl` block was never closed with `@end`"),
799                );
800                break;
801            }
802            let tok = self.peek().clone();
803            match tok.kind {
804                TokenKind::Eof => {
805                    self.diags.push(
806                        Diagnostic::new(Code::UnterminatedBlock, directive.span)
807                            .label("`@dl` block was never closed with `@end`"),
808                    );
809                    break;
810                }
811                TokenKind::Blank => {
812                    finalize_def(
813                        &mut items,
814                        &mut pending_term,
815                        &mut pending_def,
816                        &mut self.diags,
817                    );
818                    self.pos += 1;
819                    continue;
820                }
821                TokenKind::Line(ref s) => {
822                    if let Some(pd) = pending_def.as_mut()
823                        && tok.indent == cont_indent
824                    {
825                        // Continuation of the active definition.
826                        let body = &s[cont_indent as usize..];
827                        pd.0.push(' ');
828                        pd.0.push_str(body);
829                        pd.2 = pd.2.join(tok.span);
830                        self.pos += 1;
831                        continue;
832                    }
833                    if tok.indent != indent {
834                        // Anything else not at @dl's indent inside a `@dl`
835                        // body is unexpected — skip it; subsequent tasks
836                        // refine error reporting.
837                        self.pos += 1;
838                        continue;
839                    }
840                    let body = &s[indent as usize..];
841                    if body.trim() == "@end" {
842                        finalize_def(
843                            &mut items,
844                            &mut pending_term,
845                            &mut pending_def,
846                            &mut self.diags,
847                        );
848                        end_span = tok.span;
849                        self.pos += 1;
850                        break;
851                    }
852                    if let Some(rest) = body.strip_prefix(": ") {
853                        if pending_term.is_none() && pending_def.is_none() {
854                            self.diags.push(
855                                Diagnostic::new(Code::BadDefinitionList, tok.span)
856                                    .label("definition without a term"),
857                            );
858                            self.pos += 1;
859                            continue;
860                        }
861                        if pending_def.is_some() {
862                            self.diags.push(
863                                Diagnostic::new(Code::BadDefinitionList, tok.span).label(
864                                    "multiple definitions per term are not supported in v0.3",
865                                ),
866                            );
867                            // Drop the duplicate definition: do not consume
868                            // pending_term, do not start a new pending_def.
869                            self.pos += 1;
870                            continue;
871                        }
872                        let base = tok.span.start + indent as u32 + 2;
873                        pending_def = Some((rest.to_string(), base, tok.span));
874                        self.pos += 1;
875                    } else {
876                        // Term line.
877                        finalize_def(
878                            &mut items,
879                            &mut pending_term,
880                            &mut pending_def,
881                            &mut self.diags,
882                        );
883                        if let Some((_t, t_span)) = pending_term.take() {
884                            self.diags.push(
885                                Diagnostic::new(Code::BadDefinitionList, t_span)
886                                    .label("term without a definition"),
887                            );
888                        }
889                        let base = tok.span.start + indent as u32;
890                        let (term, td) = parse_inline(body, base);
891                        self.diags.extend(td);
892                        pending_term = Some((term, tok.span));
893                        self.pos += 1;
894                    }
895                }
896            }
897        }
898
899        // Final flush after EOF or @end.
900        finalize_def(
901            &mut items,
902            &mut pending_term,
903            &mut pending_def,
904            &mut self.diags,
905        );
906        if let Some((_t, t_span)) = pending_term {
907            self.diags.push(
908                Diagnostic::new(Code::BadDefinitionList, t_span).label("term without a definition"),
909            );
910        }
911        if items.is_empty() && !self.diags.iter().any(|d| d.code == Code::BadDefinitionList) {
912            self.diags.push(
913                Diagnostic::new(Code::BadDefinitionList, directive.span)
914                    .label("`@dl` must contain at least one term/definition pair"),
915            );
916        }
917
918        Block::DefinitionList {
919            args,
920            items,
921            span: directive.span.join(end_span),
922        }
923    }
924
925    fn parse_block_shortcode_or_inline(&mut self, indent: u16) -> Option<Block> {
926        let tok = self.peek().clone();
927        let line = if let TokenKind::Line(ref s) = tok.kind {
928            s.clone()
929        } else {
930            return None;
931        };
932        let trimmed = &line[indent as usize..];
933        let mut cursor = 1usize;
934        let bytes = trimmed.as_bytes();
935        if cursor >= bytes.len() || !bytes[cursor].is_ascii_alphabetic() {
936            return None;
937        }
938        let name_start = cursor;
939        while cursor < bytes.len()
940            && (bytes[cursor].is_ascii_alphanumeric() || bytes[cursor] == b'-')
941        {
942            cursor += 1;
943        }
944        let name = trimmed[name_start..cursor].to_string();
945        let mut args = ShortArgs::default();
946        if bytes.get(cursor) == Some(&b'(') {
947            match parse_args(trimmed, &mut cursor) {
948                Ok(a) => args = a,
949                Err(d) => self.diags.push(d),
950            }
951        }
952        if !trimmed[cursor..].trim().is_empty() {
953            return None;
954        }
955        self.pos += 1;
956        let children = self.parse_blocks(indent, Some(indent));
957        let mut end_span = tok.span;
958        match &self.peek().kind {
959            TokenKind::Line(s) if s.trim() == "@end" && self.peek().indent == indent => {
960                end_span = self.peek().span;
961                self.pos += 1;
962            }
963            _ => {
964                self.diags.push(
965                    Diagnostic::new(Code::UnterminatedBlock, tok.span)
966                        .label(format!("`@{}` block was never closed with `@end`", name)),
967                );
968            }
969        }
970        Some(Block::BlockShortcode {
971            name,
972            args,
973            children,
974            span: tok.span.join(end_span),
975        })
976    }
977
978    fn skip_blanks(&mut self) {
979        while matches!(self.peek().kind, TokenKind::Blank) {
980            self.pos += 1;
981        }
982    }
983}
984
985fn build_blockquote(items: &[(u8, String, Span)], depth: u8) -> (Vec<Block>, Span) {
986    let mut paras: Vec<Block> = Vec::new();
987    let mut full_span = Span::DUMMY;
988    let mut i = 0;
989    while i < items.len() {
990        let (d, body, span) = &items[i];
991        if *d < depth {
992            break;
993        }
994        full_span = if full_span == Span::DUMMY {
995            *span
996        } else {
997            full_span.join(*span)
998        };
999        if *d == depth {
1000            let (content, _) = parse_inline(body, span.start);
1001            paras.push(Block::Paragraph {
1002                content,
1003                span: *span,
1004            });
1005            i += 1;
1006        } else {
1007            let mut j = i;
1008            while j < items.len() && items[j].0 > depth {
1009                j += 1;
1010            }
1011            let (children, child_span) = build_blockquote(&items[i..j], depth + 1);
1012            paras.push(Block::Blockquote {
1013                children,
1014                span: child_span,
1015            });
1016            i = j;
1017        }
1018    }
1019    (paras, full_span)
1020}
1021
1022fn parse_fence_info(
1023    after: &str,
1024    base: u32,
1025    fence_span: Span,
1026    diags: &mut Vec<Diagnostic>,
1027) -> (Option<String>, CodeAttrs) {
1028    // The info string follows the opening ```; the language tag is the first
1029    // whitespace-separated token, and any subsequent `@<ident>` tokens are
1030    // fence attributes (v0.2: `@nominify`, `@minify`).
1031    let mut attrs = CodeAttrs::default();
1032    let bytes = after.as_bytes();
1033    let mut i = 0usize;
1034    while i < bytes.len() && bytes[i] == b' ' {
1035        i += 1;
1036    }
1037    if i == bytes.len() {
1038        return (None, attrs);
1039    }
1040    let lang_start = i;
1041    while i < bytes.len() && bytes[i] != b' ' {
1042        i += 1;
1043    }
1044    let lang_tok = &after[lang_start..i];
1045    let lang = if lang_tok.is_empty() {
1046        None
1047    } else if lang_tok.starts_with('@') {
1048        // The first non-whitespace token is an attribute, not a language.
1049        i = lang_start;
1050        None
1051    } else {
1052        Some(lang_tok.to_string())
1053    };
1054
1055    while i < bytes.len() {
1056        while i < bytes.len() && bytes[i] == b' ' {
1057            i += 1;
1058        }
1059        if i >= bytes.len() {
1060            break;
1061        }
1062        let tok_start = i;
1063        while i < bytes.len() && bytes[i] != b' ' {
1064            i += 1;
1065        }
1066        let tok = &after[tok_start..i];
1067        if tok.is_empty() {
1068            continue;
1069        }
1070        let tok_span = Span::new(base as usize + tok_start, tok.len());
1071        if !tok.starts_with('@') {
1072            diags.push(
1073                Diagnostic::new(Code::UnknownCodeAttribute, tok_span)
1074                    .label(format!("`{}` is not a valid code-fence attribute", tok))
1075                    .help("attributes must be `@`-prefixed identifiers (e.g. `@nominify`)"),
1076            );
1077            continue;
1078        }
1079        let name = &tok[1..];
1080        match name {
1081            "nominify" => {
1082                if attrs.minify || attrs.keep_comments {
1083                    diags.push(
1084                        Diagnostic::new(Code::ConflictingCodeAttributes, fence_span)
1085                            .label("`@nominify` conflicts with `@minify`/`@minify-keep-comments`"),
1086                    );
1087                }
1088                attrs.nominify = true;
1089            }
1090            "minify" => {
1091                if attrs.nominify {
1092                    diags.push(
1093                        Diagnostic::new(Code::ConflictingCodeAttributes, fence_span)
1094                            .label("`@nominify` and `@minify` cannot both be set"),
1095                    );
1096                }
1097                attrs.minify = true;
1098            }
1099            "minify-keep-comments" => {
1100                if attrs.nominify {
1101                    diags.push(
1102                        Diagnostic::new(Code::ConflictingCodeAttributes, fence_span)
1103                            .label("`@nominify` and `@minify-keep-comments` cannot both be set"),
1104                    );
1105                }
1106                attrs.keep_comments = true;
1107            }
1108            _ => {
1109                diags.push(
1110                    Diagnostic::new(Code::UnknownCodeAttribute, tok_span)
1111                        .label(format!("unknown code-fence attribute `{}`", tok))
1112                        .help("v0.3 supports `@nominify`, `@minify`, `@minify-keep-comments`"),
1113                );
1114            }
1115        }
1116    }
1117    (lang, attrs)
1118}
1119
1120/// Parse a trailing `{#anchor}` block from heading text.
1121///
1122/// Returns `(text_to_use_for_inline_parse, anchor_name)`. If the anchor
1123/// block is present but malformed, a diagnostic is pushed and `anchor` is
1124/// `None`, but the malformed block is still stripped from `text_to_use`
1125/// where possible.
1126///
1127/// Anchor syntax is triggered whenever `{#` appears in the heading text.
1128/// If `{#` is present but the format does not match the strict form
1129/// (` {#name}` at end of line, name `[a-z0-9-]+`), it is a `BadHeadingAnchor`
1130/// error.
1131///
1132/// Exception: `{#` in the middle of text with NO closing `}` at
1133/// end-of-line is treated as plain text — it's clearly not intended as
1134/// anchor syntax.
1135///
1136/// `base` is the byte offset of `text` in the source, used for diagnostics.
1137fn parse_heading_anchor<'a>(
1138    text: &'a str,
1139    base: u32,
1140    diags: &mut Vec<Diagnostic>,
1141) -> (&'a str, Option<String>) {
1142    // Quick exit: if there's no `{#` anywhere, no anchor syntax is possible.
1143    if !text.contains("{#") {
1144        return (text, None);
1145    }
1146
1147    // Find the last `{#` occurrence (there can be at most one anchor block).
1148    let hash_open = match text.rfind("{#") {
1149        Some(i) => i,
1150        None => return (text, None),
1151    };
1152
1153    // Check if the `}` at end-of-line closes this `{#`.
1154    // Case A: `{#name}` at end of line (the only valid form).
1155    // Case B: `{#name}` NOT at end of line (content after `}`) → malformed.
1156    // Case C: no `}` after the `{#` at all → the `{#` is inside text, leave alone.
1157
1158    let after_hash = &text[hash_open..];
1159    let rbrace = match after_hash.find('}') {
1160        Some(i) => i,
1161        None => {
1162            // `{#` with no closing `}` anywhere → plain text, no error.
1163            return (text, None);
1164        }
1165    };
1166
1167    let candidate = &after_hash[..rbrace + 1]; // e.g. `{#abc}`
1168    let after_candidate = &after_hash[rbrace + 1..]; // what comes after `}`
1169
1170    // If there is content after the `}`, the anchor block is NOT at end of line.
1171    // This is a BadHeadingAnchor (rule: no content after `}`).
1172    if !after_candidate.is_empty() {
1173        let anchor_span = Span::new(base as usize + hash_open, candidate.len());
1174        diags.push(
1175            Diagnostic::new(Code::BadHeadingAnchor, anchor_span)
1176                .label("anchor block must be `{#anchor}` with exactly one space before `{` and no content after `}`"),
1177        );
1178        // Leave the heading text alone (don't strip anything).
1179        return (text, None);
1180    }
1181
1182    // The candidate `{#...}` IS at end of line.
1183    // Validate: exactly one space before `{`.
1184    let before = &text[..hash_open];
1185
1186    let malformed = if before.is_empty() {
1187        // No content before `{#...}` → no space before `{`.
1188        true
1189    } else {
1190        let last_ch = before.chars().last().unwrap();
1191        if last_ch != ' ' {
1192            // No space before `{`
1193            true
1194        } else {
1195            // Check for double space (before ends with "  ")
1196            let before_trim = &before[..before.len() - 1];
1197            before_trim.ends_with(' ')
1198        }
1199    };
1200
1201    // Extract the name (everything between `{#` and `}`).
1202    let name_part = &candidate[2..candidate.len() - 1]; // strip `{#` and `}`
1203
1204    let anchor_span_start = base as usize + hash_open;
1205    let anchor_span = Span::new(anchor_span_start, candidate.len());
1206
1207    if malformed {
1208        diags.push(
1209            Diagnostic::new(Code::BadHeadingAnchor, anchor_span)
1210                .label("anchor block must be `{#anchor}` with exactly one space before `{` and no content after `}`"),
1211        );
1212        // Strip the malformed block from text.
1213        return (&text[..hash_open], None);
1214    }
1215
1216    // Validate the name: `[a-z0-9-]+`, non-empty.
1217    let name_is_valid = !name_part.is_empty()
1218        && name_part
1219            .bytes()
1220            .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'-');
1221
1222    // Strip ` {#name}` from the text (one space + anchor block).
1223    // `hash_open - 1` skips the single space before `{`.
1224    let stripped = &text[..hash_open - 1];
1225
1226    if !name_is_valid {
1227        let name_span = Span::new(anchor_span_start + 2, name_part.len().max(1));
1228        diags.push(
1229            Diagnostic::new(Code::BadHeadingAnchor, name_span)
1230                .label("anchor must match `[a-z0-9-]+`")
1231                .help("use lowercase letters, digits, and hyphens only"),
1232        );
1233        return (stripped, None);
1234    }
1235
1236    (stripped, Some(name_part.to_string()))
1237}
1238
1239fn leading_block_sigil(s: &str) -> bool {
1240    if s.is_empty() {
1241        return false;
1242    }
1243    let b = s.as_bytes()[0];
1244    if b == b'#' || b == b'>' || b == b'|' || b == b'`' {
1245        return true;
1246    }
1247    if s == "---" {
1248        return true;
1249    }
1250    if s.starts_with("- ") {
1251        return true;
1252    }
1253    if leading_ordered_marker(s).is_some() {
1254        return true;
1255    }
1256    if s.starts_with("//") || s.starts_with("/*") {
1257        return true;
1258    }
1259    if s == "@end" || s.starts_with("@end ") {
1260        return true;
1261    }
1262    if b == b'@' {
1263        // a leading directive starts a block; non-directive @ inside text wouldn't appear at line start
1264        return true;
1265    }
1266    false
1267}
1268
/// Recognize an ordered-list marker (`<digits>. `) at the start of `s`.
///
/// Returns the parsed number and the byte length of the marker including the
/// dot and the space. Returns `None` when `s` does not start with ASCII
/// digits followed by `". "`, or when the digits do not fit in a `u32`.
fn leading_ordered_marker(s: &str) -> Option<(u32, usize)> {
    let digit_count = s.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (digits, rest) = s.split_at(digit_count);
    if !rest.starts_with(". ") {
        return None;
    }
    let number = digits.parse::<u32>().ok()?;
    Some((number, digit_count + 2))
}
1287
/// Result of splitting one table row into cells.
///
/// Everything here is expressed relative to the `line` slice handed to
/// `split_cells`: the cells borrow from it, and the backtick offset is
/// measured from its first byte.
#[derive(Debug)]
struct RowSplit<'a> {
    /// Cell texts in source order, each trimmed of surrounding whitespace.
    /// If an unclosed backtick span is hit, only the cells completed before
    /// it are present.
    cells: Vec<&'a str>,
    /// `Some(byte_offset)` when a backtick code span opens in the row and is
    /// never closed. The offset is relative to **the input slice passed to
    /// `split_cells`**; callers needing an absolute source position must add
    /// the offset of that slice within the source themselves.
    unclosed_backtick_at: Option<usize>,
}

/// Split a table row on `|`, ignoring separators that are backslash-escaped
/// or that sit inside a backtick code span.
///
/// A leading `|` is the row opener and produces no cell; a trailing `|` is a
/// row closer and produces no empty final cell. Code spans are delimited by
/// one or two backticks. When a span never closes, splitting stops there and
/// the opener's offset is reported in `unclosed_backtick_at`.
fn split_cells(line: &str) -> RowSplit<'_> {
    // Separate the optional row opener from the part that holds the cells.
    let (body_start, body) = match line.strip_prefix('|') {
        Some(rest) => (1usize, rest),
        None => (0usize, line),
    };
    let raw = body.as_bytes();

    let mut cells: Vec<&str> = Vec::new();
    let mut unclosed: Option<usize> = None;
    let mut cell_start = 0usize;
    let mut pos = 0usize;

    while let Some(&byte) = raw.get(pos) {
        match byte {
            b'\\' => {
                // Step over the backslash and, if there is one, the byte it
                // escapes so that byte cannot act as a delimiter.
                pos = (pos + 2).min(raw.len());
            }
            b'`' => {
                let ticks = if raw.get(pos + 1) == Some(&b'`') { 2 } else { 1 };
                let fence = &raw[pos..pos + ticks];
                let search_from = pos + ticks;
                let closer = raw[search_from..]
                    .windows(ticks)
                    .position(|window| window == fence);
                match closer {
                    Some(rel) => pos = search_from + rel + ticks,
                    None => {
                        // `pos` is relative to `body`; shift it into the
                        // frame of `line` before reporting.
                        unclosed = Some(body_start + pos);
                        break;
                    }
                }
            }
            b'|' => {
                cells.push(body[cell_start..pos].trim());
                pos += 1;
                cell_start = pos;
            }
            _ => pos += 1,
        }
    }

    if unclosed.is_none() {
        let last = &body[cell_start..];
        // `cell_start` only moves past a `|`, so an empty remainder with a
        // non-zero `cell_start` means the row ended with a closing `|`.
        let ended_with_closer = last.is_empty() && cell_start > 0;
        if !ended_with_closer {
            cells.push(last.trim());
        }
    }

    RowSplit {
        cells,
        unclosed_backtick_at: unclosed,
    }
}
1385
1386#[cfg(test)]
1387mod tests {
1388    use super::*;
1389    use crate::lexer::lex;
1390
1391    fn p(s: &str) -> (Document, Vec<Diagnostic>) {
1392        let src = SourceMap::new("d.brf", s);
1393        let toks = lex(&src).unwrap();
1394        parse(toks, &src)
1395    }
1396
1397    #[test]
1398    fn heading_levels() {
1399        let (doc, d) = p("# A\n## B\n");
1400        assert!(d.is_empty(), "{:?}", d);
1401        assert_eq!(doc.blocks.len(), 2);
1402        if let Block::Heading { level, .. } = doc.blocks[0] {
1403            assert_eq!(level, 1);
1404        }
1405        if let Block::Heading { level, .. } = doc.blocks[1] {
1406            assert_eq!(level, 2);
1407        }
1408    }
1409
1410    #[test]
1411    fn heading_too_deep() {
1412        let (_, d) = p("####### x\n");
1413        assert!(d.iter().any(|x| x.code == Code::HeadingTooDeep));
1414    }
1415
1416    #[test]
1417    fn ordered_sequence() {
1418        let (_, d) = p("1. one\n3. three\n");
1419        assert!(d.iter().any(|x| x.code == Code::OrderedListSequence));
1420    }
1421
1422    #[test]
1423    fn ordered_ok() {
1424        let (doc, d) = p("1. one\n2. two\n");
1425        assert!(d.is_empty(), "{:?}", d);
1426        assert!(matches!(doc.blocks[0], Block::List { ordered: true, .. }));
1427    }
1428
1429    #[test]
1430    fn unordered_nested() {
1431        let (doc, d) = p("- a\n  - a1\n- b\n");
1432        assert!(d.is_empty(), "{:?}", d);
1433        if let Block::List { items, .. } = &doc.blocks[0] {
1434            assert_eq!(items.len(), 2);
1435            assert_eq!(items[0].children.len(), 1);
1436        } else {
1437            panic!();
1438        }
1439    }
1440
1441    #[test]
1442    fn paragraph_join() {
1443        let (doc, d) = p("one\ntwo\n");
1444        assert!(d.is_empty());
1445        if let Block::Paragraph { content, .. } = &doc.blocks[0] {
1446            if let Inline::Text { value, .. } = &content[0] {
1447                assert_eq!(value, "one two");
1448            }
1449        }
1450    }
1451
1452    #[test]
1453    fn code_block() {
1454        let (doc, d) = p("```rust\nfn x() {}\n```\n");
1455        assert!(d.is_empty(), "{:?}", d);
1456        if let Block::CodeBlock {
1457            lang, body, attrs, ..
1458        } = &doc.blocks[0]
1459        {
1460            assert_eq!(lang.as_deref(), Some("rust"));
1461            assert_eq!(body, "fn x() {}");
1462            assert_eq!(*attrs, CodeAttrs::default());
1463        } else {
1464            panic!();
1465        }
1466    }
1467
1468    #[test]
1469    fn code_fence_nominify_attr() {
1470        let (doc, d) = p("```json @nominify\n{\"a\":1}\n```\n");
1471        assert!(d.is_empty(), "{:?}", d);
1472        if let Block::CodeBlock { lang, attrs, .. } = &doc.blocks[0] {
1473            assert_eq!(lang.as_deref(), Some("json"));
1474            assert!(attrs.nominify);
1475            assert!(!attrs.minify);
1476        } else {
1477            panic!();
1478        }
1479    }
1480
1481    #[test]
1482    fn code_fence_minify_attr() {
1483        let (doc, d) = p("```rust @minify\nfn x() {}\n```\n");
1484        assert!(d.is_empty(), "{:?}", d);
1485        if let Block::CodeBlock { lang, attrs, .. } = &doc.blocks[0] {
1486            assert_eq!(lang.as_deref(), Some("rust"));
1487            assert!(attrs.minify);
1488        } else {
1489            panic!();
1490        }
1491    }
1492
1493    #[test]
1494    fn code_fence_unknown_attr_errors() {
1495        let (_, d) = p("```json @bogus\n{}\n```\n");
1496        assert!(
1497            d.iter().any(|x| x.code == Code::UnknownCodeAttribute),
1498            "{:?}",
1499            d
1500        );
1501    }
1502
1503    #[test]
1504    fn code_fence_attr_without_at_sigil_errors() {
1505        let (_, d) = p("```json bogus\n{}\n```\n");
1506        assert!(
1507            d.iter().any(|x| x.code == Code::UnknownCodeAttribute),
1508            "{:?}",
1509            d
1510        );
1511    }
1512
1513    #[test]
1514    fn code_fence_conflicting_attrs() {
1515        let (_, d) = p("```json @nominify @minify\n{}\n```\n");
1516        assert!(
1517            d.iter().any(|x| x.code == Code::ConflictingCodeAttributes),
1518            "{:?}",
1519            d
1520        );
1521    }
1522
1523    #[test]
1524    fn code_fence_attr_only_no_lang() {
1525        // An attribute on the first non-whitespace position means the block
1526        // has no language; the attribute still parses.
1527        let (doc, d) = p("``` @nominify\nbody\n```\n");
1528        assert!(d.is_empty(), "{:?}", d);
1529        if let Block::CodeBlock { lang, attrs, .. } = &doc.blocks[0] {
1530            assert!(lang.is_none());
1531            assert!(attrs.nominify);
1532        } else {
1533            panic!();
1534        }
1535    }
1536
1537    #[test]
1538    fn table_basic() {
1539        let (doc, d) = p("@t\n| A | B\n| 1 | 2\n");
1540        assert!(d.is_empty(), "{:?}", d);
1541        if let Block::Table { rows, .. } = &doc.blocks[0] {
1542            assert_eq!(rows.len(), 1);
1543        } else {
1544            panic!("{:?}", doc.blocks);
1545        }
1546    }
1547
1548    #[test]
1549    fn table_column_mismatch() {
1550        let (_, d) = p("@t\n| A | B | C\n| 1 | 2\n");
1551        assert!(d.iter().any(|x| x.code == Code::TableColumnMismatch));
1552    }
1553
1554    #[test]
1555    fn table_pipe_inside_inline_code_span_is_not_a_separator() {
1556        // Production report issue #2: documenting `|>` should not blow up
1557        // the row's column count.
1558        let (doc, d) = p("@t\n| Op | Meaning\n| `|>` | pipeline\n");
1559        assert!(d.is_empty(), "{:?}", d);
1560        if let crate::ast::Block::Table { rows, .. } = &doc.blocks[0] {
1561            assert_eq!(rows.len(), 1, "{:?}", rows);
1562            assert_eq!(rows[0].cells.len(), 2);
1563        } else {
1564            panic!("expected table");
1565        }
1566    }
1567
1568    #[test]
1569    fn table_pipe_inside_double_backtick_span_is_not_a_separator() {
1570        let (doc, d) = p("@t\n| A | B\n| ``a ` b | c`` | d\n");
1571        assert!(d.is_empty(), "{:?}", d);
1572        if let crate::ast::Block::Table { rows, .. } = &doc.blocks[0] {
1573            assert_eq!(rows.len(), 1);
1574            assert_eq!(rows[0].cells.len(), 2);
1575        } else {
1576            panic!();
1577        }
1578    }
1579
1580    #[test]
1581    fn table_unclosed_backtick_in_row_reports_unterminated_code_not_column_mismatch() {
1582        let (_doc, d) = p("@t\n| A | B\n| `oops | c\n");
1583        assert!(
1584            d.iter().any(|x| x.code == Code::UnterminatedCode),
1585            "{:?}",
1586            d
1587        );
1588        assert!(
1589            !d.iter().any(|x| x.code == Code::TableColumnMismatch),
1590            "{:?}",
1591            d
1592        );
1593    }
1594
1595    #[test]
1596    fn table_unclosed_backtick_with_indented_row_diagnostic_anchors_correctly() {
1597        // Two-space-indented `@t` (legal — tables can appear inside
1598        // indented contexts). Diagnostic offset must include the leading
1599        // whitespace, not just the post-trim column.
1600        let (_doc, d) = p("  @t\n  | A | B\n  | `oops | c\n");
1601        let unterm: Vec<_> = d
1602            .iter()
1603            .filter(|x| x.code == Code::UnterminatedCode)
1604            .collect();
1605        assert_eq!(unterm.len(), 1, "{:?}", d);
1606    }
1607
1608    #[test]
1609    fn block_shortcode() {
1610        let (doc, d) = p("@callout(kind: warning)\nbody\n@end\n");
1611        assert!(d.is_empty(), "{:?}", d);
1612        assert!(matches!(doc.blocks[0], Block::BlockShortcode { .. }));
1613    }
1614
1615    #[test]
1616    fn hr() {
1617        let (doc, _) = p("---\n");
1618        assert!(matches!(doc.blocks[0], Block::HorizontalRule { .. }));
1619    }
1620
1621    #[test]
1622    fn frontmatter_basic() {
1623        let input = "+++\ntitle = \"hi\"\nn = 3\n+++\n# Doc\n";
1624        let (doc, d) = p(input);
1625        assert!(d.is_empty(), "{:?}", d);
1626        let meta = doc.metadata.as_ref().expect("metadata present");
1627        assert_eq!(meta.get("title").and_then(|v| v.as_str()), Some("hi"));
1628        assert_eq!(meta.get("n").and_then(|v| v.as_integer()), Some(3));
1629        assert_eq!(doc.blocks.len(), 1);
1630        assert!(matches!(doc.blocks[0], Block::Heading { level: 1, .. }));
1631    }
1632
1633    #[test]
1634    fn frontmatter_empty_table() {
1635        let (doc, d) = p("+++\n+++\n");
1636        assert!(d.is_empty(), "{:?}", d);
1637        let meta = doc.metadata.as_ref().expect("metadata present");
1638        assert!(meta.is_empty());
1639        assert!(doc.blocks.is_empty());
1640    }
1641
1642    #[test]
1643    fn frontmatter_unterminated() {
1644        let (_, d) = p("+++\nfoo = 1\n");
1645        assert!(
1646            d.iter().any(|x| x.code == Code::UnterminatedFrontmatter),
1647            "{:?}",
1648            d
1649        );
1650    }
1651
1652    #[test]
1653    fn frontmatter_bad_toml() {
1654        let (_, d) = p("+++\nfoo === 1\n+++\n");
1655        assert!(d.iter().any(|x| x.code == Code::FrontmatterToml), "{:?}", d);
1656    }
1657
1658    #[test]
1659    fn frontmatter_only_first_line() {
1660        // Leading blank line means the document does not start with `+++`,
1661        // so this is a stray paragraph, not frontmatter.
1662        let (doc, _d) = p("\n+++\nfoo = 1\n+++\n");
1663        assert!(doc.metadata.is_none());
1664    }
1665
1666    #[test]
1667    fn frontmatter_indented_is_not_frontmatter() {
1668        // Leading spaces on the opening line mean it's not a delimiter.
1669        let (doc, _d) = p("  +++\nfoo = 1\n+++\n");
1670        assert!(doc.metadata.is_none());
1671    }
1672
1673    #[test]
1674    fn frontmatter_no_open_means_none() {
1675        let (doc, _d) = p("# Heading\n");
1676        assert!(doc.metadata.is_none());
1677    }
1678
1679    #[test]
1680    fn frontmatter_crlf() {
1681        let input = "+++\r\ntitle = \"hi\"\r\n+++\r\n# Doc\r\n";
1682        let (doc, d) = p(input);
1683        assert!(d.is_empty(), "{:?}", d);
1684        let meta = doc.metadata.as_ref().expect("metadata present");
1685        assert_eq!(meta.get("title").and_then(|v| v.as_str()), Some("hi"));
1686    }
1687
1688    #[test]
1689    fn dl_basic_two_pairs() {
1690        let (doc, d) = p("@dl\nTerm 1\n: Definition 1.\nTerm 2\n: Definition 2.\n@end\n");
1691        assert!(d.is_empty(), "{:?}", d);
1692        let dl = match &doc.blocks[0] {
1693            Block::DefinitionList { items, .. } => items,
1694            other => panic!("expected DefinitionList, got {:?}", other),
1695        };
1696        assert_eq!(dl.len(), 2);
1697        let term0 = match &dl[0].term[0] {
1698            Inline::Text { value, .. } => value.as_str(),
1699            _ => panic!("expected Text in term"),
1700        };
1701        let def0 = match &dl[0].definition[0] {
1702            Inline::Text { value, .. } => value.as_str(),
1703            _ => panic!("expected Text in definition"),
1704        };
1705        assert_eq!(term0, "Term 1");
1706        assert_eq!(def0, "Definition 1.");
1707        let term1 = match &dl[1].term[0] {
1708            Inline::Text { value, .. } => value.as_str(),
1709            _ => panic!("expected Text in term"),
1710        };
1711        assert_eq!(term1, "Term 2");
1712    }
1713
1714    #[test]
1715    fn dl_continuation_joins_with_space() {
1716        let input = "@dl\nTerm\n: Definition that\n  spans two lines.\n@end\n";
1717        let (doc, d) = p(input);
1718        assert!(d.is_empty(), "{:?}", d);
1719        let items = match &doc.blocks[0] {
1720            Block::DefinitionList { items, .. } => items,
1721            other => panic!("expected DefinitionList, got {:?}", other),
1722        };
1723        assert_eq!(items.len(), 1);
1724        let def_text = match &items[0].definition[0] {
1725            Inline::Text { value, .. } => value.as_str(),
1726            _ => panic!("expected Text"),
1727        };
1728        assert_eq!(def_text, "Definition that spans two lines.");
1729    }
1730
1731    #[test]
1732    fn dl_definition_without_term_is_b0505() {
1733        let (_, d) = p("@dl\n: Stray definition.\nTerm\n: Def.\n@end\n");
1734        assert!(
1735            d.iter().any(|x| x.code == Code::BadDefinitionList),
1736            "{:?}",
1737            d
1738        );
1739    }
1740
1741    #[test]
1742    fn dl_term_without_definition_is_b0505() {
1743        let (_, d) = p("@dl\nLonely term\n@end\n");
1744        assert!(
1745            d.iter().any(|x| x.code == Code::BadDefinitionList),
1746            "{:?}",
1747            d
1748        );
1749    }
1750
1751    #[test]
1752    fn dl_multiple_definitions_per_term_is_b0505() {
1753        let (_, d) = p("@dl\nTerm\n: First def.\n: Second def.\n@end\n");
1754        assert!(
1755            d.iter().any(|x| x.code == Code::BadDefinitionList),
1756            "{:?}",
1757            d
1758        );
1759    }
1760
1761    #[test]
1762    fn dl_empty_body_is_b0505() {
1763        let (_, d) = p("@dl\n@end\n");
1764        assert!(
1765            d.iter().any(|x| x.code == Code::BadDefinitionList),
1766            "{:?}",
1767            d
1768        );
1769    }
1770
1771    #[test]
1772    fn dl_unterminated_is_b0306() {
1773        let (_, d) = p("@dl\nTerm\n: Def.\n");
1774        assert!(
1775            d.iter().any(|x| x.code == Code::UnterminatedBlock),
1776            "{:?}",
1777            d
1778        );
1779    }
1780}