Skip to main content

carta_readers/
jira.rs

1//! Reader for Jira wiki markup — the line-oriented "text formatting notation" used in Jira
2//! issue fields and comments.
3//!
4//! Blocks are recognised by a line prefix (`hN.`, `bq.`, list markers, table pipes, `----`) or a
5//! paired brace macro (`{code}`, `{noformat}`, `{quote}`, `{panel}`). Inline markup — text effects
6//! with flanking delimiters, links, images, monospaced and coloured spans, anchors, symbols, and
7//! emoticons — is applied to the text of each line; markup does not span a line boundary.
8
9use carta_ast::{
10    Alignment, Attr, Block, Caption, Cell, ColSpec, ColWidth, Document, Inline, ListAttributes,
11    ListNumberDelim, ListNumberStyle, Row, Table, TableBody, TableFoot, TableHead, Target,
12    ToCompactString,
13};
14use carta_core::{Reader, ReaderOptions, Result};
15
16/// Parses Jira wiki markup into the document model.
17#[derive(Debug, Default, Clone, Copy)]
18pub struct JiraReader;
19
20impl Reader for JiraReader {
21    fn read(&self, input: &str, _options: &ReaderOptions) -> Result<Document> {
22        Ok(Document {
23            blocks: parse_blocks_from_str(input),
24            ..Document::default()
25        })
26    }
27}
28
29// ---------------------------------------------------------------------------
30// Block layer
31// ---------------------------------------------------------------------------
32
33fn parse_blocks_from_str(input: &str) -> Vec<Block> {
34    blocks_from_str(input, true, true)
35}
36
37/// Parses the content of a single list item as blocks. A list item carries full block structure —
38/// headings, rules, tables, blockquotes, and brace macros — but the stand-alone colour Div is not a
39/// list-item construct, so its marker lines stay literal text there.
40fn parse_list_item_blocks(input: &str) -> Vec<Block> {
41    blocks_from_str(input, false, true)
42}
43
44/// Parses the content of a single table cell as blocks. Lists and brace macros carry block
45/// structure, but a line whose prefix names a heading, blockquote, or horizontal rule stays
46/// paragraph text, and the stand-alone colour Div is not a cell construct.
47fn parse_table_cell(input: &str) -> Vec<Block> {
48    let mut blocks = blocks_from_str(input, false, false);
49    // A cell's own paragraphs carry no surrounding whitespace — so the text that resumes after a
50    // brace macro on the same line loses its leading space. The trim does not recurse: paragraphs
51    // nested inside a list or blockquote keep their own leading whitespace.
52    for block in &mut blocks {
53        if let Block::Para(inlines) = block {
54            trim_edge_whitespace(inlines);
55        }
56    }
57    blocks
58}
59
60/// Drops leading and trailing whitespace inlines (spaces and line breaks) from `inlines`.
61fn trim_edge_whitespace(inlines: &mut Vec<Inline>) {
62    let is_ws = |inline: &Inline| matches!(inline, Inline::Space | Inline::LineBreak);
63    while inlines.first().is_some_and(is_ws) {
64        inlines.remove(0);
65    }
66    while inlines.last().is_some_and(is_ws) {
67        inlines.pop();
68    }
69}
70
71fn blocks_from_str(input: &str, color_block: bool, line_prefix_blocks: bool) -> Vec<Block> {
72    // A carriage return is never a line separator or whitespace here: a `\r\n` pair collapses to a
73    // single line break and a lone `\r` is dropped, so every carriage return is removed up front.
74    let chars: Vec<char> = input.chars().filter(|&c| c != '\r').collect();
75    BlockParser {
76        chars: &chars,
77        pos: 0,
78        color_block,
79        line_prefix_blocks,
80    }
81    .parse_blocks()
82}
83
/// The paired brace macros that can open a block, as reported by `BlockParser::macro_here`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum MacroKind {
    /// `{code}` or `{code:…}`.
    Code,
    /// `{noformat}` or `{noformat:…}`.
    Noformat,
    /// `{quote}` (no parameterised form is recognised).
    Quote,
    /// `{panel}` or `{panel:…}`.
    Panel,
}
91
/// Cursor-based block parser over a slice of characters.
struct BlockParser<'a> {
    /// The input being parsed, one `char` per element; all positions are indices into this slice.
    chars: &'a [char],
    /// Index of the next unconsumed character.
    pos: usize,
    /// Whether a stand-alone `{color:…}`/`{color}` pair forms a block-level coloured `Div`. Disabled
    /// while parsing a list item's content, where those lines stay literal text.
    color_block: bool,
    /// Whether a line whose prefix names a block — a heading (`hN.`), a blockquote (`bq.`), or a
    /// horizontal rule (`----`) — is recognised as that block. Disabled inside a table cell, where
    /// only lists and brace macros carry block structure and such lines stay paragraph text.
    line_prefix_blocks: bool,
}
103
104impl BlockParser<'_> {
    /// Number of characters in the input.
    fn len(&self) -> usize {
        self.chars.len()
    }
108
    /// Whether the cursor has reached (or passed) the end of the input.
    fn at_end(&self) -> bool {
        self.pos >= self.len()
    }
112
113    /// Index of the newline at or after `from`, or the input length when none remains.
114    fn line_end_from(&self, from: usize) -> usize {
115        let mut j = from;
116        while j < self.len() && self.chars.get(j) != Some(&'\n') {
117            j += 1;
118        }
119        j
120    }
121
    /// End of the line the cursor is on: the index of its newline, or the input length.
    fn line_end(&self) -> usize {
        self.line_end_from(self.pos)
    }
125
126    fn is_blank(&self, start: usize, end: usize) -> bool {
127        (start..end).all(|k| self.chars.get(k).is_some_and(|&c| is_space(c)))
128    }
129
130    fn advance_line(&mut self) {
131        let e = self.line_end();
132        self.pos = if e < self.len() { e + 1 } else { e };
133    }
134
135    fn skip_blank_lines(&mut self) {
136        while !self.at_end() {
137            let e = self.line_end();
138            if self.is_blank(self.pos, e) {
139                self.advance_line();
140            } else {
141                break;
142            }
143        }
144    }
145
146    fn parse_blocks(&mut self) -> Vec<Block> {
147        let mut blocks = Vec::new();
148        loop {
149            self.skip_blank_lines();
150            if self.at_end() {
151                break;
152            }
153            if let Some(macro_blocks) = self.try_macro() {
154                blocks.extend(macro_blocks);
155                continue;
156            }
157            if self.color_block
158                && let Some(block) = self.try_color_block()
159            {
160                blocks.push(block);
161                continue;
162            }
163            if self.line_prefix_blocks {
164                if let Some(block) = self.try_heading() {
165                    blocks.push(block);
166                    continue;
167                }
168                if let Some(block) = self.try_horizontal_rule() {
169                    blocks.push(block);
170                    continue;
171                }
172                if let Some(block) = self.try_blockquote() {
173                    blocks.push(block);
174                    continue;
175                }
176            }
177            if self.table_here() {
178                blocks.push(self.parse_table());
179                continue;
180            }
181            if self.list_here() {
182                self.parse_list_group(&mut blocks);
183                continue;
184            }
185            blocks.push(self.parse_paragraph());
186        }
187        blocks
188    }
189
190    // --- block-start predicates -------------------------------------------
191
192    fn macro_here(&self) -> Option<MacroKind> {
193        let p = self.pos;
194        if matches_at(self.chars, p, "{code}") || matches_at(self.chars, p, "{code:") {
195            Some(MacroKind::Code)
196        } else if matches_at(self.chars, p, "{noformat}") || matches_at(self.chars, p, "{noformat:")
197        {
198            Some(MacroKind::Noformat)
199        } else if matches_at(self.chars, p, "{quote}") {
200            Some(MacroKind::Quote)
201        } else if matches_at(self.chars, p, "{panel}") || matches_at(self.chars, p, "{panel:") {
202            Some(MacroKind::Panel)
203        } else {
204            None
205        }
206    }
207
208    fn heading_here(&self) -> Option<i32> {
209        if self.chars.get(self.pos) != Some(&'h') || self.chars.get(self.pos + 2) != Some(&'.') {
210            return None;
211        }
212        self.chars
213            .get(self.pos + 1)
214            .and_then(|c| c.to_digit(10))
215            .filter(|d| (1..=6).contains(d))
216            .and_then(|d| i32::try_from(d).ok())
217    }
218
219    fn horizontal_rule_here(&self) -> bool {
220        // A rule is exactly four hyphens at the line start; only trailing whitespace is allowed, so
221        // any leading indentation makes the line an ordinary paragraph instead.
222        let e = trim_end(self.chars, self.pos, self.line_end());
223        e - self.pos == 4 && (self.pos..e).all(|k| self.chars.get(k) == Some(&'-'))
224    }
225
    /// Whether the text at the cursor begins with the single-line blockquote prefix `bq.`.
    fn blockquote_here(&self) -> bool {
        matches_at(self.chars, self.pos, "bq.")
    }
229
230    /// A line beginning with a colour marker — an opening `{color:…}` or a closing `{color}` — starts
231    /// a new block, so it ends any paragraph that runs into it.
232    fn color_marker_line_here(&self) -> bool {
233        matches_at(self.chars, self.pos, "{color:") || matches_at(self.chars, self.pos, "{color}")
234    }
235
236    fn table_here(&self) -> bool {
237        // A line of only delimiters carries no cells, so it is ordinary text rather than a row.
238        self.chars.get(self.pos) == Some(&'|')
239            && !parse_table_row(self.chars, self.pos, self.line_end()).is_empty()
240    }
241
242    /// A run of one or more list-marker characters, optionally indented, followed by a space and at
243    /// least one non-space character of item text. A marker with no content after it is ordinary text.
244    fn list_here(&self) -> bool {
245        let mut k = self.pos;
246        while matches!(self.chars.get(k), Some(' ' | '\t')) {
247            k += 1;
248        }
249        let marker_start = k;
250        while matches!(self.chars.get(k), Some('*' | '-' | '#')) {
251            k += 1;
252        }
253        if k == marker_start || self.chars.get(k) != Some(&' ') {
254            return false;
255        }
256        let content_start = k + 1;
257        trim_end(self.chars, content_start, self.line_end()) > content_start
258    }
259
260    fn line_starts_block(&self) -> bool {
261        self.macro_here().is_some()
262            || self.color_marker_line_here()
263            || self.heading_here().is_some()
264            || self.horizontal_rule_here()
265            || self.blockquote_here()
266            || self.table_here()
267            || self.list_here()
268    }
269
270    // --- simple blocks -----------------------------------------------------
271
272    fn try_heading(&mut self) -> Option<Block> {
273        let level = self.heading_here()?;
274        let e = self.line_end();
275        // A bare block macro in the content makes the line a paragraph that the block layer then
276        // splits at the macro, rather than a heading carrying the macro as literal text.
277        if self.first_block_macro(self.pos + 3, e).is_some() {
278            return None;
279        }
280        let (ts, te) = trim(self.chars, self.pos + 3, e);
281        let inlines = drop_trailing_break(parse_inlines(self.chars, ts, te));
282        self.advance_line();
283        Some(Block::Header(level, Box::default(), inlines))
284    }
285
286    fn try_horizontal_rule(&mut self) -> Option<Block> {
287        if !self.horizontal_rule_here() {
288            return None;
289        }
290        self.advance_line();
291        Some(Block::HorizontalRule)
292    }
293
294    fn try_blockquote(&mut self) -> Option<Block> {
295        if !self.blockquote_here() {
296            return None;
297        }
298        let e = self.line_end();
299        // A bare block macro in the content makes the line a paragraph that the block layer then
300        // splits at the macro, rather than a blockquote carrying the macro as literal text.
301        if self.first_block_macro(self.pos + 3, e).is_some() {
302            return None;
303        }
304        let (ts, te) = trim(self.chars, self.pos + 3, e);
305        let inlines = drop_trailing_break(parse_inlines(self.chars, ts, te));
306        self.advance_line();
307        Some(Block::BlockQuote(vec![Block::Para(inlines)]))
308    }
309
310    fn parse_paragraph(&mut self) -> Block {
311        let para_start = self.pos;
312        // The first line is always part of the paragraph; this guarantees forward progress. Its
313        // leading whitespace is kept (it collapses to a single leading space). Continuation lines
314        // join across the newline, which the inline layer renders as a soft line break, absorbing
315        // the whitespace around it.
316        let mut content_end = self.line_end();
317        self.advance_line();
318        loop {
319            if self.at_end() {
320                break;
321            }
322            let e = self.line_end();
323            if self.is_blank(self.pos, e) || self.line_starts_block() {
324                break;
325            }
326            content_end = e;
327            self.advance_line();
328        }
329        // A bare block macro that opens partway through the text ends the paragraph at that point;
330        // the block layer processes the macro on the next pass.
331        if let Some(macro_pos) = self.first_block_macro(para_start, content_end) {
332            self.pos = macro_pos;
333            content_end = macro_pos;
334        }
335        let para_end = trim_end(self.chars, para_start, content_end);
336        Block::Para(drop_trailing_break(parse_inlines(
337            self.chars, para_start, para_end,
338        )))
339    }
340
341    /// Index of the first bare block macro (`{code}`, `{noformat}`, `{quote}`, `{panel}`) in
342    /// `lo..hi`, skipping a token whose `{` is escaped by a preceding backslash. The parameterised
343    /// forms (`{code:…}` and friends) are recognised only at the start of a block, so they are not
344    /// reported here.
345    fn first_block_macro(&self, lo: usize, hi: usize) -> Option<usize> {
346        let mut k = lo;
347        while k < hi {
348            if self.chars.get(k) == Some(&'\\') {
349                k += 2;
350                continue;
351            }
352            if bare_block_macro_at(self.chars, k) {
353                return Some(k);
354            }
355            k += 1;
356        }
357        None
358    }
359
360    /// A block-level colour span: an opening `{color:VALUE}` whose matching close is a line holding
361    /// only `{color}`. The text between is parsed as blocks and wrapped in a `Div` carrying the
362    /// colour. An unrecognised value, an absent or non-standalone close, a nested block construct, or
363    /// empty content all leave the markup for the inline layer or as literal text.
364    fn try_color_block(&mut self) -> Option<Block> {
365        if !matches_at(self.chars, self.pos, "{color:") {
366            return None;
367        }
368        let value_start = self.pos + "{color:".len();
369        let open_line_end = self.line_end();
370        let brace = (value_start..open_line_end).find(|&k| self.chars.get(k) == Some(&'}'))?;
371        let value = color_value(&slice_to_string(self.chars, value_start, brace))?;
372        let content_start = brace + 1;
373
374        let mut ls = next_line_start(open_line_end, self.len());
375        let close_line_start = loop {
376            if ls >= self.len() {
377                return None;
378            }
379            let le = self.line_end_from(ls);
380            if matches_at(self.chars, ls, "{color}") && self.is_blank(ls + "{color}".len(), le) {
381                break ls;
382            }
383            let probe = BlockParser {
384                chars: self.chars,
385                pos: ls,
386                color_block: self.color_block,
387                line_prefix_blocks: self.line_prefix_blocks,
388            };
389            if !self.is_blank(ls, le) && probe.line_starts_block() {
390                return None;
391            }
392            ls = next_line_start(le, self.len());
393        };
394
395        let inner = parse_color_block_inner(self.chars.get(content_start..close_line_start)?);
396        // Content that begins with a blank line yields no leading paragraph, so the markup does not
397        // form a block.
398        match inner.first() {
399            None => return None,
400            Some(Block::Para(inlines)) if inlines.is_empty() => return None,
401            _ => {}
402        }
403
404        let close_line_end = self.line_end_from(close_line_start);
405        self.pos = next_line_start(close_line_end, self.len());
406        let attr = Attr {
407            id: carta_ast::Text::default(),
408            classes: Vec::new(),
409            attributes: vec![("color".into(), value.into())],
410        };
411        Some(Block::Div(Box::new(attr), inner))
412    }
413
414    // --- tables ------------------------------------------------------------
415
416    fn parse_table(&mut self) -> Block {
417        let mut rows: Vec<Vec<(bool, String)>> = Vec::new();
418        while !self.at_end() {
419            let e = self.line_end();
420            if self.is_blank(self.pos, e) || self.chars.get(self.pos) != Some(&'|') {
421                break;
422            }
423            let cells = parse_table_row(self.chars, self.pos, e);
424            if cells.is_empty() {
425                // A delimiter-only line has no cells; it ends the table and reparses as text.
426                break;
427            }
428            rows.push(cells);
429            self.advance_line();
430        }
431
432        let col_count = rows.iter().map(Vec::len).max().unwrap_or(0);
433        let mut head_rows = Vec::new();
434        let mut body_rows = Vec::new();
435        let mut still_header = true;
436        for cells in &rows {
437            let all_header = !cells.is_empty() && cells.iter().all(|(is_header, _)| *is_header);
438            let row = build_table_row(cells, col_count);
439            if still_header && all_header {
440                head_rows.push(row);
441            } else {
442                still_header = false;
443                body_rows.push(row);
444            }
445        }
446
447        let table = Table {
448            attr: Attr::default(),
449            caption: Caption {
450                short: None,
451                long: Vec::new(),
452            },
453            col_specs: vec![
454                ColSpec {
455                    align: Alignment::AlignDefault,
456                    width: ColWidth::ColWidthDefault,
457                };
458                col_count
459            ],
460            head: TableHead {
461                attr: Attr::default(),
462                rows: head_rows,
463            },
464            bodies: vec![TableBody {
465                attr: Attr::default(),
466                row_head_columns: 0,
467                head: Vec::new(),
468                body: body_rows,
469            }],
470            foot: TableFoot::default(),
471        };
472        Block::Table(Box::new(table))
473    }
474
475    // --- lists -------------------------------------------------------------
476
477    fn parse_list_group(&mut self, out: &mut Vec<Block>) {
478        let mut items: Vec<ListItem> = Vec::new();
479        loop {
480            if self.at_end() {
481                break;
482            }
483            let e = self.line_end();
484            // A blank line and a horizontal rule both close the list; everything else that is not a
485            // new marker is item content (a continuation line), so headings, tables, blockquotes, and
486            // brace macros that follow an item are absorbed into it rather than ending the list.
487            if self.is_blank(self.pos, e) || self.horizontal_rule_here() {
488                break;
489            }
490            if self.list_here() {
491                let mut k = self.pos;
492                while matches!(self.chars.get(k), Some(' ' | '\t')) {
493                    k += 1;
494                }
495                let marker_start = k;
496                while matches!(self.chars.get(k), Some('*' | '-' | '#')) {
497                    k += 1;
498                }
499                let marker = slice_to_string(self.chars, marker_start, k);
500                // Exactly one space separates the marker from the item text; any further leading
501                // whitespace is part of the content.
502                let content_start = k + 1;
503                items.push(ListItem {
504                    marker,
505                    text: slice_to_string(self.chars, content_start, e),
506                });
507                self.advance_line();
508            } else if let Some(last) = items.last_mut() {
509                last.text.push('\n');
510                last.text
511                    .push_str(&slice_to_string(self.chars, self.pos, e));
512                self.advance_line();
513            } else {
514                break;
515            }
516        }
517        build_lists(&items, 1, out);
518    }
519
520    // --- brace macros ------------------------------------------------------
521
522    fn try_macro(&mut self) -> Option<Vec<Block>> {
523        let kind = self.macro_here()?;
524        let fence_end = (self.pos..self.len()).find(|&k| self.chars.get(k) == Some(&'}'))?;
525        let inside = slice_to_string(self.chars, self.pos + 1, fence_end);
526        let params = inside.split_once(':').map(|(_, p)| p.to_string());
527        let has_params = params.is_some();
528        let open_line_end = self.line_end_from(fence_end);
529        let open_trailing_blank = self.is_blank(fence_end + 1, open_line_end);
530        match kind {
531            MacroKind::Code => self.parse_code(
532                params.as_deref(),
533                open_line_end,
534                has_params,
535                open_trailing_blank,
536            ),
537            MacroKind::Noformat => Some(self.parse_noformat(
538                params.as_deref(),
539                fence_end,
540                open_line_end,
541                open_trailing_blank,
542            )),
543            MacroKind::Quote => Some(self.parse_quote(fence_end)),
544            MacroKind::Panel => self.parse_panel(
545                params.as_deref(),
546                fence_end,
547                has_params,
548                open_trailing_blank,
549            ),
550        }
551    }
552
553    /// Parses a `{code}` block. Its open fence must be alone on its line: any non-blank content
554    /// after the closing brace disqualifies the block. With parameters present such an open line is
555    /// not a code block at all (it reverts to text); a bare `{code}` with trailing content instead
556    /// consumes the remainder of the input. The content begins on the next line and runs to a close
557    /// line that ends with `{code}` (any text before the close on that line is kept).
558    fn parse_code(
559        &mut self,
560        params: Option<&str>,
561        open_line_end: usize,
562        has_params: bool,
563        open_trailing_blank: bool,
564    ) -> Option<Vec<Block>> {
565        if !open_trailing_blank {
566            if has_params {
567                return None;
568            }
569            self.pos = self.len();
570            return Some(Vec::new());
571        }
572        let content_start = next_line_start(open_line_end, self.len());
573        let (classes, attributes) = verbatim_params(MacroKind::Code, params);
574        let attr = Attr {
575            id: carta_ast::Text::default(),
576            classes: classes.into_iter().map(Into::into).collect(),
577            attributes: attributes
578                .into_iter()
579                .map(|(k, v)| (k.into(), v.into()))
580                .collect(),
581        };
582        if let Some((content, resume)) = self.scan_code_content(content_start) {
583            self.pos = resume;
584            Some(vec![Block::CodeBlock(Box::new(attr), content.into())])
585        } else {
586            self.pos = self.len();
587            Some(Vec::new())
588        }
589    }
590
591    /// Collects the lines from `start` up to a `{code}` close. A close is a line that ends with the
592    /// token once trailing whitespace is ignored; text before the token on that line is content, and
593    /// parsing resumes on the following line. Returns the content and resume index, or `None` when no
594    /// close is found.
595    fn scan_code_content(&self, start: usize) -> Option<(String, usize)> {
596        const CLOSE: &str = "{code}";
597        let close_len = CLOSE.chars().count();
598        let mut content = String::new();
599        let mut cur = start;
600        while cur < self.len() {
601            let le = self.line_end_from(cur);
602            let te = trim_end(self.chars, cur, le);
603            if te >= cur + close_len && matches_at(self.chars, te - close_len, CLOSE) {
604                content.push_str(&slice_to_string(self.chars, cur, te - close_len));
605                return Some((content, next_line_start(le, self.len())));
606            }
607            content.push_str(&slice_to_string(self.chars, cur, le));
608            content.push('\n');
609            cur = next_line_start(le, self.len());
610        }
611        None
612    }
613
614    /// Parses a `{noformat}` block. Unlike `{code}`, content may begin on the open line: when the
615    /// rest of that line is blank the content starts on the next line, otherwise it starts right
616    /// after the closing brace. The block ends at the first `{noformat}`; any text after the close on
617    /// its line continues as following content.
618    fn parse_noformat(
619        &mut self,
620        params: Option<&str>,
621        fence_end: usize,
622        open_line_end: usize,
623        open_trailing_blank: bool,
624    ) -> Vec<Block> {
625        const CLOSE: &str = "{noformat}";
626        let content_start = if open_trailing_blank {
627            next_line_start(open_line_end, self.len())
628        } else {
629            fence_end + 1
630        };
631        let (classes, attributes) = verbatim_params(MacroKind::Noformat, params);
632        let attr = Attr {
633            id: carta_ast::Text::default(),
634            classes: classes.into_iter().map(Into::into).collect(),
635            attributes: attributes
636                .into_iter()
637                .map(|(k, v)| (k.into(), v.into()))
638                .collect(),
639        };
640        if let Some(close) = find_token(self.chars, content_start, CLOSE) {
641            let content = slice_to_string(self.chars, content_start, close);
642            self.pos = close + CLOSE.chars().count();
643            vec![Block::CodeBlock(Box::new(attr), content.into())]
644        } else {
645            self.pos = self.len();
646            Vec::new()
647        }
648    }
649
650    /// Consume the text between the current fence and its closing `close_token`, advancing past the
651    /// token. When the closing token is absent the whole remaining input is consumed and `None` is
652    /// returned.
653    fn take_fenced(&mut self, fence_end: usize, close_token: &str) -> Option<String> {
654        match find_token(self.chars, fence_end + 1, close_token) {
655            None => {
656                self.pos = self.len();
657                None
658            }
659            Some(close) => {
660                let content = slice_to_string(self.chars, fence_end + 1, close);
661                self.pos = close + close_token.len();
662                Some(content)
663            }
664        }
665    }
666
667    fn parse_quote(&mut self, fence_end: usize) -> Vec<Block> {
668        let Some(content) = self.take_fenced(fence_end, "{quote}") else {
669            return Vec::new();
670        };
671        vec![Block::BlockQuote(parse_blocks_from_str(&content))]
672    }
673
674    /// Parses a `{panel}` block. Like `{code}`, its open fence must stand alone on its line: a
675    /// parameterised open line with trailing content reverts to text, while a bare `{panel}` with
676    /// trailing content consumes the remainder of the input.
677    fn parse_panel(
678        &mut self,
679        params: Option<&str>,
680        fence_end: usize,
681        has_params: bool,
682        open_trailing_blank: bool,
683    ) -> Option<Vec<Block>> {
684        if !open_trailing_blank {
685            if has_params {
686                return None;
687            }
688            self.pos = self.len();
689            return Some(Vec::new());
690        }
691        let Some(content) = self.take_fenced(fence_end, "{panel}") else {
692            return Some(Vec::new());
693        };
694        let (title, attributes) = panel_params(params);
695        let mut inner = Vec::new();
696        if let Some(title) = title {
697            inner.push(Block::Div(
698                Box::new(Attr {
699                    id: carta_ast::Text::default(),
700                    classes: vec!["panelheader".into()],
701                    attributes: Vec::new(),
702                }),
703                vec![Block::Plain(vec![Inline::Strong(plain_inlines(&title))])],
704            ));
705        }
706        inner.extend(parse_blocks_from_str(&content));
707        Some(vec![Block::Div(
708            Box::new(Attr {
709                id: carta_ast::Text::default(),
710                classes: vec!["panel".into()],
711                attributes: attributes
712                    .into_iter()
713                    .map(|(k, v)| (k.into(), v.into()))
714                    .collect(),
715            }),
716            inner,
717        )])
718    }
719}
720
/// Start of the line following the one that ends at `line_end`.
///
/// When `line_end` is inside the input it indexes a newline, so the next line starts one past it.
/// When it is at or beyond `len`, that line ended the input and `line_end` is returned unchanged.
fn next_line_start(line_end: usize, len: usize) -> usize {
    line_end + usize::from(line_end < len)
}
729
730/// Parses the body of a block-level colour span. The newline that follows the opening marker leads
731/// the first paragraph as a line break; blank lines beyond it separate the body into paragraphs.
732fn parse_color_block_inner(content: &[char]) -> Vec<Block> {
733    let mut parser = BlockParser {
734        chars: content,
735        pos: 0,
736        color_block: true,
737        line_prefix_blocks: true,
738    };
739    let mut blocks = Vec::new();
740    let mut first = true;
741    loop {
742        if first {
743            first = false;
744        } else {
745            parser.skip_blank_lines();
746        }
747        if parser.at_end() {
748            break;
749        }
750        blocks.push(parser.parse_paragraph());
751    }
752    blocks
753}
754
/// One list line as collected by `parse_list_group`, before nesting is resolved.
struct ListItem {
    /// The run of marker characters (`*`, `-`, `#`) that opened the item, without indentation.
    marker: String,
    /// The item text after the single separating space, with any continuation lines appended
    /// after a `\n` each.
    text: String,
}
759
/// Builds the nested list blocks for `items` at the given (1-based) depth, appending sibling lists
/// to `out`. A marker's length is its nesting depth: items are grouped by the marker character at
/// this depth, a different character starts a separate sibling list, and any item whose marker is
/// longer than this depth nests inside the current item — so a lone `*** x` produces three nested
/// lists even with no shallower item preceding it.
///
/// A group whose character at this depth is `#` becomes an ordered list starting at 1 with default
/// style and delimiter; any other marker character yields a bullet list.
fn build_lists(items: &[ListItem], depth: usize, out: &mut Vec<Block>) {
    let mut idx = 0;
    while idx < items.len() {
        // An item whose marker is shorter than `depth` has no character here; skip it.
        let Some(group_char) = marker_char(items, idx, depth) else {
            idx += 1;
            continue;
        };
        let mut list_items: Vec<Vec<Block>> = Vec::new();
        // Each pass of this loop emits one item of the current list and advances `idx` past it
        // and all of its nested children.
        while marker_char(items, idx, depth) == Some(group_char) {
            let mut item_blocks = Vec::new();
            // An item whose marker length is exactly this depth owns the text; a longer marker is an
            // implicit parent that carries only its nested child list.
            let owns_content = marker_len(items, idx) == depth;
            let child_start = if owns_content {
                if let Some(item) = items.get(idx) {
                    item_blocks.extend(parse_list_item_blocks(&item.text));
                }
                idx + 1
            } else {
                idx
            };
            // Children are the following deeper items that still share this depth's character.
            let mut child_end = child_start;
            while marker_len(items, child_end) > depth
                && marker_char(items, child_end, depth) == Some(group_char)
            {
                child_end += 1;
            }
            if let Some(children) = items.get(child_start..child_end) {
                build_lists(children, depth + 1, &mut item_blocks);
            }
            list_items.push(item_blocks);
            idx = child_end;
        }
        if group_char == '#' {
            out.push(Block::OrderedList(
                ListAttributes {
                    start: 1,
                    style: ListNumberStyle::DefaultStyle,
                    delim: ListNumberDelim::DefaultDelim,
                },
                list_items,
            ));
        } else {
            out.push(Block::BulletList(list_items));
        }
    }
}
812
813fn marker_char(items: &[ListItem], idx: usize, depth: usize) -> Option<char> {
814    items
815        .get(idx)
816        .and_then(|it| it.marker.chars().nth(depth - 1))
817}
818
819fn marker_len(items: &[ListItem], idx: usize) -> usize {
820    items.get(idx).map_or(0, |it| it.marker.chars().count())
821}
822
823/// Removes a forced line break that ends a block's inline content. A line break with nothing after
824/// it has no following line to separate, so it is dropped.
825fn drop_trailing_break(mut inlines: Vec<Inline>) -> Vec<Inline> {
826    while matches!(inlines.last(), Some(Inline::LineBreak)) {
827        inlines.pop();
828    }
829    inlines
830}
831
832fn parse_table_row(chars: &[char], start: usize, end: usize) -> Vec<(bool, String)> {
833    let mut cells = Vec::new();
834    let (mut i, _) = trim(chars, start, end);
835    while i < end {
836        if chars.get(i) != Some(&'|') {
837            break;
838        }
839        let mut run = 0;
840        while i < end && chars.get(i) == Some(&'|') {
841            run += 1;
842            i += 1;
843        }
844        let is_header = run >= 2;
845        // Scan the cell content up to the next delimiter, ignoring pipes nested inside a bracketed
846        // link, a brace span, or an image's property list so an inner `|` does not split the cell.
847        let cell_start = i;
848        let mut depth = 0i32;
849        while i < end {
850            match chars.get(i) {
851                Some('[' | '{') => {
852                    depth += 1;
853                    i += 1;
854                }
855                Some(']' | '}') => {
856                    depth = depth.saturating_sub(1);
857                    i += 1;
858                }
859                Some('!') if depth == 0 => match parse_image(chars, i, end) {
860                    Some((_, next)) => i = next,
861                    None => i += 1,
862                },
863                Some('|') if depth == 0 => break,
864                _ => i += 1,
865            }
866        }
867        let (ts, te) = trim(chars, cell_start, i);
868        let content = slice_to_string(chars, ts, te);
869        if i >= end && content.is_empty() {
870            // A trailing delimiter run closes the final cell; it introduces no new cell.
871            break;
872        }
873        cells.push((is_header, content));
874    }
875    cells
876}
877
878fn build_table_row(cells: &[(bool, String)], col_count: usize) -> Row {
879    let mut out_cells = Vec::with_capacity(col_count);
880    for col in 0..col_count {
881        let content = match cells.get(col) {
882            Some((_, text)) if !text.is_empty() => parse_table_cell(text),
883            _ => Vec::new(),
884        };
885        out_cells.push(Cell {
886            attr: Attr::default(),
887            align: Alignment::AlignDefault,
888            row_span: 1,
889            col_span: 1,
890            content,
891        });
892    }
893    Row {
894        attr: Attr::default(),
895        cells: out_cells,
896    }
897}
898
899fn verbatim_params(kind: MacroKind, params: Option<&str>) -> (Vec<String>, Vec<(String, String)>) {
900    let mut classes = Vec::new();
901    let mut attributes = Vec::new();
902    let tokens: Vec<&str> = match params {
903        Some(p) if !p.is_empty() => p.split('|').collect(),
904        _ => Vec::new(),
905    };
906    match kind {
907        MacroKind::Code => {
908            let mut language = "java".to_string();
909            for (idx, token) in tokens.iter().enumerate() {
910                let token = token.trim();
911                if let Some((key, value)) = token.split_once('=') {
912                    attributes.push((key.trim().to_string(), value.trim().to_string()));
913                } else if idx == 0 && !token.is_empty() {
914                    language = token.to_string();
915                }
916            }
917            classes.push(language);
918        }
919        _ => {
920            for token in tokens {
921                if let Some((key, value)) = token.trim().split_once('=') {
922                    attributes.push((key.trim().to_string(), value.trim().to_string()));
923                }
924            }
925        }
926    }
927    (classes, attributes)
928}
929
/// Splits the `|`-separated parameter list of a `{panel}` macro into its title and the remaining
/// attributes.
///
/// Only `key=value` tokens count; keys and values are trimmed. The `title` key is pulled out as
/// the panel title (the last one wins when it repeats) and every other pair is kept, in order, as
/// an attribute.
fn panel_params(params: Option<&str>) -> (Option<String>, Vec<(String, String)>) {
    let mut title = None;
    let mut attributes = Vec::new();
    let pairs = params
        .unwrap_or_default()
        .split('|')
        .filter_map(|token| token.trim().split_once('='));
    for (key, value) in pairs {
        let key = key.trim();
        let value = value.trim().to_string();
        if key == "title" {
            title = Some(value);
        } else {
            attributes.push((key.to_string(), value));
        }
    }
    (title, attributes)
}
948
949// ---------------------------------------------------------------------------
950// Inline layer
951// ---------------------------------------------------------------------------
952
/// URL prefixes accepted as a bracketed link target. Every scheme is listed together with its
/// `://` separator, except `mailto:`, which has none.
const LINK_URL_PREFIXES: &[&str] = &[
    "https://", "http://", "ftp://", "file://", "news://", "nntp://", "irc://", "mailto:",
];
/// URL prefixes accepted as a bare (unbracketed) autolink. This is the bracketed-link list minus
/// `file://`, which is not autolinked.
const BARE_URL_PREFIXES: &[&str] = &[
    "https://", "http://", "ftp://", "news://", "nntp://", "irc://", "mailto:",
];
961
/// Parenthesised symbol codes and the glyph each one is replaced with. `scan_tokens` consults this
/// table through `match_token_symbol` whenever it meets a `(`; a matched code is folded into the
/// surrounding literal text as its glyph. The four coloured star codes and the plain star all map
/// to the same star glyph.
const PAREN_SYMBOLS: &[(&str, char)] = &[
    ("(flagoff)", '\u{2690}'), // white flag
    ("(flag)", '\u{2691}'),    // black flag
    ("(off)", '\u{1F319}'),    // crescent moon
    ("(on)", '\u{1F4A1}'),     // light bulb
    ("(*r)", '\u{2B50}'),      // star
    ("(*g)", '\u{2B50}'),      // star
    ("(*b)", '\u{2B50}'),      // star
    ("(*y)", '\u{2B50}'),      // star
    ("(*)", '\u{2B50}'),       // star
    ("(!)", '\u{2757}'),       // heavy exclamation mark
    ("(x)", '\u{274C}'),       // cross mark
    ("(/)", '\u{2714}'),       // heavy check mark
    ("(i)", '\u{2139}'),       // information source
    ("(?)", '\u{2753}'),       // question mark ornament
    ("(y)", '\u{1F44D}'),      // thumbs up
    ("(n)", '\u{1F44E}'),      // thumbs down
    ("(+)", '\u{2795}'),       // heavy plus sign
    ("(-)", '\u{2796}'),       // heavy minus sign
];
982
/// Two-character emoticons and the glyph each one is replaced with. `scan_tokens` consults this
/// table through `match_token_symbol` whenever it meets a `:` or `;`; a matched emoticon is folded
/// into the surrounding literal text as its glyph.
const EMOTICONS: &[(&str, char)] = &[
    (":)", '\u{1F642}'), // slightly smiling face
    (":(", '\u{1F641}'), // slightly frowning face
    (":P", '\u{1F61B}'), // face with stuck-out tongue
    (":D", '\u{1F603}'), // smiling face with open mouth
    (";)", '\u{1F609}'), // winking face
];
990
991/// Tokenises `text` into inlines without interpreting markup: whitespace runs become
992/// [`Inline::Space`] and every other run becomes an [`Inline::Str`]. Used for a panel title, whose
993/// text is rendered verbatim inside its header.
994fn plain_inlines(text: &str) -> Vec<Inline> {
995    let mut out = Vec::new();
996    let mut word = String::new();
997    for ch in text.chars() {
998        if is_space(ch) {
999            if !word.is_empty() {
1000                out.push(Inline::Str(std::mem::take(&mut word).into()));
1001            }
1002            if out.last() != Some(&Inline::Space) {
1003                out.push(Inline::Space);
1004            }
1005        } else {
1006            word.push(ch);
1007        }
1008    }
1009    if !word.is_empty() {
1010        out.push(Inline::Str(word.into()));
1011    }
1012    out
1013}
1014
/// A unit of scanned inline content, before text-effect delimiters are paired up. `scan_tokens`
/// produces these, `resolve` pairs the delimiters into spans, and `finalize` lowers whatever
/// remains into inlines.
enum Tok {
    /// A run of literal text.
    Text(String),
    /// A single flanking delimiter that may open and/or close a text-effect span. One that is
    /// never paired is lowered by `finalize` to its literal marker character.
    Delim {
        /// The delimiter character; `make_span` picks the span kind from it.
        marker: char,
        /// Whether the delimiter is placed so that it may start a span.
        open: bool,
        /// Whether the delimiter is placed so that it may end a span.
        close: bool,
    },
    /// A fully formed inline node — link, image, span, monospace, line break, or space.
    Atom(Inline),
}
1028
/// Inline-nesting depth past which parsing stops descending. Monospace, colour, link-label and
/// citation spans each re-enter inline parsing on their inner text; a hard cap keeps adversarially
/// deep nesting off the call stack. It is far beyond any nesting real text uses.
///
/// The cap is compared with `>`, so depths `0..=MAX_INLINE_DEPTH` are still parsed. Besides
/// `inlines_with`, the recursive search for a monospace close applies the same cap to nested `{{`.
const MAX_INLINE_DEPTH: usize = 32;
1033
1034/// Parses the character range `lo..hi` into inline nodes: it scans the text into tokens, pairs the
1035/// flanking delimiters into spans, and folds the result into a flat list of inlines. Flanking
1036/// decisions consult the real neighbouring characters via absolute indices, so a range bounded to a
1037/// single line will not let markup escape that line.
1038fn parse_inlines(chars: &[char], lo: usize, hi: usize) -> Vec<Inline> {
1039    inlines_with(chars, lo, hi, true, 0)
1040}
1041
1042/// Parses inlines with control over bare-URL autolinking. A link label cannot contain another link,
1043/// so the text of one is parsed with `autolink` cleared. `depth` tracks how many nested spans deep
1044/// this call is; past the cap the remaining span is emitted as literal text without descending.
1045fn inlines_with(chars: &[char], lo: usize, hi: usize, autolink: bool, depth: usize) -> Vec<Inline> {
1046    if depth > MAX_INLINE_DEPTH {
1047        let text = slice_to_string(chars, lo, hi);
1048        return if text.is_empty() {
1049            Vec::new()
1050        } else {
1051            vec![Inline::Str(text.into())]
1052        };
1053    }
1054    finalize(resolve(scan_tokens(chars, lo, hi, autolink, depth)))
1055}
1056
1057fn push_text(pending: &mut String, toks: &mut Vec<Tok>) {
1058    if !pending.is_empty() {
1059        toks.push(Tok::Text(std::mem::take(pending)));
1060    }
1061}
1062
/// Scans `lo..hi` left to right into tokens: literal runs accumulate into [`Tok::Text`], a flanking
/// delimiter becomes a [`Tok::Delim`], and a self-contained construct (link, image, brace span,
/// citation, autolink, whitespace run) becomes a [`Tok::Atom`]. Entity references, parenthesised
/// symbols and emoticons are not atoms: their replacement text is folded into the literal run.
///
/// `autolink` enables bare-URL detection. `depth` is the current inline-nesting depth and is passed
/// on to the constructs that re-enter inline parsing. Literal characters are buffered in `pending`,
/// which is flushed as one text token before any non-text token is pushed, so the tokens keep
/// source order.
fn scan_tokens(chars: &[char], lo: usize, hi: usize, autolink: bool, depth: usize) -> Vec<Tok> {
    let mut toks: Vec<Tok> = Vec::new();
    let mut pending = String::new();
    let mut i = lo;

    while i < hi {
        let Some(&c) = chars.get(i) else {
            break;
        };

        // A whitespace run collapses to a single space or line-break atom.
        if is_space(c) {
            push_text(&mut pending, &mut toks);
            i = scan_whitespace_run(chars, i, hi, &mut toks);
            continue;
        }

        // Read from the full input (not clamped to `lo`), so the real left neighbour decides.
        let prev_alnum = i > 0 && chars.get(i - 1).is_some_and(|c| c.is_alphanumeric());

        // A bare URL is only recognised when it does not continue a word; its text doubles as the
        // link label.
        if autolink
            && !prev_alnum
            && let Some(end) = match_bare_url(chars, i, hi)
        {
            push_text(&mut pending, &mut toks);
            let url = slice_to_string(chars, i, end);
            toks.push(Tok::Atom(Inline::Link(
                Box::default(),
                vec![Inline::Str(url.clone().into())],
                Box::new(Target {
                    url: url.into(),
                    title: carta_ast::Text::default(),
                }),
            )));
            i = end;
            continue;
        }

        match c {
            '\\' => {
                i = scan_backslash(chars, i, hi, &mut pending, &mut toks);
            }
            '&' => {
                // A recognised entity reference contributes its text; otherwise the ampersand is
                // literal.
                if let Some((text, next)) = crate::entities::read_reference(chars, i, hi, false) {
                    pending.push_str(&text);
                    i = next;
                } else {
                    pending.push('&');
                    i += 1;
                }
            }
            '?' => {
                // A `??…??` citation is emitted as an em dash (appended to the preceding text), a
                // space, and the cited text in emphasis.
                if let Some((next, inner)) = parse_citation(chars, i, hi, autolink, depth) {
                    pending.push('\u{2014}');
                    push_text(&mut pending, &mut toks);
                    toks.push(Tok::Atom(Inline::Space));
                    toks.push(Tok::Atom(Inline::Emph(inner)));
                    i = next;
                } else {
                    pending.push('?');
                    i += 1;
                }
            }
            '*' | '_' | '+' | '^' | '~' => {
                push_delimiter(c, chars, i, &mut pending, &mut toks);
                i += 1;
            }
            '-' => {
                // Hyphens are either folded into typographic dashes or act as a strikeout delimiter.
                i = scan_dash(chars, i, hi, &mut pending, &mut toks);
            }
            '(' => {
                // A parenthesised symbol code is replaced by its glyph inside the literal text.
                if let Some((glyph, len)) = match_token_symbol(chars, i, PAREN_SYMBOLS) {
                    pending.push(glyph);
                    i += len;
                } else {
                    pending.push('(');
                    i += 1;
                }
            }
            ':' | ';' => {
                // An emoticon is replaced by its glyph inside the literal text.
                if let Some((glyph, len)) = match_token_symbol(chars, i, EMOTICONS) {
                    pending.push(glyph);
                    i += len;
                } else {
                    pending.push(c);
                    i += 1;
                }
            }
            '[' | '!' | '{' => {
                // Link, image or brace span; when the text does not form one, the introducing
                // character stays literal and scanning resumes right after it.
                if let Some((node, next)) = scan_construct(c, chars, i, hi, autolink, depth) {
                    push_text(&mut pending, &mut toks);
                    toks.push(Tok::Atom(node));
                    i = next;
                } else {
                    pending.push(c);
                    i += 1;
                }
            }
            _ => {
                pending.push(c);
                i += 1;
            }
        }
    }

    // Flush whatever literal text is still buffered at the end of the range.
    push_text(&mut pending, &mut toks);
    toks
}
1172
1173/// Consumes the whitespace run beginning at `start`, pushing the single token it collapses to: a
1174/// line break when the run crosses a newline, otherwise a space. The spaces around a soft break are
1175/// absorbed into it. Returns the index just past the run.
1176fn scan_whitespace_run(chars: &[char], start: usize, hi: usize, toks: &mut Vec<Tok>) -> usize {
1177    let mut has_newline = chars.get(start) == Some(&'\n');
1178    let mut i = start + 1;
1179    while i < hi && chars.get(i).is_some_and(|&c| is_space(c)) {
1180        has_newline |= chars.get(i) == Some(&'\n');
1181        i += 1;
1182    }
1183    toks.push(Tok::Atom(if has_newline {
1184        Inline::LineBreak
1185    } else {
1186        Inline::Space
1187    }));
1188    i
1189}
1190
/// Whether `c` is one of the punctuation marks a backslash escapes: the backslash disappears and
/// the mark is kept as literal text. A backslash in front of any other character stays in place.
fn is_escapable(c: char) -> bool {
    const ESCAPABLE: &str = "!\"#%&'()*,-./:;?@[]_{}";
    ESCAPABLE.contains(c)
}
1219
1220/// Emits a flanking-delimiter token for one of the emphasis markers at `i`, or buffers the marker as
1221/// literal text when it can neither open nor close a span.
1222fn push_delimiter(
1223    marker: char,
1224    chars: &[char],
1225    i: usize,
1226    pending: &mut String,
1227    toks: &mut Vec<Tok>,
1228) {
1229    let open = can_open(chars, i);
1230    let close = can_close(chars, i);
1231    if open || close {
1232        push_text(pending, toks);
1233        toks.push(Tok::Delim {
1234            marker,
1235            open,
1236            close,
1237        });
1238    } else {
1239        pending.push(marker);
1240    }
1241}
1242
1243/// Parses a self-contained construct introduced by `c` at `i`: `[` starts a link, `!` an image, and
1244/// `{` a brace span. Returns the resulting node and the index just past it, or `None` when the text
1245/// does not form that construct.
1246fn scan_construct(
1247    c: char,
1248    chars: &[char],
1249    i: usize,
1250    hi: usize,
1251    autolink: bool,
1252    depth: usize,
1253) -> Option<(Inline, usize)> {
1254    match c {
1255        '[' => parse_link(chars, i, hi, depth),
1256        '!' => parse_image(chars, i, hi),
1257        _ => parse_brace_inline(chars, i, hi, autolink, depth),
1258    }
1259}
1260
1261/// Handles a backslash at `i`. A backslash pair `\\` is a forced line break that absorbs the
1262/// whitespace around it — unless a third backslash follows, in which case the pair is an escaped
1263/// backslash producing one literal `\` and the scan continues at the third. A backslash before one
1264/// of a fixed set of punctuation marks escapes that mark to a literal; before anything else the
1265/// backslash itself stays literal. Returns the next position.
1266fn scan_backslash(
1267    chars: &[char],
1268    i: usize,
1269    hi: usize,
1270    pending: &mut String,
1271    toks: &mut Vec<Tok>,
1272) -> usize {
1273    if i + 1 < hi && chars.get(i + 1) == Some(&'\\') {
1274        if i + 2 < hi && chars.get(i + 2) == Some(&'\\') {
1275            pending.push('\\');
1276            return i + 2;
1277        }
1278        push_text(pending, toks);
1279        if matches!(toks.last(), Some(Tok::Atom(Inline::Space))) {
1280            toks.pop();
1281        }
1282        toks.push(Tok::Atom(Inline::LineBreak));
1283        let mut j = i + 2;
1284        while j < hi && chars.get(j).is_some_and(|&c| is_space(c)) {
1285            j += 1;
1286        }
1287        return j;
1288    }
1289    if let Some(&next) = chars.get(i + 1).filter(|_| i + 1 < hi)
1290        && is_escapable(next)
1291    {
1292        pending.push(next);
1293        return i + 2;
1294    }
1295    pending.push('\\');
1296    i + 1
1297}
1298
1299/// Handles a run of `-` at `i`. A run of two or more hyphens followed by a space or tab folds into
1300/// typographic dashes: a word character on its left keeps the first hyphen attached to that word,
1301/// then the remaining hyphens fold — two into an en dash, three or more into an em dash preceded by
1302/// the surplus hyphens. Otherwise a single `-` is scanned as a strikeout delimiter (or literal text).
1303/// Returns the next scan position. The character following the run is read from the full input rather
1304/// than the line-content bound, so a hyphen run that ends a line still sees the space trimmed from it.
1305fn scan_dash(
1306    chars: &[char],
1307    i: usize,
1308    hi: usize,
1309    pending: &mut String,
1310    toks: &mut Vec<Tok>,
1311) -> usize {
1312    let mut run = 0;
1313    while i + run < hi && chars.get(i + run) == Some(&'-') {
1314        run += 1;
1315    }
1316    let left_word = i > 0 && chars.get(i - 1).is_some_and(|c| c.is_alphanumeric());
1317    let right_space = matches!(chars.get(i + run), Some(' ' | '\t'));
1318    // A word on the left keeps its first hyphen attached, so only the remainder folds.
1319    let fold_run = if left_word {
1320        run.saturating_sub(1)
1321    } else {
1322        run
1323    };
1324    // Fold only when at least two hyphens remain to fold into a typographic dash. A lone leftover
1325    // hyphen would render identically to literal text, so it is left as a strikeout delimiter instead
1326    // — that way a `--…--` pair whose closing run is followed by a space can still form a span.
1327    if right_space && fold_run >= 2 {
1328        if left_word {
1329            pending.push('-');
1330        }
1331        if fold_run == 2 {
1332            pending.push('\u{2013}');
1333        } else {
1334            for _ in 0..fold_run.saturating_sub(3) {
1335                pending.push('-');
1336            }
1337            pending.push('\u{2014}');
1338        }
1339        return i + run;
1340    }
1341
1342    let open = can_open(chars, i);
1343    let close = can_close(chars, i);
1344    if open || close {
1345        push_text(pending, toks);
1346        toks.push(Tok::Delim {
1347            marker: '-',
1348            open,
1349            close,
1350        });
1351    } else {
1352        pending.push('-');
1353    }
1354    i + 1
1355}
1356
1357/// Index of the innermost open delimiter still awaiting a close, regardless of its marker.
1358fn top_opener(acc: &[Tok]) -> Option<usize> {
1359    acc.iter()
1360        .rposition(|t| matches!(t, Tok::Delim { open: true, .. }))
1361}
1362
/// Pairs flanking delimiters into spans. A closing delimiter binds only to the innermost open
/// delimiter; it forms a span when that opener carries the same marker and they enclose non-empty
/// content, and is otherwise left literal. Binding only to the innermost opener keeps spans strictly
/// nested, so two different markers that interleave cannot both form a span. Same-marker spans nest
/// at most two deep.
///
/// Takes the scanned tokens and returns them with every paired delimiter run replaced by a single
/// [`Tok::Atom`] span; unpaired delimiters are returned unchanged as [`Tok::Delim`].
fn resolve(toks: Vec<Tok>) -> Vec<Tok> {
    // Working stack of tokens seen so far; a formed span replaces its opener and content here.
    let mut acc: Vec<Tok> = Vec::new();
    for tok in toks {
        // Text and atoms go straight onto the stack; only delimiters need pairing.
        let Tok::Delim {
            marker,
            open,
            close,
        } = tok
        else {
            acc.push(tok);
            continue;
        };
        // Pairing needs a delimiter that may close, an innermost opener with the same marker, and
        // at least one token between the two.
        if close
            && let Some(open_idx) = top_opener(&acc)
            && matches!(acc.get(open_idx), Some(Tok::Delim { marker: m, .. }) if *m == marker)
            && acc.len() > open_idx + 1
        {
            // Everything after the opener becomes the candidate span content.
            let inner = finalize(acc.split_off(open_idx + 1));
            if same_marker_depth(&inner, marker) < 2 {
                // Drop the opener and put the finished span in its place.
                acc.pop();
                acc.push(Tok::Atom(make_span(marker, inner)));
                continue;
            }
            // The nesting cap is reached: the opener stays unmatched and its already-resolved
            // content returns to the stack.
            acc.extend(inner.into_iter().map(Tok::Atom));
        }
        // Unpaired: keep the delimiter with its flags, so it can still serve as an opener later.
        acc.push(Tok::Delim {
            marker,
            open,
            close,
        });
    }
    acc
}
1403
1404/// Lowers resolved tokens into inlines: an unmatched delimiter becomes its literal marker character,
1405/// adjacent text merges into one string, and adjacent spans of the same kind merge into one.
1406fn finalize(toks: Vec<Tok>) -> Vec<Inline> {
1407    let mut out: Vec<Inline> = Vec::new();
1408    for tok in toks {
1409        let inline = match tok {
1410            Tok::Text(s) => Inline::Str(s.into()),
1411            Tok::Delim { marker, .. } => Inline::Str(marker.to_compact_string()),
1412            Tok::Atom(node) => node,
1413        };
1414        let inline = match out.last_mut() {
1415            Some(last) => match merge_adjacent(last, inline) {
1416                None => continue,
1417                Some(unmerged) => unmerged,
1418            },
1419            None => inline,
1420        };
1421        out.push(inline);
1422    }
1423    out
1424}
1425
1426/// Merges `next` into `last` when they are two strings or two spans of the same kind, returning
1427/// `None` on success and `Some(next)` when they do not combine.
1428fn merge_adjacent(last: &mut Inline, next: Inline) -> Option<Inline> {
1429    match (last, next) {
1430        (Inline::Str(a), Inline::Str(b)) => {
1431            a.push_str(&b);
1432            None
1433        }
1434        (Inline::Strong(a), Inline::Strong(b))
1435        | (Inline::Emph(a), Inline::Emph(b))
1436        | (Inline::Underline(a), Inline::Underline(b))
1437        | (Inline::Superscript(a), Inline::Superscript(b))
1438        | (Inline::Subscript(a), Inline::Subscript(b))
1439        | (Inline::Strikeout(a), Inline::Strikeout(b)) => {
1440            a.extend(b);
1441            None
1442        }
1443        (_, other) => Some(other),
1444    }
1445}
1446
1447fn make_span(marker: char, inner: Vec<Inline>) -> Inline {
1448    match marker {
1449        '*' => Inline::Strong(inner),
1450        '_' => Inline::Emph(inner),
1451        '+' => Inline::Underline(inner),
1452        '^' => Inline::Superscript(inner),
1453        '~' => Inline::Subscript(inner),
1454        _ => Inline::Strikeout(inner),
1455    }
1456}
1457
1458/// The deepest nesting of spans carrying `marker` anywhere within `nodes`.
1459fn same_marker_depth(nodes: &[Inline], marker: char) -> usize {
1460    nodes
1461        .iter()
1462        .map(|n| node_marker_depth(n, marker))
1463        .max()
1464        .unwrap_or(0)
1465}
1466
1467fn node_marker_depth(node: &Inline, marker: char) -> usize {
1468    let (is_match, children) = match node {
1469        Inline::Strong(k) => (marker == '*', Some(k)),
1470        Inline::Emph(k) => (marker == '_', Some(k)),
1471        Inline::Underline(k) => (marker == '+', Some(k)),
1472        Inline::Superscript(k) => (marker == '^', Some(k)),
1473        Inline::Subscript(k) => (marker == '~', Some(k)),
1474        Inline::Strikeout(k) => (marker == '-', Some(k)),
1475        _ => (false, None),
1476    };
1477    match children {
1478        Some(k) => same_marker_depth(k, marker) + usize::from(is_match),
1479        None => 0,
1480    }
1481}
1482
/// Whether position `i` is a word boundary: there is no character there (the index lies outside
/// the input) or the character is not alphanumeric.
fn boundary(chars: &[char], i: usize) -> bool {
    match chars.get(i) {
        Some(c) => !c.is_alphanumeric(),
        None => true,
    }
}
1487
1488fn non_space(chars: &[char], i: usize) -> bool {
1489    chars.get(i).is_some_and(|&c| !is_space(c))
1490}
1491
1492/// A delimiter at `i` may open a span when its left neighbour is a boundary and the next character
1493/// is not whitespace.
1494fn can_open(chars: &[char], i: usize) -> bool {
1495    let left_boundary = i == 0 || boundary(chars, i - 1);
1496    left_boundary && non_space(chars, i + 1)
1497}
1498
1499/// A delimiter at `j` may close a span when the previous character is not whitespace and the right
1500/// neighbour is a boundary.
1501fn can_close(chars: &[char], j: usize) -> bool {
1502    j > 0 && non_space(chars, j - 1) && boundary(chars, j + 1)
1503}
1504
1505fn parse_citation(
1506    chars: &[char],
1507    i: usize,
1508    hi: usize,
1509    autolink: bool,
1510    depth: usize,
1511) -> Option<(usize, Vec<Inline>)> {
1512    if chars.get(i + 1) != Some(&'?') {
1513        return None;
1514    }
1515    let left_boundary = i == 0 || boundary(chars, i - 1);
1516    if !left_boundary || !non_space(chars, i + 2) {
1517        return None;
1518    }
1519    let mut j = i + 2;
1520    while j < hi {
1521        if chars.get(j) == Some(&'?')
1522            && chars.get(j + 1) == Some(&'?')
1523            && j > i + 2
1524            && non_space(chars, j - 1)
1525            && boundary(chars, j + 2)
1526        {
1527            return Some((j + 2, inlines_with(chars, i + 2, j, autolink, depth + 1)));
1528        }
1529        j += 1;
1530    }
1531    None
1532}
1533
1534/// A monospaced span opens at `i` (which holds the first `{` of `{{`) when its left neighbour is a
1535/// boundary and the character after `{{` is not whitespace.
1536fn can_open_monospace(chars: &[char], i: usize) -> bool {
1537    let left_boundary = i == 0 || boundary(chars, i - 1);
1538    left_boundary && non_space(chars, i + 2)
1539}
1540
1541/// A monospaced span closes at `j` (holding the first `}` of `}}`) when the close is non-empty, its
1542/// left neighbour is not whitespace, and the character after `}}` is a boundary.
1543fn closes_monospace(chars: &[char], open: usize, j: usize) -> bool {
1544    j > open + 2 && non_space(chars, j - 1) && boundary(chars, j + 2)
1545}
1546
1547/// Finds the `}}` that closes the monospaced span opened at `i`, scanning across nested `{{ … }}`
1548/// pairs so an inner span does not end the outer one. Returns the index of the closing `}}`, or
1549/// `None` when the span is never closed.
1550fn match_monospace_close(chars: &[char], i: usize, hi: usize) -> Option<usize> {
1551    // A dense run of unbalanced `{` would otherwise make each failed nested open re-scan the whole
1552    // suffix, so the search cost grows exponentially. A step budget proportional to the span keeps
1553    // it linear per span: it is far above what any genuine span needs, so a real close is always
1554    // found, while a pathological run gives up and leaves the braces as literal text.
1555    let mut budget = hi
1556        .saturating_sub(i)
1557        .saturating_mul(8)
1558        .saturating_add(64)
1559        .min(200_000);
1560    match_monospace_close_within(chars, i, hi, &mut budget, 0)
1561}
1562
1563fn match_monospace_close_within(
1564    chars: &[char],
1565    i: usize,
1566    hi: usize,
1567    budget: &mut usize,
1568    depth: usize,
1569) -> Option<usize> {
1570    // Each nested `{{` recurses, so cap the nesting to keep deeply stacked braces off the call stack.
1571    if depth > MAX_INLINE_DEPTH {
1572        return None;
1573    }
1574    let mut j = i + 2;
1575    while j < hi {
1576        if *budget == 0 {
1577            return None;
1578        }
1579        *budget -= 1;
1580        if chars.get(j) == Some(&'{')
1581            && chars.get(j + 1) == Some(&'{')
1582            && can_open_monospace(chars, j)
1583            && let Some(nested) = match_monospace_close_within(chars, j, hi, budget, depth + 1)
1584        {
1585            j = nested + 2;
1586            continue;
1587        }
1588        if chars.get(j) == Some(&'}')
1589            && chars.get(j + 1) == Some(&'}')
1590            && closes_monospace(chars, i, j)
1591        {
1592            return Some(j);
1593        }
1594        j += 1;
1595    }
1596    None
1597}
1598
1599fn parse_brace_inline(
1600    chars: &[char],
1601    i: usize,
1602    hi: usize,
1603    autolink: bool,
1604    depth: usize,
1605) -> Option<(Inline, usize)> {
1606    if chars.get(i + 1) == Some(&'{') {
1607        // Monospaced span: `{{ … }}`. The close is the `}}` that balances this open, so a nested
1608        // `{{ … }}` inside is skipped over rather than ending the span early.
1609        if !can_open_monospace(chars, i) {
1610            return None;
1611        }
1612        let close = match_monospace_close(chars, i, hi)?;
1613        let inner = inlines_with(chars, i + 2, close, autolink, depth + 1);
1614        let text = carta_ast::to_plain_text(&inner);
1615        return Some((Inline::Code(Box::default(), text.into()), close + 2));
1616    }
1617
1618    if matches_at(chars, i, "{color:") {
1619        let value_start = i + "{color:".len();
1620        let value_end = (value_start..hi).find(|&k| chars.get(k) == Some(&'}'))?;
1621        let value = color_value(&slice_to_string(chars, value_start, value_end))?;
1622        let close = match_color_close(chars, value_end + 1, hi)?;
1623        let inner = inlines_with(chars, value_end + 1, close, autolink, depth + 1);
1624        let attr = Attr {
1625            id: carta_ast::Text::default(),
1626            classes: Vec::new(),
1627            attributes: vec![("color".into(), value.into())],
1628        };
1629        return Some((Inline::Span(Box::new(attr), inner), close + "{color}".len()));
1630    }
1631
1632    if matches_at(chars, i, "{anchor:") {
1633        let name_start = i + "{anchor:".len();
1634        let name_end = (name_start..hi).find(|&k| chars.get(k) == Some(&'}'))?;
1635        let name: String = chars
1636            .get(name_start..name_end)
1637            .unwrap_or_default()
1638            .iter()
1639            .filter(|c| !is_space(**c))
1640            .collect();
1641        let attr = Attr {
1642            id: name.into(),
1643            classes: Vec::new(),
1644            attributes: Vec::new(),
1645        };
1646        return Some((Inline::Span(Box::new(attr), Vec::new()), name_end + 1));
1647    }
1648
1649    None
1650}
1651
/// Checks a colour value and returns its normalised form.
///
/// Three shapes are accepted:
/// - `#` followed by exactly six hexadecimal digits, returned unchanged;
/// - a non-empty run of letters (any Unicode alphabetic characters), returned unchanged;
/// - six hexadecimal digits whose first is a decimal digit, returned with `#` prepended.
///
/// Every other value yields `None`, which leaves the `{color:…}` markup as literal text.
fn color_value(value: &str) -> Option<String> {
    // Exactly six ASCII hexadecimal digits (byte length equals digit count for ASCII).
    let is_hex6 = |s: &str| s.len() == 6 && s.bytes().all(|b| b.is_ascii_hexdigit());

    match value.strip_prefix('#') {
        Some(digits) => is_hex6(digits).then(|| value.to_owned()),
        // A letters-only name wins over the bare-hex form, so `abcdef` stays a name.
        None if !value.is_empty() && value.chars().all(char::is_alphabetic) => {
            Some(value.to_owned())
        }
        None if is_hex6(value) && value.as_bytes().first().is_some_and(u8::is_ascii_digit) => {
            Some(format!("#{value}"))
        }
        None => None,
    }
}
1672
1673/// Finds the `{color}` that closes the inline colour span whose content begins at `from`, balancing
1674/// nested `{color:…}` opens so an inner close does not end the outer span early. Returns the index of
1675/// the closing token, or `None` when the span is never closed within `from..hi`.
1676fn match_color_close(chars: &[char], from: usize, hi: usize) -> Option<usize> {
1677    let mut depth = 1usize;
1678    let mut k = from;
1679    while k < hi {
1680        if matches_at(chars, k, "{color:") {
1681            depth += 1;
1682            k += "{color:".len();
1683        } else if matches_at(chars, k, "{color}") {
1684            depth -= 1;
1685            if depth == 0 {
1686                return Some(k);
1687            }
1688            k += "{color}".len();
1689        } else {
1690            k += 1;
1691        }
1692    }
1693    None
1694}
1695
1696fn parse_link(chars: &[char], i: usize, hi: usize, depth: usize) -> Option<(Inline, usize)> {
1697    let close = (i + 1..hi).find(|&k| chars.get(k) == Some(&']'))?;
1698    let pipes: Vec<usize> = (i + 1..close)
1699        .filter(|&k| chars.get(k) == Some(&'|'))
1700        .collect();
1701    // A third `|`-segment is allowed only when it names a smart-link style, which becomes a class on
1702    // the link; any other third segment, or a fourth, is not a link.
1703    let (label_range, target_start, target_end, smart_class) = match pipes.as_slice() {
1704        [] => (None, i + 1, close, None),
1705        [p] => (Some((i + 1, *p)), p + 1, close, None),
1706        [p1, p2] => {
1707            let third = slice_to_string(chars, p2 + 1, close);
1708            if third != "smart-link" && third != "smart-card" {
1709                return None;
1710            }
1711            (Some((i + 1, *p1)), p1 + 1, *p2, Some(third))
1712        }
1713        _ => return None,
1714    };
1715    let has_pipe = label_range.is_some();
1716    let target = slice_to_string(chars, target_start, target_end);
1717
1718    let (url, class, default_label) = classify_link_target(&target, has_pipe)?;
1719
1720    let label = match label_range {
1721        Some((ls, le)) if le > ls => inlines_with(chars, ls, le, false, depth + 1),
1722        _ => vec![Inline::Str(default_label.into())],
1723    };
1724    let mut classes: Vec<String> = class.into_iter().map(str::to_string).collect();
1725    classes.extend(smart_class);
1726    let attr = Attr {
1727        id: carta_ast::Text::default(),
1728        classes: classes.into_iter().map(Into::into).collect(),
1729        attributes: Vec::new(),
1730    };
1731    Some((
1732        Inline::Link(
1733            Box::new(attr),
1734            label,
1735            Box::new(Target {
1736                url: url.into(),
1737                title: carta_ast::Text::default(),
1738            }),
1739        ),
1740        close + 1,
1741    ))
1742}
1743
1744fn classify_link_target(
1745    target: &str,
1746    has_pipe: bool,
1747) -> Option<(String, Option<&'static str>, String)> {
1748    if target.starts_with('#') {
1749        return Some((target.to_string(), None, target.to_string()));
1750    }
1751    if target.starts_with('~') {
1752        return Some((target.to_string(), Some("user-account"), target.to_string()));
1753    }
1754    if let Some(rest) = target.strip_prefix('^') {
1755        if has_pipe {
1756            return None;
1757        }
1758        return Some((rest.to_string(), Some("attachment"), rest.to_string()));
1759    }
1760    if has_url_prefix(target, LINK_URL_PREFIXES) {
1761        let label = target
1762            .strip_prefix("mailto:")
1763            .map_or_else(|| target.to_string(), str::to_string);
1764        return Some((target.to_string(), None, label));
1765    }
1766    None
1767}
1768
1769fn parse_image(chars: &[char], i: usize, hi: usize) -> Option<(Inline, usize)> {
1770    // The character immediately after the opening `!` must not be whitespace.
1771    if !non_space(chars, i + 1) {
1772        return None;
1773    }
1774    let close = (i + 1..hi).find(|&k| chars.get(k) == Some(&'!'))?;
1775    let content = slice_to_string(chars, i + 1, close);
1776    let (src, props) = match content.split_once('|') {
1777        Some((s, p)) => (s.to_string(), Some(p.to_string())),
1778        None => (content, None),
1779    };
1780    if src.is_empty() {
1781        return None;
1782    }
1783
1784    let (attr, title) = match props {
1785        Some(props) => image_properties(&props)?,
1786        None => (Attr::default(), String::new()),
1787    };
1788    Some((
1789        Inline::Image(
1790            Box::new(attr),
1791            Vec::new(),
1792            Box::new(Target {
1793                url: src.into(),
1794                title: title.into(),
1795            }),
1796        ),
1797        close + 1,
1798    ))
1799}
1800
1801/// Parses the property list that follows the `|` in an image, returning its attributes and title, or
1802/// `None` when the list is malformed (which disqualifies the image). Leading whitespace on the whole
1803/// list disqualifies it; `thumbnail` is accepted only as the sole property and only with no
1804/// surrounding whitespace. Otherwise every property is `key=value`: a key carries no whitespace and
1805/// loses only the whitespace introduced after a separating comma, while a value is kept verbatim so
1806/// its surrounding whitespace is preserved. A `title` property is the image's title rather than an
1807/// attribute.
1808fn image_properties(props: &str) -> Option<(Attr, String)> {
1809    if props.starts_with(is_space) {
1810        return None;
1811    }
1812    if props == "thumbnail" {
1813        return Some((
1814            Attr {
1815                id: carta_ast::Text::default(),
1816                classes: vec!["thumbnail".into()],
1817                attributes: Vec::new(),
1818            },
1819            String::new(),
1820        ));
1821    }
1822    let mut attributes = Vec::new();
1823    let mut title = String::new();
1824    for (idx, raw) in props.split(',').enumerate() {
1825        let part = if idx == 0 {
1826            raw
1827        } else {
1828            raw.trim_start_matches(is_space)
1829        };
1830        let (key, value) = part.split_once('=')?;
1831        if key.is_empty() || key.contains(is_space) {
1832            return None;
1833        }
1834        if key == "title" {
1835            title = value.to_string();
1836        } else {
1837            attributes.push((key.to_string(), value.to_string()));
1838        }
1839    }
1840    Some((
1841        Attr {
1842            id: carta_ast::Text::default(),
1843            classes: Vec::new(),
1844            attributes: attributes
1845                .into_iter()
1846                .map(|(k, v)| (k.into(), v.into()))
1847                .collect(),
1848        },
1849        title,
1850    ))
1851}
1852
1853/// If a bare autolink starts at `i`, returns the index just past its URL run. A scheme matches
1854/// only in lower case. The run extends to the first whitespace or URL terminator.
1855fn match_bare_url(chars: &[char], i: usize, hi: usize) -> Option<usize> {
1856    if !BARE_URL_PREFIXES.iter().any(|p| matches_at(chars, i, p)) {
1857        return None;
1858    }
1859    let mut end = i;
1860    while end < hi
1861        && chars
1862            .get(end)
1863            .is_some_and(|&c| !is_space(c) && !is_url_terminator(c))
1864    {
1865        end += 1;
1866    }
1867    Some(end)
1868}
1869
/// Whether `c` is one of the characters that end a bare autolink run.
fn is_url_terminator(c: char) -> bool {
    const TERMINATORS: [char; 9] = ['|', ']', '}', '<', '>', '"', '[', '{', '`'];
    TERMINATORS.contains(&c)
}
1874
/// Reports whether `s` starts with any entry of `prefixes`. The comparison is exact, so a scheme
/// written in upper or mixed case does not match a lower-case prefix.
fn has_url_prefix(s: &str, prefixes: &[&str]) -> bool {
    for &prefix in prefixes {
        if s.starts_with(prefix) {
            return true;
        }
    }
    false
}
1879
1880/// Matches a symbol or emoticon token at `i`. The token is recognised wherever the character that
1881/// follows it is a boundary (end of input or a non-alphanumeric character); the character before it
1882/// is irrelevant, so a symbol may abut the end of a preceding word.
1883fn match_token_symbol(chars: &[char], i: usize, table: &[(&str, char)]) -> Option<(char, usize)> {
1884    for (token, glyph) in table {
1885        let len = token.chars().count();
1886        if matches_at(chars, i, token) && boundary(chars, i + len) {
1887            return Some((*glyph, len));
1888        }
1889    }
1890    None
1891}
1892
1893// ---------------------------------------------------------------------------
1894// Shared helpers
1895// ---------------------------------------------------------------------------
1896
/// Whether `c` is a separator in this format: an ASCII space, a tab, or a line feed. Other code
/// points that Unicode treats as whitespace (no-break space, em space, form feed, vertical tab,
/// and so on) are not separators here; they count as ordinary characters within a word.
fn is_space(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\n'
}
1903
/// Whether the characters of `needle` occur in `chars` starting exactly at `pos`. A needle that
/// would run past the end of `chars` does not match; an empty needle matches at any position.
fn matches_at(chars: &[char], pos: usize, needle: &str) -> bool {
    let mut at = pos;
    for expected in needle.chars() {
        if chars.get(at) != Some(&expected) {
            return false;
        }
        at += 1;
    }
    true
}
1910
1911/// Whether a parameterless block macro begins at `pos`. These tokens introduce a block wherever they
1912/// occur, so they end any paragraph that runs into them.
1913fn bare_block_macro_at(chars: &[char], pos: usize) -> bool {
1914    matches_at(chars, pos, "{code}")
1915        || matches_at(chars, pos, "{noformat}")
1916        || matches_at(chars, pos, "{quote}")
1917        || matches_at(chars, pos, "{panel}")
1918}
1919
1920fn find_token(chars: &[char], from: usize, token: &str) -> Option<usize> {
1921    let token_len = token.chars().count();
1922    let upper = chars.len().saturating_sub(token_len);
1923    (from..=upper).find(|&k| matches_at(chars, k, token))
1924}
1925
/// Collects the characters in `start..end` into a `String`. A range that is reversed or reaches
/// past the end of `chars` yields an empty string instead of panicking.
fn slice_to_string(chars: &[char], start: usize, end: usize) -> String {
    match chars.get(start..end) {
        Some(run) => run.iter().collect(),
        None => String::new(),
    }
}
1929
1930/// Trims leading and trailing whitespace from `start..end`, returning the narrowed range.
1931fn trim(chars: &[char], start: usize, end: usize) -> (usize, usize) {
1932    let mut s = start;
1933    while s < end && chars.get(s).is_some_and(|&c| is_space(c)) {
1934        s += 1;
1935    }
1936    let mut e = end;
1937    while e > s && chars.get(e - 1).is_some_and(|&c| is_space(c)) {
1938        e -= 1;
1939    }
1940    (s, e)
1941}
1942
1943/// The end of `start..end` with trailing whitespace removed, leaving any leading whitespace in place.
1944fn trim_end(chars: &[char], start: usize, end: usize) -> usize {
1945    let mut e = end;
1946    while e > start && chars.get(e - 1).is_some_and(|&c| is_space(c)) {
1947        e -= 1;
1948    }
1949    e
1950}
1951
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the reader over `input` with default options and returns the resulting blocks.
    fn blocks(input: &str) -> Vec<Block> {
        JiraReader
            .read(input, &ReaderOptions::default())
            .expect("jira reader should not fail")
            .blocks
    }

    /// Returns the inlines of the first block of `input`, panicking unless it is a paragraph.
    fn para(input: &str) -> Vec<Inline> {
        match blocks(input).into_iter().next() {
            Some(Block::Para(inlines)) => inlines,
            other => panic!("expected a paragraph, got {other:?}"),
        }
    }

    /// Builds an `Inline::Str` holding `text`.
    fn str_node(text: &str) -> Inline {
        Inline::Str(text.to_string().into())
    }

    /// Empty input produces an empty block list.
    #[test]
    fn empty_input_yields_no_blocks() {
        assert!(blocks("").is_empty());
    }

    /// `h2.` opens a level-two header; `h7.` is read as a paragraph.
    #[test]
    fn heading_levels() {
        assert_eq!(
            blocks("h2. Title"),
            vec![Block::Header(2, Box::default(), vec![str_node("Title")])]
        );
        // Level seven is not a heading.
        assert!(matches!(blocks("h7. Title").as_slice(), [Block::Para(_)]));
    }

    /// Each text-effect delimiter maps to its inline: `*` strong, `_` emphasis, `+` underline,
    /// `^` superscript, `~` subscript.
    #[test]
    fn text_effects() {
        assert_eq!(para("*bold*"), vec![Inline::Strong(vec![str_node("bold")])]);
        assert_eq!(para("_em_"), vec![Inline::Emph(vec![str_node("em")])]);
        assert_eq!(
            para("+ins+"),
            vec![Inline::Underline(vec![str_node("ins")])]
        );
        assert_eq!(
            para("^sup^"),
            vec![Inline::Superscript(vec![str_node("sup")])]
        );
        assert_eq!(
            para("~sub~"),
            vec![Inline::Subscript(vec![str_node("sub")])]
        );
    }

    /// An emphasis span directly inside a strong span nests.
    #[test]
    fn nested_effects() {
        assert_eq!(
            para("*_both_*"),
            vec![Inline::Strong(vec![Inline::Emph(vec![str_node("both")])])]
        );
    }

    /// Underscores inside a word do not open emphasis.
    #[test]
    fn intraword_underscore_is_literal() {
        assert_eq!(para("snake_case_here"), vec![str_node("snake_case_here")]);
    }

    /// Markup inside `{{ … }}` is reduced to its plain text in the resulting code inline.
    #[test]
    fn monospace_stringifies_inner_markup() {
        assert_eq!(
            para("{{a *b* c}}"),
            vec![Inline::Code(Box::default(), "a b c".to_string().into())]
        );
    }

    /// An inline `{color:red}…{color}` becomes a span carrying a `color` attribute.
    #[test]
    fn color_span() {
        assert_eq!(
            para("{color:red}x{color}"),
            vec![Inline::Span(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: Vec::new(),
                    attributes: vec![("color".to_string().into(), "red".to_string().into())],
                }),
                vec![str_node("x")],
            )]
        );
    }

    /// Colour markers on their own lines wrap the enclosed content in a div; a close that shares
    /// its line with other text leaves the input as a paragraph.
    #[test]
    fn color_block_wraps_in_div() {
        let attr = Attr {
            id: carta_ast::Text::default(),
            classes: Vec::new(),
            attributes: vec![("color".to_string().into(), "red".to_string().into())],
        };
        assert_eq!(
            blocks("{color:red}\nstuff\n{color}"),
            vec![Block::Div(
                Box::new(attr),
                vec![Block::Para(vec![Inline::LineBreak, str_node("stuff")])],
            )]
        );
        // A close that is not alone on its line keeps the colour inline.
        assert!(matches!(
            blocks("{color:red}a\nb{color}").as_slice(),
            [Block::Para(_)]
        ));
    }

    /// `{anchor:foo}` becomes an empty span whose id is the anchor name.
    #[test]
    fn anchor_span() {
        assert_eq!(
            para("{anchor:foo}bar"),
            vec![
                Inline::Span(
                    Box::new(Attr {
                        id: "foo".to_string().into(),
                        classes: Vec::new(),
                        attributes: Vec::new(),
                    }),
                    Vec::new(),
                ),
                str_node("bar"),
            ]
        );
    }

    /// `??…??` yields an em dash, a space, and the cited text in emphasis.
    #[test]
    fn citation_renders_with_em_dash_prefix() {
        assert_eq!(
            para("??cited??"),
            vec![
                str_node("\u{2014}"),
                Inline::Space,
                Inline::Emph(vec![str_node("cited")]),
            ]
        );
    }

    /// Between words, `--` folds to an en dash and `---` to an em dash.
    #[test]
    fn dash_folding() {
        assert_eq!(
            para("a -- b"),
            vec![
                str_node("a"),
                Inline::Space,
                str_node("\u{2013}"),
                Inline::Space,
                str_node("b"),
            ]
        );
        assert_eq!(
            para("a --- b"),
            vec![
                str_node("a"),
                Inline::Space,
                str_node("\u{2014}"),
                Inline::Space,
                str_node("b"),
            ]
        );
    }

    /// `-…-` produces a strikeout span.
    #[test]
    fn strikeout_span() {
        assert_eq!(
            para("-gone-"),
            vec![Inline::Strikeout(vec![str_node("gone")])]
        );
    }

    /// A backslash before `*` emits the asterisk literally instead of opening strong text.
    #[test]
    fn escape_emits_literal() {
        assert_eq!(
            para("\\*not bold\\*"),
            vec![str_node("*not"), Inline::Space, str_node("bold*")]
        );
    }

    /// A double backslash is a forced line break.
    #[test]
    fn forced_line_break() {
        assert_eq!(
            para("one\\\\two"),
            vec![str_node("one"), Inline::LineBreak, str_node("two")]
        );
    }

    /// A single newline inside a paragraph becomes a line break.
    #[test]
    fn newline_within_paragraph_is_hard_break() {
        assert_eq!(
            para("one\ntwo"),
            vec![str_node("one"), Inline::LineBreak, str_node("two")]
        );
    }

    /// A line of four dashes is a horizontal rule.
    #[test]
    fn horizontal_rule() {
        assert_eq!(blocks("----"), vec![Block::HorizontalRule]);
    }

    /// The `bq.` prefix wraps the rest of the line in a block quote.
    #[test]
    fn blockquote_prefix() {
        assert_eq!(
            blocks("bq. quoted"),
            vec![Block::BlockQuote(vec![Block::Para(vec![str_node(
                "quoted"
            )])])]
        );
    }

    /// `[label|url]` yields a link with the given label and no classes.
    #[test]
    fn link_with_label() {
        assert_eq!(
            para("[home|http://example.com]"),
            vec![Inline::Link(
                Box::default(),
                vec![str_node("home")],
                Box::new(Target {
                    url: "http://example.com".to_string().into(),
                    title: carta_ast::Text::default(),
                }),
            )]
        );
    }

    /// `[url]` with no label uses the URL itself as the label.
    #[test]
    fn link_bare_url_label() {
        assert_eq!(
            para("[http://example.com]"),
            vec![Inline::Link(
                Box::default(),
                vec![str_node("http://example.com")],
                Box::new(Target {
                    url: "http://example.com".to_string().into(),
                    title: carta_ast::Text::default(),
                }),
            )]
        );
    }

    /// `[^file]` links to the file name without the caret and carries the `attachment` class.
    #[test]
    fn attachment_link_carries_class() {
        assert_eq!(
            para("[^file.txt]"),
            vec![Inline::Link(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: vec!["attachment".to_string().into()],
                    attributes: Vec::new(),
                }),
                vec![str_node("file.txt")],
                Box::new(Target {
                    url: "file.txt".to_string().into(),
                    title: carta_ast::Text::default(),
                }),
            )]
        );
    }

    /// A URL in running text becomes a link that ends at the following space.
    #[test]
    fn bare_autolink() {
        assert_eq!(
            para("see http://example.com here"),
            vec![
                str_node("see"),
                Inline::Space,
                Inline::Link(
                    Box::default(),
                    vec![str_node("http://example.com")],
                    Box::new(Target {
                        url: "http://example.com".to_string().into(),
                        title: carta_ast::Text::default(),
                    }),
                ),
                Inline::Space,
                str_node("here"),
            ]
        );
    }

    /// Image properties become attributes; the space after the separating comma is dropped.
    #[test]
    fn image_with_properties() {
        assert_eq!(
            para("!pic.png|align=right, vspace=4!"),
            vec![Inline::Image(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: Vec::new(),
                    attributes: vec![
                        ("align".to_string().into(), "right".to_string().into()),
                        ("vspace".to_string().into(), "4".to_string().into()),
                    ],
                }),
                Vec::new(),
                Box::new(Target {
                    url: "pic.png".to_string().into(),
                    title: carta_ast::Text::default(),
                }),
            )]
        );
    }

    /// The sole property `thumbnail` becomes a class rather than an attribute.
    #[test]
    fn image_thumbnail() {
        assert_eq!(
            para("!pic.png|thumbnail!"),
            vec![Inline::Image(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: vec!["thumbnail".to_string().into()],
                    attributes: Vec::new(),
                }),
                Vec::new(),
                Box::new(Target {
                    url: "pic.png".to_string().into(),
                    title: carta_ast::Text::default(),
                }),
            )]
        );
    }

    /// Symbol and emoticon tokens are replaced by their glyphs.
    #[test]
    fn symbols_and_emoticons() {
        assert_eq!(para("(!)"), vec![str_node("\u{2757}")]);
        assert_eq!(para("(y)"), vec![str_node("\u{1F44D}")]);
        assert_eq!(para(":)"), vec![str_node("\u{1F642}")]);
        // A symbol is recognised even when it abuts a preceding word.
        assert_eq!(para("a(!)"), vec![str_node("a\u{2757}")]);
    }

    /// A `**` item after a `*` item nests a bullet list inside the first item.
    #[test]
    fn bullet_list_nesting() {
        assert_eq!(
            blocks("* a\n** b"),
            vec![Block::BulletList(vec![vec![
                Block::Para(vec![str_node("a")]),
                Block::BulletList(vec![vec![Block::Para(vec![str_node("b")])]]),
            ]])]
        );
    }

    /// `#` items form an ordered list starting at 1 with default style and delimiter.
    #[test]
    fn ordered_list_attributes() {
        assert_eq!(
            blocks("# one\n# two"),
            vec![Block::OrderedList(
                ListAttributes {
                    start: 1,
                    style: ListNumberStyle::DefaultStyle,
                    delim: ListNumberDelim::DefaultDelim,
                },
                vec![
                    vec![Block::Para(vec![str_node("one")])],
                    vec![Block::Para(vec![str_node("two")])],
                ],
            )]
        );
    }

    /// Consecutive items with different bullet markers (`*` then `-`) form separate lists.
    #[test]
    fn distinct_markers_split_lists() {
        assert_eq!(
            blocks("* a\n- b"),
            vec![
                Block::BulletList(vec![vec![Block::Para(vec![str_node("a")])]]),
                Block::BulletList(vec![vec![Block::Para(vec![str_node("b")])]]),
            ]
        );
    }

    /// A `||` row followed by a `|` row gives a two-column table with one header row and one
    /// body holding one row.
    #[test]
    fn table_header_and_body() {
        let blocks = blocks("||h1||h2||\n|a|b|");
        let table = match blocks.first() {
            Some(Block::Table(table)) => table,
            other => panic!("expected a table, got {other:?}"),
        };
        assert_eq!(table.col_specs.len(), 2);
        assert_eq!(table.head.rows.len(), 1);
        assert_eq!(table.bodies.len(), 1);
        assert_eq!(table.bodies.first().map(|b| b.body.len()), Some(1));
    }

    /// A `{code}` block with no language parameter gets the `java` class.
    #[test]
    fn code_block_default_language() {
        assert_eq!(
            blocks("{code}\nint x = 1;\n{code}"),
            vec![Block::CodeBlock(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: vec!["java".to_string().into()],
                    attributes: Vec::new(),
                }),
                "int x = 1;\n".to_string().into(),
            )]
        );
    }

    /// `{code:python}` uses the named language as the class.
    #[test]
    fn code_block_named_language() {
        assert_eq!(
            blocks("{code:python}\npass\n{code}"),
            vec![Block::CodeBlock(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: vec!["python".to_string().into()],
                    attributes: Vec::new(),
                }),
                "pass\n".to_string().into(),
            )]
        );
    }

    /// `{noformat}` yields a code block with default (empty) attributes.
    #[test]
    fn noformat_has_no_language_class() {
        assert_eq!(
            blocks("{noformat}\nraw\n{noformat}"),
            vec![Block::CodeBlock(Box::default(), "raw\n".to_string().into())]
        );
    }

    /// A `{code}` opener with no closing token produces no blocks at all.
    #[test]
    fn unterminated_code_block_is_dropped() {
        assert!(blocks("{code}\nno close").is_empty());
    }

    /// `{quote}…{quote}` wraps its content in a block quote.
    #[test]
    fn quote_macro_holds_blocks() {
        assert_eq!(
            blocks("{quote}\ninside\n{quote}"),
            vec![Block::BlockQuote(vec![Block::Para(vec![str_node(
                "inside"
            )])])]
        );
    }

    /// A titled panel is a `panel` div whose first child is a `panelheader` div holding the
    /// title in strong text, followed by the panel body.
    #[test]
    fn panel_with_title() {
        assert_eq!(
            blocks("{panel:title=Note}\nbody\n{panel}"),
            vec![Block::Div(
                Box::new(Attr {
                    id: carta_ast::Text::default(),
                    classes: vec!["panel".to_string().into()],
                    attributes: Vec::new(),
                }),
                vec![
                    Block::Div(
                        Box::new(Attr {
                            id: carta_ast::Text::default(),
                            classes: vec!["panelheader".to_string().into()],
                            attributes: Vec::new(),
                        }),
                        vec![Block::Plain(vec![Inline::Strong(vec![str_node("Note")])])],
                    ),
                    Block::Para(vec![str_node("body")]),
                ],
            )]
        );
    }

    /// A blank line separates two paragraphs.
    #[test]
    fn paragraph_separation() {
        assert_eq!(
            blocks("one\n\ntwo"),
            vec![
                Block::Para(vec![str_node("one")]),
                Block::Para(vec![str_node("two")]),
            ]
        );
    }

    /// Leading spaces are kept as a single `Space` at the start of the paragraph.
    #[test]
    fn leading_space_opens_paragraph() {
        assert_eq!(para(" hello"), vec![Inline::Space, str_node("hello")]);
        assert_eq!(
            para("   indented"),
            vec![Inline::Space, str_node("indented")]
        );
    }

    /// A backslash before a digit is kept as a literal backslash.
    #[test]
    fn backslash_before_non_escapable_stays_literal() {
        assert_eq!(para("a\\1b"), vec![str_node("a\\1b")]);
    }

    /// `&copy;` and `&#169;` decode to the copyright sign; `&#x41;` is left as written.
    #[test]
    fn named_and_decimal_entities_decode_but_hex_does_not() {
        assert_eq!(
            para("&copy; &#169; &#x41;"),
            vec![
                str_node("\u{a9}"),
                Inline::Space,
                str_node("\u{a9}"),
                Inline::Space,
                str_node("&#x41;"),
            ]
        );
    }

    /// `{color:}` with an empty value stays literal text.
    #[test]
    fn empty_color_macro_is_literal() {
        assert_eq!(para("{color:}x"), vec![str_node("{color:}x")]);
    }

    /// Four dashes between words become a hyphen followed by an em dash.
    #[test]
    fn four_dash_run_folds_to_hyphen_and_em_dash() {
        assert_eq!(
            para("a ---- b"),
            vec![
                str_node("a"),
                Inline::Space,
                str_node("-\u{2014}"),
                Inline::Space,
                str_node("b"),
            ]
        );
    }

    /// A `--` at the end of the line is not folded into a dash.
    #[test]
    fn dash_run_at_line_end_stays_literal() {
        assert_eq!(
            para("x --"),
            vec![str_node("x"), Inline::Space, str_node("--")]
        );
    }

    /// A lone `***` item produces three levels of nested bullet lists.
    #[test]
    fn repeated_markers_nest_bullet_lists() {
        assert_eq!(
            blocks("*** x"),
            vec![Block::BulletList(vec![vec![Block::BulletList(vec![
                vec![Block::BulletList(vec![vec![Block::Para(vec![str_node(
                    "x"
                )])]]),]
            ])]])]
        );
    }

    /// A list marker preceded by a space still opens a list.
    #[test]
    fn indented_marker_still_opens_list() {
        assert_eq!(
            blocks(" * x"),
            vec![Block::BulletList(vec![vec![Block::Para(vec![str_node(
                "x"
            )])]])]
        );
    }

    /// An indented `----` is paragraph text, not a horizontal rule.
    #[test]
    fn indented_dash_run_is_paragraph_not_rule() {
        assert_eq!(
            blocks("  ----"),
            vec![Block::Para(vec![Inline::Space, str_node("----")])]
        );
    }

    /// `*a**b*` is a single strong span with two text nodes, while `**x**` is strong nested
    /// inside strong.
    #[test]
    fn same_marker_nesting_caps_at_two() {
        assert_eq!(
            para("*a**b*"),
            vec![Inline::Strong(vec![str_node("a"), str_node("b")])]
        );
        assert_eq!(
            para("**x**"),
            vec![Inline::Strong(vec![Inline::Strong(vec![str_node("x")])])]
        );
    }

    /// `--x--` is strikeout nested inside strikeout.
    #[test]
    fn strikeout_nests() {
        assert_eq!(
            para("--x--"),
            vec![Inline::Strikeout(vec![Inline::Strikeout(vec![str_node(
                "x"
            )])])]
        );
    }
}