Skip to main content

brief/
convert.rs

1//! Markdown-to-Brief converter.
2//!
3//! Walks a `pulldown-cmark` event stream and emits Brief source text. Every
4//! Markdown construct that has no clean Brief equivalent is reported as a
5//! `Diag` carrying a `Hole` code; nothing is silently dropped.
6
7#[derive(Clone, Debug)]
8pub struct ConvertResult {
9    pub brief_source: String,
10    pub diagnostics: Vec<Diag>,
11}
12
13#[derive(Clone, Debug)]
14pub struct Diag {
15    pub hole: Hole,
16    pub line: usize, // 1-indexed
17    pub col: usize,  // 1-indexed
18    pub original: String,
19    pub note: String,
20}
21
/// Catalogue of Markdown constructs that have no clean Brief equivalent.
///
/// Each variant identifies one kind of rewrite the converter reports through
/// a `Diag`. Variant order is kept stable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Hole {
    /// Setext (underlined) heading rewritten to ATX form.
    SetextHeading,
    /// A definition-list term that carried more than one definition.
    DefinitionListMultipleDefs,
    /// Doubled markers: `**x**`, `__x__`, `~~x~~`.
    DoubleEmphasis,
    /// A literal sigil in text that had to be escaped.
    EscapedSigil,
    /// Markdown italic written with asterisks: `*em*`.
    AsteriskEmphasis,
    /// A `*` or `+` list bullet.
    AltBullet,
    /// Ordered list renumbered to a sequential run.
    OrderedRenumber,
    /// List nesting indent normalized to 2 spaces.
    NestIndentNormalize,
    /// `~~~` code fence.
    TildeFence,
    /// Indented (non-fenced) code block.
    IndentedCodeBlock,
    /// Horizontal rule written as `***`, `___`, or with spaces.
    AltHorizontalRule,
    /// Link or image title attribute that was dropped.
    LinkTitleDropped,
    /// Autolink or bare URL wrapped in an explicit link.
    AutolinkRewrap,
    /// Reference-style link resolved inline.
    RefLinkInlined,
    /// GFM alert blockquote rewritten to a callout.
    GfmAlert,
    /// Inline HTML.
    InlineHtml,
    /// Block-level HTML.
    HtmlBlock,
    /// Document frontmatter.
    Frontmatter,
    /// HTML entity decoded to its literal character.
    HtmlEntity,
    /// Heading anchor id rewritten to a slug.
    HeadingAnchorSlugged,
    /// Paragraph break inside a blockquote.
    BlockquoteParagraphSplit,
    /// `|` inside a table cell that had to be escaped.
    TableCellPipeEscape,
    /// Empty table cell that was padded.
    EmptyTableCell,
}
48
49impl Hole {
50    /// Stable kebab-case slug used in TODO comments and stderr report lines.
51    pub fn slug(self) -> &'static str {
52        match self {
53            Hole::SetextHeading => "setext-heading",
54            Hole::DefinitionListMultipleDefs => "definition-list-multiple-defs",
55            Hole::DoubleEmphasis => "double-emphasis",
56            Hole::EscapedSigil => "escaped-sigil",
57            Hole::AsteriskEmphasis => "asterisk-emphasis",
58            Hole::AltBullet => "alt-bullet",
59            Hole::OrderedRenumber => "ordered-renumber",
60            Hole::NestIndentNormalize => "nest-indent-normalize",
61            Hole::TildeFence => "tilde-fence",
62            Hole::IndentedCodeBlock => "indented-code-block",
63            Hole::AltHorizontalRule => "alt-horizontal-rule",
64            Hole::LinkTitleDropped => "link-title-dropped",
65            Hole::AutolinkRewrap => "autolink-rewrap",
66            Hole::RefLinkInlined => "ref-link-inlined",
67            Hole::GfmAlert => "gfm-alert",
68            Hole::InlineHtml => "inline-html",
69            Hole::HtmlBlock => "html-block",
70            Hole::Frontmatter => "frontmatter",
71            Hole::HtmlEntity => "html-entity",
72            Hole::HeadingAnchorSlugged => "heading-anchor-slugged",
73            Hole::BlockquoteParagraphSplit => "blockquote-paragraph-split",
74            Hole::TableCellPipeEscape => "table-cell-pipe-escape",
75            Hole::EmptyTableCell => "empty-table-cell",
76        }
77    }
78
79    /// Short human-readable label for stderr.
80    pub fn message(self) -> &'static str {
81        match self {
82            Hole::SetextHeading => "setext heading rewritten to ATX",
83            Hole::DefinitionListMultipleDefs => {
84                "definition list term repeated for each of multiple Markdown definitions (Brief v0.3 limitation)"
85            }
86            Hole::DoubleEmphasis => "doubled emphasis marker rewritten to single",
87            Hole::EscapedSigil => {
88                "literal `*`/`_`/`+`/`~` in text escaped to keep Brief from opening an emphasis span"
89            }
90            Hole::AsteriskEmphasis => "Markdown `*italic*` rewritten to Brief `_italic_`",
91            Hole::AltBullet => "`*`/`+` bullet rewritten to `-`",
92            Hole::OrderedRenumber => "ordered list renumbered to sequential 1,2,3,...",
93            Hole::NestIndentNormalize => "list nesting indent normalized to 2 spaces",
94            Hole::TildeFence => "`~~~` fence rewritten to triple-backtick fence",
95            Hole::IndentedCodeBlock => "indented code block rewritten to fenced block",
96            Hole::AltHorizontalRule => "`***`/`___`/spaced rule rewritten to `---`",
97            Hole::LinkTitleDropped => "link/image title attribute dropped",
98            Hole::AutolinkRewrap => "autolink/bare URL wrapped in `@link[...](...)`",
99            Hole::RefLinkInlined => "reference-style link resolved inline",
100            Hole::GfmAlert => "GFM alert blockquote rewritten to `@callout`",
101            Hole::InlineHtml => "inline HTML preserved as TODO comment",
102            Hole::HtmlBlock => "HTML block preserved inside Brief block comment",
103            Hole::Frontmatter => "frontmatter dropped, replaced with TODO comment",
104            Hole::HtmlEntity => "HTML entity decoded to literal character",
105            Hole::HeadingAnchorSlugged => "heading anchor id rewritten to `[a-z0-9-]+` slug",
106            Hole::BlockquoteParagraphSplit => {
107                "Markdown in-quote paragraph break rewritten to two adjacent Brief blockquotes"
108            }
109            Hole::TableCellPipeEscape => "`|` inside table cell escaped to `\\|`",
110            Hole::EmptyTableCell => "empty Markdown table cell padded with em-dash",
111        }
112    }
113}
114
115use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
116
117/// Convert a Markdown document to Brief source.
118///
119/// `source_path` is used only for diagnostic location strings.
120pub fn convert(input: &str, source_path: &str) -> ConvertResult {
121    let mut opts = Options::empty();
122    opts.insert(Options::ENABLE_TABLES);
123    opts.insert(Options::ENABLE_STRIKETHROUGH);
124    opts.insert(Options::ENABLE_TASKLISTS);
125    opts.insert(Options::ENABLE_FOOTNOTES);
126    opts.insert(Options::ENABLE_GFM);
127    opts.insert(Options::ENABLE_MATH);
128    opts.insert(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
129    opts.insert(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS);
130    opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
131    opts.insert(Options::ENABLE_DEFINITION_LIST);
132
133    let line_offsets = compute_line_offsets(input);
134    let events: Vec<(Event<'_>, std::ops::Range<usize>)> =
135        Parser::new_ext(input, opts).into_offset_iter().collect();
136
137    // Pass 1: render each footnote definition's body to Brief using a fresh
138    // sub-Walker so inline formatting (emphasis, links, code, ...) inside the
139    // body is preserved. Markdown footnote definitions live elsewhere in the
140    // source; Brief's `@footnote[body]` is inline at the reference site, so
141    // we must have the rendered body ready before pass 2 visits the
142    // FootnoteReference event.
143    let mut footnote_defs: std::collections::BTreeMap<String, String> =
144        std::collections::BTreeMap::new();
145    let mut footnote_diags: Vec<Diag> = Vec::new();
146    {
147        let mut current_label: Option<String> = None;
148        let mut current_events: Vec<(Event<'_>, std::ops::Range<usize>)> = Vec::new();
149        for (event, range) in &events {
150            match event {
151                Event::Start(Tag::FootnoteDefinition(label)) => {
152                    current_label = Some(label.to_string());
153                    current_events.clear();
154                }
155                Event::End(TagEnd::FootnoteDefinition) => {
156                    if let Some(label) = current_label.take() {
157                        let mut sub = Walker::new(input, source_path, line_offsets.clone());
158                        for (e, r) in current_events.drain(..) {
159                            sub.visit(e, r);
160                        }
161                        let body = sub.out.trim().to_string();
162                        footnote_diags.append(&mut sub.diags);
163                        footnote_defs.insert(label, body);
164                    }
165                }
166                _ => {
167                    if current_label.is_some() {
168                        current_events.push((event.clone(), range.clone()));
169                    }
170                }
171            }
172        }
173    }
174
175    // Pass 2: render the main document, skipping footnote definitions —
176    // their bodies are now inlined at the FootnoteReference site.
177    let mut walker = Walker::new(input, source_path, line_offsets);
178    walker.footnote_defs = footnote_defs;
179    walker.diags.extend(footnote_diags);
180    let mut skip_until_footnote_end = false;
181    for (event, range) in events {
182        match (&event, skip_until_footnote_end) {
183            (Event::Start(Tag::FootnoteDefinition(_)), _) => {
184                skip_until_footnote_end = true;
185                continue;
186            }
187            (Event::End(TagEnd::FootnoteDefinition), _) => {
188                skip_until_footnote_end = false;
189                continue;
190            }
191            (_, true) => continue,
192            _ => {}
193        }
194        walker.visit(event, range);
195    }
196    walker.finish()
197}
198
199struct Walker<'a> {
200    src: &'a str,
201    _source_path: String,
202    line_offsets: Vec<usize>,
203    out: String,
204    diags: Vec<Diag>,
205    /// True while inside a paragraph (between Start(Paragraph) and End).
206    in_paragraph: bool,
207    /// Set while we are inside a fenced/indented code block. Suppresses
208    /// inline parsing of `Event::Text` (text inside a code block is verbatim).
209    in_code_block: bool,
210    /// Stack of active lists.
211    list_stack: Vec<ListFrame>,
212    /// Stack of pending output buffers. The top of the stack is where output
213    /// is actually written. On End(BlockQuote/Alert), pop and post-process
214    /// (prefix lines or wrap in callout) before appending to the parent.
215    out_stack: Vec<String>,
216    /// Mirrors `out_stack`; top tells us how to wrap when the buffer is popped.
217    container_stack: Vec<Container>,
218    /// Active table accumulator (we only ever have one in flight).
219    table: Option<TableState>,
220    /// Active definition-list accumulator.
221    dl: Option<DefinitionListState>,
222    /// Stack of (dest_url, optional diagnostic to emit on End) for active links/images.
223    link_stack: Vec<(String, Option<(Hole, String)>)>,
224    /// Footnote labels → body text (rendered as a flat string).
225    footnote_defs: std::collections::BTreeMap<String, String>,
226    /// Hole TODO comment lines pending insertion before the next block.
227    pending_hole_comments: Vec<String>,
228    in_metadata: bool,
229    metadata_buf: String,
230    metadata_kind: Option<pulldown_cmark::MetadataBlockKind>,
231    /// Stack of currently-open inline HTML rewrites (`<sub>`, `<sup>`, `<kbd>`).
232    /// On a matching close tag we pop and emit `]`.
233    html_replace_stack: Vec<HtmlInlineKind>,
234    /// `Some(anchor)` between Start(Heading) and End(Heading) when the
235    /// markdown source carried a `{#anchor}` attribute.
236    pending_heading_anchor: Option<String>,
237}
238
239struct TableState {
240    aligns: Vec<pulldown_cmark::Alignment>,
241    rows: Vec<Vec<String>>,
242    current_row: Vec<String>,
243    current_cell: String,
244    in_cell: bool,
245}
246
/// Accumulator for the definition list currently being converted.
struct DefinitionListState {
    /// Completed `(term, definition)` pairs, both already rendered as Brief.
    items: Vec<(String, String)>,
    /// Term being built; `Walker` output lands here while `in_term` is set.
    current_term: String,
    /// Definition being built; `Walker` output lands here while `in_def` is
    /// set (and `in_term` is not).
    current_def: String,
    /// How many `DefinitionListDefinition` events the *current* term has seen
    /// so far; lets the converter notice a term with multiple definitions.
    defs_for_current_term: usize,
    /// Output is currently redirected into `current_term`.
    in_term: bool,
    /// Output is currently redirected into `current_def`.
    in_def: bool,
}
260
/// Bookkeeping for one open list level.
struct ListFrame {
    /// Whether the Markdown list was ordered (it carried a start number).
    ordered: bool,
    /// Number to print for the next item of an ordered list; starts at 1.
    next_index: u64,
    /// Becomes true once the first item of this list has been started.
    saw_first_item: bool,
    /// 1-indexed column at which item *content* begins at this level.
    /// Child items are compared against it to detect non-2-space nesting.
    item_content_col: usize,
}
269
270enum Container {
271    Quote,
272    Alert(#[allow(dead_code)] Hole, &'static str),
273    LinkPending,
274    ImagePending,
275    /// Paragraph buffer; on pop we flush pending HTML comments then this content.
276    Paragraph,
277    /// Buffer for an HtmlBlock; on End we inspect the buffered content and
278    /// either rewrite `<details>` to `@details(...)/@end`, hand off to a
279    /// `Details` container if the block is an opening fragment, or fall
280    /// back to the existing TODO + `/* */` form.
281    HtmlBlock,
282    /// Active `<details>` block whose body spans multiple events (because
283    /// blank lines split markdown content out of the surrounding HtmlBlock).
284    /// The `summary` is captured when we open; on the closing `</details>`
285    /// HtmlBlock we pop and emit `@details(summary: "..")\n[body]\n@end`.
286    Details {
287        summary: String,
288    },
289}
290
/// Inline HTML elements that are rewritten rather than preserved verbatim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HtmlInlineKind {
    /// `<sub>`
    Sub,
    /// `<sup>`
    Sup,
    /// `<kbd>`
    Kbd,
}
297
298impl HtmlInlineKind {
299    fn shortcode(self) -> &'static str {
300        match self {
301            HtmlInlineKind::Sub => "sub",
302            HtmlInlineKind::Sup => "sup",
303            HtmlInlineKind::Kbd => "kbd",
304        }
305    }
306}
307
308impl<'a> Walker<'a> {
309    fn write(&mut self, s: &str) {
310        if let Some(d) = self.dl.as_mut() {
311            if d.in_term {
312                d.current_term.push_str(s);
313                return;
314            }
315            if d.in_def {
316                d.current_def.push_str(s);
317                return;
318            }
319        }
320        if let Some(t) = self.table.as_mut() {
321            if t.in_cell {
322                t.current_cell.push_str(s);
323                return;
324            }
325        }
326        if let Some(buf) = self.out_stack.last_mut() {
327            buf.push_str(s);
328        } else {
329            self.out.push_str(s);
330        }
331    }
332    fn write_char(&mut self, c: char) {
333        if let Some(d) = self.dl.as_mut() {
334            if d.in_term {
335                d.current_term.push(c);
336                return;
337            }
338            if d.in_def {
339                d.current_def.push(c);
340                return;
341            }
342        }
343        if let Some(t) = self.table.as_mut() {
344            if t.in_cell {
345                t.current_cell.push(c);
346                return;
347            }
348        }
349        if let Some(buf) = self.out_stack.last_mut() {
350            buf.push(c);
351        } else {
352            self.out.push(c);
353        }
354    }
355    fn current_ends_with(&self, c: char) -> bool {
356        if let Some(d) = self.dl.as_ref() {
357            if d.in_term {
358                return d.current_term.ends_with(c);
359            }
360            if d.in_def {
361                return d.current_def.ends_with(c);
362            }
363        }
364        if let Some(t) = self.table.as_ref() {
365            if t.in_cell {
366                return t.current_cell.ends_with(c);
367            }
368        }
369        if let Some(buf) = self.out_stack.last() {
370            buf.ends_with(c)
371        } else {
372            self.out.ends_with(c)
373        }
374    }
375
376    fn new(src: &'a str, source_path: &str, line_offsets: Vec<usize>) -> Self {
377        Walker {
378            src,
379            _source_path: source_path.to_string(),
380            line_offsets,
381            out: String::new(),
382            diags: Vec::new(),
383            in_paragraph: false,
384            in_code_block: false,
385            list_stack: Vec::new(),
386            out_stack: Vec::new(),
387            container_stack: Vec::new(),
388            table: None,
389            dl: None,
390            link_stack: Vec::new(),
391            footnote_defs: std::collections::BTreeMap::new(),
392            pending_hole_comments: Vec::new(),
393            in_metadata: false,
394            metadata_buf: String::new(),
395            metadata_kind: None,
396            html_replace_stack: Vec::new(),
397            pending_heading_anchor: None,
398        }
399    }
400
401    fn visit(&mut self, event: Event<'_>, range: std::ops::Range<usize>) {
402        match event {
403            Event::Start(Tag::List(start)) => {
404                self.flush_pending_hole_comments();
405                let ordered = start.is_some();
406                if let Some(n) = start {
407                    if n != 1 {
408                        self.push_diag(
409                            Hole::OrderedRenumber,
410                            range.clone(),
411                            format!("ordered list started at {}; renumbered from 1", n),
412                        );
413                    }
414                }
415                // Nested list opens inside a parent Item whose text hasn't
416                // closed with a newline yet. Ensure one is in place.
417                if !self.list_stack.is_empty() && !self.current_ends_with('\n') {
418                    self.write_char('\n');
419                }
420                self.list_stack.push(ListFrame {
421                    ordered,
422                    next_index: 1, // always renumber from 1 in Brief
423                    saw_first_item: false,
424                    item_content_col: 0,
425                });
426            }
427            Event::End(TagEnd::List(_)) => {
428                self.list_stack.pop();
429            }
430            Event::Start(Tag::Item) => {
431                let depth = self.list_stack.len().saturating_sub(1);
432                let (line, col) = self.pos(range.start);
433                let _ = line;
434                // Check for non-2-space nesting against parent frame.
435                if depth > 0 {
436                    let parent = &self.list_stack[depth - 1];
437                    if parent.item_content_col > 0 {
438                        let expected_col = parent.item_content_col;
439                        if col != expected_col && col != parent.item_content_col {
440                            let already = self
441                                .diags
442                                .iter()
443                                .any(|d| d.hole == Hole::NestIndentNormalize && d.line == line);
444                            if !already {
445                                self.push_diag(
446                                    Hole::NestIndentNormalize,
447                                    range.clone(),
448                                    format!(
449                                        "nesting at col {} normalized to {} (2-space rule)",
450                                        col, expected_col
451                                    ),
452                                );
453                            }
454                        }
455                    }
456                }
457                let frame_ordered = self
458                    .list_stack
459                    .last()
460                    .expect("Item without enclosing List")
461                    .ordered;
462                // For unordered lists, detect alt-bullet first (uses src/diags).
463                if !frame_ordered {
464                    let snippet = self.src.get(range.clone()).unwrap_or("");
465                    let first = snippet
466                        .as_bytes()
467                        .iter()
468                        .find(|&&b| b == b'-' || b == b'*' || b == b'+')
469                        .copied();
470                    if first == Some(b'*') || first == Some(b'+') {
471                        self.push_diag(
472                            Hole::AltBullet,
473                            range.clone(),
474                            format!(
475                                "`{}` bullet rewritten to `-`",
476                                first.map(|b| b as char).unwrap_or('?')
477                            ),
478                        );
479                    }
480                }
481                let pad: String = " ".repeat(depth * 2);
482                self.write(&pad);
483                let frame = self
484                    .list_stack
485                    .last_mut()
486                    .expect("Item without enclosing List");
487                let marker_len: usize;
488                let marker_str: String;
489                if frame.ordered {
490                    marker_str = format!("{}. ", frame.next_index);
491                    marker_len = marker_str.len();
492                    frame.next_index += 1;
493                } else {
494                    marker_str = "- ".to_string();
495                    marker_len = 2;
496                }
497                frame.saw_first_item = true;
498                frame.item_content_col = depth * 2 + marker_len + 1;
499                self.write(&marker_str);
500            }
501            Event::End(TagEnd::Item) => {
502                if !self.current_ends_with('\n') {
503                    self.write_char('\n');
504                }
505            }
506            Event::Start(Tag::Paragraph) => {
507                self.flush_pending_hole_comments();
508                self.in_paragraph = true;
509                // Defer paragraph content into a buffer so we can prepend any
510                // hole-comment lines collected from inline events before
511                // emitting the paragraph itself.
512                self.out_stack.push(String::new());
513                self.container_stack.push(Container::Paragraph);
514            }
515            Event::End(TagEnd::Paragraph) => {
516                self.in_paragraph = false;
517                let body = self.out_stack.pop().expect("paragraph buffer");
518                let _ = self.container_stack.pop();
519                self.write(&body);
520                if self.list_stack.is_empty() {
521                    self.write_char('\n');
522                    self.write_char('\n');
523                } else {
524                    // Inside a list item — no trailing blank line.
525                    self.write_char('\n');
526                }
527            }
528            Event::Start(Tag::Heading { level, id, .. }) => {
529                self.flush_pending_hole_comments();
530                let n = match level {
531                    pulldown_cmark::HeadingLevel::H1 => 1,
532                    pulldown_cmark::HeadingLevel::H2 => 2,
533                    pulldown_cmark::HeadingLevel::H3 => 3,
534                    pulldown_cmark::HeadingLevel::H4 => 4,
535                    pulldown_cmark::HeadingLevel::H5 => 5,
536                    pulldown_cmark::HeadingLevel::H6 => 6,
537                };
538                // Detect setext: the source span at `range` does not start with `#`.
539                let snippet = self.src.get(range.clone()).unwrap_or("");
540                let is_setext = !snippet.trim_start().starts_with('#');
541                if is_setext {
542                    self.push_diag(
543                        Hole::SetextHeading,
544                        range.clone(),
545                        format!("rewritten to `{} ...`", "#".repeat(n)),
546                    );
547                }
548                if let Some(raw) = id {
549                    let raw = raw.to_string();
550                    let safe = sluggify_anchor(&raw);
551                    if safe != raw {
552                        self.push_diag(
553                            Hole::HeadingAnchorSlugged,
554                            range.clone(),
555                            format!("anchor `{}` rewritten to `{}`", raw, safe),
556                        );
557                    }
558                    self.pending_heading_anchor = Some(safe);
559                }
560                for _ in 0..n {
561                    self.write_char('#');
562                }
563                self.write_char(' ');
564            }
565            Event::End(TagEnd::Heading(_)) => {
566                if let Some(anchor) = self.pending_heading_anchor.take() {
567                    // Heading body may have ended with a trailing space from a
568                    // soft break; trim before joining the anchor block.
569                    while self.current_ends_with(' ') {
570                        match self.out_stack.last_mut() {
571                            Some(buf) => {
572                                buf.pop();
573                            }
574                            None => {
575                                self.out.pop();
576                            }
577                        }
578                    }
579                    self.write(" {#");
580                    self.write(&anchor);
581                    self.write_char('}');
582                }
583                self.write_char('\n');
584            }
585            Event::Start(Tag::Emphasis) => {
586                // *x* (asterisk italic) is a hole; _x_ is clean.
587                let first = self.src.as_bytes().get(range.start).copied();
588                if first == Some(b'*') {
589                    self.push_diag(
590                        Hole::AsteriskEmphasis,
591                        range.clone(),
592                        "Markdown `*italic*` rewritten to Brief `_italic_`".into(),
593                    );
594                }
595                self.write_char('_');
596            }
597            Event::End(TagEnd::Emphasis) => {
598                self.write_char('_');
599            }
600            Event::Start(Tag::Strong) => {
601                self.push_diag(
602                    Hole::DoubleEmphasis,
603                    range.clone(),
604                    "doubled emphasis marker rewritten to single `*`".into(),
605                );
606                self.write_char('*');
607            }
608            Event::End(TagEnd::Strong) => {
609                self.write_char('*');
610            }
611            Event::Start(Tag::Strikethrough) => {
612                self.push_diag(
613                    Hole::DoubleEmphasis,
614                    range.clone(),
615                    "doubled strikethrough rewritten to single `~`".into(),
616                );
617                self.write_char('~');
618            }
619            Event::End(TagEnd::Strikethrough) => {
620                self.write_char('~');
621            }
622            Event::Code(s) => {
623                if s.contains('`') {
624                    self.write("``");
625                    self.write(&s);
626                    self.write("``");
627                } else {
628                    self.write_char('`');
629                    self.write(&s);
630                    self.write_char('`');
631                }
632            }
633            Event::Start(Tag::CodeBlock(kind)) => {
634                self.flush_pending_hole_comments();
635                use pulldown_cmark::CodeBlockKind;
636                self.in_code_block = true;
637                match kind {
638                    CodeBlockKind::Fenced(lang) => {
639                        // Detect tilde fence by inspecting the source span.
640                        let snippet = self.src.get(range.clone()).unwrap_or("");
641                        if snippet.trim_start().starts_with('~') {
642                            self.push_diag(
643                                Hole::TildeFence,
644                                range.clone(),
645                                "`~~~` fence rewritten to ```` ``` ```` fence".into(),
646                            );
647                        }
648                        self.write("```");
649                        if !lang.is_empty() {
650                            // Brief takes only the first whitespace-separated token.
651                            let lang_token = lang.split_whitespace().next().unwrap_or("");
652                            self.write(lang_token);
653                        }
654                        self.write_char('\n');
655                    }
656                    CodeBlockKind::Indented => {
657                        self.push_diag(
658                            Hole::IndentedCodeBlock,
659                            range.clone(),
660                            "indented code block rewritten to fenced block".into(),
661                        );
662                        self.write("```\n");
663                    }
664                }
665            }
666            Event::End(TagEnd::CodeBlock) => {
667                self.in_code_block = false;
668                if !self.current_ends_with('\n') {
669                    self.write_char('\n');
670                }
671                self.write("```\n");
672            }
673            Event::Text(t) => {
674                if self.in_metadata {
675                    self.metadata_buf.push_str(&t);
676                    return;
677                }
678                if self.in_code_block {
679                    self.write(&t);
680                } else {
681                    let escaped = escape_brief_inline_text(&t);
682                    if escaped != *t {
683                        // Any sigil that needed escaping is a hole — flag it so the
684                        // user can spot-check whether Brief renders the literal
685                        // intent.
686                        self.push_diag(
687                            Hole::EscapedSigil,
688                            range.clone(),
689                            format!(
690                                "escaped emphasis sigil(s) in literal text: {:?}",
691                                t.chars().take(40).collect::<String>()
692                            ),
693                        );
694                    }
695                    self.write(&escaped);
696                }
697            }
698            Event::TaskListMarker(checked) => {
699                // v0.4 §4.3: Brief now natively supports `[x]` / `[ ]`
700                // as a list-item modifier. The conversion is lossless, so
701                // no diagnostic is emitted.
702                self.write(if checked { "[x] " } else { "[ ] " });
703            }
704            Event::Start(Tag::BlockQuote(kind)) => {
705                self.flush_pending_hole_comments();
706                use pulldown_cmark::BlockQuoteKind;
707                let container = match kind {
708                    None => Container::Quote,
709                    Some(BlockQuoteKind::Note) => {
710                        self.push_diag(
711                            Hole::GfmAlert,
712                            range.clone(),
713                            "GFM alert mapped to `@callout(kind: note)`".into(),
714                        );
715                        Container::Alert(Hole::GfmAlert, "note")
716                    }
717                    Some(BlockQuoteKind::Tip) => {
718                        self.push_diag(
719                            Hole::GfmAlert,
720                            range.clone(),
721                            "GFM alert mapped to `@callout(kind: tip)`".into(),
722                        );
723                        Container::Alert(Hole::GfmAlert, "tip")
724                    }
725                    Some(BlockQuoteKind::Important) => {
726                        self.push_diag(
727                            Hole::GfmAlert,
728                            range.clone(),
729                            "GFM alert mapped to `@callout(kind: important)`".into(),
730                        );
731                        Container::Alert(Hole::GfmAlert, "important")
732                    }
733                    Some(BlockQuoteKind::Warning) => {
734                        self.push_diag(
735                            Hole::GfmAlert,
736                            range.clone(),
737                            "GFM alert mapped to `@callout(kind: warning)`".into(),
738                        );
739                        Container::Alert(Hole::GfmAlert, "warning")
740                    }
741                    Some(BlockQuoteKind::Caution) => {
742                        self.push_diag(
743                            Hole::GfmAlert,
744                            range.clone(),
745                            "GFM alert mapped to `@callout(kind: caution)`".into(),
746                        );
747                        Container::Alert(Hole::GfmAlert, "caution")
748                    }
749                };
750                self.out_stack.push(String::new());
751                self.container_stack.push(container);
752            }
753            Event::End(TagEnd::BlockQuote(_)) => {
754                let inner = self.out_stack.pop().expect("unbalanced quote stack");
755                let container = self
756                    .container_stack
757                    .pop()
758                    .expect("unbalanced container stack");
759                let trimmed = inner.trim_end_matches('\n');
760                match container {
761                    Container::Quote => {
762                        // Accumulate groups of consecutive non-empty lines. A blank line
763                        // ends the current group; multiple blanks collapse to one
764                        // separator. Brief's paragraph break inside a blockquote does not
765                        // exist as a feature — it's modeled by ending the `>`-block with a
766                        // blank line and starting a new one on the next non-empty line.
767                        let mut groups: Vec<Vec<&str>> = Vec::new();
768                        let mut cur: Vec<&str> = Vec::new();
769                        for line in trimmed.split('\n') {
770                            if line.is_empty() {
771                                if !cur.is_empty() {
772                                    groups.push(std::mem::take(&mut cur));
773                                }
774                                // Consecutive blanks: drop, no-op (only one separator
775                                // matters between two non-empty groups).
776                                continue;
777                            }
778                            cur.push(line);
779                        }
780                        if !cur.is_empty() {
781                            groups.push(cur);
782                        }
783                        let saw_blank = groups.len() > 1;
784                        for (gi, group) in groups.iter().enumerate() {
785                            if gi > 0 {
786                                // Empty source line — terminates the previous Brief
787                                // blockquote and starts the next one. This is the line
788                                // whose absence produced the original bug.
789                                self.write_char('\n');
790                            }
791                            for line in group {
792                                self.write("> ");
793                                self.write(line);
794                                self.write_char('\n');
795                            }
796                        }
797                        if saw_blank {
798                            // `range` is the End(TagEnd::BlockQuote(_)) event's range — the
799                            // surrounding match arm parameter. It points at the close of the
800                            // Markdown blockquote, which is the best signal we have for
801                            // where the break originated.
802                            self.push_diag(
803                                Hole::BlockquoteParagraphSplit,
804                                range.clone(),
805                                "in-quote paragraph break rewritten to adjacent blockquotes".into(),
806                            );
807                        }
808                    }
809                    Container::Alert(_, kind) => {
810                        self.write("@callout(kind: ");
811                        self.write(kind);
812                        self.write(")\n");
813                        self.write(trimmed);
814                        if !trimmed.ends_with('\n') {
815                            self.write_char('\n');
816                        }
817                        self.write("@end\n");
818                    }
819                    Container::LinkPending
820                    | Container::ImagePending
821                    | Container::Paragraph
822                    | Container::HtmlBlock
823                    | Container::Details { .. } => {
824                        // Should not arrive here — those containers are
825                        // popped by their own End arms. Defensive no-op.
826                        self.write(&inner);
827                    }
828                }
829            }
830            Event::Rule => {
831                self.flush_pending_hole_comments();
832                let snippet = self.src.get(range.clone()).unwrap_or("").trim();
833                let is_clean_dashes = snippet == "---";
834                if !is_clean_dashes {
835                    self.push_diag(
836                        Hole::AltHorizontalRule,
837                        range.clone(),
838                        format!("`{}` rewritten to `---`", snippet),
839                    );
840                }
841                self.write("---\n");
842            }
843            Event::Start(Tag::Link {
844                link_type,
845                dest_url,
846                title,
847                ..
848            }) => {
849                use pulldown_cmark::LinkType;
850                // Capture a non-empty title to emit as `title:` kwarg.
851                let link_title = if title.is_empty() {
852                    None
853                } else {
854                    Some(title.to_string())
855                };
856                let mut diag: Option<(Hole, String)> = None;
857                match link_type {
858                    LinkType::Autolink | LinkType::Email => {
859                        diag = Some((
860                            Hole::AutolinkRewrap,
861                            format!("autolink `<{}>` wrapped in `@link[..](..)`", dest_url),
862                        ));
863                    }
864                    LinkType::Reference
865                    | LinkType::ReferenceUnknown
866                    | LinkType::Collapsed
867                    | LinkType::CollapsedUnknown
868                    | LinkType::Shortcut
869                    | LinkType::ShortcutUnknown => {
870                        diag = Some((
871                            Hole::RefLinkInlined,
872                            "reference-style link resolved inline".into(),
873                        ));
874                    }
875                    LinkType::Inline => {}
876                    _ => {}
877                }
878                // Store (url, optional_title, optional_diag) via a tuple.
879                // We encode the title into the url string using a sentinel separator
880                // so we can reuse the existing link_stack without changing its type.
881                // Instead, push title into a separate parallel stack field by
882                // storing both in a combined tuple stored in out_stack label.
883                // Simplest approach: store title in a new wrapper. Use an existing
884                // field trick: push url\x00title so End can split on \x00.
885                let url_with_title = if let Some(ref t) = link_title {
886                    format!("{}\x00{}", dest_url, t)
887                } else {
888                    dest_url.to_string()
889                };
890                self.link_stack.push((url_with_title, diag));
891                self.out_stack.push(String::new());
892                self.container_stack.push(Container::LinkPending);
893            }
894            Event::End(TagEnd::Link) => {
895                let text = self.out_stack.pop().expect("link buffer");
896                let _ = self.container_stack.pop();
897                let (url_with_title, diag) = self.link_stack.pop().expect("link stack");
898                // Split url and optional title.
899                let (url, opt_title) = if let Some(idx) = url_with_title.find('\x00') {
900                    let (u, t) = url_with_title.split_at(idx);
901                    (u.to_string(), Some(t[1..].to_string()))
902                } else {
903                    (url_with_title, None)
904                };
905                if let Some((hole, note)) = diag {
906                    self.diags.push(Diag {
907                        hole,
908                        line: 0,
909                        col: 0,
910                        original: format!("[{}]({})", text, url),
911                        note,
912                    });
913                }
914                if let Some(t) = opt_title {
915                    self.write("@link(title: \"");
916                    self.write(&t);
917                    self.write("\")[");
918                    self.write(&text);
919                    self.write("](");
920                    self.write(&url);
921                    self.write(")");
922                } else {
923                    self.write("@link[");
924                    self.write(&text);
925                    self.write("](");
926                    self.write(&url);
927                    self.write(")");
928                }
929            }
930            Event::Start(Tag::Image {
931                dest_url, title, ..
932            }) => {
933                let diag = if title.is_empty() {
934                    None
935                } else {
936                    Some((
937                        Hole::LinkTitleDropped,
938                        format!("image title `{}` dropped", title),
939                    ))
940                };
941                self.link_stack.push((dest_url.to_string(), diag));
942                self.out_stack.push(String::new());
943                self.container_stack.push(Container::ImagePending);
944            }
945            Event::End(TagEnd::Image) => {
946                let alt = self.out_stack.pop().expect("image buffer");
947                let _ = self.container_stack.pop();
948                let (src, diag) = self.link_stack.pop().expect("link stack");
949                if let Some((hole, note)) = diag {
950                    self.diags.push(Diag {
951                        hole,
952                        line: 0,
953                        col: 0,
954                        original: format!("![{}]({})", alt, src),
955                        note,
956                    });
957                }
958                self.write("@image(src: \"");
959                self.write(&src);
960                self.write("\", alt: \"");
961                self.write(&alt);
962                self.write("\")[]");
963            }
964            Event::Start(Tag::Table(aligns)) => {
965                self.flush_pending_hole_comments();
966                self.table = Some(TableState {
967                    aligns,
968                    rows: Vec::new(),
969                    current_row: Vec::new(),
970                    current_cell: String::new(),
971                    in_cell: false,
972                });
973            }
974            Event::End(TagEnd::Table) => {
975                if let Some(state) = self.table.take() {
976                    self.emit_table(state);
977                }
978            }
979            Event::Start(Tag::TableHead) | Event::Start(Tag::TableRow) => {
980                if let Some(t) = self.table.as_mut() {
981                    t.current_row = Vec::new();
982                }
983            }
984            Event::End(TagEnd::TableHead) | Event::End(TagEnd::TableRow) => {
985                if let Some(t) = self.table.as_mut() {
986                    let row = std::mem::take(&mut t.current_row);
987                    t.rows.push(row);
988                }
989            }
990            Event::Start(Tag::TableCell) => {
991                if let Some(t) = self.table.as_mut() {
992                    t.current_cell = String::new();
993                    t.in_cell = true;
994                }
995            }
996            Event::End(TagEnd::TableCell) => {
997                if let Some(t) = self.table.as_mut() {
998                    let cell = std::mem::take(&mut t.current_cell);
999                    t.current_row.push(cell);
1000                    t.in_cell = false;
1001                }
1002            }
1003            Event::Start(Tag::DefinitionList) => {
1004                self.flush_pending_hole_comments();
1005                // Ensure preceding blank line, same convention as paragraphs
1006                // and lists.
1007                if !self.out.is_empty() && !self.out.ends_with("\n\n") {
1008                    if !self.out.ends_with('\n') {
1009                        self.out.push('\n');
1010                    }
1011                    self.out.push('\n');
1012                }
1013                self.dl = Some(DefinitionListState {
1014                    items: Vec::new(),
1015                    current_term: String::new(),
1016                    current_def: String::new(),
1017                    defs_for_current_term: 0,
1018                    in_term: false,
1019                    in_def: false,
1020                });
1021            }
1022            Event::End(TagEnd::DefinitionList) => {
1023                let state = self.dl.take().expect("DefinitionList without state");
1024                if !state.items.is_empty() {
1025                    self.out.push_str("@dl\n");
1026                    for (term, def) in &state.items {
1027                        self.out.push_str(term.trim_end());
1028                        self.out.push('\n');
1029                        self.out.push_str(": ");
1030                        self.out.push_str(def.trim_end());
1031                        self.out.push('\n');
1032                    }
1033                    self.out.push_str("@end\n\n");
1034                }
1035            }
1036            Event::Start(Tag::DefinitionListTitle) => {
1037                if let Some(d) = self.dl.as_mut() {
1038                    d.in_term = true;
1039                    d.current_term.clear();
1040                    d.defs_for_current_term = 0;
1041                }
1042            }
1043            Event::End(TagEnd::DefinitionListTitle) => {
1044                if let Some(d) = self.dl.as_mut() {
1045                    d.in_term = false;
1046                }
1047            }
1048            Event::Start(Tag::DefinitionListDefinition) => {
1049                if let Some(d) = self.dl.as_mut() {
1050                    d.in_def = true;
1051                    d.current_def.clear();
1052                    d.defs_for_current_term += 1;
1053                }
1054                let crossed_to_two = self
1055                    .dl
1056                    .as_ref()
1057                    .map(|d| d.defs_for_current_term == 2)
1058                    .unwrap_or(false);
1059                if crossed_to_two {
1060                    self.push_diag(
1061                        Hole::DefinitionListMultipleDefs,
1062                        range.clone(),
1063                        "definition list term repeated for each definition (Brief v0.3 limitation)"
1064                            .into(),
1065                    );
1066                }
1067            }
1068            Event::End(TagEnd::DefinitionListDefinition) => {
1069                if let Some(d) = self.dl.as_mut() {
1070                    d.in_def = false;
1071                    let term = d.current_term.clone();
1072                    let def = std::mem::take(&mut d.current_def);
1073                    d.items.push((term, def));
1074                }
1075            }
1076            Event::Start(Tag::MetadataBlock(kind)) => {
1077                self.in_metadata = true;
1078                self.metadata_buf.clear();
1079                self.metadata_kind = Some(kind);
1080            }
1081            Event::End(TagEnd::MetadataBlock(_)) => {
1082                use pulldown_cmark::MetadataBlockKind;
1083                self.in_metadata = false;
1084                let kind = self.metadata_kind.take();
1085                let body = std::mem::take(&mut self.metadata_buf);
1086                match kind {
1087                    Some(MetadataBlockKind::PlusesStyle) => {
1088                        // TOML markdown frontmatter has a clean Brief equivalent;
1089                        // emit it directly with no hole diagnostic.
1090                        self.write("+++\n");
1091                        self.write(&body);
1092                        if !body.ends_with('\n') {
1093                            self.write_char('\n');
1094                        }
1095                        self.write("+++\n\n");
1096                    }
1097                    _ => {
1098                        self.push_diag(
1099                            Hole::Frontmatter,
1100                            range.clone(),
1101                            "frontmatter dropped, replaced with TODO comment".into(),
1102                        );
1103                        // Flush the pending hole comment (just pushed by push_diag)
1104                        // so it appears immediately at the frontmatter site.
1105                        self.flush_pending_hole_comments();
1106                    }
1107                }
1108            }
1109            Event::Start(Tag::HtmlBlock) => {
1110                // Buffer the block's content; on End we either rewrite a
1111                // recognized `<details>` shape or fall back to the
1112                // existing TODO + `/* */` comment form.
1113                self.out_stack.push(String::new());
1114                self.container_stack.push(Container::HtmlBlock);
1115            }
1116            Event::End(TagEnd::HtmlBlock) => {
1117                let buf = self.out_stack.pop().expect("html block buffer");
1118                let _ = self.container_stack.pop();
1119                match classify_details_block(&buf) {
1120                    DetailsShape::Closed { summary, body } => {
1121                        self.write("@details(summary: \"");
1122                        self.write(&escape_brief_string(&summary));
1123                        self.write("\")\n");
1124                        let body = body.trim_matches('\n');
1125                        if !body.is_empty() {
1126                            self.write(body);
1127                            self.write_char('\n');
1128                        }
1129                        self.write("@end\n\n");
1130                    }
1131                    DetailsShape::Open { summary } => {
1132                        // The `</details>` will arrive in a later HtmlBlock.
1133                        // Stage a Details container that captures any
1134                        // blocks rendered in between.
1135                        self.out_stack.push(String::new());
1136                        self.container_stack.push(Container::Details { summary });
1137                    }
1138                    DetailsShape::Close => {
1139                        // Pop the matching Details container, if any.
1140                        let mut closed = false;
1141                        if matches!(self.container_stack.last(), Some(Container::Details { .. })) {
1142                            let body = self.out_stack.pop().expect("details body buffer");
1143                            let container = self.container_stack.pop().expect("details container");
1144                            if let Container::Details { summary } = container {
1145                                self.write("@details(summary: \"");
1146                                self.write(&escape_brief_string(&summary));
1147                                self.write("\")\n");
1148                                let body = body.trim_matches('\n');
1149                                if !body.is_empty() {
1150                                    self.write(body);
1151                                    self.write_char('\n');
1152                                }
1153                                self.write("@end\n\n");
1154                                closed = true;
1155                            }
1156                        }
1157                        if !closed {
1158                            // Stray `</details>` with no opener on the
1159                            // stack — fall back to the comment form.
1160                            self.fallback_html_block(range.clone(), &buf);
1161                        }
1162                    }
1163                    DetailsShape::Unknown => {
1164                        self.fallback_html_block(range.clone(), &buf);
1165                    }
1166                }
1167            }
1168            Event::Html(s) => {
1169                // Block-level HTML content (between Start/End of HtmlBlock).
1170                self.write(&s);
1171            }
1172            Event::InlineHtml(s) => {
1173                let trimmed = s.trim();
1174                if let Some(kind) = classify_inline_html_open(trimmed) {
1175                    self.html_replace_stack.push(kind);
1176                    self.write_char('@');
1177                    self.write(kind.shortcode());
1178                    self.write_char('[');
1179                    return;
1180                }
1181                if let Some(kind) = classify_inline_html_close(trimmed) {
1182                    if self.html_replace_stack.last().copied() == Some(kind) {
1183                        self.html_replace_stack.pop();
1184                        self.write_char(']');
1185                        return;
1186                    }
1187                    // Mismatched close — fall through to TODO so we don't
1188                    // emit a stray `]` that would corrupt the output.
1189                }
1190                if is_inline_br(trimmed) {
1191                    // Brief hard break: backslash at end of line.
1192                    self.write_char('\\');
1193                    self.write_char('\n');
1194                    return;
1195                }
1196                let snippet = s.to_string();
1197                self.push_diag(
1198                    Hole::InlineHtml,
1199                    range.clone(),
1200                    format!("`{}` preserved as TODO comment", snippet.trim()),
1201                );
1202                // push_diag already queued a pending_hole_comments entry;
1203                // it will be flushed before the next block-level event.
1204            }
1205            Event::FootnoteReference(label) => {
1206                let body = self
1207                    .footnote_defs
1208                    .get(label.as_ref())
1209                    .cloned()
1210                    .unwrap_or_else(|| format!("??: {}", label));
1211                self.write("@footnote[");
1212                self.write(&body);
1213                self.write("]");
1214            }
1215            Event::SoftBreak => {
1216                self.write_char('\n');
1217            }
1218            Event::HardBreak => {
1219                self.write_char('\\');
1220                self.write_char('\n');
1221            }
1222            Event::InlineMath(s) => {
1223                self.write("@math[");
1224                self.write(&s);
1225                self.write_char(']');
1226            }
1227            Event::DisplayMath(s) => {
1228                let body = s.trim_matches('\n');
1229                self.write("@math\n");
1230                self.write(body);
1231                self.write_char('\n');
1232                self.write("@end");
1233            }
1234            _ => {
1235                // Other events handled in subsequent tasks.
1236            }
1237        }
1238    }
1239
1240    fn finish(mut self) -> ConvertResult {
1241        // Drain any TODO comments queued by the last block — there's no
1242        // subsequent Start event to flush them.
1243        self.flush_pending_hole_comments();
1244        // Trim trailing blank lines down to a single newline.
1245        while self.out.ends_with("\n\n") {
1246            self.out.pop();
1247        }
1248        if !self.out.is_empty() && !self.out.ends_with('\n') {
1249            self.out.push('\n');
1250        }
1251        ConvertResult {
1252            brief_source: self.out,
1253            diagnostics: self.diags,
1254        }
1255    }
1256
1257    /// Convert a byte offset into 1-indexed (line, column).
1258    #[allow(dead_code)] // used by later tasks
1259    fn pos(&self, offset: usize) -> (usize, usize) {
1260        match self.line_offsets.binary_search(&offset) {
1261            Ok(line) => (line + 1, 1),
1262            Err(line) => {
1263                let line_start = self.line_offsets[line - 1];
1264                (line, offset - line_start + 1)
1265            }
1266        }
1267    }
1268
1269    #[allow(dead_code)] // used by later tasks
1270    fn push_diag(&mut self, hole: Hole, range: std::ops::Range<usize>, note: String) {
1271        let (line, col) = self.pos(range.start);
1272        let original = self
1273            .src
1274            .get(range.clone())
1275            .unwrap_or("")
1276            .chars()
1277            .take(80)
1278            .collect::<String>();
1279        self.diags.push(Diag {
1280            hole,
1281            line,
1282            col,
1283            original,
1284            note: note.clone(),
1285        });
1286        // Surface every hole as a `// TODO[B-hole:slug]:` line so a reviewer
1287        // can grep the converted corpus. Flushed before the next block.
1288        self.pending_hole_comments
1289            .push(format!("// TODO[B-hole:{}]: {}", hole.slug(), note));
1290    }
1291
1292    fn flush_pending_hole_comments(&mut self) {
1293        for c in std::mem::take(&mut self.pending_hole_comments) {
1294            // Write to top buffer (or `out`).
1295            if let Some(buf) = self.out_stack.last_mut() {
1296                buf.push_str(&c);
1297                buf.push('\n');
1298            } else {
1299                self.out.push_str(&c);
1300                self.out.push('\n');
1301            }
1302        }
1303    }
1304
1305    /// Emit a buffered HTML block as the existing TODO + `/* */` comment
1306    /// fallback. Used when the block isn't a recognizable `<details>`.
1307    fn fallback_html_block(&mut self, range: std::ops::Range<usize>, buf: &str) {
1308        self.push_diag(
1309            Hole::HtmlBlock,
1310            range,
1311            "HTML block preserved inside Brief block comment".into(),
1312        );
1313        // Flush the pending hole comment (just pushed by push_diag) so it
1314        // appears immediately before the block comment, not deferred.
1315        self.flush_pending_hole_comments();
1316        self.write("/*\n");
1317        // Brief block comments don't nest; sanitize any embedded `*/` so
1318        // the comment terminates only where we want it to.
1319        let sanitized = buf.replace("*/", "* /");
1320        self.write(&sanitized);
1321        if !sanitized.ends_with('\n') {
1322            self.write_char('\n');
1323        }
1324        self.write("*/\n");
1325    }
1326
1327    fn emit_table(&mut self, state: TableState) {
1328        use pulldown_cmark::Alignment;
1329        let needs_align = state.aligns.iter().any(|a| !matches!(a, Alignment::None));
1330        if needs_align {
1331            self.write("@t(align: [");
1332            let parts: Vec<&str> = state
1333                .aligns
1334                .iter()
1335                .map(|a| match a {
1336                    Alignment::None | Alignment::Left => "left",
1337                    Alignment::Center => "center",
1338                    Alignment::Right => "right",
1339                })
1340                .collect();
1341            self.write(&parts.join(", "));
1342            self.write("])\n");
1343        } else {
1344            self.write("@t\n");
1345        }
1346        let mut saw_pipe_escape = false;
1347        for row in &state.rows {
1348            self.write("|");
1349            for (i, cell) in row.iter().enumerate() {
1350                self.write(" ");
1351                let trimmed = cell.trim();
1352                let (escaped, escaped_pipe) = if trimmed.is_empty() {
1353                    self.diags.push(Diag {
1354                        hole: Hole::EmptyTableCell,
1355                        line: 0,
1356                        col: 0,
1357                        original: String::new(),
1358                        note: "empty Markdown table cell padded with `—`".into(),
1359                    });
1360                    ("—".to_string(), false)
1361                } else {
1362                    escape_table_cell(trimmed)
1363                };
1364                if escaped_pipe {
1365                    saw_pipe_escape = true;
1366                }
1367                self.write(&escaped);
1368                if i + 1 < row.len() {
1369                    self.write(" |");
1370                }
1371            }
1372            self.write("\n");
1373        }
1374        if saw_pipe_escape {
1375            self.diags.push(Diag {
1376                hole: Hole::TableCellPipeEscape,
1377                line: 0,
1378                col: 0,
1379                original: String::new(),
1380                note: "`|` inside table cell escaped to `\\|`".into(),
1381            });
1382        }
1383    }
1384}
1385
/// Coerce an arbitrary heading id string into Brief's `[a-z0-9-]+` form.
///
/// ASCII letters are lowercased and ASCII digits kept. Every other
/// character (including all non-ASCII text) becomes a `-`, with runs
/// collapsed to a single `-` and leading/trailing `-` removed. Returns
/// `"section"` when the input has no usable characters.
fn sluggify_anchor(raw: &str) -> String {
    let mut slug = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch.to_ascii_lowercase());
        } else if !slug.is_empty() && !slug.ends_with('-') {
            // First separator after real content; later ones in the same
            // run, and any before the first kept character, are dropped.
            slug.push('-');
        }
    }
    // Runs are collapsed above, so at most one trailing dash can remain.
    if slug.ends_with('-') {
        slug.pop();
    }
    if slug.is_empty() {
        "section".to_string()
    } else {
        slug
    }
}
1411
1412/// Classify an `Event::InlineHtml` payload as an opening tag we know how
1413/// to rewrite. Returns `None` for everything else (closing tags,
1414/// self-closing tags, unrecognized fragments) — the caller handles those.
1415fn classify_inline_html_open(s: &str) -> Option<HtmlInlineKind> {
1416    let t = s.trim().to_ascii_lowercase();
1417    match t.as_str() {
1418        "<sub>" => Some(HtmlInlineKind::Sub),
1419        "<sup>" => Some(HtmlInlineKind::Sup),
1420        "<kbd>" => Some(HtmlInlineKind::Kbd),
1421        _ => None,
1422    }
1423}
1424
1425fn classify_inline_html_close(s: &str) -> Option<HtmlInlineKind> {
1426    let t = s.trim().to_ascii_lowercase();
1427    match t.as_str() {
1428        "</sub>" => Some(HtmlInlineKind::Sub),
1429        "</sup>" => Some(HtmlInlineKind::Sup),
1430        "</kbd>" => Some(HtmlInlineKind::Kbd),
1431        _ => None,
1432    }
1433}
1434
/// Report whether an inline HTML payload is a line-break tag.
///
/// Accepts exactly `<br>`, `<br/>` and `<br />` (ASCII case-insensitive,
/// surrounding whitespace ignored).
fn is_inline_br(s: &str) -> bool {
    let tag = s.trim();
    ["<br>", "<br/>", "<br />"]
        .iter()
        .any(|form| tag.eq_ignore_ascii_case(form))
}
1439
/// Backslash-escape every `|` in a table cell that is not already escaped,
/// so Brief's row splitter keeps the cell in one piece.
///
/// Returns the rewritten cell together with a flag telling whether at
/// least one `|` had to be escaped.
///
/// Backtick code spans are intentionally not tracked here: since Phase B
/// the parser's row split is backtick-aware, so a `|` inside a span is
/// already opaque to it, while `\|` remains the canonical Brief spelling
/// of a literal `|` outside a span. Escaping unconditionally is simpler
/// and never wrong.
fn escape_table_cell(s: &str) -> (String, bool) {
    let mut cell = String::with_capacity(s.len());
    let mut changed = false;
    // True when the previous char was a backslash that is not itself the
    // second half of an escaped `\\` pair.
    let mut pending_backslash = false;
    for c in s.chars() {
        match c {
            '|' if !pending_backslash => {
                cell.push_str("\\|");
                changed = true;
            }
            _ => cell.push(c),
        }
        pending_backslash = c == '\\' && !pending_backslash;
    }
    (cell, changed)
}
1465
/// Make a summary safe to embed in `@details(summary: "...")`.
///
/// Backslashes and double quotes are prefixed with a backslash; each
/// `\n` or `\r` is replaced by a single space. All other characters pass
/// through unchanged.
fn escape_brief_string(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut quoted, c| {
            match c {
                '\\' | '"' => {
                    quoted.push('\\');
                    quoted.push(c);
                }
                '\n' | '\r' => quoted.push(' '),
                other => quoted.push(other),
            }
            quoted
        })
}
1479
1480/// Try to recognize a `<details>` HTML fragment.
1481///
1482/// Returns `DetailsShape::Closed` when the buffer fully wraps a
1483/// `<details>...</details>` block; `DetailsShape::Open` when the buffer
1484/// is the *opening* fragment of a multi-event `<details>` block (the
1485/// closing `</details>` will arrive in a later HtmlBlock); `Unknown`
1486/// otherwise.
1487fn classify_details_block(buf: &str) -> DetailsShape {
1488    let trimmed = buf.trim();
1489    let lower = trimmed.to_ascii_lowercase();
1490    let starts_open = lower.starts_with("<details>")
1491        || lower.starts_with("<details ")
1492        || lower.starts_with("<details\n");
1493    let only_close = lower == "</details>" || lower.starts_with("</details>");
1494    if only_close && !starts_open {
1495        // Bare close fragment; caller will pop a Details container.
1496        return DetailsShape::Close;
1497    }
1498    if !starts_open {
1499        return DetailsShape::Unknown;
1500    }
1501    let after_open = match find_after_open_tag(trimmed, "details") {
1502        Some(idx) => idx,
1503        None => return DetailsShape::Unknown,
1504    };
1505    let inner = &trimmed[after_open..];
1506    // Strip a single optional leading newline.
1507    let inner = inner.strip_prefix('\n').unwrap_or(inner);
1508    let summary = extract_summary(inner);
1509    let body_start = match summary.as_ref() {
1510        Some((_, end)) => *end,
1511        None => 0,
1512    };
1513    let after_summary = &inner[body_start..];
1514    // Look for matching `</details>` at the end of the buffer (case-insensitive).
1515    let lower_after = after_summary.to_ascii_lowercase();
1516    if let Some(close_idx) = lower_after.rfind("</details>") {
1517        let body = after_summary[..close_idx].trim_matches('\n').to_string();
1518        let summary_text = summary.map(|((s, _), _)| s).unwrap_or_default();
1519        DetailsShape::Closed {
1520            summary: summary_text,
1521            body,
1522        }
1523    } else {
1524        // Opener without close — defer body to a Details container.
1525        let summary_text = summary.map(|((s, _), _)| s).unwrap_or_default();
1526        DetailsShape::Open {
1527            summary: summary_text,
1528        }
1529    }
1530}
1531
/// Result of `classify_details_block` for a single HTML fragment.
enum DetailsShape {
    /// The fragment holds both the `<details>` opener and a `</details>`
    /// close; `summary` is the extracted summary text (empty when there is
    /// none) and `body` is the content between summary and close.
    Closed { summary: String, body: String },
    /// The fragment holds the opener but no `</details>`; only the summary
    /// text is available at this point.
    Open { summary: String },
    /// The fragment starts with `</details>`.
    Close,
    /// The fragment is not recognized as `<details>` markup.
    Unknown,
}
1538
/// Locate the end of the first opening tag called `name` in `s`.
///
/// The search for `<name` is ASCII case-insensitive with respect to `s`
/// (`name` itself is expected in lowercase). Returns the byte index just
/// past the first `>` that follows, e.g. the index after `<details>` or
/// `<details ...>`; `None` when the tag is absent or never terminated.
fn find_after_open_tag(s: &str, name: &str) -> Option<usize> {
    let needle = format!("<{}", name);
    // ASCII lowercasing preserves byte offsets, so an index found in the
    // folded copy is valid in `s` as well.
    let name_end = s.to_ascii_lowercase().find(&needle)? + needle.len();
    s[name_end..].find('>').map(|gt| name_end + gt + 1)
}
1549
1550/// Pull a `<summary>...</summary>` out of `s`. Returns `((text, byte_len),
1551/// end_byte_offset_of_close_tag)` when found.
1552#[allow(clippy::type_complexity)]
1553fn extract_summary(s: &str) -> Option<((String, usize), usize)> {
1554    let lower = s.to_ascii_lowercase();
1555    let open_idx = lower.find("<summary")?;
1556    let after_open_attrs = &s[open_idx..];
1557    let gt = after_open_attrs.find('>')?;
1558    let body_start = open_idx + gt + 1;
1559    let after_body = &s[body_start..];
1560    let lower_after = after_body.to_ascii_lowercase();
1561    let close_rel = lower_after.find("</summary>")?;
1562    let summary_text = strip_tags(&after_body[..close_rel]).trim().to_string();
1563    let close_end = body_start + close_rel + "</summary>".len();
1564    Some(((summary_text, close_end - open_idx), close_end))
1565}
1566
/// Rough HTML-to-text: removes every complete `<...>` span and keeps the
/// rest verbatim.
///
/// Only text enclosed by a `<` and the next `>` is treated as a tag. A
/// `>` that does not close a tag (as in `a -> b`) and a `<` that is never
/// closed (as in `x < y`) are literal text and are preserved, so no
/// content is dropped just because it contains an angle bracket.
/// Entities are not decoded.
fn strip_tags(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(lt) = rest.find('<') {
        // Text before the candidate tag is always kept, stray `>` included.
        out.push_str(&rest[..lt]);
        match rest[lt..].find('>') {
            // Complete tag: skip it and continue after its `>`.
            Some(gt) => rest = &rest[lt + gt + 1..],
            // Unterminated `<`: not a tag, keep the remainder literally.
            None => {
                rest = &rest[lt..];
                break;
            }
        }
    }
    out.push_str(rest);
    out
}
1581
1582/// Walk `s` and prepend `\` before every `*`/`_`/`+`/`~` for which
1583/// Brief's `is_open_marker_at` predicate fires — those would otherwise
1584/// open an emphasis span in the converted Brief output. Markdown's
1585/// parser already paired any *real* emphasis as `Start/End(Emphasis)`,
1586/// so any sigil reaching us inside `Event::Text` was a literal in the
1587/// source.
1588///
1589/// Pulldown-cmark may split text around strikethrough/emphasis candidates
1590/// (e.g. `~10` becomes two events: `"~"` and `"10..."`). A sigil at the
1591/// *end* of the text string has no visible next-char in this fragment, but
1592/// when the Brief output is assembled the next event's first char follows
1593/// immediately — making the sigil a valid opener in Brief. We therefore
1594/// also escape sigils that have valid left-context and sit at the end of
1595/// the string (conservatively treating the boundary as "next char unknown").
1596fn escape_brief_inline_text(s: &str) -> String {
1597    use crate::inline::{is_inline_sigil, is_open_marker_at, is_punct};
1598    let bytes = s.as_bytes();
1599    let mut out = String::with_capacity(s.len());
1600    let mut i = 0usize;
1601    while i < bytes.len() {
1602        let b = bytes[i];
1603        if matches!(b, b'*' | b'_' | b'+' | b'~') {
1604            // Check the shared predicate first (handles mid-string case).
1605            let should_escape = is_open_marker_at(bytes, i) || {
1606                // Also escape a sigil at the end of the text fragment if it
1607                // has valid left-context: the next text event may start with
1608                // a non-space char, making this a valid emphasis opener in
1609                // the concatenated Brief output.
1610                let is_last = i + 1 == bytes.len();
1611                if is_last {
1612                    let prev = if i == 0 { None } else { Some(bytes[i - 1]) };
1613                    let prev_ok = match prev {
1614                        None => true,
1615                        Some(b' ') => true,
1616                        Some(pb) if is_inline_sigil(pb) => true,
1617                        Some(pb) if is_punct(pb) => true,
1618                        _ => false,
1619                    };
1620                    // prev must not be the same marker (doubled-marker rule)
1621                    let not_doubled = prev != Some(b);
1622                    prev_ok && not_doubled
1623                } else {
1624                    false
1625                }
1626            };
1627            if should_escape {
1628                out.push('\\');
1629                out.push(b as char);
1630                let w = s[i..].chars().next().map_or(1, |c| c.len_utf8());
1631                i += w;
1632                continue;
1633            }
1634        }
1635        let w = s[i..].chars().next().map_or(1, |c| c.len_utf8());
1636        out.push_str(&s[i..i + w]);
1637        i += w;
1638    }
1639    out
1640}
1641
/// Byte offsets at which each line of `s` begins.
///
/// The first entry is always `0`; every `\n` contributes the offset of the
/// byte that follows it, so a trailing newline yields a final entry equal
/// to `s.len()`.
fn compute_line_offsets(s: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(s.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
        .collect()
}
1651
// Regression tests for the converter. Several of them feed the converted
// Brief text back through the crate's lexer and parser and require that no
// Error-severity diagnostic comes out.
#[cfg(test)]
mod tests {
    use super::*;

    /// TOML (`+++`) frontmatter must not be reported as a Frontmatter hole
    /// and must appear in the output together with the heading.
    #[test]
    fn converts_toml_frontmatter_cleanly() {
        let md = "+++\ntitle = \"hi\"\nn = 3\n+++\n\n# Doc\nbody\n";
        let res = convert(md, "in.md");
        assert!(
            !res.diagnostics.iter().any(|d| d.hole == Hole::Frontmatter),
            "{:?}",
            res.diagnostics
        );
        assert!(
            res.brief_source.starts_with("+++\n"),
            "got: {}",
            res.brief_source
        );
        assert!(res.brief_source.contains("title = \"hi\""));
        assert!(res.brief_source.contains("n = 3"));
        assert!(res.brief_source.contains("\n+++\n"));
        assert!(res.brief_source.contains("# Doc"));
    }

    /// YAML (`---`) frontmatter must be reported as a Frontmatter hole and
    /// leave a TODO comment in the output.
    #[test]
    fn converts_yaml_frontmatter_as_hole() {
        let md = "---\ntitle: hi\n---\n\n# Doc\n";
        let res = convert(md, "in.md");
        assert!(
            res.diagnostics.iter().any(|d| d.hole == Hole::Frontmatter),
            "{:?}",
            res.diagnostics
        );
        assert!(
            res.brief_source.contains("// TODO[B-hole:frontmatter]"),
            "{}",
            res.brief_source
        );
    }

    /// Converted TOML frontmatter must lex and parse with no diagnostics
    /// at all and produce document metadata.
    #[test]
    fn converted_toml_frontmatter_round_trips_through_compiler() {
        let md = "+++\ntitle = \"hi\"\n+++\n\n# Doc\n";
        let res = convert(md, "in.md");
        let src = crate::span::SourceMap::new("in.brf", res.brief_source.clone());
        let toks = crate::lexer::lex(&src).expect("lex ok");
        let (doc, diags) = crate::parser::parse(toks, &src);
        assert!(diags.is_empty(), "{:?}\n---\n{}", diags, res.brief_source);
        assert!(doc.metadata.is_some());
    }

    /// A literal `~` before a digit inside emphasis must not produce any
    /// Error-severity diagnostic when the output is parsed.
    #[test]
    fn tilde_before_digit_inside_emphasis_round_trips() {
        // Production report pattern #1: `*…patch exceeds ~10 ops, any value
        // exceeds ~50 lines…*` opens a strikethrough on `~10` that never
        // closes, producing B0204 + B0207 in Brief.
        let md = "*patch exceeds ~10 ops, any value exceeds ~50 lines*\n";
        let res = convert(md, "in.md");
        let src = crate::span::SourceMap::new("in.brf", res.brief_source.clone());
        let toks = crate::lexer::lex(&src).expect("lex ok");
        let (_doc, diags) = crate::parser::parse(toks, &src);
        let errors: Vec<_> = diags
            .iter()
            .filter(|d| d.severity == crate::diag::Severity::Error)
            .collect();
        assert!(
            errors.is_empty(),
            "converter output failed to compile: {:?}\nbrief: {}",
            errors,
            res.brief_source
        );
    }

    /// A literal `*` in heading text must come out as `\*` and the output
    /// must parse without errors.
    #[test]
    fn asterisk_in_heading_text_round_trips() {
        // Production report pattern #2: `### 8.6 Set an attribute (href, aria-*, …)`.
        let md = "### 8.6 Set an attribute (href, aria-*, …)\n";
        let res = convert(md, "in.md");
        let src = crate::span::SourceMap::new("in.brf", res.brief_source.clone());
        let toks = crate::lexer::lex(&src).expect("lex ok");
        let (_doc, diags) = crate::parser::parse(toks, &src);
        let errors: Vec<_> = diags
            .iter()
            .filter(|d| d.severity == crate::diag::Severity::Error)
            .collect();
        assert!(
            errors.is_empty(),
            "{:?}\nbrief: {}",
            errors,
            res.brief_source
        );
        assert!(
            res.brief_source.contains(r"aria-\*"),
            "brief: {}",
            res.brief_source
        );
    }

    /// A blockquote containing an empty `>` line must be reported as a
    /// BlockquoteParagraphSplit hole and parse as two blockquote blocks.
    #[test]
    fn empty_blockquote_line_splits_into_adjacent_quotes() {
        let md = "> first paragraph\n> still first\n>\n> second paragraph\n";
        let res = convert(md, "in.md");
        let src = crate::span::SourceMap::new("in.brf", res.brief_source.clone());
        let toks = crate::lexer::lex(&src).expect("lex ok");
        let (doc, diags) = crate::parser::parse(toks, &src);
        let errors: Vec<_> = diags
            .iter()
            .filter(|d| d.severity == crate::diag::Severity::Error)
            .collect();
        assert!(
            errors.is_empty(),
            "{:?}\nbrief: {}",
            errors,
            res.brief_source
        );
        assert!(
            res.diagnostics
                .iter()
                .any(|d| d.hole == Hole::BlockquoteParagraphSplit),
            "{:?}",
            res.diagnostics
        );
        let blockquote_count = doc
            .blocks
            .iter()
            .filter(|b| matches!(b, crate::ast::Block::Blockquote { .. }))
            .count();
        assert_eq!(
            blockquote_count, 2,
            "expected two adjacent blockquotes; got blocks {:?}\nbrief: {}",
            doc.blocks, res.brief_source
        );
    }

    /// A `\|` inside a Markdown table cell must appear as `\|` in the
    /// output and the output must parse without errors.
    #[test]
    fn pipe_in_cell_is_escaped() {
        // Production report pattern #4.
        let md = "| Kind | Example |\n| --- | --- |\n| separator | \"semantic\"\\|\"utility\" |\n";
        let res = convert(md, "in.md");
        let src = crate::span::SourceMap::new("in.brf", res.brief_source.clone());
        let toks = crate::lexer::lex(&src).expect("lex ok");
        let (_doc, diags) = crate::parser::parse(toks, &src);
        let errors: Vec<_> = diags
            .iter()
            .filter(|d| d.severity == crate::diag::Severity::Error)
            .collect();
        assert!(
            errors.is_empty(),
            "{:?}\nbrief: {}",
            errors,
            res.brief_source
        );
        assert!(
            res.brief_source.contains(r#""semantic"\|"utility""#),
            "brief: {}",
            res.brief_source
        );
    }

    /// An empty trailing table cell must be reported as an EmptyTableCell
    /// hole, the output must contain an em dash, and it must parse without
    /// errors.
    #[test]
    fn empty_trailing_cell_is_padded() {
        // Production report pattern #5.
        let md = "| Value | Type | Note |\n| --- | --- | --- |\n| true/false | Boolean |  |\n";
        let res = convert(md, "in.md");
        let src = crate::span::SourceMap::new("in.brf", res.brief_source.clone());
        let toks = crate::lexer::lex(&src).expect("lex ok");
        let (_doc, diags) = crate::parser::parse(toks, &src);
        let errors: Vec<_> = diags
            .iter()
            .filter(|d| d.severity == crate::diag::Severity::Error)
            .collect();
        assert!(
            errors.is_empty(),
            "{:?}\nbrief: {}",
            errors,
            res.brief_source
        );
        assert!(
            res.brief_source.contains("—"),
            "brief: {}",
            res.brief_source
        );
        assert!(
            res.diagnostics
                .iter()
                .any(|d| d.hole == Hole::EmptyTableCell),
            "{:?}",
            res.diagnostics
        );
    }

    /// `**bold**` in a paragraph must leave a double-emphasis TODO comment
    /// in the output.
    #[test]
    fn double_emphasis_emits_inline_todo_comment() {
        let md = "**bold text** in a paragraph\n";
        let res = convert(md, "in.md");
        assert!(
            res.brief_source.contains("// TODO[B-hole:double-emphasis]"),
            "brief: {}",
            res.brief_source
        );
    }

    /// A hole in the last block of the document must still get its TODO
    /// comment written when the input ends.
    #[test]
    fn last_block_hole_flushes_at_eof() {
        let md = "trailing **bold** at the end of doc\n";
        let res = convert(md, "in.md");
        assert!(
            res.brief_source.contains("// TODO[B-hole:double-emphasis]"),
            "EOF flush dropped the trailing hole's comment\nbrief: {}",
            res.brief_source
        );
    }
}