marki_parse/
block.rs

1use crate::inline::pool_offset;
2use crate::section::{InlineSpan, OrderedListDelimiter, Section, SpanSlice};
3use crate::simd::find_byte;
4use crate::special_char::SpecialChar;
5use crate::{Inline, MarkdownFile};
6
7// ---------------------------------------------------------------------------
8// Pass 1: block-level parsing into RawSection (no inline parsing)
9// ---------------------------------------------------------------------------
10
/// Intermediate section representation produced by pass 1 (block parsing).
/// Stores raw `&str` text that will be inline-parsed in pass 2.
///
/// List and blockquote variants do not own their lines: they describe a
/// `(start, len)` window into the shared `ParseCtx::lines` pool.
enum RawSection<'src> {
    /// ATX heading. `level` is the number of leading `#` characters (1..=6);
    /// `text` is the heading content with the markers and surrounding
    /// whitespace removed.
    Heading {
        level: u8,
        text: &'src str,
    },
    /// Paragraph. `text` is one contiguous slice of the input covering every
    /// line of the paragraph (interior newlines included).
    Paragraph {
        text: &'src str,
    },
    /// Fenced code block. `language` is the trimmed text following the opening
    /// fence (if any); `code` is the raw text between the fences.
    CodeBlock {
        language: Option<&'src str>,
        code: &'src str,
    },
    /// Unordered list whose items are
    /// `lines[items_start..items_start + items_len]` in the shared pool.
    UnorderedList {
        items_start: u32,
        items_len: u32,
    },
    /// Ordered list. `start` is the number written on the first item and
    /// `delimiter` the delimiter that followed it; the items are
    /// `lines[items_start..items_start + items_len]` in the shared pool.
    OrderedList {
        start: u32,
        delimiter: OrderedListDelimiter,
        items_start: u32,
        items_len: u32,
    },
    /// Blockquote whose lines are
    /// `lines[lines_start..lines_start + lines_len]` in the shared pool.
    Blockquote {
        lines_start: u32,
        lines_len: u32,
    },
    /// Thematic break (`---`, `***`, `___`).
    HorizontalRule,
}
41
/// Mutable parsing context for pass 1. Only collects raw sections — no inline
/// pool or span pool needed.
struct ParseCtx<'src> {
    /// The complete source text; section text is sliced out of it.
    input: &'src str,
    /// Byte view of `input` (`input.as_bytes()`), used for line scanning.
    bytes: &'src [u8],
    /// Raw sections in document order, appended as each block is flushed.
    sections: Vec<RawSection<'src>>,
    /// Shared pool for blockquote lines and list items, avoiding per-section
    /// `Vec<&str>` heap allocations.
    lines: Vec<&'src str>,
}
52
/// The multi-line block currently being accumulated by pass 1.
///
/// A value is threaded through the line fold; when a line ends the current
/// block, the accumulator is flushed into a `RawSection`.
enum Accumulator<'src> {
    /// No block is open.
    Empty,
    /// Inside a blockquote whose lines begin at `lines_start` in the shared
    /// lines pool.
    InBlockquote {
        lines_start: u32,
    },
    /// Inside an unordered list opened with `marker`; its items begin at
    /// `items_start` in the shared lines pool. An item with a different
    /// marker starts a new list.
    InUnorderedList {
        marker: SpecialChar,
        items_start: u32,
    },
    /// Inside an ordered list. `start` is the number of the first item and
    /// `delimiter` the delimiter it used; its items begin at `items_start` in
    /// the shared lines pool. An item with a different delimiter starts a new
    /// list.
    InOrderedList {
        start: u32,
        delimiter: OrderedListDelimiter,
        items_start: u32,
    },
    /// Inside a paragraph; `content` is the slice of the input covering all
    /// paragraph lines seen so far.
    InParagraph {
        content: &'src str,
    },
}
71
72impl<'src> Accumulator<'src> {
73    const fn flush(self, lines_pool_len: u32) -> Option<RawSection<'src>> {
74        match self {
75            Self::Empty => None,
76            Self::InBlockquote { lines_start } => Some(RawSection::Blockquote {
77                lines_start,
78                lines_len: lines_pool_len - lines_start,
79            }),
80            Self::InUnorderedList { items_start, .. } => Some(RawSection::UnorderedList {
81                items_start,
82                items_len: lines_pool_len - items_start,
83            }),
84            Self::InOrderedList {
85                start,
86                delimiter,
87                items_start,
88            } => Some(RawSection::OrderedList {
89                start,
90                delimiter,
91                items_start,
92                items_len: lines_pool_len - items_start,
93            }),
94            Self::InParagraph { content } => Some(RawSection::Paragraph { text: content }),
95        }
96    }
97
98    fn flush_into(self, ctx: &mut ParseCtx<'src>) {
99        let pool_len = lines_offset(ctx.lines.len());
100        if let Some(section) = self.flush(pool_len) {
101            ctx.sections.push(section);
102        }
103    }
104}
105
/// Convert a lines-pool length or index to the `u32` stored in sections.
///
/// # Panics
///
/// Panics if `len` does not fit in a `u32`.
#[allow(clippy::inline_always)]
#[inline(always)]
fn lines_offset(len: usize) -> u32 {
    let narrowed = u32::try_from(len);
    narrowed.expect("lines pool exceeds u32::MAX elements")
}
112
/// Report whether `bytes[start..end]` contains nothing but ASCII whitespace.
///
/// An empty or inverted range (`start >= end`) counts as blank and is
/// answered without touching `bytes` — this is the common case.
#[inline]
fn is_blank_line(bytes: &[u8], start: usize, end: usize) -> bool {
    start >= end || bytes[start..end].iter().all(|b| b.is_ascii_whitespace())
}
122
123/// Return the number of leading spaces (0–3) if valid `CommonMark` indentation.
124/// Returns `None` if 4+ leading spaces (too much indentation for block elements).
125#[allow(clippy::inline_always)]
126#[inline(always)]
127fn strip_indent(bytes: &[u8]) -> Option<usize> {
128    let mut n = 0;
129    while n < bytes.len() && bytes[n] == SpecialChar::Space {
130        n += 1;
131        if n > 3 {
132            return None;
133        }
134    }
135    Some(n)
136}
137
/// Length of the run of `needle` bytes at the very start of `bytes`
/// (zero when `bytes` is empty or starts with a different byte).
#[inline]
fn count_leading_byte(bytes: &[u8], needle: u8) -> usize {
    bytes.iter().take_while(|&&b| b == needle).count()
}
147
148/// Check if `bytes` starts with a valid code fence opening (3+ backticks or tildes).
149/// Returns `(fence_char, fence_len)` if valid.
150/// Expects leading indentation to already be stripped by the caller.
151fn code_fence_opening(bytes: &[u8]) -> Option<(u8, usize)> {
152    let &first = bytes.first()?;
153    if first != SpecialChar::Backtick && first != SpecialChar::Tilde {
154        return None;
155    }
156    let len = count_leading_byte(bytes, first);
157    if len < 3 {
158        return None;
159    }
160    // Backtick fences: info string must not contain backticks (CommonMark §4.5).
161    // Tilde fences have no such restriction.
162    if first == SpecialChar::Backtick && bytes[len..].contains(&first) {
163        return None;
164    }
165    Some((first, len))
166}
167
168/// Check if `bytes` is a valid closing fence for the given character and minimum length.
169/// Expects leading indentation to already be stripped by the caller.
170fn is_closing_fence(bytes: &[u8], fence_char: u8, min_len: usize) -> bool {
171    let len = count_leading_byte(bytes, fence_char);
172    // Must have at least as many fence chars as the opening.
173    len >= min_len
174    // Rest must be whitespace only (no info string on closing fences).
175        && bytes[len..].iter().all(u8::is_ascii_whitespace)
176}
177
/// Pull the language tag out of a code fence opening line.
///
/// `bytes` is the fence line with leading indentation already removed and
/// must be a subslice of `input.as_bytes()`; `fence_len` is the number of
/// fence characters at its start. Returns everything after the fence with
/// surrounding ASCII whitespace trimmed (the whole info string, not only its
/// first word), or `None` when nothing but whitespace follows the fence.
fn extract_language<'src>(input: &'src str, bytes: &[u8], fence_len: usize) -> Option<&'src str> {
    let input_addr = input.as_ptr() as usize;
    let line_addr = bytes.as_ptr() as usize;
    debug_assert!(
        line_addr >= input_addr && line_addr + bytes.len() <= input_addr + input.len(),
        "bytes must be a subslice of input"
    );

    // Everything after the fence characters; bail out if nothing in it is
    // visible text.
    let info = bytes.get(fence_len..)?;
    let first_visible = info.iter().position(|b| !b.is_ascii_whitespace())?;
    let last_visible = info.iter().rposition(|b| !b.is_ascii_whitespace())?;
    let begin = fence_len + first_visible;
    let end = fence_len + last_visible + 1;

    // `bytes` points into `input`, so the pointer difference is the absolute
    // offset of the line inside `input`.
    let line_offset = line_addr - input_addr;
    input.get(line_offset + begin..line_offset + end)
}
203
204// ---------------------------------------------------------------------------
205// Pass 2: resolve inlines
206// ---------------------------------------------------------------------------
207
208/// Convert raw sections from pass 1 into final sections with inline parsing.
209/// Separating passes lets us pre-size the output pools from the raw section
210/// count and avoid interleaving block and inline allocation patterns.
211fn resolve_inlines<'src, const MAX_DEPTH: u8, const CAP: usize>(
212    raw: Vec<RawSection<'src>>,
213    lines: &[&'src str],
214    pool: &mut Vec<Inline<'src>>,
215    span_pool: &mut Vec<InlineSpan>,
216) -> Vec<Section<'src>> {
217    let mut sections = Vec::with_capacity(raw.len());
218    for raw_section in raw {
219        match raw_section {
220            RawSection::Heading { level, text } => {
221                sections.push(Section::Heading {
222                    level,
223                    content: Inline::parse_configured::<MAX_DEPTH, CAP>(text, pool),
224                });
225            }
226            RawSection::Paragraph { text } => {
227                sections.push(Section::Paragraph {
228                    content: Inline::parse_configured::<MAX_DEPTH, CAP>(text, pool),
229                });
230            }
231            RawSection::CodeBlock { language, code } => {
232                sections.push(Section::CodeBlock { language, code });
233            }
234            RawSection::UnorderedList {
235                items_start,
236                items_len,
237            } => {
238                let raw_items = lines
239                    .get(items_start as usize..(items_start + items_len) as usize)
240                    .unwrap_or(&[]);
241                let start = pool_offset(span_pool.len());
242                for item in raw_items {
243                    let span = Inline::parse_configured::<MAX_DEPTH, CAP>(item, pool);
244                    span_pool.push(span);
245                }
246                let len = pool_offset(span_pool.len()) - start;
247                sections.push(Section::UnorderedList {
248                    items: SpanSlice::new(start, len),
249                });
250            }
251            RawSection::OrderedList {
252                start,
253                delimiter,
254                items_start,
255                items_len,
256            } => {
257                let raw_items = lines
258                    .get(items_start as usize..(items_start + items_len) as usize)
259                    .unwrap_or(&[]);
260                let sp_start = pool_offset(span_pool.len());
261                for item in raw_items {
262                    let span = Inline::parse_configured::<MAX_DEPTH, CAP>(item, pool);
263                    span_pool.push(span);
264                }
265                let sp_len = pool_offset(span_pool.len()) - sp_start;
266                sections.push(Section::OrderedList {
267                    start,
268                    delimiter,
269                    items: SpanSlice::new(sp_start, sp_len),
270                });
271            }
272            RawSection::Blockquote {
273                lines_start,
274                lines_len,
275            } => {
276                let raw_lines = lines
277                    .get(lines_start as usize..(lines_start + lines_len) as usize)
278                    .unwrap_or(&[]);
279                let start = pool_offset(pool.len());
280                for (i, line) in raw_lines.iter().enumerate() {
281                    if i > 0 {
282                        pool.push(Inline::Text("\n"));
283                    }
284                    Inline::parse_flat_into_configured::<MAX_DEPTH, CAP>(line, pool);
285                }
286                let len = pool_offset(pool.len()) - start;
287                sections.push(Section::Blockquote {
288                    content: InlineSpan::new(start, len),
289                });
290            }
291            RawSection::HorizontalRule => {
292                sections.push(Section::HorizontalRule);
293            }
294        }
295    }
296    sections
297}
298
299// ---------------------------------------------------------------------------
300// BlockBytes trait — block-level helpers on byte slices.
301// ---------------------------------------------------------------------------
302
303/// Lookup table: true for bytes that could start a block-level element
304/// (heading, blockquote, list marker, HR character, or digit for ordered lists).
305const COULD_START_BLOCK: [bool; 256] = {
306    let mut table = [false; 256];
307    table[SpecialChar::Hash.byte() as usize] = true;
308    table[SpecialChar::GreaterThan.byte() as usize] = true;
309    table[SpecialChar::Dash.byte() as usize] = true;
310    table[SpecialChar::Asterisk.byte() as usize] = true;
311    table[SpecialChar::Plus.byte() as usize] = true;
312    table[SpecialChar::Underscore.byte() as usize] = true;
313    let mut d = SpecialChar::Zero.byte();
314    while d <= b'9' {
315        table[d as usize] = true;
316        d += 1;
317    }
318    table
319};
320
/// Block-level recognisers over a single line of bytes.
///
/// Implemented for `[u8]`; callers pass the line with its leading
/// indentation already removed.
trait BlockBytes {
    /// Whether the line is a thematic break: three or more matching `-`,
    /// `*` or `_` characters, optionally separated by whitespace, and
    /// nothing else.
    fn is_horizontal_rule(&self) -> bool;
    /// Try to read the line as an ATX heading. `line_offset` is the byte
    /// offset of this line inside `input`. Returns `(level, text)` where
    /// `text` is sliced from `input`; no inline parsing is performed.
    fn try_parse_heading<'src>(
        &self,
        input: &'src str,
        line_offset: usize,
    ) -> Option<(u8, &'src str)>;
    /// Try to read the line as an unordered list item. Returns the marker
    /// and the byte offset (relative to the line start) of the item text.
    fn try_parse_unordered_item(&self) -> Option<(SpecialChar, usize)>;
    /// Try to read the line as an ordered list item. Returns the item
    /// number, its delimiter, and the byte offset (relative to the line
    /// start) of the item text.
    fn try_parse_ordered_item(&self) -> Option<(u32, OrderedListDelimiter, usize)>;
    /// Cheap pre-check: whether the first byte of the line could begin a
    /// block-level element at all.
    fn could_start_block(&self) -> bool;
}
332
333impl BlockBytes for [u8] {
334    /// Check whether this line is a thematic break / horizontal rule
335    /// (`CommonMark` §4.1): three or more matching `-`, `*`, or `_` characters,
336    /// optionally separated by spaces, with nothing else on the line.
337    fn is_horizontal_rule(&self) -> bool {
338        let mut rule_byte = 0u8;
339        let mut count = 0u32;
340        for &b in self {
341            if b.is_ascii_whitespace() {
342                continue;
343            }
344            if rule_byte == 0 {
345                if b != SpecialChar::Dash
346                    && b != SpecialChar::Asterisk
347                    && b != SpecialChar::Underscore
348                {
349                    return false;
350                }
351                rule_byte = b;
352            }
353            if b != rule_byte {
354                return false;
355            }
356            count += 1;
357        }
358        count >= 3
359    }
360
361    /// Check whether this byte slice is an ATX heading (`CommonMark` §4.2).
362    /// Returns `(level, text)` without performing any inline parsing.
363    fn try_parse_heading<'src>(
364        &self,
365        input: &'src str,
366        line_offset: usize,
367    ) -> Option<(u8, &'src str)> {
368        let level = count_leading_byte(self, SpecialChar::Hash.byte());
369        if !(1..=6).contains(&level) || self.get(level) != SpecialChar::Space {
370            return None;
371        }
372        // Trim leading whitespace after '#'s.
373        let mut start = level;
374        while start < self.len() && self[start].is_ascii_whitespace() {
375            start += 1;
376        }
377        let mut end = self.len();
378        while end > start && self[end - 1].is_ascii_whitespace() {
379            end -= 1;
380        }
381        // Strip optional closing # sequence per CommonMark §4.2:
382        // trailing #s are removed only if preceded by whitespace (or they
383        // are the entire content after the opening).
384        let mut stripped_end = end;
385        while stripped_end > start && self.get(stripped_end - 1) == SpecialChar::Hash {
386            stripped_end -= 1;
387        }
388        if stripped_end == start
389            || self.get(stripped_end - 1) == SpecialChar::Space
390            || self.get(stripped_end - 1) == SpecialChar::Tab
391        {
392            // Trim whitespace before the closing hashes.
393            end = stripped_end;
394            while end > start && self[end - 1].is_ascii_whitespace() {
395                end -= 1;
396            }
397        }
398        let text = input.get(line_offset + start..line_offset + end)?;
399        let level = u8::try_from(level).expect("heading level already validated 1..=6");
400        Some((level, text))
401    }
402
403    /// Try to parse an unordered list item.
404    /// Returns `(marker, item_byte_offset)` where offset is relative to line start.
405    fn try_parse_unordered_item(&self) -> Option<(SpecialChar, usize)> {
406        let &first = self.first()?;
407        let marker = SpecialChar::from_byte(first)?;
408        if !marker.is_list_char() {
409            return None;
410        }
411        if self.get(1) == SpecialChar::Space {
412            Some((marker, 2))
413        } else {
414            None
415        }
416    }
417
418    /// Try to parse an ordered list item.
419    /// Returns `(number, delimiter, item_byte_offset)` where offset is relative
420    /// to line start.
421    fn try_parse_ordered_item(&self) -> Option<(u32, OrderedListDelimiter, usize)> {
422        let mut num: u32 = 0;
423        let mut digits = 0usize;
424        for &b in self {
425            if b.is_ascii_digit() {
426                digits += 1;
427                if digits > 9 {
428                    return None;
429                }
430                num = num * 10 + u32::from(b - SpecialChar::Zero.byte());
431            } else {
432                break;
433            }
434        }
435        if digits == 0 {
436            return None;
437        }
438        let delimiter = OrderedListDelimiter::from_byte(self.get(digits).copied()?)?;
439        if self.get(digits + 1) != SpecialChar::Space {
440            return None;
441        }
442        let item_offset = digits + 2;
443        Some((num, delimiter, item_offset))
444    }
445
446    /// Check whether the first byte of this line could start a block-level element.
447    #[inline]
448    fn could_start_block(&self) -> bool {
449        self.first().is_some_and(|&b| COULD_START_BLOCK[b as usize])
450    }
451}
452
453/// Scan forward from `start` to find a closing code fence of the same type
454/// (`fence_char`) and at least `fence_len` characters.
455/// Returns `(code_content, resume_position)`.
456fn scan_code_block_fast<'src>(
457    input: &'src str,
458    bytes: &[u8],
459    start: usize,
460    fence_len: usize,
461    fence_char: u8,
462) -> (&'src str, usize) {
463    let mut pos = start;
464    while pos < bytes.len() {
465        let line_end = find_byte(bytes, pos, SpecialChar::Newline.byte()).unwrap_or(bytes.len());
466
467        // Fast reject: a closing fence must start with the fence char or a
468        // space (for 0-3 indentation). Skip lines that start with anything else.
469        let first = bytes.get(pos).copied();
470        if (first == Some(fence_char) || first == Some(SpecialChar::Space.byte()))
471            && let Some(indent) = strip_indent(&bytes[pos..line_end])
472        {
473            let spos = pos + indent;
474            if is_closing_fence(&bytes[spos..line_end], fence_char, fence_len) {
475                // Content is everything between opening and closing fence.
476                let code = if start < pos {
477                    input.get(start..pos - 1).unwrap_or("")
478                } else {
479                    ""
480                };
481                return (code, line_end + 1);
482            }
483        }
484        pos = line_end + 1;
485    }
486    // Unclosed code block: content runs to end of input.
487    let code = input.get(start..).unwrap_or("");
488    (code, bytes.len())
489}
490
/// Join two subslices of `base` into the single slice of `base` that runs
/// from the first byte of `a` to the last byte of `b`.
///
/// Returns `None` when `a` starts before `base`, `b` ends after `base`, `b`
/// ends before `a` starts, or the resulting range is not a valid `str` range
/// of `base`.
fn merge_slices<'src>(base: &'src str, a: &str, b: &str) -> Option<&'src str> {
    let base_addr = base.as_ptr() as usize;
    let from = a.as_ptr() as usize;
    let to = b.as_ptr() as usize + b.len();

    let inside_base = from >= base_addr && to <= base_addr + base.len();
    if !inside_base || to < from {
        return None;
    }

    // Addresses are now known to lie within `base`; turn them into offsets.
    base.get(from - base_addr..to - base_addr)
}
504
505// ---------------------------------------------------------------------------
506// MarkdownFile: public API
507// ---------------------------------------------------------------------------
508
509impl<'src, const MAX_INLINE_DEPTH: u8, const INLINE_STACK_CAP: usize>
510    MarkdownFile<'src, MAX_INLINE_DEPTH, INLINE_STACK_CAP>
511{
512    #[must_use]
513    pub fn parse(input: &'src str) -> Self {
514        let bytes = input.as_bytes();
515
516        // --- Pass 1: block-level parsing (no inline work) ---
517        let mut ctx = ParseCtx {
518            input,
519            bytes,
520            // Rough heuristic: ~50 bytes per section on average.
521            sections: Vec::with_capacity(input.len() / 50 + 1),
522            lines: Vec::with_capacity(input.len() / 80 + 1),
523        };
524        let mut acc = Accumulator::Empty;
525        let mut pos = 0;
526
527        while pos < bytes.len() {
528            let line_end =
529                find_byte(bytes, pos, SpecialChar::Newline.byte()).unwrap_or(bytes.len());
530
531            // Fast-path: when we detect a code fence opening, scan ahead for
532            // the closing fence in one shot instead of processing line-by-line.
533            // CommonMark §4.5: a code fence can be indented 0-3 spaces, so we
534            // check if a backtick or tilde appears within the first 4 bytes.
535            let first = bytes.get(pos).copied();
536            if (first == SpecialChar::Backtick
537                || first == SpecialChar::Tilde
538                || (first == SpecialChar::Space
539                    && bytes[pos..line_end].get(..4).is_some_and(|w| {
540                        w.contains(&SpecialChar::Backtick.byte())
541                            || w.contains(&SpecialChar::Tilde.byte())
542                    })))
543                && let Some(indent) = strip_indent(&bytes[pos..line_end])
544                && let Some((fence_char, fence_len)) =
545                    code_fence_opening(&bytes[pos + indent..line_end])
546            {
547                let spos = pos + indent;
548                let language = extract_language(input, &bytes[spos..line_end], fence_len);
549                acc.flush_into(&mut ctx);
550                let content_start = line_end + 1;
551                let (code, resume) =
552                    scan_code_block_fast(input, bytes, content_start, fence_len, fence_char);
553                ctx.sections.push(RawSection::CodeBlock { language, code });
554                pos = resume;
555                acc = Accumulator::Empty;
556                continue;
557            }
558
559            acc = ctx.fold_line(acc, pos, line_end);
560            pos = line_end + 1;
561        }
562
563        acc.flush_into(&mut ctx);
564
565        // --- Pass 2: inline parsing ---
566        let mut pool = Vec::with_capacity(input.len() / 20);
567        let mut span_pool = Vec::with_capacity(input.len() / 100 + 1);
568        let sections = resolve_inlines::<MAX_INLINE_DEPTH, INLINE_STACK_CAP>(
569            ctx.sections,
570            &ctx.lines,
571            &mut pool,
572            &mut span_pool,
573        );
574
575        Self {
576            sections,
577            pool,
578            span_pool,
579        }
580    }
581}
582
583// ---------------------------------------------------------------------------
584// ParseCtx methods — pass 1 line-level fold logic
585// ---------------------------------------------------------------------------
586
587impl<'src> ParseCtx<'src> {
588    /// Process one line given as byte range `[pos..line_end)`.
589    /// Operates on `&[u8]` throughout; converts to `&str` only when storing.
590    ///
591    /// Code fence opening is handled by the fast-path in `parse()` before this
592    /// method is called, so no code-block state is tracked here.
593    #[inline]
594    fn fold_line(
595        &mut self,
596        acc: Accumulator<'src>,
597        pos: usize,
598        line_end: usize,
599    ) -> Accumulator<'src> {
600        let first = self.bytes.get(pos).copied();
601
602        if first.is_some_and(|b| b.is_ascii_whitespace())
603            && is_blank_line(self.bytes, pos, line_end)
604        {
605            acc.flush_into(self);
606            return Accumulator::Empty;
607        }
608
609        self.fold_block_element(acc, pos, line_end)
610    }
611
612    /// Detect and fold all block-level constructs. Computes `CommonMark` 0-3
613    /// space indentation internally.
614    #[inline]
615    fn fold_block_element(
616        &mut self,
617        acc: Accumulator<'src>,
618        pos: usize,
619        line_end: usize,
620    ) -> Accumulator<'src> {
621        // Strip 0-3 spaces of optional indentation (CommonMark §4).
622        // Lines with 4+ leading spaces cannot start a block-level construct.
623        let Some(indent) = strip_indent(&self.bytes[pos..line_end]) else {
624            // 4+ leading spaces: only valid as paragraph text or
625            // blockquote lazy continuation.
626            if let Accumulator::InBlockquote { lines_start } = acc {
627                self.lines.push(self.input.get(pos..line_end).unwrap_or(""));
628                return Accumulator::InBlockquote { lines_start };
629            }
630            return self.fold_paragraph(acc, pos, line_end);
631        };
632        let spos = pos + indent;
633        let line_bytes = &self.bytes[spos..line_end];
634
635        // Fast-path: if we're in a paragraph and the line can't start a block
636        // element, skip all the block-level checks and extend the paragraph.
637        if let Accumulator::InParagraph { .. } = acc
638            && !line_bytes.is_empty()
639            && !line_bytes.could_start_block()
640        {
641            return self.fold_paragraph(acc, pos, line_end);
642        }
643
644        // ATX headings (CommonMark §4.2).
645        if line_bytes.first() == SpecialChar::Hash
646            && let Some((level, text)) = line_bytes.try_parse_heading(self.input, spos)
647        {
648            acc.flush_into(self);
649            self.sections.push(RawSection::Heading { level, text });
650            return Accumulator::Empty;
651        }
652
653        if line_bytes.first() == SpecialChar::GreaterThan {
654            let content_start = spos + 1;
655            let content = if self.bytes.get(content_start) == SpecialChar::Space {
656                self.input.get(content_start + 1..line_end).unwrap_or("")
657            } else {
658                self.input.get(content_start..line_end).unwrap_or("")
659            };
660            if let Accumulator::InBlockquote { lines_start } = acc {
661                self.lines.push(content);
662                return Accumulator::InBlockquote { lines_start };
663            }
664            acc.flush_into(self);
665            let lines_start = lines_offset(self.lines.len());
666            self.lines.push(content);
667            return Accumulator::InBlockquote { lines_start };
668        }
669
670        // Blockquote lazy continuation (CommonMark §5.1): a non-blank line
671        // that doesn't start a new block-level construct continues the
672        // current blockquote.
673        let acc = if let Accumulator::InBlockquote { lines_start } = acc {
674            // Fast reject: if first byte can't start a block element, continue.
675            let continues = if !line_bytes.is_empty() && !line_bytes.could_start_block() {
676                true
677            } else {
678                !line_bytes.is_horizontal_rule()
679                    && line_bytes.try_parse_heading(self.input, spos).is_none()
680                    && code_fence_opening(line_bytes).is_none()
681                    && line_bytes.try_parse_unordered_item().is_none()
682                    && line_bytes.try_parse_ordered_item().is_none()
683            };
684            if continues {
685                self.lines.push(self.input.get(pos..line_end).unwrap_or(""));
686                return Accumulator::InBlockquote { lines_start };
687            }
688            // Line starts a new block — flush the blockquote and fall through.
689            Accumulator::InBlockquote { lines_start }.flush_into(self);
690            Accumulator::Empty
691        } else {
692            acc
693        };
694
695        // Horizontal rules (CommonMark §4.1): three or more -, *, or _
696        // characters (optionally with spaces) on a line by themselves.
697        if line_bytes.is_horizontal_rule() {
698            acc.flush_into(self);
699            self.sections.push(RawSection::HorizontalRule);
700            return Accumulator::Empty;
701        }
702
703        if let Some((marker, item_offset)) = line_bytes.try_parse_unordered_item() {
704            let item = self.input.get(spos + item_offset..line_end).unwrap_or("");
705            return self.fold_unordered_list(acc, marker, item);
706        }
707
708        if let Some((num, delim, item_offset)) = line_bytes.try_parse_ordered_item() {
709            let item = self.input.get(spos + item_offset..line_end).unwrap_or("");
710            return self.fold_ordered_list(acc, num, delim, item);
711        }
712
713        self.fold_paragraph(acc, pos, line_end)
714    }
715
716    #[inline]
717    fn fold_unordered_list(
718        &mut self,
719        acc: Accumulator<'src>,
720        marker: SpecialChar,
721        item: &'src str,
722    ) -> Accumulator<'src> {
723        if let Accumulator::InUnorderedList {
724            marker: m,
725            items_start,
726        } = acc
727        {
728            if m == marker {
729                self.lines.push(item);
730                return Accumulator::InUnorderedList {
731                    marker,
732                    items_start,
733                };
734            }
735            Accumulator::InUnorderedList {
736                marker: m,
737                items_start,
738            }
739            .flush_into(self);
740        } else {
741            acc.flush_into(self);
742        }
743        let items_start = lines_offset(self.lines.len());
744        self.lines.push(item);
745        Accumulator::InUnorderedList {
746            marker,
747            items_start,
748        }
749    }
750
751    #[inline]
752    fn fold_ordered_list(
753        &mut self,
754        acc: Accumulator<'src>,
755        num: u32,
756        delim: OrderedListDelimiter,
757        item: &'src str,
758    ) -> Accumulator<'src> {
759        if let Accumulator::InOrderedList {
760            start,
761            delimiter,
762            items_start,
763        } = acc
764        {
765            if delimiter == delim {
766                self.lines.push(item);
767                return Accumulator::InOrderedList {
768                    start,
769                    delimiter,
770                    items_start,
771                };
772            }
773            Accumulator::InOrderedList {
774                start,
775                delimiter,
776                items_start,
777            }
778            .flush_into(self);
779        } else {
780            acc.flush_into(self);
781        }
782        let items_start = lines_offset(self.lines.len());
783        self.lines.push(item);
784        Accumulator::InOrderedList {
785            start: num,
786            delimiter: delim,
787            items_start,
788        }
789    }
790
791    #[inline]
792    fn fold_paragraph(
793        &mut self,
794        acc: Accumulator<'src>,
795        pos: usize,
796        line_end: usize,
797    ) -> Accumulator<'src> {
798        let line_str = self.input.get(pos..line_end).unwrap_or("");
799        if let Accumulator::InParagraph { content } = acc {
800            return merge_slices(self.input, content, line_str).map_or_else(
801                || {
802                    self.sections.push(RawSection::Paragraph { text: content });
803                    Accumulator::InParagraph { content: line_str }
804                },
805                |merged| Accumulator::InParagraph { content: merged },
806            );
807        }
808        acc.flush_into(self);
809        Accumulator::InParagraph { content: line_str }
810    }
811}
marki_parse/block.rs

marki_parse/
block.rs