1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
use memchr::memchr;
use ox_content_ast::{Node, Paragraph, Span};
use super::Parser;
use crate::error::{ParseError, ParseResult};
#[allow(unused_imports)]
use crate::profile_span;
impl<'a> Parser<'a> {
/// Parses the next block-level node at the parser's current position.
///
/// `skip_blank_lines` runs first. Returns `Ok(None)` when the parser is at
/// the end of input afterwards, or when `first_non_whitespace_in_line`
/// yields nothing for the current line. Returns
/// `Err(ParseError::NestingTooDeep)` when `nesting_depth` exceeds
/// `options.max_nesting_depth`. Otherwise the result of whichever block
/// parser was selected is returned, with `parse_paragraph` as the fallback.
pub(super) fn parse_block(&mut self) -> ParseResult<Option<Node<'a>>> {
    profile_span!("parser::parse_block");

    self.skip_blank_lines();
    if self.is_at_end() {
        return Ok(None);
    }

    // Enforce the configured nesting limit before doing any real work.
    let depth_limit = self.options.max_nesting_depth;
    if self.nesting_depth > depth_limit {
        let here = self.position as u32;
        return Err(ParseError::NestingTooDeep {
            span: Span::new(here, here),
            max_depth: depth_limit,
        });
    }

    let block_start = self.position;
    let marker_at = match self.first_non_whitespace_in_line(block_start) {
        Some(index) => index,
        None => return Ok(None),
    };
    let lead = self.source.as_bytes()[marker_at];

    // First-byte dispatch.
    //
    // The first non-whitespace byte of the line selects which recognizers
    // are worth running at all. The full line slice is only materialized
    // inside the arms that need it, so a line that opens with none of
    // these marker bytes skips straight to the table/paragraph tail below.
    //
    // This table must stay in sync with `line_starts_block`:
    // `parse_paragraph` calls that helper to decide whether a following
    // line ends the paragraph, so both dispatchers have to agree on what
    // counts as a block start.
    match lead {
        b'#' => {
            if self.try_parse_heading_start(block_start, marker_at) {
                return self.parse_heading(block_start);
            }
        }
        b'-' | b'*' | b'+' | b'0'..=b'9' => {
            let line = self.line_at(block_start);
            let after_indent = &line[marker_at - block_start..];
            // Of the list-marker bytes only `-` and `*` are also probed as
            // a thematic break, and that probe goes first. It receives the
            // whole line, while the list probe sees the line with its
            // leading whitespace removed.
            if matches!(lead, b'-' | b'*') && Self::try_parse_thematic_break_line(line) {
                return self.parse_thematic_break(block_start);
            }
            if Self::try_parse_list_line(after_indent) {
                return self.parse_list(block_start);
            }
        }
        b'_' => {
            if Self::try_parse_thematic_break_line(self.line_at(block_start)) {
                return self.parse_thematic_break(block_start);
            }
        }
        // `>` is handed to the block-quote parser without a further check.
        b'>' => return self.parse_block_quote(block_start),
        b'`' | b'~' => {
            let line = self.line_at(block_start);
            let after_indent = &line[marker_at - block_start..];
            if Self::try_parse_fenced_code_at(line, after_indent) {
                return self.parse_fenced_code(block_start);
            }
        }
        b'<' => {
            let line = self.line_at(block_start);
            let after_indent = &line[marker_at - block_start..];
            if let Some(html_start) = Self::parse_html_block_start(after_indent) {
                return self.parse_html_block(block_start, html_start);
            }
        }
        _ => {}
    }

    // Tables are not covered by the first-byte dispatch above, so they are
    // probed here, after it. The `try_parse_table` check is gated behind the
    // `tables` option and a same-line `|` probe, so a line without a pipe
    // never reaches it.
    let is_table = self.options.tables
        && self.line_contains_byte(block_start, b'|')
        && self.try_parse_table();

    if is_table {
        self.parse_table(block_start)
    } else {
        // Nothing else claimed the line: it is paragraph text.
        self.parse_paragraph(block_start)
    }
}
/// Parses a paragraph whose first line begins at byte offset `start`.
///
/// The line at `start` is consumed unconditionally. Following lines are
/// appended until the parser reaches the end of input, a line containing
/// only spaces/tabs (or nothing) before its `\n`, or a line for which
/// `line_starts_block` returns `true`. `self.position` ends up just past
/// the last consumed line.
///
/// Returns `Ok(None)` when the consumed text trims to an empty string;
/// otherwise a `Node::Paragraph` whose span runs from `start` to the end of
/// the last consumed line (untrimmed). Errors from `parse_inline` are
/// propagated.
pub(super) fn parse_paragraph(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    profile_span!("parser::parse_paragraph");

    let src = self.source.as_bytes();
    // Offset just past the `\n` that ends the line containing `from`, or the
    // end of the source when that line has no terminator.
    let end_of_line =
        |from: usize| memchr(b'\n', &src[from..]).map_or(src.len(), |nl| from + nl + 1);

    // The first line is taken without re-checking it. `parse_block` only
    // gets here after `skip_blank_lines` and its own block dispatch have
    // already treated this line as non-blank paragraph text, so repeating
    // that classification would be wasted work. Always advancing past line
    // one also guarantees forward progress on a non-blank line.
    let mut para_end = end_of_line(start);
    self.position = para_end;

    while !self.is_at_end() {
        let next_line = self.position;

        // Blank-line probe: look at the first byte that is neither a space
        // nor a tab. Hitting end of input or a `\n` means the upcoming line
        // is blank, which ends the paragraph.
        // NOTE(review): a bare `\r` is not treated as blank here, so CRLF
        // input depends on handling elsewhere — confirm.
        let first_visible = src
            .iter()
            .skip(next_line)
            .copied()
            .find(|byte| !matches!(byte, b' ' | b'\t'));
        if matches!(first_visible, None | Some(b'\n')) {
            break;
        }

        // A line that opens another block-level element also ends it.
        if self.line_starts_block() {
            break;
        }

        // Otherwise the line is a continuation: swallow it whole.
        para_end = end_of_line(next_line);
        self.position = para_end;
    }

    let text = self.source[start..para_end].trim();
    if text.is_empty() {
        return Ok(None);
    }

    let span = Span::new(start as u32, para_end as u32);
    // NOTE(review): `text` has been trimmed but the offset passed along is
    // the untrimmed `start`; if `parse_inline` uses it as the base for child
    // spans, leading whitespace would shift them — confirm.
    let children = self.parse_inline(text, start)?;
    Ok(Some(Node::Paragraph(Paragraph { children, span })))
}
}