1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
use memchr::memchr;
use ox_content_ast::{Node, Span};
use super::Parser;
use crate::error::{ParseError, ParseResult};
#[allow(unused_imports)]
use crate::profile_span;
impl<'a> Parser<'a> {
/// Finds the body end and cursor position after a closing fence.
///
/// This helper is used only by the zero-copy fenced-code path where the
/// opening fence has no indentation. In that case body lines do not need
/// stripping, so the parser can search line starts in the original source
/// and return a borrowed body slice. The tuple separates the end of code
/// content from the end of the closing fence line.
pub(super) fn find_fenced_close(
&self,
fence_char: char,
fence_len: usize,
body_start: usize,
) -> (usize, usize) {
let bytes = self.source.as_bytes();
let fence_byte = fence_char as u8;
let mut line_start = body_start;
while line_start < bytes.len() {
// Skip up to 3 leading spaces.
let mut cursor = line_start;
let max_indent_end = (line_start + 3).min(bytes.len());
while cursor < max_indent_end && bytes[cursor] == b' ' {
cursor += 1;
}
// Count the run of `fence_char`.
let fence_start = cursor;
while cursor < bytes.len() && bytes[cursor] == fence_byte {
cursor += 1;
}
let count = cursor - fence_start;
if count >= fence_len {
// Found the closing fence. Body ends at `line_start`;
// fence line ends at the next `\n` (inclusive) or EOF.
let after_fence = match memchr(b'\n', &bytes[cursor..]) {
Some(off) => cursor + off + 1,
None => bytes.len(),
};
return (line_start, after_fence);
}
// Not a closing fence — move to the next line.
line_start = match memchr(b'\n', &bytes[line_start..]) {
Some(off) => line_start + off + 1,
None => bytes.len(),
};
}
// No closing fence; consume everything as body.
(bytes.len(), bytes.len())
}
/// Parses a fenced code block.
pub(super) fn parse_fenced_code(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
profile_span!("parser::parse_fenced_code");
let opening_indent = self.calc_indentation(start).min(3);
for _ in 0..opening_indent {
if self.peek() == Some(' ') {
self.advance();
}
}
let Some(fence_char) = self.peek() else {
return Err(ParseError::UnexpectedEof { span: Span::new(start as u32, start as u32) });
};
let mut fence_len = 0;
while self.peek() == Some(fence_char) {
fence_len += 1;
self.advance();
}
// Parse info string (language)
self.skip_whitespace();
let info_start = self.position;
while let Some(ch) = self.peek() {
if ch == '\n' {
break;
}
self.advance();
}
let info = self.source[info_start..self.position].trim();
let (lang, meta) = if info.is_empty() {
(None, None)
} else if let Some(space_idx) = info.find(' ') {
(Some(&info[..space_idx]), Some(&info[space_idx + 1..]))
} else {
(Some(info), None)
};
// Skip newline after info string
if self.peek() == Some('\n') {
self.advance();
}
// Fast path: when the opening fence has no indentation, the body
// lines need no indent stripping — we can find the closing fence
// by scanning the source and emit a zero-copy `&str` slice. This
// is the overwhelmingly common case (almost no code block in the
// wild is indented), and it removes both the per-line copy into a
// growing `String` *and* the trailing `alloc_str` (which used to
// double-allocate every code block's body).
let body_start = self.position;
let span;
let value: &'a str = if opening_indent == 0 {
let (body_end, fence_line_end) =
self.find_fenced_close(fence_char, fence_len, body_start);
self.position = fence_line_end;
span = Span::new(start as u32, self.position as u32);
&self.source[body_start..body_end]
} else {
// Indented opening fence: lines may need leading-space stripping,
// so a borrowed source slice would be wrong. Materialize only this
// uncommon case, and write directly into a bump-allocated string
// so the final AST can borrow it without a second arena copy.
let remaining_estimate = self.source.len().saturating_sub(self.position);
let mut value = ox_content_allocator::String::with_capacity_in(
remaining_estimate.min(8 * 1024),
self.allocator.bump(),
);
loop {
if self.is_at_end() {
break;
}
let line_start = self.position;
let line = self.line_at(line_start);
let line_indent = Self::indentation_columns(line);
if line_indent <= 3 {
self.position = line_start;
// `line_indent` was just computed from the same leading
// whitespace `calc_indentation(line_start)` would re-scan,
// and is already <= 3, so the `.min(3)` was a no-op.
let indent_to_skip = line_indent;
for _ in 0..indent_to_skip {
if self.peek() == Some(' ') {
self.advance();
}
}
// Check for closing fence
let mut closing_fence_len = 0;
while self.peek() == Some(fence_char) {
closing_fence_len += 1;
self.advance();
}
if closing_fence_len >= fence_len {
// Skip rest of line
while let Some(ch) = self.peek() {
if ch == '\n' {
self.advance();
break;
}
self.advance();
}
break;
}
}
// Not a closing fence, reset and consume line
self.position = line_start;
let next_line = self.next_line_start(line_start);
let stripped = Self::strip_indent_columns(line, opening_indent);
value.push_str(stripped);
if self.source.as_bytes().get(line_start + line.len()) == Some(&b'\n') {
value.push('\n');
}
self.position = next_line;
}
span = Span::new(start as u32, self.position as u32);
value.into_bump_str()
};
Ok(Some(Node::CodeBlock(ox_content_ast::CodeBlock { lang, meta, value, span })))
}
}