1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
//! GFM-style footnote definition parser (block-level extension).
//!
//! Syntax:
//! - `[^label]: definition text`
//! - Continuation lines may be indented by 4 spaces or a tab.
use super::shared::GrammarSpan;
use crate::parser::ast::{Node, NodeKind};
use nom::Input;
/// Try to parse a footnote definition at the start of `input`.
///
/// The first line must look like `[^label]: text`, preceded by at most three
/// spaces. The label runs from `[^` to the first `]`, which must be followed
/// immediately by `:`; one optional space after the colon is dropped.
/// Following lines that carry the continuation indent (or a tab) are appended
/// to the definition text, as are indented lines that follow a run of blank
/// lines.
///
/// Returns `Some((rest, node))` on success, where `rest` is the unconsumed
/// input and `node` is a `FootnoteDefinition` holding a single `Paragraph`
/// child. Returns `None` if the input does not start with a footnote
/// definition.
pub fn parse_footnote_definition(input: GrammarSpan) -> Option<(GrammarSpan, Node)> {
    let frag = input.fragment();

    // Only the first line decides whether this is a footnote definition.
    let first_line_end = line_end(frag, 0);
    let (label, first_text) = split_footnote_marker(&frag[..first_line_end])?;

    let mut content = String::from(first_text);

    // `consumed_len` counts only bytes confirmed to belong to the definition;
    // `cursor` may run ahead of it while blank lines are being examined.
    let mut consumed_len = next_line_start(frag, first_line_end);
    let mut cursor = consumed_len;

    while cursor < frag.len() {
        let end = line_end(frag, cursor);
        let line = &frag[cursor..end];

        if line.trim().is_empty() {
            // Blank line: skip the whole blank run and look at the first
            // non-blank line after it.
            let mut lookahead = next_line_start(frag, end);
            while lookahead < frag.len() {
                let blank_end = line_end(frag, lookahead);
                if !frag[lookahead..blank_end].trim().is_empty() {
                    break;
                }
                lookahead = next_line_start(frag, blank_end);
            }

            // The definition only continues past the blank run when the next
            // real line is indented; otherwise the blank lines are left
            // unconsumed and the definition is complete.
            let continues = lookahead < frag.len()
                && strip_continuation_indent(&frag[lookahead..]).is_some();
            if !continues {
                break;
            }

            // Record the separator so the collected text keeps the paragraph
            // break. `consumed_len` is deliberately not advanced here: it is
            // updated when the indented line is accepted on the next pass.
            content.push_str("\n\n");
            cursor = lookahead;
            continue;
        }

        let stripped = match strip_continuation_indent(line) {
            Some(stripped) => stripped,
            None => break,
        };
        content.push('\n');
        content.push_str(stripped);
        cursor = next_line_start(frag, end);
        consumed_len = cursor;
    }

    let (rest, _taken) = input.take_split(consumed_len);

    // NOTE(review): the span is expected to end where `rest` begins rather
    // than at the end of the remaining document — confirm that
    // `opt_span_range` is the exclusive-end variant.
    let span = crate::parser::shared::opt_span_range(input, rest);

    // Conservative for now: the whole definition becomes one paragraph of
    // inline nodes. If inline parsing fails, the raw text is kept as a single
    // text node instead of dropping the definition.
    let content_children = match crate::parser::inlines::parse_inlines(&content) {
        Ok(nodes) => nodes,
        Err(_) => vec![Node {
            kind: NodeKind::Text(content),
            span: None,
            children: Vec::new(),
        }],
    };

    let paragraph = Node {
        kind: NodeKind::Paragraph,
        span: None,
        children: content_children,
    };
    let node = Node {
        kind: NodeKind::FootnoteDefinition {
            label: label.to_string(),
        },
        span,
        children: vec![paragraph],
    };
    Some((rest, node))
}

/// Split a first line of the form `[^label]: text` into `(label, text)`.
///
/// Up to three leading spaces are skipped; a fourth leading space makes the
/// `[^` prefix test fail, so deeper-indented lines are rejected. The label
/// ends at the first `]`, which must be followed directly by `:`. This lets
/// labels contain `:` and rejects lines where a later `]:` belongs to some
/// other construct. Empty labels are rejected. One optional space after the
/// colon is removed from `text`.
fn split_footnote_marker(line: &str) -> Option<(&str, &str)> {
    let indent = line.bytes().take(3).take_while(|&b| b == b' ').count();
    let after_open = line[indent..].strip_prefix("[^")?;

    let close = after_open.find(']')?;
    let label = &after_open[..close];
    if label.is_empty() {
        return None;
    }

    let text = after_open[close + 1..].strip_prefix(':')?;
    Some((label, text.strip_prefix(' ').unwrap_or(text)))
}

/// Remove the continuation indent from `line`, or return `None` when the line
/// is not indented as a continuation.
///
/// NOTE(review): the module docs describe a 4-space indent, but the prefix
/// literal here is a single space — confirm which one is intended.
fn strip_continuation_indent(line: &str) -> Option<&str> {
    line.strip_prefix(" ").or_else(|| line.strip_prefix('\t'))
}

/// Byte offset of the `'\n'` ending the line that starts at `start`, or
/// `text.len()` when that line is the last one and has no newline.
fn line_end(text: &str, start: usize) -> usize {
    text[start..]
        .find('\n')
        .map_or(text.len(), |rel| start + rel)
}

/// Byte offset just past the newline at `end`, or `end` itself when `end` is
/// already the end of `text`.
fn next_line_start(text: &str, end: usize) -> usize {
    if end < text.len() {
        end + 1
    } else {
        end
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrow the label of a footnote-definition node, panicking on any other
    /// node kind.
    fn footnote_label(node: &Node) -> &str {
        match &node.kind {
            NodeKind::FootnoteDefinition { label } => label.as_str(),
            other => panic!("expected FootnoteDefinition, got {other:?}"),
        }
    }

    /// A one-line definition stops at the next unindented line and yields a
    /// single paragraph child.
    #[test]
    fn smoke_test_parse_footnote_definition_single_line() {
        let source = GrammarSpan::new("[^a]: Hello\nNext\n");
        let (remaining, definition) = parse_footnote_definition(source).expect("should parse");

        assert!(remaining.fragment().starts_with("Next"));
        assert_eq!(footnote_label(&definition), "a");

        let children = &definition.children;
        assert_eq!(children.len(), 1);
        assert!(matches!(children[0].kind, NodeKind::Paragraph));
    }

    /// Indented follow-up lines are consumed as part of the definition.
    #[test]
    fn smoke_test_parse_footnote_definition_with_continuation_lines() {
        let source = GrammarSpan::new("[^multi]: First\n second\n third\nNext\n");
        let (remaining, definition) = parse_footnote_definition(source).expect("should parse");

        assert!(remaining.fragment().starts_with("Next"));
        assert_eq!(footnote_label(&definition), "multi");
    }
}