1use crate::code_fence::{FenceDelimiter, is_closing_fence, parse_opening_fence};
2use crate::frontmatter::frontmatter_line_count;
3use crate::plain_text::inline_markdown_to_text;
4
/// An inline Markdown link (`[text](target "title")`) found in a document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// Link label after conversion to plain text.
    pub text: String,
    /// Link destination exactly as written, with surrounding whitespace removed.
    pub target: String,
    /// Optional link title, without its surrounding delimiters.
    pub title: Option<String>,
    /// 1-based number of the line on which the link starts.
    pub line: usize,
}
17
/// Selects which inline construct the scanner should look for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum InlineReferenceKind {
    /// `[label](target)`
    Link,
    /// `![label](target)`
    Image,
}
23
/// One inline link or image as produced by the line scanner.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ParsedInlineReference {
    /// Bracketed label after conversion to plain text.
    pub label: String,
    /// Destination exactly as written, with surrounding whitespace removed.
    pub target: String,
    /// Optional title, without its surrounding delimiters.
    pub title: Option<String>,
    /// 1-based number of the line on which the reference starts.
    pub line: usize,
}
31
32pub fn extract_links(markdown: &str) -> Vec<MarkdownLink> {
34 extract_inline_references(markdown, InlineReferenceKind::Link)
35 .into_iter()
36 .map(|reference| MarkdownLink {
37 text: reference.label,
38 target: reference.target,
39 title: reference.title,
40 line: reference.line,
41 })
42 .collect()
43}
44
45pub(crate) fn extract_inline_references(
46 markdown: &str,
47 kind: InlineReferenceKind,
48) -> Vec<ParsedInlineReference> {
49 let frontmatter_lines = frontmatter_line_count(markdown);
50 let mut references = Vec::new();
51 let mut active_fence: Option<FenceDelimiter> = None;
52
53 for (index, line) in markdown.lines().enumerate() {
54 if index < frontmatter_lines {
55 continue;
56 }
57
58 if let Some(delimiter) = active_fence {
59 if is_closing_fence(line, delimiter) {
60 active_fence = None;
61 }
62 continue;
63 }
64
65 if let Some(opening) = parse_opening_fence(line) {
66 active_fence = Some(opening.delimiter);
67 continue;
68 }
69
70 let mut cursor = 0usize;
71 let bytes = line.as_bytes();
72 while cursor < bytes.len() {
73 let matches_kind = match kind {
74 InlineReferenceKind::Link => {
75 bytes[cursor] == b'[' && (cursor == 0 || bytes[cursor - 1] != b'!')
76 },
77 InlineReferenceKind::Image => {
78 bytes[cursor] == b'!' && bytes.get(cursor + 1) == Some(&b'[')
79 },
80 };
81
82 if matches_kind
83 && let Some((reference, next_cursor)) =
84 parse_inline_reference_at(line, cursor, kind, index + 1)
85 {
86 references.push(reference);
87 cursor = next_cursor;
88 continue;
89 }
90
91 cursor += 1;
92 }
93 }
94
95 references
96}
97
98pub(crate) fn parse_inline_reference_at(
99 line: &str,
100 start: usize,
101 kind: InlineReferenceKind,
102 line_number: usize,
103) -> Option<(ParsedInlineReference, usize)> {
104 let (open_bracket, label_start) = match kind {
105 InlineReferenceKind::Link => (start, start + 1),
106 InlineReferenceKind::Image => {
107 if line.as_bytes().get(start) != Some(&b'!') {
108 return None;
109 }
110 (start + 1, start + 2)
111 },
112 };
113
114 let label_end = find_matching_bracket(line, open_bracket)?;
115 let mut cursor = label_end + 1;
116
117 while line
118 .as_bytes()
119 .get(cursor)
120 .is_some_and(u8::is_ascii_whitespace)
121 {
122 cursor += 1;
123 }
124
125 if line.as_bytes().get(cursor) != Some(&b'(') {
126 return None;
127 }
128
129 let target_end = find_matching_paren(line, cursor)?;
130 let label = inline_markdown_to_text(&line[label_start..label_end]);
131 let (target, title) = parse_target_and_title(&line[cursor + 1..target_end])?;
132
133 Some((
134 ParsedInlineReference {
135 label,
136 target,
137 title,
138 line: line_number,
139 },
140 target_end + 1,
141 ))
142}
143
144fn parse_target_and_title(input: &str) -> Option<(String, Option<String>)> {
145 let trimmed = input.trim();
146 if trimmed.is_empty() {
147 return None;
148 }
149
150 let (target_part, title_part) = if let Some(stripped) = trimmed.strip_prefix('<') {
151 let close = stripped.find('>')?;
152 (&stripped[..close], stripped[close + 1..].trim())
153 } else {
154 let split_index = find_target_split_index(trimmed);
155 match split_index {
156 Some(index) => (&trimmed[..index], trimmed[index..].trim()),
157 None => (trimmed, ""),
158 }
159 };
160
161 let target = target_part.trim();
162 if target.is_empty() {
163 return None;
164 }
165
166 let title = if title_part.is_empty() {
167 None
168 } else {
169 parse_title_literal(title_part)
170 };
171
172 Some((target.to_owned(), title))
173}
174
/// Finds the byte offset of the first whitespace character that separates a
/// link destination from its title.
///
/// Whitespace inside balanced parentheses does not count. A backslash followed
/// by ASCII punctuation is an escape, so the escaped character is skipped and
/// an escaped `(` or `)` does not affect the nesting depth. A backslash before
/// any other character is an ordinary character.
///
/// Returns `None` when no separating whitespace exists.
fn find_target_split_index(input: &str) -> Option<usize> {
    let mut depth = 0usize;
    let mut characters = input.char_indices().peekable();

    while let Some((index, character)) = characters.next() {
        match character {
            '\\' => {
                // Consume the escaped punctuation so it is not read as a delimiter.
                if characters
                    .peek()
                    .is_some_and(|&(_, next)| next.is_ascii_punctuation())
                {
                    characters.next();
                }
            },
            '(' => depth += 1,
            ')' if depth > 0 => depth -= 1,
            other if other.is_whitespace() && depth == 0 => return Some(index),
            _ => {},
        }
    }

    None
}
189
/// Strips the delimiters from a link title literal.
///
/// Accepted forms are `"..."`, `'...'` and `(...)`; surrounding whitespace is
/// ignored. The inner text is returned verbatim. Anything else, including
/// input shorter than two delimiters, yields `None`.
fn parse_title_literal(input: &str) -> Option<String> {
    let literal = input.trim();

    // Pull one character off each end; what remains is the title body.
    let mut characters = literal.chars();
    let opening = characters.next()?;
    let closing = characters.next_back()?;

    let delimiters_match = matches!(
        (opening, closing),
        ('"', '"') | ('\'', '\'') | ('(', ')')
    );

    delimiters_match.then(|| characters.as_str().to_owned())
}
205
/// Returns the byte offset of the `]` that closes the `[` at `open_index`.
///
/// Nested brackets are balanced, and a backslash skips the byte that follows
/// it. Returns `None` when `open_index` does not point at a `[` or when the
/// bracket is never closed on this line.
fn find_matching_bracket(line: &str, open_index: usize) -> Option<usize> {
    let bytes = line.as_bytes();

    // Starting anywhere else would let the first stray `]` pass as a match.
    if bytes.get(open_index) != Some(&b'[') {
        return None;
    }

    let mut depth = 0usize;
    let mut index = open_index;

    while let Some(&byte) = bytes.get(index) {
        match byte {
            b'\\' => index += 2,
            b'[' => {
                depth += 1;
                index += 1;
            },
            b']' => {
                // `depth` is at least 1 here: the scan starts on `[` and
                // returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some(index);
                }
                index += 1;
            },
            _ => index += 1,
        }
    }

    None
}
231
/// Returns the byte offset of the `)` that closes the `(` at `open_index`.
///
/// Nested parentheses are balanced, and a backslash skips the byte that follows
/// it. Parentheses inside a quoted title are ignored. A `"` or `'` opens a
/// quoted run only when it directly follows whitespace, so an apostrophe inside
/// the destination (for example `(https://example.com/O'Reilly)`) does not
/// swallow the closing parenthesis.
///
/// Returns `None` when `open_index` does not point at a `(` or when no closing
/// parenthesis is found on this line.
fn find_matching_paren(line: &str, open_index: usize) -> Option<usize> {
    let bytes = line.as_bytes();

    // Starting anywhere else would let the first stray `)` pass as a match.
    if bytes.get(open_index) != Some(&b'(') {
        return None;
    }

    let mut depth = 0usize;
    let mut quote: Option<u8> = None;
    let mut index = open_index;

    while let Some(&byte) = bytes.get(index) {
        if byte == b'\\' {
            index += 2;
            continue;
        }

        if let Some(active_quote) = quote {
            if byte == active_quote {
                quote = None;
            }
            index += 1;
            continue;
        }

        match byte {
            // `index - 1` cannot underflow: the byte at `open_index` is `(`,
            // so a quote is only ever seen at a later offset.
            b'"' | b'\'' if bytes[index - 1].is_ascii_whitespace() => quote = Some(byte),
            b'(' => depth += 1,
            b')' => {
                // `depth` is at least 1 here: the scan starts on `(` and
                // returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some(index);
                }
            },
            _ => {},
        }
        index += 1;
    }

    None
}