Skip to main content

use_markdown/
link.rs

1use crate::code_fence::{FenceDelimiter, is_closing_fence, parse_opening_fence};
2use crate::frontmatter::frontmatter_line_count;
3use crate::plain_text::inline_markdown_to_text;
4
/// An inline Markdown link of the form `[text](target "title")`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// Label of the link after clean-up to plain text.
    pub text: String,
    /// Where the link points.
    pub target: String,
    /// Inline title following the target, when one was given.
    pub title: Option<String>,
    /// Line on which the link was found, counted from 1.
    pub line: usize,
}
17
/// Selects which inline construct the scanner should look for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum InlineReferenceKind {
    /// A regular link: `[label](target)`.
    Link,
    /// An image: `![label](target)`.
    Image,
}
23
/// Kind-agnostic result of parsing one `[label](target "title")` or
/// `![label](target "title")` construct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ParsedInlineReference {
    /// Text between the square brackets, converted to plain text.
    pub label: String,
    /// Destination taken from inside the parentheses.
    pub target: String,
    /// Title following the destination, when one was given.
    pub title: Option<String>,
    /// Line on which the reference was found, counted from 1.
    pub line: usize,
}
31
32/// Extracts inline links while ignoring fenced code blocks.
33pub fn extract_links(markdown: &str) -> Vec<MarkdownLink> {
34    extract_inline_references(markdown, InlineReferenceKind::Link)
35        .into_iter()
36        .map(|reference| MarkdownLink {
37            text: reference.label,
38            target: reference.target,
39            title: reference.title,
40            line: reference.line,
41        })
42        .collect()
43}
44
45pub(crate) fn extract_inline_references(
46    markdown: &str,
47    kind: InlineReferenceKind,
48) -> Vec<ParsedInlineReference> {
49    let frontmatter_lines = frontmatter_line_count(markdown);
50    let mut references = Vec::new();
51    let mut active_fence: Option<FenceDelimiter> = None;
52
53    for (index, line) in markdown.lines().enumerate() {
54        if index < frontmatter_lines {
55            continue;
56        }
57
58        if let Some(delimiter) = active_fence {
59            if is_closing_fence(line, delimiter) {
60                active_fence = None;
61            }
62            continue;
63        }
64
65        if let Some(opening) = parse_opening_fence(line) {
66            active_fence = Some(opening.delimiter);
67            continue;
68        }
69
70        let mut cursor = 0usize;
71        let bytes = line.as_bytes();
72        while cursor < bytes.len() {
73            let matches_kind = match kind {
74                InlineReferenceKind::Link => {
75                    bytes[cursor] == b'[' && (cursor == 0 || bytes[cursor - 1] != b'!')
76                },
77                InlineReferenceKind::Image => {
78                    bytes[cursor] == b'!' && bytes.get(cursor + 1) == Some(&b'[')
79                },
80            };
81
82            if matches_kind
83                && let Some((reference, next_cursor)) =
84                    parse_inline_reference_at(line, cursor, kind, index + 1)
85            {
86                references.push(reference);
87                cursor = next_cursor;
88                continue;
89            }
90
91            cursor += 1;
92        }
93    }
94
95    references
96}
97
98pub(crate) fn parse_inline_reference_at(
99    line: &str,
100    start: usize,
101    kind: InlineReferenceKind,
102    line_number: usize,
103) -> Option<(ParsedInlineReference, usize)> {
104    let (open_bracket, label_start) = match kind {
105        InlineReferenceKind::Link => (start, start + 1),
106        InlineReferenceKind::Image => {
107            if line.as_bytes().get(start) != Some(&b'!') {
108                return None;
109            }
110            (start + 1, start + 2)
111        },
112    };
113
114    let label_end = find_matching_bracket(line, open_bracket)?;
115    let mut cursor = label_end + 1;
116
117    while line
118        .as_bytes()
119        .get(cursor)
120        .is_some_and(u8::is_ascii_whitespace)
121    {
122        cursor += 1;
123    }
124
125    if line.as_bytes().get(cursor) != Some(&b'(') {
126        return None;
127    }
128
129    let target_end = find_matching_paren(line, cursor)?;
130    let label = inline_markdown_to_text(&line[label_start..label_end]);
131    let (target, title) = parse_target_and_title(&line[cursor + 1..target_end])?;
132
133    Some((
134        ParsedInlineReference {
135            label,
136            target,
137            title,
138            line: line_number,
139        },
140        target_end + 1,
141    ))
142}
143
144fn parse_target_and_title(input: &str) -> Option<(String, Option<String>)> {
145    let trimmed = input.trim();
146    if trimmed.is_empty() {
147        return None;
148    }
149
150    let (target_part, title_part) = if let Some(stripped) = trimmed.strip_prefix('<') {
151        let close = stripped.find('>')?;
152        (&stripped[..close], stripped[close + 1..].trim())
153    } else {
154        let split_index = find_target_split_index(trimmed);
155        match split_index {
156            Some(index) => (&trimmed[..index], trimmed[index..].trim()),
157            None => (trimmed, ""),
158        }
159    };
160
161    let target = target_part.trim();
162    if target.is_empty() {
163        return None;
164    }
165
166    let title = if title_part.is_empty() {
167        None
168    } else {
169        parse_title_literal(title_part)
170    };
171
172    Some((target.to_owned(), title))
173}
174
/// Finds the byte offset of the first whitespace character in `input` that is
/// not enclosed in parentheses, i.e. where a bare target ends and a title may
/// begin. Returns `None` when there is no such whitespace.
fn find_target_split_index(input: &str) -> Option<usize> {
    let mut open_parens = 0usize;

    input.char_indices().find_map(|(offset, ch)| {
        if ch == '(' {
            open_parens += 1;
        } else if ch == ')' {
            // An unmatched `)` leaves the nesting level at zero.
            open_parens = open_parens.saturating_sub(1);
        } else if open_parens == 0 && ch.is_whitespace() {
            return Some(offset);
        }
        None
    })
}
189
/// Extracts the contents of a title literal.
///
/// After trimming, the input must be wrapped in one of the accepted delimiter
/// pairs: `"..."`, `'...'` or `(...)`. The text between the delimiters is
/// returned verbatim (no unescaping); anything else yields `None`.
fn parse_title_literal(input: &str) -> Option<String> {
    const DELIMITERS: [(char, char); 3] = [('"', '"'), ('\'', '\''), ('(', ')')];

    let literal = input.trim();
    DELIMITERS.iter().find_map(|&(open, close)| {
        // Stripping the opener first guarantees opener and closer are two
        // distinct characters, so a lone `"` is rejected.
        literal
            .strip_prefix(open)?
            .strip_suffix(close)
            .map(str::to_owned)
    })
}
205
/// Returns the byte offset of the `]` that closes the `[` at `open_index`.
///
/// Nested bracket pairs are balanced, and a backslash makes the following
/// byte literal. Returns `None` when `open_index` is out of range or is not a
/// `[`, or when the bracket is never closed on this line.
fn find_matching_bracket(line: &str, open_index: usize) -> Option<usize> {
    let bytes = line.as_bytes();

    // Without this check the first `]` at or after `open_index` would be
    // reported as a match even though no bracket was opened.
    if bytes.get(open_index) != Some(&b'[') {
        return None;
    }

    let mut depth = 0usize;
    let mut index = open_index;

    while let Some(&byte) = bytes.get(index) {
        match byte {
            b'\\' => {
                // Skip the backslash together with the byte it escapes.
                index += 2;
                continue;
            },
            b'[' => depth += 1,
            b']' => {
                // `depth` is at least 1 here: the scan starts on a `[` and
                // returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some(index);
                }
            },
            _ => {},
        }
        index += 1;
    }

    None
}
231
/// Returns the byte offset of the `)` that closes the `(` at `open_index`.
///
/// Nested parentheses are balanced and a backslash makes the following byte
/// literal. A `"` or `'` that directly follows ASCII whitespace opens a quoted
/// title; parentheses inside it are ignored until the same quote character
/// appears again. Quote characters elsewhere (for example an apostrophe inside
/// a URL) are ordinary characters.
///
/// Returns `None` when `open_index` is out of range or is not a `(`, or when
/// the parenthesis (or an opened title quote) is never closed on this line.
fn find_matching_paren(line: &str, open_index: usize) -> Option<usize> {
    let bytes = line.as_bytes();

    if bytes.get(open_index) != Some(&b'(') {
        return None;
    }

    let mut depth = 0usize;
    let mut quote: Option<u8> = None;
    let mut index = open_index;

    while let Some(&byte) = bytes.get(index) {
        if byte == b'\\' {
            // Skip the backslash together with the byte it escapes.
            index += 2;
            continue;
        }

        if let Some(active_quote) = quote {
            if byte == active_quote {
                quote = None;
            }
            index += 1;
            continue;
        }

        match byte {
            b'"' | b'\'' => {
                // A title is separated from the destination by whitespace, so
                // only a quote in that position starts one. Treating every
                // quote as a delimiter would reject targets such as `it's`.
                let follows_whitespace = index
                    .checked_sub(1)
                    .and_then(|previous| bytes.get(previous))
                    .is_some_and(u8::is_ascii_whitespace);
                if follows_whitespace {
                    quote = Some(byte);
                }
            },
            b'(' => depth += 1,
            b')' => {
                // `depth` is at least 1 here: the scan starts on a `(` and
                // returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some(index);
                }
            },
            _ => {},
        }
        index += 1;
    }

    None
}