/// An inline Markdown link of the form `[text](url)`, as produced by `detect_links`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link {
/// Everything between the opening `[` and its closing `]`, copied verbatim.
pub text: String,
/// Everything between the `(` that follows the text and its matching `)`.
pub url: String,
/// Position of the opening `[` in the scanned content, as reported by `detect_links`.
pub start_pos: usize,
/// Position one past the closing `)`, in the same units as `start_pos`.
pub end_pos: usize,
}
/// Scans `content` for inline Markdown links of the form `[text](url)`.
///
/// The link text ends at the first unescaped `]`; the URL ends at the `)` that
/// balances the `(` immediately following the text, so URLs containing nested
/// parentheses are captured whole. Links are returned in document order and a
/// matched link is never re-scanned for nested links.
///
/// `start_pos` and `end_pos` of every returned [`Link`] are **byte offsets**
/// into `content` (`start_pos` at the `[`, `end_pos` one past the `)`), so that
/// `&content[link.start_pos..link.end_pos]` is exactly the link source and the
/// positions can be compared with values from `str::find` or other byte-based
/// spans.
#[must_use]
pub fn detect_links(content: &str) -> Vec<Link> {
    // Decode once: `chars[k]` is the k-th character and `offsets[k]` is the byte
    // offset at which that character starts in `content`.
    let (offsets, chars): (Vec<usize>, Vec<char>) = content.char_indices().unzip();
    // Maps a character index (possibly one past the last character) to a byte offset.
    let byte_at = |char_idx: usize| offsets.get(char_idx).copied().unwrap_or(content.len());

    let mut links = Vec::new();
    let mut i = 0;
    while i < chars.len() {
        // `(text_end, url_end)` are the character indices of the closing `]` and `)`.
        let span = if chars[i] == '[' {
            find_closing_bracket(&chars, i + 1)
                // The `(` must follow the `]` directly, with nothing in between.
                .filter(|&text_end| chars.get(text_end + 1) == Some(&'('))
                .and_then(|text_end| {
                    find_closing_parenthesis_balanced(&chars, text_end + 2)
                        .map(|url_end| (text_end, url_end))
                })
        } else {
            None
        };

        match span {
            Some((text_end, url_end)) => {
                links.push(Link {
                    text: content[byte_at(i + 1)..byte_at(text_end)].to_string(),
                    url: content[byte_at(text_end + 2)..byte_at(url_end)].to_string(),
                    start_pos: byte_at(i),
                    end_pos: byte_at(url_end + 1),
                });
                // Resume right after the link so its interior is not scanned again.
                i = url_end + 1;
            }
            None => i += 1,
        }
    }
    links
}
/// Returns the index of the first unescaped `]` in `chars` at or after `start`.
///
/// A backslash escapes the character that follows it, so `\]` does not close
/// the bracket. Returns `None` when no closing bracket is found.
fn find_closing_bracket(chars: &[char], start: usize) -> Option<usize> {
    let mut cursor = start;
    while let Some(&ch) = chars.get(cursor) {
        match ch {
            ']' => return Some(cursor),
            // Step over the backslash and the character it escapes; a trailing
            // backslash has nothing to escape and is treated as ordinary text.
            '\\' if cursor + 1 < chars.len() => cursor += 2,
            _ => cursor += 1,
        }
    }
    None
}
/// Returns the index of the `)` that closes a parenthesis opened just before `start`.
///
/// Scanning begins at `start` with one parenthesis already open. Every `(`
/// opens another level and every `)` closes one; the index of the `)` that
/// closes the outermost level is returned. A backslash causes the following
/// character to be skipped. Returns `None` if the input ends first.
fn find_closing_parenthesis_balanced(chars: &[char], start: usize) -> Option<usize> {
    // One level is already open: the `(` that precedes `start`.
    let mut open_parens = 1;
    // Set after a backslash so that the next character is ignored.
    let mut skip_next = false;
    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if skip_next {
            skip_next = false;
            continue;
        }
        match ch {
            '(' => open_parens += 1,
            ')' => {
                open_parens -= 1;
                if open_parens == 0 {
                    return Some(idx);
                }
            }
            '\\' => skip_next = true,
            _ => {}
        }
    }
    None
}
/// Reports whether `pos` falls within the URL part of any link in `links`.
///
/// For each link the URL part is taken to be the half-open span from
/// `start_pos + text.len() + 2` up to `end_pos`. For a `[text](url)` link that
/// span starts at the `(` and ends just after the `)`, so both parentheses
/// count as inside. `_content` is accepted but not consulted.
///
/// NOTE(review): `text.len()` is a byte count, so this arithmetic is only exact
/// when `start_pos`, `end_pos` and `pos` are byte offsets.
#[must_use]
pub fn is_inside_link_url(_content: &str, pos: usize, links: &[Link]) -> bool {
    links.iter().any(|link| {
        // Skip the `[`, the text and the `]` to land on the URL part.
        let url_span = (link.start_pos + link.text.len() + 2)..link.end_pos;
        url_span.contains(&pos)
    })
}
/// A reference-style link definition of the form `[label]: url`, optionally followed by a title.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)] pub struct ReferenceLink {
/// Text between `[` and `]`, with surrounding whitespace trimmed.
pub label: String,
/// Text that follows `]:`, with any recognised title removed.
pub url: String,
/// Title written in double quotes, single quotes or parentheses after the URL, if one was recognised.
pub title: Option<String>,
}
/// Detects inline links in `content`, discarding those that begin inside a fenced code block.
///
/// A link is dropped when its `start_pos` lies within one of the regions
/// returned by `get_code_block_regions`; all other links are returned in the
/// order `detect_links` found them.
#[must_use]
#[allow(dead_code)]
pub fn detect_links_outside_code_blocks(content: &str) -> Vec<Link> {
    let fenced = get_code_block_regions(content);
    let mut kept = detect_links(content);
    kept.retain(|link| !is_in_code_region(link.start_pos, &fenced));
    kept
}
/// A `(start, end)` pair of byte offsets delimiting a fenced code block; `is_in_code_region`
/// treats `start` as inclusive and `end` as exclusive.
pub type CodeRegion = (usize, usize);
/// Locates fenced code blocks in `content` and returns them as byte-offset regions.
///
/// A line whose trimmed text satisfies `is_fence_line` toggles the "inside a
/// code block" state. Each region starts at the first byte of the opening fence
/// line and ends just past the last byte of the closing fence line (its line
/// terminator excluded). A block that is never closed extends to
/// `content.len()`.
///
/// Offsets are correct for both `\n` and `\r\n` line endings: the running
/// position advances by the real byte length of each line including its
/// terminator, not by an assumed single-byte newline.
#[must_use]
pub fn get_code_block_regions(content: &str) -> Vec<CodeRegion> {
    let mut regions = Vec::new();
    // Byte offset at which the currently open block started, if one is open.
    let mut open_block_start: Option<usize> = None;
    let mut line_start = 0;

    for raw_line in content.split_inclusive('\n') {
        // Drop the terminator ("\n" or "\r\n") the same way `str::lines` does,
        // while keeping `raw_line.len()` available for exact offset tracking.
        let line = raw_line
            .strip_suffix('\n')
            .map_or(raw_line, |l| l.strip_suffix('\r').unwrap_or(l));

        if is_fence_line(line.trim()) {
            match open_block_start.take() {
                Some(block_start) => regions.push((block_start, line_start + line.len())),
                None => open_block_start = Some(line_start),
            }
        }
        line_start += raw_line.len();
    }

    // An unterminated fence swallows the rest of the document.
    if let Some(block_start) = open_block_start {
        regions.push((block_start, content.len()));
    }
    regions
}
/// Reports whether `line` looks like a code fence.
///
/// A fence is a line that begins with a run of at least three backticks or at
/// least three tildes and contains no further occurrence of that same
/// character after the run. The caller is expected to pass an already trimmed
/// line; leading whitespace is not skipped here.
fn is_fence_line(line: &str) -> bool {
    for &marker in &['`', '~'] {
        let after_run = line.trim_start_matches(marker);
        // Both markers are one byte wide, so the byte difference is the run length.
        let run_len = line.len() - after_run.len();
        if run_len >= 3 {
            return !after_run.contains(marker);
        }
    }
    false
}
/// Reports whether `pos` lies inside any of `regions`, treating each region as half-open
/// (`start` included, `end` excluded).
fn is_in_code_region(pos: usize, regions: &[CodeRegion]) -> bool {
    for &(start, end) in regions {
        if (start..end).contains(&pos) {
            return true;
        }
    }
    false
}
#[must_use]
#[allow(dead_code)] pub fn detect_reference_links(content: &str) -> Vec<ReferenceLink> {
let mut refs = Vec::new();
for line in content.lines() {
if let Some(ref_link) = parse_reference_line(line) {
refs.push(ref_link);
}
}
refs
}
/// Parses one line as a reference-style link definition (`[label]: url "title"`).
///
/// The label is the trimmed text between the first `[` on the line and the
/// closing bracket reported by `find_unescaped_bracket`; that bracket must be
/// followed directly by `:`. The remainder of the line is split into URL and
/// optional title by `parse_url_and_title`. Returns `None` when the line does
/// not have this shape, when the label is empty, or when no URL follows.
///
/// NOTE(review): the slicing below relies on `find_unescaped_bracket`
/// returning a byte offset located at or after `open + 1`.
#[allow(dead_code)]
fn parse_reference_line(line: &str) -> Option<ReferenceLink> {
    let line = line.trim();
    let open = line.find('[')?;
    let close = find_unescaped_bracket(line, open + 1)?;

    // The closing bracket must be immediately followed by a colon.
    let after_label = line[close..].strip_prefix("]:")?;

    let label = line[open + 1..close].trim();
    if label.is_empty() {
        return None;
    }

    let (url, title) = parse_url_and_title(after_label.trim_start())?;
    Some(ReferenceLink {
        label: label.to_string(),
        url,
        title,
    })
}
/// Returns the byte offset of the first `]` at or after byte offset `start`
/// that is not directly preceded by a backslash.
///
/// Both `start` and the returned value are byte offsets into `s`, so the
/// result can be used directly for slicing. Only characters at or after
/// `start` are inspected; a backslash located before `start` does not escape
/// a `]` sitting exactly at `start`.
///
/// Returns `None` when no such bracket exists, or when `start` is past the end
/// of `s` or not on a character boundary.
#[allow(dead_code)]
fn find_unescaped_bracket(s: &str, start: usize) -> Option<usize> {
    // `get` rejects out-of-range and mid-character offsets instead of panicking.
    let rest = s.get(start..)?;
    let mut previous: Option<char> = None;
    for (offset, ch) in rest.char_indices() {
        if ch == ']' && previous != Some('\\') {
            return Some(start + offset);
        }
        previous = Some(ch);
    }
    None
}
/// Splits the text after `]:` into a URL and an optional title.
///
/// Title forms are tried in this order: double-quoted, single-quoted, then
/// parenthesised; the first parser that succeeds decides the split. When none
/// matches, the whole trimmed input is returned as the URL with no title.
/// Returns `None` only when the input is empty or all whitespace.
#[allow(dead_code)]
fn parse_url_and_title(s: &str) -> Option<(String, Option<String>)> {
    let s = s.trim();
    if s.is_empty() {
        return None;
    }

    let split = parse_quoted_title(s, '"')
        .or_else(|| parse_quoted_title(s, '\''))
        .or_else(|| parse_parenthesized_title(s));

    Some(match split {
        Some((url, title)) => (url, Some(title)),
        None => (s.to_string(), None),
    })
}
/// Splits `s` into `(url, title)` when it ends with a title wrapped in `quote_char`.
///
/// The title is the text between the last two occurrences of `quote_char`.
/// It is only accepted when:
///
/// * the opening quote is preceded by whitespace, so a quote character that is
///   part of the URL itself (for example the apostrophes in `.../don't/won't`)
///   is not mistaken for a title delimiter;
/// * the title is not empty;
/// * at most one stray byte follows the closing quote once surrounding
///   whitespace is ignored (tolerance kept from the earlier behaviour).
///
/// The returned URL is the text before the opening quote with trailing
/// whitespace removed. Returns `None` when no such title is present.
#[allow(dead_code)]
fn parse_quoted_title(s: &str, quote_char: char) -> Option<(String, String)> {
    let quote_len = quote_char.len_utf8();

    let close = s.rfind(quote_char)?;
    if s[close + quote_len..].trim().len() >= 2 {
        return None;
    }

    let before_close = &s[..close];
    let open = before_close.rfind(quote_char)?;
    let url_part = &before_close[..open];
    // A title has to be separated from the URL by whitespace.
    if !url_part.ends_with(char::is_whitespace) {
        return None;
    }

    let title = &before_close[open + quote_len..];
    if title.is_empty() {
        return None;
    }
    Some((url_part.trim_end().to_string(), title.to_string()))
}
/// Splits `s` into `(url, title)` when it ends with a title written as ` (title)`.
///
/// The title starts at the last occurrence of a space followed by `(` and ends
/// at the last `)` after it; nothing but whitespace may follow that `)`. The
/// title must be non-empty and must not contain a newline. The returned URL is
/// the text before the separating space with trailing whitespace removed.
#[allow(dead_code)]
fn parse_parenthesized_title(s: &str) -> Option<(String, String)> {
    let separator = s.rfind(" (")?;
    let (url_part, tail) = s.split_at(separator);
    // Drop the separating space so that `group` begins with the `(`.
    let group = &tail[1..];

    let close = group.rfind(')')?;
    if close == 0 || !group[close + 1..].trim().is_empty() {
        return None;
    }

    let title = &group[1..close];
    if title.is_empty() || title.contains('\n') {
        return None;
    }
    Some((url_part.trim_end().to_string(), title.to_string()))
}
// Unit tests for inline-link detection, reference-link parsing and code-fence filtering.
#[cfg(test)]
mod tests {
use super::*;
// Balanced parentheses inside the URL must not end the URL early.
#[test]
fn detect_link_with_parens() {
let content = "[Wiki](https://en.wikipedia.org/wiki/Pointer_(computer_programming))";
let links = detect_links(content);
assert_eq!(links.len(), 1);
assert_eq!(links[0].text, "Wiki");
assert_eq!(
links[0].url,
"https://en.wikipedia.org/wiki/Pointer_(computer_programming)"
);
}
// Baseline: a single plain link yields its text and URL.
#[test]
fn detect_simple_link() {
let content = "[Example](https://example.com)";
let links = detect_links(content);
assert_eq!(links.len(), 1);
assert_eq!(links[0].text, "Example");
assert_eq!(links[0].url, "https://example.com");
}
// Positions passed in come from `str::find`, i.e. they are byte offsets into `content`.
#[test]
fn is_inside_link_url_detects_position() {
let content = "[Link](https://example.com/path) text";
let links = detect_links(content);
assert_eq!(links.len(), 1);
let url_pos = content.find("example").unwrap();
assert!(is_inside_link_url(content, url_pos, &links));
let outside_pos = content.find("text").unwrap();
assert!(!is_inside_link_url(content, outside_pos, &links));
}
// Parentheses glued to the URL (no preceding space) are part of the URL, not a title.
#[test]
fn detect_reference_link_with_parens() {
let content = "[ref]: https://example.com/path(1)";
let refs = detect_reference_links(content);
assert_eq!(refs.len(), 1);
assert_eq!(refs[0].label, "ref");
assert_eq!(refs[0].url, "https://example.com/path(1)");
assert_eq!(refs[0].title, None);
}
// A double-quoted title after the URL is split off into `title`.
#[test]
fn detect_reference_link_with_title() {
let content = r#"[ref]: https://example.com "title here""#;
let refs = detect_reference_links(content);
assert_eq!(refs.len(), 1);
assert_eq!(refs[0].label, "ref");
assert_eq!(refs[0].url, "https://example.com");
assert_eq!(refs[0].title, Some("title here".to_string()));
}
// A parenthesised title separated from the URL by a space is split off into `title`.
#[test]
fn detect_reference_link_with_parens_title() {
let content = "[ref]: https://example.com (title here)";
let refs = detect_reference_links(content);
assert_eq!(refs.len(), 1);
assert_eq!(refs[0].label, "ref");
assert_eq!(refs[0].url, "https://example.com");
assert_eq!(refs[0].title, Some("title here".to_string()));
}
// Only the link after the closing fence survives the code-block filter.
#[test]
fn detect_links_outside_code_blocks_only() {
let content =
"```\n[text](https://inside.code.block)\n```\n\n[Outside](https://outside.link)";
let links = detect_links_outside_code_blocks(content);
assert_eq!(links.len(), 1);
assert_eq!(links[0].text, "Outside");
assert_eq!(links[0].url, "https://outside.link");
}
// A backtick fence carrying an info string ("python") still opens a code block.
#[test]
fn detect_links_skips_backtick_fence() {
let content = "Some text\n\n```python\n# [Link](https://example.com)\n```\n\nMore text";
let links = detect_links_outside_code_blocks(content);
assert!(
links.is_empty(),
"Links inside code blocks should be skipped"
);
}
// Tilde fences are handled the same way as backtick fences.
#[test]
fn detect_links_skips_tilde_fence() {
let content = "Some text\n\n~~~\n[Link](https://example.com)\n~~~\n\nMore text";
let links = detect_links_outside_code_blocks(content);
assert!(
links.is_empty(),
"Links inside tilde code blocks should be skipped"
);
}
// Scanning resumes after the first link, so a second link on the same line is found too.
#[test]
fn detect_multiple_links_on_one_line() {
let content = "[First](https://a.com) and [Second](https://b.com/path(1))";
let links = detect_links(content);
assert_eq!(links.len(), 2);
assert_eq!(links[0].text, "First");
assert_eq!(links[0].url, "https://a.com");
assert_eq!(links[1].text, "Second");
assert_eq!(links[1].url, "https://b.com/path(1)");
}
// Several levels of nested parentheses are balanced correctly.
#[test]
fn detect_link_with_deeply_nested_parens() {
let content = "[Link](https://example.com/a(b(c(d))))";
let links = detect_links(content);
assert_eq!(links.len(), 1);
assert_eq!(links[0].url, "https://example.com/a(b(c(d)))");
}
// Query-string characters such as `?`, `=` and `&` pass through untouched.
#[test]
fn detect_link_with_special_chars_in_url() {
let content = "[API](https://api.example.com/v1?foo=bar&baz=qux)";
let links = detect_links(content);
assert_eq!(links.len(), 1);
assert_eq!(links[0].url, "https://api.example.com/v1?foo=bar&baz=qux");
}
// Parentheses inside the link text do not interfere with locating the URL.
#[test]
fn detect_link_with_parens_in_text() {
let content = "[click (here)](https://example.com)";
let links = detect_links(content);
assert_eq!(links.len(), 1);
assert_eq!(links[0].text, "click (here)");
assert_eq!(links[0].url, "https://example.com");
}
// Re-rendering the detected links as `[text](url)` and detecting again gives the same text/URL pairs.
#[test]
fn link_detection_is_idempotent() {
let content = "[Link](https://example.com/path(1)) and [Another](https://test.org)";
let first_pass = detect_links(content);
let processed: String = first_pass
.iter()
.map(|l| format!("[{}]({})", l.text, l.url))
.collect::<Vec<_>>()
.join(" and ");
let second_pass = detect_links(&processed);
assert_eq!(first_pass.len(), second_pass.len());
for (first, second) in first_pass.iter().zip(second_pass.iter()) {
assert_eq!(first.text, second.text);
assert_eq!(first.url, second.url);
}
}
// Text without any link syntax produces no links.
#[test]
fn link_detection_preserves_content_unchanged() {
let content = "Just plain text without any links";
let links = detect_links(content);
assert!(links.is_empty());
}
}