use crate::support::md::InBlockState;
use crate::types::{MdRef, MdRefKind};
use lazy_regex::regex;
/// Iterator over the inline Markdown references (`[text](target)` links and
/// `![alt](target)` images) found in a string, scanned one line at a time.
///
/// Lines for which the tracked [`InBlockState`] is not "out" are skipped, and a
/// match preceded by an odd number of backticks on its line is ignored.
pub struct MdRefIter<'a> {
/// Block state after the most recently examined line; starts as `InBlockState::Out`.
block_state: InBlockState,
/// Lines of the content that have not been loaded into `current_line` yet.
lines: std::iter::Peekable<std::str::Lines<'a>>,
/// Line currently being scanned; `None` once the content is exhausted.
current_line: Option<&'a str>,
/// Byte offset inside `current_line` at which the next regex search starts.
line_pos: usize,
/// Running total of `len + 1` for every line already passed.
// NOTE(review): looks like the byte offset of `current_line` within the content,
// assuming one-byte line terminators; it is never read in the visible code — confirm.
line_start: usize,
}
impl<'a> MdRefIter<'a> {
    /// Builds an iterator over `content`, positioned on its first line (if any)
    /// with the block state set to `InBlockState::Out`.
    pub fn new(content: &'a str) -> Self {
        let mut remaining = content.lines().peekable();
        let first = remaining.next();
        Self {
            block_state: InBlockState::Out,
            current_line: first,
            lines: remaining,
            line_pos: 0,
            line_start: 0,
        }
    }

    /// Moves on to the following line and resets the in-line search offset.
    ///
    /// Does nothing once the content is exhausted.
    fn advance_line(&mut self) {
        let finished_len = match self.current_line {
            Some(finished) => finished.len(),
            None => return,
        };
        // `+ 1` accounts for the line terminator dropped by `str::lines`.
        self.line_start += finished_len + 1;
        self.current_line = self.lines.next();
        self.line_pos = 0;
    }

    /// Scans forward from the current position and returns the next reference,
    /// or `None` when no line is left.
    ///
    /// A line is only searched when the block state is "out" both before and
    /// after it, so the lines inside a block and the line that closes it are
    /// skipped. Matches preceded by an odd number of backticks on the same line
    /// are treated as inline code and ignored.
    fn next_ref(&mut self) -> Option<MdRef> {
        let pattern = regex!(r"(!?\[)([^\]]*)\]\(([^)]+)\)");

        loop {
            let line = match self.current_line {
                Some(line) => line,
                None => return None,
            };

            // Record the new state unconditionally, then decide whether this
            // line is eligible for link scanning at all.
            let next_state = self.block_state.compute_new(line);
            let was_out = self.block_state.is_out();
            let now_out = next_state.is_out();
            self.block_state = next_state;
            if !(was_out && now_out) {
                self.advance_line();
                continue;
            }

            let from = self.line_pos;
            let caps = match pattern.captures(&line[from..]) {
                Some(caps) => caps,
                None => {
                    // Nothing more on this line.
                    self.advance_line();
                    continue;
                }
            };

            // Capture offsets are relative to the searched remainder; convert
            // them to offsets within the whole line.
            let whole = caps.get(0)?;
            let abs_start = from + whole.start();
            let abs_end = from + whole.end();

            // Whether the match is kept or skipped, the next search resumes
            // right after it.
            self.line_pos = abs_end;

            // An odd number of backticks before the match means an inline code
            // span is still open at that point.
            let ticks_before = line[..abs_start].matches('`').count();
            if ticks_before % 2 != 0 {
                continue;
            }

            let opener = caps.get(1)?.as_str();
            let label = caps.get(2)?.as_str();
            let target = caps.get(3)?.as_str();

            let text = match label {
                "" => None,
                other => Some(other.to_string()),
            };

            return Some(MdRef {
                target: target.to_string(),
                text,
                inline: opener == "![",
                kind: MdRefKind::from_target(target),
            });
        }
    }
}
impl Iterator for MdRefIter<'_> {
type Item = MdRef;
fn next(&mut self) -> Option<Self::Item> {
self.next_ref()
}
}
/// Unit tests for [`MdRefIter`]: link/image/anchor extraction, several
/// references per line, and skipping of fenced and inline code.
#[cfg(test)]
mod tests {
    type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;

    use super::*;

    /// A plain `[text](url)` link is reported as a non-inline URL reference.
    #[test]
    fn test_md_ref_iter_simple_link() -> Result<()> {
        let fx_content = "[click here](https://example.com)";
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 1);
        let md_ref = &refs[0];
        assert_eq!(md_ref.target, "https://example.com");
        assert_eq!(md_ref.text.as_deref(), Some("click here"));
        assert!(!md_ref.inline);
        assert_eq!(md_ref.kind, MdRefKind::Url);
        Ok(())
    }

    /// An `![alt](file)` image is reported as an inline file reference.
    #[test]
    fn test_md_ref_iter_image() -> Result<()> {
        // The fixture must actually contain the image markup the assertions
        // below describe (alt text "alt text", target "image.png").
        let fx_content = "![alt text](image.png)";
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 1);
        let md_ref = &refs[0];
        assert_eq!(md_ref.target, "image.png");
        assert_eq!(md_ref.text.as_deref(), Some("alt text"));
        assert!(md_ref.inline);
        assert_eq!(md_ref.kind, MdRefKind::File);
        Ok(())
    }

    /// A `#fragment` target is classified as an anchor.
    #[test]
    fn test_md_ref_iter_anchor() -> Result<()> {
        let fx_content = "[go to section](#my-section)";
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 1);
        let md_ref = &refs[0];
        assert_eq!(md_ref.target, "#my-section");
        assert_eq!(md_ref.text.as_deref(), Some("go to section"));
        assert!(!md_ref.inline);
        assert_eq!(md_ref.kind, MdRefKind::Anchor);
        Ok(())
    }

    /// Links and images spread over several lines are returned in document order.
    #[test]
    fn test_md_ref_iter_multiple_links() -> Result<()> {
        // The second line carries the inline image that `refs[2]` checks.
        let fx_content = r#"
Check out [this link](https://example.com) and [another](docs/page.md).
Also see ![photo](assets/photo.jpg) for reference.
"#;
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 3);
        assert_eq!(refs[0].target, "https://example.com");
        assert_eq!(refs[0].kind, MdRefKind::Url);
        assert!(!refs[0].inline);
        assert_eq!(refs[1].target, "docs/page.md");
        assert_eq!(refs[1].kind, MdRefKind::File);
        assert!(!refs[1].inline);
        assert_eq!(refs[2].target, "assets/photo.jpg");
        assert_eq!(refs[2].kind, MdRefKind::File);
        assert!(refs[2].inline);
        Ok(())
    }

    /// Link-like text inside a three-backtick fenced block is ignored.
    #[test]
    fn test_md_ref_iter_skip_code_block() -> Result<()> {
        let fx_content = r#"
Here is a [real link](https://real.com).
```
[not a link](https://fake.com)
```
And [another real](page.md).
"#;
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 2);
        assert_eq!(refs[0].target, "https://real.com");
        assert_eq!(refs[1].target, "page.md");
        Ok(())
    }

    /// Link-like text inside a four-backtick fenced block is ignored.
    #[test]
    fn test_md_ref_iter_skip_code_block_4_backticks() -> Result<()> {
        let fx_content = r#"
Here is a [real link](https://real.com).
````
[not a link](https://fake.com)
````
And [another real](page.md).
"#;
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 2);
        assert_eq!(refs[0].target, "https://real.com");
        assert_eq!(refs[1].target, "page.md");
        Ok(())
    }

    /// Link-like text inside an inline code span is ignored.
    #[test]
    fn test_md_ref_iter_skip_inline_code() -> Result<()> {
        let fx_content = r#"
Here is a [real link](https://real.com).
This is `[not a link](https://fake.com)` inline code.
And [another real](page.md).
"#;
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 2);
        assert_eq!(refs[0].target, "https://real.com");
        assert_eq!(refs[1].target, "page.md");
        Ok(())
    }

    /// An empty link label yields `text == None`.
    #[test]
    fn test_md_ref_iter_empty_text() -> Result<()> {
        let fx_content = "[](https://example.com)";
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 1);
        let md_ref = &refs[0];
        assert_eq!(md_ref.target, "https://example.com");
        assert!(md_ref.text.is_none());
        Ok(())
    }

    /// A protocol-relative target (`//host/path`) is classified as a URL.
    #[test]
    fn test_md_ref_iter_protocol_relative_url() -> Result<()> {
        let fx_content = "[link](//example.com/path)";
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].kind, MdRefKind::Url);
        Ok(())
    }

    /// Several links on one line are all returned, left to right.
    #[test]
    fn test_md_ref_iter_multiple_on_same_line() -> Result<()> {
        let fx_content = "[first](a.md) and [second](b.md) and [third](c.md)";
        let refs: Vec<MdRef> = MdRefIter::new(fx_content).collect();
        assert_eq!(refs.len(), 3);
        assert_eq!(refs[0].target, "a.md");
        assert_eq!(refs[1].target, "b.md");
        assert_eq!(refs[2].target, "c.md");
        Ok(())
    }
}