1use crate::error::{Error, Result};
2use percent_encoding::percent_decode_str;
3use regex::Regex;
4use std::collections::HashSet;
5use std::fs;
6use std::path::{Path, PathBuf};
7
8pub fn extract_image_references(markdown_path: &Path, base_dir: &Path) -> Result<HashSet<PathBuf>> {
9 let content = fs::read_to_string(markdown_path).map_err(|source| Error::ReadFile {
10 path: markdown_path.to_path_buf(),
11 source,
12 })?;
13
14 let mut references = HashSet::new();
15 let markdown_dir = markdown_path.parent().unwrap_or(base_dir);
16
17 let img_pattern = Regex::new(r#"!\[.*?]\(([^)]+?)(?:\s+["'].*?["'])?\)"#)?;
19
20 let html_pattern = Regex::new(r#"<img[^>]+src=["']([^"']+)["']"#)?;
22
23 for cap in img_pattern.captures_iter(&content) {
24 if let Some(path_match) = cap.get(1) {
25 let img_path = path_match.as_str().trim();
26
27 if is_url(img_path) {
28 continue;
29 }
30
31 if let Some(resolved) = resolve_image_path(img_path, markdown_dir, base_dir) {
32 references.insert(resolved);
33 }
34 }
35 }
36
37 for cap in html_pattern.captures_iter(&content) {
38 if let Some(path_match) = cap.get(1) {
39 let img_path = path_match.as_str().trim();
40
41 if is_url(img_path) {
42 continue;
43 }
44
45 if let Some(resolved) = resolve_image_path(img_path, markdown_dir, base_dir) {
46 references.insert(resolved);
47 }
48 }
49 }
50
51 Ok(references)
52}
53
/// Returns `true` when `path` is a remote or inline resource rather than a local file.
///
/// Recognised forms are `http://`, `https://`, `data:` and protocol-relative `//`.
/// URI schemes are case-insensitive, so `HTTPS://...` and `Data:...` are recognised
/// as well.
fn is_url(path: &str) -> bool {
    const SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "data:"];

    path.starts_with("//")
        || SCHEME_PREFIXES.iter().any(|prefix| {
            // `get` yields `None` when `path` is shorter than the prefix or the cut
            // would land inside a multi-byte character; neither can match an ASCII
            // prefix, so both are simply "not a URL".
            path.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
60
61pub(crate) fn resolve_image_path(
62 img_path: &str,
63 markdown_dir: &Path,
64 base_dir: &Path,
65) -> Option<PathBuf> {
66 let decoded_path = percent_decode_str(img_path)
67 .decode_utf8()
68 .map(|s| s.into_owned())
69 .unwrap_or_else(|_| img_path.to_string());
70
71 let clean_path = decoded_path
72 .split('#')
73 .next()
74 .and_then(|s| s.split('?').next())
75 .unwrap_or(&decoded_path);
76
77 try_resolve_path(clean_path, markdown_dir, base_dir).or_else(|| {
78 if clean_path != img_path {
79 let clean_original = img_path
80 .split('#')
81 .next()
82 .and_then(|s| s.split('?').next())
83 .unwrap_or(img_path);
84 try_resolve_path(clean_original, markdown_dir, base_dir)
85 } else {
86 None
87 }
88 })
89}
90
/// Resolves `img_path` to a canonical filesystem path located inside `base_dir`.
///
/// The path is tried relative to `markdown_dir` first and relative to `base_dir`
/// second. A candidate is accepted only if it exists (canonicalization succeeds)
/// and its canonical form lies under the canonical `base_dir`, so `..` segments and
/// symlinks cannot escape the base directory.
///
/// Returns `None` when `img_path` is empty, when `base_dir` cannot be canonicalized,
/// or when no candidate exists inside `base_dir`.
fn try_resolve_path(img_path: &str, markdown_dir: &Path, base_dir: &Path) -> Option<PathBuf> {
    // Joining an empty path yields the directory itself, which is not an image
    // reference (this happens for targets such as `#anchor` or `?query`).
    if img_path.is_empty() {
        return None;
    }

    let canonical_base = base_dir.canonicalize().ok()?;

    // `Path::join` with an absolute argument returns that argument unchanged, so an
    // absolute `img_path` is already covered by the first candidate and needs no
    // separate case. The iterator is lazy: the second candidate is only touched
    // when the first is missing or falls outside `base_dir`.
    [markdown_dir, base_dir]
        .iter()
        .filter_map(|dir| dir.join(img_path).canonicalize().ok())
        .find(|candidate| candidate.starts_with(&canonical_base))
}