use std::collections::HashMap;
use regex::Regex;
/// Output of [`extract_text_imgs_links`]: the rewritten text plus the images
/// and links that were found in it.
#[derive(Debug, Clone)]
pub struct ExtractResult {
/// Processed text with surrounding whitespace trimmed; matched images are
/// replaced by `🖼` and inline/wiki links by their back-quoted label.
pub text: String,
/// Image identifiers in order of appearance: the part of a matched image
/// target between `tg_` and the following `.`.
pub images: Vec<String>,
/// Collected links, keyed by link label with the link path as value.
pub links: HashMap<String, String>,
}
pub fn extract_text_imgs_links(text: &str) -> ExtractResult {
let text = crate::parser::norm_new_lines(text);
let mut images: Vec<String> = Vec::new();
let mut links: HashMap<String, String> = HashMap::new();
let img_re = Regex::new(r"!\[.*?\]\(.*?tg_([^.]+)\..*?\)").unwrap();
let link_re = Regex::new(r"\[.*?\]\((.+?)\)").unwrap();
let wiki_re = Regex::new(r"\[\[(.+?)\]\]").unwrap();
let lines: Vec<&str> = text.split('\n').collect();
let mut kept_lines: Vec<String> = Vec::new();
for line in &lines {
let trimmed = line.trim();
if link_re.is_match(trimmed) && link_re.find(trimmed).map(|m| m.as_str()) == Some(trimmed) {
if let Some(caps) = link_re.captures(line) {
let content = caps.get(1).unwrap().as_str();
let (link_path, link_label) = split_link_content(content, false);
links.insert(link_label, link_path);
}
continue;
}
if wiki_re.is_match(trimmed) && wiki_re.find(trimmed).map(|m| m.as_str()) == Some(trimmed) {
if let Some(caps) = wiki_re.captures(line) {
let content = caps.get(1).unwrap().as_str();
let (link_path, link_label) = split_link_content(content, true);
links.insert(link_label, link_path);
}
continue;
}
kept_lines.push((*line).to_string());
}
let mut text = kept_lines.join("\n");
text = img_re
.replace_all(&text, |caps: ®ex::Captures| {
if let Some(id) = caps.get(1) {
images.push(id.as_str().to_string());
}
"🖼"
})
.to_string();
text = link_re
.replace_all(&text, |caps: ®ex::Captures| {
if let Some(m) = caps.get(1) {
let content = m.as_str();
let (link_path, link_label) = split_link_content(content, false);
links.insert(link_label.clone(), link_path);
format!("`{}`", link_label)
} else {
caps.get(0).unwrap().as_str().to_string()
}
})
.to_string();
text = wiki_re
.replace_all(&text, |caps: ®ex::Captures| {
if let Some(m) = caps.get(1) {
let content = m.as_str();
let (link_path, link_label) = split_link_content(content, true);
links.insert(link_label.clone(), link_path);
format!("`{}`", link_label)
} else {
caps.get(0).unwrap().as_str().to_string()
}
})
.to_string();
ExtractResult {
text: text.trim().to_string(),
images,
links,
}
}
/// Splits the raw content of a link into `(path, label)`.
///
/// * Inline link (`is_wiki == false`): `content` is the path as-is, and the
///   label is the path's file stem (file name without its last extension).
/// * Wiki link (`is_wiki == true`): `content` is `target` or `target|label`.
///   The path is `target` with `.md` appended; the label is the text after
///   the first `|` when present, otherwise the file stem of the path.
///
/// The label falls back to an empty string when no file stem can be derived.
fn split_link_content(content: &str, is_wiki: bool) -> (String, String) {
    // File name of `p` without its final extension, or "" if there is none.
    let stem_of = |p: &str| -> String {
        std::path::Path::new(p)
            .file_stem()
            .and_then(|stem| stem.to_str())
            .unwrap_or("")
            .to_string()
    };

    if !is_wiki {
        return (content.to_string(), stem_of(content));
    }

    // Only the first `|` separates target from label; later ones belong to
    // the label.
    let (target, explicit_label) = match content.split_once('|') {
        Some((target, label)) => (target, Some(label)),
        None => (content, None),
    };
    let path = format!("{}.md", target);
    let label = match explicit_label {
        Some(label) => label.to_string(),
        None => stem_of(&path),
    };
    (path, label)
}
/// Unit tests for `extract_text_imgs_links`.
#[cfg(test)]
mod tests {
    use super::*;

    /// An image whose target contains `tg_<id>.` yields `<id>` and a `🖼`
    /// placeholder in the text.
    #[test]
    fn test_extract_images() {
        // The input must actually contain an image for the assertions below
        // to hold.
        let md = "Hello ![photo](media/tg_abc123.jpg) world";
        let result = extract_text_imgs_links(md);
        assert_eq!(result.images, vec!["abc123"]);
        assert!(result.text.contains("🖼"));
    }

    /// Inline links are replaced by the back-quoted file stem of their path.
    #[test]
    fn test_extract_inline_links() {
        let md = "See [Rust](brain/Rust.md) for details";
        let result = extract_text_imgs_links(md);
        assert!(result.text.contains("`Rust`"));
        assert_eq!(result.links.get("Rust"), Some(&"brain/Rust.md".to_string()));
    }

    /// Wiki links without a label use the file stem and get `.md` appended.
    #[test]
    fn test_extract_wiki_links() {
        let md = "See [[brain/Rust]] for details";
        let result = extract_text_imgs_links(md);
        assert!(result.text.contains("`Rust`"));
        assert_eq!(result.links.get("Rust"), Some(&"brain/Rust.md".to_string()));
    }

    /// Wiki links with `|label` use the explicit label.
    #[test]
    fn test_extract_wiki_links_with_label() {
        let md = "See [[brain/Rust|The Rust Page]] for details";
        let result = extract_text_imgs_links(md);
        assert!(result.text.contains("`The Rust Page`"));
        assert_eq!(
            result.links.get("The Rust Page"),
            Some(&"brain/Rust.md".to_string())
        );
    }

    /// A line consisting solely of an inline link is removed from the text
    /// while the link is still collected.
    #[test]
    fn test_link_only_line_removed() {
        let md = "Some text\n[My Note](notes/MyNote.md)\nMore text";
        let result = extract_text_imgs_links(md);
        assert!(result.text.contains("Some text"));
        assert!(result.text.contains("More text"));
        assert!(!result.text.contains("My Note"));
        assert_eq!(
            result.links.get("MyNote"),
            Some(&"notes/MyNote.md".to_string())
        );
    }

    /// A line consisting solely of a wiki link is removed from the text while
    /// the link is still collected.
    #[test]
    fn test_wiki_link_only_line_removed() {
        let md = "Some text\n[[notes/MyNote]]\nMore text";
        let result = extract_text_imgs_links(md);
        assert!(!result.text.contains("[[notes/MyNote]]"));
        assert_eq!(
            result.links.get("MyNote"),
            Some(&"notes/MyNote.md".to_string())
        );
    }

    /// Empty input produces an empty result.
    #[test]
    fn test_empty_input() {
        let result = extract_text_imgs_links("");
        assert!(result.text.is_empty());
        assert!(result.images.is_empty());
        assert!(result.links.is_empty());
    }

    /// Text without links or images passes through unchanged.
    #[test]
    fn test_plain_text_unchanged() {
        let md = "Just some plain text\nwith no links or images";
        let result = extract_text_imgs_links(md);
        assert_eq!(result.text, md);
        assert!(result.images.is_empty());
        assert!(result.links.is_empty());
    }
}