//! Extraction of local, cross-wiki, wikilink-style, and CommonMark links
//! from parsed pages.

use std::collections::HashSet;

use crate::frontmatter::ParsedPage;

#[derive(Debug, Clone, PartialEq)]
pub enum ParsedLink {
    /// A link that resolves within the current wiki.
    Local(String),
    /// A `wiki://<wiki>/<slug>` link pointing into another wiki.
    CrossWiki { wiki: String, slug: String },
}

impl ParsedLink {
    /// Parses a raw link string: `wiki://<wiki>/<slug>` becomes a
    /// cross-wiki link; anything else is treated as local.
    pub fn parse(s: &str) -> Self {
        if let Some(rest) = s.strip_prefix("wiki://")
            && let Some(slash) = rest.find('/')
        {
            return ParsedLink::CrossWiki {
                wiki: rest[..slash].to_string(),
                slug: rest[slash + 1..].to_string(),
            };
        }
        ParsedLink::Local(s.to_string())
    }

    /// Returns the slug portion: the whole string for local links, the
    /// part after `wiki://<wiki>/` for cross-wiki links.
    pub fn as_raw(&self) -> &str {
        match self {
            ParsedLink::Local(s) => s,
            ParsedLink::CrossWiki { slug, .. } => slug,
        }
    }
}
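
// Illustrative round trip (hypothetical inputs, mirrored in the tests at
// the bottom of this file): `ParsedLink::parse("wiki://docs/intro")`
// yields `CrossWiki { wiki: "docs", slug: "intro" }`, whose `as_raw()` is
// "intro", while `ParsedLink::parse("intro")` stays `Local("intro")`.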

/// Collects every link referenced by a page: `sources` and `concepts`
/// front-matter entries first, then wikilinks and CommonMark links from
/// the body. Duplicates are dropped, keeping first-seen order.
pub fn extract_parsed_links(page: &ParsedPage) -> Vec<ParsedLink> {
    let mut result = Vec::new();
    let mut seen = HashSet::new();
    for key in ["sources", "concepts"] {
        for slug in page.string_list(key) {
            let raw = slug.to_string();
            if seen.insert(raw.clone()) {
                result.push(ParsedLink::parse(&raw));
            }
        }
    }
    extract_parsed_wikilinks(&page.body, &mut seen, &mut result);
    result
}

/// Scans `text` for `[[slug]]` wikilinks, then for CommonMark links,
/// pushing each newly seen link onto `result`.
fn extract_parsed_wikilinks(text: &str, seen: &mut HashSet<String>, result: &mut Vec<ParsedLink>) {
    let mut rest = text;
    while let Some(start) = rest.find("[[") {
        let after = &rest[start + 2..];
        if let Some(end) = after.find("]]") {
            let raw = after[..end].trim().to_string();
            if !raw.is_empty() && seen.insert(raw.clone()) {
                result.push(ParsedLink::parse(&raw));
            }
            rest = &after[end + 2..];
        } else {
            // Unterminated "[[": nothing more to scan.
            break;
        }
    }
    extract_commonmark_links(text, seen, result);
}

/// Scans `text` for CommonMark `[label](dest)` links. Images, empty
/// destinations, bare fragments, and `http://`/`https://`/`mailto:`
/// destinations are skipped; a `#fragment` suffix is stripped from the
/// destination before it is recorded.
fn extract_commonmark_links(text: &str, seen: &mut HashSet<String>, result: &mut Vec<ParsedLink>) {
    let mut rest = text;
    while let Some(bracket) = rest.find("](") {
        let before = &rest[..bracket];
        if let Some(open) = before.rfind('[') {
            // "![label](dest)" is an image, not a link.
            let is_image = open > 0 && before.as_bytes()[open - 1] == b'!';
            let after_paren = &rest[bracket + 2..];
            if let Some(close) = after_paren.find(')') {
                let dest_raw = after_paren[..close].trim();
                // Drop any "#fragment" suffix from the destination.
                let dest = dest_raw
                    .find('#')
                    .map(|i| dest_raw[..i].trim())
                    .unwrap_or(dest_raw);
                if !is_image
                    && !dest.is_empty()
                    && !dest.starts_with("http://")
                    && !dest.starts_with("https://")
                    && !dest.starts_with("mailto:")
                    && !dest.starts_with('#')
                {
                    let raw = dest.to_string();
                    if seen.insert(raw.clone()) {
                        result.push(ParsedLink::parse(&raw));
                    }
                }
                rest = &after_paren[close + 1..];
                continue;
            }
        }
        rest = &rest[bracket + 2..];
    }
}
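
// A sketch of what the scanner above keeps and drops (illustrative inputs):
//   "[guide](setup.md#install)"   -> "setup.md" (fragment stripped)
//   "![logo](logo.png)"           -> skipped (image)
//   "[home](https://example.com)" -> skipped (external scheme)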

/// String-based variant of [`extract_parsed_links`]: same traversal
/// order and deduplication, but returns raw slugs instead of
/// [`ParsedLink`] values.
pub fn extract_links(page: &ParsedPage) -> Vec<String> {
    let mut result = Vec::new();
    let mut seen = HashSet::new();
    for key in ["sources", "concepts"] {
        for slug in page.string_list(key) {
            if seen.insert(slug.to_string()) {
                result.push(slug.to_string());
            }
        }
    }
    extract_wikilinks(&page.body, &mut seen, &mut result);
    result
}

/// Scans `text` for `[[slug]]` wikilinks and CommonMark links, pushing
/// each newly seen slug onto `result`. CommonMark destinations are
/// reduced to their slug via [`ParsedLink::as_raw`].
pub fn extract_wikilinks(text: &str, seen: &mut HashSet<String>, result: &mut Vec<String>) {
    let mut rest = text;
    while let Some(start) = rest.find("[[") {
        let after = &rest[start + 2..];
        if let Some(end) = after.find("]]") {
            let slug = after[..end].trim();
            if !slug.is_empty() && seen.insert(slug.to_string()) {
                result.push(slug.to_string());
            }
            rest = &after[end + 2..];
        } else {
            break;
        }
    }
    let mut parsed: Vec<ParsedLink> = Vec::new();
    extract_commonmark_links(text, seen, &mut parsed);
    for link in parsed {
        result.push(link.as_raw().to_string());
    }
}

/// Convenience wrapper: extracts wikilinks and CommonMark links from a
/// bare body with fresh deduplication state.
pub fn extract_body_wikilinks(text: &str) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut result = Vec::new();
    extract_wikilinks(text, &mut seen, &mut result);
    result
}
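
// A minimal test sketch exercising the text-level extractors; it avoids
// constructing a `ParsedPage`, whose shape lives in `crate::frontmatter`.
// The inputs are hypothetical, chosen to hit each branch above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_splits_cross_wiki_links() {
        assert_eq!(
            ParsedLink::parse("wiki://docs/intro"),
            ParsedLink::CrossWiki {
                wiki: "docs".to_string(),
                slug: "intro".to_string(),
            }
        );
        // No '/' after the prefix: falls back to a local link.
        assert_eq!(
            ParsedLink::parse("wiki://docs"),
            ParsedLink::Local("wiki://docs".to_string())
        );
    }

    #[test]
    fn body_extraction_dedupes_and_filters() {
        let body = "See [[alpha]] and [[alpha]], then [guide](setup.md#install), \
                    ![logo](logo.png) and [home](https://example.com).";
        assert_eq!(
            extract_body_wikilinks(body),
            vec!["alpha".to_string(), "setup.md".to_string()]
        );
    }
}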