use anyhow::Result;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use walkdir::WalkDir;
use crate::wiki::Wiki;
/// One wiki page in the link graph, as produced by `run`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphNode {
/// Page name: the file stem of the Markdown file (empty if the stem is not valid UTF-8).
pub name: String,
/// Path of the Markdown file, converted lossily to a string.
pub path: String,
/// Heading texts captured from the page content by the heading pattern in `run`.
pub headings: Vec<String>,
/// Number of wikilinks found in this page (repeated links are counted each time).
pub outlinks: usize,
/// Number of wikilinks across the scanned pages whose target equals this page's name.
pub backlinks: usize,
}
/// A directed link between two page names, one per wikilink occurrence.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphEdge {
/// Name of the page that contains the wikilink.
pub from: String,
/// Link target exactly as written inside `[[...]]` (trimmed); it may not correspond to an existing page.
pub to: String,
}
/// Serializable summary of the wiki link graph returned by `run`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphResult {
/// Status flag; every construction site visible in this file sets it to `true`.
pub ok: bool,
/// Number of entries in `nodes`.
pub node_count: usize,
/// Number of entries in `edges`.
pub edge_count: usize,
/// One node per scanned Markdown page.
pub nodes: Vec<GraphNode>,
/// One edge per wikilink occurrence.
pub edges: Vec<GraphEdge>,
/// Link targets that do not match the name of any scanned page.
/// Defaults to an empty list when the field is absent during deserialization.
#[serde(default)]
pub orphans: Vec<String>,
}
impl GraphResult {
pub fn empty() -> Self {
Self {
ok: true,
node_count: 0,
edge_count: 0,
nodes: Vec::new(),
edges: Vec::new(),
orphans: Vec::new(),
}
}
}
pub fn run(wiki: &Wiki) -> Result<GraphResult> {
let wiki_dir = wiki.config().wiki_dir()?;
if !wiki_dir.exists() {
return Ok(GraphResult::empty());
}
let files: Vec<_> = WalkDir::new(&wiki_dir)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
.filter(|e| e.path().file_name().is_some_and(|n| n != "SYSTEM.md"))
.collect();
if files.is_empty() {
return Ok(GraphResult::empty());
}
let wikilink_re = Regex::new(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]").unwrap();
let heading_re = Regex::new(r"^#\s+(.+)$").unwrap();
let mut name_to_path: HashMap<String, String> = HashMap::new();
let mut outlinks: HashMap<String, Vec<String>> = HashMap::new();
for entry in &files {
let path = entry.path();
let name = path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
let content = std::fs::read_to_string(path)?;
let links: Vec<String> = wikilink_re
.captures_iter(&content)
.filter_map(|c| c.get(1).map(|m| m.as_str().trim().to_string()))
.collect();
name_to_path.insert(name.clone(), path.to_string_lossy().to_string());
outlinks.insert(name, links);
}
let mut backlinks: HashMap<String, Vec<String>> = HashMap::new();
for (from, links) in &outlinks {
for to in links.iter() {
backlinks.entry(to.clone()).or_default().push(from.clone());
}
}
let mut edges = Vec::new();
let all_target_names: HashSet<String> = name_to_path.keys().cloned().collect();
let mut orphans = Vec::new();
for (from, links) in &outlinks {
for to in links.iter() {
edges.push(GraphEdge {
from: from.clone(),
to: to.clone(),
});
if !all_target_names.contains(to.as_str()) {
orphans.push(to.clone());
}
}
}
let mut nodes = Vec::new();
for entry in &files {
let path = entry.path();
let name = path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
let content = std::fs::read_to_string(path)?;
let headings: Vec<String> = heading_re
.captures_iter(&content)
.filter_map(|c| c.get(1).map(|m| m.as_str().trim().to_string()))
.collect();
let out_count = outlinks.get(&name).map(|v| v.len()).unwrap_or(0);
let back_count = backlinks.get(&name).map(|v| v.len()).unwrap_or(0);
nodes.push(GraphNode {
name,
path: path.to_string_lossy().to_string(),
headings,
outlinks: out_count,
backlinks: back_count,
});
}
nodes.sort_by_key(|n| std::cmp::Reverse(n.backlinks));
Ok(GraphResult {
ok: true,
node_count: nodes.len(),
edge_count: edges.len(),
nodes,
edges,
orphans,
})
}