Skip to main content

llm_wiki_lib/
graph.rs

1//! Graph Pipeline — Build and query the wikilink knowledge graph.
2//!
3//! Provides:
4//! - `graph()`: Build a full wikilink graph with backlinks
5//! - `GraphResult`: JSON-serializable graph structure
6
7use anyhow::Result;
8use regex::Regex;
9use std::collections::{HashMap, HashSet};
10use walkdir::WalkDir;
11
12use crate::wiki::Wiki;
13
14/// A node in the wikilink graph (a wiki file).
15#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
16pub struct GraphNode {
17    /// Filename without .md extension (the wikilink target name).
18    pub name: String,
19    /// Full path to the file.
20    pub path: String,
21    /// Titles/headings found in the file.
22    pub headings: Vec<String>,
23    /// Number of outgoing links from this file.
24    pub outlinks: usize,
25    /// Number of files that link to this file.
26    pub backlinks: usize,
27}
28
29/// An edge in the wikilink graph.
30#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
31pub struct GraphEdge {
32    pub from: String,
33    pub to: String,
34}
35
36/// Result of the graph pipeline.
37#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
38pub struct GraphResult {
39    pub ok: bool,
40    pub node_count: usize,
41    pub edge_count: usize,
42    pub nodes: Vec<GraphNode>,
43    pub edges: Vec<GraphEdge>,
44    /// Files that are linked to but don't exist (broken links).
45    #[serde(default)]
46    pub orphans: Vec<String>,
47}
48
49impl GraphResult {
50    pub fn empty() -> Self {
51        Self {
52            ok: true,
53            node_count: 0,
54            edge_count: 0,
55            nodes: Vec::new(),
56            edges: Vec::new(),
57            orphans: Vec::new(),
58        }
59    }
60}
61
62/// Run the graph pipeline: build the full wikilink graph with backlinks.
63pub fn run(wiki: &Wiki) -> Result<GraphResult> {
64    let wiki_dir = wiki.config().wiki_dir()?;
65
66    if !wiki_dir.exists() {
67        return Ok(GraphResult::empty());
68    }
69
70    // Collect all wiki files
71    let files: Vec<_> = WalkDir::new(&wiki_dir)
72        .into_iter()
73        .filter_map(|e| e.ok())
74        .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
75        .filter(|e| e.path().file_name().is_some_and(|n| n != "SYSTEM.md"))
76        .collect();
77
78    if files.is_empty() {
79        return Ok(GraphResult::empty());
80    }
81
82    let wikilink_re = Regex::new(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]").unwrap();
83    let heading_re = Regex::new(r"^#\s+(.+)$").unwrap();
84
85    // Build node list and extract wikilinks
86    let mut name_to_path: HashMap<String, String> = HashMap::new();
87    let mut outlinks: HashMap<String, Vec<String>> = HashMap::new();
88
89    for entry in &files {
90        let path = entry.path();
91        let name = path
92            .file_stem()
93            .and_then(|s| s.to_str())
94            .unwrap_or("")
95            .to_string();
96
97        let content = std::fs::read_to_string(path)?;
98        let links: Vec<String> = wikilink_re
99            .captures_iter(&content)
100            .filter_map(|c| c.get(1).map(|m| m.as_str().trim().to_string()))
101            .collect();
102
103        name_to_path.insert(name.clone(), path.to_string_lossy().to_string());
104        outlinks.insert(name, links);
105    }
106
107    // Build backlinks index
108    let mut backlinks: HashMap<String, Vec<String>> = HashMap::new();
109    for (from, links) in &outlinks {
110        for to in links.iter() {
111            backlinks.entry(to.clone()).or_default().push(from.clone());
112        }
113    }
114
115    // Build edges and orphans
116    let mut edges = Vec::new();
117    let all_target_names: HashSet<String> = name_to_path.keys().cloned().collect();
118    let mut orphans = Vec::new();
119
120    for (from, links) in &outlinks {
121        for to in links.iter() {
122            edges.push(GraphEdge {
123                from: from.clone(),
124                to: to.clone(),
125            });
126            if !all_target_names.contains(to.as_str()) {
127                orphans.push(to.clone());
128            }
129        }
130    }
131
132    // Build nodes
133    let mut nodes = Vec::new();
134    for entry in &files {
135        let path = entry.path();
136        let name = path
137            .file_stem()
138            .and_then(|s| s.to_str())
139            .unwrap_or("")
140            .to_string();
141        let content = std::fs::read_to_string(path)?;
142
143        let headings: Vec<String> = heading_re
144            .captures_iter(&content)
145            .filter_map(|c| c.get(1).map(|m| m.as_str().trim().to_string()))
146            .collect();
147
148        let out_count = outlinks.get(&name).map(|v| v.len()).unwrap_or(0);
149        let back_count = backlinks.get(&name).map(|v| v.len()).unwrap_or(0);
150
151        nodes.push(GraphNode {
152            name,
153            path: path.to_string_lossy().to_string(),
154            headings,
155            outlinks: out_count,
156            backlinks: back_count,
157        });
158    }
159
160    // Sort nodes by backlink count (most connected first)
161    nodes.sort_by_key(|n| std::cmp::Reverse(n.backlinks));
162
163    Ok(GraphResult {
164        ok: true,
165        node_count: nodes.len(),
166        edge_count: edges.len(),
167        nodes,
168        edges,
169        orphans,
170    })
171}