1use pulldown_cmark::{Event, Options, Parser, Tag};
2use serde::Serialize;
3use std::collections::{HashMap, HashSet};
4
5use crate::config::Config;
6use crate::content::Content;
7
/// One inbound link: identifies a post that links *to* some page.
#[derive(Debug, Clone)]
pub struct BacklinkInfo {
    // Rendered (non-normalized) URL of the linking post.
    pub url: String,
    // Title of the linking post, copied from its front matter.
    pub title: String,
    // Section the linking post belongs to.
    pub section: String,
}
18
/// A node in the serialized link graph; one per post.
#[derive(Debug, Clone, Serialize)]
pub struct GraphNode {
    // Normalized URL, used as the stable node identifier.
    pub id: String,
    pub title: String,
    pub section: String,
    // Original (non-normalized) URL of the post.
    pub url: String,
}
27
/// A directed link between two nodes; `source` and `target` are
/// normalized URLs matching [`GraphNode::id`].
#[derive(Debug, Clone, Serialize)]
pub struct GraphEdge {
    pub source: String,
    pub target: String,
}
34
/// Serializable nodes-plus-edges payload, e.g. for a front-end
/// graph visualization.
#[derive(Debug, Serialize)]
pub struct GraphData {
    pub nodes: Vec<GraphNode>,
    pub edges: Vec<GraphEdge>,
}
41
/// Site-wide internal-link graph: which posts link to which pages.
///
/// Built once from the full content set via [`LinkGraph::build`].
#[derive(Debug, Default)]
pub struct LinkGraph {
    // Normalized target URL -> posts that link to it.
    backlinks: HashMap<String, Vec<BacklinkInfo>>,
    // (source, target) pairs of normalized URLs, in discovery order.
    edges: Vec<(String, String)>,
    // Normalized URL -> node metadata for that post.
    nodes: HashMap<String, GraphNode>,
}
52
53impl LinkGraph {
54 pub fn build(config: &Config, content: &Content) -> Self {
56 let mut valid_urls: HashSet<String> = HashSet::new();
58 let mut nodes: HashMap<String, GraphNode> = HashMap::new();
59
60 for section in content.sections.values() {
61 for post in §ion.posts {
62 let url = post.url(config);
63 let normalized = normalize_url(&url);
64 valid_urls.insert(normalized.clone());
65 nodes.insert(
66 normalized.clone(),
67 GraphNode {
68 id: normalized,
69 title: post.frontmatter.title.clone(),
70 section: post.section.clone(),
71 url,
72 },
73 );
74 }
75 }
76
77 let mut backlinks: HashMap<String, Vec<BacklinkInfo>> = HashMap::new();
79 let mut edges: Vec<(String, String)> = Vec::new();
80
81 for section in content.sections.values() {
82 for post in §ion.posts {
83 let source_url = normalize_url(&post.url(config));
84 let source_info = BacklinkInfo {
85 url: post.url(config),
86 title: post.frontmatter.title.clone(),
87 section: post.section.clone(),
88 };
89
90 let links = extract_internal_links(&post.content, &valid_urls);
92
93 for target_url in links {
94 backlinks
96 .entry(target_url.clone())
97 .or_default()
98 .push(source_info.clone());
99
100 edges.push((source_url.clone(), target_url));
102 }
103 }
104 }
105
106 Self {
107 backlinks,
108 edges,
109 nodes,
110 }
111 }
112
113 pub fn backlinks_for(&self, url: &str) -> &[BacklinkInfo] {
115 let normalized = normalize_url(url);
116 self.backlinks
117 .get(&normalized)
118 .map(|v| v.as_slice())
119 .unwrap_or(&[])
120 }
121
122 pub fn to_graph_data(&self) -> GraphData {
124 GraphData {
125 nodes: self.nodes.values().cloned().collect(),
126 edges: self
127 .edges
128 .iter()
129 .map(|(source, target)| GraphEdge {
130 source: source.clone(),
131 target: target.clone(),
132 })
133 .collect(),
134 }
135 }
136
137 pub fn local_graph_for(&self, url: &str) -> GraphData {
140 let normalized = normalize_url(url);
141
142 let mut connected_ids: HashSet<String> = HashSet::new();
144 connected_ids.insert(normalized.clone());
145
146 for (source, target) in &self.edges {
148 if source == &normalized {
149 connected_ids.insert(target.clone());
150 }
151 }
152
153 for (source, target) in &self.edges {
155 if target == &normalized {
156 connected_ids.insert(source.clone());
157 }
158 }
159
160 let nodes: Vec<GraphNode> = self
162 .nodes
163 .values()
164 .filter(|n| connected_ids.contains(&n.id))
165 .cloned()
166 .collect();
167
168 let edges: Vec<GraphEdge> = self
169 .edges
170 .iter()
171 .filter(|(s, t)| connected_ids.contains(s) && connected_ids.contains(t))
172 .map(|(source, target)| GraphEdge {
173 source: source.clone(),
174 target: target.clone(),
175 })
176 .collect();
177
178 GraphData { nodes, edges }
179 }
180}
181
182fn extract_internal_links(content: &str, valid_urls: &HashSet<String>) -> Vec<String> {
184 let options = Options::ENABLE_STRIKETHROUGH
185 | Options::ENABLE_TABLES
186 | Options::ENABLE_FOOTNOTES
187 | Options::ENABLE_HEADING_ATTRIBUTES;
188
189 let parser = Parser::new_ext(content, options);
190 let mut links = Vec::new();
191
192 for event in parser {
193 if let Event::Start(Tag::Link { dest_url, .. }) = event {
194 let url = dest_url.to_string();
195
196 if url.starts_with("http://")
198 || url.starts_with("https://")
199 || url.starts_with('#')
200 || url.starts_with("mailto:")
201 {
202 continue;
203 }
204
205 let normalized = normalize_url(&url);
207 if valid_urls.contains(&normalized) {
208 links.push(normalized);
209 }
210 }
211 }
212
213 links
214}
215
/// Normalize a site-internal URL to the canonical `/path/` form:
/// any `#fragment` is dropped, and exactly one leading and one
/// trailing slash are enforced.
///
/// The site root (`"/"`, `""`, or a bare fragment) normalizes to
/// `"/"` — the previous one-liner produced `"//"` for those inputs.
pub fn normalize_url(url: &str) -> String {
    // Everything before the first '#'; `next()` on split always
    // yields at least one item, so unwrap_or is belt-and-braces.
    let path = url.split('#').next().unwrap_or(url);
    let trimmed = path.trim_matches('/');
    if trimmed.is_empty() {
        "/".to_string()
    } else {
        format!("/{}/", trimmed)
    }
}
222
#[cfg(test)]
mod tests {
    use super::*;

    // Normalization enforces one leading and one trailing slash and
    // strips fragments, regardless of the input spelling.
    #[test]
    fn test_normalize_url() {
        let cases = [
            ("/blog/post/", "/blog/post/"),
            ("/blog/post", "/blog/post/"),
            ("blog/post/", "/blog/post/"),
            ("blog/post", "/blog/post/"),
            ("/blog/post#section", "/blog/post/"),
            ("/blog/post/#section", "/blog/post/"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_url(input), expected);
        }
    }

    // Only links that resolve to known internal pages are kept;
    // external URLs, anchors, and unknown paths are dropped.
    #[test]
    fn test_extract_internal_links() {
        let valid_urls: HashSet<String> = ["/blog/post-1/", "/blog/post-2/"]
            .iter()
            .map(|s| s.to_string())
            .collect();

        let content = r#"
Check out [post 1](/blog/post-1/) and [post 2](/blog/post-2).
Also see [external](https://example.com) and [anchor](#section).
And [invalid](/blog/post-3/) link.
"#;

        let links = extract_internal_links(content, &valid_urls);
        assert_eq!(links.len(), 2);
        for expected in ["/blog/post-1/", "/blog/post-2/"] {
            assert!(links.contains(&expected.to_string()));
        }
    }

    // GraphData serializes with the field names the front end expects.
    #[test]
    fn test_graph_data_serialization() {
        let node = GraphNode {
            id: "/blog/test/".to_string(),
            title: "Test Post".to_string(),
            section: "blog".to_string(),
            url: "/blog/test/".to_string(),
        };
        let edge = GraphEdge {
            source: "/blog/a/".to_string(),
            target: "/blog/b/".to_string(),
        };
        let graph = GraphData {
            nodes: vec![node],
            edges: vec![edge],
        };

        let json = serde_json::to_string(&graph).unwrap();
        assert!(json.contains("\"id\":\"/blog/test/\""));
        assert!(json.contains("\"source\":\"/blog/a/\""));
    }
}