// rs_web/links.rs — internal link graph: backlink tracking and graph JSON export

use std::collections::{HashMap, HashSet};

use pulldown_cmark::{Event, Options, Parser, Tag};
use serde::Serialize;

use crate::config::Config;
use crate::content::Content;
7
/// Represents a link from one post to another.
///
/// Stored per *target* URL in [`LinkGraph`]; each instance describes the
/// *source* post that contains the link.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BacklinkInfo {
    /// URL of the source post (as rendered, not normalized)
    pub url: String,
    /// Title of the source post
    pub title: String,
    /// Section the source post belongs to
    pub section: String,
}
18
19/// Node in the graph (a post)
20#[derive(Debug, Clone, Serialize)]
21pub struct GraphNode {
22    pub id: String,
23    pub title: String,
24    pub section: String,
25    pub url: String,
26}
27
28/// Edge in the graph (a link between posts)
29#[derive(Debug, Clone, Serialize)]
30pub struct GraphEdge {
31    pub source: String,
32    pub target: String,
33}
34
35/// JSON-serializable graph for visualization
36#[derive(Debug, Serialize)]
37pub struct GraphData {
38    pub nodes: Vec<GraphNode>,
39    pub edges: Vec<GraphEdge>,
40}
41
/// Graph of internal links between posts.
///
/// Built once from the whole site's content (see [`LinkGraph::build`]) and
/// then queried for backlink lists and graph JSON export. All map keys and
/// edge endpoints are *normalized* URLs (see `normalize_url`).
#[derive(Debug, Default)]
pub struct LinkGraph {
    /// Map from normalized target URL -> list of posts that link to it
    backlinks: HashMap<String, Vec<BacklinkInfo>>,
    /// All edges as (source, target) pairs of normalized URLs
    edges: Vec<(String, String)>,
    /// All nodes, keyed by normalized URL
    nodes: HashMap<String, GraphNode>,
}
52
53impl LinkGraph {
54    /// Build link graph from all content
55    pub fn build(config: &Config, content: &Content) -> Self {
56        // First, build a set of all valid internal URLs and nodes
57        let mut valid_urls: HashSet<String> = HashSet::new();
58        let mut nodes: HashMap<String, GraphNode> = HashMap::new();
59
60        for section in content.sections.values() {
61            for post in &section.posts {
62                let url = post.url(config);
63                let normalized = normalize_url(&url);
64                valid_urls.insert(normalized.clone());
65                nodes.insert(
66                    normalized.clone(),
67                    GraphNode {
68                        id: normalized,
69                        title: post.frontmatter.title.clone(),
70                        section: post.section.clone(),
71                        url,
72                    },
73                );
74            }
75        }
76
77        // Extract links from all posts
78        let mut backlinks: HashMap<String, Vec<BacklinkInfo>> = HashMap::new();
79        let mut edges: Vec<(String, String)> = Vec::new();
80
81        for section in content.sections.values() {
82            for post in &section.posts {
83                let source_url = normalize_url(&post.url(config));
84                let source_info = BacklinkInfo {
85                    url: post.url(config),
86                    title: post.frontmatter.title.clone(),
87                    section: post.section.clone(),
88                };
89
90                // Extract all internal links from this post's content
91                let links = extract_internal_links(&post.content, &valid_urls);
92
93                for target_url in links {
94                    // Track backlinks
95                    backlinks
96                        .entry(target_url.clone())
97                        .or_default()
98                        .push(source_info.clone());
99
100                    // Track edges
101                    edges.push((source_url.clone(), target_url));
102                }
103            }
104        }
105
106        Self {
107            backlinks,
108            edges,
109            nodes,
110        }
111    }
112
113    /// Get backlinks for a given URL
114    pub fn backlinks_for(&self, url: &str) -> &[BacklinkInfo] {
115        let normalized = normalize_url(url);
116        self.backlinks
117            .get(&normalized)
118            .map(|v| v.as_slice())
119            .unwrap_or(&[])
120    }
121
122    /// Export full graph data for JSON visualization
123    pub fn to_graph_data(&self) -> GraphData {
124        GraphData {
125            nodes: self.nodes.values().cloned().collect(),
126            edges: self
127                .edges
128                .iter()
129                .map(|(source, target)| GraphEdge {
130                    source: source.clone(),
131                    target: target.clone(),
132                })
133                .collect(),
134        }
135    }
136
137    /// Export graph data for a specific post (local neighborhood)
138    /// Includes: the post itself, posts it links to, and posts that link to it
139    pub fn local_graph_for(&self, url: &str) -> GraphData {
140        let normalized = normalize_url(url);
141
142        // Collect connected node IDs
143        let mut connected_ids: HashSet<String> = HashSet::new();
144        connected_ids.insert(normalized.clone());
145
146        // Posts this one links to (outgoing)
147        for (source, target) in &self.edges {
148            if source == &normalized {
149                connected_ids.insert(target.clone());
150            }
151        }
152
153        // Posts that link to this one (incoming/backlinks)
154        for (source, target) in &self.edges {
155            if target == &normalized {
156                connected_ids.insert(source.clone());
157            }
158        }
159
160        // Filter nodes and edges
161        let nodes: Vec<GraphNode> = self
162            .nodes
163            .values()
164            .filter(|n| connected_ids.contains(&n.id))
165            .cloned()
166            .collect();
167
168        let edges: Vec<GraphEdge> = self
169            .edges
170            .iter()
171            .filter(|(s, t)| connected_ids.contains(s) && connected_ids.contains(t))
172            .map(|(source, target)| GraphEdge {
173                source: source.clone(),
174                target: target.clone(),
175            })
176            .collect();
177
178        GraphData { nodes, edges }
179    }
180}
181
182/// Extract internal links from markdown content
183fn extract_internal_links(content: &str, valid_urls: &HashSet<String>) -> Vec<String> {
184    let options = Options::ENABLE_STRIKETHROUGH
185        | Options::ENABLE_TABLES
186        | Options::ENABLE_FOOTNOTES
187        | Options::ENABLE_HEADING_ATTRIBUTES;
188
189    let parser = Parser::new_ext(content, options);
190    let mut links = Vec::new();
191
192    for event in parser {
193        if let Event::Start(Tag::Link { dest_url, .. }) = event {
194            let url = dest_url.to_string();
195
196            // Skip external links and anchors
197            if url.starts_with("http://")
198                || url.starts_with("https://")
199                || url.starts_with('#')
200                || url.starts_with("mailto:")
201            {
202                continue;
203            }
204
205            // Normalize and check if it's a valid internal link
206            let normalized = normalize_url(&url);
207            if valid_urls.contains(&normalized) {
208                links.push(normalized);
209            }
210        }
211    }
212
213    links
214}
215
/// Normalize a URL for comparison.
///
/// Strips any `#fragment` and `?query` component, trims leading/trailing
/// slashes, and returns the canonical `/path/` form. The site root (or an
/// empty string) normalizes to `"/"` rather than the previous `"//"`.
pub fn normalize_url(url: &str) -> String {
    let url = url.split('#').next().unwrap_or(url); // remove anchor
    let url = url.split('?').next().unwrap_or(url); // remove query string
    let url = url.trim_matches('/'); // remove leading/trailing slashes
    if url.is_empty() {
        // Root or empty input — "/{}/"  would yield the bogus "//".
        "/".to_string()
    } else {
        format!("/{}/", url) // consistent format: /path/
    }
}
222
#[cfg(test)]
mod tests {
    use super::*;

    // Normalization must always yield the canonical "/path/" form,
    // regardless of leading/trailing slashes or a trailing anchor.
    #[test]
    fn test_normalize_url() {
        assert_eq!(normalize_url("/blog/post/"), "/blog/post/");
        assert_eq!(normalize_url("/blog/post"), "/blog/post/");
        assert_eq!(normalize_url("blog/post/"), "/blog/post/");
        assert_eq!(normalize_url("blog/post"), "/blog/post/");
        assert_eq!(normalize_url("/blog/post#section"), "/blog/post/");
        assert_eq!(normalize_url("/blog/post/#section"), "/blog/post/");
    }

    // Only links that resolve to a known internal URL are kept; external
    // URLs, bare anchors, and unknown paths must all be dropped.
    #[test]
    fn test_extract_internal_links() {
        let valid_urls: HashSet<String> =
            vec!["/blog/post-1/".to_string(), "/blog/post-2/".to_string()]
                .into_iter()
                .collect();

        let content = r#"
Check out [post 1](/blog/post-1/) and [post 2](/blog/post-2).
Also see [external](https://example.com) and [anchor](#section).
And [invalid](/blog/post-3/) link.
"#;

        let links = extract_internal_links(content, &valid_urls);
        assert_eq!(links.len(), 2);
        assert!(links.contains(&"/blog/post-1/".to_string()));
        assert!(links.contains(&"/blog/post-2/".to_string()));
    }

    // GraphData must serialize to the flat JSON shape the visualization
    // front-end consumes (plain "id"/"source"/"target" string fields).
    #[test]
    fn test_graph_data_serialization() {
        let graph = GraphData {
            nodes: vec![GraphNode {
                id: "/blog/test/".to_string(),
                title: "Test Post".to_string(),
                section: "blog".to_string(),
                url: "/blog/test/".to_string(),
            }],
            edges: vec![GraphEdge {
                source: "/blog/a/".to_string(),
                target: "/blog/b/".to_string(),
            }],
        };

        let json = serde_json::to_string(&graph).unwrap();
        assert!(json.contains("\"id\":\"/blog/test/\""));
        assert!(json.contains("\"source\":\"/blog/a/\""));
    }
}