Skip to main content

llmwiki_tooling/
link_index.rs

1use std::collections::{HashMap, HashSet};
2
3use crate::error::WikiError;
4use crate::page::{PageId, WikilinkOccurrence};
5use crate::wiki::Wiki;
6
7/// Directed link graph across all wiki pages.
8#[derive(Debug)]
9pub struct LinkIndex {
10    outbound: HashMap<PageId, Vec<WikilinkOccurrence>>,
11    inbound: HashMap<PageId, HashSet<PageId>>,
12}
13
14impl LinkIndex {
15    /// Build the link index by scanning all wiki files.
16    pub fn build(wiki: &Wiki) -> Result<Self, WikiError> {
17        let mut outbound: HashMap<PageId, Vec<WikilinkOccurrence>> = HashMap::new();
18        let mut inbound: HashMap<PageId, HashSet<PageId>> = HashMap::new();
19
20        for page_id in wiki.pages().keys() {
21            inbound.entry(page_id.clone()).or_default();
22        }
23
24        for file_path in wiki.all_scannable_files() {
25            let source_page = PageId::from_path(&file_path);
26            let wikilinks = wiki.wikilinks(&file_path)?;
27
28            if let Some(source_id) = &source_page {
29                for wl in wikilinks {
30                    inbound
31                        .entry(wl.page.clone())
32                        .or_default()
33                        .insert(source_id.clone());
34                }
35                outbound.insert(source_id.clone(), wikilinks.to_vec());
36            }
37        }
38
39        Ok(Self { outbound, inbound })
40    }
41
42    pub fn inbound(&self, page: &PageId) -> Vec<&PageId> {
43        self.inbound
44            .get(page)
45            .map(|set| set.iter().collect())
46            .unwrap_or_default()
47    }
48
49    pub fn outbound(&self, page: &PageId) -> &[WikilinkOccurrence] {
50        self.outbound
51            .get(page)
52            .map(|v| v.as_slice())
53            .unwrap_or_default()
54    }
55
56    pub fn orphans(&self, wiki: &Wiki) -> Vec<PageId> {
57        let mut orphans: Vec<PageId> = wiki
58            .pages()
59            .keys()
60            .filter(|id| {
61                self.inbound
62                    .get(*id)
63                    .map(|set| set.is_empty())
64                    .unwrap_or(true)
65            })
66            .cloned()
67            .collect();
68        orphans.sort();
69        orphans
70    }
71
72    pub fn all_edges(&self) -> Vec<(&PageId, &PageId)> {
73        let mut edges = Vec::new();
74        for (source, wikilinks) in &self.outbound {
75            for wl in wikilinks {
76                edges.push((source, &wl.page));
77            }
78        }
79        edges.sort();
80        edges
81    }
82}