llmwiki_tooling/
link_index.rs1use std::collections::{HashMap, HashSet};
2
3use crate::error::WikiError;
4use crate::page::{PageId, WikilinkOccurrence};
5use crate::wiki::Wiki;
6
7#[derive(Debug)]
9pub struct LinkIndex {
10 outbound: HashMap<PageId, Vec<WikilinkOccurrence>>,
11 inbound: HashMap<PageId, HashSet<PageId>>,
12}
13
14impl LinkIndex {
15 pub fn build(wiki: &Wiki) -> Result<Self, WikiError> {
17 let mut outbound: HashMap<PageId, Vec<WikilinkOccurrence>> = HashMap::new();
18 let mut inbound: HashMap<PageId, HashSet<PageId>> = HashMap::new();
19
20 for page_id in wiki.pages().keys() {
21 inbound.entry(page_id.clone()).or_default();
22 }
23
24 for file_path in wiki.all_scannable_files() {
25 let source_page = PageId::from_path(&file_path);
26 let wikilinks = wiki.wikilinks(&file_path)?;
27
28 if let Some(source_id) = &source_page {
29 for wl in wikilinks {
30 inbound
31 .entry(wl.page.clone())
32 .or_default()
33 .insert(source_id.clone());
34 }
35 outbound.insert(source_id.clone(), wikilinks.to_vec());
36 }
37 }
38
39 Ok(Self { outbound, inbound })
40 }
41
42 pub fn inbound(&self, page: &PageId) -> Vec<&PageId> {
43 self.inbound
44 .get(page)
45 .map(|set| set.iter().collect())
46 .unwrap_or_default()
47 }
48
49 pub fn outbound(&self, page: &PageId) -> &[WikilinkOccurrence] {
50 self.outbound
51 .get(page)
52 .map(|v| v.as_slice())
53 .unwrap_or_default()
54 }
55
56 pub fn orphans(&self, wiki: &Wiki) -> Vec<PageId> {
57 let mut orphans: Vec<PageId> = wiki
58 .pages()
59 .keys()
60 .filter(|id| {
61 self.inbound
62 .get(*id)
63 .map(|set| set.is_empty())
64 .unwrap_or(true)
65 })
66 .cloned()
67 .collect();
68 orphans.sort();
69 orphans
70 }
71
72 pub fn all_edges(&self) -> Vec<(&PageId, &PageId)> {
73 let mut edges = Vec::new();
74 for (source, wikilinks) in &self.outbound {
75 for wl in wikilinks {
76 edges.push((source, &wl.page));
77 }
78 }
79 edges.sort();
80 edges
81 }
82}