Skip to main content

llm_wiki/
graph.rs

1use std::cmp::Reverse;
2use std::collections::{HashMap, HashSet, VecDeque};
3use std::sync::Arc;
4
5use anyhow::Result;
6use chrono::Utc;
7use petgraph::Direction;
8use petgraph::graph::{DiGraph, NodeIndex};
9use serde::{Deserialize, Serialize};
10use tantivy::Searcher;
11use tantivy::collector::TopDocs;
12use tantivy::query::AllQuery;
13use tantivy::schema::Value;
14
15use petgraph_live::cache::GenerationCache;
16use petgraph_live::live::GraphState;
17
18use crate::index_manager::SpaceIndexManager;
19use crate::index_schema::IndexSchema;
20use crate::links::ParsedLink;
21use crate::type_registry::SpaceTypeRegistry;
22
23// ── Types ─────────────────────────────────────────────────────────────────────
24
25/// A node in the concept graph representing one wiki page.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct PageNode {
28    /// Slug identifying this page within its wiki.
29    pub slug: String,
30    /// Display title of the page.
31    pub title: String,
32    /// Frontmatter type of the page.
33    pub r#type: String,
34    /// True for cross-wiki placeholder nodes not present in the local index.
35    #[serde(default)]
36    pub external: bool,
37}
38
39/// A directed edge in the wiki concept graph with a relation label.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct LabeledEdge {
42    /// Relation label (e.g. `"links-to"`, `"cites"`, `"supersedes"`).
43    pub relation: String,
44}
45
46/// Directed graph type used for the wiki concept graph.
47pub type WikiGraph = DiGraph<PageNode, LabeledEdge>;
48
49/// Filtering parameters for graph construction and subgraph extraction.
50#[derive(Debug, Clone, Default)]
51pub struct GraphFilter {
52    /// Root slug for subgraph extraction (None = full graph).
53    pub root: Option<String>,
54    /// Maximum hop depth from root (None = use config default).
55    pub depth: Option<usize>,
56    /// Page types to include (empty = all types).
57    pub types: Vec<String>,
58    /// Edge relation label to filter on (None = all relations).
59    pub relation: Option<String>,
60}
61
62impl GraphFilter {
63    /// Returns `true` when the filter represents an unfiltered full-graph request.
64    /// Note: `depth` is intentionally excluded — a depth-limited full graph still uses the full cache.
65    pub fn is_default(&self) -> bool {
66        self.root.is_none() && self.types.is_empty() && self.relation.is_none()
67    }
68}
69
70/// Summary of a completed graph build or render operation.
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct GraphReport {
73    /// Total number of nodes in the graph.
74    pub nodes: usize,
75    /// Total number of edges in the graph.
76    pub edges: usize,
77    /// Rendered graph content (Mermaid, DOT, or LLM text).
78    pub output: String,
79}
80
81/// Health metrics computed from a built wiki graph.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct GraphMetrics {
84    /// Total number of nodes.
85    pub nodes: usize,
86    /// Total number of edges.
87    pub edges: usize,
88    /// Number of nodes with no incoming or outgoing edges.
89    pub orphans: usize,
90    /// Mean edge count per node (edges × 2 / nodes).
91    pub avg_connections: f64,
92    /// Graph density: edges / (nodes × (nodes − 1)).
93    pub density: f64,
94}
95
96/// Compute health metrics from a built graph.
97pub fn compute_metrics(graph: &WikiGraph) -> GraphMetrics {
98    let nodes = graph.node_count();
99    let edges = graph.edge_count();
100
101    let orphans = graph
102        .node_indices()
103        .filter(|&idx| {
104            graph.neighbors_directed(idx, Direction::Incoming).count() == 0
105                && graph.neighbors_directed(idx, Direction::Outgoing).count() == 0
106        })
107        .count();
108
109    let avg_connections = if nodes > 0 {
110        (edges as f64 * 2.0) / nodes as f64
111    } else {
112        0.0
113    };
114
115    let density = if nodes > 1 {
116        edges as f64 / (nodes as f64 * (nodes as f64 - 1.0))
117    } else {
118        0.0
119    };
120
121    GraphMetrics {
122        nodes,
123        edges,
124        orphans,
125        avg_connections,
126        density,
127    }
128}
129
130// ── Community detection (Louvain) ─────────────────────────────────────────────
131
132/// Louvain community detection results for a wiki graph.
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct CommunityStats {
135    /// Number of distinct clusters found.
136    pub count: usize,
137    /// Size (node count) of the largest cluster.
138    pub largest: usize,
139    /// Size (node count) of the smallest cluster.
140    pub smallest: usize,
141    /// Slugs of pages in communities of size ≤ 2 — weakly connected pages.
142    pub isolated: Vec<String>,
143}
144
145/// Cached community detection results for a space.
146pub struct CommunityData {
147    /// Number of local (non-external) nodes in the graph at cache time.
148    pub local_count: usize,
149    /// Slug → community id map.
150    pub map: Arc<HashMap<String, usize>>,
151    /// Aggregated Louvain stats.
152    pub stats: CommunityStats,
153}
154
155/// Graph cache abstraction — either in-memory only or snapshot-backed.
156///
157/// Constructed by `mount_space` based on `GraphConfig.snapshot`.
158/// `NoSnapshot` preserves Phase 1 behaviour; `WithSnapshot` adds warm-start.
159pub enum WikiGraphCache {
160    NoSnapshot(GenerationCache<WikiGraph>),
161    WithSnapshot(GraphState<WikiGraph>),
162}
163
164impl WikiGraphCache {
165    /// Return the current graph, rebuilding if the generation key changed.
166    pub fn get_fresh(
167        &self,
168        current_gen: u64,
169        builder: impl FnOnce() -> anyhow::Result<WikiGraph>,
170    ) -> anyhow::Result<Arc<WikiGraph>> {
171        match self {
172            WikiGraphCache::NoSnapshot(cache) => cache.get_or_build(current_gen, builder),
173            WikiGraphCache::WithSnapshot(state) => {
174                state.get_fresh().map_err(|e| anyhow::anyhow!("{e}"))
175            }
176        }
177    }
178
179    /// Force a full rebuild and (if snapshot-backed) persist a new snapshot.
180    pub fn rebuild(
181        &self,
182        current_gen: u64,
183        builder: impl FnOnce() -> anyhow::Result<WikiGraph>,
184    ) -> anyhow::Result<Arc<WikiGraph>> {
185        match self {
186            WikiGraphCache::NoSnapshot(cache) => {
187                cache.invalidate();
188                cache.get_or_build(current_gen, builder)
189            }
190            WikiGraphCache::WithSnapshot(state) => {
191                state.rebuild().map_err(|e| anyhow::anyhow!("{e}"))
192            }
193        }
194    }
195}
196
197/// Build undirected adjacency by symmetrizing the directed graph. External nodes excluded.
198fn build_adjacency(graph: &WikiGraph) -> HashMap<NodeIndex, HashSet<NodeIndex>> {
199    let mut adj: HashMap<NodeIndex, HashSet<NodeIndex>> = HashMap::new();
200    for idx in graph.node_indices() {
201        if !graph[idx].external {
202            adj.entry(idx).or_default();
203        }
204    }
205    for edge in graph.edge_indices() {
206        let (a, b) = graph.edge_endpoints(edge).unwrap();
207        if graph[a].external || graph[b].external {
208            continue;
209        }
210        adj.entry(a).or_default().insert(b);
211        adj.entry(b).or_default().insert(a);
212    }
213    adj
214}
215
216/// Louvain phase 1: greedy modularity optimisation — each node moves to the neighboring
217/// community with the highest modularity gain, applied immediately (in-place).
218///
219/// Repeats until no node moves in a full pass, capped at `n × 10` passes to prevent
220/// oscillation: mid-pass moves alter `sigma_tot` for later nodes, which can cause
221/// them to swap back, creating a cycle that never terminates on small or cyclic graphs.
222///
223/// Returns `true` if any move occurred.
224fn louvain_phase1(
225    adj: &HashMap<NodeIndex, HashSet<NodeIndex>>,
226    community: &mut HashMap<NodeIndex, usize>,
227    degrees: &HashMap<NodeIndex, usize>,
228    m: usize,
229) -> bool {
230    if m == 0 {
231        return false;
232    }
233    let m_f = m as f64;
234
235    let mut sorted_nodes: Vec<NodeIndex> = adj.keys().copied().collect();
236    // Sort by slug for determinism — we need the graph ref here; use NodeIndex raw id as proxy
237    // (caller guarantees deterministic ordering via node insertion order from sorted-slug pass)
238    sorted_nodes.sort_by_key(|n| n.index());
239
240    let mut moved = false;
241    let max_passes = sorted_nodes.len().max(10) * 10;
242    let mut pass = 0;
243
244    loop {
245        if pass >= max_passes {
246            break;
247        }
248        pass += 1;
249        let mut any_move = false;
250        for &node in &sorted_nodes {
251            let current_c = *community.get(&node).unwrap();
252            let k_i = *degrees.get(&node).unwrap_or(&0) as f64;
253
254            // Gather neighboring communities and k_i_in for each
255            let mut neighbor_c_edges: HashMap<usize, usize> = HashMap::new();
256            for &nb in adj.get(&node).into_iter().flatten() {
257                let nb_c = *community.get(&nb).unwrap();
258                *neighbor_c_edges.entry(nb_c).or_default() += 1;
259            }
260
261            // sigma_tot per community (sum of degrees)
262            let mut sigma_tot: HashMap<usize, f64> = HashMap::new();
263            for (&n2, &c2) in community.iter() {
264                if n2 == node {
265                    continue;
266                }
267                let d = *degrees.get(&n2).unwrap_or(&0) as f64;
268                *sigma_tot.entry(c2).or_default() += d;
269            }
270
271            // Find best community
272            let mut best_c = current_c;
273            let mut best_gain = 0.0_f64;
274
275            for (&c, &k_i_in) in &neighbor_c_edges {
276                if c == current_c {
277                    continue;
278                }
279                let st = *sigma_tot.get(&c).unwrap_or(&0.0);
280                let gain = (k_i_in as f64) / m_f - st * k_i / (2.0 * m_f * m_f);
281                if gain > best_gain {
282                    best_gain = gain;
283                    best_c = c;
284                }
285            }
286
287            if best_c != current_c {
288                community.insert(node, best_c);
289                any_move = true;
290                moved = true;
291            }
292        }
293        if !any_move {
294            break;
295        }
296    }
297    moved
298}
299
300/// Run Louvain community detection on `graph`. Returns `None` when local node count < `min_nodes`.
301/// Delegates to `build_community_data` — see its doc for algorithm details.
302pub fn compute_communities(graph: &WikiGraph, min_nodes: usize) -> Option<CommunityStats> {
303    build_community_data(graph, min_nodes).0
304}
305
306/// Returns slug → community id map, or `None` when below threshold.
307/// Delegates to `build_community_data` — shares the same Louvain run as `compute_communities`.
308pub fn node_community_map(graph: &WikiGraph, min_nodes: usize) -> Option<HashMap<String, usize>> {
309    build_community_data(graph, min_nodes).1
310}
311
312// ── build_graph ───────────────────────────────────────────────────────────────
313
314/// Build the concept graph from the tantivy index. No file I/O.
315/// Edge relations come from `x-graph-edges` declarations in the type registry.
316/// Body `[[wikilinks]]` get a generic `links-to` relation.
317pub fn build_graph(
318    searcher: &Searcher,
319    is: &IndexSchema,
320    filter: &GraphFilter,
321    registry: &SpaceTypeRegistry,
322) -> Result<WikiGraph> {
323    let f_slug = is.field("slug");
324    let f_title = is.field("title");
325    let f_type = is.field("type");
326    let f_body_links = is.field("body_links");
327
328    let top_docs = searcher.search(&AllQuery, &TopDocs::with_limit(100_000).order_by_score())?;
329
330    let mut graph = WikiGraph::new();
331    let mut slug_to_idx: HashMap<String, NodeIndex> = HashMap::new();
332
333    struct DocInfo {
334        slug: String,
335        page_type: String,
336        body_links: Vec<String>,
337        edge_fields: Vec<(String, Vec<String>)>, // (field_name, target_slugs)
338    }
339    let mut all_docs: Vec<DocInfo> = Vec::new();
340
341    // First pass: create nodes and collect edge data
342    for (_score, doc_addr) in &top_docs {
343        let doc: tantivy::TantivyDocument = searcher.doc(*doc_addr)?;
344
345        let slug = doc
346            .get_first(f_slug)
347            .and_then(|v| v.as_str())
348            .unwrap_or("")
349            .to_string();
350        let title = doc
351            .get_first(f_title)
352            .and_then(|v| v.as_str())
353            .unwrap_or("")
354            .to_string();
355        let page_type = doc
356            .get_first(f_type)
357            .and_then(|v| v.as_str())
358            .unwrap_or("")
359            .to_string();
360
361        if !filter.types.is_empty() && !filter.types.contains(&page_type) {
362            continue;
363        }
364
365        let node = PageNode {
366            slug: slug.clone(),
367            title,
368            r#type: page_type.clone(),
369            external: false,
370        };
371        let idx = graph.add_node(node);
372        slug_to_idx.insert(slug.clone(), idx);
373
374        // Read body wiki-links
375        let body_links: Vec<String> = doc
376            .get_all(f_body_links)
377            .filter_map(|v| v.as_str().map(|s| s.to_string()))
378            .collect();
379
380        // Read declared edge fields from the index
381        let mut edge_fields = Vec::new();
382        for decl in registry.edges(&page_type) {
383            if let Some(field_handle) = is.try_field(&decl.field) {
384                let targets: Vec<String> = doc
385                    .get_all(field_handle)
386                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
387                    .collect();
388                if !targets.is_empty() {
389                    edge_fields.push((decl.field.clone(), targets));
390                }
391            }
392        }
393
394        all_docs.push(DocInfo {
395            slug,
396            page_type,
397            body_links,
398            edge_fields,
399        });
400    }
401
402    // Second pass: add edges
403    for doc_info in &all_docs {
404        let from_idx = match slug_to_idx.get(&doc_info.slug) {
405            Some(idx) => *idx,
406            None => continue,
407        };
408
409        // Declared edges (from x-graph-edges)
410        let edge_decls = registry.edges(&doc_info.page_type);
411        for (field_name, targets) in &doc_info.edge_fields {
412            let relation = edge_decls
413                .iter()
414                .find(|d| d.field == *field_name)
415                .map(|d| d.relation.as_str())
416                .unwrap_or("links-to");
417
418            if filter.relation.is_some() && filter.relation.as_deref() != Some(relation) {
419                continue;
420            }
421
422            for target in targets {
423                let to_idx = resolve_or_external(target, &mut graph, &mut slug_to_idx);
424                if let Some(to_idx) = to_idx
425                    && from_idx != to_idx
426                {
427                    graph.add_edge(
428                        from_idx,
429                        to_idx,
430                        LabeledEdge {
431                            relation: relation.to_string(),
432                        },
433                    );
434                }
435            }
436        }
437
438        // Body wiki-links → "links-to"
439        if filter.relation.is_none() || filter.relation.as_deref() == Some("links-to") {
440            for target in &doc_info.body_links {
441                let to_idx = resolve_or_external(target, &mut graph, &mut slug_to_idx);
442                if let Some(to_idx) = to_idx
443                    && from_idx != to_idx
444                {
445                    graph.add_edge(
446                        from_idx,
447                        to_idx,
448                        LabeledEdge {
449                            relation: "links-to".into(),
450                        },
451                    );
452                }
453            }
454        }
455    }
456
457    // Apply root + depth filter
458    if let Some(ref root_slug) = filter.root {
459        return Ok(subgraph(&graph, root_slug, filter.depth.unwrap_or(3)));
460    }
461
462    Ok(graph)
463}
464
465// ── helpers ───────────────────────────────────────────────────────────────────
466
467/// Resolve a target slug to a node index. If the target is a `wiki://` URI,
468/// insert an external placeholder node on demand. Returns `None` only for
469/// plain local slugs that don't exist in the index.
470fn resolve_or_external(
471    target: &str,
472    graph: &mut WikiGraph,
473    slug_to_idx: &mut HashMap<String, NodeIndex>,
474) -> Option<NodeIndex> {
475    if target.starts_with("wiki://") {
476        let key = target.to_string();
477        let idx = *slug_to_idx.entry(key.clone()).or_insert_with(|| {
478            let (_wiki, slug) = match ParsedLink::parse(target) {
479                ParsedLink::CrossWiki { wiki, slug } => (wiki, slug),
480                ParsedLink::Local(_) => ("external".to_string(), target.to_string()),
481            };
482            graph.add_node(PageNode {
483                slug: slug.clone(),
484                title: key.clone(),
485                r#type: "external".to_string(),
486                external: true,
487            })
488        });
489        Some(idx)
490    } else {
491        slug_to_idx.get(target).copied()
492    }
493}
494
495// ── build_graph_cross_wiki ────────────────────────────────────────────────────
496
497/// Build a unified graph merging all provided wikis. Cross-wiki edges that
498/// were external placeholders in single-wiki graphs become resolved connections
499/// when both endpoint wikis are present in `wikis`.
500pub fn build_graph_cross_wiki(
501    wikis: &[(&str, &Searcher, &IndexSchema, &SpaceTypeRegistry)],
502    filter: &GraphFilter,
503) -> Result<WikiGraph> {
504    // Build per-wiki graphs and merge into one, prefixing slugs with wiki name
505    let mut merged = WikiGraph::new();
506    // Map from "wikiname/slug" -> NodeIndex in merged graph
507    let mut global_idx: HashMap<String, NodeIndex> = HashMap::new();
508
509    // First: add all local (non-external) nodes from each wiki
510    for (wiki_name, searcher, is, registry) in wikis {
511        let g = build_graph(searcher, is, filter, registry)?;
512        for idx in g.node_indices() {
513            let node = &g[idx];
514            if node.external {
515                continue; // will re-resolve below
516            }
517            let key = format!("{wiki_name}/{}", node.slug);
518            let new_idx = merged.add_node(PageNode {
519                slug: key.clone(),
520                title: node.title.clone(),
521                r#type: node.r#type.clone(),
522                external: false,
523            });
524            global_idx.insert(key, new_idx);
525        }
526    }
527
528    // Second: add edges, re-resolving cross-wiki targets
529    for (wiki_name, searcher, is, registry) in wikis {
530        let g = build_graph(searcher, is, filter, registry)?;
531        for edge_idx in g.edge_indices() {
532            let (from, to) = g.edge_endpoints(edge_idx).unwrap();
533            let from_node = &g[from];
534            let to_node = &g[to];
535
536            let from_key = format!("{wiki_name}/{}", from_node.slug);
537            let from_merged = match global_idx.get(&from_key) {
538                Some(&i) => i,
539                None => continue,
540            };
541
542            // to_node is external if it has external=true; its title is the wiki:// URI
543            let to_key = if to_node.external {
544                // title was set to "wiki://otherwiki/slug"
545                if let ParsedLink::CrossWiki { wiki, slug } = ParsedLink::parse(&to_node.title) {
546                    format!("{wiki}/{slug}")
547                } else {
548                    continue;
549                }
550            } else {
551                format!("{wiki_name}/{}", to_node.slug)
552            };
553
554            let to_merged = match global_idx.get(&to_key) {
555                Some(&i) => i,
556                None => {
557                    // target wiki not mounted — keep as external placeholder
558                    *global_idx.entry(to_key.clone()).or_insert_with(|| {
559                        merged.add_node(PageNode {
560                            slug: to_key.clone(),
561                            title: to_node.title.clone(),
562                            r#type: "external".to_string(),
563                            external: true,
564                        })
565                    })
566                }
567            };
568
569            if from_merged != to_merged {
570                merged.add_edge(
571                    from_merged,
572                    to_merged,
573                    LabeledEdge {
574                        relation: g[edge_idx].relation.clone(),
575                    },
576                );
577            }
578        }
579    }
580
581    Ok(merged)
582}
583
584// ── merge_cached_graphs ──────────────────────────────────────────────────────
585
586/// Merge pre-built per-space graphs into a single cross-wiki graph.
587/// Accepts `Arc<WikiGraph>` inputs (from cache) instead of building from index.
588/// Matches the slug-prefixing and external-node resolution of `build_graph_cross_wiki`.
589///
590/// # Precondition
591/// Each `Arc<WikiGraph>` in `wikis` should have been built with the same `filter`.
592/// The relation and type filters are re-applied here as a safety gate, but if the
593/// cached graph was built without a filter, this function is the only gate.
594///
595/// `filter.root` and `filter.depth` are NOT re-applied — `get_or_build_graph` only
596/// caches the full unfiltered graph (it bypasses cache for non-default filters), so
597/// subgraph traversal from a root must be done by the caller after merging.
598/// In `ops/graph.rs`, the cross-wiki path always calls `get_or_build_graph` with
599/// `is_default()` filter, so this precondition holds in practice.
600pub fn merge_cached_graphs(
601    wikis: &[(&str, Arc<WikiGraph>)],
602    filter: &GraphFilter,
603) -> Result<WikiGraph> {
604    let mut merged = WikiGraph::new();
605    let mut global_idx: HashMap<String, NodeIndex> = HashMap::new();
606
607    // First pass: add all local (non-external) nodes with "wikiname/slug" keys
608    for (wiki_name, graph) in wikis {
609        for idx in graph.node_indices() {
610            let node = &graph[idx];
611            if node.external {
612                continue;
613            }
614            // Type filter re-applied here — matches build_graph_cross_wiki's first-pass filter.
615            // Precondition: input graphs should have been built with matching filter.
616            if !filter.types.is_empty() && !filter.types.contains(&node.r#type) {
617                continue;
618            }
619            let key = format!("{wiki_name}/{}", node.slug);
620            let new_idx = merged.add_node(PageNode {
621                slug: key.clone(),
622                title: node.title.clone(),
623                r#type: node.r#type.clone(),
624                external: false,
625            });
626            global_idx.insert(key, new_idx);
627        }
628    }
629
630    // Second pass: add edges, re-resolving cross-wiki external nodes
631    for (wiki_name, graph) in wikis {
632        for edge_idx in graph.edge_indices() {
633            let (from, to) = graph.edge_endpoints(edge_idx).unwrap();
634            let from_node = &graph[from];
635            let to_node = &graph[to];
636
637            if from_node.external {
638                continue;
639            }
640
641            // Relation filter re-applied here. If graphs were built without this filter,
642            // this is the only gate — see precondition in doc comment.
643            let relation = graph[edge_idx].relation.clone();
644            if let Some(ref rel_filter) = filter.relation
645                && &relation != rel_filter
646            {
647                continue;
648            }
649
650            let from_key = format!("{wiki_name}/{}", from_node.slug);
651            let from_merged = match global_idx.get(&from_key) {
652                Some(&i) => i,
653                None => continue,
654            };
655
656            // Resolve destination: external nodes have title = "wiki://otherwiki/slug"
657            let to_key = if to_node.external {
658                if let ParsedLink::CrossWiki { wiki, slug } = ParsedLink::parse(&to_node.title) {
659                    format!("{wiki}/{slug}")
660                } else {
661                    continue;
662                }
663            } else {
664                format!("{wiki_name}/{}", to_node.slug)
665            };
666
667            let to_merged = match global_idx.get(&to_key) {
668                Some(&i) => i,
669                None => {
670                    // Target wiki not mounted — keep as external placeholder
671                    *global_idx.entry(to_key.clone()).or_insert_with(|| {
672                        merged.add_node(PageNode {
673                            slug: to_key.clone(),
674                            title: to_node.title.clone(),
675                            r#type: "external".to_string(),
676                            external: true,
677                        })
678                    })
679                }
680            };
681
682            if from_merged != to_merged {
683                merged.add_edge(from_merged, to_merged, LabeledEdge { relation });
684            }
685        }
686    }
687
688    Ok(merged)
689}
690
691// ── render_llms ───────────────────────────────────────────────────────────────
692
693/// Natural language description of graph structure for direct LLM consumption.
694pub fn render_llms(graph: &WikiGraph) -> String {
695    let nodes = graph.node_count();
696    let edges = graph.edge_count();
697
698    // Separate external placeholder nodes
699    let external_refs: Vec<String> = graph
700        .node_indices()
701        .filter(|&idx| graph[idx].external)
702        .map(|idx| graph[idx].title.clone())
703        .collect();
704
705    // Group local nodes by type
706    let mut by_type: HashMap<String, Vec<String>> = HashMap::new();
707    for idx in graph.node_indices() {
708        let node = &graph[idx];
709        if node.external {
710            continue;
711        }
712        by_type
713            .entry(node.r#type.clone())
714            .or_default()
715            .push(node.title.clone());
716    }
717
718    // Sort type groups by count descending
719    let mut type_groups: Vec<(String, Vec<String>)> = by_type.into_iter().collect();
720    type_groups.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then(a.0.cmp(&b.0)));
721
722    // Count edge relations
723    let mut relation_counts: HashMap<String, usize> = HashMap::new();
724    for edge in graph.edge_indices() {
725        *relation_counts
726            .entry(graph[edge].relation.clone())
727            .or_default() += 1;
728    }
729    let mut relations: Vec<(String, usize)> = relation_counts.into_iter().collect();
730    relations.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
731
732    // Compute per-node total degree for hub detection
733    let mut degree: Vec<(usize, String)> = graph
734        .node_indices()
735        .map(|idx| {
736            let d = graph.neighbors_directed(idx, Direction::Incoming).count()
737                + graph.neighbors_directed(idx, Direction::Outgoing).count();
738            (d, graph[idx].title.clone())
739        })
740        .collect();
741    degree.sort_by_key(|a| Reverse(a.0));
742    let top_hubs: Vec<String> = degree
743        .iter()
744        .take(5)
745        .filter(|(d, _)| *d > 0)
746        .map(|(d, t)| format!("{t} ({d} edges)"))
747        .collect();
748
749    // Isolated nodes (no edges at all)
750    let isolated: Vec<String> = graph
751        .node_indices()
752        .filter(|&idx| {
753            graph.neighbors_directed(idx, Direction::Incoming).count() == 0
754                && graph.neighbors_directed(idx, Direction::Outgoing).count() == 0
755        })
756        .map(|idx| graph[idx].title.clone())
757        .collect();
758
759    let cluster_count = type_groups.len();
760
761    let mut out = String::new();
762    out.push_str(&format!(
763        "The wiki graph has {nodes} nodes and {edges} edges across {cluster_count} type groups.\n\n"
764    ));
765
766    for (type_name, mut titles) in type_groups {
767        titles.sort();
768        let count = titles.len();
769        let sample = if titles.len() > 8 {
770            format!("{}, ...", titles[..8].join(", "))
771        } else {
772            titles.join(", ")
773        };
774        out.push_str(&format!("**{type_name}** ({count} nodes): {sample}\n"));
775    }
776
777    if !top_hubs.is_empty() {
778        out.push_str(&format!("\nKey hubs: {}\n", top_hubs.join(", ")));
779    }
780
781    if !relations.is_empty() {
782        out.push_str("\n**Edges by relation:**\n");
783        for (rel, count) in &relations {
784            out.push_str(&format!("- `{rel}` ({count})\n"));
785        }
786    }
787
788    if !isolated.is_empty() {
789        out.push_str(&format!(
790            "\n**Isolated nodes ({}):** {}\n",
791            isolated.len(),
792            isolated.join(", ")
793        ));
794    }
795
796    if !external_refs.is_empty() {
797        let mut sorted = external_refs.clone();
798        sorted.sort();
799        out.push_str(&format!(
800            "\n**External references ({}):** {}\n",
801            sorted.len(),
802            sorted.join(", ")
803        ));
804    }
805
806    out
807}
808
809// ── render_mermaid ────────────────────────────────────────────────────────────
810
811/// Render the wiki graph as a Mermaid `graph LR` diagram.
812pub fn render_mermaid(graph: &WikiGraph) -> String {
813    let mut out = String::from("graph LR\n");
814
815    // Collect unique types for classDef
816    let mut types_seen: HashSet<String> = HashSet::new();
817
818    let mut has_external = false;
819
820    // Declare nodes with titles and type classes
821    for idx in graph.node_indices() {
822        let node = &graph[idx];
823        let safe_id = mermaid_id(&node.title);
824        if node.external {
825            out.push_str(&format!("  {safe_id}[\"{}\"]:::external\n", node.title));
826            has_external = true;
827        } else {
828            out.push_str(&format!(
829                "  {safe_id}[\"{}\"]:::{}\n",
830                node.title, node.r#type
831            ));
832            types_seen.insert(node.r#type.clone());
833        }
834    }
835
836    out.push('\n');
837
838    // Edges with relation labels
839    for edge in graph.edge_indices() {
840        let (from, to) = graph.edge_endpoints(edge).unwrap();
841        let from_id = mermaid_id(&graph[from].title);
842        let to_id = mermaid_id(&graph[to].title);
843        let relation = &graph[edge].relation;
844        out.push_str(&format!("  {from_id} -->|{relation}| {to_id}\n"));
845    }
846
847    // classDef for known types + external
848    out.push('\n');
849    if has_external {
850        out.push_str("  classDef external fill:#eee,stroke:#999,stroke-dasharray:5 5\n");
851    }
852    let type_colors = [
853        ("concept", "#cce5ff"),
854        ("query-result", "#cce5ff"),
855        ("paper", "#d4edda"),
856        ("article", "#d4edda"),
857        ("documentation", "#d4edda"),
858        ("skill", "#ffeeba"),
859        ("doc", "#e2e3e5"),
860        ("section", "#f8f9fa"),
861    ];
862    for (t, color) in &type_colors {
863        if types_seen.contains(*t) {
864            out.push_str(&format!("  classDef {t} fill:{color}\n"));
865        }
866    }
867
868    out
869}
870
871fn mermaid_id(slug: &str) -> String {
872    slug.replace("://", "__").replace(['/', '-', ':'], "_")
873}
874
875// ── render_dot ────────────────────────────────────────────────────────────────
876
877/// Render the wiki graph as a Graphviz DOT `digraph`.
878pub fn render_dot(graph: &WikiGraph) -> String {
879    let mut out = String::from("digraph wiki {\n");
880
881    for idx in graph.node_indices() {
882        let node = &graph[idx];
883        if node.external {
884            out.push_str(&format!(
885                "  \"{}\" [label=\"{}\" type=\"external\" style=\"dashed\"];\n",
886                node.title, node.title
887            ));
888        } else {
889            out.push_str(&format!(
890                "  \"{}\" [label=\"{}\" type=\"{}\"];\n",
891                node.slug, node.title, node.r#type
892            ));
893        }
894    }
895
896    for edge in graph.edge_indices() {
897        let (from, to) = graph.edge_endpoints(edge).unwrap();
898        let relation = &graph[edge].relation;
899        let from_id = if graph[from].external {
900            &graph[from].title
901        } else {
902            &graph[from].slug
903        };
904        let to_id = if graph[to].external {
905            &graph[to].title
906        } else {
907            &graph[to].slug
908        };
909        out.push_str(&format!(
910            "  \"{from_id}\" -> \"{to_id}\" [label=\"{relation}\"];\n"
911        ));
912    }
913
914    out.push_str("}\n");
915    out
916}
917
918// ── wrap_graph_md ─────────────────────────────────────────────────────────────
919
920/// Wrap rendered graph content in a YAML frontmatter + code-fence Markdown document.
921pub fn wrap_graph_md(rendered: &str, format: &str, filter: &GraphFilter) -> String {
922    let now = Utc::now().to_rfc3339();
923    let root = filter.root.as_deref().unwrap_or("");
924    let depth = filter.depth.unwrap_or(0);
925    let types = if filter.types.is_empty() {
926        "[]".to_string()
927    } else {
928        format!("[{}]", filter.types.join(", "))
929    };
930
931    let mut out = String::new();
932    out.push_str("---\n");
933    out.push_str("title: \"Wiki Graph\"\n");
934    out.push_str(&format!("generated: \"{now}\"\n"));
935    out.push_str(&format!("format: {format}\n"));
936    out.push_str(&format!("root: {root}\n"));
937    out.push_str(&format!("depth: {depth}\n"));
938    out.push_str(&format!("types: {types}\n"));
939    out.push_str("status: generated\n");
940    out.push_str("---\n\n");
941    out.push_str(&format!("```{format}\n"));
942    out.push_str(rendered);
943    out.push_str("```\n");
944    out
945}
946
947// ── subgraph ──────────────────────────────────────────────────────────────────
948
949/// Extract a BFS subgraph rooted at `root_slug` up to `depth` hops in both directions.
950pub fn subgraph(graph: &WikiGraph, root_slug: &str, depth: usize) -> WikiGraph {
951    let root_idx = match graph
952        .node_indices()
953        .find(|&idx| graph[idx].slug == root_slug)
954    {
955        Some(idx) => idx,
956        None => return WikiGraph::new(),
957    };
958
959    let mut visited: HashSet<NodeIndex> = HashSet::new();
960    let mut queue: VecDeque<(NodeIndex, usize)> = VecDeque::new();
961    queue.push_back((root_idx, 0));
962    visited.insert(root_idx);
963
964    while let Some((node, d)) = queue.pop_front() {
965        if d >= depth {
966            continue;
967        }
968        for neighbor in graph.neighbors_directed(node, Direction::Outgoing) {
969            if visited.insert(neighbor) {
970                queue.push_back((neighbor, d + 1));
971            }
972        }
973        for neighbor in graph.neighbors_directed(node, Direction::Incoming) {
974            if visited.insert(neighbor) {
975                queue.push_back((neighbor, d + 1));
976            }
977        }
978    }
979
980    let mut new_graph = WikiGraph::new();
981    let mut old_to_new: HashMap<NodeIndex, NodeIndex> = HashMap::new();
982
983    for &old_idx in &visited {
984        let new_idx = new_graph.add_node(graph[old_idx].clone());
985        old_to_new.insert(old_idx, new_idx);
986    }
987
988    for edge in graph.edge_indices() {
989        let (from, to) = graph.edge_endpoints(edge).unwrap();
990        if let (Some(&new_from), Some(&new_to)) = (old_to_new.get(&from), old_to_new.get(&to)) {
991            new_graph.add_edge(new_from, new_to, graph[edge].clone());
992        }
993    }
994
995    new_graph
996}
997
998// ── build_community_data ─────────────────────────────────────────────────────
999
1000/// Run Louvain once and return both community outputs.
1001/// Returns `(None, None)` when local node count < `min_nodes` (pass 0 to always run).
1002fn build_community_data(
1003    graph: &WikiGraph,
1004    min_nodes: usize,
1005) -> (Option<CommunityStats>, Option<HashMap<String, usize>>) {
1006    let local_nodes: Vec<NodeIndex> = {
1007        let mut v: Vec<NodeIndex> = graph
1008            .node_indices()
1009            .filter(|&idx| !graph[idx].external)
1010            .collect();
1011        v.sort_by(|&a, &b| graph[a].slug.cmp(&graph[b].slug));
1012        v
1013    };
1014
1015    if local_nodes.len() < min_nodes {
1016        return (None, None);
1017    }
1018
1019    let adj = build_adjacency(graph);
1020    let degrees: HashMap<NodeIndex, usize> =
1021        local_nodes.iter().map(|&n| (n, adj[&n].len())).collect();
1022    let m: usize = adj.values().map(|s| s.len()).sum::<usize>() / 2;
1023
1024    let mut community: HashMap<NodeIndex, usize> = local_nodes
1025        .iter()
1026        .enumerate()
1027        .map(|(i, &n)| (n, i))
1028        .collect();
1029
1030    louvain_phase1(&adj, &mut community, &degrees, m);
1031
1032    // Normalize community ids to contiguous 0..k
1033    let mut id_remap: HashMap<usize, usize> = HashMap::new();
1034    let mut next_id = 0usize;
1035    for &n in &local_nodes {
1036        let c = *community.get(&n).unwrap();
1037        id_remap.entry(c).or_insert_with(|| {
1038            let id = next_id;
1039            next_id += 1;
1040            id
1041        });
1042    }
1043    for val in community.values_mut() {
1044        *val = *id_remap.get(val).unwrap();
1045    }
1046
1047    // Build community_map
1048    let community_map: HashMap<String, usize> = local_nodes
1049        .iter()
1050        .map(|&n| (graph[n].slug.clone(), community[&n]))
1051        .collect();
1052
1053    // Build community_stats (mirrors compute_communities logic)
1054    let count = next_id;
1055    let mut sizes: HashMap<usize, usize> = HashMap::new();
1056    for &c in community.values() {
1057        *sizes.entry(c).or_default() += 1;
1058    }
1059    let largest = sizes.values().copied().max().unwrap_or(0);
1060    let smallest = sizes.values().copied().min().unwrap_or(0);
1061    let mut isolated: Vec<String> = local_nodes
1062        .iter()
1063        .filter(|&&n| {
1064            let c = *community.get(&n).unwrap();
1065            *sizes.get(&c).unwrap_or(&0) <= 2
1066        })
1067        .map(|&n| graph[n].slug.clone())
1068        .collect();
1069    isolated.sort();
1070
1071    let stats = CommunityStats {
1072        count,
1073        largest,
1074        smallest,
1075        isolated,
1076    };
1077
1078    (Some(stats), Some(community_map))
1079}
1080
1081// ── Cached graph accessors ───────────────────────────────────────────────────
1082
1083/// Return cached full graph, or build and cache on miss.
1084/// Filtered (non-default) requests bypass cache entirely.
1085pub fn get_or_build_graph(
1086    index_schema: &IndexSchema,
1087    type_registry: &SpaceTypeRegistry,
1088    index_manager: &SpaceIndexManager,
1089    graph_cache: &WikiGraphCache,
1090    searcher: &Searcher,
1091    filter: &GraphFilter,
1092) -> Result<Arc<WikiGraph>> {
1093    if !filter.is_default() {
1094        let g = build_graph(searcher, index_schema, filter, type_registry)?;
1095        return Ok(Arc::new(g));
1096    }
1097
1098    let current_gen = index_manager.generation();
1099    graph_cache.get_fresh(current_gen, || {
1100        build_graph(
1101            searcher,
1102            index_schema,
1103            &GraphFilter::default(),
1104            type_registry,
1105        )
1106    })
1107}
1108
1109/// Return cached community map, or None if graph is below `min_nodes` threshold.
1110///
1111/// Hot path (both caches warm): community_cache hits immediately — graph_cache not touched.
1112/// Cold path (miss): graph built and cached first, community built and cached inside closure.
1113pub fn get_cached_community_map(
1114    index_schema: &IndexSchema,
1115    type_registry: &SpaceTypeRegistry,
1116    index_manager: &SpaceIndexManager,
1117    graph_cache: &WikiGraphCache,
1118    community_cache: &petgraph_live::cache::GenerationCache<CommunityData>,
1119    searcher: &Searcher,
1120    min_nodes: usize,
1121) -> Result<Option<Arc<HashMap<String, usize>>>> {
1122    let current_gen = index_manager.generation();
1123
1124    let community = community_cache.get_or_build(current_gen, || -> Result<CommunityData> {
1125        let graph = graph_cache.get_fresh(current_gen, || {
1126            build_graph(
1127                searcher,
1128                index_schema,
1129                &GraphFilter::default(),
1130                type_registry,
1131            )
1132        })?;
1133        let local_count = graph.node_indices().filter(|&i| !graph[i].external).count();
1134        let (stats_opt, map_opt) = build_community_data(&graph, 0);
1135        let stats = stats_opt.unwrap_or(CommunityStats {
1136            count: 0,
1137            largest: 0,
1138            smallest: 0,
1139            isolated: vec![],
1140        });
1141        let map = map_opt.unwrap_or_default();
1142        Ok(CommunityData {
1143            local_count,
1144            map: Arc::new(map),
1145            stats,
1146        })
1147    })?;
1148
1149    if community.local_count < min_nodes {
1150        return Ok(None);
1151    }
1152    Ok(Some(Arc::clone(&community.map)))
1153}
1154
1155/// Return cached CommunityStats, or None if graph is below threshold.
1156///
1157/// Hot path: community_cache hits immediately — graph_cache not touched.
1158pub fn get_cached_community_stats(
1159    index_schema: &IndexSchema,
1160    type_registry: &SpaceTypeRegistry,
1161    index_manager: &SpaceIndexManager,
1162    graph_cache: &WikiGraphCache,
1163    community_cache: &petgraph_live::cache::GenerationCache<CommunityData>,
1164    searcher: &Searcher,
1165    min_nodes: usize,
1166) -> Result<Option<CommunityStats>> {
1167    let current_gen = index_manager.generation();
1168
1169    let community = community_cache.get_or_build(current_gen, || -> Result<CommunityData> {
1170        let graph = graph_cache.get_fresh(current_gen, || {
1171            build_graph(
1172                searcher,
1173                index_schema,
1174                &GraphFilter::default(),
1175                type_registry,
1176            )
1177        })?;
1178        let local_count = graph.node_indices().filter(|&i| !graph[i].external).count();
1179        let (stats_opt, map_opt) = build_community_data(&graph, 0);
1180        let stats = stats_opt.unwrap_or(CommunityStats {
1181            count: 0,
1182            largest: 0,
1183            smallest: 0,
1184            isolated: vec![],
1185        });
1186        let map = map_opt.unwrap_or_default();
1187        Ok(CommunityData {
1188            local_count,
1189            map: Arc::new(map),
1190            stats,
1191        })
1192    })?;
1193
1194    if community.local_count < min_nodes {
1195        return Ok(None);
1196    }
1197    Ok(Some(community.stats.clone()))
1198}
1199
1200#[cfg(test)]
1201mod tests {
1202    use super::*;
1203
1204    #[test]
1205    fn labeled_edge_serializes() {
1206        let e = LabeledEdge {
1207            relation: "links-to".to_string(),
1208        };
1209        let json = serde_json::to_string(&e).unwrap();
1210        let back: LabeledEdge = serde_json::from_str(&json).unwrap();
1211        assert_eq!(back.relation, "links-to");
1212    }
1213
1214    #[test]
1215    fn community_data_constructs() {
1216        use std::collections::HashMap;
1217        let data = CommunityData {
1218            local_count: 3,
1219            map: std::sync::Arc::new(HashMap::from([("slug-a".to_string(), 0usize)])),
1220            stats: CommunityStats {
1221                count: 1,
1222                largest: 1,
1223                smallest: 1,
1224                isolated: vec![],
1225            },
1226        };
1227        assert_eq!(data.local_count, 3);
1228        assert_eq!(data.map.get("slug-a"), Some(&0));
1229        assert_eq!(data.stats.count, 1);
1230    }
1231}