Skip to main content

llm_wiki/ops/
stats.rs

1use std::collections::HashMap;
2
3use anyhow::Result;
4use serde::{Deserialize, Serialize};
5
6use crate::engine::EngineState;
7use crate::graph::{
8    self, CommunityStats, GraphFilter, get_cached_community_stats, get_or_build_graph,
9};
10use crate::search;
11use tantivy::schema::Value;
12
13/// Page staleness bucketed by last-updated age.
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct StalenessBuckets {
16    /// Pages updated within the last 7 days.
17    pub fresh: usize,
18    /// Pages updated 7–30 days ago.
19    pub stale_7d: usize,
20    /// Pages updated more than 30 days ago (or with no date).
21    pub stale_30d: usize,
22}
23
24/// Summary health status of the tantivy search index.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct IndexHealth {
27    /// True if the index is out of date relative to the wiki files.
28    pub stale: bool,
29    /// ISO-8601 timestamp of the last successful index build, if known.
30    pub built: Option<String>,
31}
32
33/// Aggregate statistics for a single wiki space.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct WikiStats {
36    /// Name of the wiki.
37    pub wiki: String,
38    /// Total number of indexed pages.
39    pub pages: usize,
40    /// Number of pages whose type is `"section"`.
41    pub sections: usize,
42    /// Page count per frontmatter type.
43    pub types: HashMap<String, u64>,
44    /// Page count per frontmatter status.
45    pub status: HashMap<String, u64>,
46    /// Number of pages with no incoming links.
47    pub orphans: usize,
48    /// Mean number of links per page (rounded to 2 decimal places).
49    pub avg_connections: f64,
50    /// Graph density (edges / max-possible-edges, rounded to 2 decimal places).
51    pub graph_density: f64,
52    /// Page staleness buckets by last-updated date.
53    pub staleness: StalenessBuckets,
54    /// Index health — staleness and last build timestamp.
55    pub index: IndexHealth,
56    /// Louvain community detection results; `None` when graph is below `min_nodes_for_communities`.
57    pub communities: Option<CommunityStats>,
58    /// Maximum shortest directed-path length between any two pages.
59    /// `None` when graph exceeds `max_nodes_for_diameter` or `structural_algorithms` is false.
60    pub diameter: Option<f32>,
61    /// Minimum eccentricity — closest page to all others on average.
62    /// `None` under same conditions as `diameter`.
63    pub radius: Option<f32>,
64    /// Slugs with eccentricity equal to `radius` (central hub pages).
65    /// Empty when `diameter` is `None`.
66    pub center: Vec<String>,
67    /// Non-null when O(n²) algorithms were skipped due to graph size.
68    pub structural_note: Option<String>,
69}
70
71/// Compute aggregate stats for a wiki — page counts, graph metrics, staleness, and index health.
72pub fn stats(engine: &EngineState, wiki_name: &str) -> Result<WikiStats> {
73    let space = engine.space(wiki_name)?;
74
75    // Page counts + facets from list
76    let searcher = space.index_manager.searcher()?;
77    let list_result = search::list(
78        &search::ListOptions {
79            page_size: 1,
80            facets_top_tags: 0,
81            ..Default::default()
82        },
83        &searcher,
84        wiki_name,
85        &space.index_schema,
86    )?;
87
88    let pages = list_result.total;
89    let sections = *list_result.facets.r#type.get("section").unwrap_or(&0) as usize;
90    let types = list_result.facets.r#type;
91    let status = list_result.facets.status;
92
93    // Graph metrics
94    let wiki_graph = get_or_build_graph(
95        &space.index_schema,
96        &space.type_registry,
97        &space.index_manager,
98        &space.graph_cache,
99        &searcher,
100        &GraphFilter::default(),
101    )?;
102    let metrics = graph::compute_metrics(&wiki_graph);
103    let resolved = space.resolved_config(&engine.config);
104    let communities = get_cached_community_stats(
105        &space.index_schema,
106        &space.type_registry,
107        &space.index_manager,
108        &space.graph_cache,
109        &space.community_cache,
110        &searcher,
111        resolved.graph.min_nodes_for_communities,
112    )?;
113
114    // Staleness buckets from last_updated field
115    let staleness = compute_staleness(&searcher, &space.index_schema)?;
116
117    // Index health
118    let index_status = space.index_manager.status(&space.repo_root);
119    let index = IndexHealth {
120        stale: index_status.as_ref().map(|s| s.stale).unwrap_or(true),
121        built: index_status.ok().and_then(|s| s.built),
122    };
123
124    // Structural topology fields
125    let local_count = wiki_graph
126        .node_indices()
127        .filter(|&idx| !wiki_graph[idx].external)
128        .count();
129    let max_n = resolved.graph.max_nodes_for_diameter;
130
131    let (diameter, radius, center, structural_note) = if !resolved.graph.structural_algorithms {
132        (None, None, vec![], None)
133    } else if local_count <= max_n {
134        let d = petgraph_live::metrics::diameter(&*wiki_graph);
135        let r = petgraph_live::metrics::radius(&*wiki_graph);
136        let c: Vec<String> = petgraph_live::metrics::center(&*wiki_graph)
137            .into_iter()
138            .filter(|&idx| !wiki_graph[idx].external)
139            .map(|idx| wiki_graph[idx].slug.clone())
140            .collect();
141        (d, r, c, None)
142    } else {
143        let note = format!(
144            "graph too large for diameter computation ({local_count} nodes > max_nodes_for_diameter={max_n})"
145        );
146        (None, None, vec![], Some(note))
147    };
148
149    Ok(WikiStats {
150        wiki: wiki_name.to_string(),
151        pages,
152        sections,
153        types,
154        status,
155        orphans: metrics.orphans,
156        avg_connections: (metrics.avg_connections * 100.0).round() / 100.0,
157        graph_density: (metrics.density * 100.0).round() / 100.0,
158        staleness,
159        index,
160        communities,
161        diameter,
162        radius,
163        center,
164        structural_note,
165    })
166}
167
168fn compute_staleness(
169    searcher: &tantivy::Searcher,
170    is: &crate::index_schema::IndexSchema,
171) -> Result<StalenessBuckets> {
172    let f_last_updated = match is.try_field("last_updated") {
173        Some(f) => f,
174        None => {
175            return Ok(StalenessBuckets {
176                fresh: 0,
177                stale_7d: 0,
178                stale_30d: 0,
179            });
180        }
181    };
182
183    let today = chrono::Utc::now().date_naive();
184    let seven_days_ago = today - chrono::Duration::days(7);
185    let thirty_days_ago = today - chrono::Duration::days(30);
186
187    let all_docs = searcher.search(
188        &tantivy::query::AllQuery,
189        &tantivy::collector::DocSetCollector,
190    )?;
191
192    let mut fresh = 0usize;
193    let mut stale_7d = 0usize;
194    let mut stale_30d = 0usize;
195
196    for doc_addr in &all_docs {
197        let doc: tantivy::TantivyDocument = searcher.doc(*doc_addr)?;
198        let date_str = doc
199            .get_first(f_last_updated)
200            .and_then(|v| v.as_str())
201            .unwrap_or("");
202
203        if let Ok(date) = chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
204            if date >= seven_days_ago {
205                fresh += 1;
206            } else if date >= thirty_days_ago {
207                stale_7d += 1;
208            } else {
209                stale_30d += 1;
210            }
211        } else {
212            // No valid date — count as stale
213            stale_30d += 1;
214        }
215    }
216
217    Ok(StalenessBuckets {
218        fresh,
219        stale_7d,
220        stale_30d,
221    })
222}