Skip to main content

nodedb_fts/index/
stats.rs

1//! Corpus statistics for BM25 scoring.
2
3use crate::backend::FtsBackend;
4use crate::index::FtsIndex;
5
6impl<B: FtsBackend> FtsIndex<B> {
7    /// Get total document count and average document length for a collection.
8    ///
9    /// Returns `(total_docs, avg_doc_len)`. If the collection is empty,
10    /// returns `(0, 1.0)` to avoid division by zero.
11    pub fn index_stats(&self, collection: &str) -> Result<(u32, f32), B::Error> {
12        let (count, total_len) = self.backend.collection_stats(collection)?;
13        let avg = if count > 0 {
14            total_len as f32 / count as f32
15        } else {
16            1.0
17        };
18        Ok((count, avg))
19    }
20}
21
#[cfg(test)]
mod tests {
    use crate::backend::memory::MemoryBackend;
    use crate::index::FtsIndex;

    /// A collection that was never written to reports zero documents and the
    /// fallback average length of 1.0.
    #[test]
    fn empty_collection_stats() {
        let index: FtsIndex<MemoryBackend> = FtsIndex::new(MemoryBackend::new());

        let (total_docs, avg_doc_len) = index.index_stats("empty").unwrap();

        assert_eq!(total_docs, 0);
        assert!((avg_doc_len - 1.0).abs() < f32::EPSILON);
    }

    /// After indexing two documents the stats report both of them and a
    /// strictly positive average length.
    #[test]
    fn stats_after_indexing() {
        let index = FtsIndex::new(MemoryBackend::new());
        let fixtures = [("d1", "hello world greeting"), ("d2", "hello rust")];
        for (doc_id, text) in fixtures {
            index.index_document("docs", doc_id, text).unwrap();
        }

        let (total_docs, avg_doc_len) = index.index_stats("docs").unwrap();

        assert_eq!(total_docs, 2);
        assert!(avg_doc_len > 0.0);
    }
}