Skip to main content

nodedb_fts/index/
stats.rs

1//! Corpus statistics for BM25 scoring.
2
3use crate::backend::FtsBackend;
4use crate::index::FtsIndex;
5
6impl<B: FtsBackend> FtsIndex<B> {
7    /// Get total document count and average document length for a collection.
8    ///
9    /// Returns `(total_docs, avg_doc_len)`. If the collection is empty,
10    /// returns `(0, 1.0)` to avoid division by zero.
11    pub fn index_stats(&self, tid: u32, collection: &str) -> Result<(u32, f32), B::Error> {
12        let (count, total_len) = self.backend.collection_stats(tid, collection)?;
13        let avg = if count > 0 {
14            total_len as f32 / count as f32
15        } else {
16            1.0
17        };
18        Ok((count, avg))
19    }
20}
21
#[cfg(test)]
mod tests {
    use crate::backend::memory::MemoryBackend;
    use crate::index::FtsIndex;

    /// Value passed as the `tid` argument in every test below.
    const T: u32 = 1;

    /// With nothing indexed, the stats are zero documents and the `1.0`
    /// fallback average.
    #[test]
    fn empty_collection_stats() {
        let index: FtsIndex<MemoryBackend> = FtsIndex::new(MemoryBackend::new());

        let (doc_count, avg_len) = index.index_stats(T, "empty").unwrap();

        assert_eq!(doc_count, 0);
        let deviation = (avg_len - 1.0).abs();
        assert!(deviation < f32::EPSILON);
    }

    /// After indexing two documents, both are counted and the average length
    /// is positive.
    #[test]
    fn stats_after_indexing() {
        let index = FtsIndex::new(MemoryBackend::new());
        let documents = [("d1", "hello world greeting"), ("d2", "hello rust")];
        for (doc_id, text) in documents {
            index.index_document(T, "docs", doc_id, text).unwrap();
        }

        let (doc_count, avg_len) = index.index_stats(T, "docs").unwrap();

        assert_eq!(doc_count, 2);
        assert!(avg_len > 0.0);
    }
}