Skip to main content

nodedb_fts/index/
stats.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Corpus statistics for BM25 scoring.
4
5use crate::backend::FtsBackend;
6use crate::index::FtsIndex;
7
8impl<B: FtsBackend> FtsIndex<B> {
9    /// Get total document count and average document length for a collection.
10    ///
11    /// Returns `(total_docs, avg_doc_len)`. If the collection is empty,
12    /// returns `(0, 1.0)` to avoid division by zero.
13    pub fn index_stats(&self, tid: u64, collection: &str) -> Result<(u32, f32), B::Error> {
14        let (count, total_len) = self.backend.collection_stats(tid, collection)?;
15        let avg = if count > 0 {
16            total_len as f32 / count as f32
17        } else {
18            1.0
19        };
20        Ok((count, avg))
21    }
22}
23
#[cfg(test)]
mod tests {
    use crate::backend::memory::MemoryBackend;
    use crate::index::FtsIndex;
    use nodedb_types::Surrogate;

    /// The `tid` argument passed to every index call in these tests.
    const TID: u64 = 1;

    /// Build an index over a fresh in-memory backend.
    fn new_index() -> FtsIndex<MemoryBackend> {
        FtsIndex::new(MemoryBackend::new())
    }

    /// A collection that was never written to reports zero documents and the
    /// fallback average length of `1.0`.
    #[test]
    fn empty_collection_stats() {
        let index = new_index();

        let (doc_count, avg_len) = index.index_stats(TID, "empty").unwrap();

        assert_eq!(doc_count, 0);
        assert!((avg_len - 1.0).abs() < f32::EPSILON);
    }

    /// After two documents are indexed, the stats report both of them and a
    /// positive average length.
    #[test]
    fn stats_after_indexing() {
        let index = new_index();
        let documents = [(1, "hello world greeting"), (2, "hello rust")];
        for (id, text) in documents {
            index
                .index_document(TID, "docs", Surrogate(id), text)
                .unwrap();
        }

        let (doc_count, avg_len) = index.index_stats(TID, "docs").unwrap();

        assert_eq!(doc_count, 2);
        assert!(avg_len > 0.0);
    }
}