Skip to main content

nodedb_fts/index/
fieldnorm.rs

1// SPDX-License-Identifier: Apache-2.0
2
//! Fieldnorm storage: SmallFloat-encoded document lengths per collection.
//!
//! Stores a compact `Vec<u8>` array indexed by surrogate. Each byte is
//! a SmallFloat-encoded document length. Persisted as a metadata blob via
//! the backend's `read_meta`/`write_meta`.
8
9use nodedb_types::Surrogate;
10
11use crate::backend::FtsBackend;
12use crate::codec::smallfloat;
13use crate::index::FtsIndex;
14
15impl<B: FtsBackend> FtsIndex<B> {
16    /// Get the fieldnorm (SmallFloat-encoded doc length) for a doc.
17    ///
18    /// Returns the decoded approximate u32 length, or `None` if not stored.
19    pub fn read_fieldnorm(
20        &self,
21        tid: u64,
22        collection: &str,
23        doc_id: Surrogate,
24    ) -> Result<Option<u32>, B::Error> {
25        let data = self.backend.read_meta(tid, collection, "fieldnorms")?;
26        match data {
27            Some(bytes) if (doc_id.0 as usize) < bytes.len() => {
28                Ok(Some(smallfloat::decode(bytes[doc_id.0 as usize])))
29            }
30            _ => Ok(None),
31        }
32    }
33
34    /// Write a fieldnorm byte for a surrogate. Grows the array if needed.
35    pub fn write_fieldnorm(
36        &self,
37        tid: u64,
38        collection: &str,
39        doc_id: Surrogate,
40        doc_length: u32,
41    ) -> Result<(), B::Error> {
42        let mut data = self
43            .backend
44            .read_meta(tid, collection, "fieldnorms")?
45            .unwrap_or_default();
46
47        let idx = doc_id.0 as usize;
48        if idx >= data.len() {
49            data.resize(idx + 1, 0);
50        }
51        data[idx] = smallfloat::encode(doc_length);
52
53        self.backend
54            .write_meta(tid, collection, "fieldnorms", &data)
55    }
56}
57
#[cfg(test)]
mod tests {
    use nodedb_types::Surrogate;

    use crate::backend::memory::MemoryBackend;
    use crate::codec::smallfloat;
    use crate::index::FtsIndex;

    /// Tenant id shared by every test in this module.
    const T: u64 = 1;

    /// The value a document length becomes after one encode/decode pass.
    fn quantized(len: u32) -> u32 {
        smallfloat::decode(smallfloat::encode(len))
    }

    /// Written lengths read back as their quantized value, never larger
    /// than what was written.
    #[test]
    fn fieldnorm_roundtrip() {
        let idx = FtsIndex::new(MemoryBackend::new());
        idx.write_fieldnorm(T, "col", Surrogate(0), 100).unwrap();
        idx.write_fieldnorm(T, "col", Surrogate(5), 50).unwrap();

        let norm0 = idx.read_fieldnorm(T, "col", Surrogate(0)).unwrap().unwrap();
        let norm5 = idx.read_fieldnorm(T, "col", Surrogate(5)).unwrap().unwrap();

        assert!(norm0 <= 100);
        assert!(norm5 <= 50);
        assert_eq!(norm0, quantized(100));
        assert_eq!(norm5, quantized(50));
    }

    /// Reading from a collection with no stored fieldnorms yields `None`.
    #[test]
    fn fieldnorm_missing_doc() {
        let idx = FtsIndex::new(MemoryBackend::new());
        let found = idx.read_fieldnorm(T, "col", Surrogate(99)).unwrap();
        assert_eq!(found, None);
    }

    /// A second write to the same surrogate replaces the first value.
    #[test]
    fn fieldnorm_overwrite() {
        let idx = FtsIndex::new(MemoryBackend::new());
        for len in [100, 200] {
            idx.write_fieldnorm(T, "col", Surrogate(0), len).unwrap();
        }

        let norm = idx.read_fieldnorm(T, "col", Surrogate(0)).unwrap().unwrap();
        assert_eq!(norm, quantized(200));
    }
}