manifold_vectors/
sparse.rs

1//! Sparse vector storage using COO format.
2use manifold::{
3    ReadOnlyTable, ReadTransaction, ReadableTableMetadata, StorageError, Table, TableDefinition,
4    TableError, WriteTransaction,
5};
6use uuid::Uuid;
7
8/// A sparse vector represented as (index, value) pairs.
9#[derive(Debug, Clone, PartialEq)]
10pub struct SparseVector {
11    /// Non-zero entries as (index, value) pairs
12    pub entries: Vec<(u32, f32)>,
13}
14
15impl SparseVector {
16    /// Creates a new sparse vector from entries
17    pub fn new(mut entries: Vec<(u32, f32)>) -> Self {
18        entries.sort_unstable_by_key(|(idx, _)| *idx);
19        Self { entries }
20    }
21
22    /// Returns the number of non-zero entries
23    pub fn len(&self) -> usize {
24        self.entries.len()
25    }
26
27    /// Returns true if there are no entries
28    pub fn is_empty(&self) -> bool {
29        self.entries.is_empty()
30    }
31
32    /// Computes the dot product with another sparse vector
33    pub fn dot(&self, other: &Self) -> f32 {
34        let mut result = 0.0;
35        let mut i = 0;
36        let mut j = 0;
37        while i < self.entries.len() && j < other.entries.len() {
38            let (idx_a, val_a) = self.entries[i];
39            let (idx_b, val_b) = other.entries[j];
40            match idx_a.cmp(&idx_b) {
41                std::cmp::Ordering::Equal => {
42                    result += val_a * val_b;
43                    i += 1;
44                    j += 1;
45                }
46                std::cmp::Ordering::Less => i += 1,
47                std::cmp::Ordering::Greater => j += 1,
48            }
49        }
50        result
51    }
52}
53
54/// Table for storing sparse vectors
55pub struct SparseVectorTable<'txn> {
56    table: Table<'txn, Uuid, Vec<(u32, f32)>>,
57}
58
59impl<'txn> SparseVectorTable<'txn> {
60    /// Opens a sparse vector table for writing
61    pub fn open(txn: &'txn WriteTransaction, name: &str) -> Result<Self, TableError> {
62        let def: TableDefinition<Uuid, Vec<(u32, f32)>> = TableDefinition::new(name);
63        let table = txn.open_table(def)?;
64        Ok(Self { table })
65    }
66
67    /// Inserts a sparse vector
68    pub fn insert(&mut self, key: &Uuid, vector: &SparseVector) -> Result<(), TableError> {
69        self.table.insert(key, &vector.entries)?;
70        Ok(())
71    }
72
73    /// Returns the number of vectors stored
74    pub fn len(&self) -> Result<u64, StorageError> {
75        self.table.len()
76    }
77
78    /// Returns true if the table is empty
79    pub fn is_empty(&self) -> Result<bool, StorageError> {
80        Ok(self.len()? == 0)
81    }
82}
83
84/// Read-only sparse vector table
85pub struct SparseVectorTableRead {
86    table: ReadOnlyTable<Uuid, Vec<(u32, f32)>>,
87}
88
89impl SparseVectorTableRead {
90    /// Opens a sparse vector table for reading
91    pub fn open(txn: &ReadTransaction, name: &str) -> Result<Self, StorageError> {
92        let def: TableDefinition<Uuid, Vec<(u32, f32)>> = TableDefinition::new(name);
93        let table = txn.open_table(def).map_err(|e| match e {
94            TableError::Storage(s) => s,
95            _ => StorageError::Io(std::io::Error::other(e)),
96        })?;
97        Ok(Self { table })
98    }
99
100    /// Retrieves a sparse vector by key
101    pub fn get(&self, key: &Uuid) -> Result<Option<SparseVector>, StorageError> {
102        Ok(self.table.get(key)?.map(|guard| SparseVector {
103            entries: guard.value().clone(),
104        }))
105    }
106
107    /// Returns the number of vectors stored
108    pub fn len(&self) -> Result<u64, StorageError> {
109        self.table.len()
110    }
111
112    /// Returns true if the table is empty
113    pub fn is_empty(&self) -> Result<bool, StorageError> {
114        Ok(self.len()? == 0)
115    }
116}