Skip to main content

sqlrite/sql/db/
secondary_index.rs

1//! Secondary indexes — a separate lookup structure per indexed column.
2//!
3//! Every UNIQUE (or PRIMARY KEY) column gets one automatically at
4//! CREATE TABLE time. Explicit `CREATE INDEX` statements (Phase 3e.2) add
5//! more. On INSERT / UPDATE / DELETE the owning `Table` keeps its indexes
6//! in lockstep with its row storage.
7//!
8//! **Key shape.** A B-Tree keyed by `(value, rowid)` would let us support
9//! duplicate values naturally. For simplicity the in-memory representation
10//! is `BTreeMap<value, Vec<rowid>>` — functionally equivalent, cheaper to
11//! iterate for a given value, a little heavier on the allocator for
12//! wide-dup columns. The on-disk representation in Phase 3e.4 will flatten
13//! to `(value, rowid)` keys one row per entry.
14//!
15//! **Types.** Only Integer and Text columns are currently indexed. Real
16//! has floating-point equality hazards; Bool has so few distinct values an
17//! index isn't worth it.
18
19use std::collections::BTreeMap;
20
21use crate::error::{Result, SQLRiteError};
22use crate::sql::db::table::{DataType, Value};
23
24/// Declares who created the index. Persisted into `sqlrite_master.sql` so
25/// the text round-trips; auto-created indexes get a synthesized SQL form
26/// so the catalog stays uniform.
27#[derive(Debug, Clone, Copy, PartialEq, Eq)]
28pub enum IndexOrigin {
29    /// Auto-created for a UNIQUE / PRIMARY KEY column at CREATE TABLE time.
30    Auto,
31    /// Explicit `CREATE INDEX` statement (Phase 3e.2).
32    Explicit,
33}
34
35/// One secondary index on a single column. Multi-column composite indexes
36/// are on the longer-term list; the `column_name` field stays singular
37/// for now.
38#[derive(Debug, Clone)]
39pub struct SecondaryIndex {
40    /// Catalog name. For auto indexes: `sqlrite_autoindex_<table>_<col>`.
41    /// For explicit indexes: the user-supplied identifier from CREATE INDEX.
42    pub name: String,
43    pub table_name: String,
44    pub column_name: String,
45    pub is_unique: bool,
46    pub origin: IndexOrigin,
47    pub entries: IndexEntries,
48}
49
50/// Typed map from value → list of rowids carrying that value. The rowid
51/// list is always non-empty; empty lists are pruned on remove.
52#[derive(Debug, Clone)]
53pub enum IndexEntries {
54    Integer(BTreeMap<i64, Vec<i64>>),
55    Text(BTreeMap<String, Vec<i64>>),
56}
57
58impl SecondaryIndex {
59    /// Builds an empty index over a column of the given datatype. Returns
60    /// an error for unsupported datatypes (Real, Bool, None, Invalid).
61    pub fn new(
62        name: String,
63        table_name: String,
64        column_name: String,
65        datatype: &DataType,
66        is_unique: bool,
67        origin: IndexOrigin,
68    ) -> Result<Self> {
69        let entries = match datatype {
70            DataType::Integer => IndexEntries::Integer(BTreeMap::new()),
71            DataType::Text => IndexEntries::Text(BTreeMap::new()),
72            other => {
73                return Err(SQLRiteError::General(format!(
74                    "cannot build a secondary index on a {other} column"
75                )));
76            }
77        };
78        Ok(Self {
79            name,
80            table_name,
81            column_name,
82            is_unique,
83            origin,
84            entries,
85        })
86    }
87
88    /// Synthesizes a CREATE INDEX statement for `sqlrite_master.sql`. For
89    /// auto indexes this is a synthetic form; for explicit indexes the
90    /// caller can override with the original user text if it has been
91    /// preserved. Used by the persistence path in Phase 3e.4.
92    #[allow(dead_code)]
93    pub fn synthesized_sql(&self) -> String {
94        let unique = if self.is_unique { "UNIQUE " } else { "" };
95        format!(
96            "CREATE {unique}INDEX {} ON {} ({});",
97            self.name, self.table_name, self.column_name
98        )
99    }
100
101    /// Standard name for the auto-generated index of a UNIQUE/PK column.
102    /// Uniform across save/open so indexes persist under a stable name.
103    pub fn auto_name(table_name: &str, column_name: &str) -> String {
104        format!("sqlrite_autoindex_{table_name}_{column_name}")
105    }
106
107    /// Returns `true` iff inserting `value` would violate the UNIQUE
108    /// constraint — i.e., the index already has an entry for this value
109    /// and `self.is_unique` is set. Null values are never indexed and
110    /// never conflict.
111    pub fn would_violate_unique(&self, value: &Value) -> bool {
112        if !self.is_unique {
113            return false;
114        }
115        match (&self.entries, value) {
116            (IndexEntries::Integer(m), Value::Integer(v)) => m.contains_key(v),
117            (IndexEntries::Text(m), Value::Text(s)) => m.contains_key(s),
118            _ => false, // type mismatch can't collide; other code paths catch it
119        }
120    }
121
122    /// Adds a `(value, rowid)` entry. Null values are ignored (NULL in an
123    /// indexed column stays out of the index). Type mismatches — e.g.
124    /// calling this with a Text value against an Integer index — return
125    /// an error rather than silently skipping.
126    pub fn insert(&mut self, value: &Value, rowid: i64) -> Result<()> {
127        match (&mut self.entries, value) {
128            (_, Value::Null) => Ok(()),
129            (IndexEntries::Integer(m), Value::Integer(v)) => {
130                m.entry(*v).or_default().push(rowid);
131                Ok(())
132            }
133            (IndexEntries::Text(m), Value::Text(s)) => {
134                m.entry(s.clone()).or_default().push(rowid);
135                Ok(())
136            }
137            (entries, value) => Err(SQLRiteError::Internal(format!(
138                "type mismatch inserting into index '{}': entries={entries:?}, value={value:?}",
139                self.name
140            ))),
141        }
142    }
143
144    /// Removes a `(value, rowid)` entry. If the value has other rowids
145    /// attached they remain; if this was the last rowid, the value key is
146    /// pruned. A no-op if the entry isn't present (simpler than failing —
147    /// UPDATE paths rely on this).
148    pub fn remove(&mut self, value: &Value, rowid: i64) {
149        match (&mut self.entries, value) {
150            (IndexEntries::Integer(m), Value::Integer(v)) => {
151                if let Some(list) = m.get_mut(v) {
152                    list.retain(|r| *r != rowid);
153                    if list.is_empty() {
154                        m.remove(v);
155                    }
156                }
157            }
158            (IndexEntries::Text(m), Value::Text(s)) => {
159                if let Some(list) = m.get_mut(s) {
160                    list.retain(|r| *r != rowid);
161                    if list.is_empty() {
162                        m.remove(s);
163                    }
164                }
165            }
166            _ => {}
167        }
168    }
169
170    /// Returns every rowid currently associated with `value`. For a unique
171    /// index this is at most one; for a non-unique index it can be many.
172    /// Empty `Vec` if the value isn't present.
173    pub fn lookup(&self, value: &Value) -> Vec<i64> {
174        match (&self.entries, value) {
175            (IndexEntries::Integer(m), Value::Integer(v)) => m.get(v).cloned().unwrap_or_default(),
176            (IndexEntries::Text(m), Value::Text(s)) => m.get(s).cloned().unwrap_or_default(),
177            _ => Vec::new(),
178        }
179    }
180
181    /// Iterates every `(value, rowid)` pair in ascending-value order. The
182    /// rowids for a given value come out in insertion order, which happens
183    /// to match ascending rowid order in practice because rows are inserted
184    /// in rowid-ascending sequence during a bulk load. Phase 3e.4 uses
185    /// this to serialize the index to its B-Tree.
186    #[allow(dead_code)]
187    pub fn iter_entries(&self) -> Box<dyn Iterator<Item = (Value, i64)> + '_> {
188        match &self.entries {
189            IndexEntries::Integer(m) => Box::new(
190                m.iter()
191                    .flat_map(|(v, rs)| rs.iter().map(|r| (Value::Integer(*v), *r))),
192            ),
193            IndexEntries::Text(m) => Box::new(
194                m.iter()
195                    .flat_map(|(v, rs)| rs.iter().map(|r| (Value::Text(v.clone()), *r))),
196            ),
197        }
198    }
199}
200
201// PartialEq that's useful for tests (not strictly required by the crate).
202impl PartialEq for SecondaryIndex {
203    fn eq(&self, other: &Self) -> bool {
204        self.name == other.name
205            && self.table_name == other.table_name
206            && self.column_name == other.column_name
207            && self.is_unique == other.is_unique
208            && self.origin == other.origin
209    }
210}
211
212#[cfg(test)]
213mod tests {
214    use super::*;
215
216    #[test]
217    fn auto_name_is_deterministic() {
218        assert_eq!(
219            SecondaryIndex::auto_name("users", "email"),
220            "sqlrite_autoindex_users_email"
221        );
222    }
223
224    #[test]
225    fn rejects_index_on_real_column() {
226        let err = SecondaryIndex::new(
227            "x".into(),
228            "t".into(),
229            "c".into(),
230            &DataType::Real,
231            false,
232            IndexOrigin::Explicit,
233        )
234        .unwrap_err();
235        assert!(format!("{err}").contains("cannot build"));
236    }
237
238    #[test]
239    fn unique_violation_detected_before_insert() {
240        let mut idx = SecondaryIndex::new(
241            "x".into(),
242            "t".into(),
243            "c".into(),
244            &DataType::Integer,
245            true,
246            IndexOrigin::Auto,
247        )
248        .unwrap();
249        assert!(!idx.would_violate_unique(&Value::Integer(1)));
250        idx.insert(&Value::Integer(1), 100).unwrap();
251        assert!(idx.would_violate_unique(&Value::Integer(1)));
252        assert!(!idx.would_violate_unique(&Value::Integer(2)));
253    }
254
255    #[test]
256    fn null_is_not_indexed_and_never_conflicts() {
257        let mut idx = SecondaryIndex::new(
258            "x".into(),
259            "t".into(),
260            "c".into(),
261            &DataType::Text,
262            true,
263            IndexOrigin::Auto,
264        )
265        .unwrap();
266        idx.insert(&Value::Null, 1).unwrap();
267        idx.insert(&Value::Null, 2).unwrap();
268        assert_eq!(idx.lookup(&Value::Null), Vec::<i64>::new());
269        assert!(!idx.would_violate_unique(&Value::Null));
270    }
271
272    #[test]
273    fn insert_and_remove_preserve_list_semantics() {
274        let mut idx = SecondaryIndex::new(
275            "x".into(),
276            "t".into(),
277            "c".into(),
278            &DataType::Text,
279            false,
280            IndexOrigin::Explicit,
281        )
282        .unwrap();
283        idx.insert(&Value::Text("a".into()), 1).unwrap();
284        idx.insert(&Value::Text("a".into()), 2).unwrap();
285        idx.insert(&Value::Text("a".into()), 3).unwrap();
286        assert_eq!(idx.lookup(&Value::Text("a".into())), vec![1, 2, 3]);
287
288        idx.remove(&Value::Text("a".into()), 2);
289        assert_eq!(idx.lookup(&Value::Text("a".into())), vec![1, 3]);
290
291        idx.remove(&Value::Text("a".into()), 1);
292        idx.remove(&Value::Text("a".into()), 3);
293        assert_eq!(idx.lookup(&Value::Text("a".into())), Vec::<i64>::new());
294    }
295
296    #[test]
297    fn iter_entries_yields_value_rowid_pairs_in_order() {
298        let mut idx = SecondaryIndex::new(
299            "x".into(),
300            "t".into(),
301            "c".into(),
302            &DataType::Integer,
303            false,
304            IndexOrigin::Explicit,
305        )
306        .unwrap();
307        idx.insert(&Value::Integer(20), 200).unwrap();
308        idx.insert(&Value::Integer(10), 100).unwrap();
309        idx.insert(&Value::Integer(10), 101).unwrap();
310
311        let pairs: Vec<(Value, i64)> = idx.iter_entries().collect();
312        assert_eq!(
313            pairs,
314            vec![
315                (Value::Integer(10), 100),
316                (Value::Integer(10), 101),
317                (Value::Integer(20), 200),
318            ]
319        );
320    }
321
322    #[test]
323    fn synthesized_sql_round_trips_through_parser() {
324        let idx = SecondaryIndex::new(
325            "sqlrite_autoindex_users_name".into(),
326            "users".into(),
327            "name".into(),
328            &DataType::Text,
329            true,
330            IndexOrigin::Auto,
331        )
332        .unwrap();
333        let sql = idx.synthesized_sql();
334        assert_eq!(
335            sql,
336            "CREATE UNIQUE INDEX sqlrite_autoindex_users_name ON users (name);"
337        );
338    }
339}