// File: memvid_rs/storage/database.rs

//! SQLite database operations for memvid-rs.
//!
//! This module provides high-performance metadata storage using embedded SQLite,
//! with O(1) chunk lookups and scalability to millions of chunks.
5
6use crate::error::{MemvidError, Result};
7use crate::storage::schema::*;
8use crate::text::ChunkMetadata;
9use rusqlite::{Connection, OptionalExtension, Row, params};
10use std::path::Path;
11
12/// Database connection and operations
13pub struct Database {
14    conn: Connection,
15}
16
17impl Database {
18    /// Create a new database connection
19    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
20        let conn = Connection::open(path)
21            .map_err(|e| MemvidError::Storage(format!("Failed to open database: {}", e)))?;
22
23        let mut db = Self { conn };
24        db.initialize()?;
25        Ok(db)
26    }
27
28    /// Create an in-memory database (for testing)
29    pub fn memory() -> Result<Self> {
30        let conn = Connection::open_in_memory().map_err(|e| {
31            MemvidError::Storage(format!("Failed to create in-memory database: {}", e))
32        })?;
33
34        let mut db = Self { conn };
35        db.initialize()?;
36        Ok(db)
37    }
38
39    /// Initialize database schema
40    fn initialize(&mut self) -> Result<()> {
41        // Enable WAL mode for better concurrency
42        let _: String = self
43            .conn
44            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
45            .map_err(|e| MemvidError::Storage(format!("Failed to enable WAL mode: {}", e)))?;
46
47        // Create tables
48        self.conn
49            .execute(CREATE_CHUNKS_TABLE, [])
50            .map_err(|e| MemvidError::Storage(format!("Failed to create chunks table: {}", e)))?;
51
52        self.conn
53            .execute(CREATE_METADATA_TABLE, [])
54            .map_err(|e| MemvidError::Storage(format!("Failed to create metadata table: {}", e)))?;
55
56        // Create indexes for O(1) lookups
57        self.conn
58            .execute(CREATE_CHUNKS_INDEXES, [])
59            .map_err(|e| MemvidError::Storage(format!("Failed to create indexes: {}", e)))?;
60
61        // Set schema version
62        self.conn
63            .execute(
64                "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?)",
65                params![SCHEMA_VERSION.to_string()],
66            )
67            .map_err(|e| MemvidError::Storage(format!("Failed to set schema version: {}", e)))?;
68
69        log::info!(
70            "Database initialized with schema version {}",
71            SCHEMA_VERSION
72        );
73        Ok(())
74    }
75
76    /// Insert multiple chunks in a transaction
77    pub fn insert_chunks(&mut self, chunks: &[ChunkMetadata]) -> Result<()> {
78        let tx = self
79            .conn
80            .transaction()
81            .map_err(|e| MemvidError::Storage(format!("Failed to start transaction: {}", e)))?;
82
83        {
84            let mut stmt = tx
85                .prepare(
86                    r#"
87                INSERT INTO chunks (id, text, source, page, offset, length, frame, embedding)
88                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
89                "#,
90                )
91                .map_err(|e| MemvidError::Storage(format!("Failed to prepare statement: {}", e)))?;
92
93            for chunk in chunks {
94                let embedding_blob = chunk.embedding.as_ref().map(|emb| {
95                    let mut bytes = Vec::new();
96                    for &val in emb {
97                        bytes.extend_from_slice(&val.to_le_bytes());
98                    }
99                    bytes
100                });
101
102                stmt.execute(params![
103                    chunk.id as i64,
104                    chunk.text,
105                    chunk.source,
106                    chunk.page.map(|p| p as i64),
107                    chunk.offset as i64,
108                    chunk.length as i64,
109                    chunk.frame.map(|f| f as i64),
110                    embedding_blob,
111                ])
112                .map_err(|e| {
113                    MemvidError::Storage(format!("Failed to insert chunk {}: {}", chunk.id, e))
114                })?;
115            }
116        }
117
118        tx.commit()
119            .map_err(|e| MemvidError::Storage(format!("Failed to commit transaction: {}", e)))?;
120
121        log::info!("Inserted {} chunks into database", chunks.len());
122        Ok(())
123    }
124
125    /// Get chunk by ID - O(1) lookup
126    pub fn get_chunk_by_id(&self, chunk_id: usize) -> Result<Option<ChunkMetadata>> {
127        let mut stmt = self.conn.prepare(
128            "SELECT id, text, source, page, offset, length, frame, embedding FROM chunks WHERE id = ?"
129        ).map_err(|e| MemvidError::Storage(format!("Failed to prepare query: {}", e)))?;
130
131        let chunk = stmt
132            .query_row(params![chunk_id as i64], |row| self.row_to_chunk(row))
133            .optional()
134            .map_err(|e| MemvidError::Storage(format!("Failed to query chunk: {}", e)))?;
135
136        Ok(chunk)
137    }
138
139    /// Get chunks by frame number - O(log n) indexed lookup
140    pub fn get_chunks_by_frame(&self, frame_number: u32) -> Result<Vec<ChunkMetadata>> {
141        let mut stmt = self.conn.prepare(
142            "SELECT id, text, source, page, offset, length, frame, embedding FROM chunks WHERE frame = ? ORDER BY id"
143        ).map_err(|e| MemvidError::Storage(format!("Failed to prepare query: {}", e)))?;
144
145        let chunks = stmt
146            .query_map(params![frame_number as i64], |row| self.row_to_chunk(row))
147            .map_err(|e| MemvidError::Storage(format!("Failed to query chunks by frame: {}", e)))?;
148
149        let mut result = Vec::new();
150        for chunk in chunks {
151            result.push(chunk.map_err(|e| {
152                MemvidError::Storage(format!("Failed to process chunk row: {}", e))
153            })?);
154        }
155
156        Ok(result)
157    }
158
159    /// Get total chunk count
160    pub fn get_chunk_count(&self) -> Result<usize> {
161        let count: i64 = self
162            .conn
163            .query_row("SELECT COUNT(*) FROM chunks", [], |row| row.get(0))
164            .map_err(|e| MemvidError::Storage(format!("Failed to count chunks: {}", e)))?;
165
166        Ok(count as usize)
167    }
168
169    /// Search chunks by text content (simple LIKE search)
170    pub fn search_chunks(&self, query: &str, limit: usize) -> Result<Vec<ChunkMetadata>> {
171        let mut stmt = self.conn.prepare(
172            "SELECT id, text, source, page, offset, length, frame, embedding FROM chunks WHERE text LIKE ? ORDER BY id LIMIT ?"
173        ).map_err(|e| MemvidError::Storage(format!("Failed to prepare search query: {}", e)))?;
174
175        let search_term = format!("%{}%", query);
176        let chunks = stmt
177            .query_map(params![search_term, limit as i64], |row| {
178                self.row_to_chunk(row)
179            })
180            .map_err(|e| MemvidError::Storage(format!("Failed to search chunks: {}", e)))?;
181
182        let mut result = Vec::new();
183        for chunk in chunks {
184            result.push(chunk.map_err(|e| {
185                MemvidError::Storage(format!("Failed to process search result: {}", e))
186            })?);
187        }
188
189        Ok(result)
190    }
191
192    /// Get database statistics
193    pub fn get_stats(&self) -> Result<DatabaseStats> {
194        let chunk_count = self.get_chunk_count()?;
195
196        let file_size: i64 = self
197            .conn
198            .query_row(
199                "SELECT page_count * page_size FROM pragma_page_count(), pragma_page_size()",
200                [],
201                |row| row.get(0),
202            )
203            .map_err(|e| MemvidError::Storage(format!("Failed to get database size: {}", e)))?;
204
205        let max_frame: Option<i64> = self
206            .conn
207            .query_row(
208                "SELECT MAX(frame) FROM chunks WHERE frame IS NOT NULL",
209                [],
210                |row| row.get(0),
211            )
212            .optional()
213            .map_err(|e| MemvidError::Storage(format!("Failed to get max frame: {}", e)))?
214            .flatten();
215
216        Ok(DatabaseStats {
217            chunk_count,
218            frame_count: max_frame.map(|f| f as usize + 1).unwrap_or(0),
219            file_size_bytes: file_size as usize,
220        })
221    }
222
223    /// Helper function to convert database row to ChunkMetadata
224    fn row_to_chunk(&self, row: &Row) -> rusqlite::Result<ChunkMetadata> {
225        let embedding = if let Some(blob) = row.get::<_, Option<Vec<u8>>>(7)? {
226            let mut embedding = Vec::new();
227            for chunk in blob.chunks_exact(4) {
228                let bytes = [chunk[0], chunk[1], chunk[2], chunk[3]];
229                embedding.push(f32::from_le_bytes(bytes));
230            }
231            Some(embedding)
232        } else {
233            None
234        };
235
236        Ok(ChunkMetadata {
237            id: row.get::<_, i64>(0)? as usize,
238            text: row.get(1)?,
239            source: row.get(2)?,
240            page: row.get::<_, Option<i64>>(3)?.map(|p| p as u32),
241            offset: row.get::<_, i64>(4)? as usize,
242            length: row.get::<_, i64>(5)? as usize,
243            frame: row.get::<_, Option<i64>>(6)?.map(|f| f as u32),
244            embedding,
245        })
246    }
247}
248
/// Summary figures describing the contents and size of the database.
///
/// Derives `PartialEq`/`Eq` so two snapshots can be compared directly
/// (e.g. in tests or to detect changes between calls).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DatabaseStats {
    /// Number of rows in the `chunks` table.
    pub chunk_count: usize,
    /// Largest stored frame number plus one, or `0` when no chunk has a frame.
    pub frame_count: usize,
    /// Database size in bytes, computed as SQLite `page_count * page_size`.
    pub file_size_bytes: usize,
}