leindex 1.6.0

LeIndex — an MCP server and semantic code search engine for AI tools and large codebases
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
// Storage schema and database management

use crate::storage::{ProjectMetadata, UniqueProjectId};
use rusqlite::{Connection, Result as SqliteResult};
use serde::{Deserialize, Serialize};
use std::path::Path;

/// Storage configuration.
///
/// Controls where the SQLite database lives and how the connection is tuned.
/// Serializable so it can be persisted/loaded as part of app configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// Path to the SQLite database file (relative paths resolve against the
    /// process working directory).
    pub db_path: String,

    /// Whether to enable WAL (write-ahead logging) journal mode for better
    /// read/write concurrency.
    pub wal_enabled: bool,

    /// SQLite page-cache size in pages (`PRAGMA cache_size`); `None` keeps
    /// SQLite's default.
    pub cache_size_pages: Option<usize>,
}

impl Default for StorageConfig {
    fn default() -> Self {
        Self {
            db_path: "leindex.db".to_string(),
            wal_enabled: true,
            cache_size_pages: Some(10000),
        }
    }
}

/// Main storage interface.
///
/// Owns the SQLite connection and the configuration it was opened with.
/// Construct via [`Storage::open`] or [`Storage::open_with_config`].
pub struct Storage {
    // Underlying SQLite connection; exposed via `conn()` / `conn_mut()`.
    conn: Connection,
    // Retained for `close()` (WAL checkpoint decision); not otherwise read,
    // hence the dead_code allowance.
    #[allow(dead_code)]
    config: StorageConfig,
}

impl Storage {
    /// Open storage at `path` with the default [`StorageConfig`].
    pub fn open<P: AsRef<Path>>(path: P) -> SqliteResult<Self> {
        Self::open_with_config(path, StorageConfig::default())
    }

    /// Open storage at `path` with a custom config.
    ///
    /// Applies connection pragmas (WAL, busy timeout, cache size), then runs
    /// version-gated migrations, then creates any missing tables/indexes.
    ///
    /// # Errors
    /// Returns an error if the database cannot be opened, a pragma fails,
    /// the stored schema version is newer than this binary supports, or any
    /// DDL statement fails.
    pub fn open_with_config<P: AsRef<Path>>(path: P, config: StorageConfig) -> SqliteResult<Self> {
        let conn = Connection::open(path)?;

        // Enable WAL mode for better concurrency
        if config.wal_enabled {
            conn.pragma_update(None, "journal_mode", "WAL")?;
        }

        // Allow concurrent access: wait up to 5 seconds for locks instead of
        // immediately failing.  This is critical when multiple LeIndex instances
        // (or a ProjectRegistry) access the same project's .leindex/leindex.db.
        conn.pragma_update(None, "busy_timeout", 5000)?;

        // Set cache size if specified
        if let Some(cache_size) = config.cache_size_pages {
            conn.pragma_update(None, "cache_size", cache_size)?;
        }

        let mut storage = Self { conn, config };

        // Check schema version BEFORE any DDL — reject newer databases early
        // so an older binary cannot corrupt a schema it doesn't understand.
        storage.run_migrations()?;

        // Initialize schema (CREATE TABLE IF NOT EXISTS — safe after version check)
        storage.initialize_schema()?;

        Ok(storage)
    }

    /// Initialize database schema.
    ///
    /// All DDL uses `IF NOT EXISTS`, so this is idempotent and safe to run on
    /// every open. Also performs additive column migrations (`ALTER TABLE ...
    /// ADD COLUMN`) for `intel_nodes` so databases created by older versions
    /// keep working.
    fn initialize_schema(&mut self) -> SqliteResult<()> {
        // Initialize project_metadata table first
        // SQL schema for project_metadata table
        let project_metadata_schema = r#"
CREATE TABLE IF NOT EXISTS project_metadata (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    unique_project_id TEXT UNIQUE NOT NULL,
    base_name TEXT NOT NULL,
    path_hash TEXT NOT NULL,
    instance INTEGER DEFAULT 0,
    canonical_path TEXT NOT NULL,
    display_name TEXT,
    is_clone BOOLEAN DEFAULT 0,
    cloned_from TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(canonical_path)
)
"#;

        // SQL indexes for project_metadata table
        let project_metadata_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_project_metadata_unique_id ON project_metadata(unique_project_id)",
            "CREATE INDEX IF NOT EXISTS idx_project_metadata_canonical_path ON project_metadata(canonical_path)",
            "CREATE INDEX IF NOT EXISTS idx_project_metadata_base_hash ON project_metadata(base_name, path_hash)",
            "CREATE INDEX IF NOT EXISTS idx_project_metadata_base_name ON project_metadata(base_name)",
        ];

        self.conn.execute(project_metadata_schema, [])?;
        for index_sql in project_metadata_indexes {
            self.conn.execute(index_sql, [])?;
        }

        // Create indexed_files table for incremental indexing
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS indexed_files (
                file_path TEXT PRIMARY KEY,
                project_id TEXT NOT NULL,
                file_hash TEXT NOT NULL,
                last_indexed INTEGER NOT NULL
            )",
            [],
        )?;

        // Create intel_nodes table
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS intel_nodes (
                id INTEGER PRIMARY KEY,
                project_id TEXT NOT NULL,
                file_path TEXT NOT NULL,
                node_id TEXT NOT NULL,
                symbol_name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                language TEXT NOT NULL DEFAULT 'unknown',
                node_type TEXT NOT NULL,
                signature TEXT,
                complexity INTEGER,
                content_hash TEXT NOT NULL,
                embedding BLOB,
                byte_range_start INTEGER,
                byte_range_end INTEGER,
                created_at INTEGER NOT NULL,
                updated_at INTEGER NOT NULL,
                embedding_format INTEGER
            )",
            [],
        )?;

        // Migration: Ensure new columns exist for existing databases.
        // (On a freshly-created table every column already exists, so none of
        // the ALTERs below fire.)
        let columns: Vec<String> = self
            .conn
            .prepare("PRAGMA table_info(intel_nodes)")?
            .query_map([], |row| row.get::<_, String>(1))?
            .collect::<SqliteResult<Vec<_>>>()?;

        if !columns.iter().any(|c| c == "node_id") {
            self.conn.execute(
                "ALTER TABLE intel_nodes ADD COLUMN node_id TEXT DEFAULT ''",
                [],
            )?;
            // Update node_id with symbol_name for existing records
            self.conn.execute(
                "UPDATE intel_nodes SET node_id = symbol_name WHERE node_id = ''",
                [],
            )?;
        }
        if !columns.iter().any(|c| c == "qualified_name") {
            self.conn.execute(
                "ALTER TABLE intel_nodes ADD COLUMN qualified_name TEXT DEFAULT ''",
                [],
            )?;
            self.conn.execute(
                "UPDATE intel_nodes SET qualified_name = symbol_name WHERE qualified_name = ''",
                [],
            )?;
        }
        if !columns.iter().any(|c| c == "language") {
            self.conn.execute(
                "ALTER TABLE intel_nodes ADD COLUMN language TEXT DEFAULT 'unknown'",
                [],
            )?;
        }
        if !columns.iter().any(|c| c == "byte_range_start") {
            self.conn.execute(
                "ALTER TABLE intel_nodes ADD COLUMN byte_range_start INTEGER",
                [],
            )?;
        }
        if !columns.iter().any(|c| c == "byte_range_end") {
            self.conn.execute(
                "ALTER TABLE intel_nodes ADD COLUMN byte_range_end INTEGER",
                [],
            )?;
        }

        if !columns.iter().any(|c| c == "embedding_format") {
            self.conn.execute(
                "ALTER TABLE intel_nodes ADD COLUMN embedding_format INTEGER",
                [],
            )?;
        }
        // Create intel_edges table
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS intel_edges (
                caller_id INTEGER NOT NULL,
                callee_id INTEGER NOT NULL,
                edge_type TEXT NOT NULL,
                metadata TEXT,
                FOREIGN KEY(caller_id) REFERENCES intel_nodes(id),
                FOREIGN KEY(callee_id) REFERENCES intel_nodes(id),
                PRIMARY KEY(caller_id, callee_id, edge_type)
            )",
            [],
        )?;

        // Create analysis_cache table
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS analysis_cache (
                node_hash TEXT PRIMARY KEY,
                cfg_data BLOB,
                complexity_metrics BLOB,
                timestamp INTEGER NOT NULL
            )",
            [],
        )?;

        // Persistent cache telemetry for cross-session hit-rate tracking.
        // Single-row table enforced via CHECK (id = 1).
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS cache_telemetry (
                id INTEGER PRIMARY KEY CHECK (id = 1),
                cache_hits INTEGER NOT NULL DEFAULT 0,
                cache_misses INTEGER NOT NULL DEFAULT 0,
                cache_writes INTEGER NOT NULL DEFAULT 0,
                updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now'))
            )",
            [],
        )?;
        // Seed the telemetry row once; INSERT OR IGNORE keeps existing counts.
        self.conn.execute(
            "INSERT OR IGNORE INTO cache_telemetry (id, cache_hits, cache_misses, cache_writes, updated_at)
             VALUES (1, 0, 0, 0, strftime('%s', 'now'))",
            [],
        )?;

        // Create global_symbols table (Phase 7: Cross-Project Resolution)
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS global_symbols (
                symbol_id TEXT PRIMARY KEY,
                project_id TEXT NOT NULL,
                symbol_name TEXT NOT NULL,
                symbol_type TEXT NOT NULL,
                signature TEXT,
                file_path TEXT NOT NULL,
                byte_range_start INTEGER,
                byte_range_end INTEGER,
                complexity INTEGER DEFAULT 1,
                is_public INTEGER DEFAULT 0,
                created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
                UNIQUE(project_id, symbol_name, signature)
            )",
            [],
        )?;

        // Create external_refs table (Phase 7: Cross-Project Resolution)
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS external_refs (
                ref_id TEXT PRIMARY KEY,
                source_project_id TEXT NOT NULL,
                source_symbol_id TEXT NOT NULL,
                target_project_id TEXT NOT NULL,
                target_symbol_id TEXT NOT NULL,
                ref_type TEXT NOT NULL,
                FOREIGN KEY (source_symbol_id) REFERENCES global_symbols(symbol_id),
                FOREIGN KEY (target_symbol_id) REFERENCES global_symbols(symbol_id)
            )",
            [],
        )?;

        // Create project_deps table (Phase 7: Cross-Project Resolution)
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS project_deps (
                dep_id TEXT PRIMARY KEY,
                project_id TEXT NOT NULL,
                depends_on_project_id TEXT NOT NULL,
                dependency_type TEXT NOT NULL,
                UNIQUE(project_id, depends_on_project_id)
            )",
            [],
        )?;

        // Create indexes for query performance
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_nodes_project ON intel_nodes(project_id)",
            [],
        )?;
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_nodes_file ON intel_nodes(file_path)",
            [],
        )?;
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_nodes_symbol ON intel_nodes(symbol_name)",
            [],
        )?;
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_nodes_hash ON intel_nodes(content_hash)",
            [],
        )?;

        // Create indexes for global_symbols (Phase 7)
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_global_symbols_name ON global_symbols(symbol_name)",
            [],
        )?;
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_global_symbols_type ON global_symbols(symbol_type)",
            [],
        )?;
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_global_symbols_project ON global_symbols(project_id)",
            [],
        )?;
        // Partial index: only public symbols (the common cross-project lookup).
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_global_symbols_public ON global_symbols(symbol_id) WHERE is_public = 1",
            [],
        )?;

        // Create indexes for external_refs (Phase 7)
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_external_refs_source ON external_refs(source_symbol_id)",
            [],
        )?;
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_external_refs_target ON external_refs(target_symbol_id)",
            [],
        )?;

        // Create indexes for project_deps (Phase 7)
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_project_deps_project ON project_deps(project_id)",
            [],
        )?;

        Ok(())
    }

    /// Get the underlying connection
    pub fn conn(&self) -> &Connection {
        &self.conn
    }

    /// Get mutable connection
    pub fn conn_mut(&mut self) -> &mut Connection {
        &mut self.conn
    }

    /// Checkpoint the WAL so all data is flushed into the main database file.
    ///
    /// This truncates the -wal file and releases the associated locks, which
    /// matters before switching projects so another process can take over the
    /// database. Note: this does NOT close the SQLite connection — rusqlite
    /// closes it when `Storage` is dropped (or via `Connection::close`, which
    /// consumes the connection by value).
    ///
    /// # Errors
    /// Returns an error if the checkpoint cannot be performed.
    pub fn close(&mut self) -> SqliteResult<()> {
        if self.config.wal_enabled {
            // BUGFIX: `PRAGMA wal_checkpoint(TRUNCATE)` returns a result row
            // (busy, log, checkpointed), so `Connection::execute` fails with
            // `ExecuteReturnedResults`. Read (and discard) the row instead.
            self.conn
                .query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |_row| Ok(()))?;
        }
        // Optionally run optimize to clean up the database file
        // self.conn.execute("PRAGMA optimize", [])?;
        Ok(())
    }

    /// Load existing project IDs for a given base name.
    ///
    /// This is used for unique project ID generation to avoid conflicts.
    pub fn load_existing_ids(&self, base_name: &str) -> SqliteResult<Vec<UniqueProjectId>> {
        // NOTE: the underlying error is flattened to InvalidQuery to keep a
        // rusqlite-only result type; the original cause is lost here.
        ProjectMetadata::load_existing_ids(&self.conn, base_name)
            .map_err(|_| rusqlite::Error::InvalidQuery)
    }

    /// Store project metadata.
    ///
    /// This persists the unique project ID and associated metadata.
    pub fn store_project_metadata(
        &self,
        unique_id: &UniqueProjectId,
        project_path: &Path,
    ) -> SqliteResult<()> {
        let metadata = ProjectMetadata::new(project_path);
        // Override with the provided unique_id
        let metadata = ProjectMetadata {
            unique_project_id: unique_id.clone(),
            ..metadata
        };
        metadata
            .save(&self.conn)
            .map_err(|_| rusqlite::Error::InvalidQuery)
    }

    /// Current schema version. Increment when adding migrations.
    const SCHEMA_VERSION: u32 = 1;

    /// Run database migrations based on the stored schema version.
    /// Creates the version tracking table if it doesn't exist.
    ///
    /// # Errors
    /// Returns an error if the database's stored schema version is newer than
    /// [`Self::SCHEMA_VERSION`], or if any migration statement fails.
    fn run_migrations(&mut self) -> SqliteResult<()> {
        // Create version tracking table
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS schema_version (
                key TEXT PRIMARY KEY,
                version INTEGER NOT NULL
            )",
            [],
        )?;

        // Read current version. A fresh/empty table yields 0 via COALESCE;
        // unwrap_or(0) additionally treats a read failure as "no version yet".
        let current: u32 = self
            .conn
            .query_row(
                "SELECT COALESCE(MAX(version), 0) FROM schema_version WHERE key = 'schema'",
                [],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // Reject databases from newer versions — they may contain data
        // this version cannot interpret.
        if current > Self::SCHEMA_VERSION {
            return Err(rusqlite::Error::InvalidParameterName(format!(
                "Database schema v{} is newer than this version (v{}). Please upgrade LeIndex.",
                current,
                Self::SCHEMA_VERSION
            )));
        }

        // Add future migrations here:
        // if current < 2 { self.migrate_v1_to_v2()?; }
        // if current < 3 { self.migrate_v2_to_v3()?; }

        // Update stored version
        self.conn.execute(
            "INSERT OR REPLACE INTO schema_version (key, version) VALUES ('schema', ?1)",
            [Self::SCHEMA_VERSION],
        )?;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Opening a fresh database file must succeed end-to-end
    /// (pragmas, migrations, and schema creation).
    #[test]
    fn test_storage_creation() {
        let db_file = NamedTempFile::new().unwrap();
        assert!(Storage::open(db_file.path()).is_ok());
    }

    /// After open, all expected tables must be present in sqlite_master.
    #[test]
    fn test_schema_initialization() {
        let db_file = NamedTempFile::new().unwrap();
        let storage = Storage::open(db_file.path()).unwrap();

        let query = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND (name LIKE 'intel_%' OR name = 'analysis_cache' OR name = 'cache_telemetry' OR name LIKE 'global_%' OR name LIKE 'external_%' OR name LIKE 'project_%')";
        let table_count: i64 = storage
            .conn
            .query_row(query, [], |row| row.get(0))
            .unwrap();

        // intel_nodes, intel_edges, analysis_cache, cache_telemetry,
        // global_symbols, external_refs, project_deps, project_metadata
        assert_eq!(table_count, 8);
    }
}