1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
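//! Database migration command implementation
//!
//! Handles migrations from schema v1-v11 to v12:
//! - v2: magellan_meta table
//! - v4: BLAKE3-based SymbolId (additive fields)
//! - v5: AST nodes table for hierarchy storage
//! - v6: AST nodes file_id column for per-file tracking
//! - v7: CFG blocks table for control flow graph storage
//! - v8: cfg_blocks.cfg_hash column for cache invalidation
//! - v9: cfg_blocks.statements column for AST snippets
//! - v10: cfg_blocks 4D spatial-temporal coordinate columns
//! - v11: geo_index_meta table for lazy geometric index tracking
//! - v12: symbol_fts FTS5 virtual table for fast symbol search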
use anyhow::Result;
use rusqlite::{params, OptionalExtension, Transaction};
use std::fs;
use std::path::{Path, PathBuf};
/// Current Magellan schema version.
///
/// [`run_migrate`] upgrades databases with a lower stored version to this
/// value and refuses databases whose stored version is higher.
///
/// Version history:
/// - v4: BLAKE3-based SymbolId, canonical_fqn, display_fqn
/// - v5: AST nodes table for hierarchy storage
/// - v6: AST nodes file_id column for per-file tracking
/// - v7: CFG blocks table for control flow graph storage
/// - v8: cfg_blocks.cfg_hash column for cache invalidation
/// - v9: cfg_blocks.statements column for AST snippets
/// - v10: cfg_blocks 4D spatial-temporal coordinate columns
/// - v11: geo_index_meta table for lazy geometric index tracking
/// - v12: symbol_fts FTS5 virtual table for fast symbol search
pub const MAGELLAN_SCHEMA_VERSION: i64 = 12;
/// Migration result summary
///
/// Returned by [`run_migrate`] for every outcome that is not a hard error,
/// including the "nothing to do", "dry run" and "refused" cases.
#[derive(Debug, Clone)]
pub struct MigrationResult {
/// `true` when the database is already current, when a dry run found a
/// pending migration, or when the migration was committed; `false` when the
/// database file is missing or its version is newer than this build.
pub success: bool,
/// Location of the backup copy; `Some` only when a migration actually ran
/// and backups were not disabled.
pub backup_path: Option<PathBuf>,
/// Schema version found before migrating: `1` when no version is stored,
/// `0` when the database file does not exist.
pub old_version: i64,
/// Target schema version; always [`MAGELLAN_SCHEMA_VERSION`].
pub new_version: i64,
/// Human-readable description of the outcome.
pub message: String,
}
/// Run database migration
///
/// Reads the schema version stored in `magellan_meta`, optionally copies the
/// database file to a backup, then applies all pending schema steps and the
/// version bump inside one transaction. If a step fails, the transaction is
/// dropped without being committed (rusqlite rolls it back by default).
///
/// A database without a `magellan_meta` table, or without an `id=1` row in it,
/// is treated as schema version 1.
///
/// # Arguments
/// * `db_path` - Path to database file
/// * `dry_run` - If true, check version only without migrating
/// * `no_backup` - If true, skip backup creation
///
/// # Returns
/// Migration result with version info and backup path. Non-fatal outcomes
/// (missing file, database newer than this build, already current, dry run)
/// are reported through `success` and `message` rather than as errors.
///
/// # Errors
/// Returns an error if the database cannot be opened or queried, if the backup
/// copy fails, or if any migration statement or the final commit fails.
pub fn run_migrate(db_path: PathBuf, dry_run: bool, no_backup: bool) -> Result<MigrationResult> {
    // Every outcome reports the same target version; only these fields vary.
    let outcome = |success: bool,
                   backup_path: Option<PathBuf>,
                   old_version: i64,
                   message: String| MigrationResult {
        success,
        backup_path,
        old_version,
        new_version: MAGELLAN_SCHEMA_VERSION,
        message,
    };

    // Check the file first so that a missing database is reported, not opened.
    if !db_path.exists() {
        return Ok(outcome(
            false,
            None,
            0,
            format!("Database not found: {}", db_path.display()),
        ));
    }

    let mut conn = rusqlite::Connection::open(&db_path)?;

    // No recorded version means the database predates magellan_meta (v1).
    let old_version = stored_schema_version(&conn)?.unwrap_or(1);

    if old_version == MAGELLAN_SCHEMA_VERSION {
        return Ok(outcome(
            true,
            None,
            old_version,
            "Database already at current version".to_string(),
        ));
    }

    // Never downgrade: a newer database may contain structures unknown here.
    if old_version > MAGELLAN_SCHEMA_VERSION {
        return Ok(outcome(
            false,
            None,
            old_version,
            format!(
                "Database version {} is newer than current {}",
                old_version, MAGELLAN_SCHEMA_VERSION
            ),
        ));
    }

    if dry_run {
        return Ok(outcome(
            true,
            None,
            old_version,
            format!(
                "Would migrate from version {} to {} (dry run)",
                old_version, MAGELLAN_SCHEMA_VERSION
            ),
        ));
    }

    // Take the backup before the first write to the database.
    let backup_path = if no_backup {
        None
    } else {
        Some(create_backup(&db_path)?)
    };

    // Schema steps and version bump share one transaction so they land together.
    let tx = conn.transaction()?;
    migrate_from_version(&tx, old_version)?;

    // NOTE(review): this UPDATE changes no rows when magellan_meta has no id=1
    // row (e.g. the table was only just created by the v1 -> v2 step). Confirm
    // that whatever opens the database afterwards seeds that row; otherwise the
    // new version is never recorded.
    tx.execute(
        "UPDATE magellan_meta SET magellan_schema_version=?1 WHERE id=1",
        params![MAGELLAN_SCHEMA_VERSION],
    )?;
    tx.commit()?;

    Ok(outcome(
        true,
        backup_path,
        old_version,
        format!(
            "Migrated from version {} to {}",
            old_version, MAGELLAN_SCHEMA_VERSION
        ),
    ))
}

/// Read the schema version recorded in `magellan_meta`, if any.
///
/// Returns `Ok(None)` when the table or its `id=1` row is absent. Any other
/// SQLite failure (for example a file that is not a database) is returned as
/// an error instead of being mistaken for "no version recorded".
fn stored_schema_version(conn: &rusqlite::Connection) -> Result<Option<i64>> {
    let has_meta_table = conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='magellan_meta' LIMIT 1",
            [],
            |_| Ok(()),
        )
        .optional()?
        .is_some();
    if !has_meta_table {
        return Ok(None);
    }

    let version: Option<i64> = conn
        .query_row(
            "SELECT magellan_schema_version FROM magellan_meta WHERE id=1",
            [],
            |row| row.get(0),
        )
        .optional()?;
    Ok(version)
}
/// Copy the database file to a timestamped sibling and return its path.
///
/// The copy keeps the original file stem and replaces the extension with
/// `v<UTC timestamp>.bak`, where the timestamp is formatted as
/// `%Y%m%d_%H%M%S`.
///
/// # Errors
/// Returns an error if the file copy fails.
fn create_backup(db_path: &Path) -> Result<PathBuf> {
    let stamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
    let extension = format!("v{}.bak", stamp);
    let destination = db_path.with_extension(extension);

    fs::copy(db_path, &destination)?;
    Ok(destination)
}
/// Execute migration steps from old version
fn migrate_from_version(tx: &Transaction, old_version: i64) -> Result<()> {
// For schema version 3 -> 4 migration:
// The schema change is BLAKE3-based SymbolId and canonical_fqn/display_fqn fields.
// These fields are added with #[serde(default)] so old data deserializes correctly.
// The migration primarily needs to ensure the magellan_meta table exists and
// that new SymbolNode fields can be added.
if old_version < 2 {
// Create magellan_meta table if it doesn't exist (v1 -> v2)
tx.execute(
"CREATE TABLE IF NOT EXISTS magellan_meta (
id INTEGER PRIMARY KEY CHECK (id = 1),
magellan_schema_version INTEGER NOT NULL,
sqlitegraph_schema_version INTEGER NOT NULL,
created_at INTEGER NOT NULL
)",
[],
)?;
}
if old_version < 4 {
// v3 -> v4: BLAKE3 SymbolId migration
// SymbolNode schema changes are additive (Option fields with defaults)
// No explicit schema migration needed - just version bump
// Existing symbols will have symbol_id=None, new symbols get BLAKE3 IDs
}
if old_version < 5 {
// v4 -> v5: AST nodes table
// Create ast_nodes table for storing AST hierarchy
tx.execute(
"CREATE TABLE IF NOT EXISTS ast_nodes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
parent_id INTEGER,
kind TEXT NOT NULL,
byte_start INTEGER NOT NULL,
byte_end INTEGER NOT NULL
)",
[],
)?;
// Create indexes for efficient queries
tx.execute(
"CREATE INDEX IF NOT EXISTS idx_ast_nodes_parent
ON ast_nodes(parent_id)",
[],
)?;
tx.execute(
"CREATE INDEX IF NOT EXISTS idx_ast_nodes_span
ON ast_nodes(byte_start, byte_end)",
[],
)?;
}
if old_version < 6 {
// v5 -> v6: Add file_id to ast_nodes table
// Add file_id column for per-file AST node tracking
tx.execute("ALTER TABLE ast_nodes ADD COLUMN file_id INTEGER", [])?;
// Create index for efficient per-file queries
tx.execute(
"CREATE INDEX IF NOT EXISTS idx_ast_nodes_file_id
ON ast_nodes(file_id)",
[],
)?;
}
if old_version < 7 {
// v6 -> v7: Add cfg_blocks table for control flow graph storage
tx.execute(
"CREATE TABLE IF NOT EXISTS cfg_blocks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
function_id INTEGER NOT NULL,
kind TEXT NOT NULL,
terminator TEXT NOT NULL,
byte_start INTEGER NOT NULL,
byte_end INTEGER NOT NULL,
start_line INTEGER NOT NULL,
start_col INTEGER NOT NULL,
end_line INTEGER NOT NULL,
end_col INTEGER NOT NULL,
FOREIGN KEY (function_id) REFERENCES graph_entities(id) ON DELETE CASCADE
)",
[],
)?;
// Index for function-based queries
tx.execute(
"CREATE INDEX IF NOT EXISTS idx_cfg_blocks_function
ON cfg_blocks(function_id)",
[],
)?;
// Index for span-based position queries
tx.execute(
"CREATE INDEX IF NOT EXISTS idx_cfg_blocks_span
ON cfg_blocks(byte_start, byte_end)",
[],
)?;
}
if old_version < 8 {
// v7 -> v8: Add cfg_hash column for cache invalidation
// This allows tools like Mirage to detect when CFG structure changes
tx.execute("ALTER TABLE cfg_blocks ADD COLUMN cfg_hash TEXT", [])?;
// Index for hash-based cache lookups
tx.execute(
"CREATE INDEX IF NOT EXISTS idx_cfg_blocks_hash
ON cfg_blocks(cfg_hash)",
[],
)?;
}
if old_version < 9 {
// v8 -> v9: Add statements column to cfg_blocks for AST snippets
tx.execute("ALTER TABLE cfg_blocks ADD COLUMN statements TEXT", [])?;
}
if old_version < 10 {
// v9 -> v10: Add 4D spatial-temporal coordinate columns to cfg_blocks
tx.execute(
"ALTER TABLE cfg_blocks ADD COLUMN coord_x INTEGER DEFAULT 0",
[],
)?;
tx.execute(
"ALTER TABLE cfg_blocks ADD COLUMN coord_y INTEGER DEFAULT 0",
[],
)?;
tx.execute(
"ALTER TABLE cfg_blocks ADD COLUMN coord_z INTEGER DEFAULT 0",
[],
)?;
tx.execute("ALTER TABLE cfg_blocks ADD COLUMN coord_t TEXT", [])?;
}
if old_version < 11 {
// v10 -> v11: Add geo_index_meta table for lazy geometric index tracking
// This table records when a .geo file was built from the SQLite database
tx.execute(
"CREATE TABLE IF NOT EXISTS geo_index_meta (\n id INTEGER PRIMARY KEY CHECK (id = 1),\n geo_path TEXT NOT NULL,\n built_at INTEGER NOT NULL,\n schema_version INTEGER NOT NULL,\n symbol_count INTEGER NOT NULL,\n call_count INTEGER NOT NULL,\n cfg_block_count INTEGER NOT NULL,\n checksum TEXT NOT NULL\n )",
[],
)?;
}
if old_version < 12 {
// v11 -> v12: Add FTS5 virtual table for fast symbol search
// FTS5 indexes the 'name' column from graph_entities for prefix/full-text search
tx.execute(
"CREATE VIRTUAL TABLE IF NOT EXISTS symbol_fts USING fts5(\n name,\n content='graph_entities',\n content_rowid='id'\n )",
[],
)?;
}
Ok(())
}