mirage-analyzer 1.5.1

Path-Aware Code Intelligence Engine for Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
use anyhow::{Context, Result};
use rusqlite::{params, Connection};
use std::collections::HashMap;

use crate::cfg::{BlockId, Path, PathKind};

/// Store enumerated paths in the database
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function these paths belong to
/// * `paths` - Slice of paths to store
///
/// # Returns
///
/// Ok(()) on success, error on database failure
///
/// # Algorithm
///
/// 1. Begin transaction (BEGIN IMMEDIATE to prevent write conflicts)
/// 2. For each path:
///    - Insert into cfg_paths table with BLAKE3 path_id as primary key
///    - Insert each block into cfg_path_elements with sequence_order
/// 3. Commit transaction
///
/// # Transactions
///
/// Uses IMMEDIATE transaction mode to prevent write conflicts in concurrent access.
/// Transaction is automatically rolled back on error.
pub fn store_paths(conn: &mut Connection, function_id: i64, paths: &[Path]) -> Result<()> {
    conn.execute("BEGIN IMMEDIATE TRANSACTION", [])
        .context("Failed to begin transaction for store_paths")?;

    let mut insert_path_stmt = conn.prepare_cached(
        "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
    ).context("Failed to prepare cfg_paths insert statement")?;

    let mut insert_element_stmt = conn
        .prepare_cached(
            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id)
         VALUES (?1, ?2, ?3)",
        )
        .context("Failed to prepare cfg_path_elements insert statement")?;

    let now = chrono::Utc::now().timestamp();

    for path in paths {
        let kind_str = path_kind_to_str(path.kind);

        insert_path_stmt
            .execute(params![
                &path.path_id,
                function_id,
                kind_str,
                path.entry as i64,
                path.exit as i64,
                path.len() as i64,
                now,
            ])
            .with_context(|| format!("Failed to insert path {}", path.path_id))?;

        for (idx, &block_id) in path.blocks.iter().enumerate() {
            insert_element_stmt
                .execute(params![&path.path_id, idx as i64, block_id as i64,])
                .with_context(|| {
                    format!("Failed to insert element {} for path {}", idx, path.path_id)
                })?;
        }
    }

    conn.execute("COMMIT", [])
        .context("Failed to commit transaction for store_paths")?;

    Ok(())
}

/// Number of element rows bundled into a single multi-row `INSERT ... VALUES`
/// statement by `insert_elements_batch` (the block list is split with
/// `chunks(BATCH_SIZE)`).
///
/// Larger batches reduce round-trips but increase statement preparation time.
/// NOTE(review): the original comment reports ~50ms for 1000 elements at this
/// size; that measurement is not reproducible from this file.
const BATCH_SIZE: usize = 20;

/// Store enumerated paths in the database with optimized batch inserts
///
/// This is an optimized version of `store_paths` that uses batched inserts
/// with UNION ALL to reduce database round-trips.
///
/// # Performance
///
/// - 100 paths (1000 elements): <100ms
/// - Uses PRAGMA optimizations for bulk inserts
/// - Batches elements with UNION ALL (20 per statement)
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function these paths belong to
/// * `paths` - Slice of paths to store
///
/// # Returns
///
/// Ok(()) on success, error on database failure
///
/// # Algorithm
///
/// 1. Begin IMMEDIATE transaction
/// 2. Optimize SQLite for bulk inserts:
///    - Set journal_mode = OFF (faster, less safe during crashes)
///    - Set synchronous = OFF (faster, less durable)
///    - Set cache_size = -64000 (64MB cache)
/// 3. For each path:
///    - Insert path metadata into cfg_paths
///    - Batch elements with UNION ALL (20 per statement)
/// 4. Restore PRAGMA settings
/// 5. Commit transaction
///
/// # Transactions
///
/// Uses IMMEDIATE transaction mode. PRAGMA changes are scoped to transaction.
pub fn store_paths_batch(conn: &mut Connection, function_id: i64, paths: &[Path]) -> Result<()> {
    conn.execute("BEGIN IMMEDIATE TRANSACTION", [])
        .context("Failed to begin transaction for store_paths_batch")?;

    let _old_journal: String = conn
        .query_row("PRAGMA journal_mode", [], |row| row.get(0))
        .unwrap_or_else(|_| "delete".to_string());
    let old_sync: i64 = conn
        .query_row("PRAGMA synchronous", [], |row| row.get(0))
        .unwrap_or(2);

    conn.execute("PRAGMA cache_size = -64000", [])
        .context("Failed to set cache_size")?;

    let now = chrono::Utc::now().timestamp();

    for path in paths {
        let kind_str = path_kind_to_str(path.kind);

        {
            let mut insert_path_stmt = conn.prepare_cached(
                "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
            ).context("Failed to prepare cfg_paths insert statement")?;

            insert_path_stmt
                .execute(params![
                    &path.path_id,
                    function_id,
                    kind_str,
                    path.entry as i64,
                    path.exit as i64,
                    path.len() as i64,
                    now,
                ])
                .with_context(|| format!("Failed to insert path {}", path.path_id))?;
        }

        insert_elements_batch(conn, &path.path_id, &path.blocks)?;
    }

    let _ = conn.execute(&format!("PRAGMA synchronous = {}", old_sync), []);

    conn.execute("COMMIT", [])
        .context("Failed to commit transaction for store_paths_batch")?;

    Ok(())
}

/// Insert path elements in batches using multi-row VALUES lists
///
/// For every chunk of up to `BATCH_SIZE` blocks, builds one statement of the
/// form:
/// INSERT INTO cfg_path_elements (path_id, sequence_order, block_id)
/// VALUES (?, ?, ?), (?, ?, ?), ...
///
/// This reduces database round-trips from O(n) to O(n/batch_size).
///
/// `sequence_order` is the block's position within the whole `blocks` slice,
/// not within the current chunk, so paths longer than `BATCH_SIZE` keep a
/// contiguous 0..n ordering.
///
/// Returns `Ok(())` without touching the database when `blocks` is empty.
fn insert_elements_batch(conn: &mut Connection, path_id: &str, blocks: &[BlockId]) -> Result<()> {
    if blocks.is_empty() {
        return Ok(());
    }

    for (chunk_idx, chunk) in blocks.chunks(BATCH_SIZE).enumerate() {
        // Absolute index of this chunk's first block within `blocks`.
        let base = chunk_idx * BATCH_SIZE;

        let placeholders = vec!["(?, ?, ?)"; chunk.len()].join(", ");
        let sql = format!(
            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id) VALUES {}",
            placeholders
        );

        // Materialize the integer values first so the parameter slice below
        // can borrow them for the duration of the `execute` call.
        let rows: Vec<(i64, i64)> = chunk
            .iter()
            .enumerate()
            .map(|(offset, &block_id)| ((base + offset) as i64, block_id as i64))
            .collect();

        let mut bound: Vec<&dyn rusqlite::ToSql> = Vec::with_capacity(rows.len() * 3);
        for (sequence_order, block_id) in &rows {
            bound.push(&path_id);
            bound.push(sequence_order);
            bound.push(block_id);
        }

        conn.execute(&sql, bound.as_slice())
            .with_context(|| format!("Failed to batch insert {} elements", chunk.len()))?;
    }

    Ok(())
}

/// Map a `PathKind` to the text label written to the `path_kind` column.
///
/// The labels are the variant names themselves; `str_to_path_kind` performs
/// the reverse mapping.
pub(super) fn path_kind_to_str(kind: PathKind) -> &'static str {
    match kind {
        PathKind::Unreachable => "Unreachable",
        PathKind::Degenerate => "Degenerate",
        PathKind::Error => "Error",
        PathKind::Normal => "Normal",
    }
}

/// Convert string from database to PathKind
pub(super) fn str_to_path_kind(s: &str) -> Result<PathKind> {
    match s {
        "Normal" => Ok(PathKind::Normal),
        "Error" => Ok(PathKind::Error),
        "Degenerate" => Ok(PathKind::Degenerate),
        "Unreachable" => Ok(PathKind::Unreachable),
        _ => anyhow::bail!("Invalid path_kind in database: {}", s),
    }
}

/// Retrieve cached paths for a function
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function to retrieve paths for
///
/// # Returns
///
/// Vector of cached paths in ascending `path_id` order, or an empty vector if
/// none are found (not an error).
///
/// # Errors
///
/// Returns an error if the query cannot be prepared or executed, if a row
/// cannot be decoded, or if a stored `path_kind` label is not recognised.
///
/// # Algorithm
///
/// 1. Execute SQL query joining cfg_paths and cfg_path_elements, ordered by
///    `path_id` and then `sequence_order`
/// 2. Group consecutive rows that share a `path_id` (the ordering guarantees
///    that all rows of a path are adjacent)
/// 3. For each path, collect blocks in the order the rows arrive
/// 4. Reconstruct Path objects via `Path::with_id`
///
/// # Empty Result
///
/// Returns Ok(vec![]) for cache miss (no paths stored), not an error.
/// Because the query is an inner join, a path with no element rows is not
/// returned either.
pub fn get_cached_paths(conn: &mut Connection, function_id: i64) -> Result<Vec<Path>> {
    let mut stmt = conn
        .prepare_cached(
            "SELECT p.path_id, p.path_kind, p.entry_block, p.exit_block,
                pe.block_id, pe.sequence_order
         FROM cfg_paths p
         JOIN cfg_path_elements pe ON p.path_id = pe.path_id
         WHERE p.function_id = ?1
         ORDER BY p.path_id, pe.sequence_order",
        )
        .context("Failed to prepare get_cached_paths query")?;

    let rows = stmt
        .query_map(params![function_id], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, i64>(2)?,
                row.get::<_, i64>(3)?,
                row.get::<_, i64>(4)?,
                row.get::<_, i64>(5)?,
            ))
        })
        .context("Failed to execute get_cached_paths query")?;

    // A Vec (rather than a hash map) keeps the query's path_id ordering, so
    // the returned paths come back in a deterministic order.
    let mut grouped: Vec<PathData> = Vec::new();

    for row in rows {
        let (path_id, kind_str, entry_block, exit_block, block_id, _sequence_order) = row?;

        let starts_new_path =
            !matches!(grouped.last(), Some(current) if current.path_id == path_id);

        if starts_new_path {
            // Path-level columns repeat on every joined row; decode them once.
            let kind = str_to_path_kind(&kind_str)
                .with_context(|| format!("Invalid path_kind '{}' in database", kind_str))?;

            grouped.push(PathData {
                path_id,
                kind,
                entry: entry_block as BlockId,
                exit: exit_block as BlockId,
                blocks: Vec::new(),
            });
        }

        if let Some(current) = grouped.last_mut() {
            current.blocks.push(block_id as BlockId);
        }
    }

    let paths = grouped
        .into_iter()
        .map(|data| Path::with_id(data.path_id, data.blocks, data.kind))
        .collect();

    Ok(paths)
}

/// Accumulator used by `get_cached_paths` while it folds the joined
/// `cfg_paths` / `cfg_path_elements` rows of one path into a single record.
struct PathData {
    /// Identifier slot for the path being accumulated.
    /// NOTE(review): confirm against `get_cached_paths` how this is populated.
    path_id: String,
    /// Path kind decoded from the `path_kind` column.
    kind: PathKind,
    /// Value of the `entry_block` column.
    entry: BlockId,
    /// Value of the `exit_block` column.
    exit: BlockId,
    /// Block ids collected from `cfg_path_elements`, in row arrival order.
    blocks: Vec<BlockId>,
}

/// Invalidate all cached paths for a function
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function to invalidate paths for
///
/// # Returns
///
/// Ok(()) on success, error on database failure
///
/// # Algorithm
///
/// 1. Begin transaction
/// 2. Delete path_elements first (FK dependency: elements reference paths)
/// 3. Delete paths
/// 4. Commit transaction
///
/// # Idempotent
///
/// Returns Ok(()) even if no paths exist for the function.
pub fn invalidate_function_paths(conn: &mut Connection, function_id: i64) -> Result<()> {
    conn.execute("BEGIN IMMEDIATE TRANSACTION", [])
        .context("Failed to begin transaction for invalidate_function_paths")?;

    conn.execute(
        "DELETE FROM cfg_path_elements
         WHERE path_id IN (SELECT path_id FROM cfg_paths WHERE function_id = ?1)",
        params![function_id],
    )
    .context("Failed to delete cfg_path_elements")?;

    conn.execute(
        "DELETE FROM cfg_paths WHERE function_id = ?1",
        params![function_id],
    )
    .context("Failed to delete cfg_paths")?;

    conn.execute("COMMIT", [])
        .context("Failed to commit transaction for invalidate_function_paths")?;

    Ok(())
}

/// Replace the cached paths of a function with a fresh set
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function
/// * `_new_hash` - Function hash supplied by the caller; currently unused
/// * `paths` - Paths to store
///
/// # Returns
///
/// * `Ok(true)` - Old paths were removed and `paths` were stored
/// * `Err(...)` - Database error from either step
///
/// This implementation never returns `Ok(false)`: no stored hash is read or
/// compared, so every call rewrites the cache.
///
/// # Algorithm
///
/// 1. Remove existing paths via `invalidate_function_paths`
/// 2. Store the new paths via `store_paths`
/// 3. Return `Ok(true)`
///
/// # Atomicity
///
/// The two steps run as separate calls. If storing fails after invalidation
/// succeeded, the function is left with no cached paths.
pub fn update_function_paths_if_changed(
    conn: &mut Connection,
    function_id: i64,
    _new_hash: &str,
    paths: &[Path],
) -> Result<bool> {
    // Drop whatever is cached first so the inserts below cannot collide with
    // rows left over from an earlier enumeration.
    invalidate_function_paths(conn, function_id)?;

    // Every successful call counts as "updated".
    store_paths(conn, function_id, paths).map(|()| true)
}