// mirage-analyzer 1.7.0
//
// Path-Aware Code Intelligence Engine for Rust
// Documentation
use anyhow::{Context, Result};
use rusqlite::{params, Connection};
use std::collections::HashMap;

use crate::cfg::{BlockId, Path, PathKind};

/// Store enumerated paths in the database
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function these paths belong to
/// * `paths` - Slice of paths to store
///
/// # Returns
///
/// Ok(()) on success, error on database failure
///
/// # Algorithm
///
/// 1. Begin transaction (BEGIN IMMEDIATE to prevent write conflicts)
/// 2. For each path:
///    - Insert into cfg_paths table with BLAKE3 path_id as primary key
///    - Insert each block into cfg_path_elements with sequence_order
/// 3. Commit transaction
///
/// # Transactions
///
/// Uses IMMEDIATE transaction mode to prevent write conflicts in concurrent access.
/// Transaction is automatically rolled back on error.
pub fn store_paths(conn: &mut Connection, function_id: i64, paths: &[Path]) -> Result<()> {
    conn.execute("BEGIN IMMEDIATE TRANSACTION", [])
        .context("Failed to begin transaction for store_paths")?;

    let mut insert_path_stmt = conn.prepare_cached(
        "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
    ).context("Failed to prepare cfg_paths insert statement")?;

    let mut insert_element_stmt = conn
        .prepare_cached(
            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id)
         VALUES (?1, ?2, ?3)",
        )
        .context("Failed to prepare cfg_path_elements insert statement")?;

    let now = chrono::Utc::now().timestamp();

    for path in paths {
        let kind_str = path_kind_to_str(path.kind);

        insert_path_stmt
            .execute(params![
                &path.path_id,
                function_id,
                kind_str,
                path.entry as i64,
                path.exit as i64,
                path.len() as i64,
                now,
            ])
            .with_context(|| format!("Failed to insert path {}", path.path_id))?;

        for (idx, &block_id) in path.blocks.iter().enumerate() {
            insert_element_stmt
                .execute(params![&path.path_id, idx as i64, block_id as i64,])
                .with_context(|| {
                    format!("Failed to insert element {} for path {}", idx, path.path_id)
                })?;
        }
    }

    conn.execute("COMMIT", [])
        .context("Failed to commit transaction for store_paths")?;

    Ok(())
}

/// Maximum number of path elements written by one multi-row INSERT
///
/// `insert_elements_batch` splits a path's blocks into chunks of this size and
/// emits one `INSERT ... VALUES (..), (..), ...` statement per chunk (three
/// bound parameters per row).
///
/// Larger batches reduce round-trips but increase statement preparation time.
/// 20 rows per batch was chosen as a balance (the original author reports
/// ~50ms for 1000 elements; not re-measured here).
const BATCH_SIZE: usize = 20;

/// Store enumerated paths in the database with batched element inserts
///
/// This is a variant of `store_paths` that writes path elements through
/// `insert_elements_batch`, i.e. several rows per INSERT statement, to reduce
/// database round-trips.
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function these paths belong to
/// * `paths` - Slice of paths to store
///
/// # Returns
///
/// Ok(()) on success, error on database failure
///
/// # Algorithm
///
/// 1. Remember the connection's current `cache_size`, then raise it to
///    -64000 (64MB) for the bulk insert
/// 2. Begin IMMEDIATE transaction
/// 3. For each path:
///    - Insert path metadata into cfg_paths
///    - Insert its elements in batches via `insert_elements_batch`
/// 4. Commit, or roll back if any step failed
/// 5. Restore the previous `cache_size` (best effort)
///
/// `journal_mode` and `synchronous` are deliberately left untouched: this
/// function does not trade durability for speed.
///
/// # Transactions
///
/// Uses IMMEDIATE transaction mode. On any failure after the transaction has
/// begun, a ROLLBACK is issued so the connection is not left inside an open
/// transaction and no partial write remains.
///
/// # Errors
///
/// Returns an error (with context naming the failing step) if the cache size
/// cannot be set, the transaction cannot be started or committed, or any
/// insert fails.
pub fn store_paths_batch(conn: &mut Connection, function_id: i64, paths: &[Path]) -> Result<()> {
    // If the current value cannot be read we simply skip the restore below.
    let previous_cache_size: Option<i64> = conn
        .query_row("PRAGMA cache_size", [], |row| row.get(0))
        .ok();

    conn.execute("PRAGMA cache_size = -64000", [])
        .context("Failed to set cache_size")?;

    let outcome = run_batch_transaction(conn, function_id, paths);

    if let Some(size) = previous_cache_size {
        // Best effort: failing to shrink the cache again must not mask the
        // real outcome of the store.
        let _ = conn.execute(&format!("PRAGMA cache_size = {}", size), []);
    }

    outcome
}

/// Run the batched inserts inside one IMMEDIATE transaction, rolling back on any failure.
fn run_batch_transaction(conn: &mut Connection, function_id: i64, paths: &[Path]) -> Result<()> {
    conn.execute("BEGIN IMMEDIATE TRANSACTION", [])
        .context("Failed to begin transaction for store_paths_batch")?;

    let outcome = insert_paths_batched(conn, function_id, paths).and_then(|()| {
        conn.execute("COMMIT", [])
            .map(|_| ())
            .context("Failed to commit transaction for store_paths_batch")
    });

    if outcome.is_err() {
        // Best effort: the original error is what the caller needs to see.
        let _ = conn.execute("ROLLBACK", []);
    }

    outcome
}

/// Insert every path's cfg_paths row followed by its batched cfg_path_elements rows.
fn insert_paths_batched(conn: &mut Connection, function_id: i64, paths: &[Path]) -> Result<()> {
    // One timestamp for the whole batch so every row of this call shares created_at.
    let now = chrono::Utc::now().timestamp();

    for path in paths {
        let kind_str = path_kind_to_str(path.kind);

        // Scoped so the cached statement's borrow of `conn` ends before the
        // element helper needs `&mut Connection`.
        {
            let mut insert_path_stmt = conn.prepare_cached(
                "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
            ).context("Failed to prepare cfg_paths insert statement")?;

            insert_path_stmt
                .execute(params![
                    &path.path_id,
                    function_id,
                    kind_str,
                    path.entry as i64,
                    path.exit as i64,
                    path.len() as i64,
                    now,
                ])
                .with_context(|| format!("Failed to insert path {}", path.path_id))?;
        }

        insert_elements_batch(conn, &path.path_id, &path.blocks)?;
    }

    Ok(())
}

/// Insert path elements in batches using multi-row INSERT statements
///
/// For every chunk of at most `BATCH_SIZE` blocks, builds one statement of
/// the form:
/// INSERT INTO cfg_path_elements (path_id, sequence_order, block_id)
/// VALUES (?, ?, ?), (?, ?, ?), ...
///
/// This reduces database round-trips from O(n) to O(n/batch_size).
///
/// `sequence_order` is the block's index within the whole `blocks` slice
/// (not within its chunk), so paths longer than one batch keep a unique,
/// strictly increasing order.
///
/// An empty `blocks` slice produces no statements and returns Ok(()).
///
/// # Errors
///
/// Returns an error if any batch INSERT fails; batches executed before the
/// failing one are not undone here (the caller's transaction handles that).
fn insert_elements_batch(conn: &mut Connection, path_id: &str, blocks: &[BlockId]) -> Result<()> {
    for (chunk_index, chunk) in blocks.chunks(BATCH_SIZE).enumerate() {
        // Position of this chunk's first block within the full path.
        let first_position = chunk_index * BATCH_SIZE;

        let placeholders = vec!["(?, ?, ?)"; chunk.len()].join(", ");
        let sql = format!(
            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id) VALUES {}",
            placeholders
        );

        let mut flat_params: Vec<rusqlite::types::Value> = Vec::with_capacity(chunk.len() * 3);
        for (offset, &block_id) in chunk.iter().enumerate() {
            flat_params.push(rusqlite::types::Value::Text(path_id.to_string()));
            flat_params.push(rusqlite::types::Value::Integer(
                (first_position + offset) as i64,
            ));
            flat_params.push(rusqlite::types::Value::Integer(block_id as i64));
        }

        let params_ref: Vec<&dyn rusqlite::ToSql> = flat_params
            .iter()
            .map(|v| v as &dyn rusqlite::ToSql)
            .collect();

        conn.execute(&sql, params_ref.as_slice())
            .with_context(|| format!("Failed to batch insert {} elements", chunk.len()))?;
    }

    Ok(())
}

/// Map a `PathKind` to the text stored in the `path_kind` column
///
/// Each variant maps to its own name; `str_to_path_kind` performs the
/// reverse lookup. The match is exhaustive, so adding a variant to
/// `PathKind` forces this function to be updated.
pub(super) fn path_kind_to_str(kind: PathKind) -> &'static str {
    match kind {
        PathKind::Unreachable => "Unreachable",
        PathKind::Degenerate => "Degenerate",
        PathKind::Error => "Error",
        PathKind::Normal => "Normal",
    }
}

/// Convert string from database to PathKind
pub(super) fn str_to_path_kind(s: &str) -> Result<PathKind> {
    match s {
        "Normal" => Ok(PathKind::Normal),
        "Error" => Ok(PathKind::Error),
        "Degenerate" => Ok(PathKind::Degenerate),
        "Unreachable" => Ok(PathKind::Unreachable),
        _ => anyhow::bail!("Invalid path_kind in database: {}", s),
    }
}

/// Retrieve cached paths for a function
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function to retrieve paths for
///
/// # Returns
///
/// Vector of cached paths sorted by `path_id`, or an empty vector if none
/// are found (not an error).
///
/// # Algorithm
///
/// 1. Execute SQL query joining cfg_paths and cfg_path_elements
/// 2. Group rows by path_id; rows arrive ordered by sequence_order, so each
///    path's blocks are collected in order
/// 3. Sort the groups by path_id so the result order is deterministic
///    (a `HashMap` alone would yield an arbitrary order on every call)
/// 4. Reconstruct Path objects via `Path::with_id`
///
/// Because the query uses an inner JOIN, a cfg_paths row with no
/// cfg_path_elements rows is not returned.
///
/// # Empty Result
///
/// Returns Ok(vec![]) for cache miss (no paths stored), not an error.
///
/// # Errors
///
/// Returns an error if the query cannot be prepared or executed, a row
/// cannot be decoded, or a stored `path_kind` is not a known value.
pub fn get_cached_paths(conn: &mut Connection, function_id: i64) -> Result<Vec<Path>> {
    let mut stmt = conn
        .prepare_cached(
            "SELECT p.path_id, p.path_kind, p.entry_block, p.exit_block,
                pe.block_id, pe.sequence_order
         FROM cfg_paths p
         JOIN cfg_path_elements pe ON p.path_id = pe.path_id
         WHERE p.function_id = ?1
         ORDER BY p.path_id, pe.sequence_order",
        )
        .context("Failed to prepare get_cached_paths query")?;

    let mut path_data: HashMap<String, PathData> = HashMap::new();

    let rows = stmt
        .query_map(params![function_id], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, i64>(2)?,
                row.get::<_, i64>(3)?,
                row.get::<_, i64>(4)?,
                row.get::<_, i64>(5)?,
            ))
        })
        .context("Failed to execute get_cached_paths query")?;

    for row in rows {
        // entry/exit/sequence_order are decoded (so malformed rows still
        // fail) but not needed: the path is rebuilt from its ordered blocks.
        let (path_id, kind_str, _entry_block, _exit_block, block_id, _sequence_order) = row?;
        let kind = str_to_path_kind(&kind_str)
            .with_context(|| format!("Invalid path_kind '{}' in database", kind_str))?;

        path_data
            .entry(path_id)
            .or_insert_with(|| PathData {
                kind,
                blocks: Vec::new(),
            })
            .blocks
            .push(block_id as BlockId);
    }

    // HashMap iteration order is unspecified; sort so callers always see the
    // same ordering the SQL `ORDER BY p.path_id` asked for.
    let mut grouped: Vec<(String, PathData)> = path_data.into_iter().collect();
    grouped.sort_unstable_by(|a, b| a.0.cmp(&b.0));

    let paths = grouped
        .into_iter()
        .map(|(path_id, data)| Path::with_id(path_id, data.blocks, data.kind))
        .collect();

    Ok(paths)
}

/// Per-path accumulator used by `get_cached_paths` while grouping query rows
/// by `path_id`, before the final `Path` values are constructed.
struct PathData {
    /// Kind parsed from the row's `path_kind` column.
    kind: PathKind,
    /// Block IDs of the path, appended in the order the rows are returned.
    blocks: Vec<BlockId>,
}

/// Invalidate all cached paths for a function
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function to invalidate paths for
///
/// # Returns
///
/// Ok(()) on success, error on database failure
///
/// # Algorithm
///
/// 1. Begin transaction
/// 2. Delete path_elements first (FK dependency: elements reference paths)
/// 3. Delete paths
/// 4. Commit transaction
///
/// # Idempotent
///
/// Returns Ok(()) even if no paths exist for the function.
pub fn invalidate_function_paths(conn: &mut Connection, function_id: i64) -> Result<()> {
    conn.execute("BEGIN IMMEDIATE TRANSACTION", [])
        .context("Failed to begin transaction for invalidate_function_paths")?;

    conn.execute(
        "DELETE FROM cfg_path_elements
         WHERE path_id IN (SELECT path_id FROM cfg_paths WHERE function_id = ?1)",
        params![function_id],
    )
    .context("Failed to delete cfg_path_elements")?;

    conn.execute(
        "DELETE FROM cfg_paths WHERE function_id = ?1",
        params![function_id],
    )
    .context("Failed to delete cfg_paths")?;

    conn.execute("COMMIT", [])
        .context("Failed to commit transaction for invalidate_function_paths")?;

    Ok(())
}

/// Replace the stored paths for a function
///
/// NOTE(review): despite the name, this function currently performs NO hash
/// comparison. `_new_hash` is accepted for interface compatibility but is
/// never read, no stored hash is consulted or updated, and the paths are
/// rewritten unconditionally on every call.
///
/// # Arguments
///
/// * `conn` - Database connection
/// * `function_id` - ID of the function
/// * `_new_hash` - New function hash; currently ignored
/// * `paths` - Paths to store in place of the existing ones
///
/// # Returns
///
/// * `Ok(true)` - Old paths were removed and `paths` were stored
///   (this is the only success value the current implementation returns;
///   `Ok(false)` is never produced)
/// * `Err(...)` - Database error from either step
///
/// # Algorithm
///
/// 1. Invalidate old paths via `invalidate_function_paths`
/// 2. Store new paths via `store_paths`
/// 3. Return Ok(true)
///
/// # Atomicity
///
/// The two steps each begin and commit their own transaction, so they are
/// not atomic together: if `store_paths` fails, the old paths have already
/// been deleted.
pub fn update_function_paths_if_changed(
    conn: &mut Connection,
    function_id: i64,
    _new_hash: &str,
    paths: &[Path],
) -> Result<bool> {
    // Always drop whatever is cached for this function first...
    invalidate_function_paths(conn, function_id)?;

    // ...then write the supplied paths in a second, separate transaction.
    store_paths(conn, function_id, paths)?;

    Ok(true)
}