// bcore_mutation/db.rs

1use crate::error::{MutationError, Result};
2use rusqlite::{params, Connection};
3use sha2::{Digest, Sha256};
4use std::path::Path;
5
/// SQL batch that `Database::ensure_schema` executes to set up the database.
///
/// It turns on foreign-key enforcement and then creates three tables plus
/// their indexes:
///
/// * `projects` — one row per project, unique on `name` and on `repository_url`.
/// * `runs` — one row per run, referencing `projects(id)` with `ON DELETE CASCADE`.
/// * `mutants` — one row per mutant, referencing `runs(id)` with
///   `ON DELETE CASCADE` and unique on `(run_id, patch_hash)`. `status` is
///   restricted by a `CHECK` constraint to a fixed list of values and defaults
///   to `'pending'`; `killed` is a virtual generated column that is 1 exactly
///   when `status = 'killed'`.
///
/// Every `CREATE` statement uses `IF NOT EXISTS`, so running the batch against
/// a database that already has these objects does not recreate them.
const SCHEMA: &str = "
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS projects (
  id              INTEGER PRIMARY KEY,
  name            TEXT NOT NULL,
  repository_url  TEXT,
  UNIQUE(name),
  UNIQUE(repository_url)
);

CREATE TABLE IF NOT EXISTS runs (
  id              INTEGER PRIMARY KEY,
  project_id      INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  commit_hash     TEXT NOT NULL,
  pr_number       INTEGER,
  created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  tool_version    TEXT,
  config_json     TEXT
);

CREATE INDEX IF NOT EXISTS idx_runs_project_created ON runs(project_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_runs_commit ON runs(commit_hash);

CREATE TABLE IF NOT EXISTS mutants (
  id              INTEGER PRIMARY KEY,
  run_id          INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
  diff            TEXT NOT NULL,
  patch_hash      TEXT NOT NULL,
  status          TEXT NOT NULL DEFAULT 'pending'
                    CHECK (status IN ('pending','running','killed','survived',
                                      'timeout','error','skipped','equivalent','unproductive')),
  killed          INTEGER GENERATED ALWAYS AS (CASE WHEN status='killed' THEN 1 ELSE 0 END) VIRTUAL,
  command_to_test TEXT,
  file_path       TEXT,
  operator        TEXT,
  UNIQUE(run_id, patch_hash)
);

CREATE INDEX IF NOT EXISTS idx_mutants_run_status ON mutants(run_id, status);
CREATE INDEX IF NOT EXISTS idx_mutants_file ON mutants(file_path);
CREATE INDEX IF NOT EXISTS idx_mutants_operator ON mutants(operator);
CREATE INDEX IF NOT EXISTS idx_mutants_killed ON mutants(killed);
";
50
/// Data collected during mutation for a single generated mutant.
///
/// One value corresponds to one row inserted into the `mutants` table by
/// `Database::insert_mutant_batch`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MutantData {
    /// Patch text stored in the `diff` column.
    pub diff: String,
    /// Value stored in the `patch_hash` column; together with the run id it
    /// forms the uniqueness key of a mutant.
    /// NOTE(review): presumably the output of `compute_patch_hash` — confirm at call sites.
    pub patch_hash: String,
    /// Value stored in the `file_path` column.
    pub file_path: String,
    /// Value stored in the `operator` column.
    pub operator: String,
}
58
/// A mutant row read back from the database.
///
/// Only the columns selected by `Database::get_mutants_for_run` are present.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MutantRow {
    /// Primary key of the row in the `mutants` table.
    pub id: i64,
    /// Contents of the `diff` column.
    pub diff: String,
    /// Contents of the `file_path` column; `None` when the column is NULL
    /// (the schema does not declare it `NOT NULL`).
    pub file_path: Option<String>,
}
65
/// Handle around a single `rusqlite` [`Connection`].
///
/// All SQL issued by the methods in `impl Database` goes through this
/// connection.
pub struct Database {
    // Private: callers interact with the database only through the methods.
    conn: Connection,
}
69
70impl Database {
71    /// Open (or create) the database at `path` and enable foreign keys.
72    pub fn open(path: &Path) -> Result<Self> {
73        let conn = Connection::open(path)?;
74        conn.execute_batch("PRAGMA foreign_keys = ON;")?;
75        Ok(Database { conn })
76    }
77
78    /// Create tables and indexes if they do not yet exist, and apply any
79    /// additive migrations needed for older databases.
80    pub fn ensure_schema(&self) -> Result<()> {
81        self.conn.execute_batch(SCHEMA)?;
82        // Migration: add config_json to runs if the column is missing.
83        // ALTER TABLE ADD COLUMN fails with "duplicate column name" when the
84        // column already exists; silence that specific error so the function
85        // is idempotent on databases created before this column was added.
86        if let Err(e) = self
87            .conn
88            .execute_batch("ALTER TABLE runs ADD COLUMN config_json TEXT;")
89        {
90            if !e.to_string().contains("duplicate column name") {
91                return Err(e.into());
92            }
93        }
94        Ok(())
95    }
96
97    /// Insert the Bitcoin Core project row if not already present.
98    pub fn seed_projects(&self) -> Result<()> {
99        self.conn.execute(
100            "INSERT OR IGNORE INTO projects (name, repository_url) VALUES (?1, ?2)",
101            params!["Bitcoin Core", "https://github.com/bitcoin/bitcoin"],
102        )?;
103        Ok(())
104    }
105
106    /// Return the id of the Bitcoin Core project row.
107    pub fn get_bitcoin_core_project_id(&self) -> Result<i64> {
108        let id = self.conn.query_row(
109            "SELECT id FROM projects WHERE name = 'Bitcoin Core'",
110            [],
111            |row| row.get(0),
112        )?;
113        Ok(id)
114    }
115
116    /// Create a new run row and return its id.
117    pub fn create_run(
118        &self,
119        project_id: i64,
120        commit_hash: &str,
121        tool_version: &str,
122        pr_number: Option<u32>,
123        config_json: Option<&str>,
124    ) -> Result<i64> {
125        self.conn.execute(
126            "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number, config_json)
127             VALUES (?1, ?2, ?3, ?4, ?5)",
128            params![project_id, commit_hash, tool_version, pr_number, config_json],
129        )?;
130        Ok(self.conn.last_insert_rowid())
131    }
132
133    /// Batch-insert mutants under `run_id` using a single transaction.
134    /// Duplicates (same run_id + patch_hash) are silently ignored.
135    pub fn insert_mutant_batch(&mut self, run_id: i64, mutants: &[MutantData]) -> Result<()> {
136        let tx = self.conn.transaction()?;
137        {
138            let mut stmt = tx.prepare(
139                "INSERT OR IGNORE INTO mutants
140                   (run_id, diff, patch_hash, status, file_path, operator)
141                 VALUES (?1, ?2, ?3, 'pending', ?4, ?5)",
142            )?;
143            for m in mutants {
144                stmt.execute(params![
145                    run_id,
146                    m.diff,
147                    m.patch_hash,
148                    m.file_path,
149                    m.operator
150                ])?;
151            }
152        }
153        tx.commit()?;
154        Ok(())
155    }
156
157    /// Return mutants belonging to `run_id`, optionally filtered by `file_path`.
158    /// When `survivors_only` is true, only mutants with status `'survived'` are returned.
159    pub fn get_mutants_for_run(
160        &self,
161        run_id: i64,
162        file_path: Option<&str>,
163        survivors_only: bool,
164    ) -> Result<Vec<MutantRow>> {
165        let map_row = |row: &rusqlite::Row<'_>| {
166            Ok(MutantRow {
167                id: row.get(0)?,
168                diff: row.get(1)?,
169                file_path: row.get(2)?,
170            })
171        };
172
173        let rows: Vec<MutantRow> = match (file_path, survivors_only) {
174            (Some(fp), false) => {
175                let mut stmt = self.conn.prepare(
176                    "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1 AND file_path = ?2",
177                )?;
178                let rows = stmt.query_map(params![run_id, fp], map_row)?
179                    .collect::<rusqlite::Result<_>>()?;
180                rows
181            }
182            (Some(fp), true) => {
183                let mut stmt = self.conn.prepare(
184                    "SELECT id, diff, file_path FROM mutants \
185                     WHERE run_id = ?1 AND file_path = ?2 AND status = 'survived'",
186                )?;
187                let rows = stmt.query_map(params![run_id, fp], map_row)?
188                    .collect::<rusqlite::Result<_>>()?;
189                rows
190            }
191            (None, false) => {
192                let mut stmt = self.conn.prepare(
193                    "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1",
194                )?;
195                let rows = stmt.query_map(params![run_id], map_row)?
196                    .collect::<rusqlite::Result<_>>()?;
197                rows
198            }
199            (None, true) => {
200                let mut stmt = self.conn.prepare(
201                    "SELECT id, diff, file_path FROM mutants \
202                     WHERE run_id = ?1 AND status = 'survived'",
203                )?;
204                let rows = stmt.query_map(params![run_id], map_row)?
205                    .collect::<rusqlite::Result<_>>()?;
206                rows
207            }
208        };
209
210        Ok(rows)
211    }
212
213    /// Update the status and command_to_test for a single mutant.
214    pub fn update_mutant_status(&self, id: i64, status: &str, command: &str) -> Result<()> {
215        self.conn.execute(
216            "UPDATE mutants SET status = ?1, command_to_test = ?2 WHERE id = ?3",
217            params![status, command, id],
218        )?;
219        Ok(())
220    }
221}
222
223/// Compute the SHA-256 hex digest of `diff`.
224pub fn compute_patch_hash(diff: &str) -> String {
225    let mut hasher = Sha256::new();
226    hasher.update(diff.as_bytes());
227    format!("{:x}", hasher.finalize())
228}
229
230/// Generate a proper unified diff by running `git diff --no-index` between the
231/// original file on disk and a temp file containing `mutated_content`.
232/// The resulting patch includes context lines and is suitable for `git apply`.
233pub async fn generate_diff(file_path: &str, mutated_content: &str) -> Result<String> {
234    use std::io::Write;
235    use tempfile::NamedTempFile;
236    use tokio::process::Command;
237
238    let mut tmp = NamedTempFile::new()?;
239    tmp.write_all(mutated_content.as_bytes())?;
240    tmp.flush()?;
241
242    let tmp_path = tmp.path().to_string_lossy().to_string();
243
244    // `git diff --no-index` exits with 1 when differences exist — that is expected.
245    let output = Command::new("git")
246        .args(["diff", "--no-index", "--", file_path, &tmp_path])
247        .output()
248        .await
249        .map_err(|e| MutationError::Git(format!("git diff failed to spawn: {}", e)))?;
250
251    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
252
253    if stdout.is_empty() {
254        return Err(MutationError::Git(format!(
255            "git diff produced no output for {}",
256            file_path
257        )));
258    }
259
260    // Fix the temp-file path back to the real file path in the diff headers.
261    // `git diff --no-index` shows the second argument's path in `+++ b/` and
262    // `diff --git … b/…`; replace those with `file_path`.
263    let fixed = stdout
264        .lines()
265        .map(|line| {
266            if line.starts_with("+++ ") {
267                format!("+++ b/{}", file_path)
268            } else if line.starts_with("diff --git ") {
269                format!("diff --git a/{} b/{}", file_path, file_path)
270            } else {
271                line.to_string()
272            }
273        })
274        .collect::<Vec<_>>()
275        .join("\n");
276
277    // Preserve trailing newline present in git diff output.
278    let fixed = if stdout.ends_with('\n') {
279        fixed + "\n"
280    } else {
281        fixed
282    };
283
284    Ok(fixed)
285}