// bcore_mutation/db.rs

1use crate::error::{MutationError, Result};
2use rusqlite::{params, Connection};
3use sha2::{Digest, Sha256};
4use std::path::Path;
5
/// SQL batch that creates the database layout used by this module.
///
/// Every statement is written with `IF NOT EXISTS`, so the batch can be
/// executed repeatedly against the same database file.
///
/// * `projects` – named projects; both `name` and `repository_url` are unique.
/// * `runs` – rows that reference a project and record the commit hash,
///   optional PR number, creation timestamp, tool version and config JSON.
///   Deleting a project cascades to its runs.
/// * `mutants` – one row per mutant of a run. `status` is restricted by a
///   `CHECK` constraint to a fixed set of values and defaults to `'pending'`;
///   `killed` is a virtual generated column that is 1 exactly when
///   `status = 'killed'`. `(run_id, patch_hash)` is unique, and deleting a
///   run cascades to its mutants.
///
/// NOTE: this text is executed verbatim by SQLite, so it must contain nothing
/// but SQL (no viewer line numbers or other decoration).
const SCHEMA: &str = "
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS projects (
  id              INTEGER PRIMARY KEY,
  name            TEXT NOT NULL,
  repository_url  TEXT,
  UNIQUE(name),
  UNIQUE(repository_url)
);

CREATE TABLE IF NOT EXISTS runs (
  id              INTEGER PRIMARY KEY,
  project_id      INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  commit_hash     TEXT NOT NULL,
  pr_number       INTEGER,
  created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  tool_version    TEXT,
  config_json     TEXT
);

CREATE INDEX IF NOT EXISTS idx_runs_project_created ON runs(project_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_runs_commit ON runs(commit_hash);

CREATE TABLE IF NOT EXISTS mutants (
  id              INTEGER PRIMARY KEY,
  run_id          INTEGER NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
  diff            TEXT NOT NULL,
  patch_hash      TEXT NOT NULL,
  status          TEXT NOT NULL DEFAULT 'pending'
                    CHECK (status IN ('pending','running','killed','survived',
                                      'timeout','error','skipped','equivalent','unproductive')),
  killed          INTEGER GENERATED ALWAYS AS (CASE WHEN status='killed' THEN 1 ELSE 0 END) VIRTUAL,
  command_to_test TEXT,
  file_path       TEXT,
  operator        TEXT,
  UNIQUE(run_id, patch_hash)
);

CREATE INDEX IF NOT EXISTS idx_mutants_run_status ON mutants(run_id, status);
CREATE INDEX IF NOT EXISTS idx_mutants_file ON mutants(file_path);
CREATE INDEX IF NOT EXISTS idx_mutants_operator ON mutants(operator);
CREATE INDEX IF NOT EXISTS idx_mutants_killed ON mutants(killed);
";
50
/// Data collected during mutation for a single generated mutant.
///
/// The fields correspond to the `diff`, `patch_hash`, `file_path` and
/// `operator` columns of the `mutants` table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MutantData {
    /// Patch text describing the mutation.
    pub diff: String,
    /// Hash of the patch; together with the run id it must be unique, so it
    /// acts as the de-duplication key within a run.
    pub patch_hash: String,
    /// Path of the file the mutant was generated for.
    pub file_path: String,
    /// Identifier of the mutation operator (presumably its name — confirm
    /// against the code that fills this in).
    pub operator: String,
}
58
/// A mutant row read back from the database.
///
/// Only the columns needed to re-apply a mutant are carried: its primary
/// key, the patch text and the (nullable) file path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MutantRow {
    /// Primary key of the row in the `mutants` table.
    pub id: i64,
    /// Patch text stored for the mutant.
    pub diff: String,
    /// File path stored for the mutant; `None` when the column is NULL.
    pub file_path: Option<String>,
}
65
66pub struct Database {
67    conn: Connection,
68}
69
70impl Database {
71    /// Open (or create) the database at `path` and enable foreign keys.
72    pub fn open(path: &Path) -> Result<Self> {
73        let conn = Connection::open(path)?;
74        conn.execute_batch("PRAGMA foreign_keys = ON;")?;
75        Ok(Database { conn })
76    }
77
78    /// Create tables and indexes if they do not yet exist, and apply any
79    /// additive migrations needed for older databases.
80    pub fn ensure_schema(&self) -> Result<()> {
81        self.conn.execute_batch(SCHEMA)?;
82        // Migration: add config_json to runs if the column is missing.
83        // ALTER TABLE ADD COLUMN fails with "duplicate column name" when the
84        // column already exists; silence that specific error so the function
85        // is idempotent on databases created before this column was added.
86        if let Err(e) = self
87            .conn
88            .execute_batch("ALTER TABLE runs ADD COLUMN config_json TEXT;")
89        {
90            if !e.to_string().contains("duplicate column name") {
91                return Err(e.into());
92            }
93        }
94        Ok(())
95    }
96
97    /// Insert the known project rows if not already present.
98    pub fn seed_projects(&self) -> Result<()> {
99        for (name, url) in [
100            ("Bitcoin Core", "https://github.com/bitcoin/bitcoin"),
101            ("secp256k1", "https://github.com/bitcoin-core/secp256k1"),
102        ] {
103            self.conn.execute(
104                "INSERT OR IGNORE INTO projects (name, repository_url) VALUES (?1, ?2)",
105                params![name, url],
106            )?;
107        }
108        Ok(())
109    }
110
111    /// Return the id of the project row with the given name.
112    pub fn get_project_id(&self, name: &str) -> Result<i64> {
113        let id = self.conn.query_row(
114            "SELECT id FROM projects WHERE name = ?1",
115            params![name],
116            |row| row.get(0),
117        )?;
118        Ok(id)
119    }
120
121    /// Create a new run row and return its id.
122    pub fn create_run(
123        &self,
124        project_id: i64,
125        commit_hash: &str,
126        tool_version: &str,
127        pr_number: Option<u32>,
128        config_json: Option<&str>,
129    ) -> Result<i64> {
130        self.conn.execute(
131            "INSERT INTO runs (project_id, commit_hash, tool_version, pr_number, config_json)
132             VALUES (?1, ?2, ?3, ?4, ?5)",
133            params![
134                project_id,
135                commit_hash,
136                tool_version,
137                pr_number,
138                config_json
139            ],
140        )?;
141        Ok(self.conn.last_insert_rowid())
142    }
143
144    /// Batch-insert mutants under `run_id` using a single transaction.
145    /// Duplicates (same run_id + patch_hash) are silently ignored.
146    pub fn insert_mutant_batch(&mut self, run_id: i64, mutants: &[MutantData]) -> Result<()> {
147        let tx = self.conn.transaction()?;
148        {
149            let mut stmt = tx.prepare(
150                "INSERT OR IGNORE INTO mutants
151                   (run_id, diff, patch_hash, status, file_path, operator)
152                 VALUES (?1, ?2, ?3, 'pending', ?4, ?5)",
153            )?;
154            for m in mutants {
155                stmt.execute(params![
156                    run_id,
157                    m.diff,
158                    m.patch_hash,
159                    m.file_path,
160                    m.operator
161                ])?;
162            }
163        }
164        tx.commit()?;
165        Ok(())
166    }
167
168    /// Return mutants belonging to `run_id`, optionally filtered by `file_path`.
169    /// When `survivors_only` is true, only mutants with status `'survived'` are returned.
170    pub fn get_mutants_for_run(
171        &self,
172        run_id: i64,
173        file_path: Option<&str>,
174        survivors_only: bool,
175    ) -> Result<Vec<MutantRow>> {
176        let map_row = |row: &rusqlite::Row<'_>| {
177            Ok(MutantRow {
178                id: row.get(0)?,
179                diff: row.get(1)?,
180                file_path: row.get(2)?,
181            })
182        };
183
184        let rows: Vec<MutantRow> = match (file_path, survivors_only) {
185            (Some(fp), false) => {
186                let mut stmt = self.conn.prepare(
187                    "SELECT id, diff, file_path FROM mutants WHERE run_id = ?1 AND file_path = ?2",
188                )?;
189                let rows = stmt
190                    .query_map(params![run_id, fp], map_row)?
191                    .collect::<rusqlite::Result<_>>()?;
192                rows
193            }
194            (Some(fp), true) => {
195                let mut stmt = self.conn.prepare(
196                    "SELECT id, diff, file_path FROM mutants \
197                     WHERE run_id = ?1 AND file_path = ?2 AND status = 'survived'",
198                )?;
199                let rows = stmt
200                    .query_map(params![run_id, fp], map_row)?
201                    .collect::<rusqlite::Result<_>>()?;
202                rows
203            }
204            (None, false) => {
205                let mut stmt = self
206                    .conn
207                    .prepare("SELECT id, diff, file_path FROM mutants WHERE run_id = ?1")?;
208                let rows = stmt
209                    .query_map(params![run_id], map_row)?
210                    .collect::<rusqlite::Result<_>>()?;
211                rows
212            }
213            (None, true) => {
214                let mut stmt = self.conn.prepare(
215                    "SELECT id, diff, file_path FROM mutants \
216                     WHERE run_id = ?1 AND status = 'survived'",
217                )?;
218                let rows = stmt
219                    .query_map(params![run_id], map_row)?
220                    .collect::<rusqlite::Result<_>>()?;
221                rows
222            }
223        };
224
225        Ok(rows)
226    }
227
228    /// Update the status and command_to_test for a single mutant.
229    pub fn update_mutant_status(&self, id: i64, status: &str, command: &str) -> Result<()> {
230        self.conn.execute(
231            "UPDATE mutants SET status = ?1, command_to_test = ?2 WHERE id = ?3",
232            params![status, command, id],
233        )?;
234        Ok(())
235    }
236}
237
238/// Compute the SHA-256 hex digest of `diff`.
239pub fn compute_patch_hash(diff: &str) -> String {
240    let mut hasher = Sha256::new();
241    hasher.update(diff.as_bytes());
242    format!("{:x}", hasher.finalize())
243}
244
245/// Generate a proper unified diff by running `git diff --no-index` between the
246/// original file on disk and a temp file containing `mutated_content`.
247/// The resulting patch includes context lines and is suitable for `git apply`.
248pub async fn generate_diff(file_path: &str, mutated_content: &str) -> Result<String> {
249    use std::io::Write;
250    use tempfile::NamedTempFile;
251    use tokio::process::Command;
252
253    let mut tmp = NamedTempFile::new()?;
254    tmp.write_all(mutated_content.as_bytes())?;
255    tmp.flush()?;
256
257    let tmp_path = tmp.path().to_string_lossy().to_string();
258
259    // `git diff --no-index` exits with 1 when differences exist — that is expected.
260    let output = Command::new("git")
261        .args(["diff", "--no-index", "--", file_path, &tmp_path])
262        .output()
263        .await
264        .map_err(|e| MutationError::Git(format!("git diff failed to spawn: {}", e)))?;
265
266    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
267
268    if stdout.is_empty() {
269        return Err(MutationError::Git(format!(
270            "git diff produced no output for {}",
271            file_path
272        )));
273    }
274
275    // Fix the temp-file path back to the real file path in the diff headers.
276    // `git diff --no-index` shows the second argument's path in `+++ b/` and
277    // `diff --git … b/…`; replace those with `file_path`.
278    let fixed = stdout
279        .lines()
280        .map(|line| {
281            if line.starts_with("+++ ") {
282                format!("+++ b/{}", file_path)
283            } else if line.starts_with("diff --git ") {
284                format!("diff --git a/{} b/{}", file_path, file_path)
285            } else {
286                line.to_string()
287            }
288        })
289        .collect::<Vec<_>>()
290        .join("\n");
291
292    // Preserve trailing newline present in git diff output.
293    let fixed = if stdout.ends_with('\n') {
294        fixed + "\n"
295    } else {
296        fixed
297    };
298
299    Ok(fixed)
300}