use crate::ast_analysis::{filter_mutatable_lines, AridNodeDetector};
use crate::db::{compute_patch_hash, generate_diff, Database, MutantData};
use crate::error::{MutationError, Result};
use crate::git_changes::{get_changed_files, get_commit_hash, get_lines_touched};
use crate::operators::{self, OperatorSet};
use crate::project::Project;
use regex::Regex;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
/// A changed file that `run_mutation` has selected for mutation.
#[derive(Debug)]
pub struct FileToMutate {
/// Path of the file, as returned by `get_changed_files`.
pub file_path: String,
/// Line numbers returned by `get_lines_touched` for this file
/// (`mutate_file` treats them as 1-based).
pub lines_touched: Vec<usize>,
/// Whether `run_mutation` classified the path as a unit test file.
pub is_unit_test: bool,
}
/// Maximum number of mutants handed to a single `insert_mutant_batch` call.
const DB_BATCH_SIZE: usize = 100;
/// Serialises the optional line range into the JSON snippet stored with a run.
///
/// Returns `None` when no range was requested, otherwise a string of the form
/// `{"range":[start,end]}`.
fn build_config_json(range_lines: Option<(usize, usize)>) -> Option<String> {
    let (first, last) = range_lines?;
    Some(format!("{{\"range\":[{},{}]}}", first, last))
}
/// Entry point of a mutation run: generates mutants and optionally records them in SQLite.
///
/// When `sqlite_path` is given, the database is opened, its schema and project
/// table are prepared, and a new run row is created before any mutant is
/// generated. Mutants are then produced either for the single `file` supplied,
/// or for every changed file reported by `get_changed_files`. Finally, if a
/// database is in use, the collected mutants are inserted in batches of
/// `DB_BATCH_SIZE`.
///
/// # Arguments
/// * `project` - project whose operators, skip rules and DB name are used.
/// * `pr_number` - forwarded to `get_changed_files`, `create_run` and (in
///   changed-files mode) to `mutate_file`.
/// * `file` - when set, only this file is mutated and all of its lines are candidates.
/// * `one_mutant`, `only_security_mutations`, `range_lines`, `coverage`,
///   `skip_lines`, `enable_ast_filtering`, `custom_expert_rule` - forwarded to
///   `mutate_file`; `range_lines` is also stored in the run's config JSON.
/// * `test_only` - in changed-files mode, keep only files classified as unit
///   tests or whose path contains `.py`.
/// * `sqlite_path` - location of the SQLite database; `None` disables persistence.
///
/// # Errors
/// Propagates any error from the database, from git helpers (except the commit
/// hash lookup, which falls back to `"unknown"`), and from `mutate_file`.
pub async fn run_mutation(
    project: Project,
    pr_number: Option<u32>,
    file: Option<PathBuf>,
    one_mutant: bool,
    only_security_mutations: bool,
    range_lines: Option<(usize, usize)>,
    coverage: Option<HashMap<String, Vec<usize>>>,
    test_only: bool,
    skip_lines: HashMap<String, Vec<usize>>,
    enable_ast_filtering: bool,
    custom_expert_rule: Option<String>,
    sqlite_path: Option<PathBuf>,
) -> Result<()> {
    // Set up persistence first so the run row exists before mutants are generated.
    let mut db_and_run: Option<(Database, i64)> = match sqlite_path {
        Some(ref path) => {
            let db = Database::open(path)?;
            db.ensure_schema()?;
            db.seed_projects()?;
            let project_id = db.get_project_id(project.db_name())?;
            // Best effort: a missing commit hash must not abort the run.
            let commit_hash = get_commit_hash()
                .await
                .unwrap_or_else(|_| "unknown".to_string());
            let tool_version = env!("CARGO_PKG_VERSION");
            let config_json = build_config_json(range_lines);
            let run_id = db.create_run(
                project_id,
                &commit_hash,
                tool_version,
                pr_number,
                config_json.as_deref(),
            )?;
            println!("SQLite: created run id={} in {}", run_id, path.display());
            Some((db, run_id))
        }
        None => None,
    };

    let operator_set = operators::for_project(project);

    let all_mutants: Vec<MutantData> = match file {
        // Single-file mode: every line of the file is a candidate.
        Some(file_path) => {
            let target = file_path.to_string_lossy().to_string();
            // NOTE(review): unlike changed-files mode, "util" paths are not excluded here.
            let is_unit_test = !target.contains(".py") && target.contains("test");
            mutate_file(
                &target,
                None,
                None,
                one_mutant,
                only_security_mutations,
                range_lines,
                &coverage,
                is_unit_test,
                &skip_lines,
                enable_ast_filtering,
                custom_expert_rule,
                operator_set.as_ref(),
            )
            .await?
        }
        // Changed-files mode: first select the files, then mutate each of them.
        None => {
            let files_changed = get_changed_files(pr_number, project).await?;
            let mut files_to_mutate = Vec::new();
            for file_changed in files_changed {
                if project.should_skip_file(&file_changed) {
                    continue;
                }
                let lines_touched = get_lines_touched(&file_changed, project).await?;
                let is_unit_test = file_changed.contains("test")
                    && ![".py", "util"]
                        .iter()
                        .any(|fragment| file_changed.contains(*fragment));
                if test_only && !is_unit_test && !file_changed.contains(".py") {
                    continue;
                }
                files_to_mutate.push(FileToMutate {
                    file_path: file_changed,
                    lines_touched,
                    is_unit_test,
                });
            }

            let mut gathered: Vec<MutantData> = Vec::new();
            for FileToMutate {
                file_path,
                lines_touched,
                is_unit_test,
            } in files_to_mutate
            {
                let produced = mutate_file(
                    &file_path,
                    Some(lines_touched),
                    pr_number,
                    one_mutant,
                    only_security_mutations,
                    range_lines,
                    &coverage,
                    is_unit_test,
                    &skip_lines,
                    enable_ast_filtering,
                    custom_expert_rule.clone(),
                    operator_set.as_ref(),
                )
                .await?;
                gathered.extend(produced);
            }
            gathered
        }
    };

    // Persist everything that was generated, one batch at a time.
    if let Some((ref mut db, run_id)) = db_and_run {
        let total = all_mutants.len();
        let mut inserted = 0usize;
        for batch in all_mutants.chunks(DB_BATCH_SIZE) {
            db.insert_mutant_batch(run_id, batch)?;
            inserted += batch.len();
        }
        println!(
            "SQLite: inserted {}/{} mutants for run_id={}",
            inserted, total, run_id
        );
    }

    Ok(())
}
/// Generates mutants for one file, writes each to disk, and returns their metadata.
///
/// The file is read, an operator list is chosen (security, test, or regex), and
/// every candidate line is tried against each operator. For each operator whose
/// pattern matches, the mutated file content is written with `write_mutation`,
/// a diff is produced with `generate_diff`, and a `MutantData` is collected.
///
/// # Arguments
/// * `file_to_mutate` - path of the file to read and mutate.
/// * `touched_lines` - 1-based line numbers to consider; `None` means every line.
/// * `pr_number` - only forwarded to `write_mutation`.
/// * `one_mutant` - stop after the first matching operator on each line.
/// * `only_security_mutations` - use the security operators regardless of file kind.
/// * `range_lines` - inclusive bounds; compared against the zero-based line index.
/// * `coverage` - map from path fragment to covered line numbers. The first
///   entry whose key is a substring of `file_to_mutate` is used (HashMap
///   iteration order is unspecified); if its list is non-empty, only those
///   lines are mutated.
/// * `is_unit_test` - selects test operators and the unit-test skip rules.
/// * `skip_lines` - line numbers to skip, looked up by exact `file_to_mutate` key.
/// * `enable_ast_filtering` - enables arid-line filtering; only takes effect for
///   paths ending in `.cpp` or `.h`.
/// * `custom_expert_rule` - extra rule added to the detector when filtering is active.
/// * `operator_set` - source of operators and skip patterns.
///
/// # Errors
/// Returns an error if the file cannot be read, the detector or its custom rule
/// cannot be created, the operators cannot be loaded, `should_skip_line` fails,
/// or `write_mutation` fails. A `generate_diff` failure is only logged: that
/// mutant is not returned, although its file has already been written and counted.
pub async fn mutate_file(
file_to_mutate: &str,
touched_lines: Option<Vec<usize>>,
pr_number: Option<u32>,
one_mutant: bool,
only_security_mutations: bool,
range_lines: Option<(usize, usize)>,
coverage: &Option<HashMap<String, Vec<usize>>>,
is_unit_test: bool,
skip_lines: &HashMap<String, Vec<usize>>,
enable_ast_filtering: bool,
custom_expert_rule: Option<String>,
operator_set: &dyn OperatorSet,
) -> Result<Vec<MutantData>> {
println!("\n\nGenerating mutants for {}...", file_to_mutate);
let source_code = fs::read_to_string(file_to_mutate)?;
let lines: Vec<&str> = source_code.lines().collect();
println!("File has {} lines", lines.len());
// The arid-node detector is only built for C++ sources/headers, and only when requested.
let mut arid_detector = if enable_ast_filtering
&& (file_to_mutate.ends_with(".cpp") || file_to_mutate.ends_with(".h"))
{
let mut detector = AridNodeDetector::new()?;
if let Some(rule) = custom_expert_rule {
detector.add_expert_rule(&rule, "Custom user rule")?;
}
Some(detector)
} else {
if !enable_ast_filtering {
println!("AST filtering disabled - generating all possible mutants");
}
None
};
// Ask the detector which line numbers are worth mutating (None when filtering is off).
let ast_filtered_lines = if let Some(ref mut detector) = arid_detector {
let string_lines: Vec<String> = lines.iter().map(|s| s.to_string()).collect();
let mutatable_line_numbers = filter_mutatable_lines(&string_lines, detector);
println!(
"AST analysis filtered to {} mutatable lines (from {})",
mutatable_line_numbers.len(),
lines.len()
);
// NOTE(review): assumes the filter never returns more entries than there are lines;
// otherwise this subtraction would underflow.
let filtered_out_count = lines.len() - mutatable_line_numbers.len();
if filtered_out_count > 0 {
println!(
"Filtered out {} arid lines (logging, reserve calls, etc.)",
filtered_out_count
);
}
Some(mutatable_line_numbers)
} else {
None
};
// Operator selection: the security flag wins, then Python/unit-test files, then the default set.
let operators = if only_security_mutations {
println!("Using security operators");
operator_set.security_operators()?
} else if file_to_mutate.contains(".py") || is_unit_test {
println!("Using test operators (Python or unit test file)");
operator_set.test_operators()?
} else {
println!("Using regex operators");
operator_set.regex_operators()?
};
println!("Loaded {} operators", operators.len());
let skip_lines_for_file = skip_lines.get(file_to_mutate);
// Without an explicit list, every line (1-based) is a candidate.
let mut touched_lines = touched_lines.unwrap_or_else(|| (1..=lines.len()).collect());
if let Some(ast_lines) = ast_filtered_lines {
touched_lines.retain(|line_num| ast_lines.contains(line_num));
println!(
"After AST filtering: {} lines to process",
touched_lines.len()
);
}
// Coverage keys are matched as substrings of the file path; no match yields an empty list,
// which disables coverage filtering below.
let lines_with_test_coverage = if let Some(cov) = coverage {
cov.iter()
.find(|(path, _)| file_to_mutate.contains(path.as_str()))
.map(|(_, lines)| lines.clone())
.unwrap_or_default()
} else {
Vec::new()
};
if !lines_with_test_coverage.is_empty() {
println!(
"Using coverage data with {} covered lines",
lines_with_test_coverage.len()
);
}
// Doubles as the index embedded in the next mutant's file name.
let mut mutant_count = 0;
let mut collected: Vec<MutantData> = Vec::new();
if one_mutant {
println!("One mutant mode enabled");
}
for line_num in touched_lines {
// Convert the 1-based line number to an index; a line number of 0 also maps to index 0.
let line_idx = line_num.saturating_sub(1);
if !lines_with_test_coverage.is_empty() && !lines_with_test_coverage.contains(&line_num) {
continue;
}
// NOTE(review): the range is compared against the zero-based index, not the 1-based
// line number - confirm which convention callers expect.
if let Some((start, end)) = range_lines {
if line_idx < start || line_idx > end {
continue;
}
}
if let Some(skip) = skip_lines_for_file {
if skip.contains(&line_num) {
continue;
}
}
// Ignore line numbers that point past the end of the file.
if line_idx >= lines.len() {
continue;
}
let line_before_mutation = lines[line_idx];
if should_skip_line(
line_before_mutation,
file_to_mutate,
is_unit_test,
operator_set,
)? {
continue;
}
let mut line_had_match = false;
for operator in &operators {
// Python and unit-test files get an additional per-line gate from the operator set.
if file_to_mutate.contains(".py") || is_unit_test {
if !operator_set.should_mutate_test_line(line_before_mutation) {
continue;
}
}
if operator.pattern.is_match(line_before_mutation) {
line_had_match = true;
let line_mutated = operator
.pattern
.replace(line_before_mutation, &operator.replacement);
// Rebuild the whole file with just this one line swapped.
let mut mutated_lines = lines.clone();
mutated_lines[line_idx] = &line_mutated;
let mut mutated_content = mutated_lines.join("\n");
// `lines()` drops the final newline; restore it if the source had one.
if source_code.ends_with('\n') {
mutated_content.push('\n');
}
// The mutant file is written (and counted) before the diff is attempted.
mutant_count = write_mutation(
file_to_mutate,
&mutated_content,
mutant_count,
pr_number,
range_lines,
)?;
let diff = match generate_diff(file_to_mutate, &mutated_content).await {
Ok(d) => d,
Err(e) => {
eprintln!(
" Warning: could not generate diff for mutant at line {}: {}",
line_num, e
);
continue;
}
};
let patch_hash = compute_patch_hash(&diff);
let operator_label =
format!("{} ==> {}", operator.pattern.as_str(), operator.replacement);
collected.push(MutantData {
diff,
patch_hash,
file_path: file_to_mutate.to_string(),
operator: operator_label,
});
// In one-mutant mode, stop trying further operators on this line.
if one_mutant {
break; }
}
}
if !line_had_match && !line_before_mutation.trim().is_empty() {
println!(
"Line {} '{}' didn't match any patterns",
line_num,
line_before_mutation.trim()
);
}
}
if let Some(detector) = arid_detector {
let stats = detector.get_stats();
println!("AST Analysis Stats: {:?}", stats);
}
// Reports the number of mutant files written, which can exceed `collected.len()`
// when a diff could not be generated.
println!("Generated {} mutants...", mutant_count);
Ok(collected)
}
fn should_skip_line(
line: &str,
file_path: &str,
is_unit_test: bool,
operator_set: &dyn OperatorSet,
) -> Result<bool> {
let trimmed = line.trim_start();
for pattern in operator_set.do_not_mutate_patterns() {
if trimmed.starts_with(pattern) {
return Ok(true);
}
}
for pattern in operator_set.skip_if_contain_patterns() {
if line.contains(pattern) {
return Ok(true);
}
}
if file_path.contains(".py") || is_unit_test {
let patterns = if is_unit_test {
operator_set.do_not_mutate_unit_patterns()
} else {
operator_set.do_not_mutate_py_patterns()
};
for pattern in patterns {
if line.contains(pattern) {
return Ok(true);
}
}
let assignment_regex = if is_unit_test {
Regex::new(
r"\b(?:[a-zA-Z_][a-zA-Z0-9_:<>*&\s]+)\s+[a-zA-Z_][a-zA-Z0-9_]*(?:\[[^\]]*\])?(?:\.(?:[a-zA-Z_][a-zA-Z0-9_]*)|\->(?:[a-zA-Z_][a-zA-Z0-9_]*))*(?:\s*=\s*[^;]+|\s*\{[^;]+\})\s*",
)?
} else {
Regex::new(r"^\s*([a-zA-Z_]\w*)\s*=\s*(.+)$")?
};
if assignment_regex.is_match(line) {
return Ok(true);
}
}
Ok(false)
}
/// Returns the directory part of `file_to_mutate`, relative to a leading `src` directory.
///
/// The parent directory of the path is taken and, if its first component is
/// exactly `src`, that component is removed. Files directly under `src/`, files
/// without a directory, and paths without a parent all yield an empty string.
///
/// The prefix is stripped per path component rather than textually, so a
/// directory that merely starts with the letters `src` (for example `srcfoo/`)
/// is left intact, and a nested `src/src/` keeps its inner `src`.
fn get_folder_path(file_to_mutate: &str) -> String {
    Path::new(file_to_mutate)
        .parent()
        // `Path::strip_prefix` only matches whole components; on mismatch keep the parent as is.
        .map(|parent| parent.strip_prefix("src").unwrap_or(parent))
        .and_then(Path::to_str)
        .unwrap_or("")
        .to_string()
}
/// Writes one mutant to its output folder and returns the index for the next mutant.
///
/// The output folder name is derived from the source path (directories joined
/// with `-`), plus the PR number or the line range when one is given. The folder
/// is created on first use via `create_mutation_folder`, and the mutant is stored
/// as `<stem>.mutant.<index><ext>` inside it. Any extension other than `.h` or
/// `.py` is written as `.cpp`.
///
/// # Errors
/// Returns `MutationError::InvalidInput` when the path has no usable file stem,
/// and propagates I/O errors from creating the folder or writing the file.
fn write_mutation(
    file_to_mutate: &str,
    mutated_content: &str,
    mutant_index: usize,
    pr_number: Option<u32>,
    range_lines: Option<(usize, usize)>,
) -> Result<usize> {
    // Extension with and without the leading dot.
    let (dotted_ext, bare_ext) = if file_to_mutate.ends_with(".h") {
        (".h", "h")
    } else if file_to_mutate.ends_with(".py") {
        (".py", "py")
    } else {
        (".cpp", "cpp")
    };

    let sub_dirs = get_folder_path(file_to_mutate);
    let stem = Path::new(file_to_mutate)
        .file_stem()
        .and_then(|s| s.to_str())
        .ok_or_else(|| MutationError::InvalidInput("Invalid file path".to_string()))?;

    // Directory components and stem flattened into a single dash-separated token.
    let flattened = match sub_dirs.as_str() {
        "" => stem.to_string(),
        dirs => format!("{}/{}", dirs, stem),
    }
    .replace('/', "-");

    let folder = match (pr_number, range_lines) {
        (Some(pr), _) => format!("muts-pr-{}-{}-{}", pr, flattened, bare_ext),
        (None, Some((from, to))) => format!("muts-pr-{}-{}-{}", flattened, from, to),
        (None, None) => format!("muts-{}-{}", flattened, bare_ext),
    };
    create_mutation_folder(&folder, file_to_mutate)?;

    let mutant_path = format!(
        "{}/{}.mutant.{}{}",
        folder, stem, mutant_index, dotted_ext
    );
    fs::write(mutant_path, mutated_content)?;
    Ok(mutant_index + 1)
}
/// Creates the mutant output folder if it does not exist yet.
///
/// On creation, an `original_file.txt` file containing the path of the mutated
/// source is written into the folder. An already existing folder is left untouched.
///
/// # Errors
/// Propagates I/O errors from creating the directory or writing the marker file.
fn create_mutation_folder(folder_name: &str, file_to_mutate: &str) -> Result<()> {
    let dir = Path::new(folder_name);
    if dir.exists() {
        return Ok(());
    }
    fs::create_dir_all(dir)?;
    fs::write(dir.join("original_file.txt"), file_to_mutate)?;
    Ok(())
}
// Unit tests for the line-skipping rules and the on-disk mutant layout.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// Exercises the non-test-file skip rules of `should_skip_line` with the Bitcoin Core operators.
#[test]
fn test_should_skip_line() {
let ops = operators::for_project(Project::BitcoinCore);
let ops = ops.as_ref();
assert!(should_skip_line("// This is a comment", "test.cpp", false, ops).unwrap());
assert!(should_skip_line("assert(condition);", "test.cpp", false, ops).unwrap());
assert!(should_skip_line("LogPrintf(\"test\");", "test.cpp", false, ops).unwrap());
assert!(should_skip_line("LogDebug(\"test\");", "test.cpp", false, ops).unwrap());
assert!(!should_skip_line("int x = 5;", "test.cpp", false, ops).unwrap());
assert!(!should_skip_line("return value;", "test.cpp", false, ops).unwrap());
}
// A fresh folder must be created together with its `original_file.txt` marker.
#[test]
fn test_create_mutation_folder() {
let temp_dir = tempdir().unwrap();
let folder_path = temp_dir.path().join("test_muts");
let folder_name = folder_path.to_str().unwrap();
create_mutation_folder(folder_name, "test/file.cpp").unwrap();
assert!(folder_path.exists());
assert!(folder_path.join("original_file.txt").exists());
let content = fs::read_to_string(folder_path.join("original_file.txt")).unwrap();
assert_eq!(content, "test/file.cpp");
}
// Checks the folder and file naming used when neither a PR number nor a range is given.
// NOTE(review): this changes the process-wide working directory, which is shared with
// any test running in parallel and is not restored afterwards.
#[test]
fn test_write_mutation() {
let temp_dir = tempdir().unwrap();
std::env::set_current_dir(&temp_dir).unwrap();
let result = write_mutation("test.cpp", "mutated content", 0, None, None).unwrap();
assert_eq!(result, 1);
let folder_path = Path::new("muts-test-cpp");
assert!(folder_path.exists());
assert!(folder_path.join("test.mutant.0.cpp").exists());
let content = fs::read_to_string(folder_path.join("test.mutant.0.cpp")).unwrap();
assert_eq!(content, "mutated content");
}
}