Skip to main content

bcore_mutation/
mutation.rs

1use crate::ast_analysis::{filter_mutatable_lines, AridNodeDetector};
2use crate::db::{compute_patch_hash, generate_diff, Database, MutantData};
3use crate::error::{MutationError, Result};
4use crate::git_changes::{get_changed_files, get_commit_hash, get_lines_touched};
5use crate::operators::{self, OperatorSet};
6use crate::project::Project;
7use regex::Regex;
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
/// A changed file queued for mutation, plus the information needed to mutate it.
///
/// Built by `run_mutation` for every changed file that survives its filters and
/// then handed to `mutate_file`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileToMutate {
    /// Path of the file, as reported by the changed-file listing.
    pub file_path: String,
    /// Candidate line numbers for mutation (treated as 1-based by `mutate_file`).
    pub lines_touched: Vec<usize>,
    /// Whether the path was classified as a unit test; selects the test operators.
    pub is_unit_test: bool,
}
18
/// Chunk size for DB batch inserts: the maximum number of mutants passed to a
/// single `insert_mutant_batch` call when `run_mutation` persists its results.
const DB_BATCH_SIZE: usize = 100;
21
/// Render the execution options of a run as a JSON string for the runs table.
///
/// Only the line range is recorded: `Some((start, end))` becomes
/// `{"range":[start,end]}`. With no range there is nothing to record and the
/// result is `None`.
fn build_config_json(range_lines: Option<(usize, usize)>) -> Option<String> {
    let (first, last) = range_lines?;
    Some(format!("{{\"range\":[{},{}]}}", first, last))
}
27
28pub async fn run_mutation(
29    project: Project,
30    pr_number: Option<u32>,
31    file: Option<PathBuf>,
32    one_mutant: bool,
33    only_security_mutations: bool,
34    range_lines: Option<(usize, usize)>,
35    coverage: Option<HashMap<String, Vec<usize>>>,
36    test_only: bool,
37    skip_lines: HashMap<String, Vec<usize>>,
38    enable_ast_filtering: bool,
39    custom_expert_rule: Option<String>,
40    sqlite_path: Option<PathBuf>,
41) -> Result<()> {
42    // Set up database if requested.
43    let mut db_and_run: Option<(Database, i64)> = None;
44    if let Some(ref path) = sqlite_path {
45        let db = Database::open(path)?;
46        db.ensure_schema()?;
47        db.seed_projects()?;
48        let project_id = db.get_project_id(project.db_name())?;
49        let commit_hash = get_commit_hash()
50            .await
51            .unwrap_or_else(|_| "unknown".to_string());
52        let tool_version = env!("CARGO_PKG_VERSION");
53        let config_json = build_config_json(range_lines);
54        let run_id = db.create_run(
55            project_id,
56            &commit_hash,
57            tool_version,
58            pr_number,
59            config_json.as_deref(),
60        )?;
61        println!("SQLite: created run id={} in {}", run_id, path.display());
62        db_and_run = Some((db, run_id));
63    }
64
65    let operator_set = operators::for_project(project);
66
67    let mut all_mutants: Vec<MutantData> = Vec::new();
68
69    if let Some(file_path) = file {
70        let file_str = file_path.to_string_lossy().to_string();
71        let is_unit_test = file_str.contains("test") && !file_str.contains(".py");
72
73        let mutants = mutate_file(
74            &file_str,
75            None,
76            None,
77            one_mutant,
78            only_security_mutations,
79            range_lines,
80            &coverage,
81            is_unit_test,
82            &skip_lines,
83            enable_ast_filtering,
84            custom_expert_rule,
85            operator_set.as_ref(),
86        )
87        .await?;
88        all_mutants.extend(mutants);
89    } else {
90        let files_changed = get_changed_files(pr_number, project).await?;
91        let mut files_to_mutate = Vec::new();
92
93        for file_changed in files_changed {
94            // Skip non-source files (docs, tooling, benchmarks, ...).
95            // The exact set is project-specific; see `Project::should_skip_file`.
96            if project.should_skip_file(&file_changed) {
97                continue;
98            }
99
100            let lines_touched = get_lines_touched(&file_changed, project).await?;
101            let is_unit_test = file_changed.contains("test")
102                && !file_changed.contains(".py")
103                && !file_changed.contains("util");
104
105            if test_only && !(is_unit_test || file_changed.contains(".py")) {
106                continue;
107            }
108
109            files_to_mutate.push(FileToMutate {
110                file_path: file_changed,
111                lines_touched,
112                is_unit_test,
113            });
114        }
115
116        for file_info in files_to_mutate {
117            let mutants = mutate_file(
118                &file_info.file_path,
119                Some(file_info.lines_touched),
120                pr_number,
121                one_mutant,
122                only_security_mutations,
123                range_lines,
124                &coverage,
125                file_info.is_unit_test,
126                &skip_lines,
127                enable_ast_filtering,
128                custom_expert_rule.clone(),
129                operator_set.as_ref(),
130            )
131            .await?;
132            all_mutants.extend(mutants);
133        }
134    }
135
136    // Persist mutants to the database in chunks.
137    if let Some((ref mut db, run_id)) = db_and_run {
138        let total = all_mutants.len();
139        let mut inserted = 0usize;
140        for chunk in all_mutants.chunks(DB_BATCH_SIZE) {
141            db.insert_mutant_batch(run_id, chunk)?;
142            inserted += chunk.len();
143        }
144        println!(
145            "SQLite: inserted {}/{} mutants for run_id={}",
146            inserted, total, run_id
147        );
148    }
149
150    Ok(())
151}
152
153pub async fn mutate_file(
154    file_to_mutate: &str,
155    touched_lines: Option<Vec<usize>>,
156    pr_number: Option<u32>,
157    one_mutant: bool,
158    only_security_mutations: bool,
159    range_lines: Option<(usize, usize)>,
160    coverage: &Option<HashMap<String, Vec<usize>>>,
161    is_unit_test: bool,
162    skip_lines: &HashMap<String, Vec<usize>>,
163    enable_ast_filtering: bool,
164    custom_expert_rule: Option<String>,
165    operator_set: &dyn OperatorSet,
166) -> Result<Vec<MutantData>> {
167    println!("\n\nGenerating mutants for {}...", file_to_mutate);
168
169    let source_code = fs::read_to_string(file_to_mutate)?;
170    let lines: Vec<&str> = source_code.lines().collect();
171    println!("File has {} lines", lines.len());
172
173    // Initialize AST-based arid node detection for C++ files
174    let mut arid_detector = if enable_ast_filtering
175        && (file_to_mutate.ends_with(".cpp") || file_to_mutate.ends_with(".h"))
176    {
177        let mut detector = AridNodeDetector::new()?;
178
179        // Add custom expert rule if provided
180        if let Some(rule) = custom_expert_rule {
181            detector.add_expert_rule(&rule, "Custom user rule")?;
182        }
183
184        Some(detector)
185    } else {
186        if !enable_ast_filtering {
187            println!("AST filtering disabled - generating all possible mutants");
188        }
189        None
190    };
191
192    // Filter out arid lines using AST analysis (for C++ files)
193    let ast_filtered_lines = if let Some(ref mut detector) = arid_detector {
194        let string_lines: Vec<String> = lines.iter().map(|s| s.to_string()).collect();
195        let mutatable_line_numbers = filter_mutatable_lines(&string_lines, detector);
196        println!(
197            "AST analysis filtered to {} mutatable lines (from {})",
198            mutatable_line_numbers.len(),
199            lines.len()
200        );
201
202        // Show some examples of filtered out lines
203        let filtered_out_count = lines.len() - mutatable_line_numbers.len();
204        if filtered_out_count > 0 {
205            println!(
206                "Filtered out {} arid lines (logging, reserve calls, etc.)",
207                filtered_out_count
208            );
209        }
210
211        Some(mutatable_line_numbers)
212    } else {
213        None
214    };
215
216    // Select operators based on file type and options
217    let operators = if only_security_mutations {
218        println!("Using security operators");
219        operator_set.security_operators()?
220    } else if file_to_mutate.contains(".py") || is_unit_test {
221        println!("Using test operators (Python or unit test file)");
222        operator_set.test_operators()?
223    } else {
224        println!("Using regex operators");
225        operator_set.regex_operators()?
226    };
227
228    println!("Loaded {} operators", operators.len());
229
230    let skip_lines_for_file = skip_lines.get(file_to_mutate);
231    let mut touched_lines = touched_lines.unwrap_or_else(|| (1..=lines.len()).collect());
232
233    // Apply AST filtering if available
234    if let Some(ast_lines) = ast_filtered_lines {
235        // Intersect touched_lines with AST-filtered lines
236        touched_lines.retain(|line_num| ast_lines.contains(line_num));
237        println!(
238            "After AST filtering: {} lines to process",
239            touched_lines.len()
240        );
241    }
242
243    // Get coverage data for this file
244    let lines_with_test_coverage = if let Some(cov) = coverage {
245        cov.iter()
246            .find(|(path, _)| file_to_mutate.contains(path.as_str()))
247            .map(|(_, lines)| lines.clone())
248            .unwrap_or_default()
249    } else {
250        Vec::new()
251    };
252
253    if !lines_with_test_coverage.is_empty() {
254        println!(
255            "Using coverage data with {} covered lines",
256            lines_with_test_coverage.len()
257        );
258    }
259
260    let mut mutant_count = 0;
261    let mut collected: Vec<MutantData> = Vec::new();
262
263    if one_mutant {
264        println!("One mutant mode enabled");
265    }
266
267    for line_num in touched_lines {
268        let line_idx = line_num.saturating_sub(1);
269
270        // Check coverage if provided
271        if !lines_with_test_coverage.is_empty() && !lines_with_test_coverage.contains(&line_num) {
272            continue;
273        }
274
275        // Check range if provided
276        if let Some((start, end)) = range_lines {
277            if line_idx < start || line_idx > end {
278                continue;
279            }
280        }
281
282        // Check skip lines (skip_lines uses 1-indexed line numbers)
283        if let Some(skip) = skip_lines_for_file {
284            if skip.contains(&line_num) {
285                continue;
286            }
287        }
288
289        if line_idx >= lines.len() {
290            continue;
291        }
292
293        let line_before_mutation = lines[line_idx];
294
295        // Check if line should be skipped (traditional approach)
296        if should_skip_line(
297            line_before_mutation,
298            file_to_mutate,
299            is_unit_test,
300            operator_set,
301        )? {
302            continue;
303        }
304
305        let mut line_had_match = false;
306
307        for operator in &operators {
308            // Special handling for test operators
309            if file_to_mutate.contains(".py") || is_unit_test {
310                if !operator_set.should_mutate_test_line(line_before_mutation) {
311                    continue;
312                }
313            }
314
315            if operator.pattern.is_match(line_before_mutation) {
316                line_had_match = true;
317                let line_mutated = operator
318                    .pattern
319                    .replace(line_before_mutation, &operator.replacement);
320
321                // Create mutated file content
322                let mut mutated_lines = lines.clone();
323                mutated_lines[line_idx] = &line_mutated;
324                let mut mutated_content = mutated_lines.join("\n");
325                if source_code.ends_with('\n') {
326                    mutated_content.push('\n');
327                }
328
329                mutant_count = write_mutation(
330                    file_to_mutate,
331                    &mutated_content,
332                    mutant_count,
333                    pr_number,
334                    range_lines,
335                )?;
336
337                // Collect mutant metadata for DB persistence.
338                let diff = match generate_diff(file_to_mutate, &mutated_content).await {
339                    Ok(d) => d,
340                    Err(e) => {
341                        eprintln!(
342                            "  Warning: could not generate diff for mutant at line {}: {}",
343                            line_num, e
344                        );
345                        continue;
346                    }
347                };
348                let patch_hash = compute_patch_hash(&diff);
349                let operator_label =
350                    format!("{} ==> {}", operator.pattern.as_str(), operator.replacement);
351                collected.push(MutantData {
352                    diff,
353                    patch_hash,
354                    file_path: file_to_mutate.to_string(),
355                    operator: operator_label,
356                });
357
358                if one_mutant {
359                    break; // Break only from operator loop, continue to next line
360                }
361            }
362        }
363
364        // Debug output for lines that didn't match any patterns
365        if !line_had_match && !line_before_mutation.trim().is_empty() {
366            println!(
367                "Line {} '{}' didn't match any patterns",
368                line_num,
369                line_before_mutation.trim()
370            );
371        }
372
373        // Note: Removed the early break that was stopping line processing
374        // Now each line gets processed independently
375    }
376
377    // Print AST analysis statistics
378    if let Some(detector) = arid_detector {
379        let stats = detector.get_stats();
380        println!("AST Analysis Stats: {:?}", stats);
381    }
382
383    println!("Generated {} mutants...", mutant_count);
384    Ok(collected)
385}
386
387fn should_skip_line(
388    line: &str,
389    file_path: &str,
390    is_unit_test: bool,
391    operator_set: &dyn OperatorSet,
392) -> Result<bool> {
393    let trimmed = line.trim_start();
394
395    // Check basic patterns to skip
396    for pattern in operator_set.do_not_mutate_patterns() {
397        if trimmed.starts_with(pattern) {
398            return Ok(true);
399        }
400    }
401
402    // Check skip if contain patterns
403    for pattern in operator_set.skip_if_contain_patterns() {
404        if line.contains(pattern) {
405            return Ok(true);
406        }
407    }
408
409    // Language-specific checks
410    if file_path.contains(".py") || is_unit_test {
411        let patterns = if is_unit_test {
412            operator_set.do_not_mutate_unit_patterns()
413        } else {
414            operator_set.do_not_mutate_py_patterns()
415        };
416
417        for pattern in patterns {
418            if line.contains(pattern) {
419                return Ok(true);
420            }
421        }
422
423        // Check for assignment patterns
424        let assignment_regex = if is_unit_test {
425            Regex::new(
426                r"\b(?:[a-zA-Z_][a-zA-Z0-9_:<>*&\s]+)\s+[a-zA-Z_][a-zA-Z0-9_]*(?:\[[^\]]*\])?(?:\.(?:[a-zA-Z_][a-zA-Z0-9_]*)|\->(?:[a-zA-Z_][a-zA-Z0-9_]*))*(?:\s*=\s*[^;]+|\s*\{[^;]+\})\s*",
427            )?
428        } else {
429            Regex::new(r"^\s*([a-zA-Z_]\w*)\s*=\s*(.+)$")?
430        };
431
432        if assignment_regex.is_match(line) {
433            return Ok(true);
434        }
435    }
436
437    Ok(false)
438}
439
/// Return the directory part of `file_to_mutate` with a leading `src`
/// component removed, or an empty string when nothing is left.
///
/// Examples: `src/wallet/coin.cpp` -> `wallet`, `src/coin.cpp` -> ``,
/// `test/functional/foo.py` -> `test/functional`.
///
/// The prefix is removed component-wise, so directories that merely start
/// with the letters `src` (`srcs`, `src_old`, ...) are left intact. A parent
/// that is not valid UTF-8 yields an empty string.
fn get_folder_path(file_to_mutate: &str) -> String {
    let parent = match Path::new(file_to_mutate).parent() {
        Some(dir) => dir,
        None => return String::new(),
    };

    // `Path::strip_prefix` only matches whole path components.
    let without_src = parent.strip_prefix("src").unwrap_or(parent);

    match without_src.to_str() {
        // A remainder of exactly `src` (file located in `src/src`) is also
        // collapsed to the empty string.
        Some(dir) if !dir.is_empty() && dir != "src" => dir.to_string(),
        _ => String::new(),
    }
}
464
465fn write_mutation(
466    file_to_mutate: &str,
467    mutated_content: &str,
468    mutant_index: usize,
469    pr_number: Option<u32>,
470    range_lines: Option<(usize, usize)>,
471) -> Result<usize> {
472    let file_extension = if file_to_mutate.ends_with(".h") {
473        ".h"
474    } else if file_to_mutate.ends_with(".py") {
475        ".py"
476    } else {
477        ".cpp"
478    };
479
480    let folders = get_folder_path(file_to_mutate);
481
482    let base_file_name = Path::new(file_to_mutate)
483        .file_stem()
484        .and_then(|s| s.to_str())
485        .ok_or_else(|| MutationError::InvalidInput("Invalid file path".to_string()))?;
486
487    // Combine folders with base filename
488    let file_name = if folders.is_empty() {
489        base_file_name.to_string()
490    } else {
491        format!("{}/{}", folders, base_file_name)
492    };
493
494    let ext = file_extension.trim_start_matches('.');
495    let folder = if let Some(pr) = pr_number {
496        format!("muts-pr-{}-{}-{}", pr, file_name.replace('/', "-"), ext)
497    } else if let Some(range) = range_lines {
498        format!(
499            "muts-pr-{}-{}-{}",
500            file_name.replace('/', "-"),
501            range.0,
502            range.1
503        )
504    } else {
505        format!("muts-{}-{}", file_name.replace('/', "-"), ext)
506    };
507
508    create_mutation_folder(&folder, file_to_mutate)?;
509
510    let mutator_file = format!(
511        "{}/{}.mutant.{}{}",
512        folder, base_file_name, mutant_index, file_extension
513    );
514    fs::write(mutator_file, mutated_content)?;
515
516    Ok(mutant_index + 1)
517}
518
519fn create_mutation_folder(folder_name: &str, file_to_mutate: &str) -> Result<()> {
520    let folder_path = Path::new(folder_name);
521
522    if !folder_path.exists() {
523        fs::create_dir_all(folder_path)?;
524
525        let original_file_path = folder_path.join("original_file.txt");
526        fs::write(original_file_path, file_to_mutate)?;
527    }
528
529    Ok(())
530}
531
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Restores the process working directory when dropped, so a test that
    /// changes it does not leave later tests in a deleted directory.
    struct CwdGuard(Option<std::path::PathBuf>);

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            if let Some(previous) = self.0.take() {
                // Best effort: the previous directory may no longer exist.
                let _ = std::env::set_current_dir(previous);
            }
        }
    }

    #[test]
    fn test_should_skip_line() {
        let ops = operators::for_project(Project::BitcoinCore);
        let ops = ops.as_ref();

        // Test basic skip patterns
        assert!(should_skip_line("// This is a comment", "test.cpp", false, ops).unwrap());
        assert!(should_skip_line("assert(condition);", "test.cpp", false, ops).unwrap());
        assert!(should_skip_line("LogPrintf(\"test\");", "test.cpp", false, ops).unwrap());
        assert!(should_skip_line("LogDebug(\"test\");", "test.cpp", false, ops).unwrap());

        // Test normal lines that shouldn't be skipped
        assert!(!should_skip_line("int x = 5;", "test.cpp", false, ops).unwrap());
        assert!(!should_skip_line("return value;", "test.cpp", false, ops).unwrap());
    }

    #[test]
    fn test_build_config_json() {
        assert_eq!(
            build_config_json(Some((10, 20))),
            Some("{\"range\":[10,20]}".to_string())
        );
        assert_eq!(build_config_json(None), None);
    }

    #[test]
    fn test_get_folder_path() {
        assert_eq!(get_folder_path("src/wallet/coin.cpp"), "wallet");
        assert_eq!(get_folder_path("src/coin.cpp"), "");
        assert_eq!(get_folder_path("coin.cpp"), "");
        assert_eq!(get_folder_path("test/functional/foo.py"), "test/functional");
    }

    #[test]
    fn test_create_mutation_folder() {
        let temp_dir = tempdir().unwrap();
        let folder_path = temp_dir.path().join("test_muts");
        let folder_name = folder_path.to_str().unwrap();

        create_mutation_folder(folder_name, "test/file.cpp").unwrap();

        assert!(folder_path.exists());
        assert!(folder_path.join("original_file.txt").exists());

        let content = fs::read_to_string(folder_path.join("original_file.txt")).unwrap();
        assert_eq!(content, "test/file.cpp");
    }

    #[test]
    fn test_write_mutation() {
        let temp_dir = tempdir().unwrap();
        // Declared after `temp_dir` so it is dropped first: the working
        // directory is restored before the temporary directory is removed.
        let _restore_cwd = CwdGuard(std::env::current_dir().ok());
        // `write_mutation` writes relative to the working directory.
        std::env::set_current_dir(temp_dir.path()).unwrap();

        let result = write_mutation("test.cpp", "mutated content", 0, None, None).unwrap();
        assert_eq!(result, 1);

        let folder_path = Path::new("muts-test-cpp");
        assert!(folder_path.exists());
        assert!(folder_path.join("test.mutant.0.cpp").exists());

        let content = fs::read_to_string(folder_path.join("test.mutant.0.cpp")).unwrap();
        assert_eq!(content, "mutated content");
    }
}