// bcore_mutation/mutation.rs

1use crate::ast_analysis::{filter_mutatable_lines, AridNodeDetector};
2use crate::db::{compute_patch_hash, generate_diff, Database, MutantData};
3use crate::error::{MutationError, Result};
4use crate::git_changes::{get_changed_files, get_commit_hash, get_lines_touched};
5use crate::operators::{
6    get_do_not_mutate_patterns, get_do_not_mutate_py_patterns, get_do_not_mutate_unit_patterns,
7    get_regex_operators, get_security_operators, get_skip_if_contain_patterns, get_test_operators,
8    should_mutate_test_line,
9};
10use regex::Regex;
11use std::collections::HashMap;
12use std::fs;
13use std::path::{Path, PathBuf};
14
/// A changed file queued for mutation by `run_mutation`.
#[derive(Debug)]
pub struct FileToMutate {
    /// Path of the file, as returned by the changed-files lookup.
    pub file_path: String,
    /// Line numbers reported as touched for this file; `mutate_file` treats
    /// them as 1-based (it indexes the source with `line - 1`).
    pub lines_touched: Vec<usize>,
    /// `true` when the path contains "test" but neither ".py" nor "util".
    pub is_unit_test: bool,
}
21
/// Chunk size for DB batch inserts: the maximum number of mutants passed to a
/// single `insert_mutant_batch` call (the final chunk may be smaller).
const DB_BATCH_SIZE: usize = 100;
24
/// Render the execution options worth recording as a JSON string for the runs table.
///
/// Only the line range is recorded: `Some((start, end))` becomes
/// `{"range":[start,end]}`. With no range there is nothing to record and the
/// function returns `None`.
fn build_config_json(range_lines: Option<(usize, usize)>) -> Option<String> {
    let (first_line, last_line) = range_lines?;
    Some(format!("{{\"range\":[{},{}]}}", first_line, last_line))
}
30
31pub async fn run_mutation(
32    pr_number: Option<u32>,
33    file: Option<PathBuf>,
34    one_mutant: bool,
35    only_security_mutations: bool,
36    range_lines: Option<(usize, usize)>,
37    coverage: Option<HashMap<String, Vec<usize>>>,
38    test_only: bool,
39    skip_lines: HashMap<String, Vec<usize>>,
40    enable_ast_filtering: bool,
41    custom_expert_rule: Option<String>,
42    sqlite_path: Option<PathBuf>,
43) -> Result<()> {
44    // Set up database if requested.
45    let mut db_and_run: Option<(Database, i64)> = None;
46    if let Some(ref path) = sqlite_path {
47        let db = Database::open(path)?;
48        db.ensure_schema()?;
49        db.seed_projects()?;
50        let project_id = db.get_bitcoin_core_project_id()?;
51        let commit_hash = get_commit_hash().await.unwrap_or_else(|_| "unknown".to_string());
52        let tool_version = env!("CARGO_PKG_VERSION");
53        let config_json = build_config_json(range_lines);
54        let run_id = db.create_run(
55            project_id,
56            &commit_hash,
57            tool_version,
58            pr_number,
59            config_json.as_deref(),
60        )?;
61        println!("SQLite: created run id={} in {}", run_id, path.display());
62        db_and_run = Some((db, run_id));
63    }
64
65    let mut all_mutants: Vec<MutantData> = Vec::new();
66
67    if let Some(file_path) = file {
68        let file_str = file_path.to_string_lossy().to_string();
69        let is_unit_test = file_str.contains("test") && !file_str.contains(".py");
70
71        let mutants = mutate_file(
72            &file_str,
73            None,
74            None,
75            one_mutant,
76            only_security_mutations,
77            range_lines,
78            &coverage,
79            is_unit_test,
80            &skip_lines,
81            enable_ast_filtering,
82            custom_expert_rule,
83        )
84        .await?;
85        all_mutants.extend(mutants);
86    } else {
87        let files_changed = get_changed_files(pr_number).await?;
88        let mut files_to_mutate = Vec::new();
89
90        for file_changed in files_changed {
91            // Skip certain file types
92            if file_changed.contains("doc")
93                || file_changed.contains("fuzz")
94                || file_changed.contains("bench")
95                || file_changed.contains("util")
96                || file_changed.contains("sanitizer_supressions")
97                || file_changed.contains("test_framework.py")
98                || file_changed.ends_with(".txt")
99            {
100                continue;
101            }
102
103            let lines_touched = get_lines_touched(&file_changed).await?;
104            let is_unit_test = file_changed.contains("test")
105                && !file_changed.contains(".py")
106                && !file_changed.contains("util");
107
108            if test_only && !(is_unit_test || file_changed.contains(".py")) {
109                continue;
110            }
111
112            files_to_mutate.push(FileToMutate {
113                file_path: file_changed,
114                lines_touched,
115                is_unit_test,
116            });
117        }
118
119        for file_info in files_to_mutate {
120            let mutants = mutate_file(
121                &file_info.file_path,
122                Some(file_info.lines_touched),
123                pr_number,
124                one_mutant,
125                only_security_mutations,
126                range_lines,
127                &coverage,
128                file_info.is_unit_test,
129                &skip_lines,
130                enable_ast_filtering,
131                custom_expert_rule.clone(),
132            )
133            .await?;
134            all_mutants.extend(mutants);
135        }
136    }
137
138    // Persist mutants to the database in chunks.
139    if let Some((ref mut db, run_id)) = db_and_run {
140        let total = all_mutants.len();
141        let mut inserted = 0usize;
142        for chunk in all_mutants.chunks(DB_BATCH_SIZE) {
143            db.insert_mutant_batch(run_id, chunk)?;
144            inserted += chunk.len();
145        }
146        println!(
147            "SQLite: inserted {}/{} mutants for run_id={}",
148            inserted, total, run_id
149        );
150    }
151
152    Ok(())
153}
154
155pub async fn mutate_file(
156    file_to_mutate: &str,
157    touched_lines: Option<Vec<usize>>,
158    pr_number: Option<u32>,
159    one_mutant: bool,
160    only_security_mutations: bool,
161    range_lines: Option<(usize, usize)>,
162    coverage: &Option<HashMap<String, Vec<usize>>>,
163    is_unit_test: bool,
164    skip_lines: &HashMap<String, Vec<usize>>,
165    enable_ast_filtering: bool,
166    custom_expert_rule: Option<String>,
167) -> Result<Vec<MutantData>> {
168    println!("\n\nGenerating mutants for {}...", file_to_mutate);
169
170    let source_code = fs::read_to_string(file_to_mutate)?;
171    let lines: Vec<&str> = source_code.lines().collect();
172    println!("File has {} lines", lines.len());
173
174    // Initialize AST-based arid node detection for C++ files
175    let mut arid_detector = if enable_ast_filtering
176        && (file_to_mutate.ends_with(".cpp") || file_to_mutate.ends_with(".h"))
177    {
178        let mut detector = AridNodeDetector::new()?;
179
180        // Add custom expert rule if provided
181        if let Some(rule) = custom_expert_rule {
182            detector.add_expert_rule(&rule, "Custom user rule")?;
183        }
184
185        Some(detector)
186    } else {
187        if !enable_ast_filtering {
188            println!("AST filtering disabled - generating all possible mutants");
189        }
190        None
191    };
192
193    // Filter out arid lines using AST analysis (for C++ files)
194    let ast_filtered_lines = if let Some(ref mut detector) = arid_detector {
195        let string_lines: Vec<String> = lines.iter().map(|s| s.to_string()).collect();
196        let mutatable_line_numbers = filter_mutatable_lines(&string_lines, detector);
197        println!(
198            "AST analysis filtered to {} mutatable lines (from {})",
199            mutatable_line_numbers.len(),
200            lines.len()
201        );
202
203        // Show some examples of filtered out lines
204        let filtered_out_count = lines.len() - mutatable_line_numbers.len();
205        if filtered_out_count > 0 {
206            println!(
207                "Filtered out {} arid lines (logging, reserve calls, etc.)",
208                filtered_out_count
209            );
210        }
211
212        Some(mutatable_line_numbers)
213    } else {
214        None
215    };
216
217    // Select operators based on file type and options
218    let operators = if only_security_mutations {
219        println!("Using security operators");
220        get_security_operators()?
221    } else if file_to_mutate.contains(".py") || is_unit_test {
222        println!("Using test operators (Python or unit test file)");
223        get_test_operators()?
224    } else {
225        println!("Using regex operators");
226        get_regex_operators()?
227    };
228
229    println!("Loaded {} operators", operators.len());
230
231    let skip_lines_for_file = skip_lines.get(file_to_mutate);
232    let mut touched_lines = touched_lines.unwrap_or_else(|| (1..=lines.len()).collect());
233
234    // Apply AST filtering if available
235    if let Some(ast_lines) = ast_filtered_lines {
236        // Intersect touched_lines with AST-filtered lines
237        touched_lines.retain(|line_num| ast_lines.contains(line_num));
238        println!(
239            "After AST filtering: {} lines to process",
240            touched_lines.len()
241        );
242    }
243
244    // Get coverage data for this file
245    let lines_with_test_coverage = if let Some(cov) = coverage {
246        cov.iter()
247            .find(|(path, _)| file_to_mutate.contains(path.as_str()))
248            .map(|(_, lines)| lines.clone())
249            .unwrap_or_default()
250    } else {
251        Vec::new()
252    };
253
254    if !lines_with_test_coverage.is_empty() {
255        println!(
256            "Using coverage data with {} covered lines",
257            lines_with_test_coverage.len()
258        );
259    }
260
261    let mut mutant_count = 0;
262    let mut collected: Vec<MutantData> = Vec::new();
263
264    if one_mutant {
265        println!("One mutant mode enabled");
266    }
267
268    for line_num in touched_lines {
269        let line_idx = line_num.saturating_sub(1);
270
271        // Check coverage if provided
272        if !lines_with_test_coverage.is_empty() && !lines_with_test_coverage.contains(&line_num) {
273            continue;
274        }
275
276        // Check range if provided
277        if let Some((start, end)) = range_lines {
278            if line_idx < start || line_idx > end {
279                continue;
280            }
281        }
282
283        // Check skip lines (skip_lines uses 1-indexed line numbers)
284        if let Some(skip) = skip_lines_for_file {
285            if skip.contains(&line_num) {
286                continue;
287            }
288        }
289
290        if line_idx >= lines.len() {
291            continue;
292        }
293
294        let line_before_mutation = lines[line_idx];
295
296        // Check if line should be skipped (traditional approach)
297        if should_skip_line(line_before_mutation, file_to_mutate, is_unit_test)? {
298            continue;
299        }
300
301        let mut line_had_match = false;
302
303        for operator in &operators {
304            // Special handling for test operators
305            if file_to_mutate.contains(".py") || is_unit_test {
306                if !should_mutate_test_line(line_before_mutation) {
307                    continue;
308                }
309            }
310
311            if operator.pattern.is_match(line_before_mutation) {
312                line_had_match = true;
313                let line_mutated = operator
314                    .pattern
315                    .replace(line_before_mutation, &operator.replacement);
316
317                // Create mutated file content
318                let mut mutated_lines = lines.clone();
319                mutated_lines[line_idx] = &line_mutated;
320                let mut mutated_content = mutated_lines.join("\n");
321                if source_code.ends_with('\n') {
322                    mutated_content.push('\n');
323                }
324
325                mutant_count = write_mutation(
326                    file_to_mutate,
327                    &mutated_content,
328                    mutant_count,
329                    pr_number,
330                    range_lines,
331                )?;
332
333                // Collect mutant metadata for DB persistence.
334                let diff = match generate_diff(file_to_mutate, &mutated_content).await {
335                    Ok(d) => d,
336                    Err(e) => {
337                        eprintln!(
338                            "  Warning: could not generate diff for mutant at line {}: {}",
339                            line_num, e
340                        );
341                        continue;
342                    }
343                };
344                let patch_hash = compute_patch_hash(&diff);
345                let operator_label =
346                    format!("{} ==> {}", operator.pattern.as_str(), operator.replacement);
347                collected.push(MutantData {
348                    diff,
349                    patch_hash,
350                    file_path: file_to_mutate.to_string(),
351                    operator: operator_label,
352                });
353
354                if one_mutant {
355                    break; // Break only from operator loop, continue to next line
356                }
357            }
358        }
359
360        // Debug output for lines that didn't match any patterns
361        if !line_had_match && !line_before_mutation.trim().is_empty() {
362            println!(
363                "Line {} '{}' didn't match any patterns",
364                line_num,
365                line_before_mutation.trim()
366            );
367        }
368
369        // Note: Removed the early break that was stopping line processing
370        // Now each line gets processed independently
371    }
372
373    // Print AST analysis statistics
374    if let Some(detector) = arid_detector {
375        let stats = detector.get_stats();
376        println!("AST Analysis Stats: {:?}", stats);
377    }
378
379    println!("Generated {} mutants...", mutant_count);
380    Ok(collected)
381}
382
383fn should_skip_line(line: &str, file_path: &str, is_unit_test: bool) -> Result<bool> {
384    let trimmed = line.trim_start();
385
386    // Check basic patterns to skip
387    for pattern in get_do_not_mutate_patterns() {
388        if trimmed.starts_with(pattern) {
389            return Ok(true);
390        }
391    }
392
393    // Check skip if contain patterns
394    for pattern in get_skip_if_contain_patterns() {
395        if line.contains(pattern) {
396            return Ok(true);
397        }
398    }
399
400    // Language-specific checks
401    if file_path.contains(".py") || is_unit_test {
402        let patterns = if is_unit_test {
403            get_do_not_mutate_unit_patterns()
404        } else {
405            get_do_not_mutate_py_patterns()
406        };
407
408        for pattern in patterns {
409            if line.contains(pattern) {
410                return Ok(true);
411            }
412        }
413
414        // Check for assignment patterns
415        let assignment_regex = if is_unit_test {
416            Regex::new(
417                r"\b(?:[a-zA-Z_][a-zA-Z0-9_:<>*&\s]+)\s+[a-zA-Z_][a-zA-Z0-9_]*(?:\[[^\]]*\])?(?:\.(?:[a-zA-Z_][a-zA-Z0-9_]*)|\->(?:[a-zA-Z_][a-zA-Z0-9_]*))*(?:\s*=\s*[^;]+|\s*\{[^;]+\})\s*",
418            )?
419        } else {
420            Regex::new(r"^\s*([a-zA-Z_]\w*)\s*=\s*(.+)$")?
421        };
422
423        if assignment_regex.is_match(line) {
424            return Ok(true);
425        }
426    }
427
428    Ok(false)
429}
430
/// Return the directory part of `file_to_mutate`, relative to a leading `src` directory.
///
/// The parent directory of the path is taken and, if its first component is
/// exactly `src`, that component is removed. Examples:
///
/// * `src/wallet/wallet.cpp` -> `wallet`
/// * `src/main.cpp` -> `` (empty)
/// * `test/functional/a.py` -> `test/functional`
/// * `test.cpp` -> `` (empty)
///
/// The prefix is matched per path component, so a directory that merely
/// starts with the letters "src" (for example `srcfoo/`) is left untouched,
/// and `src/src/x.cpp` yields `src` rather than an empty string.
fn get_folder_path(file_to_mutate: &str) -> String {
    let parent = match Path::new(file_to_mutate).parent() {
        Some(dir) => dir,
        None => return String::new(),
    };

    // `Path::strip_prefix` compares whole components, unlike `str::strip_prefix`.
    let relative = parent.strip_prefix("src").unwrap_or(parent);

    // The input is a `&str`, so the path is valid UTF-8; fall back to empty otherwise.
    relative.to_str().unwrap_or("").to_string()
}
454
455fn write_mutation(
456    file_to_mutate: &str,
457    mutated_content: &str,
458    mutant_index: usize,
459    pr_number: Option<u32>,
460    range_lines: Option<(usize, usize)>,
461) -> Result<usize> {
462    let file_extension = if file_to_mutate.ends_with(".h") {
463        ".h"
464    } else if file_to_mutate.ends_with(".py") {
465        ".py"
466    } else {
467        ".cpp"
468    };
469
470    let folders = get_folder_path(file_to_mutate);
471
472    let base_file_name = Path::new(file_to_mutate)
473        .file_stem()
474        .and_then(|s| s.to_str())
475        .ok_or_else(|| MutationError::InvalidInput("Invalid file path".to_string()))?;
476
477    // Combine folders with base filename
478    let file_name = if folders.is_empty() {
479        base_file_name.to_string()
480    } else {
481        format!("{}/{}", folders, base_file_name)
482    };
483
484    let ext = file_extension.trim_start_matches('.');
485    let folder = if let Some(pr) = pr_number {
486        format!("muts-pr-{}-{}-{}", pr, file_name.replace('/', "-"), ext)
487    } else if let Some(range) = range_lines {
488        format!("muts-pr-{}-{}-{}", file_name.replace('/', "-"), range.0, range.1)
489    } else {
490        format!("muts-{}-{}", file_name.replace('/', "-"), ext)
491    };
492
493    create_mutation_folder(&folder, file_to_mutate)?;
494
495    let mutator_file = format!(
496        "{}/{}.mutant.{}{}",
497        folder, base_file_name, mutant_index, file_extension
498    );
499    fs::write(mutator_file, mutated_content)?;
500
501    Ok(mutant_index + 1)
502}
503
504fn create_mutation_folder(folder_name: &str, file_to_mutate: &str) -> Result<()> {
505    let folder_path = Path::new(folder_name);
506
507    if !folder_path.exists() {
508        fs::create_dir_all(folder_path)?;
509
510        let original_file_path = folder_path.join("original_file.txt");
511        fs::write(original_file_path, file_to_mutate)?;
512    }
513
514    Ok(())
515}
516
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Switches the process working directory and restores the previous one on drop.
    ///
    /// Without this, a test that changes into a temporary directory leaves the
    /// whole test process inside a directory that is deleted when the test ends.
    struct CwdGuard {
        previous: std::path::PathBuf,
    }

    impl CwdGuard {
        /// Remember the current working directory, then change into `dir`.
        fn enter(dir: &std::path::Path) -> Self {
            let previous = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            CwdGuard { previous }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Best effort: a failure here must not mask the test's own result.
            let _ = std::env::set_current_dir(&self.previous);
        }
    }

    /// Lines matching the basic skip patterns are skipped; ordinary C++ statements are not.
    #[test]
    fn test_should_skip_line() {
        // Test basic skip patterns
        assert!(should_skip_line("// This is a comment", "test.cpp", false).unwrap());
        assert!(should_skip_line("assert(condition);", "test.cpp", false).unwrap());
        assert!(should_skip_line("LogPrintf(\"test\");", "test.cpp", false).unwrap());
        assert!(should_skip_line("LogDebug(\"test\");", "test.cpp", false).unwrap());

        // Test normal lines that shouldn't be skipped
        assert!(!should_skip_line("int x = 5;", "test.cpp", false).unwrap());
        assert!(!should_skip_line("return value;", "test.cpp", false).unwrap());
    }

    /// A new mutation folder is created together with its `original_file.txt` marker.
    #[test]
    fn test_create_mutation_folder() {
        let temp_dir = tempdir().unwrap();
        let folder_path = temp_dir.path().join("test_muts");
        let folder_name = folder_path.to_str().unwrap();

        create_mutation_folder(folder_name, "test/file.cpp").unwrap();

        assert!(folder_path.exists());
        assert!(folder_path.join("original_file.txt").exists());

        let content = fs::read_to_string(folder_path.join("original_file.txt")).unwrap();
        assert_eq!(content, "test/file.cpp");
    }

    /// A mutant is written under `muts-<name>-<ext>` and the next index is returned.
    #[test]
    fn test_write_mutation() {
        let temp_dir = tempdir().unwrap();
        // Declared after `temp_dir` so it is dropped first: the working
        // directory is restored before the temporary directory is removed.
        let _cwd = CwdGuard::enter(temp_dir.path());

        let result = write_mutation("test.cpp", "mutated content", 0, None, None).unwrap();
        assert_eq!(result, 1);

        let folder_path = Path::new("muts-test-cpp");
        assert!(folder_path.exists());
        assert!(folder_path.join("test.mutant.0.cpp").exists());

        let content = fs::read_to_string(folder_path.join("test.mutant.0.cpp")).unwrap();
        assert_eq!(content, "mutated content");
    }
}
565}