Skip to main content

bcore_mutation/
ast_analysis.rs

1use crate::error::Result;
2use regex::Regex;
3use std::collections::HashMap;
4
/// Represents different types of AST nodes.
///
/// The variants fall into two groups: *simple* nodes, which carry no body, and
/// *compound* nodes, which are expected to own a body/children. The enum is a
/// plain tag with no payload, so it also derives `Eq` and `Hash` to make it
/// usable as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum AstNodeType {
    // Simple nodes (no body)
    /// A call expression such as `foo(a, b)`.
    FunctionCall,
    /// A declaration of a variable, with or without an initializer.
    VariableDeclaration,
    /// An assignment to an existing name.
    Assignment,
    /// A literal value (number, string, character, boolean, null pointer).
    Literal,
    /// A bare identifier; also the fallback classification.
    Identifier,
    /// An expression built around a binary operator.
    BinaryOperator,
    /// An expression built around a unary operator.
    UnaryOperator,

    // Compound nodes (have body/children)
    /// An `if` / `else` header.
    IfStatement,
    /// A `for` loop header.
    ForLoop,
    /// A `while` loop header.
    WhileLoop,
    /// A brace-delimited block.
    Block,
    /// A function declaration or definition.
    Function,
    /// A `class` or `struct` declaration.
    Class,
    /// A `namespace` declaration or `using namespace` directive.
    Namespace,
}
26
27/// Represents a node in the AST
28#[derive(Debug, Clone)]
29pub struct AstNode {
30    pub node_type: AstNodeType,
31    pub content: String,
32    pub line_number: usize,
33    pub column_start: usize,
34    #[allow(dead_code)]
35    pub column_end: usize,
36    pub children: Vec<AstNode>,
37}
38
39impl AstNode {
40    pub fn new(
41        node_type: AstNodeType,
42        content: String,
43        line_number: usize,
44        column_start: usize,
45        column_end: usize,
46    ) -> Self {
47        Self {
48            node_type,
49            content,
50            line_number,
51            column_start,
52            column_end,
53            children: Vec::new(),
54        }
55    }
56
57    #[allow(dead_code)]
58    pub fn add_child(&mut self, child: AstNode) {
59        self.children.push(child);
60    }
61
62    pub fn is_simple(&self) -> bool {
63        matches!(
64            self.node_type,
65            AstNodeType::FunctionCall
66                | AstNodeType::VariableDeclaration
67                | AstNodeType::Assignment
68                | AstNodeType::Literal
69                | AstNodeType::Identifier
70                | AstNodeType::BinaryOperator
71                | AstNodeType::UnaryOperator
72        )
73    }
74
75    #[allow(dead_code)]
76    pub fn is_compound(&self) -> bool {
77        !self.is_simple()
78    }
79}
80
/// Expert knowledge for detecting arid nodes
///
/// Holds four groups of compiled regular expressions that
/// `is_arid_simple_node` matches against a node's content.
pub struct ExpertKnowledge {
    /// Consulted only for `FunctionCall` nodes.
    arid_function_patterns: Vec<Regex>,
    /// Consulted only for `VariableDeclaration` and `Assignment` nodes.
    arid_variable_patterns: Vec<Regex>,
    /// Consulted for every simple node regardless of its type.
    arid_statement_patterns: Vec<Regex>,
    /// Consulted for every simple node except `FunctionCall`.
    arid_namespace_patterns: Vec<Regex>,
}
88
89impl ExpertKnowledge {
90    pub fn new() -> Result<Self> {
91        let arid_function_patterns = vec![
92            // Memory management functions
93            Regex::new(r"std::vector<.*>::reserve")?,
94            Regex::new(r"std::vector<.*>::resize")?,
95            Regex::new(r"std::.*::reserve")?,
96            Regex::new(r"\.reserve\s*\(")?,
97            Regex::new(r"\.resize\s*\(")?,
98            // I/O operations (typically not unit tested)
99            Regex::new(r"std::cout\s*<<")?,
100            Regex::new(r"std::cerr\s*<<")?,
101            Regex::new(r"printf\s*\(")?,
102            Regex::new(r"fprintf\s*\(")?,
103            Regex::new(r"std::endl")?,
104            // Logging functions - note the patterns match anywhere in the string
105            Regex::new(r"LogPrintf\s*\(")?,
106            Regex::new(r"LogPrint\s*\(")?,
107            Regex::new(r"LogDebug\s*\(")?,
108            Regex::new(r"\blog\.")?,
109            Regex::new(r"\blogger\.")?,
110            Regex::new(r"\blogging\.")?,
111            // Debug/trace functions
112            Regex::new(r"assert\s*\(")?,
113            Regex::new(r"DEBUG_")?,
114            Regex::new(r"TRACE_")?,
115            // Bitcoin Core specific patterns
116            Regex::new(r"G_FUZZING")?,
117            Regex::new(r"fPrintToConsole")?,
118            Regex::new(r"strprintf\s*\(")?,
119            // Memory allocation that's usually not tested
120            Regex::new(r"malloc\s*\(")?,
121            Regex::new(r"calloc\s*\(")?,
122            Regex::new(r"realloc\s*\(")?,
123            Regex::new(r"free\s*\(")?,
124            // Thread/concurrency primitives often not unit tested
125            Regex::new(r"std::thread")?,
126            Regex::new(r"std::mutex")?,
127            Regex::new(r"std::lock_guard")?,
128            // Performance monitoring (usually not tested)
129            Regex::new(r"\.now\(\)")?,
130            Regex::new(r"steady_clock")?,
131            Regex::new(r"high_resolution_clock")?,
132        ];
133
134        let arid_variable_patterns = vec![
135            // Timing/performance variables
136            Regex::new(r".*_time$")?,
137            Regex::new(r".*_duration$")?,
138            Regex::new(r".*_start$")?,
139            Regex::new(r".*_end$")?,
140            // Debug/logging variables
141            Regex::new(r".*_debug$")?,
142            Regex::new(r".*_log$")?,
143            Regex::new(r".*_trace$")?,
144            // Temporary/scratch variables
145            Regex::new(r"temp_.*")?,
146            Regex::new(r"tmp_.*")?,
147            Regex::new(r"scratch_.*")?,
148        ];
149
150        let arid_statement_patterns = vec![
151            // Comments
152            Regex::new(r"^\s*//")?,
153            Regex::new(r"^\s*/\*")?,
154            // Preprocessor directives
155            Regex::new(r"^\s*#")?,
156            // Empty statements
157            Regex::new(r"^\s*;")?,
158            // Namespace declarations
159            Regex::new(r"^\s*namespace\s+")?,
160            Regex::new(r"^\s*using\s+namespace\s+")?,
161            // Forward declarations
162            Regex::new(r"^\s*class\s+\w+\s*;")?,
163            Regex::new(r"^\s*struct\s+\w+\s*;")?,
164        ];
165
166        let arid_namespace_patterns = vec![
167            // Standard library
168            Regex::new(r"std::")?,
169            // Boost library (often infrastructure)
170            Regex::new(r"boost::")?,
171            // Testing frameworks
172            Regex::new(r"testing::")?,
173            Regex::new(r"gtest::")?,
174        ];
175
176        Ok(Self {
177            arid_function_patterns,
178            arid_variable_patterns,
179            arid_statement_patterns,
180            arid_namespace_patterns,
181        })
182    }
183
184    /// Expert function that determines if a simple node is arid
185    pub fn is_arid_simple_node(&self, node: &AstNode) -> bool {
186        if !node.is_simple() {
187            return false;
188        }
189
190        let content = &node.content;
191
192        // Check function call patterns first (most specific)
193        if matches!(node.node_type, AstNodeType::FunctionCall) {
194            for pattern in &self.arid_function_patterns {
195                if pattern.is_match(content) {
196                    return true;
197                }
198            }
199        }
200
201        // Check variable patterns
202        if matches!(
203            node.node_type,
204            AstNodeType::VariableDeclaration | AstNodeType::Assignment
205        ) {
206            for pattern in &self.arid_variable_patterns {
207                if pattern.is_match(content) {
208                    return true;
209                }
210            }
211        }
212
213        // Check general statement patterns
214        for pattern in &self.arid_statement_patterns {
215            if pattern.is_match(content) {
216                return true;
217            }
218        }
219
220        // Check namespace patterns (but not for function calls as that's too broad)
221        if !matches!(node.node_type, AstNodeType::FunctionCall) {
222            for pattern in &self.arid_namespace_patterns {
223                if pattern.is_match(content) {
224                    return true;
225                }
226            }
227        }
228
229        false
230    }
231}
232
/// Arid node detector implementing Google's algorithm
///
/// Combines the expert rule tables with a memo of verdicts already computed
/// by `is_arid`.
pub struct AridNodeDetector {
    /// Rule tables used to judge simple nodes.
    expert: ExpertKnowledge,
    /// Memoised `is_arid` verdicts, keyed by a string built from the node.
    cache: HashMap<String, bool>,
}
238
239impl AridNodeDetector {
240    pub fn new() -> Result<Self> {
241        Ok(Self {
242            expert: ExpertKnowledge::new()?,
243            cache: HashMap::new(),
244        })
245    }
246
247    /// Implementation of Google's arid node detection algorithm
248    /// arid(N) = expert(N) if simple(N)
249    ///         = 1 if ∀(arid(c)) = 1, ∀c ∈ N otherwise
250    pub fn is_arid(&mut self, node: &AstNode) -> bool {
251        // Create cache key
252        let cache_key = format!(
253            "{}:{}:{}",
254            node.line_number, node.column_start, node.content
255        );
256
257        if let Some(&cached_result) = self.cache.get(&cache_key) {
258            return cached_result;
259        }
260
261        let result = if node.is_simple() {
262            // For simple nodes, use expert knowledge
263            self.expert.is_arid_simple_node(node)
264        } else {
265            // For compound nodes, check if ALL children are arid
266            if node.children.is_empty() {
267                // Empty compound node is not arid
268                false
269            } else {
270                // All children must be arid for compound node to be arid
271                node.children.iter().all(|child| self.is_arid(child))
272            }
273        };
274
275        // Cache the result
276        self.cache.insert(cache_key, result);
277        result
278    }
279
280    /// Context-aware version that checks if a line should be mutated
281    /// Takes all lines and the current line index to understand control structures
282    pub fn should_mutate_line_with_context(&mut self, lines: &[String], line_index: usize) -> bool {
283        let line = &lines[line_index];
284        let trimmed = line.trim();
285
286        // Skip empty lines and closing braces
287        if trimmed.is_empty() || trimmed == "}" {
288            return false;
289        }
290
291        let line_number = line_index + 1;
292        let node_type = self.classify_line(trimmed);
293
294        // For control structures, check if their body is all arid
295        if matches!(
296            node_type,
297            AstNodeType::IfStatement | AstNodeType::ForLoop | AstNodeType::WhileLoop
298        ) {
299            // If the control structure body is all arid, don't mutate the control structure
300            return !self.is_control_structure_body_arid(lines, line_index);
301        }
302
303        // For lines inside control structures, we still need to check them individually
304        // unless they're part of an all-arid control structure (which is handled above)
305        let node = self.parse_line_to_simple_ast(trimmed, line_number);
306        !self.is_arid(&node)
307    }
308
309    /// Check if a control structure's body contains only arid statements
310    fn is_control_structure_body_arid(&mut self, lines: &[String], start_index: usize) -> bool {
311        let start_line = lines[start_index].trim();
312
313        // Check if this is a single-line control structure (no braces)
314        // e.g., "if (condition) single_statement;"
315        if !start_line.contains('{') {
316            // Look for the statement on the same line or next line
317            let statement = if start_line.contains(')') && start_line.ends_with(';') {
318                // Extract everything after the closing paren
319                if let Some(pos) = start_line.rfind(')') {
320                    start_line[pos + 1..].trim()
321                } else {
322                    start_line
323                }
324            } else if start_index + 1 < lines.len() {
325                // Statement is on the next line
326                lines[start_index + 1].trim()
327            } else {
328                return false;
329            };
330
331            // Parse and check if the statement is arid
332            let node = self.parse_line_to_simple_ast(statement, start_index + 2);
333            return self.is_arid(&node);
334        }
335
336        // Find the opening brace
337        let mut brace_line_index = start_index;
338        if !start_line.contains('{') {
339            // Opening brace might be on the next line
340            brace_line_index = start_index + 1;
341            if brace_line_index >= lines.len() || !lines[brace_line_index].contains('{') {
342                return false;
343            }
344        }
345
346        // Find matching closing brace
347        let body_range = match self.find_matching_brace(lines, brace_line_index) {
348            Some(end_index) => (brace_line_index + 1, end_index),
349            None => return false,
350        };
351
352        // Check if all non-empty lines in the body are arid
353        let mut has_non_empty_line = false;
354        for i in body_range.0..body_range.1 {
355            let line = lines[i].trim();
356
357            // Skip empty lines and braces
358            if line.is_empty() || line == "{" || line == "}" {
359                continue;
360            }
361
362            has_non_empty_line = true;
363
364            // Parse the line and check if it's arid
365            let node = self.parse_line_to_simple_ast(line, i + 1);
366            if !self.is_arid(&node) {
367                // Found a non-arid line in the body
368                return false;
369            }
370        }
371
372        // If we found at least one non-empty line and all were arid, return true
373        // If no non-empty lines, return false (empty body is not arid)
374        has_non_empty_line
375    }
376
377    /// Find the index of the closing brace that matches the opening brace at start_index
378    fn find_matching_brace(&self, lines: &[String], start_index: usize) -> Option<usize> {
379        let mut brace_count = 0;
380        let mut found_opening = false;
381
382        for (i, line) in lines.iter().enumerate().skip(start_index) {
383            for ch in line.chars() {
384                match ch {
385                    '{' => {
386                        brace_count += 1;
387                        found_opening = true;
388                    }
389                    '}' => {
390                        brace_count -= 1;
391                        if found_opening && brace_count == 0 {
392                            return Some(i);
393                        }
394                    }
395                    _ => {}
396                }
397            }
398        }
399
400        None
401    }
402
403    /// Simple heuristic-based parsing to create AST nodes from single lines
404    fn parse_line_to_simple_ast(&self, line_content: &str, line_number: usize) -> AstNode {
405        let trimmed = line_content.trim();
406
407        // Skip empty lines and comments
408        if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with("/*") {
409            return AstNode::new(
410                AstNodeType::Identifier,
411                trimmed.to_string(),
412                line_number,
413                0,
414                line_content.len(),
415            );
416        }
417
418        // Determine node type based on content patterns
419        let node_type = self.classify_line(trimmed);
420
421        AstNode::new(
422            node_type,
423            trimmed.to_string(),
424            line_number,
425            0,
426            line_content.len(),
427        )
428    }
429
430    /// Classify a line of code into the appropriate AST node type
431    fn classify_line(&self, line: &str) -> AstNodeType {
432        // Namespace declarations
433        if line.starts_with("namespace ") || line.contains("using namespace") {
434            return AstNodeType::Namespace;
435        }
436
437        // Class declarations
438        if line.starts_with("class ") || line.starts_with("struct ") {
439            return AstNodeType::Class;
440        }
441
442        // Control flow statements (compound nodes) - check these before function declarations
443        if line.starts_with("if ") || line.starts_with("if(") || line.contains("} else ") {
444            return AstNodeType::IfStatement;
445        }
446        if line.starts_with("for ") || line.starts_with("for(") {
447            return AstNodeType::ForLoop;
448        }
449        if line.starts_with("while ") || line.starts_with("while(") {
450            return AstNodeType::WhileLoop;
451        }
452
453        // Block statements
454        if line == "{" || line == "}" || line.ends_with(" {") {
455            return AstNodeType::Block;
456        }
457
458        // Variable declarations
459        if self.is_variable_declaration(line) {
460            return AstNodeType::VariableDeclaration;
461        }
462
463        // Assignment operations
464        if self.is_assignment(line) {
465            return AstNodeType::Assignment;
466        }
467
468        // Function calls - check BEFORE function declarations
469        if self.is_function_call(line) {
470            return AstNodeType::FunctionCall;
471        }
472
473        // Function declarations/definitions - check AFTER function calls
474        if self.is_function_declaration(line) {
475            return AstNodeType::Function;
476        }
477
478        // Binary operators
479        if self.is_binary_operation(line) {
480            return AstNodeType::BinaryOperator;
481        }
482
483        // Unary operators
484        if self.is_unary_operation(line) {
485            return AstNodeType::UnaryOperator;
486        }
487
488        // Literals
489        if self.is_literal(line) {
490            return AstNodeType::Literal;
491        }
492
493        // Default to identifier
494        AstNodeType::Identifier
495    }
496
497    /// Check if line is a function declaration or definition
498    fn is_function_declaration(&self, line: &str) -> bool {
499        // Function calls end with ); - those are NOT declarations
500        if line.trim().ends_with(");") {
501            return false;
502        }
503
504        // Function declarations typically:
505        // - Have a return type before the function name
506        // - End with { or just ; (not );)
507        // - Have modifiers like virtual, static, etc.
508
509        let function_patterns = [
510            // Return type + function name + params + opening brace
511            Regex::new(r"^\s*\w+\s+\w+\s*\([^)]*\)\s*\{").unwrap(),
512            // Constructor/destructor with opening brace or initializer list
513            Regex::new(r"^\s*~?\w+\s*\([^)]*\)\s*[{:]").unwrap(),
514            // Template function
515            Regex::new(r"^\s*template\s*<[^>]*>").unwrap(),
516            // Function with qualifiers (virtual, static, inline, explicit, etc.)
517            Regex::new(r"^\s*(?:virtual\s+|static\s+|inline\s+|explicit\s+)").unwrap(),
518            // Return type + function name + params + ending semicolon (forward declaration)
519            // But make sure it doesn't end with );
520            Regex::new(r"^\s*\w+\s+\w+\s*\([^)]*\)\s*;\s*$").unwrap(),
521        ];
522
523        function_patterns
524            .iter()
525            .any(|pattern| pattern.is_match(line))
526            && !line.contains('=')
527    }
528
529    /// Check if line is a variable declaration
530    fn is_variable_declaration(&self, line: &str) -> bool {
531        let var_patterns = [
532            Regex::new(r"^\s*(int|bool|char|float|double|long|short|unsigned|signed)\s+\w+")
533                .unwrap(),
534            Regex::new(r"^\s*std::\w+\s*<?[^>]*>?\s+\w+").unwrap(),
535            Regex::new(r"^\s*[A-Z]\w*\s+\w+").unwrap(),
536            Regex::new(r"^\s*\w+\s*[*&]+\s*\w+").unwrap(),
537            Regex::new(r"^\s*const\s+\w+").unwrap(),
538            Regex::new(r"^\s*auto\s+\w+").unwrap(),
539        ];
540
541        var_patterns.iter().any(|pattern| pattern.is_match(line))
542            && !line.contains('(')
543            && (line.contains('=') || line.ends_with(';'))
544    }
545
546    /// Check if line is an assignment
547    fn is_assignment(&self, line: &str) -> bool {
548        line.contains('=')
549            && !line.contains("==")
550            && !line.contains("!=")
551            && !line.contains("<=")
552            && !line.contains(">=")
553            && !self.is_variable_declaration(line)
554    }
555
556    /// Check if line is a function call
557    fn is_function_call(&self, line: &str) -> bool {
558        line.contains('(')
559            && line.contains(')')
560            && !self.is_function_declaration(line)
561            && !self.is_variable_declaration(line)
562            && !line.starts_with("if ")
563            && !line.starts_with("if(")
564            && !line.starts_with("while ")
565            && !line.starts_with("while(")
566            && !line.starts_with("for ")
567            && !line.starts_with("for(")
568    }
569
570    /// Check if line contains binary operations
571    fn is_binary_operation(&self, line: &str) -> bool {
572        let binary_ops = [
573            "+", "-", "*", "/", "%", "&&", "||", "&", "|", "^", "<<", ">>",
574        ];
575        binary_ops.iter().any(|op| line.contains(op)) && !line.contains('=') && !line.contains('(')
576    }
577
578    /// Check if line contains unary operations
579    fn is_unary_operation(&self, line: &str) -> bool {
580        let unary_patterns = [
581            Regex::new(r"\+\+\w+").unwrap(),
582            Regex::new(r"\w\+\+").unwrap(),
583            Regex::new(r"--\w+").unwrap(),
584            Regex::new(r"\w--").unwrap(),
585            Regex::new(r"!\w+").unwrap(),
586            Regex::new(r"~\w+").unwrap(),
587        ];
588
589        unary_patterns.iter().any(|pattern| pattern.is_match(line))
590    }
591
592    /// Check if line is a literal value
593    fn is_literal(&self, line: &str) -> bool {
594        let literal_patterns = [
595            Regex::new(r"^\s*\d+\s*;?\s*$").unwrap(),
596            Regex::new(r"^\s*\d+\.\d+\s*;?\s*$").unwrap(),
597            Regex::new(r#"^\s*"[^"]*"\s*;?\s*$"#).unwrap(),
598            Regex::new(r"^\s*'[^']*'\s*;?\s*$").unwrap(),
599            Regex::new(r"^\s*(true|false)\s*;?\s*$").unwrap(),
600            Regex::new(r"^\s*(nullptr|NULL)\s*;?\s*$").unwrap(),
601        ];
602
603        literal_patterns
604            .iter()
605            .any(|pattern| pattern.is_match(line))
606    }
607
608    /// Add a new expert rule at runtime
609    pub fn add_expert_rule(&mut self, pattern: &str, description: &str) -> Result<()> {
610        let regex = Regex::new(pattern)?;
611        self.expert.arid_function_patterns.push(regex);
612        println!("Added expert rule: {} ({})", pattern, description);
613        Ok(())
614    }
615
616    /// Get statistics about arid node detection
617    pub fn get_stats(&self) -> HashMap<String, usize> {
618        let mut stats = HashMap::new();
619        stats.insert(
620            "total_expert_rules".to_string(),
621            self.expert.arid_function_patterns.len()
622                + self.expert.arid_variable_patterns.len()
623                + self.expert.arid_statement_patterns.len(),
624        );
625        stats.insert("cache_size".to_string(), self.cache.len());
626        stats.insert(
627            "function_patterns".to_string(),
628            self.expert.arid_function_patterns.len(),
629        );
630        stats.insert(
631            "variable_patterns".to_string(),
632            self.expert.arid_variable_patterns.len(),
633        );
634        stats.insert(
635            "statement_patterns".to_string(),
636            self.expert.arid_statement_patterns.len(),
637        );
638        stats
639    }
640
641    /// Export detailed analysis of which lines were filtered and why
642    #[allow(dead_code)]
643    pub fn analyze_file_detailed(&mut self, file_content: &str) -> DetailedAnalysis {
644        let lines: Vec<String> = file_content.lines().map(|s| s.to_string()).collect();
645        let mut analysis = DetailedAnalysis::new();
646
647        for (idx, line) in lines.iter().enumerate() {
648            let line_number = idx + 1;
649            let should_mutate = self.should_mutate_line_with_context(&lines, idx);
650            let node = self.parse_line_to_simple_ast(line, line_number);
651            let is_arid = !should_mutate;
652            let reason = if is_arid {
653                self.get_arid_reason(&node, &lines, idx)
654            } else {
655                "Not arid - will be mutated".to_string()
656            };
657
658            analysis.add_line_analysis(LineAnalysis {
659                line_number,
660                content: line.to_string(),
661                node_type: node.node_type,
662                is_arid,
663                reason,
664            });
665        }
666
667        analysis
668    }
669
670    /// Get the reason why a node is considered arid
671    #[allow(dead_code)]
672    fn get_arid_reason(&self, node: &AstNode, _lines: &[String], _line_index: usize) -> String {
673        // Check if this is a control structure with arid body
674        if matches!(
675            node.node_type,
676            AstNodeType::IfStatement | AstNodeType::ForLoop | AstNodeType::WhileLoop
677        ) {
678            return "Control structure with arid body (logging/debugging only)".to_string();
679        }
680
681        if !node.is_simple() {
682            return "Compound node - arid if all children are arid".to_string();
683        }
684
685        let content = &node.content;
686
687        // Check function call patterns
688        if matches!(node.node_type, AstNodeType::FunctionCall) {
689            for (idx, pattern) in self.expert.arid_function_patterns.iter().enumerate() {
690                if pattern.is_match(content) {
691                    return format!(
692                        "Matches arid function pattern #{}: {}",
693                        idx + 1,
694                        pattern.as_str()
695                    );
696                }
697            }
698        }
699
700        // Check variable patterns
701        if matches!(
702            node.node_type,
703            AstNodeType::VariableDeclaration | AstNodeType::Assignment
704        ) {
705            for (idx, pattern) in self.expert.arid_variable_patterns.iter().enumerate() {
706                if pattern.is_match(content) {
707                    return format!(
708                        "Matches arid variable pattern #{}: {}",
709                        idx + 1,
710                        pattern.as_str()
711                    );
712                }
713            }
714        }
715
716        // Check statement patterns
717        for (idx, pattern) in self.expert.arid_statement_patterns.iter().enumerate() {
718            if pattern.is_match(content) {
719                return format!(
720                    "Matches arid statement pattern #{}: {}",
721                    idx + 1,
722                    pattern.as_str()
723                );
724            }
725        }
726
727        "Not arid".to_string()
728    }
729
730    /// Clear the cache (useful for testing or when rules change)
731    #[allow(dead_code)]
732    pub fn clear_cache(&mut self) {
733        self.cache.clear();
734    }
735}
736
737/// Detailed analysis results for a file
738#[allow(dead_code)]
739#[derive(Debug)]
740pub struct DetailedAnalysis {
741    pub lines: Vec<LineAnalysis>,
742    pub summary: AnalysisSummary,
743}
744
745#[allow(dead_code)]
746impl DetailedAnalysis {
747    pub fn new() -> Self {
748        Self {
749            lines: Vec::new(),
750            summary: AnalysisSummary::default(),
751        }
752    }
753
754    pub fn add_line_analysis(&mut self, analysis: LineAnalysis) {
755        if analysis.is_arid {
756            self.summary.arid_lines += 1;
757        } else {
758            self.summary.mutatable_lines += 1;
759        }
760        self.summary.total_lines += 1;
761        self.lines.push(analysis);
762    }
763
764    pub fn print_summary(&self) {
765        println!("\n=== AST Analysis Summary ===");
766        println!("Total lines: {}", self.summary.total_lines);
767        println!("Mutatable lines: {}", self.summary.mutatable_lines);
768        println!("Arid lines: {}", self.summary.arid_lines);
769        println!(
770            "Filtering efficiency: {:.1}% reduction",
771            (self.summary.arid_lines as f64 / self.summary.total_lines as f64) * 100.0
772        );
773    }
774
775    pub fn print_arid_lines(&self) {
776        println!("\n=== Filtered Out (Arid) Lines ===");
777        for line in &self.lines {
778            if line.is_arid {
779                println!(
780                    "Line {}: {} | Reason: {}",
781                    line.line_number,
782                    line.content.trim(),
783                    line.reason
784                );
785            }
786        }
787    }
788}
789
/// Analysis of a single line
#[allow(dead_code)]
#[derive(Debug)]
pub struct LineAnalysis {
    /// 1-based line number within the analysed file.
    pub line_number: usize,
    /// The line's text as it appeared in the input.
    pub content: String,
    /// Node type the line was classified as.
    pub node_type: AstNodeType,
    /// `true` when the line was filtered out (will not be mutated).
    pub is_arid: bool,
    /// Human-readable explanation of the verdict.
    pub reason: String,
}
800
/// Summary statistics for analysis
#[allow(dead_code)]
#[derive(Debug, Default)]
pub struct AnalysisSummary {
    /// Number of lines recorded so far.
    pub total_lines: usize,
    /// Number of recorded lines that are not arid.
    pub mutatable_lines: usize,
    /// Number of recorded lines that are arid.
    pub arid_lines: usize,
}
809
810/// Integration with existing mutation system - context-aware version
811pub fn filter_mutatable_lines(lines: &[String], detector: &mut AridNodeDetector) -> Vec<usize> {
812    lines
813        .iter()
814        .enumerate()
815        .filter_map(|(idx, _line)| {
816            let line_number = idx + 1;
817            if detector.should_mutate_line_with_context(lines, idx) {
818                Some(line_number)
819            } else {
820                None
821            }
822        })
823        .collect()
824}
825
// Unit tests for the expert rules, the arid-node algorithm and the
// line-level filtering entry point.
#[cfg(test)]
mod tests {
    use super::*;

    /// The expert flags known arid calls and leaves ordinary calls alone.
    #[test]
    fn test_expert_knowledge() {
        let expert = ExpertKnowledge::new().unwrap();

        // Test arid function calls
        let reserve_node = AstNode::new(
            AstNodeType::FunctionCall,
            "vec.reserve(100)".to_string(),
            1,
            0,
            15,
        );
        assert!(expert.is_arid_simple_node(&reserve_node));

        // Test non-arid function calls
        let normal_node = AstNode::new(
            AstNodeType::FunctionCall,
            "calculate_sum(a, b)".to_string(),
            1,
            0,
            18,
        );
        assert!(!expert.is_arid_simple_node(&normal_node));

        // Test LogDebug function call
        let log_debug_node = AstNode::new(
            AstNodeType::FunctionCall,
            "LogDebug(BCLog::ADDRMAN, \"test\");".to_string(),
            1,
            0,
            30,
        );
        assert!(
            expert.is_arid_simple_node(&log_debug_node),
            "LogDebug should be recognized as arid"
        );
    }

    /// A compound node is arid only when every one of its children is arid.
    #[test]
    fn test_arid_detection_algorithm() {
        let mut detector = AridNodeDetector::new().unwrap();

        // Test simple arid node
        let arid_simple = AstNode::new(
            AstNodeType::FunctionCall,
            "std::cout << \"debug\"".to_string(),
            1,
            0,
            20,
        );
        assert!(detector.is_arid(&arid_simple));

        // Test compound node with all arid children
        let mut compound_arid =
            AstNode::new(AstNodeType::Block, "{ debug block }".to_string(), 1, 0, 15);
        compound_arid.add_child(arid_simple.clone());
        assert!(detector.is_arid(&compound_arid));

        // Test compound node with non-arid child
        let non_arid_simple = AstNode::new(
            AstNodeType::FunctionCall,
            "important_function()".to_string(),
            2,
            0,
            20,
        );
        let mut compound_mixed =
            AstNode::new(AstNodeType::Block, "{ mixed block }".to_string(), 1, 0, 15);
        compound_mixed.add_child(arid_simple);
        compound_mixed.add_child(non_arid_simple);
        assert!(!detector.is_arid(&compound_mixed));
    }

    /// Only non-arid lines are returned, as 1-based line numbers.
    #[test]
    fn test_line_mutation_filtering() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "int x = 5;".to_string(),              // Should mutate
            "std::cout << \"debug\";".to_string(), // Should NOT mutate (arid)
            "vec.reserve(100);".to_string(),       // Should NOT mutate (arid)
            "return x + y;".to_string(),           // Should mutate
        ];

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // Should only include lines 1 and 4
        assert_eq!(mutatable_lines, vec![1, 4]);
    }

    /// An `if` whose braced body contains only logging is filtered entirely.
    #[test]
    fn test_if_statement_with_logging() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "if (!restore_bucketing) {".to_string(),
            "    LogDebug(BCLog::ADDRMAN, \"Bucketing method was updated, re-bucketing addrman entries from disk\\n\");".to_string(),
            "}".to_string(),
        ];

        // First, let's test that LogDebug itself is recognized as arid
        let log_line = lines[1].trim();
        let log_node = detector.parse_line_to_simple_ast(log_line, 2);
        assert_eq!(
            log_node.node_type,
            AstNodeType::FunctionCall,
            "LogDebug line should be classified as FunctionCall"
        );
        assert!(
            detector.is_arid(&log_node),
            "LogDebug should be recognized as arid"
        );

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // The if statement should NOT be mutated because it only contains logging
        // Lines 2 (LogDebug) and 3 (closing brace) also should not be mutated
        assert!(
            mutatable_lines.is_empty(),
            "Expected no mutatable lines, got: {:?}",
            mutatable_lines
        );
    }

    /// An `if` with a real statement in its body stays mutatable, as does the body.
    #[test]
    fn test_if_statement_with_non_arid_body() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "if (condition) {".to_string(),
            "    x = x + 1;".to_string(),
            "}".to_string(),
        ];

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // The if statement and the assignment should be mutated
        assert!(
            mutatable_lines.contains(&1),
            "If statement should be mutatable"
        );
        assert!(
            mutatable_lines.contains(&2),
            "Assignment should be mutatable"
        );
    }

    /// One non-arid statement in the body is enough to keep the `if` mutatable.
    #[test]
    fn test_if_statement_mixed_body() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "if (condition) {".to_string(),
            "    LogDebug(BCLog::TEST, \"debug\");".to_string(),
            "    x = x + 1;".to_string(),
            "}".to_string(),
        ];

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // The if statement should be mutated because it has non-arid content
        assert!(
            mutatable_lines.contains(&1),
            "If statement with mixed body should be mutable"
        );
        assert!(
            mutatable_lines.contains(&3),
            "Non-arid line in body should be mutable"
        );
    }
}