Skip to main content

bcore_mutation/
ast_analysis.rs

1use crate::error::Result;
2use regex::Regex;
3use std::collections::HashMap;
4
/// Represents different types of AST nodes.
///
/// Variants are split into *simple* nodes (leaf statements/expressions with no
/// body) and *compound* nodes (constructs that own a body or children).
///
/// The enum is fieldless, so it is cheap to copy and can be used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AstNodeType {
    // Simple nodes (no body)
    FunctionCall,
    VariableDeclaration,
    Assignment,
    Literal,
    Identifier,
    BinaryOperator,
    UnaryOperator,

    // Compound nodes (have body/children)
    IfStatement,
    ForLoop,
    WhileLoop,
    Block,
    Function,
    Class,
    Namespace,
}
26
/// Represents a node in the AST
///
/// A node couples a classification (`node_type`) with the source text it was
/// built from, its position, and any child nodes.
#[derive(Debug, Clone)]
pub struct AstNode {
    /// Classification of this node; decides whether it is simple or compound.
    pub node_type: AstNodeType,
    /// Source text the node was created from.
    pub content: String,
    /// Line number supplied by whoever built the node.
    pub line_number: usize,
    /// Starting column supplied by whoever built the node.
    pub column_start: usize,
    /// Ending column supplied by whoever built the node.
    #[allow(dead_code)]
    pub column_end: usize,
    /// Child nodes; empty right after construction via `AstNode::new`.
    pub children: Vec<AstNode>,
}
38
39impl AstNode {
40    pub fn new(
41        node_type: AstNodeType,
42        content: String,
43        line_number: usize,
44        column_start: usize,
45        column_end: usize,
46    ) -> Self {
47        Self {
48            node_type,
49            content,
50            line_number,
51            column_start,
52            column_end,
53            children: Vec::new(),
54        }
55    }
56
57    #[allow(dead_code)]
58    pub fn add_child(&mut self, child: AstNode) {
59        self.children.push(child);
60    }
61
62    pub fn is_simple(&self) -> bool {
63        matches!(
64            self.node_type,
65            AstNodeType::FunctionCall
66                | AstNodeType::VariableDeclaration
67                | AstNodeType::Assignment
68                | AstNodeType::Literal
69                | AstNodeType::Identifier
70                | AstNodeType::BinaryOperator
71                | AstNodeType::UnaryOperator
72        )
73    }
74
75    #[allow(dead_code)]
76    pub fn is_compound(&self) -> bool {
77        !self.is_simple()
78    }
79}
80
/// Expert knowledge for detecting arid nodes
///
/// Holds four groups of compiled regular expressions that
/// `is_arid_simple_node` matches against a node's content.
pub struct ExpertKnowledge {
    /// Consulted only for `FunctionCall` nodes.
    arid_function_patterns: Vec<Regex>,
    /// Consulted only for `VariableDeclaration` and `Assignment` nodes.
    arid_variable_patterns: Vec<Regex>,
    /// Consulted for every simple node regardless of its type.
    arid_statement_patterns: Vec<Regex>,
    /// Consulted for every simple node except `FunctionCall`.
    arid_namespace_patterns: Vec<Regex>,
}
88
89impl ExpertKnowledge {
90    pub fn new() -> Result<Self> {
91        let arid_function_patterns = vec![
92            // Memory management functions
93            Regex::new(r"std::vector<.*>::reserve")?,
94            Regex::new(r"std::vector<.*>::resize")?,
95            Regex::new(r"std::.*::reserve")?,
96            Regex::new(r"\.reserve\s*\(")?,
97            Regex::new(r"\.resize\s*\(")?,
98            // I/O operations (typically not unit tested)
99            Regex::new(r"std::cout\s*<<")?,
100            Regex::new(r"std::cerr\s*<<")?,
101            Regex::new(r"printf\s*\(")?,
102            Regex::new(r"fprintf\s*\(")?,
103            Regex::new(r"std::endl")?,
104            // Logging functions - note the patterns match anywhere in the string
105            Regex::new(r"LogPrintf\s*\(")?,
106            Regex::new(r"LogPrint\s*\(")?,
107            Regex::new(r"LogDebug\s*\(")?,
108            Regex::new(r"\blog\.")?,
109            Regex::new(r"\blogger\.")?,
110            Regex::new(r"\blogging\.")?,
111            // Debug/trace functions
112            Regex::new(r"assert\s*\(")?,
113            Regex::new(r"DEBUG_")?,
114            Regex::new(r"TRACE_")?,
115            // Bitcoin Core specific patterns
116            Regex::new(r"G_FUZZING")?,
117            Regex::new(r"fPrintToConsole")?,
118            Regex::new(r"strprintf\s*\(")?,
119            // Memory allocation that's usually not tested
120            Regex::new(r"malloc\s*\(")?,
121            Regex::new(r"calloc\s*\(")?,
122            Regex::new(r"realloc\s*\(")?,
123            Regex::new(r"free\s*\(")?,
124            // Thread/concurrency primitives often not unit tested
125            Regex::new(r"std::thread")?,
126            Regex::new(r"std::mutex")?,
127            Regex::new(r"std::lock_guard")?,
128            // Performance monitoring (usually not tested)
129            Regex::new(r"\.now\(\)")?,
130            Regex::new(r"steady_clock")?,
131            Regex::new(r"high_resolution_clock")?,
132        ];
133
134        let arid_variable_patterns = vec![
135            // Timing/performance variables
136            Regex::new(r".*_time$")?,
137            Regex::new(r".*_duration$")?,
138            Regex::new(r".*_start$")?,
139            Regex::new(r".*_end$")?,
140            // Debug/logging variables
141            Regex::new(r".*_debug$")?,
142            Regex::new(r".*_log$")?,
143            Regex::new(r".*_trace$")?,
144            // Temporary/scratch variables
145            Regex::new(r"temp_.*")?,
146            Regex::new(r"tmp_.*")?,
147            Regex::new(r"scratch_.*")?,
148        ];
149
150        let arid_statement_patterns = vec![
151            // Comments
152            Regex::new(r"^\s*//")?,
153            Regex::new(r"^\s*/\*")?,
154            // Preprocessor directives
155            Regex::new(r"^\s*#")?,
156            // Empty statements
157            Regex::new(r"^\s*;")?,
158            // Namespace declarations
159            Regex::new(r"^\s*namespace\s+")?,
160            Regex::new(r"^\s*using\s+namespace\s+")?,
161            // Forward declarations
162            Regex::new(r"^\s*class\s+\w+\s*;")?,
163            Regex::new(r"^\s*struct\s+\w+\s*;")?,
164        ];
165
166        let arid_namespace_patterns = vec![
167            // Standard library
168            Regex::new(r"std::")?,
169            // Boost library (often infrastructure)
170            Regex::new(r"boost::")?,
171            // Testing frameworks
172            Regex::new(r"testing::")?,
173            Regex::new(r"gtest::")?,
174        ];
175
176        Ok(Self {
177            arid_function_patterns,
178            arid_variable_patterns,
179            arid_statement_patterns,
180            arid_namespace_patterns,
181        })
182    }
183
184    /// Expert function that determines if a simple node is arid
185    pub fn is_arid_simple_node(&self, node: &AstNode) -> bool {
186        if !node.is_simple() {
187            return false;
188        }
189
190        let content = &node.content;
191
192        // Check function call patterns first (most specific)
193        if matches!(node.node_type, AstNodeType::FunctionCall) {
194            for pattern in &self.arid_function_patterns {
195                if pattern.is_match(content) {
196                    return true;
197                }
198            }
199        }
200
201        // Check variable patterns
202        if matches!(
203            node.node_type,
204            AstNodeType::VariableDeclaration | AstNodeType::Assignment
205        ) {
206            for pattern in &self.arid_variable_patterns {
207                if pattern.is_match(content) {
208                    return true;
209                }
210            }
211        }
212
213        // Check general statement patterns
214        for pattern in &self.arid_statement_patterns {
215            if pattern.is_match(content) {
216                return true;
217            }
218        }
219
220        // Check namespace patterns (but not for function calls as that's too broad)
221        if !matches!(node.node_type, AstNodeType::FunctionCall) {
222            for pattern in &self.arid_namespace_patterns {
223                if pattern.is_match(content) {
224                    return true;
225                }
226            }
227        }
228
229        false
230    }
231}
232
/// Arid node detector implementing Google's algorithm
///
/// Combines the expert rule tables with a memoisation cache for verdicts.
pub struct AridNodeDetector {
    /// Rule tables used to judge simple nodes.
    expert: ExpertKnowledge,
    /// Memoised `is_arid` verdicts, keyed by a string derived from the node.
    cache: HashMap<String, bool>,
}
238
239impl AridNodeDetector {
240    pub fn new() -> Result<Self> {
241        Ok(Self {
242            expert: ExpertKnowledge::new()?,
243            cache: HashMap::new(),
244        })
245    }
246
247    /// Implementation of Google's arid node detection algorithm
248    /// arid(N) = expert(N) if simple(N)
249    ///         = 1 if ∀(arid(c)) = 1, ∀c ∈ N otherwise
250    pub fn is_arid(&mut self, node: &AstNode) -> bool {
251        // Create cache key
252        let cache_key = format!(
253            "{}:{}:{}",
254            node.line_number, node.column_start, node.content
255        );
256
257        if let Some(&cached_result) = self.cache.get(&cache_key) {
258            return cached_result;
259        }
260
261        let result = if node.is_simple() {
262            // For simple nodes, use expert knowledge
263            self.expert.is_arid_simple_node(node)
264        } else {
265            // For compound nodes, check if ALL children are arid
266            if node.children.is_empty() {
267                // Empty compound node is not arid
268                false
269            } else {
270                // All children must be arid for compound node to be arid
271                node.children.iter().all(|child| self.is_arid(child))
272            }
273        };
274
275        // Cache the result
276        self.cache.insert(cache_key, result);
277        result
278    }
279
280    /// Context-aware version that checks if a line should be mutated
281    /// Takes all lines and the current line index to understand control structures
282    pub fn should_mutate_line_with_context(
283        &mut self,
284        lines: &[String],
285        line_index: usize,
286    ) -> bool {
287        let line = &lines[line_index];
288        let trimmed = line.trim();
289
290        // Skip empty lines and closing braces
291        if trimmed.is_empty() || trimmed == "}" {
292            return false;
293        }
294
295        let line_number = line_index + 1;
296        let node_type = self.classify_line(trimmed);
297
298        // For control structures, check if their body is all arid
299        if matches!(
300            node_type,
301            AstNodeType::IfStatement | AstNodeType::ForLoop | AstNodeType::WhileLoop
302        ) {
303            // If the control structure body is all arid, don't mutate the control structure
304            return !self.is_control_structure_body_arid(lines, line_index);
305        }
306
307        // For lines inside control structures, we still need to check them individually
308        // unless they're part of an all-arid control structure (which is handled above)
309        let node = self.parse_line_to_simple_ast(trimmed, line_number);
310        !self.is_arid(&node)
311    }
312
313    /// Check if a control structure's body contains only arid statements
314    fn is_control_structure_body_arid(&mut self, lines: &[String], start_index: usize) -> bool {
315        let start_line = lines[start_index].trim();
316
317        // Check if this is a single-line control structure (no braces)
318        // e.g., "if (condition) single_statement;"
319        if !start_line.contains('{') {
320            // Look for the statement on the same line or next line
321            let statement = if start_line.contains(')') && start_line.ends_with(';') {
322                // Extract everything after the closing paren
323                if let Some(pos) = start_line.rfind(')') {
324                    start_line[pos + 1..].trim()
325                } else {
326                    start_line
327                }
328            } else if start_index + 1 < lines.len() {
329                // Statement is on the next line
330                lines[start_index + 1].trim()
331            } else {
332                return false;
333            };
334
335            // Parse and check if the statement is arid
336            let node = self.parse_line_to_simple_ast(statement, start_index + 2);
337            return self.is_arid(&node);
338        }
339
340        // Find the opening brace
341        let mut brace_line_index = start_index;
342        if !start_line.contains('{') {
343            // Opening brace might be on the next line
344            brace_line_index = start_index + 1;
345            if brace_line_index >= lines.len() || !lines[brace_line_index].contains('{') {
346                return false;
347            }
348        }
349
350        // Find matching closing brace
351        let body_range = match self.find_matching_brace(lines, brace_line_index) {
352            Some(end_index) => (brace_line_index + 1, end_index),
353            None => return false,
354        };
355
356        // Check if all non-empty lines in the body are arid
357        let mut has_non_empty_line = false;
358        for i in body_range.0..body_range.1 {
359            let line = lines[i].trim();
360
361            // Skip empty lines and braces
362            if line.is_empty() || line == "{" || line == "}" {
363                continue;
364            }
365
366            has_non_empty_line = true;
367
368            // Parse the line and check if it's arid
369            let node = self.parse_line_to_simple_ast(line, i + 1);
370            if !self.is_arid(&node) {
371                // Found a non-arid line in the body
372                return false;
373            }
374        }
375
376        // If we found at least one non-empty line and all were arid, return true
377        // If no non-empty lines, return false (empty body is not arid)
378        has_non_empty_line
379    }
380
381    /// Find the index of the closing brace that matches the opening brace at start_index
382    fn find_matching_brace(&self, lines: &[String], start_index: usize) -> Option<usize> {
383        let mut brace_count = 0;
384        let mut found_opening = false;
385
386        for (i, line) in lines.iter().enumerate().skip(start_index) {
387            for ch in line.chars() {
388                match ch {
389                    '{' => {
390                        brace_count += 1;
391                        found_opening = true;
392                    }
393                    '}' => {
394                        brace_count -= 1;
395                        if found_opening && brace_count == 0 {
396                            return Some(i);
397                        }
398                    }
399                    _ => {}
400                }
401            }
402        }
403
404        None
405    }
406
407    /// Simple heuristic-based parsing to create AST nodes from single lines
408    fn parse_line_to_simple_ast(&self, line_content: &str, line_number: usize) -> AstNode {
409        let trimmed = line_content.trim();
410
411        // Skip empty lines and comments
412        if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with("/*") {
413            return AstNode::new(
414                AstNodeType::Identifier,
415                trimmed.to_string(),
416                line_number,
417                0,
418                line_content.len(),
419            );
420        }
421
422        // Determine node type based on content patterns
423        let node_type = self.classify_line(trimmed);
424
425        AstNode::new(
426            node_type,
427            trimmed.to_string(),
428            line_number,
429            0,
430            line_content.len(),
431        )
432    }
433
434    /// Classify a line of code into the appropriate AST node type
435    fn classify_line(&self, line: &str) -> AstNodeType {
436        // Namespace declarations
437        if line.starts_with("namespace ") || line.contains("using namespace") {
438            return AstNodeType::Namespace;
439        }
440
441        // Class declarations
442        if line.starts_with("class ") || line.starts_with("struct ") {
443            return AstNodeType::Class;
444        }
445
446        // Control flow statements (compound nodes) - check these before function declarations
447        if line.starts_with("if ") || line.starts_with("if(") || line.contains("} else ") {
448            return AstNodeType::IfStatement;
449        }
450        if line.starts_with("for ") || line.starts_with("for(") {
451            return AstNodeType::ForLoop;
452        }
453        if line.starts_with("while ") || line.starts_with("while(") {
454            return AstNodeType::WhileLoop;
455        }
456
457        // Block statements
458        if line == "{" || line == "}" || line.ends_with(" {") {
459            return AstNodeType::Block;
460        }
461
462        // Variable declarations
463        if self.is_variable_declaration(line) {
464            return AstNodeType::VariableDeclaration;
465        }
466
467        // Assignment operations
468        if self.is_assignment(line) {
469            return AstNodeType::Assignment;
470        }
471
472        // Function calls - check BEFORE function declarations
473        if self.is_function_call(line) {
474            return AstNodeType::FunctionCall;
475        }
476
477        // Function declarations/definitions - check AFTER function calls
478        if self.is_function_declaration(line) {
479            return AstNodeType::Function;
480        }
481
482        // Binary operators
483        if self.is_binary_operation(line) {
484            return AstNodeType::BinaryOperator;
485        }
486
487        // Unary operators
488        if self.is_unary_operation(line) {
489            return AstNodeType::UnaryOperator;
490        }
491
492        // Literals
493        if self.is_literal(line) {
494            return AstNodeType::Literal;
495        }
496
497        // Default to identifier
498        AstNodeType::Identifier
499    }
500
501    /// Check if line is a function declaration or definition
502    fn is_function_declaration(&self, line: &str) -> bool {
503        // Function calls end with ); - those are NOT declarations
504        if line.trim().ends_with(");") {
505            return false;
506        }
507
508        // Function declarations typically:
509        // - Have a return type before the function name
510        // - End with { or just ; (not );)
511        // - Have modifiers like virtual, static, etc.
512
513        let function_patterns = [
514            // Return type + function name + params + opening brace
515            Regex::new(r"^\s*\w+\s+\w+\s*\([^)]*\)\s*\{").unwrap(),
516            // Constructor/destructor with opening brace or initializer list
517            Regex::new(r"^\s*~?\w+\s*\([^)]*\)\s*[{:]").unwrap(),
518            // Template function
519            Regex::new(r"^\s*template\s*<[^>]*>").unwrap(),
520            // Function with qualifiers (virtual, static, inline, explicit, etc.)
521            Regex::new(r"^\s*(?:virtual\s+|static\s+|inline\s+|explicit\s+)").unwrap(),
522            // Return type + function name + params + ending semicolon (forward declaration)
523            // But make sure it doesn't end with );
524            Regex::new(r"^\s*\w+\s+\w+\s*\([^)]*\)\s*;\s*$").unwrap(),
525        ];
526
527        function_patterns
528            .iter()
529            .any(|pattern| pattern.is_match(line))
530            && !line.contains('=')
531    }
532
533    /// Check if line is a variable declaration
534    fn is_variable_declaration(&self, line: &str) -> bool {
535        let var_patterns = [
536            Regex::new(r"^\s*(int|bool|char|float|double|long|short|unsigned|signed)\s+\w+")
537                .unwrap(),
538            Regex::new(r"^\s*std::\w+\s*<?[^>]*>?\s+\w+").unwrap(),
539            Regex::new(r"^\s*[A-Z]\w*\s+\w+").unwrap(),
540            Regex::new(r"^\s*\w+\s*[*&]+\s*\w+").unwrap(),
541            Regex::new(r"^\s*const\s+\w+").unwrap(),
542            Regex::new(r"^\s*auto\s+\w+").unwrap(),
543        ];
544
545        var_patterns.iter().any(|pattern| pattern.is_match(line))
546            && !line.contains('(')
547            && (line.contains('=') || line.ends_with(';'))
548    }
549
550    /// Check if line is an assignment
551    fn is_assignment(&self, line: &str) -> bool {
552        line.contains('=')
553            && !line.contains("==")
554            && !line.contains("!=")
555            && !line.contains("<=")
556            && !line.contains(">=")
557            && !self.is_variable_declaration(line)
558    }
559
560    /// Check if line is a function call
561    fn is_function_call(&self, line: &str) -> bool {
562        line.contains('(')
563            && line.contains(')')
564            && !self.is_function_declaration(line)
565            && !self.is_variable_declaration(line)
566            && !line.starts_with("if ")
567            && !line.starts_with("if(")
568            && !line.starts_with("while ")
569            && !line.starts_with("while(")
570            && !line.starts_with("for ")
571            && !line.starts_with("for(")
572    }
573
574    /// Check if line contains binary operations
575    fn is_binary_operation(&self, line: &str) -> bool {
576        let binary_ops = [
577            "+", "-", "*", "/", "%", "&&", "||", "&", "|", "^", "<<", ">>",
578        ];
579        binary_ops.iter().any(|op| line.contains(op)) && !line.contains('=') && !line.contains('(')
580    }
581
582    /// Check if line contains unary operations
583    fn is_unary_operation(&self, line: &str) -> bool {
584        let unary_patterns = [
585            Regex::new(r"\+\+\w+").unwrap(),
586            Regex::new(r"\w\+\+").unwrap(),
587            Regex::new(r"--\w+").unwrap(),
588            Regex::new(r"\w--").unwrap(),
589            Regex::new(r"!\w+").unwrap(),
590            Regex::new(r"~\w+").unwrap(),
591        ];
592
593        unary_patterns.iter().any(|pattern| pattern.is_match(line))
594    }
595
596    /// Check if line is a literal value
597    fn is_literal(&self, line: &str) -> bool {
598        let literal_patterns = [
599            Regex::new(r"^\s*\d+\s*;?\s*$").unwrap(),
600            Regex::new(r"^\s*\d+\.\d+\s*;?\s*$").unwrap(),
601            Regex::new(r#"^\s*"[^"]*"\s*;?\s*$"#).unwrap(),
602            Regex::new(r"^\s*'[^']*'\s*;?\s*$").unwrap(),
603            Regex::new(r"^\s*(true|false)\s*;?\s*$").unwrap(),
604            Regex::new(r"^\s*(nullptr|NULL)\s*;?\s*$").unwrap(),
605        ];
606
607        literal_patterns
608            .iter()
609            .any(|pattern| pattern.is_match(line))
610    }
611
612    /// Add a new expert rule at runtime
613    pub fn add_expert_rule(&mut self, pattern: &str, description: &str) -> Result<()> {
614        let regex = Regex::new(pattern)?;
615        self.expert.arid_function_patterns.push(regex);
616        println!("Added expert rule: {} ({})", pattern, description);
617        Ok(())
618    }
619
620    /// Get statistics about arid node detection
621    pub fn get_stats(&self) -> HashMap<String, usize> {
622        let mut stats = HashMap::new();
623        stats.insert(
624            "total_expert_rules".to_string(),
625            self.expert.arid_function_patterns.len()
626                + self.expert.arid_variable_patterns.len()
627                + self.expert.arid_statement_patterns.len(),
628        );
629        stats.insert("cache_size".to_string(), self.cache.len());
630        stats.insert(
631            "function_patterns".to_string(),
632            self.expert.arid_function_patterns.len(),
633        );
634        stats.insert(
635            "variable_patterns".to_string(),
636            self.expert.arid_variable_patterns.len(),
637        );
638        stats.insert(
639            "statement_patterns".to_string(),
640            self.expert.arid_statement_patterns.len(),
641        );
642        stats
643    }
644
645    /// Export detailed analysis of which lines were filtered and why
646    #[allow(dead_code)]
647    pub fn analyze_file_detailed(&mut self, file_content: &str) -> DetailedAnalysis {
648        let lines: Vec<String> = file_content.lines().map(|s| s.to_string()).collect();
649        let mut analysis = DetailedAnalysis::new();
650
651        for (idx, line) in lines.iter().enumerate() {
652            let line_number = idx + 1;
653            let should_mutate = self.should_mutate_line_with_context(&lines, idx);
654            let node = self.parse_line_to_simple_ast(line, line_number);
655            let is_arid = !should_mutate;
656            let reason = if is_arid {
657                self.get_arid_reason(&node, &lines, idx)
658            } else {
659                "Not arid - will be mutated".to_string()
660            };
661
662            analysis.add_line_analysis(LineAnalysis {
663                line_number,
664                content: line.to_string(),
665                node_type: node.node_type,
666                is_arid,
667                reason,
668            });
669        }
670
671        analysis
672    }
673
674    /// Get the reason why a node is considered arid
675    #[allow(dead_code)]
676    fn get_arid_reason(&self, node: &AstNode, _lines: &[String], _line_index: usize) -> String {
677        // Check if this is a control structure with arid body
678        if matches!(
679            node.node_type,
680            AstNodeType::IfStatement | AstNodeType::ForLoop | AstNodeType::WhileLoop
681        ) {
682            return "Control structure with arid body (logging/debugging only)".to_string();
683        }
684
685        if !node.is_simple() {
686            return "Compound node - arid if all children are arid".to_string();
687        }
688
689        let content = &node.content;
690
691        // Check function call patterns
692        if matches!(node.node_type, AstNodeType::FunctionCall) {
693            for (idx, pattern) in self.expert.arid_function_patterns.iter().enumerate() {
694                if pattern.is_match(content) {
695                    return format!(
696                        "Matches arid function pattern #{}: {}",
697                        idx + 1,
698                        pattern.as_str()
699                    );
700                }
701            }
702        }
703
704        // Check variable patterns
705        if matches!(
706            node.node_type,
707            AstNodeType::VariableDeclaration | AstNodeType::Assignment
708        ) {
709            for (idx, pattern) in self.expert.arid_variable_patterns.iter().enumerate() {
710                if pattern.is_match(content) {
711                    return format!(
712                        "Matches arid variable pattern #{}: {}",
713                        idx + 1,
714                        pattern.as_str()
715                    );
716                }
717            }
718        }
719
720        // Check statement patterns
721        for (idx, pattern) in self.expert.arid_statement_patterns.iter().enumerate() {
722            if pattern.is_match(content) {
723                return format!(
724                    "Matches arid statement pattern #{}: {}",
725                    idx + 1,
726                    pattern.as_str()
727                );
728            }
729        }
730
731        "Not arid".to_string()
732    }
733
    /// Clear the cache (useful for testing or when rules change)
    ///
    /// Drops every memoised verdict; the expert rules themselves are untouched.
    #[allow(dead_code)]
    pub fn clear_cache(&mut self) {
        self.cache.clear();
    }
739}
740
/// Detailed analysis results for a file
///
/// Collects one `LineAnalysis` per analysed line together with running totals.
#[allow(dead_code)]
#[derive(Debug)]
pub struct DetailedAnalysis {
    /// Per-line results in the order they were added.
    pub lines: Vec<LineAnalysis>,
    /// Counters kept up to date by `add_line_analysis`.
    pub summary: AnalysisSummary,
}
748
749#[allow(dead_code)]
750impl DetailedAnalysis {
751    pub fn new() -> Self {
752        Self {
753            lines: Vec::new(),
754            summary: AnalysisSummary::default(),
755        }
756    }
757
758    pub fn add_line_analysis(&mut self, analysis: LineAnalysis) {
759        if analysis.is_arid {
760            self.summary.arid_lines += 1;
761        } else {
762            self.summary.mutatable_lines += 1;
763        }
764        self.summary.total_lines += 1;
765        self.lines.push(analysis);
766    }
767
768    pub fn print_summary(&self) {
769        println!("\n=== AST Analysis Summary ===");
770        println!("Total lines: {}", self.summary.total_lines);
771        println!("Mutatable lines: {}", self.summary.mutatable_lines);
772        println!("Arid lines: {}", self.summary.arid_lines);
773        println!(
774            "Filtering efficiency: {:.1}% reduction",
775            (self.summary.arid_lines as f64 / self.summary.total_lines as f64) * 100.0
776        );
777    }
778
779    pub fn print_arid_lines(&self) {
780        println!("\n=== Filtered Out (Arid) Lines ===");
781        for line in &self.lines {
782            if line.is_arid {
783                println!(
784                    "Line {}: {} | Reason: {}",
785                    line.line_number,
786                    line.content.trim(),
787                    line.reason
788                );
789            }
790        }
791    }
792}
793
/// Analysis of a single line
#[allow(dead_code)]
#[derive(Debug)]
pub struct LineAnalysis {
    /// Line number as supplied by the producer of this record.
    pub line_number: usize,
    /// Text of the analysed line.
    pub content: String,
    /// Classification assigned to the line.
    pub node_type: AstNodeType,
    /// `true` when the line is filtered out (not mutated).
    pub is_arid: bool,
    /// Human-readable explanation of the verdict.
    pub reason: String,
}
804
/// Summary statistics for analysis
///
/// All counters start at zero via `Default`.
#[allow(dead_code)]
#[derive(Debug, Default)]
pub struct AnalysisSummary {
    /// Number of lines recorded.
    pub total_lines: usize,
    /// Number of recorded lines that are not arid.
    pub mutatable_lines: usize,
    /// Number of recorded lines that are arid.
    pub arid_lines: usize,
}
813
814/// Integration with existing mutation system - context-aware version
815pub fn filter_mutatable_lines(lines: &[String], detector: &mut AridNodeDetector) -> Vec<usize> {
816    lines
817        .iter()
818        .enumerate()
819        .filter_map(|(idx, _line)| {
820            let line_number = idx + 1;
821            if detector.should_mutate_line_with_context(lines, idx) {
822                Some(line_number)
823            } else {
824                None
825            }
826        })
827        .collect()
828}
829
// Unit tests covering the expert rules, the arid-node recursion and the
// line-level filtering entry point.
#[cfg(test)]
mod tests {
    use super::*;

    /// Expert rules flag `.reserve(...)` and `LogDebug(...)` calls as arid and
    /// leave an ordinary call alone.
    #[test]
    fn test_expert_knowledge() {
        let expert = ExpertKnowledge::new().unwrap();

        // Test arid function calls
        let reserve_node = AstNode::new(
            AstNodeType::FunctionCall,
            "vec.reserve(100)".to_string(),
            1,
            0,
            15,
        );
        assert!(expert.is_arid_simple_node(&reserve_node));

        // Test non-arid function calls
        let normal_node = AstNode::new(
            AstNodeType::FunctionCall,
            "calculate_sum(a, b)".to_string(),
            1,
            0,
            18,
        );
        assert!(!expert.is_arid_simple_node(&normal_node));

        // Test LogDebug function call
        let log_debug_node = AstNode::new(
            AstNodeType::FunctionCall,
            "LogDebug(BCLog::ADDRMAN, \"test\");".to_string(),
            1,
            0,
            30,
        );
        assert!(expert.is_arid_simple_node(&log_debug_node), "LogDebug should be recognized as arid");
    }

    /// A compound node is arid only when every child is arid.
    #[test]
    fn test_arid_detection_algorithm() {
        let mut detector = AridNodeDetector::new().unwrap();

        // Test simple arid node
        let arid_simple = AstNode::new(
            AstNodeType::FunctionCall,
            "std::cout << \"debug\"".to_string(),
            1,
            0,
            20,
        );
        assert!(detector.is_arid(&arid_simple));

        // Test compound node with all arid children
        let mut compound_arid =
            AstNode::new(AstNodeType::Block, "{ debug block }".to_string(), 1, 0, 15);
        compound_arid.add_child(arid_simple.clone());
        assert!(detector.is_arid(&compound_arid));

        // Test compound node with non-arid child
        let non_arid_simple = AstNode::new(
            AstNodeType::FunctionCall,
            "important_function()".to_string(),
            2,
            0,
            20,
        );
        let mut compound_mixed =
            AstNode::new(AstNodeType::Block, "{ mixed block }".to_string(), 1, 0, 15);
        compound_mixed.add_child(arid_simple);
        compound_mixed.add_child(non_arid_simple);
        assert!(!detector.is_arid(&compound_mixed));
    }

    /// `filter_mutatable_lines` returns 1-based numbers of non-arid lines only.
    #[test]
    fn test_line_mutation_filtering() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "int x = 5;".to_string(),              // Should mutate
            "std::cout << \"debug\";".to_string(), // Should NOT mutate (arid)
            "vec.reserve(100);".to_string(),       // Should NOT mutate (arid)
            "return x + y;".to_string(),           // Should mutate
        ];

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // Should only include lines 1 and 4
        assert_eq!(mutatable_lines, vec![1, 4]);
    }

    /// An `if` whose braced body contains only logging is filtered out entirely.
    #[test]
    fn test_if_statement_with_logging() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "if (!restore_bucketing) {".to_string(),
            "    LogDebug(BCLog::ADDRMAN, \"Bucketing method was updated, re-bucketing addrman entries from disk\\n\");".to_string(),
            "}".to_string(),
        ];

        // First, let's test that LogDebug itself is recognized as arid
        let log_line = lines[1].trim();
        let log_node = detector.parse_line_to_simple_ast(log_line, 2);
        assert_eq!(log_node.node_type, AstNodeType::FunctionCall, "LogDebug line should be classified as FunctionCall");
        assert!(detector.is_arid(&log_node), "LogDebug should be recognized as arid");

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // The if statement should NOT be mutated because it only contains logging
        // Lines 2 (LogDebug) and 3 (closing brace) also should not be mutated
        assert!(
            mutatable_lines.is_empty(),
            "Expected no mutatable lines, got: {:?}",
            mutatable_lines
        );
    }

    /// An `if` with a real statement in its body stays mutatable, as does the statement.
    #[test]
    fn test_if_statement_with_non_arid_body() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "if (condition) {".to_string(),
            "    x = x + 1;".to_string(),
            "}".to_string(),
        ];

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // The if statement and the assignment should be mutated
        assert!(
            mutatable_lines.contains(&1),
            "If statement should be mutatable"
        );
        assert!(
            mutatable_lines.contains(&2),
            "Assignment should be mutatable"
        );
    }

    /// One non-arid statement in the body is enough to keep the `if` mutatable.
    #[test]
    fn test_if_statement_mixed_body() {
        let mut detector = AridNodeDetector::new().unwrap();

        let lines = vec![
            "if (condition) {".to_string(),
            "    LogDebug(BCLog::TEST, \"debug\");".to_string(),
            "    x = x + 1;".to_string(),
            "}".to_string(),
        ];

        let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);

        // The if statement should be mutated because it has non-arid content
        assert!(
            mutatable_lines.contains(&1),
            "If statement with mixed body should be mutable"
        );
        assert!(
            mutatable_lines.contains(&3),
            "Non-arid line in body should be mutable"
        );
    }
}