1use crate::error::Result;
2use regex::Regex;
3use std::collections::HashMap;
4
5#[derive(Debug, Clone, PartialEq)]
7pub enum AstNodeType {
8 FunctionCall,
10 VariableDeclaration,
11 Assignment,
12 Literal,
13 Identifier,
14 BinaryOperator,
15 UnaryOperator,
16
17 IfStatement,
19 ForLoop,
20 WhileLoop,
21 Block,
22 Function,
23 Class,
24 Namespace,
25}
26
27#[derive(Debug, Clone)]
29pub struct AstNode {
30 pub node_type: AstNodeType,
31 pub content: String,
32 pub line_number: usize,
33 pub column_start: usize,
34 #[allow(dead_code)]
35 pub column_end: usize,
36 pub children: Vec<AstNode>,
37}
38
39impl AstNode {
40 pub fn new(
41 node_type: AstNodeType,
42 content: String,
43 line_number: usize,
44 column_start: usize,
45 column_end: usize,
46 ) -> Self {
47 Self {
48 node_type,
49 content,
50 line_number,
51 column_start,
52 column_end,
53 children: Vec::new(),
54 }
55 }
56
57 #[allow(dead_code)]
58 pub fn add_child(&mut self, child: AstNode) {
59 self.children.push(child);
60 }
61
62 pub fn is_simple(&self) -> bool {
63 matches!(
64 self.node_type,
65 AstNodeType::FunctionCall
66 | AstNodeType::VariableDeclaration
67 | AstNodeType::Assignment
68 | AstNodeType::Literal
69 | AstNodeType::Identifier
70 | AstNodeType::BinaryOperator
71 | AstNodeType::UnaryOperator
72 )
73 }
74
75 #[allow(dead_code)]
76 pub fn is_compound(&self) -> bool {
77 !self.is_simple()
78 }
79}
80
81pub struct ExpertKnowledge {
83 arid_function_patterns: Vec<Regex>,
84 arid_variable_patterns: Vec<Regex>,
85 arid_statement_patterns: Vec<Regex>,
86 arid_namespace_patterns: Vec<Regex>,
87}
88
89impl ExpertKnowledge {
90 pub fn new() -> Result<Self> {
91 let arid_function_patterns = vec![
92 Regex::new(r"std::vector<.*>::reserve")?,
94 Regex::new(r"std::vector<.*>::resize")?,
95 Regex::new(r"std::.*::reserve")?,
96 Regex::new(r"\.reserve\s*\(")?,
97 Regex::new(r"\.resize\s*\(")?,
98 Regex::new(r"std::cout\s*<<")?,
100 Regex::new(r"std::cerr\s*<<")?,
101 Regex::new(r"printf\s*\(")?,
102 Regex::new(r"fprintf\s*\(")?,
103 Regex::new(r"std::endl")?,
104 Regex::new(r"LogPrintf\s*\(")?,
106 Regex::new(r"LogPrint\s*\(")?,
107 Regex::new(r"log\.|logger\.|logging\.")?,
108 Regex::new(r"assert\s*\(")?,
110 Regex::new(r"DEBUG_")?,
111 Regex::new(r"TRACE_")?,
112 Regex::new(r"G_FUZZING")?,
114 Regex::new(r"fPrintToConsole")?,
115 Regex::new(r"strprintf\s*\(")?,
116 Regex::new(r"malloc\s*\(")?,
118 Regex::new(r"calloc\s*\(")?,
119 Regex::new(r"realloc\s*\(")?,
120 Regex::new(r"free\s*\(")?,
121 Regex::new(r"std::thread")?,
123 Regex::new(r"std::mutex")?,
124 Regex::new(r"std::lock_guard")?,
125 Regex::new(r"\.now\(\)")?,
127 Regex::new(r"steady_clock")?,
128 Regex::new(r"high_resolution_clock")?,
129 ];
130
131 let arid_variable_patterns = vec![
132 Regex::new(r".*_time$")?,
134 Regex::new(r".*_duration$")?,
135 Regex::new(r".*_start$")?,
136 Regex::new(r".*_end$")?,
137 Regex::new(r".*_debug$")?,
139 Regex::new(r".*_log$")?,
140 Regex::new(r".*_trace$")?,
141 Regex::new(r"temp_.*")?,
143 Regex::new(r"tmp_.*")?,
144 Regex::new(r"scratch_.*")?,
145 ];
146
147 let arid_statement_patterns = vec![
148 Regex::new(r"^\s*//")?,
150 Regex::new(r"^\s*/\*")?,
151 Regex::new(r"^\s*#")?,
153 Regex::new(r"^\s*;")?,
155 Regex::new(r"^\s*namespace\s+")?,
157 Regex::new(r"^\s*using\s+namespace\s+")?,
158 Regex::new(r"^\s*class\s+\w+\s*;")?,
160 Regex::new(r"^\s*struct\s+\w+\s*;")?,
161 ];
162
163 let arid_namespace_patterns = vec![
164 Regex::new(r"std::")?,
166 Regex::new(r"boost::")?,
168 Regex::new(r"testing::")?,
170 Regex::new(r"gtest::")?,
171 ];
172
173 Ok(Self {
174 arid_function_patterns,
175 arid_variable_patterns,
176 arid_statement_patterns,
177 arid_namespace_patterns,
178 })
179 }
180
181 pub fn is_arid_simple_node(&self, node: &AstNode) -> bool {
183 if !node.is_simple() {
184 return false;
185 }
186
187 let content = &node.content;
188
189 if matches!(node.node_type, AstNodeType::FunctionCall) {
191 for pattern in &self.arid_function_patterns {
192 if pattern.is_match(content) {
193 return true;
194 }
195 }
196 }
197
198 if matches!(
200 node.node_type,
201 AstNodeType::VariableDeclaration | AstNodeType::Assignment
202 ) {
203 for pattern in &self.arid_variable_patterns {
204 if pattern.is_match(content) {
205 return true;
206 }
207 }
208 }
209
210 for pattern in &self.arid_statement_patterns {
212 if pattern.is_match(content) {
213 return true;
214 }
215 }
216
217 for pattern in &self.arid_namespace_patterns {
219 if pattern.is_match(content) {
220 return true;
221 }
222 }
223
224 false
225 }
226}
227
228pub struct AridNodeDetector {
230 expert: ExpertKnowledge,
231 cache: HashMap<String, bool>,
232}
233
234impl AridNodeDetector {
235 pub fn new() -> Result<Self> {
236 Ok(Self {
237 expert: ExpertKnowledge::new()?,
238 cache: HashMap::new(),
239 })
240 }
241
242 pub fn is_arid(&mut self, node: &AstNode) -> bool {
246 let cache_key = format!(
248 "{}:{}:{}",
249 node.line_number, node.column_start, node.content
250 );
251
252 if let Some(&cached_result) = self.cache.get(&cache_key) {
253 return cached_result;
254 }
255
256 let result = if node.is_simple() {
257 self.expert.is_arid_simple_node(node)
259 } else {
260 if node.children.is_empty() {
262 false
264 } else {
265 node.children.iter().all(|child| self.is_arid(child))
267 }
268 };
269
270 self.cache.insert(cache_key, result);
272 result
273 }
274
275 pub fn should_mutate_line(&mut self, line_content: &str, line_number: usize) -> bool {
277 let node = self.parse_line_to_simple_ast(line_content, line_number);
279 !self.is_arid(&node)
280 }
281
282 fn parse_line_to_simple_ast(&self, line_content: &str, line_number: usize) -> AstNode {
284 let trimmed = line_content.trim();
285
286 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with("/*") {
288 return AstNode::new(
289 AstNodeType::Identifier,
290 trimmed.to_string(),
291 line_number,
292 0,
293 line_content.len(),
294 );
295 }
296
297 let node_type = self.classify_line(trimmed);
299
300 AstNode::new(
301 node_type,
302 trimmed.to_string(),
303 line_number,
304 0,
305 line_content.len(),
306 )
307 }
308
309 fn classify_line(&self, line: &str) -> AstNodeType {
311 if line.starts_with("namespace ") || line.contains("using namespace") {
313 return AstNodeType::Namespace;
314 }
315
316 if line.starts_with("class ") || line.starts_with("struct ") {
318 return AstNodeType::Class;
319 }
320
321 if self.is_function_declaration(line) {
323 return AstNodeType::Function;
324 }
325
326 if line.starts_with("if ") || line.starts_with("if(") || line.contains("} else ") {
328 return AstNodeType::IfStatement;
329 }
330 if line.starts_with("for ") || line.starts_with("for(") {
331 return AstNodeType::ForLoop;
332 }
333 if line.starts_with("while ") || line.starts_with("while(") {
334 return AstNodeType::WhileLoop;
335 }
336
337 if line == "{" || line == "}" || line.ends_with(" {") {
339 return AstNodeType::Block;
340 }
341
342 if self.is_variable_declaration(line) {
344 return AstNodeType::VariableDeclaration;
345 }
346
347 if self.is_assignment(line) {
349 return AstNodeType::Assignment;
350 }
351
352 if self.is_function_call(line) {
354 return AstNodeType::FunctionCall;
355 }
356
357 if self.is_binary_operation(line) {
359 return AstNodeType::BinaryOperator;
360 }
361
362 if self.is_unary_operation(line) {
364 return AstNodeType::UnaryOperator;
365 }
366
367 if self.is_literal(line) {
369 return AstNodeType::Literal;
370 }
371
372 AstNodeType::Identifier
374 }
375
376 fn is_function_declaration(&self, line: &str) -> bool {
378 let function_patterns = [
379 Regex::new(r"^\s*\w+\s+\w+\s*\([^)]*\)\s*[{;]?").unwrap(),
380 Regex::new(r"^\s*~?\w+\s*\([^)]*\)\s*[{;:]").unwrap(),
381 Regex::new(r"^\s*(?:template\s*<[^>]*>\s*)?(?:virtual\s+|static\s+|inline\s+)*[\w:<>*&\s]+\s+\w+\s*\([^)]*\)").unwrap(),
382 ];
383
384 function_patterns
385 .iter()
386 .any(|pattern| pattern.is_match(line))
387 && !line.contains('=')
388 }
389
390 fn is_variable_declaration(&self, line: &str) -> bool {
392 let var_patterns = [
393 Regex::new(r"^\s*(int|bool|char|float|double|long|short|unsigned|signed)\s+\w+")
394 .unwrap(),
395 Regex::new(r"^\s*std::\w+\s*<?[^>]*>?\s+\w+").unwrap(),
396 Regex::new(r"^\s*[A-Z]\w*\s+\w+").unwrap(),
397 Regex::new(r"^\s*\w+\s*[*&]+\s*\w+").unwrap(),
398 Regex::new(r"^\s*const\s+\w+").unwrap(),
399 Regex::new(r"^\s*auto\s+\w+").unwrap(),
400 ];
401
402 var_patterns.iter().any(|pattern| pattern.is_match(line))
403 && !line.contains('(')
404 && (line.contains('=') || line.ends_with(';'))
405 }
406
407 fn is_assignment(&self, line: &str) -> bool {
409 line.contains('=')
410 && !line.contains("==")
411 && !line.contains("!=")
412 && !line.contains("<=")
413 && !line.contains(">=")
414 && !self.is_variable_declaration(line)
415 }
416
417 fn is_function_call(&self, line: &str) -> bool {
419 line.contains('(')
420 && line.contains(')')
421 && !self.is_function_declaration(line)
422 && !self.is_variable_declaration(line)
423 && !line.starts_with("if ")
424 && !line.starts_with("while ")
425 && !line.starts_with("for ")
426 }
427
428 fn is_binary_operation(&self, line: &str) -> bool {
430 let binary_ops = [
431 "+", "-", "*", "/", "%", "&&", "||", "&", "|", "^", "<<", ">>",
432 ];
433 binary_ops.iter().any(|op| line.contains(op)) && !line.contains('=') && !line.contains('(')
434 }
435
436 fn is_unary_operation(&self, line: &str) -> bool {
438 let unary_patterns = [
439 Regex::new(r"\+\+\w+").unwrap(),
440 Regex::new(r"\w\+\+").unwrap(),
441 Regex::new(r"--\w+").unwrap(),
442 Regex::new(r"\w--").unwrap(),
443 Regex::new(r"!\w+").unwrap(),
444 Regex::new(r"~\w+").unwrap(),
445 ];
446
447 unary_patterns.iter().any(|pattern| pattern.is_match(line))
448 }
449
450 fn is_literal(&self, line: &str) -> bool {
452 let literal_patterns = [
453 Regex::new(r"^\s*\d+\s*;?\s*$").unwrap(),
454 Regex::new(r"^\s*\d+\.\d+\s*;?\s*$").unwrap(),
455 Regex::new(r#"^\s*"[^"]*"\s*;?\s*$"#).unwrap(),
456 Regex::new(r"^\s*'[^']*'\s*;?\s*$").unwrap(),
457 Regex::new(r"^\s*(true|false)\s*;?\s*$").unwrap(),
458 Regex::new(r"^\s*(nullptr|NULL)\s*;?\s*$").unwrap(),
459 ];
460
461 literal_patterns
462 .iter()
463 .any(|pattern| pattern.is_match(line))
464 }
465
466 pub fn add_expert_rule(&mut self, pattern: &str, description: &str) -> Result<()> {
468 let regex = Regex::new(pattern)?;
469 self.expert.arid_function_patterns.push(regex);
470 println!("Added expert rule: {} ({})", pattern, description);
471 Ok(())
472 }
473
474 pub fn get_stats(&self) -> HashMap<String, usize> {
476 let mut stats = HashMap::new();
477 stats.insert(
478 "total_expert_rules".to_string(),
479 self.expert.arid_function_patterns.len()
480 + self.expert.arid_variable_patterns.len()
481 + self.expert.arid_statement_patterns.len(),
482 );
483 stats.insert("cache_size".to_string(), self.cache.len());
484 stats.insert(
485 "function_patterns".to_string(),
486 self.expert.arid_function_patterns.len(),
487 );
488 stats.insert(
489 "variable_patterns".to_string(),
490 self.expert.arid_variable_patterns.len(),
491 );
492 stats.insert(
493 "statement_patterns".to_string(),
494 self.expert.arid_statement_patterns.len(),
495 );
496 stats
497 }
498
499 #[allow(dead_code)]
501 pub fn analyze_file_detailed(&mut self, file_content: &str) -> DetailedAnalysis {
502 let lines: Vec<&str> = file_content.lines().collect();
503 let mut analysis = DetailedAnalysis::new();
504
505 for (idx, line) in lines.iter().enumerate() {
506 let line_number = idx + 1;
507 let node = self.parse_line_to_simple_ast(line, line_number);
508 let is_arid = self.is_arid(&node);
509 let reason = if is_arid {
510 self.get_arid_reason(&node)
511 } else {
512 "Not arid - will be mutated".to_string()
513 };
514
515 analysis.add_line_analysis(LineAnalysis {
516 line_number,
517 content: line.to_string(),
518 node_type: node.node_type,
519 is_arid,
520 reason,
521 });
522 }
523
524 analysis
525 }
526
527 #[allow(dead_code)]
529 fn get_arid_reason(&self, node: &AstNode) -> String {
530 if !node.is_simple() {
531 return "Compound node - arid if all children are arid".to_string();
532 }
533
534 let content = &node.content;
535
536 if matches!(node.node_type, AstNodeType::FunctionCall) {
538 for (idx, pattern) in self.expert.arid_function_patterns.iter().enumerate() {
539 if pattern.is_match(content) {
540 return format!(
541 "Matches arid function pattern #{}: {}",
542 idx + 1,
543 pattern.as_str()
544 );
545 }
546 }
547 }
548
549 if matches!(
551 node.node_type,
552 AstNodeType::VariableDeclaration | AstNodeType::Assignment
553 ) {
554 for (idx, pattern) in self.expert.arid_variable_patterns.iter().enumerate() {
555 if pattern.is_match(content) {
556 return format!(
557 "Matches arid variable pattern #{}: {}",
558 idx + 1,
559 pattern.as_str()
560 );
561 }
562 }
563 }
564
565 for (idx, pattern) in self.expert.arid_statement_patterns.iter().enumerate() {
567 if pattern.is_match(content) {
568 return format!(
569 "Matches arid statement pattern #{}: {}",
570 idx + 1,
571 pattern.as_str()
572 );
573 }
574 }
575
576 "Not arid".to_string()
577 }
578
579 #[allow(dead_code)]
581 pub fn clear_cache(&mut self) {
582 self.cache.clear();
583 }
584}
585
586#[allow(dead_code)]
588#[derive(Debug)]
589pub struct DetailedAnalysis {
590 pub lines: Vec<LineAnalysis>,
591 pub summary: AnalysisSummary,
592}
593
594#[allow(dead_code)]
595impl DetailedAnalysis {
596 pub fn new() -> Self {
597 Self {
598 lines: Vec::new(),
599 summary: AnalysisSummary::default(),
600 }
601 }
602
603 pub fn add_line_analysis(&mut self, analysis: LineAnalysis) {
604 if analysis.is_arid {
605 self.summary.arid_lines += 1;
606 } else {
607 self.summary.mutatable_lines += 1;
608 }
609 self.summary.total_lines += 1;
610 self.lines.push(analysis);
611 }
612
613 pub fn print_summary(&self) {
614 println!("\n=== AST Analysis Summary ===");
615 println!("Total lines: {}", self.summary.total_lines);
616 println!("Mutatable lines: {}", self.summary.mutatable_lines);
617 println!("Arid lines: {}", self.summary.arid_lines);
618 println!(
619 "Filtering efficiency: {:.1}% reduction",
620 (self.summary.arid_lines as f64 / self.summary.total_lines as f64) * 100.0
621 );
622 }
623
624 pub fn print_arid_lines(&self) {
625 println!("\n=== Filtered Out (Arid) Lines ===");
626 for line in &self.lines {
627 if line.is_arid {
628 println!(
629 "Line {}: {} | Reason: {}",
630 line.line_number,
631 line.content.trim(),
632 line.reason
633 );
634 }
635 }
636 }
637}
638
639#[allow(dead_code)]
641#[derive(Debug)]
642pub struct LineAnalysis {
643 pub line_number: usize,
644 pub content: String,
645 pub node_type: AstNodeType,
646 pub is_arid: bool,
647 pub reason: String,
648}
649
650#[allow(dead_code)]
652#[derive(Debug, Default)]
653pub struct AnalysisSummary {
654 pub total_lines: usize,
655 pub mutatable_lines: usize,
656 pub arid_lines: usize,
657}
658
659pub fn filter_mutatable_lines(lines: &[String], detector: &mut AridNodeDetector) -> Vec<usize> {
661 lines
662 .iter()
663 .enumerate()
664 .filter_map(|(idx, line)| {
665 let line_number = idx + 1;
666 if detector.should_mutate_line(line, line_number) {
667 Some(line_number)
668 } else {
669 None
670 }
671 })
672 .collect()
673}
674
675#[cfg(test)]
676mod tests {
677 use super::*;
678
679 #[test]
680 fn test_expert_knowledge() {
681 let expert = ExpertKnowledge::new().unwrap();
682
683 let reserve_node = AstNode::new(
685 AstNodeType::FunctionCall,
686 "vec.reserve(100)".to_string(),
687 1,
688 0,
689 15,
690 );
691 assert!(expert.is_arid_simple_node(&reserve_node));
692
693 let normal_node = AstNode::new(
695 AstNodeType::FunctionCall,
696 "calculate_sum(a, b)".to_string(),
697 1,
698 0,
699 18,
700 );
701 assert!(!expert.is_arid_simple_node(&normal_node));
702 }
703
704 #[test]
705 fn test_arid_detection_algorithm() {
706 let mut detector = AridNodeDetector::new().unwrap();
707
708 let arid_simple = AstNode::new(
710 AstNodeType::FunctionCall,
711 "std::cout << \"debug\"".to_string(),
712 1,
713 0,
714 20,
715 );
716 assert!(detector.is_arid(&arid_simple));
717
718 let mut compound_arid =
720 AstNode::new(AstNodeType::Block, "{ debug block }".to_string(), 1, 0, 15);
721 compound_arid.add_child(arid_simple.clone());
722 assert!(detector.is_arid(&compound_arid));
723
724 let non_arid_simple = AstNode::new(
726 AstNodeType::FunctionCall,
727 "important_function()".to_string(),
728 2,
729 0,
730 20,
731 );
732 let mut compound_mixed =
733 AstNode::new(AstNodeType::Block, "{ mixed block }".to_string(), 1, 0, 15);
734 compound_mixed.add_child(arid_simple);
735 compound_mixed.add_child(non_arid_simple);
736 assert!(!detector.is_arid(&compound_mixed));
737 }
738
739 #[test]
740 fn test_line_mutation_filtering() {
741 let mut detector = AridNodeDetector::new().unwrap();
742
743 let lines = vec![
744 "int x = 5;".to_string(), "std::cout << \"debug\";".to_string(), "vec.reserve(100);".to_string(), "return x + y;".to_string(), ];
749
750 let mutatable_lines = filter_mutatable_lines(&lines, &mut detector);
751
752 assert_eq!(mutatable_lines, vec![1, 4]);
754 }
755}