1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
impl PatternExtractor {
/// Extract error handling patterns.
///
/// Searches `content` for lines that start (after optional whitespace) with
/// `match` or `if let` and then mention `Result<...>` before an opening `{`.
/// When more than one such site is found, every match is handed to
/// `group_by_structural_hash` tagged as `PatternType::ErrorHandling`, together
/// with `file_path` and `collection`.
///
/// # Errors
///
/// This method itself never produces an error; it always returns `Ok(())`.
///
/// # Panics
///
/// Panics only if the hard-coded regular expression fails to compile.
fn extract_error_handling_patterns(
    &self,
    file_path: &Path,
    content: &str,
    collection: &mut PatternCollection,
) -> Result<()> {
    use regex::Regex;
    use std::sync::OnceLock;

    // The pattern is a constant, so compile it once for the whole process
    // instead of paying the compilation cost on every call.
    static RESULT_PATTERN: OnceLock<Regex> = OnceLock::new();

    // Pattern: Result<T, E> handling
    let result_pattern = RESULT_PATTERN.get_or_init(|| {
        Regex::new(r"(?m)^\s*(match|if let)\s+.*Result\s*<.*>\s*\{")
            .expect("Hardcoded regex pattern must be valid")
    });

    let matches: Vec<_> = result_pattern.find_iter(content).collect();

    // A single occurrence cannot be a repeated pattern; require at least two.
    if matches.len() > 1 {
        self.group_by_structural_hash(
            &matches,
            content,
            file_path,
            PatternType::ErrorHandling,
            3,
            5,
            collection,
        );
    }

    Ok(())
}
/// Extract data validation patterns.
///
/// Searches `content` for `if` conditions that call one of `is_empty`, `len`,
/// `contains`, `starts_with` or `ends_with` and are followed by `&&`, `||`,
/// or a `.len()` comparison. When more than five such sites are found, every
/// match is handed to `group_by_structural_hash` tagged as
/// `PatternType::DataValidation`, together with `file_path` and `collection`.
///
/// # Errors
///
/// This method itself never produces an error; it always returns `Ok(())`.
///
/// # Panics
///
/// Panics only if the hard-coded regular expression fails to compile.
fn extract_data_validation_patterns(
    &self,
    file_path: &Path,
    content: &str,
    collection: &mut PatternCollection,
) -> Result<()> {
    use regex::Regex;
    use std::sync::OnceLock;

    // The pattern is a constant, so compile it once for the whole process
    // instead of paying the compilation cost on every call.
    static VALIDATION_PATTERN: OnceLock<Regex> = OnceLock::new();

    // Pattern: Input validation — only match multi-condition validation blocks,
    // not standalone `.len()` / `.is_empty()` calls (those are standard Rust idioms).
    // Require at least two chained conditions or comparisons on the same line.
    let validation_pattern = VALIDATION_PATTERN.get_or_init(|| {
        Regex::new(
            r"(?m)if\s+.*\.(is_empty|len|contains|starts_with|ends_with)\(.*(\&\&|\|\||\.len\(\)\s*[<>=])",
        )
        .expect("Hardcoded regex pattern must be valid")
    });

    let matches: Vec<_> = validation_pattern.find_iter(content).collect();

    // Raised threshold: standalone validation calls are idiomatic, not duplication
    if matches.len() > 5 {
        self.group_by_structural_hash(
            &matches,
            content,
            file_path,
            PatternType::DataValidation,
            3,
            3,
            collection,
        );
    }

    Ok(())
}
/// Extract resource management patterns.
///
/// Searches `content` for zero-argument method calls named `open`, `close`,
/// `lock`, `unlock`, `acquire` or `release`. When more than five such calls
/// are found, every match is handed to `group_by_structural_hash` tagged as
/// `PatternType::ResourceManagement`, together with `file_path` and
/// `collection`.
///
/// # Errors
///
/// This method itself never produces an error; it always returns `Ok(())`.
///
/// # Panics
///
/// Panics only if the hard-coded regular expression fails to compile.
fn extract_resource_management_patterns(
    &self,
    file_path: &Path,
    content: &str,
    collection: &mut PatternCollection,
) -> Result<()> {
    use regex::Regex;
    use std::sync::OnceLock;

    // The pattern is a constant, so compile it once for the whole process
    // instead of paying the compilation cost on every call.
    static RESOURCE_PATTERN: OnceLock<Regex> = OnceLock::new();

    // Pattern: File/resource management (open/close, lock/unlock)
    // Standalone `.lock()` calls on mutexes are idiomatic Rust, not duplication,
    // which is why a handful of hits is not enough to report anything.
    let resource_pattern = RESOURCE_PATTERN.get_or_init(|| {
        Regex::new(r"(?m)\.(open|close|lock|unlock|acquire|release)\(\)")
            .expect("Hardcoded regex pattern must be valid")
    });

    let matches: Vec<_> = resource_pattern.find_iter(content).collect();

    // Raised threshold: individual .lock()/.open() calls are standard practice
    if matches.len() > 5 {
        self.group_by_structural_hash(
            &matches,
            content,
            file_path,
            PatternType::ResourceManagement,
            3,
            4,
            collection,
        );
    }

    Ok(())
}
/// Extract control flow patterns.
///
/// Searches `content` for lines that start (after optional whitespace) with
/// `} else if `. When more than two such branches are found, every match is
/// handed to `group_by_structural_hash` tagged as `PatternType::ControlFlow`,
/// together with `file_path` and `collection`.
///
/// # Errors
///
/// This method itself never produces an error; it always returns `Ok(())`.
///
/// # Panics
///
/// Panics only if the hard-coded regular expression fails to compile.
fn extract_control_flow_patterns(
    &self,
    file_path: &Path,
    content: &str,
    collection: &mut PatternCollection,
) -> Result<()> {
    use regex::Regex;
    use std::sync::OnceLock;

    // The pattern is a constant, so compile it once for the whole process
    // instead of paying the compilation cost on every call.
    static IF_ELSE_PATTERN: OnceLock<Regex> = OnceLock::new();

    // Pattern: Complex if-else chains
    let if_else_pattern = IF_ELSE_PATTERN.get_or_init(|| {
        Regex::new(r"(?m)^\s*}\s*else\s+if\s+").expect("Hardcoded regex pattern must be valid")
    });

    let matches: Vec<_> = if_else_pattern.find_iter(content).collect();

    // One or two `else if` branches are not treated as a chain worth grouping.
    if matches.len() > 2 {
        self.group_by_structural_hash(
            &matches,
            content,
            file_path,
            PatternType::ControlFlow,
            3,
            6,
            collection,
        );
    }

    Ok(())
}
/// Extract data transformation patterns.
///
/// Searches `content` for calls to the iterator combinators `map`, `filter`,
/// `filter_map`, `flat_map`, `fold` and `reduce`. When more than eight such
/// calls are found, every match is handed to `group_by_structural_hash` tagged
/// as `PatternType::DataTransformation`, together with `file_path` and
/// `collection`.
///
/// # Errors
///
/// This method itself never produces an error; it always returns `Ok(())`.
///
/// # Panics
///
/// Panics only if the hard-coded regular expression fails to compile.
fn extract_data_transformation_patterns(
    &self,
    file_path: &Path,
    content: &str,
    collection: &mut PatternCollection,
) -> Result<()> {
    use regex::Regex;
    use std::sync::OnceLock;

    // The pattern is a constant, so compile it once for the whole process
    // instead of paying the compilation cost on every call.
    static ITER_PATTERN: OnceLock<Regex> = OnceLock::new();

    // Pattern: iterator combinator calls.
    // NOTE(review): this expression matches each combinator call on its own
    // (e.g. a lone `.map(`); it does not require a chain of 2+ combinators.
    // Filtering out ordinary idiomatic usage relies on the count threshold below.
    let iter_pattern = ITER_PATTERN.get_or_init(|| {
        Regex::new(r"\.(map|filter|filter_map|flat_map|fold|reduce)\(")
            .expect("Hardcoded regex pattern must be valid")
    });

    let matches: Vec<_> = iter_pattern.find_iter(content).collect();

    // Raised threshold: individual iterator combinators are idiomatic Rust
    if matches.len() > 8 {
        self.group_by_structural_hash(
            &matches,
            content,
            file_path,
            PatternType::DataTransformation,
            3,
            2,
            collection,
        );
    }

    Ok(())
}
/// Extract API call patterns.
///
/// Searches `content` for the substrings `client.`, `http.`, `fetch(`,
/// `.post(`, `.put(` and `.delete(`. When more than three occurrences are
/// found, every match is handed to `group_by_structural_hash` tagged as
/// `PatternType::ApiCall`, together with `file_path` and `collection`.
///
/// # Errors
///
/// This method itself never produces an error; it always returns `Ok(())`.
///
/// # Panics
///
/// Panics only if the hard-coded regular expression fails to compile.
fn extract_api_call_patterns(
    &self,
    file_path: &Path,
    content: &str,
    collection: &mut PatternCollection,
) -> Result<()> {
    use regex::Regex;
    use std::sync::OnceLock;

    // The pattern is a constant, so compile it once for the whole process
    // instead of paying the compilation cost on every call.
    static API_PATTERN: OnceLock<Regex> = OnceLock::new();

    // Pattern: HTTP/API calls (reqwest, fetch, etc.)
    // Exclude bare `.get(` which matches HashMap/BTreeMap/Vec accessors.
    // Only match qualified HTTP patterns (client., http., fetch(), .post, .put, .delete).
    let api_pattern = API_PATTERN.get_or_init(|| {
        Regex::new(r"(?m)(client\.|http\.|fetch\(|\.post\(|\.put\(|\.delete\()")
            .expect("Hardcoded regex pattern must be valid")
    });

    let matches: Vec<_> = api_pattern.find_iter(content).collect();

    // Raised threshold: isolated HTTP calls are not duplication
    if matches.len() > 3 {
        self.group_by_structural_hash(
            &matches,
            content,
            file_path,
            PatternType::ApiCall,
            3,
            3,
            collection,
        );
    }

    Ok(())
}
}