1use crate::{IntakeError, IntakeResult, IntakeSourceType};
4use regex_lite::Regex;
5use serde::{Deserialize, Serialize};
6
/// A single signal produced by [`SignalExtractor::extract`] for one intake event.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExtractedSignal {
    /// Identifier of this signal; `extract` fills it with a freshly generated
    /// v4 UUID rendered as a string.
    pub signal_id: String,

    /// Short description of what was detected. `extract` writes
    /// `<category>:<pattern label>` for pattern matches and
    /// `issue:<event title>` for the generic fallback.
    pub content: String,

    /// Category of the detected signal.
    pub signal_type: SignalType,

    /// Confidence score. `extract` assigns a fixed value per category
    /// (0.7 to 0.9) or, for the fallback, a value derived from the event
    /// severity (0.2 to 0.9).
    pub confidence: f32,

    /// String form of the originating event's `source_type`.
    pub source: String,
}
25
/// Category of an [`ExtractedSignal`].
///
/// Serialized with snake_case variant names (see the `serde` attribute below).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SignalType {
    /// Emitted when one of the extractor's compiler patterns matches.
    CompilerError,
    /// Emitted when one of the extractor's runtime patterns matches.
    RuntimeError,
    /// Emitted when one of the extractor's test patterns matches.
    TestFailure,
    /// Emitted when one of the extractor's performance patterns matches.
    Performance,
    /// Emitted when one of the extractor's security patterns matches.
    Security,
    /// Configuration problem. Not produced by `SignalExtractor` in this file.
    ConfigError,
    /// Fallback used by `SignalExtractor::extract` when no pattern matched.
    GenericError,
}
45
46impl std::fmt::Display for SignalType {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 match self {
49 SignalType::CompilerError => write!(f, "compiler_error"),
50 SignalType::RuntimeError => write!(f, "runtime_error"),
51 SignalType::TestFailure => write!(f, "test_failure"),
52 SignalType::Performance => write!(f, "performance"),
53 SignalType::Security => write!(f, "security"),
54 SignalType::ConfigError => write!(f, "config_error"),
55 SignalType::GenericError => write!(f, "generic_error"),
56 }
57 }
58}
59
/// Derives [`ExtractedSignal`]s from intake events by matching the event text
/// against per-category regular expressions.
///
/// Each pattern table holds `(label, regex)` pairs; the label of a matching
/// pattern is embedded in the emitted signal's `content`.
pub struct SignalExtractor {
    /// Signals whose confidence is below this threshold are dropped by `extract`.
    min_confidence: f32,

    /// Patterns that produce `SignalType::CompilerError` signals.
    compiler_patterns: Vec<(&'static str, Regex)>,

    /// Patterns that produce `SignalType::RuntimeError` signals.
    runtime_patterns: Vec<(&'static str, Regex)>,

    /// Patterns that produce `SignalType::TestFailure` signals.
    test_patterns: Vec<(&'static str, Regex)>,

    /// Patterns that produce `SignalType::Performance` signals.
    performance_patterns: Vec<(&'static str, Regex)>,

    /// Patterns that produce `SignalType::Security` signals.
    security_patterns: Vec<(&'static str, Regex)>,
}
80
81impl SignalExtractor {
82 pub fn new(min_confidence: f32) -> Self {
84 Self {
85 min_confidence,
86 compiler_patterns: vec![
87 (
88 "borrow checker",
89 Regex::new(r"(?i)borrow.*(error|checker)").unwrap(),
90 ),
91 ("type mismatch", Regex::new(r"(?i)type.*mismatch").unwrap()),
92 (
93 "missing import",
94 Regex::new(r"(?i)(cannot find|missing).*(import|struct|function)").unwrap(),
95 ),
96 (
97 "unresolved import",
98 Regex::new(r"(?i)unresolved.*import").unwrap(),
99 ),
100 (
101 "unused",
102 Regex::new(r"(?i)unused.*(import|variable|function)").unwrap(),
103 ),
104 ],
105 runtime_patterns: vec![
106 ("timeout", Regex::new(r"(?i)timeout").unwrap()),
107 (
108 "connection refused",
109 Regex::new(r"(?i)(connection|connect).*(refused|failed)").unwrap(),
110 ),
111 (
112 "out of memory",
113 Regex::new(r"(?i)(out of memory|oom)").unwrap(),
114 ),
115 ("panic", Regex::new(r"(?i)panic").unwrap()),
116 (
117 "null pointer",
118 Regex::new(r"(?i)(null|nil).*pointer").unwrap(),
119 ),
120 ],
121 test_patterns: vec![
122 ("test failed", Regex::new(r"(?i)test.*failed").unwrap()),
123 (
124 "assertion failed",
125 Regex::new(r"(?i)assertion.*failed").unwrap(),
126 ),
127 (
128 "expected.*actual",
129 Regex::new(r"(?i)expected.*actual").unwrap(),
130 ),
131 ],
132 performance_patterns: vec![
133 (
134 "slow",
135 Regex::new(r"(?i)(slow|latency).*(than|exceed)").unwrap(),
136 ),
137 ("memory leak", Regex::new(r"(?i)memory.*leak").unwrap()),
138 (
139 "high cpu",
140 Regex::new(r"(?i)(high|cpu).*(usage|load)").unwrap(),
141 ),
142 ],
143 security_patterns: vec![
144 ("vulnerability", Regex::new(r"(?i)vulnerability").unwrap()),
145 ("injection", Regex::new(r"(?i)(sql|xss|injection)").unwrap()),
146 (
147 "auth failed",
148 Regex::new(r"(?i)(auth|permission).*(failed|denied)").unwrap(),
149 ),
150 ],
151 }
152 }
153
154 pub fn extract(&self, event: &crate::source::IntakeEvent) -> Vec<ExtractedSignal> {
156 let mut signals = Vec::new();
157
158 let text = format!("{}\n{}", event.title, event.description);
160
161 for (name, pattern) in &self.compiler_patterns {
163 if pattern.is_match(&text) {
164 signals.push(ExtractedSignal {
165 signal_id: uuid::Uuid::new_v4().to_string(),
166 content: format!("compiler_error:{}", name),
167 signal_type: SignalType::CompilerError,
168 confidence: 0.8,
169 source: event.source_type.to_string(),
170 });
171 }
172 }
173
174 for (name, pattern) in &self.runtime_patterns {
176 if pattern.is_match(&text) {
177 signals.push(ExtractedSignal {
178 signal_id: uuid::Uuid::new_v4().to_string(),
179 content: format!("runtime_error:{}", name),
180 signal_type: SignalType::RuntimeError,
181 confidence: 0.75,
182 source: event.source_type.to_string(),
183 });
184 }
185 }
186
187 for (name, pattern) in &self.test_patterns {
189 if pattern.is_match(&text) {
190 signals.push(ExtractedSignal {
191 signal_id: uuid::Uuid::new_v4().to_string(),
192 content: format!("test_failure:{}", name),
193 signal_type: SignalType::TestFailure,
194 confidence: 0.85,
195 source: event.source_type.to_string(),
196 });
197 }
198 }
199
200 for (name, pattern) in &self.performance_patterns {
202 if pattern.is_match(&text) {
203 signals.push(ExtractedSignal {
204 signal_id: uuid::Uuid::new_v4().to_string(),
205 content: format!("performance:{}", name),
206 signal_type: SignalType::Performance,
207 confidence: 0.7,
208 source: event.source_type.to_string(),
209 });
210 }
211 }
212
213 for (name, pattern) in &self.security_patterns {
215 if pattern.is_match(&text) {
216 signals.push(ExtractedSignal {
217 signal_id: uuid::Uuid::new_v4().to_string(),
218 content: format!("security:{}", name),
219 signal_type: SignalType::Security,
220 confidence: 0.9,
221 source: event.source_type.to_string(),
222 });
223 }
224 }
225
226 if signals.is_empty() {
228 let confidence = match event.severity {
229 crate::source::IssueSeverity::Critical => 0.9,
230 crate::source::IssueSeverity::High => 0.75,
231 crate::source::IssueSeverity::Medium => 0.5,
232 crate::source::IssueSeverity::Low => 0.35,
233 crate::source::IssueSeverity::Info => 0.2,
234 };
235
236 signals.push(ExtractedSignal {
237 signal_id: uuid::Uuid::new_v4().to_string(),
238 content: format!("issue:{}", event.title),
239 signal_type: SignalType::GenericError,
240 confidence,
241 source: event.source_type.to_string(),
242 });
243 }
244
245 signals.retain(|s| s.confidence >= self.min_confidence);
247
248 signals
249 }
250}
251
252impl Default for SignalExtractor {
253 fn default() -> Self {
254 Self::new(0.5)
255 }
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::{IntakeEvent, IntakeSourceType, IssueSeverity};

    /// Builds an event with fixed boilerplate fields; only the values the
    /// tests care about are parameters.
    fn make_event(
        event_id: &str,
        source_type: IntakeSourceType,
        title: &str,
        description: &str,
        severity: IssueSeverity,
    ) -> IntakeEvent {
        IntakeEvent {
            event_id: event_id.to_string(),
            source_type,
            source_event_id: None,
            title: title.to_string(),
            description: description.to_string(),
            severity,
            signals: vec![],
            raw_payload: None,
            timestamp_ms: 0,
        }
    }

    #[test]
    fn test_extract_compiler_error() {
        let extractor = SignalExtractor::default();
        let event = make_event(
            "test-1",
            IntakeSourceType::Github,
            "Build failed",
            "error: borrow checker error in src/main.rs",
            IssueSeverity::High,
        );

        let signals = extractor.extract(&event);
        assert!(!signals.is_empty());
        assert!(signals
            .iter()
            .any(|s| s.signal_type == SignalType::CompilerError));
        assert!(signals
            .iter()
            .any(|s| s.content == "compiler_error:borrow checker"));
    }

    #[test]
    fn test_extract_runtime_error() {
        let extractor = SignalExtractor::default();
        let event = make_event(
            "test-2",
            IntakeSourceType::Gitlab,
            "Deployment failed",
            "Error: connection timeout to database",
            IssueSeverity::High,
        );

        let signals = extractor.extract(&event);
        assert!(signals
            .iter()
            .any(|s| s.signal_type == SignalType::RuntimeError));
    }

    #[test]
    fn test_min_confidence_filter() {
        let extractor = SignalExtractor::new(0.8);
        let event = make_event(
            "test-3",
            IntakeSourceType::Http,
            "Minor issue",
            "Some minor issue occurred",
            IssueSeverity::Low,
        );

        // No pattern matches, so the only candidate is the low-severity
        // fallback (0.35), which the 0.8 threshold must drop. Asserting
        // emptiness keeps this test from passing vacuously.
        let signals = extractor.extract(&event);
        assert!(signals.is_empty());
    }

    #[test]
    fn test_min_confidence_keeps_high_confidence_signals() {
        let extractor = SignalExtractor::new(0.8);
        let event = make_event(
            "test-4",
            IntakeSourceType::Http,
            "Security report",
            "Possible SQL injection vulnerability",
            IssueSeverity::Low,
        );

        // Security matches carry 0.9 and must survive the 0.8 threshold.
        let signals = extractor.extract(&event);
        assert!(!signals.is_empty());
        for s in &signals {
            assert!(s.confidence >= 0.8);
            assert_eq!(s.signal_type, SignalType::Security);
        }
    }

    #[test]
    fn test_generic_fallback_when_nothing_matches() {
        let extractor = SignalExtractor::default();
        let event = make_event(
            "test-5",
            IntakeSourceType::Http,
            "Widget misbehaves",
            "The widget renders upside down",
            IssueSeverity::High,
        );

        let signals = extractor.extract(&event);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].signal_type, SignalType::GenericError);
        assert_eq!(signals[0].content, "issue:Widget misbehaves");
    }
}