Skip to main content

oris_intake/
signal.rs

1//! Signal extraction from intake events
2
3use crate::{IntakeError, IntakeResult, IntakeSourceType};
4use regex_lite::Regex;
5use serde::{Deserialize, Serialize};
6
7/// A extracted signal from an intake event
8#[derive(Clone, Debug, Serialize, Deserialize)]
9pub struct ExtractedSignal {
10    /// Signal ID
11    pub signal_id: String,
12
13    /// Signal content/description
14    pub content: String,
15
16    /// Signal type
17    pub signal_type: SignalType,
18
19    /// Confidence score (0.0 - 1.0)
20    pub confidence: f32,
21
22    /// Source of the signal
23    pub source: String,
24}
25
26/// Types of signals that can be extracted
27#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
28#[serde(rename_all = "snake_case")]
29pub enum SignalType {
30    /// Compiler diagnostic signal
31    CompilerError,
32    /// Runtime error signal
33    RuntimeError,
34    /// Test failure signal
35    TestFailure,
36    /// Performance issue signal
37    Performance,
38    /// Security issue signal
39    Security,
40    /// Configuration issue signal
41    ConfigError,
42    /// Generic error signal
43    GenericError,
44}
45
46impl std::fmt::Display for SignalType {
47    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48        match self {
49            SignalType::CompilerError => write!(f, "compiler_error"),
50            SignalType::RuntimeError => write!(f, "runtime_error"),
51            SignalType::TestFailure => write!(f, "test_failure"),
52            SignalType::Performance => write!(f, "performance"),
53            SignalType::Security => write!(f, "security"),
54            SignalType::ConfigError => write!(f, "config_error"),
55            SignalType::GenericError => write!(f, "generic_error"),
56        }
57    }
58}
59
60/// Signal extractor for converting intake events to evolution signals
61pub struct SignalExtractor {
62    /// Minimum confidence threshold
63    min_confidence: f32,
64
65    /// Compiler error patterns
66    compiler_patterns: Vec<(&'static str, Regex)>,
67
68    /// Runtime error patterns
69    runtime_patterns: Vec<(&'static str, Regex)>,
70
71    /// Test failure patterns
72    test_patterns: Vec<(&'static str, Regex)>,
73
74    /// Performance issue patterns
75    performance_patterns: Vec<(&'static str, Regex)>,
76
77    /// Security issue patterns
78    security_patterns: Vec<(&'static str, Regex)>,
79}
80
81impl SignalExtractor {
82    /// Create a new signal extractor with default patterns
83    pub fn new(min_confidence: f32) -> Self {
84        Self {
85            min_confidence,
86            compiler_patterns: vec![
87                (
88                    "borrow checker",
89                    Regex::new(r"(?i)borrow.*(error|checker)").unwrap(),
90                ),
91                ("type mismatch", Regex::new(r"(?i)type.*mismatch").unwrap()),
92                (
93                    "missing import",
94                    Regex::new(r"(?i)(cannot find|missing).*(import|struct|function)").unwrap(),
95                ),
96                (
97                    "unresolved import",
98                    Regex::new(r"(?i)unresolved.*import").unwrap(),
99                ),
100                (
101                    "unused",
102                    Regex::new(r"(?i)unused.*(import|variable|function)").unwrap(),
103                ),
104            ],
105            runtime_patterns: vec![
106                ("timeout", Regex::new(r"(?i)timeout").unwrap()),
107                (
108                    "connection refused",
109                    Regex::new(r"(?i)(connection|connect).*(refused|failed)").unwrap(),
110                ),
111                (
112                    "out of memory",
113                    Regex::new(r"(?i)(out of memory|oom)").unwrap(),
114                ),
115                ("panic", Regex::new(r"(?i)panic").unwrap()),
116                (
117                    "null pointer",
118                    Regex::new(r"(?i)(null|nil).*pointer").unwrap(),
119                ),
120            ],
121            test_patterns: vec![
122                ("test failed", Regex::new(r"(?i)test.*failed").unwrap()),
123                (
124                    "assertion failed",
125                    Regex::new(r"(?i)assertion.*failed").unwrap(),
126                ),
127                (
128                    "expected.*actual",
129                    Regex::new(r"(?i)expected.*actual").unwrap(),
130                ),
131            ],
132            performance_patterns: vec![
133                (
134                    "slow",
135                    Regex::new(r"(?i)(slow|latency).*(than|exceed)").unwrap(),
136                ),
137                ("memory leak", Regex::new(r"(?i)memory.*leak").unwrap()),
138                (
139                    "high cpu",
140                    Regex::new(r"(?i)(high|cpu).*(usage|load)").unwrap(),
141                ),
142            ],
143            security_patterns: vec![
144                ("vulnerability", Regex::new(r"(?i)vulnerability").unwrap()),
145                ("injection", Regex::new(r"(?i)(sql|xss|injection)").unwrap()),
146                (
147                    "auth failed",
148                    Regex::new(r"(?i)(auth|permission).*(failed|denied)").unwrap(),
149                ),
150            ],
151        }
152    }
153
154    /// Extract signals from an intake event
155    pub fn extract(&self, event: &crate::source::IntakeEvent) -> Vec<ExtractedSignal> {
156        let mut signals = Vec::new();
157
158        // Process the title and description
159        let text = format!("{}\n{}", event.title, event.description);
160
161        // Check compiler patterns
162        for (name, pattern) in &self.compiler_patterns {
163            if pattern.is_match(&text) {
164                signals.push(ExtractedSignal {
165                    signal_id: uuid::Uuid::new_v4().to_string(),
166                    content: format!("compiler_error:{}", name),
167                    signal_type: SignalType::CompilerError,
168                    confidence: 0.8,
169                    source: event.source_type.to_string(),
170                });
171            }
172        }
173
174        // Check runtime patterns
175        for (name, pattern) in &self.runtime_patterns {
176            if pattern.is_match(&text) {
177                signals.push(ExtractedSignal {
178                    signal_id: uuid::Uuid::new_v4().to_string(),
179                    content: format!("runtime_error:{}", name),
180                    signal_type: SignalType::RuntimeError,
181                    confidence: 0.75,
182                    source: event.source_type.to_string(),
183                });
184            }
185        }
186
187        // Check test patterns
188        for (name, pattern) in &self.test_patterns {
189            if pattern.is_match(&text) {
190                signals.push(ExtractedSignal {
191                    signal_id: uuid::Uuid::new_v4().to_string(),
192                    content: format!("test_failure:{}", name),
193                    signal_type: SignalType::TestFailure,
194                    confidence: 0.85,
195                    source: event.source_type.to_string(),
196                });
197            }
198        }
199
200        // Check performance patterns
201        for (name, pattern) in &self.performance_patterns {
202            if pattern.is_match(&text) {
203                signals.push(ExtractedSignal {
204                    signal_id: uuid::Uuid::new_v4().to_string(),
205                    content: format!("performance:{}", name),
206                    signal_type: SignalType::Performance,
207                    confidence: 0.7,
208                    source: event.source_type.to_string(),
209                });
210            }
211        }
212
213        // Check security patterns
214        for (name, pattern) in &self.security_patterns {
215            if pattern.is_match(&text) {
216                signals.push(ExtractedSignal {
217                    signal_id: uuid::Uuid::new_v4().to_string(),
218                    content: format!("security:{}", name),
219                    signal_type: SignalType::Security,
220                    confidence: 0.9,
221                    source: event.source_type.to_string(),
222                });
223            }
224        }
225
226        // If no specific pattern matched, add a generic signal based on severity
227        if signals.is_empty() {
228            let confidence = match event.severity {
229                crate::source::IssueSeverity::Critical => 0.9,
230                crate::source::IssueSeverity::High => 0.75,
231                crate::source::IssueSeverity::Medium => 0.5,
232                crate::source::IssueSeverity::Low => 0.35,
233                crate::source::IssueSeverity::Info => 0.2,
234            };
235
236            signals.push(ExtractedSignal {
237                signal_id: uuid::Uuid::new_v4().to_string(),
238                content: format!("issue:{}", event.title),
239                signal_type: SignalType::GenericError,
240                confidence,
241                source: event.source_type.to_string(),
242            });
243        }
244
245        // Filter by minimum confidence
246        signals.retain(|s| s.confidence >= self.min_confidence);
247
248        signals
249    }
250}
251
252impl Default for SignalExtractor {
253    fn default() -> Self {
254        Self::new(0.5)
255    }
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::{IntakeEvent, IntakeSourceType, IssueSeverity};

    /// Builds an intake event with the given identifying fields and neutral
    /// values for everything the extractor does not look at.
    fn make_event(
        event_id: &str,
        source_type: IntakeSourceType,
        title: &str,
        description: &str,
        severity: IssueSeverity,
    ) -> IntakeEvent {
        IntakeEvent {
            event_id: event_id.to_string(),
            source_type,
            source_event_id: None,
            title: title.to_string(),
            description: description.to_string(),
            severity,
            signals: vec![],
            raw_payload: None,
            timestamp_ms: 0,
        }
    }

    #[test]
    fn test_extract_compiler_error() {
        let extractor = SignalExtractor::default();
        let event = make_event(
            "test-1",
            IntakeSourceType::Github,
            "Build failed",
            "error: borrow checker error in src/main.rs",
            IssueSeverity::High,
        );

        let signals = extractor.extract(&event);
        assert!(!signals.is_empty());
        assert!(signals
            .iter()
            .any(|s| s.signal_type == SignalType::CompilerError));
    }

    #[test]
    fn test_extract_runtime_error() {
        let extractor = SignalExtractor::default();
        let event = make_event(
            "test-2",
            IntakeSourceType::Gitlab,
            "Deployment failed",
            "Error: connection timeout to database",
            IssueSeverity::High,
        );

        let signals = extractor.extract(&event);
        assert!(signals
            .iter()
            .any(|s| s.signal_type == SignalType::RuntimeError));
    }

    #[test]
    fn test_min_confidence_filter() {
        let extractor = SignalExtractor::new(0.8); // High threshold
        let event = make_event(
            "test-3",
            IntakeSourceType::Http,
            "Minor issue",
            "Some minor issue occurred",
            IssueSeverity::Low,
        );

        let signals = extractor.extract(&event);
        // No pattern matches this text, so only the generic fallback (0.35 for
        // low severity) is produced, and the 0.8 threshold must remove it.
        // Asserting emptiness keeps the test from passing vacuously.
        assert!(signals.is_empty());
    }

    #[test]
    fn test_generic_fallback_signal() {
        let extractor = SignalExtractor::default();
        let event = make_event(
            "test-4",
            IntakeSourceType::Http,
            "Minor issue",
            "Some minor issue occurred",
            IssueSeverity::Medium,
        );

        let signals = extractor.extract(&event);
        // Medium severity maps to 0.5, which meets the default threshold.
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].signal_type, SignalType::GenericError);
        assert_eq!(signals[0].content, "issue:Minor issue");
        assert!(signals[0].confidence >= 0.5);
    }
}