debtmap/analysis/
context_detection.rs

1//! Context detection for specialized code patterns
2//!
3//! This module detects the context of functions (formatter, parser, CLI handler, etc.)
4//! to provide specialized, context-aware recommendations.
5
6use crate::core::FunctionMetrics;
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9use std::path::Path;
10
/// Category assigned to a function by the context detector in this module.
///
/// `Generic` is the fallback when no test, name, or file-path signal matches.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum FunctionContext {
    /// Name matches a formatter pattern (e.g. `format_*`, `render_*`) or the file
    /// path contains "format", "output", or "display".
13    Formatter,
    /// Name matches a parser pattern (e.g. `parse_*`, `read_*`) or the file path
    /// contains "parse" or "input".
14    Parser,
    /// Name matches a CLI pattern (e.g. `handle_*`, `cmd_*`) or the file path
    /// contains "cli", "command", or "cmd".
15    CliHandler,
    /// Name contains "state" or "transition".
16    StateMachine,
    /// File path contains "config".
17    Configuration,
    /// Function is a test or lives in a test module; checked before all other signals.
18    TestHelper,
    /// File path contains "db", "database", or "query".
19    DatabaseQuery,
    /// Name contains "valid".
20    Validator,
    /// No signal matched.
21    Generic,
22}
23
24impl FunctionContext {
25    pub fn display_name(&self) -> &'static str {
26        match self {
27            FunctionContext::Formatter => "Formatter",
28            FunctionContext::Parser => "Parser",
29            FunctionContext::CliHandler => "CLI Handler",
30            FunctionContext::StateMachine => "State Machine",
31            FunctionContext::Configuration => "Configuration",
32            FunctionContext::TestHelper => "Test Helper",
33            FunctionContext::DatabaseQuery => "Database Query",
34            FunctionContext::Validator => "Validator",
35            FunctionContext::Generic => "Generic",
36        }
37    }
38}
39
/// Result of classifying a single function: the chosen context, a confidence
/// score, and descriptions of the signals that were observed.
40#[derive(Debug, Clone)]
41pub struct ContextAnalysis {
    /// The context selected for the function.
42    pub context: FunctionContext,
    /// Heuristic score derived from how many signals support `context`; the
    /// detector in this file produces 0.1, 0.6, 0.8, or 0.95.
43    pub confidence: f64,
    /// Human-readable descriptions of the signals that were set for the function.
    /// These cover every observed signal, not only the ones supporting `context`.
44    pub detected_signals: Vec<String>,
45}
46
/// Classifies functions into a [`FunctionContext`] using their name, file path,
/// and test flags.
47pub struct ContextDetector {
48    // Cache compiled regexes for performance
    /// Patterns matched against the lowercased function name to select `Formatter`.
49    format_patterns: Vec<Regex>,
    /// Patterns matched against the lowercased function name to select `Parser`.
50    parse_patterns: Vec<Regex>,
    /// Patterns matched against the lowercased function name to select `CliHandler`.
51    cli_patterns: Vec<Regex>,
52}
53
54impl Default for ContextDetector {
55    fn default() -> Self {
56        Self::new()
57    }
58}
59
60impl ContextDetector {
61    pub fn new() -> Self {
62        Self {
63            format_patterns: vec![
64                Regex::new(r"^format_").unwrap(),
65                Regex::new(r"^render_").unwrap(),
66                Regex::new(r"^display_").unwrap(),
67                Regex::new(r"^to_string").unwrap(),
68                Regex::new(r"^write_").unwrap(),
69                Regex::new(r"_formatter$").unwrap(),
70                Regex::new(r"_display$").unwrap(),
71            ],
72            parse_patterns: vec![
73                Regex::new(r"^parse_").unwrap(),
74                Regex::new(r"^read_").unwrap(),
75                Regex::new(r"^decode_").unwrap(),
76                Regex::new(r"^from_str").unwrap(),
77                Regex::new(r"_parser$").unwrap(),
78            ],
79            cli_patterns: vec![
80                Regex::new(r"^handle_").unwrap(),
81                Regex::new(r"^cmd_").unwrap(),
82                Regex::new(r"^command_").unwrap(),
83                Regex::new(r"^execute_").unwrap(),
84                Regex::new(r"^run_").unwrap(),
85            ],
86        }
87    }
88
89    /// Detect the context of a function
90    pub fn detect_context(&self, function: &FunctionMetrics, file_path: &Path) -> ContextAnalysis {
91        let signals = self.gather_signals(function, file_path);
92        let context = self.classify_context(&signals);
93        let confidence = self.calculate_confidence(&signals, &context);
94
95        ContextAnalysis {
96            context,
97            confidence,
98            detected_signals: signals.descriptions(),
99        }
100    }
101
102    fn gather_signals(&self, function: &FunctionMetrics, file_path: &Path) -> ContextSignals {
103        let file_path_str = file_path.to_string_lossy().to_lowercase();
104
105        ContextSignals {
106            function_name: function.name.to_lowercase(),
107            in_formatter_file: file_path_str.contains("format")
108                || file_path_str.contains("output")
109                || file_path_str.contains("display"),
110            in_parser_file: file_path_str.contains("parse") || file_path_str.contains("input"),
111            in_cli_file: file_path_str.contains("cli")
112                || file_path_str.contains("command")
113                || file_path_str.contains("cmd"),
114            in_config_file: file_path_str.contains("config"),
115            in_db_file: file_path_str.contains("db")
116                || file_path_str.contains("database")
117                || file_path_str.contains("query"),
118            has_validate_name: function.name.to_lowercase().contains("valid"),
119            has_state_keywords: function.name.to_lowercase().contains("state")
120                || function.name.to_lowercase().contains("transition"),
121            is_test_helper: function.is_test || function.in_test_module,
122        }
123    }
124
125    fn classify_context(&self, signals: &ContextSignals) -> FunctionContext {
126        // Test helpers have high precedence
127        if signals.is_test_helper {
128            return FunctionContext::TestHelper;
129        }
130
131        // Name-based detection (high confidence)
132        if self.matches_name_pattern(&signals.function_name, &self.format_patterns) {
133            return FunctionContext::Formatter;
134        }
135
136        if self.matches_name_pattern(&signals.function_name, &self.parse_patterns) {
137            return FunctionContext::Parser;
138        }
139
140        if self.matches_name_pattern(&signals.function_name, &self.cli_patterns) {
141            return FunctionContext::CliHandler;
142        }
143
144        if signals.has_validate_name {
145            return FunctionContext::Validator;
146        }
147
148        // File location-based detection (medium confidence)
149        if signals.in_formatter_file {
150            return FunctionContext::Formatter;
151        }
152
153        if signals.in_parser_file {
154            return FunctionContext::Parser;
155        }
156
157        if signals.in_cli_file {
158            return FunctionContext::CliHandler;
159        }
160
161        if signals.in_config_file {
162            return FunctionContext::Configuration;
163        }
164
165        if signals.in_db_file {
166            return FunctionContext::DatabaseQuery;
167        }
168
169        // State machine detection
170        if signals.has_state_keywords {
171            return FunctionContext::StateMachine;
172        }
173
174        FunctionContext::Generic
175    }
176
177    fn matches_name_pattern(&self, name: &str, patterns: &[Regex]) -> bool {
178        patterns.iter().any(|pattern| pattern.is_match(name))
179    }
180
181    fn calculate_confidence(&self, signals: &ContextSignals, context: &FunctionContext) -> f64 {
182        let signal_count = signals.matching_signal_count(context);
183
184        match signal_count {
185            0 => 0.1,  // Default/generic
186            1 => 0.6,  // Single signal
187            2 => 0.8,  // Two signals
188            _ => 0.95, // Three or more signals
189        }
190    }
191}
192
/// Raw signals gathered for one function before classification.
193#[derive(Debug, Clone)]
194struct ContextSignals {
    /// Lowercased function name.
195    function_name: String,
    /// Lowercased file path contains "format", "output", or "display".
196    in_formatter_file: bool,
    /// Lowercased file path contains "parse" or "input".
197    in_parser_file: bool,
    /// Lowercased file path contains "cli", "command", or "cmd".
198    in_cli_file: bool,
    /// Lowercased file path contains "config".
199    in_config_file: bool,
    /// Lowercased file path contains "db", "database", or "query".
200    in_db_file: bool,
    /// Lowercased function name contains "valid".
201    has_validate_name: bool,
    /// Lowercased function name contains "state" or "transition".
202    has_state_keywords: bool,
    /// The function is a test or is located in a test module.
203    is_test_helper: bool,
204}
205
206impl ContextSignals {
207    fn descriptions(&self) -> Vec<String> {
208        let mut signals = Vec::new();
209
210        if self.in_formatter_file {
211            signals.push("Located in formatter/output file".to_string());
212        }
213        if self.in_parser_file {
214            signals.push("Located in parser/input file".to_string());
215        }
216        if self.in_cli_file {
217            signals.push("Located in CLI/command file".to_string());
218        }
219        if self.in_config_file {
220            signals.push("Located in configuration file".to_string());
221        }
222        if self.in_db_file {
223            signals.push("Located in database file".to_string());
224        }
225        if self.has_validate_name {
226            signals.push("Name contains 'valid'".to_string());
227        }
228        if self.has_state_keywords {
229            signals.push("Name contains state-related keywords".to_string());
230        }
231        if self.is_test_helper {
232            signals.push("Is test or in test module".to_string());
233        }
234
235        signals
236    }
237
238    fn matching_signal_count(&self, context: &FunctionContext) -> usize {
239        match context {
240            FunctionContext::Formatter => {
241                let mut count = 0;
242                if self.function_name.contains("format")
243                    || self.function_name.contains("render")
244                    || self.function_name.contains("display")
245                {
246                    count += 1;
247                }
248                if self.in_formatter_file {
249                    count += 1;
250                }
251                count
252            }
253            FunctionContext::Parser => {
254                let mut count = 0;
255                if self.function_name.contains("parse")
256                    || self.function_name.contains("read")
257                    || self.function_name.contains("decode")
258                {
259                    count += 1;
260                }
261                if self.in_parser_file {
262                    count += 1;
263                }
264                count
265            }
266            FunctionContext::CliHandler => {
267                let mut count = 0;
268                if self.function_name.contains("handle")
269                    || self.function_name.contains("cmd")
270                    || self.function_name.contains("command")
271                {
272                    count += 1;
273                }
274                if self.in_cli_file {
275                    count += 1;
276                }
277                count
278            }
279            FunctionContext::TestHelper => {
280                if self.is_test_helper {
281                    2
282                } else {
283                    0
284                }
285            }
286            FunctionContext::Configuration => {
287                if self.in_config_file {
288                    1
289                } else {
290                    0
291                }
292            }
293            FunctionContext::DatabaseQuery => {
294                if self.in_db_file {
295                    1
296                } else {
297                    0
298                }
299            }
300            FunctionContext::Validator => {
301                if self.has_validate_name {
302                    1
303                } else {
304                    0
305                }
306            }
307            FunctionContext::StateMachine => {
308                if self.has_state_keywords {
309                    1
310                } else {
311                    0
312                }
313            }
314            FunctionContext::Generic => 0,
315        }
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a non-test `FunctionMetrics` fixture with the given name and file;
    /// every optional field is left unset.
    fn create_test_function(name: &str, file: &str) -> FunctionMetrics {
        FunctionMetrics {
            name: name.to_string(),
            file: PathBuf::from(file),
            line: 10,
            cyclomatic: 10,
            cognitive: 15,
            nesting: 2,
            length: 50,
            is_test: false,
            visibility: None,
            is_trait_method: false,
            in_test_module: false,
            entropy_score: None,
            is_pure: None,
            purity_confidence: None,
            purity_reason: None,
            call_dependencies: None,
            detected_patterns: None,
            upstream_callers: None,
            downstream_callees: None,
            mapping_pattern_result: None,
            adjusted_complexity: None,
            composition_metrics: None,
            language_specific: None,
            purity_level: None,
            error_swallowing_count: None,
            error_swallowing_patterns: None,
        }
    }

    /// Runs a fresh detector over a fixture called `name` located at `file`.
    fn analyze(name: &str, file: &str) -> ContextAnalysis {
        let metrics = create_test_function(name, file);
        ContextDetector::new().detect_context(&metrics, Path::new(file))
    }

    #[test]
    fn detects_formatter_by_name() {
        let analysis = analyze("format_output", "src/output.rs");

        assert_eq!(analysis.context, FunctionContext::Formatter);
        assert!(analysis.confidence > 0.6);
    }

    #[test]
    fn detects_parser_by_name() {
        let analysis = analyze("parse_input", "src/parser.rs");

        assert_eq!(analysis.context, FunctionContext::Parser);
        assert!(analysis.confidence > 0.6);
    }

    #[test]
    fn detects_cli_handler_by_name() {
        let analysis = analyze("handle_command", "src/cli.rs");

        assert_eq!(analysis.context, FunctionContext::CliHandler);
        assert!(analysis.confidence > 0.6);
    }

    #[test]
    fn detects_formatter_by_file_location() {
        let analysis = analyze("process_data", "src/io/formatter.rs");

        assert_eq!(analysis.context, FunctionContext::Formatter);
        assert!(analysis.confidence > 0.5);
    }

    #[test]
    fn detects_parser_by_file_location() {
        let analysis = analyze("process_data", "src/parser/input.rs");

        assert_eq!(analysis.context, FunctionContext::Parser);
    }

    #[test]
    fn detects_validator() {
        let analysis = analyze("validate_config", "src/config.rs");

        assert_eq!(analysis.context, FunctionContext::Validator);
    }

    #[test]
    fn detects_state_machine() {
        let analysis = analyze("transition_state", "src/state.rs");

        assert_eq!(analysis.context, FunctionContext::StateMachine);
    }

    #[test]
    fn detects_test_helper() {
        // The fixture defaults to a non-test function, so flip the flag by hand.
        let mut metrics = create_test_function("setup_test", "tests/helper.rs");
        metrics.in_test_module = true;
        let analysis =
            ContextDetector::new().detect_context(&metrics, Path::new("tests/helper.rs"));

        assert_eq!(analysis.context, FunctionContext::TestHelper);
        assert!(analysis.confidence > 0.7);
    }

    #[test]
    fn defaults_to_generic() {
        let analysis = analyze("process_data", "src/core/logic.rs");

        assert_eq!(analysis.context, FunctionContext::Generic);
        assert!(analysis.confidence < 0.2);
    }

    #[test]
    fn high_confidence_with_multiple_signals() {
        let analysis = analyze("format_pattern_type", "src/io/pattern_output.rs");

        assert_eq!(analysis.context, FunctionContext::Formatter);
        assert!(analysis.confidence >= 0.8);
        assert!(!analysis.detected_signals.is_empty());
    }
}