debtmap/analysis/
purity_analysis.rs

1//! Purity Analysis Module
2//!
3//! This module classifies functions on a purity spectrum (strictly pure, locally pure,
4//! read-only, impure) using static analysis. It enables responsibility detection to
5//! prefer pure computation classification and identify purity violations that indicate
6//! mixed concerns.
7//!
8//! # Purity Levels
9//!
10//! - **Strictly Pure**: No I/O, no side effects, deterministic
11//! - **Locally Pure**: Only mutates local variables, deterministic results
12//! - **Read-Only**: Reads external state but doesn't modify it
13//! - **Impure**: Performs I/O or modifies external state
14//!
15//! # Example
16//!
//! ```ignore
//! use debtmap::analysis::io_detection::Language;
//! use debtmap::analysis::purity_analysis::{PurityAnalyzer, PurityLevel};
//!
//! let code = "fn add(a: i32, b: i32) -> i32 { a + b }";
//! let analyzer = PurityAnalyzer::new();
//! let analysis = analyzer.analyze_code(code, Language::Rust);
//!
//! if analysis.purity == PurityLevel::StrictlyPure {
//!     println!("Function is strictly pure - ideal for testing!");
//! }
//! ```
27
28use crate::analysis::io_detection::{IoDetector, IoProfile, Language, SideEffect};
29use serde::{Deserialize, Serialize};
30use std::collections::HashMap;
31
32/// Purity level classification
33#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
34pub enum PurityLevel {
35    /// No I/O, no side effects, deterministic
36    StrictlyPure,
37    /// Only local mutations, deterministic output
38    LocallyPure,
39    /// Reads external state, no mutations
40    ReadOnly,
41    /// Performs I/O or modifies external state
42    Impure,
43}
44
45impl PurityLevel {
46    /// Convert to a human-readable string
47    pub fn as_str(&self) -> &'static str {
48        match self {
49            PurityLevel::StrictlyPure => "Strictly Pure",
50            PurityLevel::LocallyPure => "Locally Pure",
51            PurityLevel::ReadOnly => "Read-Only",
52            PurityLevel::Impure => "Impure",
53        }
54    }
55}
56
57/// Purity violation types
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub enum PurityViolation {
60    /// I/O operation performed
61    IoOperation {
62        description: String,
63        line: Option<usize>,
64    },
65    /// External state mutation
66    StateMutation { target: String, line: Option<usize> },
67    /// Non-deterministic operation
68    NonDeterministic {
69        operation: String,
70        line: Option<usize>,
71    },
72    /// Calls impure function
73    ImpureCall { callee: String, line: Option<usize> },
74}
75
76impl PurityViolation {
77    /// Get a description of this violation
78    pub fn description(&self) -> String {
79        match self {
80            PurityViolation::IoOperation { description, .. } => {
81                format!("I/O operation: {}", description)
82            }
83            PurityViolation::StateMutation { target, .. } => {
84                format!("State mutation: {}", target)
85            }
86            PurityViolation::NonDeterministic { operation, .. } => {
87                format!("Non-deterministic operation: {}", operation)
88            }
89            PurityViolation::ImpureCall { callee, .. } => {
90                format!("Calls impure function: {}", callee)
91            }
92        }
93    }
94
95    /// Get the line number if available
96    pub fn line(&self) -> Option<usize> {
97        match self {
98            PurityViolation::IoOperation { line, .. }
99            | PurityViolation::StateMutation { line, .. }
100            | PurityViolation::NonDeterministic { line, .. }
101            | PurityViolation::ImpureCall { line, .. } => *line,
102        }
103    }
104}
105
106/// Refactoring opportunity type
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub enum RefactoringType {
109    /// Extract pure portion from impure function
110    ExtractPureCore,
111    /// Move I/O to function boundary
112    SeparateIoFromLogic,
113    /// Replace non-deterministic operation with parameter
114    ParameterizeNonDeterminism,
115    /// Extract single impure operation
116    IsolateSingleViolation,
117}
118
119impl RefactoringType {
120    /// Get a description of this refactoring type
121    pub fn as_str(&self) -> &'static str {
122        match self {
123            RefactoringType::ExtractPureCore => "Extract Pure Core",
124            RefactoringType::SeparateIoFromLogic => "Separate I/O from Logic",
125            RefactoringType::ParameterizeNonDeterminism => "Parameterize Non-Determinism",
126            RefactoringType::IsolateSingleViolation => "Isolate Single Violation",
127        }
128    }
129}
130
131/// Effort level for refactoring
132#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
133pub enum EffortLevel {
134    Low,
135    Medium,
136    High,
137}
138
139impl EffortLevel {
140    /// Convert to a human-readable string
141    pub fn as_str(&self) -> &'static str {
142        match self {
143            EffortLevel::Low => "Low",
144            EffortLevel::Medium => "Medium",
145            EffortLevel::High => "High",
146        }
147    }
148}
149
150/// Purity refactoring opportunity
151#[derive(Debug, Clone, Serialize, Deserialize)]
152pub struct PurityRefactoringOpportunity {
153    pub opportunity_type: RefactoringType,
154    pub description: String,
155    pub estimated_effort: EffortLevel,
156}
157
158/// Complete purity analysis result
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct PurityAnalysis {
161    pub purity: PurityLevel,
162    pub violations: Vec<PurityViolation>,
163    pub is_deterministic: bool,
164    pub can_be_pure: bool,
165    pub refactoring_opportunity: Option<PurityRefactoringOpportunity>,
166}
167
168impl PurityAnalysis {
169    /// Create a new purity analysis for a strictly pure function
170    pub fn strictly_pure() -> Self {
171        Self {
172            purity: PurityLevel::StrictlyPure,
173            violations: Vec::new(),
174            is_deterministic: true,
175            can_be_pure: false,
176            refactoring_opportunity: None,
177        }
178    }
179
180    /// Create a new purity analysis for an impure function
181    pub fn impure(violations: Vec<PurityViolation>) -> Self {
182        let is_deterministic = !violations
183            .iter()
184            .any(|v| matches!(v, PurityViolation::NonDeterministic { .. }));
185
186        Self {
187            purity: PurityLevel::Impure,
188            violations,
189            is_deterministic,
190            can_be_pure: false,
191            refactoring_opportunity: None,
192        }
193    }
194}
195
196/// Purity analyzer
197pub struct PurityAnalyzer {
198    io_detector: IoDetector,
199    non_determinism_patterns: HashMap<Language, Vec<String>>,
200}
201
202impl PurityAnalyzer {
203    /// Create a new purity analyzer
204    pub fn new() -> Self {
205        Self {
206            io_detector: IoDetector::new(),
207            non_determinism_patterns: Self::build_non_determinism_patterns(),
208        }
209    }
210
211    /// Analyze code for purity
212    pub fn analyze_code(&self, code: &str, language: Language) -> PurityAnalysis {
213        // Get I/O profile from Spec 141
214        let io_profile = self.io_detector.detect_io(code, language);
215
216        // Collect violations
217        let mut violations = Vec::new();
218
219        // Check for I/O operations
220        violations.extend(self.analyze_io_operations(&io_profile, code, language));
221
222        // Check for side effects
223        violations.extend(self.analyze_side_effects(&io_profile, code));
224
225        // Check for non-deterministic operations
226        violations.extend(self.detect_non_determinism(code, language));
227
228        // Classify purity level
229        let purity = self.classify_purity(&violations, &io_profile, code, language);
230
231        // Check determinism
232        let is_deterministic = !violations
233            .iter()
234            .any(|v| matches!(v, PurityViolation::NonDeterministic { .. }));
235
236        // Check if function can be made pure with refactoring
237        let can_be_pure = self.can_be_made_pure(&violations);
238
239        // Generate refactoring opportunity if applicable
240        let refactoring_opportunity = self.suggest_refactoring(&violations);
241
242        PurityAnalysis {
243            purity,
244            violations,
245            is_deterministic,
246            can_be_pure,
247            refactoring_opportunity,
248        }
249    }
250
251    /// Analyze I/O operations from the I/O profile
252    fn analyze_io_operations(
253        &self,
254        profile: &IoProfile,
255        _code: &str,
256        _language: Language,
257    ) -> Vec<PurityViolation> {
258        let mut violations = Vec::new();
259
260        // File operations
261        for _ in &profile.file_operations {
262            violations.push(PurityViolation::IoOperation {
263                description: "File I/O operation".to_string(),
264                line: None,
265            });
266        }
267
268        // Network operations
269        for _ in &profile.network_operations {
270            violations.push(PurityViolation::IoOperation {
271                description: "Network I/O operation".to_string(),
272                line: None,
273            });
274        }
275
276        // Console operations
277        for _ in &profile.console_operations {
278            violations.push(PurityViolation::IoOperation {
279                description: "Console I/O operation".to_string(),
280                line: None,
281            });
282        }
283
284        // Database operations
285        for _ in &profile.database_operations {
286            violations.push(PurityViolation::IoOperation {
287                description: "Database I/O operation".to_string(),
288                line: None,
289            });
290        }
291
292        // Environment operations
293        for _ in &profile.environment_operations {
294            violations.push(PurityViolation::IoOperation {
295                description: "Environment variable access".to_string(),
296                line: None,
297            });
298        }
299
300        violations
301    }
302
303    /// Analyze side effects from the I/O profile
304    fn analyze_side_effects(&self, profile: &IoProfile, code: &str) -> Vec<PurityViolation> {
305        let mut violations = Vec::new();
306
307        for side_effect in &profile.side_effects {
308            // Check if mutation is local or external
309            if !self.is_local_mutation(side_effect, code) {
310                match side_effect {
311                    SideEffect::FieldMutation { target, field } => {
312                        violations.push(PurityViolation::StateMutation {
313                            target: format!("{}.{}", target, field),
314                            line: None,
315                        });
316                    }
317                    SideEffect::GlobalMutation { name } => {
318                        violations.push(PurityViolation::StateMutation {
319                            target: name.clone(),
320                            line: None,
321                        });
322                    }
323                    SideEffect::CollectionMutation { .. } => {
324                        // Collection mutations are considered local unless proven otherwise
325                        // This is a simplification - a more sophisticated analysis would
326                        // track whether the collection is local or external
327                    }
328                    SideEffect::ExternalState { description } => {
329                        violations.push(PurityViolation::StateMutation {
330                            target: description.clone(),
331                            line: None,
332                        });
333                    }
334                }
335            }
336        }
337
338        violations
339    }
340
341    /// Check if a mutation is local to the function
342    fn is_local_mutation(&self, side_effect: &SideEffect, code: &str) -> bool {
343        match side_effect {
344            SideEffect::FieldMutation { target, .. } => {
345                // If target is "self", it's a field mutation (not local)
346                target == "unknown" || !code.contains("self.")
347            }
348            SideEffect::GlobalMutation { .. } => false, // Global mutations are never local
349            SideEffect::CollectionMutation { .. } => {
350                // Assume collection mutations are local for now
351                // A more sophisticated analysis would track variable scope
352                true
353            }
354            SideEffect::ExternalState { .. } => false,
355        }
356    }
357
358    /// Detect non-deterministic operations
359    fn detect_non_determinism(&self, code: &str, language: Language) -> Vec<PurityViolation> {
360        let mut violations = Vec::new();
361
362        if let Some(patterns) = self.non_determinism_patterns.get(&language) {
363            for pattern in patterns {
364                if code.contains(pattern) {
365                    violations.push(PurityViolation::NonDeterministic {
366                        operation: pattern.clone(),
367                        line: None,
368                    });
369                }
370            }
371        }
372
373        violations
374    }
375
376    /// Classify the purity level based on violations
377    fn classify_purity(
378        &self,
379        violations: &[PurityViolation],
380        profile: &IoProfile,
381        code: &str,
382        language: Language,
383    ) -> PurityLevel {
384        if violations.is_empty() {
385            return PurityLevel::StrictlyPure;
386        }
387
388        // Check if all violations are local mutations
389        let only_local_mutations = violations
390            .iter()
391            .all(|v| matches!(v, PurityViolation::StateMutation { .. }))
392            && !violations.is_empty();
393
394        if only_local_mutations {
395            return PurityLevel::LocallyPure;
396        }
397
398        // Check if function only reads state (no writes)
399        let only_reads = self.only_has_read_operations(profile, code, language);
400
401        if only_reads
402            && !violations
403                .iter()
404                .any(|v| matches!(v, PurityViolation::StateMutation { .. }))
405        {
406            return PurityLevel::ReadOnly;
407        }
408
409        PurityLevel::Impure
410    }
411
412    /// Check if the I/O profile only contains read operations
413    fn only_has_read_operations(
414        &self,
415        profile: &IoProfile,
416        code: &str,
417        language: Language,
418    ) -> bool {
419        // Check if we have file reads but no other I/O
420        let has_file_ops = !profile.file_operations.is_empty();
421        let has_network = !profile.network_operations.is_empty();
422        let has_console = !profile.console_operations.is_empty();
423        let has_db = !profile.database_operations.is_empty();
424        let has_mutations = !profile.side_effects.is_empty();
425
426        // If we have network, console, db, or mutations, it's not read-only
427        if has_network || has_console || has_db || has_mutations {
428            return false;
429        }
430
431        // Check if we have file write patterns
432        if has_file_ops && self.has_write_operations(code, language) {
433            return false;
434        }
435
436        // If we only have file operations and no write patterns, consider it read-only
437        has_file_ops
438    }
439
440    /// Check if code contains write operations
441    fn has_write_operations(&self, code: &str, language: Language) -> bool {
442        match language {
443            Language::Rust => {
444                code.contains("::write")
445                    || code.contains("File::create")
446                    || code.contains("OpenOptions")
447                    || code.contains("write_all")
448            }
449            Language::Python => {
450                code.contains("write_text")
451                    || code.contains("write_bytes")
452                    || code.contains("open(") && code.contains("'w'")
453                    || code.contains("open(") && code.contains("\"w\"")
454            }
455            Language::JavaScript | Language::TypeScript => {
456                code.contains("writeFile")
457                    || code.contains("createWriteStream")
458                    || code.contains("appendFile")
459            }
460        }
461    }
462
463    /// Check if function can be made pure with refactoring
464    fn can_be_made_pure(&self, violations: &[PurityViolation]) -> bool {
465        // Single violation: Easy to extract
466        if violations.len() == 1 {
467            return true;
468        }
469
470        // All violations are I/O: Can separate I/O from logic
471        let all_io = violations
472            .iter()
473            .all(|v| matches!(v, PurityViolation::IoOperation { .. }));
474
475        if all_io && violations.len() <= 3 {
476            return true;
477        }
478
479        false
480    }
481
482    /// Suggest refactoring opportunities
483    fn suggest_refactoring(
484        &self,
485        violations: &[PurityViolation],
486    ) -> Option<PurityRefactoringOpportunity> {
487        // Single violation: Easy to extract
488        if violations.len() == 1 {
489            let description = format!(
490                "Function has single purity violation: {}. Extract to make core logic pure.",
491                violations[0].description()
492            );
493            return Some(PurityRefactoringOpportunity {
494                opportunity_type: RefactoringType::IsolateSingleViolation,
495                description,
496                estimated_effort: EffortLevel::Low,
497            });
498        }
499
500        // All violations are I/O: Separate I/O from logic
501        let all_io = violations
502            .iter()
503            .all(|v| matches!(v, PurityViolation::IoOperation { .. }));
504
505        if all_io {
506            return Some(PurityRefactoringOpportunity {
507                opportunity_type: RefactoringType::SeparateIoFromLogic,
508                description: "Separate I/O operations from business logic. Make computation pure."
509                    .to_string(),
510                estimated_effort: EffortLevel::Medium,
511            });
512        }
513
514        // Non-deterministic: Parameterize
515        let has_non_determinism = violations
516            .iter()
517            .any(|v| matches!(v, PurityViolation::NonDeterministic { .. }));
518
519        if has_non_determinism {
520            return Some(PurityRefactoringOpportunity {
521                opportunity_type: RefactoringType::ParameterizeNonDeterminism,
522                description: "Replace non-deterministic operations (time, random) with parameters for testability.".to_string(),
523                estimated_effort: EffortLevel::Low,
524            });
525        }
526
527        None
528    }
529
530    /// Build non-determinism patterns for each language
531    fn build_non_determinism_patterns() -> HashMap<Language, Vec<String>> {
532        let mut patterns = HashMap::new();
533
534        // Rust patterns
535        patterns.insert(
536            Language::Rust,
537            vec![
538                "std::time::Instant::now".to_string(),
539                "std::time::SystemTime::now".to_string(),
540                "Instant::now".to_string(),
541                "SystemTime::now".to_string(),
542                "rand::".to_string(),
543                "thread_rng".to_string(),
544                "uuid::Uuid::new_v4".to_string(),
545                "Uuid::new_v4".to_string(),
546                "HashMap::new".to_string(), // Uses random seed
547                "HashSet::new".to_string(), // Uses random seed
548            ],
549        );
550
551        // Python patterns
552        patterns.insert(
553            Language::Python,
554            vec![
555                "random.".to_string(),
556                "datetime.now".to_string(),
557                "time.time".to_string(),
558                "uuid.uuid4".to_string(),
559                "time.monotonic".to_string(),
560            ],
561        );
562
563        // JavaScript patterns
564        patterns.insert(
565            Language::JavaScript,
566            vec![
567                "Math.random".to_string(),
568                "Date.now".to_string(),
569                "new Date()".to_string(),
570                "crypto.randomUUID".to_string(),
571                "performance.now".to_string(),
572            ],
573        );
574
575        // TypeScript has same patterns as JavaScript
576        patterns.insert(
577            Language::TypeScript,
578            patterns[&Language::JavaScript].clone(),
579        );
580
581        patterns
582    }
583}
584
585impl Default for PurityAnalyzer {
586    fn default() -> Self {
587        Self::new()
588    }
589}
590
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh analyzer over `code`, interpreted as `language`.
    fn analyze(code: &str, language: Language) -> PurityAnalysis {
        PurityAnalyzer::new().analyze_code(code, language)
    }

    /// True when at least one violation is a non-deterministic operation.
    fn reports_non_determinism(analysis: &PurityAnalysis) -> bool {
        analysis
            .violations
            .iter()
            .any(|v| matches!(v, PurityViolation::NonDeterministic { .. }))
    }

    #[test]
    fn strictly_pure_function() {
        let analysis = analyze(
            r#"
        fn add(a: i32, b: i32) -> i32 {
            a + b
        }
        "#,
            Language::Rust,
        );

        assert_eq!(analysis.purity, PurityLevel::StrictlyPure);
        assert!(analysis.violations.is_empty());
        assert!(analysis.is_deterministic);
    }

    #[test]
    fn read_only_function() {
        let analysis = analyze(
            r#"
        fn read_config() -> String {
            std::fs::read_to_string("config.toml").unwrap()
        }
        "#,
            Language::Rust,
        );

        assert_eq!(analysis.purity, PurityLevel::ReadOnly);
        assert!(!analysis.violations.is_empty());
        assert!(analysis
            .violations
            .iter()
            .any(|v| matches!(v, PurityViolation::IoOperation { .. })));
    }

    #[test]
    fn impure_function() {
        let analysis = analyze(
            r#"
        fn save_data(data: &str) {
            std::fs::write("output.txt", data).unwrap();
        }
        "#,
            Language::Rust,
        );

        assert_eq!(analysis.purity, PurityLevel::Impure);
        assert!(!analysis.violations.is_empty());
    }

    #[test]
    fn non_deterministic_detection() {
        let analysis = analyze(
            r#"
        fn generate_id() -> String {
            uuid::Uuid::new_v4().to_string()
        }
        "#,
            Language::Rust,
        );

        assert!(!analysis.is_deterministic);
        assert!(reports_non_determinism(&analysis));
    }

    #[test]
    fn almost_pure_refactoring_opportunity() {
        let analysis = analyze(
            r#"
        fn calculate_with_logging(a: i32, b: i32) -> i32 {
            let result = a * b + a / b;
            println!("Result: {}", result);
            result
        }
        "#,
            Language::Rust,
        );

        assert!(analysis.can_be_pure);

        // An opportunity must exist and must be the single-violation kind.
        let opportunity_type = analysis
            .refactoring_opportunity
            .as_ref()
            .map(|opportunity| &opportunity.opportunity_type);
        assert!(matches!(
            opportunity_type,
            Some(RefactoringType::IsolateSingleViolation)
        ));
    }

    #[test]
    fn python_non_deterministic() {
        let analysis = analyze(
            r#"
def generate_timestamp():
    return datetime.now()
        "#,
            Language::Python,
        );

        assert!(!analysis.is_deterministic);
        assert!(reports_non_determinism(&analysis));
    }

    #[test]
    fn javascript_random() {
        let analysis = analyze(
            r#"
function randomNumber() {
    return Math.random();
}
        "#,
            Language::JavaScript,
        );

        assert!(!analysis.is_deterministic);
        assert!(reports_non_determinism(&analysis));
    }

    #[test]
    fn separate_io_refactoring() {
        let analysis = analyze(
            r#"
        fn process_file(path: &str) -> Result<i32, Error> {
            let content = std::fs::read_to_string(path)?;
            let data = parse_content(&content);
            let result = calculate(&data);
            std::fs::write("output.txt", &result.to_string())?;
            Ok(result)
        }
        "#,
            Language::Rust,
        );

        assert_eq!(analysis.purity, PurityLevel::Impure);

        // The suggestion is optional here; when one is produced it must be
        // the I/O-separation kind.
        let opportunity_type = analysis
            .refactoring_opportunity
            .as_ref()
            .map(|opportunity| &opportunity.opportunity_type);
        assert!(matches!(
            opportunity_type,
            None | Some(RefactoringType::SeparateIoFromLogic)
        ));
    }
}