debtmap 0.17.0

Code complexity and technical debt analyzer
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
//! Core data types and domain models for debtmap analysis.
//!
//! This module contains the fundamental types used throughout debtmap, including
//! analysis results, complexity reports, function metrics, and technical debt items.
//! Types here are designed to be serializable for persistence and interoperability.
//!
//! # Key Components
//!
//! - **Analysis results**: Top-level structures aggregating all analysis data
//! - **Complexity metrics**: Cyclomatic, cognitive, and weighted complexity measures
//! - **Debt items**: Individual technical debt findings with severity and location
//! - **AST utilities**: Abstract syntax tree helpers for code parsing
//! - **Monadic patterns**: Functional error handling and composition utilities

pub mod ast;
pub mod errors;
pub mod injection;
pub mod lazy;
pub mod metrics;
pub mod monadic;
pub mod parsing;
pub mod refined;
pub mod traits;
pub mod types;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;

/// Top-level structure aggregating all data produced by an analysis.
///
/// Serializable and deserializable through serde.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnalysisResults {
    /// Path of the analyzed project (presumably the project root — confirm with the producer).
    pub project_path: PathBuf,
    /// UTC timestamp attached to these results (presumably when the analysis ran — confirm).
    pub timestamp: DateTime<Utc>,
    /// Per-function complexity metrics together with their summary.
    pub complexity: ComplexityReport,
    /// Technical debt findings.
    pub technical_debt: TechnicalDebtReport,
    /// Module dependency data, including circular dependencies.
    pub dependencies: DependencyReport,
    /// Duplicated code blocks.
    pub duplications: Vec<DuplicationBlock>,
    /// Context information keyed by file path.
    ///
    /// Falls back to an empty map when the field is missing during
    /// deserialization and is left out of serialized output when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub file_contexts: HashMap<PathBuf, crate::analysis::FileContext>,
}

/// Complexity section of the analysis results: the individual function
/// metrics plus an aggregate summary.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ComplexityReport {
    /// Metrics for each analyzed function.
    pub metrics: Vec<FunctionMetrics>,
    /// Aggregate figures (how they are derived from `metrics` is not visible in this module).
    pub summary: ComplexitySummary,
}

/// Aggregate complexity figures.
///
/// NOTE(review): the computation of these values lives outside this module;
/// the field descriptions below follow the field names and should be
/// confirmed against the producer.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ComplexitySummary {
    /// Number of functions covered by the summary.
    pub total_functions: usize,
    /// Average complexity across those functions (which complexity measure is used is not shown here).
    pub average_complexity: f64,
    /// Highest complexity value observed.
    pub max_complexity: u32,
    /// Number of functions counted as high-complexity (the threshold is defined by the producer).
    pub high_complexity_count: usize,
}

/// Refined purity classification that distinguishes local from external mutations.
///
/// This enum provides more nuanced purity analysis than the simple boolean `is_pure` field.
/// It enables better scoring for functions that use local mutations for efficiency but are
/// functionally pure (referentially transparent).
///
/// The type is `Copy` and `Eq`, so values can be passed by value and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum PurityLevel {
    /// No mutations whatsoever - pure mathematical functions
    StrictlyPure,

    /// Uses local mutations for efficiency but no external side effects
    /// (builder patterns, accumulators, owned `mut self`)
    LocallyPure,

    /// Reads external state but doesn't modify it (constants, `&self` methods)
    ReadOnly,

    /// Modifies external state or performs I/O (`&mut self`, statics, I/O)
    Impure,
}

/// Metrics recorded for a single function.
///
/// `FunctionMetrics::new` creates a record with `cyclomatic` set to 1, the
/// other counters at zero, the boolean flags `false`, and every optional
/// field `None`; callers then fill in whatever the analysis determines.
///
/// The trailing fields marked `#[serde(default, skip_serializing_if = ...)]`
/// are omitted from serialized output when `None` and default to `None` when
/// missing on input.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct FunctionMetrics {
    /// Function name.
    pub name: String,
    /// Source file the function belongs to.
    pub file: PathBuf,
    /// Line associated with the function in `file` (presumably its declaration line — confirm).
    pub line: usize,
    /// Cyclomatic complexity; compared against the threshold in `is_complex`.
    pub cyclomatic: u32,
    /// Cognitive complexity; compared against the threshold in `is_complex`.
    pub cognitive: u32,
    /// Nesting measure (presumably maximum nesting depth — confirm).
    pub nesting: u32,
    /// Function length (presumably in lines — confirm).
    pub length: usize,
    /// Whether the function is a test.
    pub is_test: bool,
    pub visibility: Option<String>, // "pub", "pub(crate)", or None for private
    pub is_trait_method: bool,      // Whether this is a trait method implementation
    pub in_test_module: bool,       // Whether this function is inside a #[cfg(test)] module
    pub entropy_score: Option<crate::complexity::entropy_core::EntropyScore>, // Optional entropy-based complexity score
    pub is_pure: Option<bool>, // Whether the function is pure (no side effects)
    pub purity_confidence: Option<f32>, // Confidence level of purity detection (0.0 to 1.0)
    pub purity_reason: Option<String>, // Reason for purity classification (from propagation)
    pub call_dependencies: Option<Vec<String>>, // Function IDs this function calls
    pub detected_patterns: Option<Vec<String>>, // Patterns detected for complexity adjustment
    pub upstream_callers: Option<Vec<String>>, // Functions that call this function
    pub downstream_callees: Option<Vec<String>>, // Functions that this function calls
    pub mapping_pattern_result:
        Option<crate::complexity::pure_mapping_patterns::MappingPatternResult>, // Pure mapping pattern detection result (spec 118)
    pub adjusted_complexity: Option<f64>, // Adjusted complexity score after mapping pattern detection (spec 118)
    pub composition_metrics: Option<crate::analysis::CompositionMetrics>, // AST-based functional composition metrics (spec 111)
    pub language_specific: Option<LanguageSpecificData>, // Language-specific pattern detection results (spec 146)

    // Refined purity classification (replaces is_pure eventually)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub purity_level: Option<PurityLevel>,

    // Error swallowing metrics - count of error handling issues within this function
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error_swallowing_count: Option<u32>,
    // Descriptions of the error swallowing patterns found (exact string format is set by the producer)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error_swallowing_patterns: Option<Vec<String>>,

    /// Unified entropy analysis (Spec 218)
    ///
    /// This is the SINGLE SOURCE OF TRUTH for entropy-based complexity analysis.
    /// Populated from `entropy_score` during analysis and flows through the pipeline.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entropy_analysis: Option<crate::complexity::EntropyAnalysis>,
}

/// Language-specific data to avoid memory overhead for non-applicable files
///
/// Stored as an optional field on `FunctionMetrics`; only a Rust variant
/// exists at present.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum LanguageSpecificData {
    /// Pattern detection result for Rust code.
    Rust(crate::analysis::rust_patterns::RustPatternResult),
    // Future: Python(PythonPatternResult), JavaScript(JSPatternResult)
}

/// Entropy details for explainable output
///
/// Produced by `FunctionMetrics::get_entropy_details` from the function's raw
/// entropy score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyDetails {
    /// Token entropy copied from the raw score.
    pub token_entropy: f64,
    /// Pattern repetition copied from the raw score.
    pub pattern_repetition: f64,
    /// Branch similarity copied from the raw score.
    pub branch_similarity: f64,
    /// Effective complexity copied from the raw score.
    pub effective_complexity: f64,
    /// `true` when `dampening_factor` exceeds 0.1.
    pub dampening_applied: bool,
    /// Computed as `1.0 - effective_complexity`.
    pub dampening_factor: f64,
    /// Human-readable explanations of the values above.
    pub reasoning: Vec<String>,
}

impl FunctionMetrics {
    /// Builds the baseline metrics record for the function `name` located at
    /// `line` of `file`.
    ///
    /// Cyclomatic complexity starts at 1, the remaining counters at zero, all
    /// boolean flags are `false`, and every optional field is `None`.
    pub fn new(name: String, file: PathBuf, line: usize) -> Self {
        Self {
            // Identity
            name,
            file,
            line,
            // Size and complexity counters
            cyclomatic: 1,
            cognitive: 0,
            nesting: 0,
            length: 0,
            // Classification flags
            is_test: false,
            is_trait_method: false,
            in_test_module: false,
            visibility: None,
            // Purity
            is_pure: None,
            purity_confidence: None,
            purity_reason: None,
            purity_level: None,
            // Call relationships
            call_dependencies: None,
            upstream_callers: None,
            downstream_callees: None,
            // Pattern detection
            detected_patterns: None,
            mapping_pattern_result: None,
            adjusted_complexity: None,
            composition_metrics: None,
            language_specific: None,
            // Error swallowing
            error_swallowing_count: None,
            error_swallowing_patterns: None,
            // Entropy
            entropy_score: None,
            entropy_analysis: None,
        }
    }

    /// Returns `true` when the cyclomatic or the cognitive complexity is
    /// strictly greater than `threshold`.
    pub fn is_complex(&self, threshold: u32) -> bool {
        // The larger of the two exceeds the threshold exactly when at least one does.
        self.cyclomatic.max(self.cognitive) > threshold
    }

    /// Derives an explained view of the raw entropy score for verbose output.
    ///
    /// Returns `None` when no `entropy_score` is present.
    ///
    /// **DEPRECATED (Spec 218)**: read the `entropy_analysis` field instead.
    /// Kept for backward compatibility.
    #[deprecated(
        since = "0.10.0",
        note = "Use entropy_analysis field directly. See spec 218."
    )]
    pub fn get_entropy_details(&self) -> Option<EntropyDetails> {
        let score = self.entropy_score.as_ref()?;
        let dampening_factor = 1.0 - score.effective_complexity;

        // Each optional note is appended only when its condition holds; the
        // order of the notes is part of the output.
        let mut reasoning: Vec<String> = Vec::new();

        reasoning.extend((score.pattern_repetition > 0.6).then(|| {
            format!(
                "High pattern repetition detected ({}%)",
                (score.pattern_repetition * 100.0) as i32
            )
        }));

        reasoning.extend((score.token_entropy < 0.4).then(|| {
            format!(
                "Low token entropy indicates simple patterns ({:.2})",
                score.token_entropy
            )
        }));

        reasoning.extend((score.branch_similarity > 0.7).then(|| {
            format!(
                "Similar branch structures found ({}% similarity)",
                (score.branch_similarity * 100.0) as i32
            )
        }));

        // Exactly one dampening note is always present.
        reasoning.push(if dampening_factor > 0.3 {
            format!(
                "Complexity reduced by {}% due to pattern-based code",
                (dampening_factor * 100.0) as i32
            )
        } else {
            "Genuine complexity detected - minimal reduction applied".to_string()
        });

        Some(EntropyDetails {
            token_entropy: score.token_entropy,
            pattern_repetition: score.pattern_repetition,
            branch_similarity: score.branch_similarity,
            effective_complexity: score.effective_complexity,
            dampening_applied: dampening_factor > 0.1,
            dampening_factor,
            reasoning,
        })
    }

    /// Fills `entropy_analysis` from the raw `entropy_score` (Spec 218).
    ///
    /// Converts the raw score with `EntropyAnalysis::from_raw`, passing the
    /// function's cognitive complexity and a default `EntropyConfig`. When no
    /// raw score is present, `entropy_analysis` is left untouched.
    pub fn populate_entropy_analysis(&mut self) {
        let cognitive = self.cognitive;
        let converted = self.entropy_score.as_ref().map(|raw| {
            let config = crate::complexity::entropy_core::EntropyConfig::default();
            crate::complexity::EntropyAnalysis::from_raw(raw, cognitive, &config)
        });

        if let Some(analysis) = converted {
            self.entropy_analysis = Some(analysis);
        }
    }
}

/// Serde adapter that represents a `HashMap<DebtType, Vec<DebtItem>>` as a
/// sequence of `{ debt_type, items }` entries rather than as a map.
///
/// Meant to be referenced through `#[serde(with = "debt_type_map_serde")]`.
mod debt_type_map_serde {
    use super::*;
    use serde::{Deserialize, Deserializer, Serialize, Serializer};

    /// Emits one sequence element per `(debt_type, items)` pair of `map`, in
    /// the map's iteration order.
    pub fn serialize<S>(
        map: &HashMap<DebtType, Vec<DebtItem>>,
        serializer: S,
    ) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        /// Borrowed view of a single map entry.
        #[derive(Serialize)]
        struct Entry<'a> {
            debt_type: &'a DebtType,
            items: &'a Vec<DebtItem>,
        }

        let borrowed_entries = map
            .iter()
            .map(|(debt_type, items)| Entry { debt_type, items });

        serializer.collect_seq(borrowed_entries)
    }

    /// Reads a sequence of `{ debt_type, items }` entries and rebuilds the map.
    ///
    /// When the same `debt_type` occurs more than once, the last entry wins.
    pub fn deserialize<'de, D>(
        deserializer: D,
    ) -> Result<HashMap<DebtType, Vec<DebtItem>>, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Owned form of a single map entry.
        #[derive(Deserialize)]
        struct Entry {
            debt_type: DebtType,
            items: Vec<DebtItem>,
        }

        let entries: Vec<Entry> = Deserialize::deserialize(deserializer)?;

        let mut by_type = HashMap::with_capacity(entries.len());
        for Entry { debt_type, items } in entries {
            by_type.insert(debt_type, items);
        }
        Ok(by_type)
    }
}

/// Technical debt section of the analysis results.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TechnicalDebtReport {
    /// Debt findings.
    pub items: Vec<DebtItem>,
    /// Debt findings grouped by their type.
    ///
    /// Serialized through `debt_type_map_serde` as a sequence of
    /// `{ debt_type, items }` entries instead of a map.
    #[serde(with = "debt_type_map_serde")]
    pub by_type: HashMap<DebtType, Vec<DebtItem>>,
    /// Priority values (NOTE(review): how this list relates to `items` is not visible here — confirm).
    pub priorities: Vec<Priority>,
    /// Duplicated code blocks.
    pub duplications: Vec<DuplicationBlock>,
}

/// A single technical debt finding with its classification and location.
///
/// Implements `Eq` and `Hash`, so items can be stored in hashed collections.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DebtItem {
    /// Identifier of the item (format and uniqueness are determined by the producer).
    pub id: String,
    /// Kind of debt this item represents.
    pub debt_type: DebtType,
    /// Priority assigned to the item.
    pub priority: Priority,
    /// File the item refers to.
    pub file: PathBuf,
    /// Line the item refers to within `file`.
    pub line: usize,
    /// Column of the item, when available.
    pub column: Option<usize>,
    /// Message describing the finding.
    pub message: String,
    /// Optional additional context (content is set by the producer).
    pub context: Option<String>,
}

// Re-export DebtType from priority module (spec 203)
// This consolidates the duplicate DebtType definitions into a single source of truth
pub use crate::priority::DebtType;

/// Priority of a debt item.
///
/// `Ord`/`PartialOrd` are derived, so variants compare in declaration order:
/// `Low < Medium < High < Critical`. Reordering the variants changes that
/// ordering.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, Copy, Ord, PartialOrd)]
pub enum Priority {
    /// Lowest priority.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Highest priority.
    Critical,
}

/// Renders a priority as its variant name: `"Low"`, `"Medium"`, `"High"` or
/// `"Critical"`.
///
/// Width, fill and precision requested by the caller's format spec are not
/// applied; the plain label is written as-is.
impl std::fmt::Display for Priority {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // An exhaustive match (no wildcard arm) makes the compiler reject this
        // impl if a variant is added without a label, instead of silently
        // printing a fallback string at runtime.
        let label = match self {
            Priority::Low => "Low",
            Priority::Medium => "Medium",
            Priority::High => "High",
            Priority::Critical => "Critical",
        };

        f.write_str(label)
    }
}

/// Dependency section of the analysis results.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DependencyReport {
    /// Dependency information per module.
    pub modules: Vec<ModuleDependency>,
    /// Circular dependencies (detection happens outside this module).
    pub circular: Vec<CircularDependency>,
}

/// Dependency relationships of a single module.
///
/// NOTE(review): the naming scheme used for module strings is not visible in
/// this file — confirm against the producer.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ModuleDependency {
    /// Name of the module being described.
    pub module: String,
    /// Names this module depends on.
    pub dependencies: Vec<String>,
    /// Names that depend on this module.
    pub dependents: Vec<String>,
}

/// A circular dependency, stored as a list of names.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CircularDependency {
    /// Members of the cycle (presumably module names in cycle order; whether the
    /// first element is repeated at the end is not shown here — confirm).
    pub cycle: Vec<String>,
}

/// A block of duplicated code and the places where it occurs.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DuplicationBlock {
    /// Hash identifying the block (the hashing scheme is defined by the producer).
    pub hash: u64,
    /// Size of the block in lines.
    pub lines: usize,
    /// Locations at which the block appears.
    pub locations: Vec<DuplicationLocation>,
}

/// One occurrence of a duplicated block: a line range within a file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DuplicationLocation {
    /// File containing the occurrence.
    pub file: PathBuf,
    /// First line of the occurrence (NOTE(review): 0- or 1-based is not shown here).
    pub start_line: usize,
    /// Last line of the occurrence (NOTE(review): inclusive or exclusive is not shown here).
    pub end_line: usize,
}

/// Metrics and findings collected for a single source file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FileMetrics {
    /// Path of the file.
    pub path: PathBuf,
    /// Language of the file.
    pub language: Language,
    /// Complexity metrics for the file and its functions.
    pub complexity: ComplexityMetrics,
    /// Debt items found in the file.
    pub debt_items: Vec<DebtItem>,
    /// Dependencies referenced by the file.
    pub dependencies: Vec<Dependency>,
    /// Duplicated code blocks associated with the file.
    pub duplications: Vec<DuplicationBlock>,
    /// Total number of lines in the file, captured during initial parsing.
    /// Used to avoid redundant file I/O in later analysis phases.
    ///
    /// Defaults to 0 when the field is missing during deserialization.
    #[serde(default)]
    pub total_lines: usize,
    /// Module-scope analysis, when available; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub module_scope: Option<ast::ModuleScopeAnalysis>,
    /// Class definitions, when available; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub classes: Option<Vec<ast::ClassDef>>,
}

/// Complexity figures together with the function metrics they cover.
///
/// `Default` yields an empty function list and zero for both totals.
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
pub struct ComplexityMetrics {
    /// Metrics of the individual functions.
    pub functions: Vec<FunctionMetrics>,
    /// Cyclomatic complexity figure (NOTE(review): how it is aggregated over
    /// `functions` is decided by the producer — confirm).
    pub cyclomatic_complexity: u32,
    /// Cognitive complexity figure (same caveat as `cyclomatic_complexity`).
    pub cognitive_complexity: u32,
}

impl ComplexityMetrics {
    /// Wraps a single function's metrics.
    ///
    /// The result holds a clone of `func` as its only entry and takes both
    /// complexity figures directly from that function.
    pub fn from_function(func: &FunctionMetrics) -> Self {
        let functions = vec![func.clone()];
        let cyclomatic_complexity = func.cyclomatic;
        let cognitive_complexity = func.cognitive;

        Self {
            functions,
            cyclomatic_complexity,
            cognitive_complexity,
        }
    }
}

/// A dependency referenced by a file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Dependency {
    /// Name of the dependency (naming scheme is set by the producer).
    pub name: String,
    /// Category of the dependency.
    pub kind: DependencyKind,
}

/// Category of a `Dependency`.
///
/// NOTE(review): the exact rule separating the variants is applied by the
/// analyzers outside this module and is not visible here.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum DependencyKind {
    /// Import-level dependency.
    Import,
    /// Module-level dependency.
    Module,
    /// Package-level dependency.
    Package,
}

/// Source language of a file.
///
/// Obtained from a file extension via `Language::from_extension` or from a
/// path via `Language::from_path`; anything unrecognized maps to `Unknown`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Copy)]
pub enum Language {
    /// Extension `rs`.
    Rust,
    /// Extensions `py`, `pyw`.
    Python,
    /// Extensions `js`, `mjs`, `cjs`, `jsx`.
    JavaScript,
    /// Extensions `ts`, `mts`, `cts`, `tsx`.
    TypeScript,
    /// Any other extension, or no usable extension at all.
    Unknown,
}

impl Language {
    /// Maps a file extension to its language.
    ///
    /// The comparison is exact and case-sensitive, and the extension is
    /// expected without a leading dot (`"rs"`, not `".rs"`). Unrecognized
    /// extensions yield `Language::Unknown`.
    pub fn from_extension(ext: &str) -> Self {
        match ext {
            "rs" => Language::Rust,
            "py" | "pyw" => Language::Python,
            "js" | "mjs" | "cjs" | "jsx" => Language::JavaScript,
            "ts" | "mts" | "cts" | "tsx" => Language::TypeScript,
            _ => Language::Unknown,
        }
    }

    /// Determines the language from the extension of `path`.
    ///
    /// Yields `Language::Unknown` when the path has no extension or the
    /// extension is not valid UTF-8.
    pub fn from_path(path: &std::path::Path) -> Self {
        match path.extension().and_then(|raw| raw.to_str()) {
            Some(extension) => Self::from_extension(extension),
            None => Language::Unknown,
        }
    }

    /// Returns `true` for `JavaScript` and `TypeScript`, `false` otherwise.
    pub fn is_js_ts(&self) -> bool {
        matches!(self, Language::JavaScript | Language::TypeScript)
    }
}

/// Renders a language as its variant name: `"Rust"`, `"Python"`,
/// `"JavaScript"`, `"TypeScript"` or `"Unknown"`.
///
/// Width, fill and precision requested by the caller's format spec are not
/// applied; the plain label is written as-is.
impl std::fmt::Display for Language {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // An exhaustive match (no wildcard arm) makes the compiler reject this
        // impl if a variant is added without a label, instead of silently
        // labelling the new language "Unknown" at runtime.
        let label = match self {
            Language::Rust => "Rust",
            Language::Python => "Python",
            Language::JavaScript => "JavaScript",
            Language::TypeScript => "TypeScript",
            Language::Unknown => "Unknown",
        };

        f.write_str(label)
    }
}