debtmap 0.16.4

Code complexity and technical debt analyzer
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
//! File classification and context-aware size threshold determination.
//!
//! This module provides heuristics to classify files by their purpose and
//! architectural role, enabling context-appropriate size thresholds rather
//! than one-size-fits-all limits.

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::Path;

/// File type classification based on purpose and content patterns
///
/// Produced by `classify_file`. The variant selects the base size threshold in
/// `get_threshold` and the severity scale used by `recommendation_level`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileType {
    /// Business logic and application code (strict thresholds)
    ///
    /// This is the fallback of `classify_file` when no other category matches.
    BusinessLogic,
    /// Generated code from code generators (lenient/suppressed)
    ///
    /// `tool` names the generator when `detect_generator` recognises one.
    GeneratedCode { tool: Option<String> },
    /// Test code (unit, integration, property, benchmark)
    TestCode { test_type: TestType },
    /// Declarative configuration (flags, schemas, routes, builders)
    DeclarativeConfig { config_type: ConfigType },
    /// Procedural macros
    ProceduralMacro,
    /// Build scripts
    BuildScript,
    /// Unknown/unclassified
    ///
    /// `classify_file` never returns this variant; `get_threshold` gives it
    /// the same base threshold as `BusinessLogic`.
    Unknown,
}

/// Kind of test code, inferred by `detect_test_type` from markers in the source text.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TestType {
    /// Default when no more specific marker is found.
    Unit,
    /// Chosen when the source text contains `tests/integration`.
    Integration,
    /// Chosen when the source contains `proptest!` or `quickcheck!`.
    Property,
    /// Chosen when the source contains `#[bench]` or `criterion`.
    Benchmark,
}

/// Kind of declarative configuration, inferred by `detect_config_type` from the source text.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ConfigType {
    /// Chosen when the source mentions `clap::Parser` or `structopt`.
    Flags,
    /// Chosen when the source mentions both `serde::` and `Deserialize`.
    Schema,
    /// Chosen when the source mentions `Router` or `routes`.
    Routes,
    /// Default when none of the other markers is present.
    Builder,
}

/// Line-count goal suggested for a file, as returned by `calculate_reduction_target`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReductionTarget {
    /// Single target line count
    Single(usize),
    /// Phased reduction for very large files
    Phased {
        /// Line count to reach in the first phase.
        phase1: usize,
        /// Line count to reach in the second phase.
        phase2: usize,
        /// Line count to reach once the reduction is complete.
        final_target: usize,
    },
    /// Reduction not recommended (with reason)
    NotRecommended { reason: String },
}

/// Size thresholds for a specific file type
///
/// Built by `get_threshold`; consumed by `calculate_reduction_target` and
/// `recommendation_level`.
#[derive(Debug, Clone)]
pub struct FileSizeThresholds {
    /// Line count above which a file is considered oversized. `get_threshold`
    /// stores the density-adjusted value here, not the raw per-type base.
    pub base_threshold: usize,
    /// Upper line-count bound; `get_threshold` sets it to twice `base_threshold`.
    pub max_threshold: usize,
    /// Assumed minimum number of lines per function, used by
    /// `calculate_reduction_target` to avoid targets below
    /// `function_count * min_lines_per_function`.
    pub min_lines_per_function: f32,
}

/// Complete file size analysis with context
///
/// Bundles the classification, the thresholds and the derived recommendation
/// for a single file. Nothing in this module constructs it.
#[derive(Debug)]
pub struct FileSizeAnalysis {
    /// Classification of the file.
    pub file_type: FileType,
    /// Number of lines currently in the file.
    pub current_lines: usize,
    /// Thresholds that apply to this file.
    pub threshold: FileSizeThresholds,
    /// Suggested line-count goal.
    pub reduction_target: ReductionTarget,
    /// NOTE(review): presumably lines per function, matching the density
    /// computed in `get_threshold` — confirm against the code that fills it.
    pub function_density: f32,
    /// Severity of the size recommendation.
    pub recommendation_level: RecommendationLevel,
}

/// Severity of a file-size recommendation, as computed by `recommendation_level`.
///
/// "Ratio" below means `current_lines / base_threshold`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RecommendationLevel {
    Critical,   // Ratio > 2.0, business logic only
    High,       // Ratio > 1.5 for business logic; ratio > 2.0 for other non-generated files
    Medium,     // Ratio > 1.0 for business logic; ratio > 1.5 for other non-generated files
    Low,        // Everything else, including files at or below the threshold
    Suppressed, // Generated code (the only file type `recommendation_level` suppresses)
}

// Pre-compiled regex patterns for performance
//
// Each pattern is wrapped in `Lazy` so the regex is built once and then reused.
// All three use the `(?m)` flag so `^`/`$` anchor at line boundaries.
// The `unwrap()` calls operate on literal patterns, so a failure would be a
// programming error rather than a runtime condition.

// Public struct field whose type is a single word, e.g. `pub verbose: bool,`.
// Types containing anything beyond `\w` characters (such as `Vec<String>`)
// do not match.
static FIELD_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^\s*pub\s+\w+:\s+\w+,?\s*$").unwrap());

// Start of a `#[derive(` attribute at the beginning of a line.
static DERIVE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^\s*#\[derive\(").unwrap());

// Public method that takes `mut self` by value, e.g. `pub fn name(mut self`.
static BUILDER_METHOD_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^\s*pub\s+fn\s+\w+\(mut\s+self").unwrap());

/// Classify a file by inspecting its source text and its path.
///
/// The checks run in a fixed priority order and the first match wins:
///
/// 1. generated-code markers in the source,
/// 2. test paths or test attributes,
/// 3. predominantly declarative content,
/// 4. a procedural-macro directory in the path,
/// 5. a `build.rs` file name.
///
/// A file that matches none of these is reported as `FileType::BusinessLogic`.
pub fn classify_file(source: &str, path: &Path) -> FileType {
    if is_generated_code(source) {
        let tool = detect_generator(source);
        return FileType::GeneratedCode { tool };
    }

    if is_test_file(path, source) {
        let test_type = detect_test_type(source);
        return FileType::TestCode { test_type };
    }

    if is_declarative_config(source) {
        let config_type = detect_config_type(source);
        return FileType::DeclarativeConfig { config_type };
    }

    if is_proc_macro(path) {
        return FileType::ProceduralMacro;
    }

    if is_build_script(path) {
        return FileType::BuildScript;
    }

    FileType::BusinessLogic
}

/// Report whether the file header carries a well-known "generated file" marker.
///
/// Only the first 20 lines of `source` are inspected, and the comparison is
/// case-sensitive.
fn is_generated_code(source: &str) -> bool {
    const HEADER_LINES: usize = 20;
    const MARKERS: [&str; 6] = [
        "DO NOT EDIT",
        "automatically generated",
        "AUTO-GENERATED",
        "@generated",
        "Code generated by",
        "autogenerated",
    ];

    for header_line in source.lines().take(HEADER_LINES) {
        for marker in MARKERS.iter() {
            if header_line.contains(*marker) {
                return true;
            }
        }
    }

    false
}

/// Guess which code generator produced the file.
///
/// Returns the tool name for the first known marker found anywhere in
/// `source`, or `None` when no marker is present.
fn detect_generator(source: &str) -> Option<String> {
    // Ordered by priority: the first marker found in the source wins.
    const KNOWN_GENERATORS: [(&str, &str); 4] = [
        ("prost::Message", "prost"),
        ("diesel::", "diesel"),
        ("tonic::", "tonic"),
        ("sea_orm::", "sea-orm"),
    ];

    KNOWN_GENERATORS
        .iter()
        .find(|(marker, _)| source.contains(*marker))
        .map(|(_, tool)| tool.to_string())
}

/// Check if file is a test file
///
/// A file counts as test code when its location or its content says so:
///
/// - any directory component of `path` is named `tests` or `benches`,
/// - the path ends with `_test.rs` or `_tests.rs`, or
/// - `source` contains `#[test]`, `#[cfg(test)]` or `#[bench]`.
///
/// Directories are matched on path components rather than on the raw path
/// string, so relative paths such as `tests/it.rs` and platform-specific
/// separators are recognised too. Because of the attribute check, a regular
/// source file with an inline `#[cfg(test)]` module is also reported as test
/// code.
fn is_test_file(path: &Path, source: &str) -> bool {
    // Only the directories leading to the file are considered, never the
    // file name itself.
    let in_test_dir = path.parent().map_or(false, |dir| {
        dir.components().any(|part| {
            let name = part.as_os_str();
            name == "tests" || name == "benches"
        })
    });

    let path_str = path.to_string_lossy();
    let has_test_suffix = path_str.ends_with("_test.rs") || path_str.ends_with("_tests.rs");

    // Check for test attributes
    let has_test_attrs = source.contains("#[test]")
        || source.contains("#[cfg(test)]")
        || source.contains("#[bench]");

    in_test_dir || has_test_suffix || has_test_attrs
}

/// Infer which kind of test code `source` contains.
///
/// Markers are checked in priority order: property-testing macros first, then
/// benchmark markers, then the literal text `tests/integration`. Anything
/// else is treated as a unit test.
fn detect_test_type(source: &str) -> TestType {
    let mentions = |needle: &str| source.contains(needle);

    if mentions("proptest!") || mentions("quickcheck!") {
        return TestType::Property;
    }
    if mentions("#[bench]") || mentions("criterion") {
        return TestType::Benchmark;
    }
    if mentions("tests/integration") {
        return TestType::Integration;
    }

    TestType::Unit
}

/// Decide whether a file consists mostly of declarative constructs.
///
/// Counts the matches of the field, derive and builder-method patterns and
/// compares their sum with the total number of lines in `source` (blank lines
/// included). The file is declarative when that ratio is strictly greater
/// than 0.5. An empty source is never declarative.
fn is_declarative_config(source: &str) -> bool {
    let declarative_hits: usize = [&FIELD_PATTERN, &DERIVE_PATTERN, &BUILDER_METHOD_PATTERN]
        .iter()
        .map(|pattern| pattern.find_iter(source).count())
        .sum();

    match source.lines().count() {
        // Avoid dividing by zero for empty input.
        0 => false,
        line_count => (declarative_hits as f32 / line_count as f32) > 0.5,
    }
}

/// Infer which kind of declarative configuration `source` contains.
///
/// Priority order: command-line flag definitions, then serde schemas, then
/// routing tables. Anything else is treated as a builder.
fn detect_config_type(source: &str) -> ConfigType {
    let has = |needle: &str| source.contains(needle);

    let looks_like_flags = has("clap::Parser") || has("structopt");
    let looks_like_schema = has("serde::") && has("Deserialize");
    let looks_like_routes = has("Router") || has("routes");

    match (looks_like_flags, looks_like_schema, looks_like_routes) {
        (true, _, _) => ConfigType::Flags,
        (false, true, _) => ConfigType::Schema,
        (false, false, true) => ConfigType::Routes,
        (false, false, false) => ConfigType::Builder,
    }
}

/// Check if file is a procedural macro
///
/// Returns `true` when any directory component of `path` is named
/// `proc-macro` or `macros`. Matching is done on path components rather than
/// on the raw path string, so relative paths such as `macros/src/lib.rs` and
/// platform-specific separators are recognised. The file name itself is never
/// considered, so `src/macros.rs` does not match.
fn is_proc_macro(path: &Path) -> bool {
    path.parent().map_or(false, |dir| {
        dir.components().any(|part| {
            let name = part.as_os_str();
            name == "proc-macro" || name == "macros"
        })
    })
}

/// Report whether `path` points at a Cargo build script.
///
/// Only the final path component is examined; it must be exactly `build.rs`.
/// File names that are not valid UTF-8 never match.
fn is_build_script(path: &Path) -> bool {
    let file_name = path.file_name().and_then(|name| name.to_str());
    matches!(file_name, Some("build.rs"))
}

/// Compute the size thresholds that apply to a file of the given type.
///
/// The per-type base line count is scaled by `adjust_for_density` using the
/// average number of lines per function (`lines / function_count`). A file
/// without functions is treated as having density `0.0`, which leaves the
/// base value unscaled.
///
/// The returned `max_threshold` is twice the adjusted base, and
/// `min_lines_per_function` is fixed at `3.0`.
pub fn get_threshold(
    file_type: &FileType,
    function_count: usize,
    lines: usize,
) -> FileSizeThresholds {
    let base_lines = match file_type {
        FileType::BuildScript => 300,
        FileType::BusinessLogic | FileType::Unknown => 400,
        FileType::ProceduralMacro => 500,
        FileType::TestCode { .. } => 650,
        FileType::DeclarativeConfig { .. } => 1200,
        FileType::GeneratedCode { .. } => 5000,
    };

    // Average lines per function; guard against division by zero.
    let lines_per_function = match function_count {
        0 => 0.0,
        count => lines as f32 / count as f32,
    };

    let base_threshold = adjust_for_density(base_lines, lines_per_function);

    FileSizeThresholds {
        base_threshold,
        max_threshold: base_threshold * 2,
        min_lines_per_function: 3.0,
    }
}

/// Scale a base threshold according to the average lines per function.
///
/// | density (lines/function) | multiplier |
/// |--------------------------|------------|
/// | below 5                  | 1.0        |
/// | 5 up to (not incl.) 10   | 1.2        |
/// | 10 up to (not incl.) 20  | 1.5        |
/// | anything else            | 2.0        |
///
/// The scaled value is truncated towards zero.
fn adjust_for_density(base_threshold: usize, density: f32) -> usize {
    if density < 5.0 {
        // Many small functions: keep the strict base threshold untouched.
        return base_threshold;
    }

    // Fewer, larger functions earn progressively more leeway.
    let leniency: f32 = if density < 10.0 {
        1.2
    } else if density < 20.0 {
        1.5
    } else {
        2.0
    };

    (base_threshold as f32 * leniency) as usize
}

/// Calculate a practical reduction target
///
/// The final target is the larger of `threshold.base_threshold` and the
/// minimum achievable size (`function_count * threshold.min_lines_per_function`).
///
/// Returns:
///
/// - `NotRecommended` when the file is already within the threshold, or when
///   it is already at or below the minimum achievable size, so no returned
///   target is ever greater than or equal to `current_lines`.
/// - `Phased` when the file is more than three times the threshold and halving
///   it is still above the final target. Phases never fall below the final
///   target: `phase1 >= phase2 >= final_target`.
/// - `Single` otherwise.
pub fn calculate_reduction_target(
    current_lines: usize,
    threshold: &FileSizeThresholds,
    function_count: usize,
) -> ReductionTarget {
    let base = threshold.base_threshold;

    if current_lines <= base {
        // Already within threshold
        return ReductionTarget::NotRecommended {
            reason: "File is already within size threshold".to_string(),
        };
    }

    // Minimum achievable size based on function count
    let min_achievable = (function_count as f32 * threshold.min_lines_per_function) as usize;

    // Don't suggest reducing below achievable minimum
    let target = base.max(min_achievable);

    if current_lines <= target {
        // The only available "target" would be no smaller than the file
        // itself, which is not a reduction at all.
        return ReductionTarget::NotRecommended {
            reason: "File is already at or below the minimum achievable size for its function count"
                .to_string(),
        };
    }

    let halfway = current_lines / 2;

    // A phased plan only makes sense when the first phase stays above the
    // final target; otherwise go straight to the target.
    if current_lines > base.saturating_mul(3) && halfway > target {
        ReductionTarget::Phased {
            phase1: halfway,
            // Never plan an intermediate step below the final target.
            phase2: ((base as f32 * 1.5) as usize).max(target),
            final_target: target,
        }
    } else {
        ReductionTarget::Single(target)
    }
}

/// Rate how urgently a file should be reduced in size.
///
/// Generated code is always `Suppressed`. For every other file type the
/// rating depends on `current_lines / threshold.base_threshold`:
///
/// | ratio  | business logic | other file types |
/// |--------|----------------|------------------|
/// | > 2.0  | `Critical`     | `High`           |
/// | > 1.5  | `High`         | `Medium`         |
/// | > 1.0  | `Medium`       | `Low`            |
/// | else   | `Low`          | `Low`            |
pub fn recommendation_level(
    file_type: &FileType,
    current_lines: usize,
    threshold: &FileSizeThresholds,
) -> RecommendationLevel {
    if matches!(file_type, FileType::GeneratedCode { .. }) {
        return RecommendationLevel::Suppressed;
    }

    let size_ratio = current_lines as f32 / threshold.base_threshold as f32;

    if matches!(file_type, FileType::BusinessLogic) {
        // Business logic is held to the strictest scale.
        match size_ratio {
            r if r > 2.0 => RecommendationLevel::Critical,
            r if r > 1.5 => RecommendationLevel::High,
            r if r > 1.0 => RecommendationLevel::Medium,
            _ => RecommendationLevel::Low,
        }
    } else {
        // Every other file type is rated one step more leniently.
        match size_ratio {
            r if r > 2.0 => RecommendationLevel::High,
            r if r > 1.5 => RecommendationLevel::Medium,
            _ => RecommendationLevel::Low,
        }
    }
}

// Unit tests for the classification heuristics and the threshold arithmetic.
#[cfg(test)]
mod tests {
    use super::*;

    // A "DO NOT EDIT" banner in the first lines marks the file as generated.
    #[test]
    fn test_generated_code_detection() {
        let generated = r#"
// DO NOT EDIT
// This file is automatically generated
pub struct Generated {}
        "#;
        assert!(is_generated_code(generated));
    }

    // Ordinary source without any marker must not be flagged as generated.
    #[test]
    fn test_not_generated_code() {
        let normal = r#"
pub struct Normal {
    field: String,
}
        "#;
        assert!(!is_generated_code(normal));
    }

    // A struct made almost entirely of simple `pub` fields plus a derive
    // exceeds the 50% declarative-line ratio.
    #[test]
    fn test_declarative_config_detection() {
        let flags = r#"
#[derive(Debug)]
pub struct Flags {
    pub verbose: bool,
    pub quiet: bool,
    pub output: PathBuf,
    pub debug: bool,
    pub trace: bool,
    pub log_level: String,
    pub log_file: PathBuf,
}
        "#;
        assert!(is_declarative_config(flags));
    }

    // The final target must never drop below
    // `function_count * min_lines_per_function`.
    #[test]
    fn test_reduction_target_respects_function_count() {
        let threshold = FileSizeThresholds {
            base_threshold: 500,
            max_threshold: 1000,
            min_lines_per_function: 3.0,
        };

        let target = calculate_reduction_target(2000, &threshold, 600);
        // Should not suggest <1800 lines (600 functions * 3 lines)
        match target {
            ReductionTarget::Single(t) => assert!(t >= 1800),
            ReductionTarget::Phased { final_target, .. } => assert!(final_target >= 1800),
            _ => panic!("Expected reduction target"),
        }
    }

    // Density below 5 keeps the base threshold; density of 20 or more doubles it.
    #[test]
    fn test_function_density_adjustment() {
        let low_density = adjust_for_density(400, 4.0); // Many small functions
        let high_density = adjust_for_density(400, 25.0); // Few large functions

        assert_eq!(low_density, 400); // Strict threshold
        assert!(high_density > 600); // More lenient
    }

    // Test attributes in the source are enough to classify a file as test
    // code, even when the path itself does not look like a test path.
    #[test]
    fn test_test_file_detection() {
        let test_code = r#"
#[cfg(test)]
mod tests {
    #[test]
    fn test_something() {}
}
        "#;
        let path = Path::new("src/main.rs");
        assert!(is_test_file(path, test_code));
    }

    // Only a file named exactly `build.rs` is a build script.
    #[test]
    fn test_build_script_detection() {
        assert!(is_build_script(Path::new("build.rs")));
        assert!(!is_build_script(Path::new("src/main.rs")));
    }

    // Business logic walks the Critical / High / Medium scale as the
    // size-to-threshold ratio shrinks.
    #[test]
    fn test_recommendation_level_for_business_logic() {
        let file_type = FileType::BusinessLogic;
        let threshold = FileSizeThresholds {
            base_threshold: 400,
            max_threshold: 800,
            min_lines_per_function: 3.0,
        };

        // >2x threshold
        assert_eq!(
            recommendation_level(&file_type, 900, &threshold),
            RecommendationLevel::Critical
        );

        // >1.5x threshold
        assert_eq!(
            recommendation_level(&file_type, 650, &threshold),
            RecommendationLevel::High
        );

        // >1x threshold
        assert_eq!(
            recommendation_level(&file_type, 450, &threshold),
            RecommendationLevel::Medium
        );
    }

    // Generated code is suppressed regardless of how large the file is.
    #[test]
    fn test_generated_code_suppressed() {
        let file_type = FileType::GeneratedCode { tool: None };
        let threshold = FileSizeThresholds {
            base_threshold: 400,
            max_threshold: 800,
            min_lines_per_function: 3.0,
        };

        assert_eq!(
            recommendation_level(&file_type, 10000, &threshold),
            RecommendationLevel::Suppressed
        );
    }
}