Skip to main content

codelens_core/analyzer/
stats.rs

1//! Statistics data structures.
2
3use indexmap::IndexMap;
4use serde::Serialize;
5use std::collections::HashMap;
6use std::path::PathBuf;
7use std::time::Duration;
8
9/// Statistics for a single file.
10#[derive(Debug, Clone, Default, Serialize)]
11pub struct FileStats {
12    /// File path.
13    pub path: PathBuf,
14    /// Detected language name.
15    pub language: String,
16    /// Line statistics.
17    pub lines: LineStats,
18    /// File size in bytes.
19    pub size: u64,
20    /// Complexity metrics.
21    pub complexity: Complexity,
22}
23
24/// Line count statistics.
25#[derive(Debug, Clone, Default, Serialize, PartialEq, Eq)]
26pub struct LineStats {
27    /// Total number of lines.
28    pub total: usize,
29    /// Number of code lines (non-blank, non-comment).
30    pub code: usize,
31    /// Number of comment lines.
32    pub comment: usize,
33    /// Number of blank lines.
34    pub blank: usize,
35}
36
37impl LineStats {
38    /// Create a new LineStats with all zeros.
39    pub fn new() -> Self {
40        Self::default()
41    }
42
43    /// Add another LineStats to this one.
44    pub fn add(&mut self, other: &LineStats) {
45        self.total += other.total;
46        self.code += other.code;
47        self.comment += other.comment;
48        self.blank += other.blank;
49    }
50}
51
52impl std::ops::Add for LineStats {
53    type Output = Self;
54
55    fn add(self, other: Self) -> Self {
56        Self {
57            total: self.total + other.total,
58            code: self.code + other.code,
59            comment: self.comment + other.comment,
60            blank: self.blank + other.blank,
61        }
62    }
63}
64
65impl std::ops::AddAssign for LineStats {
66    fn add_assign(&mut self, other: Self) {
67        self.add(&other);
68    }
69}
70
71/// Code complexity metrics.
72#[derive(Debug, Clone, Default, Serialize)]
73pub struct Complexity {
74    /// Number of functions/methods.
75    pub functions: usize,
76    /// Total cyclomatic complexity.
77    pub cyclomatic: usize,
78    /// Maximum nesting depth.
79    pub max_depth: usize,
80    /// Average lines per function.
81    pub avg_func_lines: f64,
82}
83
84impl Complexity {
85    /// Add another Complexity to this one.
86    pub fn add(&mut self, other: &Complexity) {
87        self.functions += other.functions;
88        self.cyclomatic += other.cyclomatic;
89        self.max_depth = self.max_depth.max(other.max_depth);
90    }
91}
92
93/// File size distribution buckets.
94#[derive(Debug, Clone, Default, Serialize)]
95pub struct SizeDistribution {
96    /// Files < 1KB
97    pub tiny: usize,
98    /// Files 1KB - 10KB
99    pub small: usize,
100    /// Files 10KB - 100KB
101    pub medium: usize,
102    /// Files 100KB - 1MB
103    pub large: usize,
104    /// Files > 1MB
105    pub huge: usize,
106}
107
108impl SizeDistribution {
109    /// Add a file size to the distribution.
110    pub fn add(&mut self, size: u64) {
111        match size {
112            s if s < 1024 => self.tiny += 1,
113            s if s < 10 * 1024 => self.small += 1,
114            s if s < 100 * 1024 => self.medium += 1,
115            s if s < 1024 * 1024 => self.large += 1,
116            _ => self.huge += 1,
117        }
118    }
119}
120
121/// Statistics grouped by language.
122#[derive(Debug, Clone, Default, Serialize)]
123pub struct LanguageSummary {
124    /// Number of files.
125    pub files: usize,
126    /// Line statistics.
127    pub lines: LineStats,
128    /// Total size in bytes.
129    pub size: u64,
130    /// Complexity metrics.
131    pub complexity: Complexity,
132}
133
134/// Repository statistics.
135#[derive(Debug, Clone, Serialize)]
136pub struct RepoStats {
137    /// Repository name.
138    pub name: String,
139    /// Repository path.
140    pub path: PathBuf,
141    /// Primary language (by code lines).
142    pub primary_language: String,
143    /// All file statistics.
144    pub files: Vec<FileStats>,
145    /// Summary statistics.
146    pub summary: RepoSummary,
147    /// Statistics by language.
148    pub by_language: IndexMap<String, LanguageSummary>,
149    /// Git information (if available).
150    pub git_info: Option<GitInfo>,
151}
152
153/// Repository summary statistics.
154#[derive(Debug, Clone, Default, Serialize)]
155pub struct RepoSummary {
156    /// Total number of code files.
157    pub total_files: usize,
158    /// Line statistics.
159    pub lines: LineStats,
160    /// Total size in bytes.
161    pub total_size: u64,
162    /// Complexity metrics.
163    pub complexity: Complexity,
164    /// File size distribution.
165    pub size_distribution: SizeDistribution,
166}
167
168/// Git repository information.
169#[derive(Debug, Clone, Serialize)]
170pub struct GitInfo {
171    /// Current branch name.
172    pub branch: Option<String>,
173    /// Last commit hash.
174    pub commit: Option<String>,
175    /// Last commit author.
176    pub author: Option<String>,
177    /// Last commit date.
178    pub date: Option<String>,
179}
180
181/// Overall analysis summary.
182#[derive(Debug, Clone, Default, Serialize)]
183pub struct Summary {
184    /// Total number of files.
185    pub total_files: usize,
186    /// Total line statistics.
187    pub lines: LineStats,
188    /// Total size in bytes.
189    pub total_size: u64,
190    /// Statistics by language.
191    pub by_language: IndexMap<String, LanguageSummary>,
192    /// File size distribution.
193    pub size_distribution: SizeDistribution,
194    /// Complexity metrics.
195    pub complexity: Complexity,
196}
197
198impl Summary {
199    /// Build summary from a list of file statistics.
200    pub fn from_file_stats(files: &[FileStats]) -> Self {
201        let mut summary = Summary::default();
202        let mut by_language: HashMap<String, LanguageSummary> = HashMap::new();
203
204        for file in files {
205            summary.total_files += 1;
206            summary.lines.add(&file.lines);
207            summary.total_size += file.size;
208            summary.size_distribution.add(file.size);
209            summary.complexity.add(&file.complexity);
210
211            let lang_summary = by_language.entry(file.language.clone()).or_default();
212            lang_summary.files += 1;
213            lang_summary.lines.add(&file.lines);
214            lang_summary.size += file.size;
215            lang_summary.complexity.add(&file.complexity);
216        }
217
218        // Sort by code lines (descending)
219        let mut sorted: Vec<_> = by_language.into_iter().collect();
220        sorted.sort_by(|a, b| b.1.lines.code.cmp(&a.1.lines.code));
221        summary.by_language = sorted.into_iter().collect();
222
223        // Calculate average function lines
224        if summary.complexity.functions > 0 {
225            summary.complexity.avg_func_lines =
226                summary.lines.code as f64 / summary.complexity.functions as f64;
227        }
228
229        summary
230    }
231}
232
233/// Complete analysis result.
234#[derive(Debug, Clone, Serialize)]
235pub struct AnalysisResult {
236    /// All file statistics.
237    pub files: Vec<FileStats>,
238    /// Overall summary.
239    pub summary: Summary,
240    /// Analysis duration.
241    #[serde(with = "duration_serde")]
242    pub elapsed: Duration,
243    /// Number of files scanned.
244    pub scanned_files: usize,
245    /// Number of files skipped.
246    pub skipped_files: usize,
247}
248
249mod duration_serde {
250    use serde::{Deserialize, Deserializer, Serialize, Serializer};
251    use std::time::Duration;
252
253    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
254    where
255        S: Serializer,
256    {
257        duration.as_secs_f64().serialize(serializer)
258    }
259
260    #[allow(dead_code)]
261    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
262    where
263        D: Deserializer<'de>,
264    {
265        let secs = f64::deserialize(deserializer)?;
266        Ok(Duration::from_secs_f64(secs))
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `LineStats` fixture.
    fn lines(total: usize, code: usize, comment: usize, blank: usize) -> LineStats {
        LineStats {
            total,
            code,
            comment,
            blank,
        }
    }

    /// Shorthand for a `Complexity` fixture.
    fn complexity(
        functions: usize,
        cyclomatic: usize,
        max_depth: usize,
        avg_func_lines: f64,
    ) -> Complexity {
        Complexity {
            functions,
            cyclomatic,
            max_depth,
            avg_func_lines,
        }
    }

    /// Shorthand for a `FileStats` fixture.
    fn file(
        path: &str,
        language: &str,
        lines: LineStats,
        size: u64,
        complexity: Complexity,
    ) -> FileStats {
        FileStats {
            path: PathBuf::from(path),
            language: language.to_string(),
            lines,
            size,
            complexity,
        }
    }

    #[test]
    fn test_line_stats_default() {
        assert_eq!(LineStats::default(), lines(0, 0, 0, 0));
        assert_eq!(LineStats::new(), LineStats::default());
    }

    #[test]
    fn test_line_stats_add() {
        let mut acc = lines(100, 80, 10, 10);
        let other = lines(50, 40, 5, 5);

        acc.add(&other);

        assert_eq!(acc, lines(150, 120, 15, 15));
        // The argument is only borrowed and must be unchanged.
        assert_eq!(other, lines(50, 40, 5, 5));
    }

    #[test]
    fn test_line_stats_add_trait() {
        let sum = lines(100, 80, 10, 10) + lines(50, 40, 5, 5);

        // Compare the whole struct so every field is covered.
        assert_eq!(sum, lines(150, 120, 15, 15));
    }

    #[test]
    fn test_line_stats_add_assign() {
        let mut acc = lines(100, 80, 10, 10);

        acc += lines(50, 40, 5, 5);

        assert_eq!(acc, lines(150, 120, 15, 15));
    }

    #[test]
    fn test_complexity_add() {
        let mut merged = complexity(10, 20, 5, 0.0);
        let other = complexity(5, 10, 8, 0.0);

        merged.add(&other);

        assert_eq!(merged.functions, 15);
        assert_eq!(merged.cyclomatic, 30);
        assert_eq!(merged.max_depth, 8); // max of 5 and 8
    }

    #[test]
    fn test_complexity_add_keeps_larger_depth_of_self() {
        let mut merged = complexity(1, 1, 9, 0.0);

        merged.add(&complexity(1, 1, 2, 0.0));

        assert_eq!(merged.max_depth, 9);
    }

    #[test]
    fn test_size_distribution() {
        let mut dist = SizeDistribution::default();

        dist.add(500); // tiny: < 1KB
        dist.add(1024); // small: 1KB - 10KB
        dist.add(5000); // small
        dist.add(15000); // medium: 10KB - 100KB
        dist.add(500_000); // large: 100KB - 1MB
        dist.add(2_000_000); // huge: >= 1MB

        assert_eq!(dist.tiny, 1);
        assert_eq!(dist.small, 2);
        assert_eq!(dist.medium, 1);
        assert_eq!(dist.large, 1);
        assert_eq!(dist.huge, 1);
    }

    #[test]
    fn test_size_distribution_boundaries() {
        let mut dist = SizeDistribution::default();

        // Last value of each bucket followed by the first value of the next.
        dist.add(0);
        dist.add(1023);
        dist.add(1024);
        dist.add(10 * 1024 - 1);
        dist.add(10 * 1024);
        dist.add(100 * 1024 - 1);
        dist.add(100 * 1024);
        dist.add(1024 * 1024 - 1);
        dist.add(1024 * 1024);
        dist.add(u64::MAX);

        assert_eq!(dist.tiny, 2);
        assert_eq!(dist.small, 2);
        assert_eq!(dist.medium, 2);
        assert_eq!(dist.large, 2);
        assert_eq!(dist.huge, 2);
    }

    #[test]
    fn test_summary_from_file_stats() {
        let files = vec![
            file(
                "src/main.rs",
                "Rust",
                lines(100, 80, 10, 10),
                2000,
                complexity(5, 10, 3, 16.0),
            ),
            file(
                "src/lib.rs",
                "Rust",
                lines(50, 40, 5, 5),
                1000,
                complexity(3, 6, 2, 13.3),
            ),
            file(
                "test.py",
                "Python",
                lines(30, 20, 5, 5),
                500,
                complexity(2, 4, 2, 10.0),
            ),
        ];

        let summary = Summary::from_file_stats(&files);

        assert_eq!(summary.total_files, 3);
        assert_eq!(summary.lines, lines(180, 140, 20, 20));
        assert_eq!(summary.total_size, 3500);
        assert_eq!(summary.by_language.len(), 2);

        // Overall complexity: counts summed, depth maxed, 140 code lines / 10 functions.
        assert_eq!(summary.complexity.functions, 10);
        assert_eq!(summary.complexity.cyclomatic, 20);
        assert_eq!(summary.complexity.max_depth, 3);
        assert!((summary.complexity.avg_func_lines - 14.0).abs() < 1e-9);

        // 1000 and 500 bytes are below 1 KiB; 2000 bytes is in the next bucket.
        assert_eq!(summary.size_distribution.tiny, 2);
        assert_eq!(summary.size_distribution.small, 1);
        assert_eq!(summary.size_distribution.medium, 0);
        assert_eq!(summary.size_distribution.large, 0);
        assert_eq!(summary.size_distribution.huge, 0);

        // Rust should be first (more code lines)
        let order: Vec<&str> = summary.by_language.keys().map(String::as_str).collect();
        assert_eq!(order, ["Rust", "Python"]);

        let rust_stats = summary.by_language.get("Rust").unwrap();
        assert_eq!(rust_stats.files, 2);
        assert_eq!(rust_stats.lines, lines(150, 120, 15, 15));
        assert_eq!(rust_stats.size, 3000);
        assert_eq!(rust_stats.complexity.functions, 8);
        assert_eq!(rust_stats.complexity.max_depth, 3);

        let python_stats = summary.by_language.get("Python").unwrap();
        assert_eq!(python_stats.files, 1);
        assert_eq!(python_stats.lines, lines(30, 20, 5, 5));
        assert_eq!(python_stats.size, 500);
    }

    #[test]
    fn test_summary_empty() {
        let summary = Summary::from_file_stats(&[]);

        assert_eq!(summary.total_files, 0);
        assert_eq!(summary.lines, LineStats::default());
        assert_eq!(summary.total_size, 0);
        assert_eq!(summary.complexity.functions, 0);
        assert!(summary.complexity.avg_func_lines.abs() < 1e-9);
        assert!(summary.by_language.is_empty());
    }

    #[test]
    fn test_file_stats_default() {
        let stats = FileStats::default();
        assert!(stats.path.as_os_str().is_empty());
        assert!(stats.language.is_empty());
        assert_eq!(stats.size, 0);
        assert_eq!(stats.lines, LineStats::default());
        assert_eq!(stats.complexity.functions, 0);
    }
}