Skip to main content

codelens_core/analyzer/
stats.rs

1//! Statistics data structures.
2
3use indexmap::IndexMap;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::path::PathBuf;
7use std::time::Duration;
8
9/// Statistics for a single file.
10#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11pub struct FileStats {
12    /// File path.
13    pub path: PathBuf,
14    /// Detected language name.
15    pub language: String,
16    /// Line statistics.
17    pub lines: LineStats,
18    /// File size in bytes.
19    pub size: u64,
20    /// Complexity metrics.
21    pub complexity: Complexity,
22}
23
24/// Line count statistics.
25#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
26pub struct LineStats {
27    /// Total number of lines.
28    pub total: usize,
29    /// Number of code lines (non-blank, non-comment).
30    pub code: usize,
31    /// Number of comment lines.
32    pub comment: usize,
33    /// Number of blank lines.
34    pub blank: usize,
35}
36
37impl LineStats {
38    /// Create a new LineStats with all zeros.
39    pub fn new() -> Self {
40        Self::default()
41    }
42
43    /// Add another LineStats to this one.
44    pub fn add(&mut self, other: &LineStats) {
45        self.total += other.total;
46        self.code += other.code;
47        self.comment += other.comment;
48        self.blank += other.blank;
49    }
50}
51
52impl std::ops::Add for LineStats {
53    type Output = Self;
54
55    fn add(self, other: Self) -> Self {
56        Self {
57            total: self.total + other.total,
58            code: self.code + other.code,
59            comment: self.comment + other.comment,
60            blank: self.blank + other.blank,
61        }
62    }
63}
64
65impl std::ops::AddAssign for LineStats {
66    fn add_assign(&mut self, other: Self) {
67        self.add(&other);
68    }
69}
70
71/// Code complexity metrics.
72#[derive(Debug, Clone, Default, Serialize, Deserialize)]
73pub struct Complexity {
74    /// Number of functions/methods.
75    pub functions: usize,
76    /// Total cyclomatic complexity.
77    pub cyclomatic: usize,
78    /// Maximum nesting depth.
79    pub max_depth: usize,
80    /// Average lines per function.
81    pub avg_func_lines: f64,
82}
83
84impl Complexity {
85    /// Add another Complexity to this one.
86    pub fn add(&mut self, other: &Complexity) {
87        self.functions += other.functions;
88        self.cyclomatic += other.cyclomatic;
89        self.max_depth = self.max_depth.max(other.max_depth);
90    }
91}
92
93/// File size distribution buckets.
94#[derive(Debug, Clone, Default, Serialize, Deserialize)]
95pub struct SizeDistribution {
96    /// Files < 1KB
97    pub tiny: usize,
98    /// Files 1KB - 10KB
99    pub small: usize,
100    /// Files 10KB - 100KB
101    pub medium: usize,
102    /// Files 100KB - 1MB
103    pub large: usize,
104    /// Files > 1MB
105    pub huge: usize,
106}
107
108impl SizeDistribution {
109    /// Add a file size to the distribution.
110    pub fn add(&mut self, size: u64) {
111        match size {
112            s if s < 1024 => self.tiny += 1,
113            s if s < 10 * 1024 => self.small += 1,
114            s if s < 100 * 1024 => self.medium += 1,
115            s if s < 1024 * 1024 => self.large += 1,
116            _ => self.huge += 1,
117        }
118    }
119}
120
121/// Statistics grouped by language.
122#[derive(Debug, Clone, Default, Serialize, Deserialize)]
123pub struct LanguageSummary {
124    /// Number of files.
125    pub files: usize,
126    /// Line statistics.
127    pub lines: LineStats,
128    /// Total size in bytes.
129    pub size: u64,
130    /// Complexity metrics.
131    pub complexity: Complexity,
132}
133
134/// Repository statistics.
135#[derive(Debug, Clone, Serialize)]
136pub struct RepoStats {
137    /// Repository name.
138    pub name: String,
139    /// Repository path.
140    pub path: PathBuf,
141    /// Primary language (by code lines).
142    pub primary_language: String,
143    /// All file statistics.
144    pub files: Vec<FileStats>,
145    /// Summary statistics.
146    pub summary: RepoSummary,
147    /// Statistics by language.
148    pub by_language: IndexMap<String, LanguageSummary>,
149    /// Git information (if available).
150    pub git_info: Option<GitInfo>,
151}
152
153/// Repository summary statistics.
154#[derive(Debug, Clone, Default, Serialize)]
155pub struct RepoSummary {
156    /// Total number of code files.
157    pub total_files: usize,
158    /// Line statistics.
159    pub lines: LineStats,
160    /// Total size in bytes.
161    pub total_size: u64,
162    /// Complexity metrics.
163    pub complexity: Complexity,
164    /// File size distribution.
165    pub size_distribution: SizeDistribution,
166}
167
168/// Git repository information.
169#[derive(Debug, Clone, Serialize)]
170pub struct GitInfo {
171    /// Current branch name.
172    pub branch: Option<String>,
173    /// Last commit hash.
174    pub commit: Option<String>,
175    /// Last commit author.
176    pub author: Option<String>,
177    /// Last commit date.
178    pub date: Option<String>,
179}
180
181/// Overall analysis summary.
182#[derive(Debug, Clone, Default, Serialize, Deserialize)]
183pub struct Summary {
184    /// Total number of files.
185    pub total_files: usize,
186    /// Total line statistics.
187    pub lines: LineStats,
188    /// Total size in bytes.
189    pub total_size: u64,
190    /// Statistics by language.
191    pub by_language: IndexMap<String, LanguageSummary>,
192    /// File size distribution.
193    pub size_distribution: SizeDistribution,
194    /// Complexity metrics.
195    pub complexity: Complexity,
196}
197
198impl Summary {
199    /// Build summary from a list of file statistics.
200    pub fn from_file_stats(files: &[FileStats]) -> Self {
201        let mut summary = Summary::default();
202        let mut by_language: HashMap<String, LanguageSummary> = HashMap::new();
203
204        for file in files {
205            summary.total_files += 1;
206            summary.lines.add(&file.lines);
207            summary.total_size += file.size;
208            summary.size_distribution.add(file.size);
209            summary.complexity.add(&file.complexity);
210
211            let lang_summary = by_language.entry(file.language.clone()).or_default();
212            lang_summary.files += 1;
213            lang_summary.lines.add(&file.lines);
214            lang_summary.size += file.size;
215            lang_summary.complexity.add(&file.complexity);
216        }
217
218        // Sort by code lines (descending)
219        let mut sorted: Vec<_> = by_language.into_iter().collect();
220        sorted.sort_by(|a, b| b.1.lines.code.cmp(&a.1.lines.code));
221        summary.by_language = sorted.into_iter().collect();
222
223        // Calculate average function lines
224        if summary.complexity.functions > 0 {
225            summary.complexity.avg_func_lines =
226                summary.lines.code as f64 / summary.complexity.functions as f64;
227        }
228
229        summary
230    }
231}
232
233/// Complete analysis result.
234#[derive(Debug, Clone, Serialize, Deserialize)]
235pub struct AnalysisResult {
236    /// All file statistics.
237    pub files: Vec<FileStats>,
238    /// Overall summary.
239    pub summary: Summary,
240    /// Analysis duration.
241    #[serde(with = "duration_serde")]
242    pub elapsed: Duration,
243    /// Number of files scanned.
244    pub scanned_files: usize,
245    /// Number of files skipped.
246    pub skipped_files: usize,
247}
248
249mod duration_serde {
250    use serde::{Deserialize, Deserializer, Serialize, Serializer};
251    use std::time::Duration;
252
253    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
254    where
255        S: Serializer,
256    {
257        duration.as_secs_f64().serialize(serializer)
258    }
259
260    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
261    where
262        D: Deserializer<'de>,
263    {
264        let secs = f64::deserialize(deserializer)?;
265        Ok(Duration::from_secs_f64(secs))
266    }
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a fully specified `LineStats`.
    fn line_stats(total: usize, code: usize, comment: usize, blank: usize) -> LineStats {
        LineStats {
            total,
            code,
            comment,
            blank,
        }
    }

    /// Shorthand for a fully specified `Complexity`.
    fn complexity(
        functions: usize,
        cyclomatic: usize,
        max_depth: usize,
        avg_func_lines: f64,
    ) -> Complexity {
        Complexity {
            functions,
            cyclomatic,
            max_depth,
            avg_func_lines,
        }
    }

    /// Shorthand for a fully specified `FileStats`.
    fn file_stats(
        path: &str,
        language: &str,
        lines: LineStats,
        size: u64,
        complexity: Complexity,
    ) -> FileStats {
        FileStats {
            path: PathBuf::from(path),
            language: language.to_string(),
            lines,
            size,
            complexity,
        }
    }

    #[test]
    fn test_line_stats_default() {
        // A default value has every counter at zero.
        assert_eq!(LineStats::default(), line_stats(0, 0, 0, 0));
    }

    #[test]
    fn test_line_stats_add() {
        let mut acc = line_stats(100, 80, 10, 10);

        acc.add(&line_stats(50, 40, 5, 5));

        assert_eq!(acc, line_stats(150, 120, 15, 15));
    }

    #[test]
    fn test_line_stats_add_trait() {
        let sum = line_stats(100, 80, 10, 10) + line_stats(50, 40, 5, 5);

        assert_eq!((sum.total, sum.code), (150, 120));
    }

    #[test]
    fn test_line_stats_add_assign() {
        let mut acc = line_stats(100, 80, 10, 10);

        acc += line_stats(50, 40, 5, 5);

        assert_eq!((acc.total, acc.code), (150, 120));
    }

    #[test]
    fn test_complexity_add() {
        let mut merged = complexity(10, 20, 5, 0.0);

        merged.add(&complexity(5, 10, 8, 0.0));

        assert_eq!(merged.functions, 15);
        assert_eq!(merged.cyclomatic, 30);
        // The larger of 5 and 8 wins.
        assert_eq!(merged.max_depth, 8);
    }

    #[test]
    fn test_size_distribution() {
        let mut dist = SizeDistribution::default();

        // One tiny, two small (1024 sits on the lower edge), one medium,
        // one large and one huge file.
        for &size in &[500u64, 1024, 5000, 15_000, 500_000, 2_000_000] {
            dist.add(size);
        }

        assert_eq!(
            (dist.tiny, dist.small, dist.medium, dist.large, dist.huge),
            (1, 2, 1, 1, 1)
        );
    }

    #[test]
    fn test_summary_from_file_stats() {
        let files = vec![
            file_stats(
                "src/main.rs",
                "Rust",
                line_stats(100, 80, 10, 10),
                2000,
                complexity(5, 10, 3, 16.0),
            ),
            file_stats(
                "src/lib.rs",
                "Rust",
                line_stats(50, 40, 5, 5),
                1000,
                complexity(3, 6, 2, 13.3),
            ),
            file_stats(
                "test.py",
                "Python",
                line_stats(30, 20, 5, 5),
                500,
                complexity(2, 4, 2, 10.0),
            ),
        ];

        let summary = Summary::from_file_stats(&files);

        assert_eq!(summary.total_files, 3);
        assert_eq!(summary.lines.total, 180);
        assert_eq!(summary.lines.code, 140);
        assert_eq!(summary.total_size, 3500);
        assert_eq!(summary.by_language.len(), 2);
        assert_eq!(summary.complexity.functions, 10);

        // Rust has more code lines than Python, so it is listed first.
        let leading = summary.by_language.keys().next().unwrap();
        assert_eq!(leading, "Rust");

        let rust = summary.by_language.get("Rust").unwrap();
        assert_eq!(rust.files, 2);
        assert_eq!(rust.lines.code, 120);
    }

    #[test]
    fn test_summary_empty() {
        let summary = Summary::from_file_stats(&[]);

        assert_eq!(summary.total_files, 0);
        assert_eq!(summary.lines.total, 0);
        assert!(summary.by_language.is_empty());
    }

    #[test]
    fn test_file_stats_default() {
        let stats = FileStats::default();

        assert!(stats.path.as_os_str().is_empty());
        assert!(stats.language.is_empty());
        assert_eq!(stats.size, 0);
    }
}