Skip to main content

kardo_core/analysis/
structure.rs

1//! Project structure quality analysis.
2//!
3//! Evaluates how well a project's file organization supports AI comprehension:
4//! directory depth, documentation coverage, naming conventions,
5//! standard structure patterns, and organization quality.
6
7use serde::Serialize;
8use std::collections::HashMap;
9use std::path::Path;
10
11use crate::scanner::DiscoveredFile;
12
// Signal-group weights used to blend the five sub-scores into the composite
// score. The five values add up to 1.0.

/// Share of the composite score contributed by directory depth.
const W_DEPTH: f64 = 0.15;
/// Share of the composite score contributed by documentation coverage.
const W_COVERAGE: f64 = 0.25;
/// Share of the composite score contributed by naming consistency.
const W_NAMING: f64 = 0.20;
/// Share of the composite score contributed by standard layout markers.
const W_STANDARD: f64 = 0.25;
/// Share of the composite score contributed by organization quality.
const W_ORGANIZATION: f64 = 0.15;
19
20/// Result of project structure analysis.
21#[derive(Debug, Clone, Serialize)]
22pub struct StructureResult {
23    /// Weighted composite structure quality score (0.0-1.0).
24    pub score: f64,
25    /// Directory nesting depth quality (ideal average 2-4 levels).
26    pub depth_score: f64,
27    /// Documentation coverage ratio relative to total files.
28    pub coverage_score: f64,
29    /// File naming convention consistency (kebab/snake/camel).
30    pub naming_score: f64,
31    /// Presence of standard project directories (docs/, src/, tests/, .github/).
32    pub standard_score: f64,
33    /// Organization quality: no dumping grounds, separation of concerns.
34    pub organization_score: f64,
35}
36
/// Stateless entry point for project structure analysis.
///
/// The type carries no data; the analysis routines in this file are
/// associated functions called as `StructureAnalyzer::analyze(..)`.
#[derive(Debug, Clone, Copy, Default)]
pub struct StructureAnalyzer;
38
39impl StructureAnalyzer {
40    /// Analyze project structure quality.
41    pub fn analyze(project_root: &Path, files: &[DiscoveredFile]) -> StructureResult {
42        if files.is_empty() {
43            return StructureResult {
44                score: 0.0,
45                depth_score: 0.0,
46                coverage_score: 0.0,
47                naming_score: 0.0,
48                standard_score: 0.0,
49                organization_score: 0.0,
50            };
51        }
52
53        let depth_score = Self::score_depth(files);
54        let coverage_score = Self::score_coverage(files);
55        let naming_score = Self::score_naming(files);
56        let standard_score = Self::score_standard(project_root);
57        let organization_score = Self::score_organization(files);
58
59        let score = W_DEPTH * depth_score
60            + W_COVERAGE * coverage_score
61            + W_NAMING * naming_score
62            + W_STANDARD * standard_score
63            + W_ORGANIZATION * organization_score;
64
65        StructureResult {
66            score,
67            depth_score,
68            coverage_score,
69            naming_score,
70            standard_score,
71            organization_score,
72        }
73    }
74
75    /// Score directory depth: ideal average is 2-4.
76    fn score_depth(files: &[DiscoveredFile]) -> f64 {
77        if files.is_empty() {
78            return 0.0;
79        }
80
81        let depths: Vec<usize> = files
82            .iter()
83            .map(|f| f.relative_path.matches('/').count())
84            .collect();
85
86        let avg = depths.iter().sum::<usize>() as f64 / depths.len() as f64;
87
88        if (2.0..=4.0).contains(&avg) {
89            1.0
90        } else if avg < 1.0 {
91            0.6 // Very flat
92        } else if avg < 2.0 {
93            // Interpolate 0.6 → 1.0 as avg goes from 1.0 → 2.0
94            0.6 + 0.4 * (avg - 1.0)
95        } else if avg <= 6.0 {
96            // Interpolate 1.0 → 0.5 as avg goes from 4.0 → 6.0
97            1.0 - 0.5 * (avg - 4.0) / 2.0
98        } else {
99            0.5 // Too deep
100        }
101    }
102
103    /// Score documentation coverage: ratio of markdown files to total.
104    fn score_coverage(files: &[DiscoveredFile]) -> f64 {
105        let total = files.len() as f64;
106        let md_count = files.iter().filter(|f| f.is_markdown).count() as f64;
107        let ratio = md_count / total;
108        let pct = ratio * 100.0;
109
110        if (5.0..=15.0).contains(&pct) {
111            1.0
112        } else if pct < 2.0 {
113            0.3
114        } else if pct < 5.0 {
115            // Interpolate 0.3 → 1.0 as pct goes from 2 → 5
116            0.3 + 0.7 * (pct - 2.0) / 3.0
117        } else if pct <= 30.0 {
118            // Interpolate 1.0 → 0.7 as pct goes from 15 → 30
119            1.0 - 0.3 * (pct - 15.0) / 15.0
120        } else {
121            0.7 // Doc-heavy project
122        }
123    }
124
125    /// Score naming convention consistency.
126    fn score_naming(files: &[DiscoveredFile]) -> f64 {
127        let mut kebab = 0usize;
128        let mut snake = 0usize;
129        let mut camel = 0usize;
130
131        for file in files {
132            let name = match file.relative_path.rsplit('/').next() {
133                Some(n) => {
134                    // Strip extension
135                    match n.rfind('.') {
136                        Some(idx) => &n[..idx],
137                        None => n,
138                    }
139                }
140                None => continue,
141            };
142
143            // Skip very short names or names with no casing/separator signal
144            if name.len() < 2 {
145                continue;
146            }
147
148            if name.contains('-') && !name.contains('_') {
149                kebab += 1;
150            } else if name.contains('_') && !name.contains('-') {
151                snake += 1;
152            } else if name.chars().any(|c| c.is_uppercase()) && !name.contains('-') && !name.contains('_') {
153                camel += 1;
154            }
155            // else: single word or ambiguous — skip
156        }
157
158        let total_classified = kebab + snake + camel;
159        if total_classified == 0 {
160            return 0.8; // All single-word: consistent enough
161        }
162
163        let counts = [kebab, snake, camel];
164        let dominant = *counts.iter().max().unwrap_or(&0);
165        let consistency = dominant as f64 / total_classified as f64;
166
167        if consistency >= 0.8 {
168            1.0
169        } else if consistency >= 0.6 {
170            0.7
171        } else {
172            0.5
173        }
174    }
175
176    /// Score standard project structure patterns.
177    fn score_standard(root: &Path) -> f64 {
178        let mut score = 0.0;
179
180        // Has docs/ or doc/ directory
181        if root.join("docs").is_dir() || root.join("doc").is_dir() {
182            score += 0.3;
183        }
184
185        // Has src/ or lib/ directory
186        if root.join("src").is_dir() || root.join("lib").is_dir() {
187            score += 0.2;
188        }
189
190        // Has tests/ or test/ or __tests__/
191        if root.join("tests").is_dir() || root.join("test").is_dir() || root.join("__tests__").is_dir() {
192            score += 0.2;
193        }
194
195        // Has .github/ or .gitlab-ci.yml
196        if root.join(".github").is_dir() || root.join(".gitlab-ci.yml").exists() {
197            score += 0.15;
198        }
199
200        // Has README.md in root
201        if root.join("README.md").exists() {
202            score += 0.15;
203        }
204
205        score
206    }
207
208    /// Score organization quality: no dumping ground dirs, separation of concerns.
209    fn score_organization(files: &[DiscoveredFile]) -> f64 {
210        // Count files per directory
211        let mut dir_counts: HashMap<String, usize> = HashMap::new();
212
213        for file in files {
214            let dir = match file.relative_path.rfind('/') {
215                Some(idx) => file.relative_path[..idx].to_string(),
216                None => ".".to_string(), // root
217            };
218            *dir_counts.entry(dir).or_insert(0) += 1;
219        }
220
221        // No dumping ground: no directory with >50 files
222        let has_dumping_ground = dir_counts.values().any(|&count| count > 50);
223        let dump_score = if has_dumping_ground { 0.0 } else { 0.5 };
224
225        // Separation of concerns: docs are in separate dir from source
226        let has_doc_separation = dir_counts.keys().any(|k| {
227            k == "docs" || k == "doc" || k.starts_with("docs/") || k.starts_with("doc/")
228        });
229        let has_src_separation = dir_counts.keys().any(|k| {
230            k == "src" || k == "lib" || k.starts_with("src/") || k.starts_with("lib/")
231        });
232        let separation_score = if has_doc_separation && has_src_separation {
233            0.5
234        } else if has_doc_separation || has_src_separation {
235            0.3
236        } else {
237            0.0
238        };
239
240        dump_score + separation_score
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use std::path::PathBuf;

    /// Build a `DiscoveredFile` fixture from a project-relative path.
    ///
    /// The extension comes from the final path component via
    /// `Path::extension`, so a path without one (e.g. `Makefile`) gets `None`
    /// instead of the whole path string.
    fn make_file(relative_path: &str) -> DiscoveredFile {
        let path = PathBuf::from(relative_path);
        let extension = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_owned);

        DiscoveredFile {
            path,
            relative_path: relative_path.to_string(),
            size: 100,
            modified_at: Some(Utc::now()),
            extension,
            is_markdown: relative_path.ends_with(".md"),
            content_hash: String::new(),
        }
    }

    /// Build one fixture per path, preserving order.
    fn make_files(paths: &[&str]) -> Vec<DiscoveredFile> {
        paths.iter().copied().map(make_file).collect()
    }

    /// A project with every standard marker on disk gets a full standard
    /// score and a composite score above 0.6.
    #[test]
    fn test_well_structured_project() {
        let dir = tempfile::TempDir::new().unwrap();
        let root = dir.path();

        // Create standard dirs
        std::fs::create_dir_all(root.join("docs")).unwrap();
        std::fs::create_dir_all(root.join("src/components")).unwrap();
        std::fs::create_dir_all(root.join("tests")).unwrap();
        std::fs::create_dir_all(root.join(".github")).unwrap();
        std::fs::write(root.join("README.md"), "# Project").unwrap();

        let files = make_files(&[
            "README.md",
            "docs/api.md",
            "docs/setup.md",
            "src/main.rs",
            "src/lib.rs",
            "src/components/button.rs",
            "src/components/input.rs",
            "tests/test_main.rs",
            "tests/test_lib.rs",
            "Cargo.toml",
        ]);

        let result = StructureAnalyzer::analyze(root, &files);
        assert!(
            result.score > 0.6,
            "Well-structured project should score > 0.6, got {}",
            result.score
        );
        assert!(
            (result.standard_score - 1.0).abs() < 0.01,
            "All standard dirs present → standard_score = 1.0, got {}",
            result.standard_score
        );
    }

    /// Files that all live at the root keep the depth score at the flat value.
    #[test]
    fn test_flat_project() {
        let dir = tempfile::TempDir::new().unwrap();
        let root = dir.path();

        let files = make_files(&[
            "main.py",
            "utils.py",
            "config.py",
            "test.py",
            "README.md",
        ]);

        let result = StructureAnalyzer::analyze(root, &files);
        assert!(
            result.depth_score <= 0.6,
            "Flat project should have low depth score, got {}",
            result.depth_score
        );
    }

    /// Without any markdown file the coverage score stays at its floor.
    #[test]
    fn test_no_markdown_low_coverage() {
        let dir = tempfile::TempDir::new().unwrap();
        let root = dir.path();

        let files = make_files(&[
            "src/main.rs",
            "src/lib.rs",
            "src/utils.rs",
            "src/config.rs",
            "src/models.rs",
        ]);

        let result = StructureAnalyzer::analyze(root, &files);
        assert!(
            result.coverage_score <= 0.3,
            "No markdown → coverage_score should be 0.3, got {}",
            result.coverage_score
        );
    }

    /// Uniform kebab-case names earn the maximum naming score.
    #[test]
    fn test_consistent_naming() {
        let dir = tempfile::TempDir::new().unwrap();
        let root = dir.path();

        let files = make_files(&[
            "src/my-component.ts",
            "src/my-utils.ts",
            "src/my-config.ts",
            "src/main-app.ts",
            "src/test-runner.ts",
        ]);

        let result = StructureAnalyzer::analyze(root, &files);
        assert!(
            result.naming_score >= 1.0,
            "All kebab-case → naming_score should be 1.0, got {}",
            result.naming_score
        );
    }

    /// More than 50 files in one directory forfeits the dumping-ground half
    /// of the organization score.
    #[test]
    fn test_dumping_ground_low_organization() {
        let dir = tempfile::TempDir::new().unwrap();
        let root = dir.path();

        // Create 51+ files in a single directory (via DiscoveredFile paths)
        let mut paths: Vec<String> = (0..55)
            .map(|i| format!("src/file_{}.rs", i))
            .collect();
        paths.push("README.md".to_string());
        let path_refs: Vec<&str> = paths.iter().map(String::as_str).collect();
        let files = make_files(&path_refs);

        let result = StructureAnalyzer::analyze(root, &files);
        assert!(
            result.organization_score < 0.5,
            "Dumping ground dir → low organization score, got {}",
            result.organization_score
        );
    }

    /// An empty file list short-circuits to an all-zero result.
    #[test]
    fn test_empty_project() {
        let dir = tempfile::TempDir::new().unwrap();
        let result = StructureAnalyzer::analyze(dir.path(), &[]);
        assert!(
            result.score < 0.01,
            "Empty project should have 0 score, got {}",
            result.score
        );
    }

    /// An even mix of kebab, snake and camel names has no dominant style.
    #[test]
    fn test_mixed_naming_low_score() {
        let dir = tempfile::TempDir::new().unwrap();
        let root = dir.path();

        let files = make_files(&[
            "src/my-component.ts",
            "src/my_utils.ts",
            "src/MyConfig.ts",
            "src/main-app.ts",
            "src/test_runner.ts",
            "src/AnotherFile.ts",
        ]);

        let result = StructureAnalyzer::analyze(root, &files);
        assert!(
            result.naming_score <= 0.7,
            "Mixed naming should score low, got {}",
            result.naming_score
        );
    }
}