Skip to main content

codemem_engine/enrichment/
code_smells.rs

1//! Code smell detection: long functions, many parameters, deep nesting, long files.
2
3use super::{resolve_path, EnrichResult};
4use crate::CodememEngine;
5use codemem_core::{CodememError, NodeKind};
6use serde_json::json;
7use std::collections::HashMap;
8use std::path::Path;
9
10impl CodememEngine {
11    /// Detect common code smells: long functions (>50 lines), too many parameters (>5),
12    /// deep nesting (>4 levels), and long files (>500 lines).
13    ///
14    /// Stores findings as Pattern memories with importance 0.5 (0.3 for test files).
15    /// Skips non-source files (docs, markdown, etc.). Caps total smells at 50 to
16    /// avoid flooding the memory store with low-value findings.
17    pub fn enrich_code_smells(
18        &self,
19        namespace: Option<&str>,
20        project_root: Option<&Path>,
21    ) -> Result<EnrichResult, CodememError> {
22        let all_nodes = {
23            let graph = self.lock_graph()?;
24            graph.get_all_nodes()
25        };
26
27        // Collect all smells with a severity score, then keep only top N.
28        let mut candidates: Vec<(String, f64, Vec<String>)> = Vec::new();
29
30        // Check functions/methods for long bodies and deep nesting
31        let mut file_cache: HashMap<String, Vec<String>> = HashMap::new();
32
33        for node in &all_nodes {
34            if !matches!(node.kind, NodeKind::Function | NodeKind::Method) {
35                continue;
36            }
37            let file_path = match node.payload.get("file_path").and_then(|v| v.as_str()) {
38                Some(fp) => fp.to_string(),
39                None => continue,
40            };
41
42            // Skip non-source files
43            if is_non_source_file(&file_path) {
44                continue;
45            }
46
47            let line_start = node
48                .payload
49                .get("line_start")
50                .and_then(|v| v.as_u64())
51                .unwrap_or(0) as usize;
52            let line_end = node
53                .payload
54                .get("line_end")
55                .and_then(|v| v.as_u64())
56                .unwrap_or(0) as usize;
57
58            // Guard against underflow: if line_end < line_start, skip
59            if line_end <= line_start {
60                continue;
61            }
62            let fn_length = line_end - line_start;
63
64            // Sanity cap: no function is >100K lines
65            if fn_length > 100_000 {
66                continue;
67            }
68
69            let is_test = is_test_file(&file_path);
70
71            // Long function (>50 lines)
72            if fn_length > 50 {
73                let content = format!(
74                    "Code smell: Long function {} ({} lines) in {} — consider splitting",
75                    node.label, fn_length, file_path
76                );
77                let severity = if is_test {
78                    0.3
79                } else {
80                    0.5 + (fn_length as f64 / 500.0).min(0.3)
81                };
82                candidates.push((content, severity, vec![node.id.clone()]));
83            }
84
85            // Check parameter count from signature
86            let signature = node
87                .payload
88                .get("signature")
89                .and_then(|v| v.as_str())
90                .unwrap_or("");
91            if let Some(params_str) = signature
92                .split('(')
93                .nth(1)
94                .and_then(|s| s.split(')').next())
95            {
96                let param_count = if params_str.trim().is_empty() {
97                    0
98                } else {
99                    params_str.split(',').count()
100                };
101                if param_count > 5 {
102                    let content = format!(
103                        "Code smell: {} has {} parameters in {} — consider using a struct",
104                        node.label, param_count, file_path
105                    );
106                    let severity = if is_test { 0.3 } else { 0.5 };
107                    candidates.push((content, severity, vec![node.id.clone()]));
108                }
109            }
110
111            // Check nesting depth
112            if fn_length > 0 {
113                let lines = file_cache.entry(file_path.clone()).or_insert_with(|| {
114                    std::fs::read_to_string(resolve_path(&file_path, project_root))
115                        .unwrap_or_default()
116                        .lines()
117                        .map(String::from)
118                        .collect()
119                });
120
121                let start = line_start.saturating_sub(1);
122                let end = line_end.min(lines.len());
123                if start < end {
124                    let mut max_depth = 0usize;
125                    let mut depth = 0usize;
126                    for line in &lines[start..end] {
127                        for ch in line.chars() {
128                            match ch {
129                                '{' => {
130                                    depth += 1;
131                                    max_depth = max_depth.max(depth);
132                                }
133                                '}' => depth = depth.saturating_sub(1),
134                                _ => {}
135                            }
136                        }
137                    }
138                    if max_depth > 4 {
139                        let content = format!(
140                            "Code smell: Deep nesting ({} levels) in {} in {} — consider extracting",
141                            max_depth, node.label, file_path
142                        );
143                        let severity = if is_test {
144                            0.3
145                        } else {
146                            0.5 + (max_depth as f64 / 20.0).min(0.3)
147                        };
148                        candidates.push((content, severity, vec![node.id.clone()]));
149                    }
150                }
151            }
152        }
153
154        // Check for long files (>500 lines) — source files only
155        for node in &all_nodes {
156            if node.kind != NodeKind::File {
157                continue;
158            }
159            let file_path = &node.label;
160
161            if is_non_source_file(file_path) {
162                continue;
163            }
164
165            let line_count = file_cache
166                .get(file_path.as_str())
167                .map(|lines| lines.len())
168                .unwrap_or_else(|| {
169                    std::fs::read_to_string(resolve_path(file_path, project_root))
170                        .map(|s| s.lines().count())
171                        .unwrap_or(0)
172                });
173            if line_count > 500 {
174                let content = format!(
175                    "Code smell: Long file {} ({} lines) — consider splitting into modules",
176                    file_path, line_count
177                );
178                let is_test = is_test_file(file_path);
179                let severity = if is_test { 0.3 } else { 0.5 };
180                candidates.push((content, severity, vec![node.id.clone()]));
181            }
182        }
183
184        // Sort by severity (highest first) and cap at 50 to avoid flooding
185        candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
186        let max_smells = self.config.enrichment.max_code_smells;
187        candidates.truncate(max_smells);
188
189        let mut smells_stored = 0;
190        for (content, _severity, links) in &candidates {
191            if self
192                .store_pattern_memory(content, namespace, links)
193                .is_some()
194            {
195                smells_stored += 1;
196            }
197        }
198
199        self.save_index();
200
201        Ok(EnrichResult {
202            insights_stored: smells_stored,
203            details: json!({
204                "smells_detected": smells_stored,
205            }),
206        })
207    }
208}
209
/// Check if a file path is a test file.
///
/// A path counts as a test file when any of its directory components is exactly
/// `tests` or `test` (including a leading component, e.g. `tests/foo.rs`), or when
/// the path contains one of the markers `_test.`, `_tests.`, `.test.` or `.spec.`.
/// Backslashes are treated as path separators so Windows-style paths are classified
/// the same way.
fn is_test_file(path: &str) -> bool {
    const NAME_MARKERS: [&str; 4] = ["_test.", "_tests.", ".test.", ".spec."];

    let normalized = path.replace('\\', "/");

    // Look only at directory components: drop the final segment (the file name).
    let mut dirs = normalized.split('/');
    dirs.next_back();
    if dirs.any(|dir| dir == "tests" || dir == "test") {
        return true;
    }

    NAME_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
221
/// Check if a file is non-source (docs, config, generated, etc.)
///
/// The check is case-insensitive. A path is non-source when it ends with one of the
/// listed documentation/config/asset extensions, or when any of its directory
/// components is exactly `node_modules`, `target` or `dist` (including a leading
/// component, e.g. `target/debug/build.rs`). Backslashes are treated as path
/// separators so Windows-style paths are classified the same way.
fn is_non_source_file(path: &str) -> bool {
    const NON_SOURCE_EXTENSIONS: [&str; 9] = [
        ".md", ".txt", ".json", ".yaml", ".yml", ".toml", ".lock", ".svg", ".css",
    ];
    const NON_SOURCE_DIRS: [&str; 3] = ["node_modules", "target", "dist"];

    let lower = path.to_lowercase().replace('\\', "/");

    if NON_SOURCE_EXTENSIONS
        .iter()
        .any(|ext| lower.ends_with(*ext))
    {
        return true;
    }

    // Look only at directory components: drop the final segment (the file name).
    let mut dirs = lower.split('/');
    dirs.next_back();
    dirs.any(|dir| NON_SOURCE_DIRS.iter().any(|excluded| *excluded == dir))
}
232        || lower.ends_with(".svg")
233        || lower.ends_with(".css")
234        || lower.contains("/node_modules/")
235        || lower.contains("/target/")
236        || lower.contains("/dist/")
237}