om_context/
scorer.rs

1use lazy_static::lazy_static;
2use std::collections::HashMap;
3use std::path::Path;
4
/// A file path together with the importance score assigned by `score_file`.
///
/// `PartialEq`/`Eq` are derived so results can be compared directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScoredFile {
    /// The path exactly as it was handed to the scorer.
    pub path: String,
    /// Importance score; `score_file` produces values in `1..=10`, higher meaning more important.
    pub score: i32,
    /// Short explanation of how `score` was derived (e.g. "entry point").
    // NOTE(review): the lint is silenced because nothing in this file reads the field outside
    // of `Debug` output — presumably it is kept for diagnostics; confirm before removing.
    #[allow(dead_code)]
    pub reason: String,
}
12
13lazy_static! {
14    static ref PROJECT_FILES: HashMap<&'static str, i32> = {
15        let mut m = HashMap::new();
16        m.insert("Cargo.toml", 8);
17        m.insert("package.json", 8);
18        m.insert("go.mod", 8);
19        m.insert("pom.xml", 8);
20        m.insert("build.gradle", 8);
21        m.insert("Dockerfile", 8);
22        m.insert("docker-compose.yml", 8);
23        m.insert("Makefile", 8);
24        m.insert("CMakeLists.txt", 8);
25        m.insert("tsconfig.json", 8);
26        m.insert("setup.py", 8);
27        m.insert("pyproject.toml", 8);
28        m
29    };
30    static ref IMPORTANT_DIRS: Vec<&'static str> =
31        vec!["src", "core", "lib", "app", "pkg", "internal", "cmd",];
32    static ref DOMAIN_DIRS: Vec<&'static str> = vec![
33        "api",
34        "server",
35        "client",
36        "models",
37        "services",
38        "handlers",
39        "controllers",
40        "routes",
41        "middleware",
42        "database",
43        "db",
44        "auth",
45        "components",
46        "views",
47        "utils",
48    ];
49    static ref TEST_DIRS: Vec<&'static str> = vec!["test", "tests", "spec", "__tests__",];
50    static ref LOW_DIRS: Vec<&'static str> = vec![
51        "vendor",
52        "third_party",
53        "fixtures",
54        "mocks",
55        "docs",
56        "examples",
57        "scripts",
58        "tools",
59        "dist",
60        "build",
61        "out",
62    ];
63}
64
65pub fn score_file(filepath: &str) -> ScoredFile {
66    let path = Path::new(filepath);
67    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
68    let mut score = 7;
69    let mut reasons = Vec::new();
70
71    if filename == "main.rs" || filename == "lib.rs" || filename == "mod.rs" {
72        return ScoredFile {
73            path: filepath.to_string(),
74            score: 10,
75            reason: "entry point".to_string(),
76        };
77    }
78
79    if filename.starts_with("main.")
80        || filename.starts_with("index.")
81        || filename.starts_with("app.")
82        || filename.starts_with("server.")
83        || filename.starts_with("cli.")
84    {
85        return ScoredFile {
86            path: filepath.to_string(),
87            score: 10,
88            reason: "entry point".to_string(),
89        };
90    }
91
92    if filename == "README.md" || filename == "README" || filename == "README.rst" {
93        return ScoredFile {
94            path: filepath.to_string(),
95            score: 10,
96            reason: "readme".to_string(),
97        };
98    }
99
100    if filename.starts_with("config.") || filename.starts_with("settings.") {
101        return ScoredFile {
102            path: filepath.to_string(),
103            score: 9,
104            reason: "config".to_string(),
105        };
106    }
107
108    if filename.ends_with(".lock")
109        || filename.contains("-lock.")
110        || filename.contains(".lock.")
111        || filename.ends_with(".min.js")
112        || filename.ends_with(".min.css")
113        || filename.ends_with(".map")
114        || filename.ends_with(".d.ts")
115        || filename.ends_with(".pyc")
116        || filename.contains(".generated.")
117    {
118        return ScoredFile {
119            path: filepath.to_string(),
120            score: 2,
121            reason: "generated".to_string(),
122        };
123    }
124
125    if let Some(&project_score) = PROJECT_FILES.get(filename) {
126        return ScoredFile {
127            path: filepath.to_string(),
128            score: project_score,
129            reason: "project file".to_string(),
130        };
131    }
132
133    if filename.starts_with("test_")
134        || filename.contains("_test.")
135        || filename.contains(".test.")
136        || filename.contains(".spec.")
137    {
138        return ScoredFile {
139            path: filepath.to_string(),
140            score: 5,
141            reason: "test file".to_string(),
142        };
143    }
144
145    if filename == "__init__.py" {
146        return ScoredFile {
147            path: filepath.to_string(),
148            score: 3,
149            reason: "init file".to_string(),
150        };
151    }
152
153    let components: Vec<&str> = filepath.split('/').collect();
154    let depth = components.len() - 1;
155
156    for component in &components[..components.len().saturating_sub(1)] {
157        if IMPORTANT_DIRS.contains(component) {
158            score += 2;
159            reasons.push("important dir");
160            break;
161        }
162    }
163
164    for component in &components[..components.len().saturating_sub(1)] {
165        if DOMAIN_DIRS.contains(component) {
166            score += 1;
167            reasons.push("domain dir");
168            break;
169        }
170    }
171
172    for component in &components[..components.len().saturating_sub(1)] {
173        if TEST_DIRS.contains(component) {
174            score -= 2;
175            reasons.push("test dir");
176            break;
177        }
178    }
179
180    for component in &components[..components.len().saturating_sub(1)] {
181        if LOW_DIRS.contains(component) {
182            score -= 3;
183            reasons.push("low priority dir");
184            break;
185        }
186    }
187
188    if depth == 0 {
189        score += 1;
190        reasons.push("root level");
191    } else if depth > 4 {
192        score -= 2;
193        reasons.push("deep nesting");
194    } else if depth > 2 {
195        score -= 1;
196        reasons.push("nested");
197    }
198
199    let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("");
200    if extension == "proto" || extension == "graphql" || extension == "gql" || extension == "thrift"
201    {
202        score += 1;
203        reasons.push("schema file");
204    }
205
206    if (extension == "md" || extension == "rst")
207        && filename != "README.md"
208        && filename != "README.rst"
209    {
210        score -= 1;
211        reasons.push("doc file");
212    }
213
214    score = score.clamp(1, 10);
215
216    let reason = if reasons.is_empty() {
217        "base score".to_string()
218    } else {
219        reasons.join(", ")
220    };
221
222    ScoredFile {
223        path: filepath.to_string(),
224        score,
225        reason,
226    }
227}
228
229pub fn score_files(files: Vec<String>) -> Vec<ScoredFile> {
230    files.into_iter().map(|f| score_file(&f)).collect()
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every path in `paths` is scored exactly `expected`.
    fn assert_scores(paths: &[&str], expected: i32) {
        for &path in paths {
            assert_eq!(
                score_file(path).score,
                expected,
                "unexpected score for {}",
                path
            );
        }
    }

    // Entry points get the maximum score regardless of their directory.
    #[test]
    fn test_entry_points() {
        assert_scores(&["src/main.rs", "src/lib.rs", "index.js", "app.py"], 10);
    }

    // A README is top priority even inside a low-priority directory.
    #[test]
    fn test_readme() {
        assert_scores(&["README.md", "docs/README.md"], 10);
    }

    #[test]
    fn test_config() {
        assert_scores(&["config.toml", "settings.json"], 9);
    }

    #[test]
    fn test_project_files() {
        assert_scores(&["Cargo.toml", "package.json", "Dockerfile"], 8);
    }

    #[test]
    fn test_test_files() {
        assert_scores(&["test_foo.py", "foo_test.go", "foo.test.ts"], 5);
    }

    #[test]
    fn test_generated() {
        assert_scores(
            &["package-lock.json", "bundle.min.js", "types.generated.ts"],
            2,
        );
    }

    // A file under an important directory must outrank one under a low-priority directory.
    #[test]
    fn test_directory_modifiers() {
        let in_src = score_file("src/handler.rs").score;
        let in_vendor = score_file("vendor/utils.rs").score;
        assert!(in_src > in_vendor);
    }

    // Root-level files get a bonus, deeply nested files a penalty.
    #[test]
    fn test_depth() {
        let at_root = score_file("file.rs").score;
        let nested_deep = score_file("a/b/c/d/e/file.rs").score;
        assert!(at_root > nested_deep);
    }
}