Skip to main content

om_context/
scorer.rs

1use lazy_static::lazy_static;
2use rayon::prelude::*;
3use std::collections::HashMap;
4use std::path::Path;
5
/// A file path together with the relevance score assigned to it.
///
/// Values are produced by `score_file`, which only ever emits scores in the
/// range `1..=10`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScoredFile {
    /// The path exactly as it was passed to the scorer.
    pub path: String,
    /// Relevance score; higher means more important.
    pub score: i32,
    /// Human-readable explanation of which rule(s) produced `score`.
    // NOTE(review): nothing in this file reads `reason` outside of construction;
    // the allow presumably silences the unused-field lint — confirm against
    // callers before removing it.
    #[allow(dead_code)]
    pub reason: String,
}
13
14lazy_static! {
15    static ref PROJECT_FILES: HashMap<&'static str, i32> = {
16        let mut m = HashMap::new();
17        m.insert("Cargo.toml", 8);
18        m.insert("package.json", 8);
19        m.insert("go.mod", 8);
20        m.insert("pom.xml", 8);
21        m.insert("build.gradle", 8);
22        m.insert("Dockerfile", 8);
23        m.insert("docker-compose.yml", 8);
24        m.insert("Makefile", 8);
25        m.insert("CMakeLists.txt", 8);
26        m.insert("tsconfig.json", 8);
27        m.insert("setup.py", 8);
28        m.insert("pyproject.toml", 8);
29        m
30    };
31    static ref IMPORTANT_DIRS: Vec<&'static str> =
32        vec!["src", "core", "lib", "app", "pkg", "internal", "cmd",];
33    static ref DOMAIN_DIRS: Vec<&'static str> = vec![
34        "api",
35        "server",
36        "client",
37        "models",
38        "services",
39        "handlers",
40        "controllers",
41        "routes",
42        "middleware",
43        "database",
44        "db",
45        "auth",
46        "components",
47        "views",
48        "utils",
49    ];
50    static ref TEST_DIRS: Vec<&'static str> = vec!["test", "tests", "spec", "__tests__",];
51    static ref LOW_DIRS: Vec<&'static str> = vec![
52        "vendor",
53        "third_party",
54        "fixtures",
55        "mocks",
56        "docs",
57        "examples",
58        "scripts",
59        "tools",
60        "dist",
61        "build",
62        "out",
63    ];
64}
65
66pub fn score_file(filepath: &str) -> ScoredFile {
67    let path = Path::new(filepath);
68    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
69    let mut score = 7;
70    let mut reasons = Vec::new();
71
72    if filename == "main.rs" || filename == "lib.rs" || filename == "mod.rs" {
73        return ScoredFile {
74            path: filepath.to_string(),
75            score: 10,
76            reason: "entry point".to_string(),
77        };
78    }
79
80    if filename.starts_with("main.")
81        || filename.starts_with("index.")
82        || filename.starts_with("app.")
83        || filename.starts_with("server.")
84        || filename.starts_with("cli.")
85    {
86        return ScoredFile {
87            path: filepath.to_string(),
88            score: 10,
89            reason: "entry point".to_string(),
90        };
91    }
92
93    if filename == "README.md" || filename == "README" || filename == "README.rst" {
94        return ScoredFile {
95            path: filepath.to_string(),
96            score: 10,
97            reason: "readme".to_string(),
98        };
99    }
100
101    if filename.starts_with("config.") || filename.starts_with("settings.") {
102        return ScoredFile {
103            path: filepath.to_string(),
104            score: 9,
105            reason: "config".to_string(),
106        };
107    }
108
109    if filename.ends_with(".lock")
110        || filename.contains("-lock.")
111        || filename.contains(".lock.")
112        || filename.ends_with(".min.js")
113        || filename.ends_with(".min.css")
114        || filename.ends_with(".map")
115        || filename.ends_with(".d.ts")
116        || filename.ends_with(".pyc")
117        || filename.contains(".generated.")
118    {
119        return ScoredFile {
120            path: filepath.to_string(),
121            score: 2,
122            reason: "generated".to_string(),
123        };
124    }
125
126    if let Some(&project_score) = PROJECT_FILES.get(filename) {
127        return ScoredFile {
128            path: filepath.to_string(),
129            score: project_score,
130            reason: "project file".to_string(),
131        };
132    }
133
134    if filename.starts_with("test_")
135        || filename.contains("_test.")
136        || filename.contains(".test.")
137        || filename.contains(".spec.")
138    {
139        return ScoredFile {
140            path: filepath.to_string(),
141            score: 5,
142            reason: "test file".to_string(),
143        };
144    }
145
146    if filename == "__init__.py" {
147        return ScoredFile {
148            path: filepath.to_string(),
149            score: 3,
150            reason: "init file".to_string(),
151        };
152    }
153
154    let components: Vec<&str> = filepath.split('/').collect();
155    let depth = components.len() - 1;
156
157    for component in &components[..components.len().saturating_sub(1)] {
158        if IMPORTANT_DIRS.contains(component) {
159            score += 2;
160            reasons.push("important dir");
161            break;
162        }
163    }
164
165    for component in &components[..components.len().saturating_sub(1)] {
166        if DOMAIN_DIRS.contains(component) {
167            score += 1;
168            reasons.push("domain dir");
169            break;
170        }
171    }
172
173    for component in &components[..components.len().saturating_sub(1)] {
174        if TEST_DIRS.contains(component) {
175            score -= 2;
176            reasons.push("test dir");
177            break;
178        }
179    }
180
181    for component in &components[..components.len().saturating_sub(1)] {
182        if LOW_DIRS.contains(component) {
183            score -= 3;
184            reasons.push("low priority dir");
185            break;
186        }
187    }
188
189    if depth == 0 {
190        score += 1;
191        reasons.push("root level");
192    } else if depth > 4 {
193        score -= 2;
194        reasons.push("deep nesting");
195    } else if depth > 2 {
196        score -= 1;
197        reasons.push("nested");
198    }
199
200    let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("");
201    if extension == "proto" || extension == "graphql" || extension == "gql" || extension == "thrift"
202    {
203        score += 1;
204        reasons.push("schema file");
205    }
206
207    if (extension == "md" || extension == "rst")
208        && filename != "README.md"
209        && filename != "README.rst"
210    {
211        score -= 1;
212        reasons.push("doc file");
213    }
214
215    score = score.clamp(1, 10);
216
217    let reason = if reasons.is_empty() {
218        "base score".to_string()
219    } else {
220        reasons.join(", ")
221    };
222
223    ScoredFile {
224        path: filepath.to_string(),
225        score,
226        reason,
227    }
228}
229
230pub fn score_files(files: Vec<String>) -> Vec<ScoredFile> {
231    files.into_par_iter().map(|f| score_file(&f)).collect()
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Shorthand for the numeric score assigned to `path`.
    fn score_of(path: &str) -> i32 {
        score_file(path).score
    }

    proptest! {
        /// Whatever the input looks like, the score stays inside 1..=10.
        #[test]
        fn test_score_always_valid(s in "\\PC*") {
            let scored = score_file(&s);
            prop_assert!((1..=10).contains(&scored.score));
        }

        /// Paths nested more than four levels deep never score above 8.
        #[test]
        fn test_depth_score_consistency(s in "[a-z0-9/]{1,100}") {
            let scored = score_file(&s);
            // One '/' per directory level.
            let depth = s.matches('/').count();
            if depth > 4 {
                prop_assert!(scored.score <= 8);
            }
        }
    }

    #[test]
    fn test_entry_points() {
        assert_eq!(score_of("src/main.rs"), 10);
        assert_eq!(score_of("src/lib.rs"), 10);
        assert_eq!(score_of("index.js"), 10);
        assert_eq!(score_of("app.py"), 10);
    }

    #[test]
    fn test_readme() {
        assert_eq!(score_of("README.md"), 10);
        // The enclosing directory does not matter for a README.
        assert_eq!(score_of("docs/README.md"), 10);
    }

    #[test]
    fn test_config() {
        assert_eq!(score_of("config.toml"), 9);
        assert_eq!(score_of("settings.json"), 9);
    }

    #[test]
    fn test_project_files() {
        assert_eq!(score_of("Cargo.toml"), 8);
        assert_eq!(score_of("package.json"), 8);
        assert_eq!(score_of("Dockerfile"), 8);
    }

    #[test]
    fn test_test_files() {
        assert_eq!(score_of("test_foo.py"), 5);
        assert_eq!(score_of("foo_test.go"), 5);
        assert_eq!(score_of("foo.test.ts"), 5);
    }

    #[test]
    fn test_generated() {
        assert_eq!(score_of("package-lock.json"), 2);
        assert_eq!(score_of("bundle.min.js"), 2);
        assert_eq!(score_of("types.generated.ts"), 2);
    }

    #[test]
    fn test_directory_modifiers() {
        let in_src = score_of("src/handler.rs");
        let in_vendor = score_of("vendor/utils.rs");
        assert!(in_src > in_vendor);
    }

    #[test]
    fn test_depth() {
        let at_root = score_of("file.rs");
        let deeply_nested = score_of("a/b/c/d/e/file.rs");
        assert!(at_root > deeply_nested);
    }

    #[test]
    fn test_directory_rules() {
        // Base 7, +2 for an important directory.
        assert_eq!(score_of("src/foo.rs"), 9);
        assert_eq!(score_of("core/foo.rs"), 9);

        // Base 7, +1 for a domain directory.
        assert_eq!(score_of("api/foo.rs"), 8);

        // Base 7, -2 for a test directory.
        assert_eq!(score_of("tests/foo.rs"), 5);

        // Base 7, -3 for a low-priority directory.
        assert_eq!(score_of("vendor/foo.rs"), 4);
    }

    #[test]
    fn test_depth_scoring() {
        // Depth 0: +1 root bonus.
        assert_eq!(score_of("foo.rs"), 8);
        // Depth 1-2: no adjustment.
        assert_eq!(score_of("a/foo.rs"), 7);
        assert_eq!(score_of("a/b/foo.rs"), 7);
        // Depth 3-4: -1.
        assert_eq!(score_of("a/b/c/foo.rs"), 6);
        assert_eq!(score_of("a/b/c/d/foo.rs"), 6);
        // Depth 5 and beyond: -2.
        assert_eq!(score_of("a/b/c/d/e/foo.rs"), 5);
    }

    #[test]
    fn test_schema_files() {
        // Base 7, +1 root level, +1 schema extension.
        assert_eq!(score_of("schema.proto"), 9);
        // Base 7, +1 domain directory, +1 schema extension.
        assert_eq!(score_of("api/schema.graphql"), 9);
    }

    #[test]
    fn test_doc_files() {
        // Base 7, +1 root level, -1 doc extension.
        assert_eq!(score_of("docs.md"), 7);
        assert_eq!(score_of("README.md"), 10);
    }
}