plissken_core/
discover.rs1use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use walkdir::WalkDir;
9
10use crate::config::ModuleSourceType;
11
12#[derive(Debug, Clone)]
14pub struct DiscoveredModule {
15 pub name: String,
17 pub path: PathBuf,
19 pub module_type: ModuleSourceType,
21}
22
/// Directory names that are never descended into during module discovery.
///
/// Names are compared for exact equality; `*.egg-info` directories are matched
/// separately by suffix in `should_skip_entry`.
const SKIP_DIRS: &[&str] = &[
    // Interpreter byte-code cache.
    "__pycache__",
    // Virtual environments.
    ".venv", "venv", ".env", "env",
    // Test-runner environments and tool caches.
    ".tox", ".nox", ".pytest_cache", ".mypy_cache", ".ruff_cache",
    // Foreign dependency trees and VCS metadata.
    "node_modules", ".git",
    // Build and packaging output.
    "build", "dist", "egg-info",
];
41
42pub fn discover_python_modules(
51 source_dir: &Path,
52 package_name: &str,
53) -> Result<Vec<DiscoveredModule>, std::io::Error> {
54 let mut modules = Vec::new();
55
56 if !source_dir.exists() {
57 return Ok(modules);
58 }
59
60 for entry in WalkDir::new(source_dir)
61 .follow_links(true)
62 .into_iter()
63 .filter_entry(|e| !should_skip_entry(e))
64 {
65 let entry = entry?;
66 let path = entry.path();
67
68 if path.extension().map(|e| e == "py").unwrap_or(false)
70 && let Some(module) = path_to_module(path, source_dir, package_name)
71 {
72 modules.push(module);
73 }
74 }
75
76 modules.sort_by(|a, b| a.name.cmp(&b.name));
78
79 Ok(modules)
80}
81
82fn should_skip_entry(entry: &walkdir::DirEntry) -> bool {
84 let file_name = entry.file_name().to_string_lossy();
85
86 if file_name.starts_with('.') && entry.depth() > 0 {
88 return true;
89 }
90
91 if entry.file_type().is_dir() {
93 if SKIP_DIRS.iter().any(|&skip| file_name == skip) {
94 return true;
95 }
96 if file_name.ends_with(".egg-info") {
98 return true;
99 }
100 }
101
102 false
103}
104
105fn path_to_module(
107 file_path: &Path,
108 source_dir: &Path,
109 package_name: &str,
110) -> Option<DiscoveredModule> {
111 let relative = file_path.strip_prefix(source_dir).ok()?;
113
114 let module_name = path_to_module_name(relative, package_name)?;
116
117 let module_type = detect_module_type(file_path);
119
120 Some(DiscoveredModule {
121 name: module_name,
122 path: file_path.to_owned(),
123 module_type,
124 })
125}
126
/// Converts a path relative to the source directory into a dotted module name.
///
/// - `utils.py`           -> `<package_name>.utils`
/// - `sub/helpers.py`     -> `<package_name>.sub.helpers`
/// - `__init__.py`        -> `<package_name>`
/// - `sub/__init__.py`    -> `<package_name>.sub`
/// - `<package_name>/x.py` -> `<package_name>.x` (the prefix is not doubled)
///
/// Returns `None` when the path has no normal components, when a component is
/// not valid UTF-8, when the final component does not end in `.py`, or when
/// the final component is just `.py` (empty module stem).
fn path_to_module_name(relative_path: &Path, package_name: &str) -> Option<String> {
    // Keep only ordinary path segments; `.`, `..`, root and prefix components
    // contribute nothing to a module name.
    let mut components: Vec<&str> = Vec::new();
    for component in relative_path.components() {
        if let std::path::Component::Normal(name) = component {
            components.push(name.to_str()?);
        }
    }

    // The final segment is the file itself; everything before it is packages.
    let file_name = components.pop()?;
    let stem = file_name.strip_suffix(".py")?;
    if stem.is_empty() {
        // A file literally named `.py` would otherwise yield `pkg.`.
        return None;
    }

    // `__init__.py` names its containing package, so it adds no segment.
    if stem != "__init__" {
        components.push(stem);
    }

    match components.first() {
        // Top-level `__init__.py`: the package itself.
        None => Some(package_name.to_string()),
        // Path already starts with the package directory; do not prefix twice.
        Some(&first) if first == package_name => Some(components.join(".")),
        Some(_) => Some(format!("{}.{}", package_name, components.join("."))),
    }
}
177
178fn detect_module_type(file_path: &Path) -> ModuleSourceType {
184 if let Ok(content) = std::fs::read_to_string(file_path) {
186 let preview = if content.len() > 2048 {
188 &content[..2048]
189 } else {
190 &content
191 };
192
193 if preview.contains("# pyo3")
195 || preview.contains("#pyo3")
196 || preview.contains("# type: ignore[import]")
197 {
199 return ModuleSourceType::Pyo3;
200 }
201
202 for line in preview.lines() {
204 let line = line.trim();
205 if (line.starts_with("from ._") || line.starts_with("from _"))
206 && line.contains(" import ")
207 {
208 return ModuleSourceType::Pyo3;
209 }
210 }
211 }
212
213 ModuleSourceType::Python
214}
215
216pub fn merge_modules(
220 discovered: Vec<DiscoveredModule>,
221 explicit: &HashMap<String, ModuleSourceType>,
222) -> HashMap<String, ModuleSourceType> {
223 let mut result: HashMap<String, ModuleSourceType> = discovered
224 .into_iter()
225 .map(|m| (m.name, m.module_type))
226 .collect();
227
228 for (name, module_type) in explicit {
230 result.insert(name.clone(), module_type.clone());
231 }
232
233 result
234}
235
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Derives the module name for `relative` inside the package `mypackage`.
    fn name_of(relative: &str) -> Option<String> {
        path_to_module_name(Path::new(relative), "mypackage")
    }

    /// Writes `contents` to `module.py` in a fresh temp dir and classifies it.
    fn classify(contents: &str) -> ModuleSourceType {
        let temp_dir = TempDir::new().unwrap();
        let file = temp_dir.path().join("module.py");
        std::fs::write(&file, contents).unwrap();
        detect_module_type(&file)
    }

    #[test]
    fn test_path_to_module_name_simple() {
        assert_eq!(name_of("utils.py"), Some("mypackage.utils".to_string()));
    }

    #[test]
    fn test_path_to_module_name_nested() {
        assert_eq!(
            name_of("sub/helpers.py"),
            Some("mypackage.sub.helpers".to_string())
        );
    }

    #[test]
    fn test_path_to_module_name_init() {
        assert_eq!(name_of("__init__.py"), Some("mypackage".to_string()));
    }

    #[test]
    fn test_path_to_module_name_subpackage_init() {
        assert_eq!(name_of("sub/__init__.py"), Some("mypackage.sub".to_string()));
    }

    #[test]
    fn test_path_to_module_name_with_package_in_path() {
        assert_eq!(
            name_of("mypackage/utils.py"),
            Some("mypackage.utils".to_string())
        );
    }

    #[test]
    fn test_discover_python_modules() {
        let temp_dir = TempDir::new().unwrap();
        let pkg_dir = temp_dir.path().join("mypackage");

        // Package root, one sub-package, and a byte-code cache that must be ignored.
        let sub_dir = pkg_dir.join("sub");
        let pycache = pkg_dir.join("__pycache__");
        for dir in [&pkg_dir, &sub_dir, &pycache] {
            std::fs::create_dir(dir).unwrap();
        }

        let files = [
            (pkg_dir.join("__init__.py"), ""),
            (pkg_dir.join("utils.py"), "def helper(): pass"),
            (pkg_dir.join("core.py"), "class Engine: pass"),
            (sub_dir.join("__init__.py"), ""),
            (sub_dir.join("helpers.py"), ""),
            (pycache.join("utils.cpython-311.pyc"), ""),
        ];
        for (file, contents) in &files {
            std::fs::write(file, contents).unwrap();
        }

        let modules = discover_python_modules(&pkg_dir, "mypackage").unwrap();
        let names: Vec<&str> = modules.iter().map(|m| m.name.as_str()).collect();

        for expected in [
            "mypackage",
            "mypackage.utils",
            "mypackage.core",
            "mypackage.sub",
            "mypackage.sub.helpers",
        ] {
            assert!(names.contains(&expected));
        }
        assert!(!names.iter().any(|n| n.contains("pycache")));
    }

    #[test]
    fn test_detect_module_type_python() {
        assert!(matches!(
            classify("def foo(): pass\n"),
            ModuleSourceType::Python
        ));
    }

    #[test]
    fn test_detect_module_type_pyo3_marker() {
        assert!(matches!(
            classify("# pyo3\nfrom ._native import Foo\n"),
            ModuleSourceType::Pyo3
        ));
    }

    #[test]
    fn test_detect_module_type_native_import() {
        assert!(matches!(
            classify("from ._impl import SomeClass\n"),
            ModuleSourceType::Pyo3
        ));
    }

    #[test]
    fn test_merge_modules() {
        let discovered: Vec<DiscoveredModule> = [("pkg.a", "a.py"), ("pkg.b", "b.py")]
            .into_iter()
            .map(|(name, file)| DiscoveredModule {
                name: name.to_string(),
                path: PathBuf::from(file),
                module_type: ModuleSourceType::Python,
            })
            .collect();

        // `pkg.b` overrides the discovered entry; `pkg.c` exists only explicitly.
        let explicit = HashMap::from([
            ("pkg.b".to_string(), ModuleSourceType::Pyo3),
            ("pkg.c".to_string(), ModuleSourceType::Python),
        ]);

        let merged = merge_modules(discovered, &explicit);

        assert_eq!(merged.len(), 3);
        assert!(matches!(
            merged.get("pkg.a"),
            Some(ModuleSourceType::Python)
        ));
        assert!(matches!(merged.get("pkg.b"), Some(ModuleSourceType::Pyo3)));
        assert!(matches!(
            merged.get("pkg.c"),
            Some(ModuleSourceType::Python)
        ));
    }
}