use crate::core::config;
use anyhow::Result;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
/// Gathers the Python sources under `dir` that pass every "compilable" heuristic.
///
/// The directory tree is walked with [`collect_python_files_recursive`], which
/// skips a fixed list of file names and directory names. Each file found is
/// then dropped (with a `Skipping ...` message on stdout) when it:
///
/// 1. contains no `def` line,
/// 2. has imports flagged by [`has_complex_imports`], or
/// 3. has module-level code that [`has_complex_module_level_code`] also flags.
///
/// Returns a map from path (relative to `dir`) to file content.
///
/// # Errors
///
/// Propagates any error returned by the recursive directory walk.
pub fn collect_compilable_python_files(dir: &Path) -> Result<HashMap<String, String>> {
    // File names that are never candidates, whatever they contain.
    const SKIPPED_FILE_NAMES: &[&str] = &[
        "__init__.py",
        "__about__.py",
        "__version__.py",
        "__main__.py",
        "setup.py",
    ];
    // Directory names that are never descended into.
    const SKIPPED_DIR_NAMES: &[&str] = &[
        "venv",
        "env",
        ".venv",
        ".env",
        ".git",
        "__pycache__",
        "node_modules",
        "site-packages",
        "dist",
        "build",
        "tests",
        "docs",
    ];

    let mut discovered = HashMap::new();
    collect_python_files_recursive(
        dir,
        dir,
        &mut discovered,
        SKIPPED_FILE_NAMES,
        SKIPPED_DIR_NAMES,
    )?;

    // The map is iterated in its own (unspecified) order, so the order of the
    // "Skipping" messages is not stable between runs.
    let compilable: HashMap<String, String> = discovered
        .into_iter()
        .filter(|(path, content)| {
            if !contains_function_definitions(content) {
                println!("Skipping {path} (no functions)");
                return false;
            }
            if has_complex_imports(content) {
                println!("Skipping {path} (complex imports)");
                return false;
            }
            // Only files that have module-level code at all are checked for
            // *complex* module-level code.
            if has_module_level_code(content) && has_complex_module_level_code(content) {
                println!("Skipping {path} (complex module-level code)");
                return false;
            }
            true
        })
        .collect();

    Ok(compilable)
}
/// Walks `current_dir` recursively and stores every readable `.py` file in `files`.
///
/// * `root_dir` - prefix stripped from each path to build the map key; when
///   stripping fails the full path is used instead.
/// * `current_dir` - directory listed by this call.
/// * `files` - output map from (lossily converted) relative path to content.
/// * `exclude_files` - file names that are skipped.
/// * `exclude_dirs` - directory names that are not descended into; directories
///   whose name starts with `.` are always skipped as well.
///
/// Files that cannot be read as a string only produce a warning on stdout.
///
/// # Errors
///
/// Returns an error when a directory cannot be listed or an entry cannot be
/// read, including errors from nested calls.
pub fn collect_python_files_recursive(
    root_dir: &Path,
    current_dir: &Path,
    files: &mut HashMap<String, String>,
    exclude_files: &[&str],
    exclude_dirs: &[&str],
) -> Result<()> {
    for entry in fs::read_dir(current_dir)? {
        let path = entry?.path();
        // Final path component, if any, as text for the exclusion checks.
        let name = path.file_name().map(|n| n.to_string_lossy());

        if path.is_dir() {
            let excluded = name
                .as_deref()
                .is_some_and(|n| n.starts_with('.') || exclude_dirs.iter().any(|d| *d == n));
            if !excluded {
                collect_python_files_recursive(
                    root_dir,
                    &path,
                    files,
                    exclude_files,
                    exclude_dirs,
                )?;
            }
            continue;
        }

        // Only regular files with a `py` extension are of interest.
        if !(path.is_file() && path.extension().is_some_and(|ext| ext == "py")) {
            continue;
        }
        if name
            .as_deref()
            .is_some_and(|n| exclude_files.iter().any(|f| *f == n))
        {
            continue;
        }

        match fs::read_to_string(&path) {
            Ok(content) => {
                let key = path
                    .strip_prefix(root_dir)
                    .unwrap_or(&path)
                    .to_string_lossy()
                    .into_owned();
                files.insert(key, content);
            }
            // Best effort: an unreadable file is reported but does not abort the walk.
            Err(e) => println!("Warning: Failed to read {}: {}", path.display(), e),
        }
    }
    Ok(())
}
/// Reports whether `filename` names a "special" Python file.
///
/// Only the final path component is examined (an empty string is used when
/// there is none). The file counts as special when that name:
///
/// * starts with `__`,
/// * equals `setup.py`,
/// * contains `test` or `config` anywhere, or
/// * is accepted by `config::is_config_file`.
pub fn is_special_python_file(filename: &str) -> bool {
    let base_name = match Path::new(filename).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    };

    if base_name.starts_with("__") || base_name == "setup.py" {
        return true;
    }
    // Substring matches, so names such as "latest.py" also qualify.
    if base_name.contains("test") || base_name.contains("config") {
        return true;
    }
    // Consulted last, only when none of the local checks matched.
    config::is_config_file(&base_name)
}
/// Returns `true` when at least one line of `content`, ignoring surrounding
/// whitespace, begins with `def ` (so nested functions and methods count too).
pub fn contains_function_definitions(content: &str) -> bool {
    content
        .lines()
        .map(str::trim)
        .any(|stripped| stripped.starts_with("def "))
}
/// Checks the first 30 lines of `content` for import statements considered complex.
///
/// A line counts when, after trimming, it starts with `import ` or `from ` and
/// contains any of `*`, `(`, `)`, `try:` or `except`. Lines past the 30th are
/// never inspected.
pub fn has_complex_imports(content: &str) -> bool {
    // Substrings that mark an import line as complex.
    const COMPLEX_MARKERS: [&str; 5] = ["*", "(", ")", "try:", "except"];

    content
        .lines()
        .take(30)
        .map(str::trim)
        .filter(|stmt| stmt.starts_with("import ") || stmt.starts_with("from "))
        .any(|stmt| COMPLEX_MARKERS.iter().any(|marker| stmt.contains(*marker)))
}
/// Heuristically reports whether `content` has executable module-level code.
///
/// The scan is line based. Blank lines, `#` comments and triple-quoted
/// docstrings are ignored. A `def ` line enters "function" state and a
/// `class ` line leaves it. A non-indented line that follows a blank line also
/// leaves function state. Outside function state, a line counts as
/// module-level code unless it is an import, starts with `__`, or contains
/// ` = ` (treated as a plain assignment).
///
/// Returns `true` on the first line that counts as module-level code.
pub fn has_module_level_code(content: &str) -> bool {
    let mut in_function = false;
    // Delimiter of the docstring currently open, if any.
    let mut open_docstring: Option<&'static str> = None;
    let mut last_line_blank = true;

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            last_line_blank = true;
            continue;
        }

        // Inside a docstring: skip everything up to and including the line
        // holding the matching closing delimiter, wherever it sits on the line.
        if let Some(delim) = open_docstring {
            if trimmed.contains(delim) {
                open_docstring = None;
            }
            continue;
        }
        if trimmed.starts_with('#') {
            continue;
        }

        let opener = if trimmed.starts_with("\"\"\"") {
            Some("\"\"\"")
        } else if trimmed.starts_with("'''") {
            Some("'''")
        } else {
            None
        };
        if let Some(delim) = opener {
            // A docstring that opens and closes on the same line must not
            // leave the scanner stuck in docstring state.
            if !trimmed[delim.len()..].contains(delim) {
                open_docstring = Some(delim);
            }
            continue;
        }

        if trimmed.starts_with("def ") {
            in_function = true;
            last_line_blank = false;
            continue;
        }
        if trimmed.starts_with("class ") {
            in_function = false;
            last_line_blank = false;
            continue;
        }

        // Indentation has to be read from the raw line: `trimmed` never starts
        // with whitespace, so testing it would end the function at every line
        // that follows a blank line.
        let is_indented = line.starts_with(' ') || line.starts_with('\t');
        if last_line_blank && !is_indented {
            in_function = false;
        }

        let is_import = trimmed.starts_with("import ") || trimmed.starts_with("from ");
        if !in_function && !is_import && !trimmed.starts_with("__") && !trimmed.contains(" = ") {
            return true;
        }
        last_line_blank = false;
    }
    false
}
/// Heuristically reports whether `content` has "complex" module-level code.
///
/// Blank lines, `#` comments and triple-quoted docstrings are ignored. Lines
/// are inspected only until the first `def ` or `class ` line. Among those
/// lines, non-import ones count as complex when they:
///
/// * start with `return`,
/// * contain a control-flow or expression keyword (`if `, `for `, `while `,
///   `with `, `try:`, `except `, `lambda `, `yield `, `raise `), or
/// * contain both `(` and `)` without ` = ` (a bare call).
pub fn has_complex_module_level_code(content: &str) -> bool {
    // Substrings that mark a module-level line as complex.
    const COMPLEX_MARKERS: [&str; 9] = [
        "if ", "for ", "while ", "with ", "try:", "except ", "lambda ", "yield ", "raise ",
    ];

    // NOTE(review): this flag is never cleared once a `def`/`class` line has
    // been seen, so module-level code after the first definition is not
    // inspected. Kept as-is; confirm this is intended.
    let mut in_function = false;
    // Delimiter of the docstring currently open, if any.
    let mut open_docstring: Option<&'static str> = None;

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }

        // Inside a docstring: skip everything up to and including the line
        // holding the matching closing delimiter, wherever it sits on the line.
        if let Some(delim) = open_docstring {
            if trimmed.contains(delim) {
                open_docstring = None;
            }
            continue;
        }
        if trimmed.starts_with('#') {
            continue;
        }

        let opener = if trimmed.starts_with("\"\"\"") {
            Some("\"\"\"")
        } else if trimmed.starts_with("'''") {
            Some("'''")
        } else {
            None
        };
        if let Some(delim) = opener {
            // A docstring that opens and closes on the same line must not
            // leave the scanner stuck in docstring state.
            if !trimmed[delim.len()..].contains(delim) {
                open_docstring = Some(delim);
            }
            continue;
        }

        if trimmed.starts_with("def ") || trimmed.starts_with("class ") {
            in_function = true;
            continue;
        }
        if in_function {
            continue;
        }

        if trimmed.starts_with("return") {
            return true;
        }
        if trimmed.starts_with("import ") || trimmed.starts_with("from ") {
            continue;
        }
        if COMPLEX_MARKERS.iter().any(|marker| trimmed.contains(*marker)) {
            return true;
        }
        // A parenthesised expression that is not an assignment is treated as a call.
        if trimmed.contains('(') && trimmed.contains(')') && !trimmed.contains(" = ") {
            return true;
        }
    }
    false
}
/// Reports whether a directory called `dir_name` should be skipped.
///
/// A directory is skipped when its name:
///
/// * is exactly `venv`, `node_modules`, `dist` or `build`,
/// * starts with `.`, `__pycache__` or `env`, or
/// * contains `site-packages`.
pub fn should_skip_directory(dir_name: &str) -> bool {
    // Prefix matches; note that "env" also covers names such as "environment".
    const SKIPPED_PREFIXES: [&str; 2] = ["__pycache__", "env"];

    if matches!(dir_name, "venv" | "node_modules" | "dist" | "build") {
        return true;
    }
    dir_name.starts_with('.')
        || dir_name.contains("site-packages")
        || SKIPPED_PREFIXES
            .iter()
            .any(|prefix| dir_name.starts_with(*prefix))
}