use crate::storage::UniqueProjectId;
use std::path::PathBuf;
use thiserror::Error;
/// Error type carried by the `Result` that `DiscoveryEngine::discover` returns.
#[derive(Debug, Error)]
pub enum DiscoveryError {
/// An underlying I/O operation failed. Thanks to `#[from]`, a
/// `std::io::Error` converts into this variant automatically (e.g. via `?`).
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// A project was rejected as invalid; the payload string is interpolated
/// into the error message. NOTE(review): nothing in the visible part of this
/// file constructs this variant — confirm where it is raised.
#[error("Invalid project: {0}")]
InvalidProject(String),
}
/// Description of one project directory found during discovery.
///
/// The field notes below describe how `scan_project` in this file populates
/// the struct; other constructors (if any exist elsewhere) may differ.
#[derive(Debug, Clone)]
pub struct DiscoveredProject {
/// Identifier built with `UniqueProjectId::new(base_name, content_fingerprint, 0)`.
pub unique_id: UniqueProjectId,
/// Final component of `path`, or `"unknown"` when it is missing or not valid UTF-8.
pub base_name: String,
/// Directory that was scanned.
pub path: PathBuf,
/// Language label guessed by `detect_language`, if any.
pub language: Option<String>,
/// Number of regular, non-dot-prefixed files located directly in `path`.
pub file_count: usize,
/// First 8 hex characters of the BLAKE3 hash of the string `"<path>:<file_count>"`.
pub content_fingerprint: String,
/// Always set to `true` by `scan_project`.
pub is_valid: bool,
}
/// Holds the list of root directories that project discovery will scan.
///
/// `Debug` is derived alongside `Clone` so the engine can be logged, printed
/// in assertions, and embedded in other `#[derive(Debug)]` types.
#[derive(Debug, Clone)]
pub struct DiscoveryEngine {
    /// Root directories to scan, kept in the order they were supplied.
    search_paths: Vec<PathBuf>,
}
impl DiscoveryEngine {
    /// Creates an engine with no search paths configured.
    #[must_use]
    pub fn new() -> Self {
        Self::with_roots(Vec::new())
    }

    /// Creates an engine that will scan exactly the given `roots`.
    #[must_use]
    pub fn with_roots(roots: Vec<PathBuf>) -> Self {
        let search_paths = roots;
        Self { search_paths }
    }

    /// Appends `path` to the list of roots to scan.
    pub fn add_search_path(&mut self, path: PathBuf) {
        self.search_paths.push(path);
    }

    /// Scans every configured search path and returns the projects found.
    ///
    /// For each search path that exists and is a directory:
    /// * if it contains a `.git` entry, the path itself is offered to
    ///   `scan_project`;
    /// * its subdirectories are then scanned with a depth limit of 2.
    ///
    /// Search paths that are missing or not directories are skipped, and an
    /// error from the subdirectory scan is discarded (best effort).
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`; the `Result` is part
    /// of the signature callers already depend on.
    pub fn discover(&self) -> std::result::Result<Vec<DiscoveredProject>, DiscoveryError> {
        // How many directory levels below a search path are examined.
        const SUBDIRECTORY_DEPTH: usize = 2;

        let mut found = Vec::new();
        for root in &self.search_paths {
            if root.exists() && root.is_dir() {
                // The root itself may be a repository.
                if root.join(".git").exists() {
                    found.extend(scan_project(root));
                }
                // Nested repositories; a failed scan contributes nothing.
                if let Ok(nested) = scan_subdirectories(root, SUBDIRECTORY_DEPTH) {
                    found.extend(nested);
                }
            }
        }
        Ok(found)
    }
}
/// Inspects `path` and, if it looks like a project, describes it.
///
/// A directory qualifies when it contains a `.git` entry and at least one
/// regular file directly inside it whose name does not start with `.`.
///
/// Returns `None` when the `.git` entry is missing, the directory cannot be
/// read, or no qualifying file is present.
fn scan_project(path: &std::path::Path) -> Option<DiscoveredProject> {
    if !path.join(".git").exists() {
        return None;
    }

    // Count visible regular files at the top level only; unreadable entries
    // are ignored.
    let file_count = std::fs::read_dir(path)
        .ok()?
        .filter_map(Result::ok)
        .filter(|entry| {
            let hidden = entry.file_name().to_string_lossy().starts_with('.');
            !hidden && entry.path().is_file()
        })
        .count();
    if file_count == 0 {
        return None;
    }

    let language = detect_language(path);

    // The fingerprint is the first 8 hex characters of the BLAKE3 hash of
    // "<path>:<file_count>".
    let content_fingerprint = {
        let seed = format!("{}:{}", path.display(), file_count);
        let digest = blake3::hash(seed.as_bytes()).to_hex();
        digest[..8].to_string()
    };

    let base_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_string(),
        None => "unknown".to_string(),
    };

    let unique_id = UniqueProjectId::new(base_name.clone(), content_fingerprint.clone(), 0);

    Some(DiscoveredProject {
        unique_id,
        base_name,
        path: path.to_path_buf(),
        language,
        file_count,
        content_fingerprint,
        is_valid: true,
    })
}
/// Recursively looks for projects in the subdirectories of `path`.
///
/// Each subdirectory is first offered to `scan_project`. If it is not a
/// project, its own subdirectories are searched with `max_depth - 1`. A
/// `max_depth` of 0 returns an empty list without touching the filesystem.
///
/// Nested directories that cannot be read (for example because of missing
/// permissions, or because they vanished mid-scan) are skipped rather than
/// aborting the whole scan, so one bad directory no longer discards every
/// project already found next to it.
///
/// # Errors
///
/// Returns the I/O error only when `path` itself cannot be listed.
fn scan_subdirectories(
    path: &std::path::Path,
    max_depth: usize,
) -> std::io::Result<Vec<DiscoveredProject>> {
    let mut discovered = Vec::new();
    if max_depth == 0 {
        return Ok(discovered);
    }

    for entry in std::fs::read_dir(path)?.filter_map(Result::ok) {
        let entry_path = entry.path();
        if !entry_path.is_dir() {
            continue;
        }

        match scan_project(&entry_path) {
            Some(project) => discovered.push(project),
            None => {
                // Best effort: an unreadable subtree contributes nothing but
                // must not hide the projects found in its siblings.
                if let Ok(nested) = scan_subdirectories(&entry_path, max_depth - 1) {
                    discovered.extend(nested);
                }
            }
        }
    }

    Ok(discovered)
}
/// Guesses the dominant language of the entries located directly in `path`.
///
/// Extensions are lowercased and tallied; the most frequent extension is then
/// mapped to a language label. Returns `None` when the directory cannot be
/// read, when no entry has a UTF-8 extension, or when the winning extension
/// is not a recognised language.
///
/// Ties between equally frequent extensions are resolved deterministically:
/// an extension that maps to a known language beats one that does not, and
/// any remaining tie goes to the alphabetically smallest extension. Without
/// this, the answer would depend on `HashMap` iteration order, which is
/// arbitrary.
fn detect_language(path: &std::path::Path) -> Option<String> {
    // Maps a lowercased extension to its language label, if recognised.
    fn language_for(ext: &str) -> Option<&'static str> {
        let language = match ext {
            "rs" => "rust",
            "go" => "go",
            "py" => "python",
            "js" | "jsx" | "mjs" | "cjs" => "javascript",
            "ts" | "tsx" | "mts" | "cts" => "typescript",
            "java" => "java",
            "cpp" | "cc" | "cxx" | "h" => "cpp",
            "c" => "c",
            "cs" => "csharp",
            "rb" => "ruby",
            "php" => "php",
            "scala" => "scala",
            "kt" | "kts" => "kotlin",
            "swift" => "swift",
            _ => return None,
        };
        Some(language)
    }

    let entries = std::fs::read_dir(path).ok()?;

    let mut counts: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
    for entry in entries.filter_map(Result::ok) {
        let entry_path = entry.path();
        if let Some(ext) = entry_path.extension().and_then(|ext| ext.to_str()) {
            *counts.entry(ext.to_ascii_lowercase()).or_insert(0) += 1;
        }
    }

    // Keys are unique, so this comparison is a total order and the winner
    // does not depend on the map's iteration order.
    let (winner, _) = counts
        .into_iter()
        .max_by(|(ext_a, count_a), (ext_b, count_b)| {
            count_a
                .cmp(count_b)
                .then_with(|| {
                    language_for(ext_a)
                        .is_some()
                        .cmp(&language_for(ext_b).is_some())
                })
                // Reversed so the alphabetically smaller extension ranks higher.
                .then_with(|| ext_b.cmp(ext_a))
        })?;

    language_for(&winner).map(String::from)
}
impl Default for DiscoveryEngine {
fn default() -> Self {
Self::new()
}
}