Skip to main content

polykit_core/
scanner.rs

1//! Repository scanner for discovering packages.
2
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7use rustc_hash::FxHashMap;
8use walkdir::WalkDir;
9
10use crate::cache::Cache;
11use crate::config::{Config, WorkspaceConfig};
12use crate::error::Result;
13use crate::package::Package;
14use crate::simd_utils;
15
16fn get_default_cache_dir() -> std::path::PathBuf {
17    dirs::cache_dir()
18        .map(|d| d.join("polykit"))
19        .unwrap_or_else(|| std::env::temp_dir().join("polykit-cache"))
20}
21
22/// Scans a directory for packages.
23///
24/// Looks for `polykit.toml` files and parses them into `Package` structures.
25/// Uses caching for fast incremental scans.
26pub struct Scanner {
27    packages_dir: PathBuf,
28    cache: Option<Cache>,
29    workspace_config: Option<WorkspaceConfig>,
30}
31
32impl Scanner {
33    fn load_workspace_config(packages_dir: &Path) -> Option<WorkspaceConfig> {
34        let mut current_dir = packages_dir.parent()?;
35
36        loop {
37            let workspace_toml = current_dir.join("polykit.toml");
38            if workspace_toml.exists() {
39                let content = std::fs::read_to_string(&workspace_toml).ok()?;
40                let mut table: toml::Value = toml::from_str(&content).ok()?;
41                let workspace_table = table.get_mut("workspace")?.as_table_mut()?;
42
43                let mut config = WorkspaceConfig {
44                    cache_dir: workspace_table
45                        .get("cache_dir")
46                        .and_then(|v| v.as_str())
47                        .map(|s| s.to_string()),
48                    default_parallel: workspace_table
49                        .get("default_parallel")
50                        .and_then(|v| v.as_integer())
51                        .map(|i| i as usize),
52                    workspace_config_path: Some(workspace_toml),
53                    tasks: FxHashMap::default(),
54                    remote_cache: None,
55                };
56
57                if let Some(tasks_table) = workspace_table.get("tasks").and_then(|v| v.as_table()) {
58                    config.tasks = crate::config::parse_tasks_from_toml_map(tasks_table);
59                }
60
61                return Some(config);
62            }
63
64            if current_dir.join(".git").exists() {
65                break;
66            }
67
68            match current_dir.parent() {
69                Some(parent) => {
70                    if parent == current_dir {
71                        break;
72                    }
73                    current_dir = parent;
74                }
75                None => break,
76            }
77        }
78
79        None
80    }
81
82    pub fn new(packages_dir: impl AsRef<Path>) -> Self {
83        let packages_dir = packages_dir.as_ref().to_path_buf();
84        let workspace_config = Self::load_workspace_config(&packages_dir);
85        Self {
86            packages_dir,
87            cache: None,
88            workspace_config,
89        }
90    }
91
92    pub fn with_default_cache(packages_dir: impl AsRef<Path>) -> Self {
93        let packages_dir = packages_dir.as_ref().to_path_buf();
94        let workspace_config = Self::load_workspace_config(&packages_dir);
95        let cache_dir = workspace_config
96            .as_ref()
97            .and_then(|wc| {
98                wc.cache_dir.as_ref().map(|cache_dir_str| {
99                    let cache_path = PathBuf::from(cache_dir_str);
100                    if cache_path.is_absolute() {
101                        cache_path
102                    } else {
103                        wc.workspace_config_path
104                            .as_ref()
105                            .and_then(|config_path| config_path.parent())
106                            .map(|config_dir| config_dir.join(&cache_path))
107                            .unwrap_or_else(|| PathBuf::from(cache_dir_str))
108                    }
109                })
110            })
111            .unwrap_or_else(get_default_cache_dir);
112        Self {
113            packages_dir,
114            cache: Some(Cache::new(cache_dir)),
115            workspace_config,
116        }
117    }
118
119    pub fn with_cache(packages_dir: impl AsRef<Path>, cache_dir: impl AsRef<Path>) -> Self {
120        let packages_dir = packages_dir.as_ref().to_path_buf();
121        let workspace_config = Self::load_workspace_config(&packages_dir);
122        Self {
123            packages_dir,
124            cache: Some(Cache::new(cache_dir)),
125            workspace_config,
126        }
127    }
128
129    pub fn workspace_config(&self) -> Option<&WorkspaceConfig> {
130        self.workspace_config.as_ref()
131    }
132
133    pub fn cache_stats(&self) -> Option<&crate::cache::CacheStats> {
134        self.cache.as_ref().map(|c| c.stats())
135    }
136
137    pub fn scan(&mut self) -> Result<Vec<Package>> {
138        if let Some(ref mut cache) = self.cache {
139            if let Some(cached) = cache.load(&self.packages_dir)? {
140                return Ok(cached);
141            }
142        }
143
144        let packages = self.scan_internal()?;
145
146        if let Some(ref mut cache) = self.cache {
147            cache.save(&self.packages_dir, &packages)?;
148        }
149
150        Ok(packages)
151    }
152
153    #[inline]
154    fn scan_internal(&self) -> Result<Vec<Package>> {
155        let workspace_config = Arc::new(self.workspace_config.clone());
156        let packages_dir = Arc::new(self.packages_dir.clone());
157
158        let config_files: Vec<PathBuf> = WalkDir::new(&self.packages_dir)
159            .max_depth(2)
160            .follow_links(false)
161            .into_iter()
162            .filter_map(|e| {
163                let entry = e.ok()?;
164                let name_bytes = entry.file_name().as_encoded_bytes();
165                if simd_utils::fast_str_eq(
166                    std::str::from_utf8(name_bytes).unwrap_or(""),
167                    "polykit.toml",
168                ) {
169                    Some(entry.path().to_path_buf())
170                } else {
171                    None
172                }
173            })
174            .collect();
175
176        let packages: Result<Vec<Package>> = config_files
177            .into_par_iter()
178            .map(|config_path| {
179                let package_path = config_path
180                    .parent()
181                    .ok_or_else(|| crate::error::Error::ConfigNotFound(config_path.clone()))?;
182
183                let config_content = std::fs::read_to_string(&config_path)?;
184                let config: Config = toml::from_str(&config_content)?;
185
186                crate::command_validator::CommandValidator::validate_identifier(
187                    &config.name,
188                    "Package name",
189                )?;
190
191                for dep_name in &config.deps.internal {
192                    crate::command_validator::CommandValidator::validate_identifier(
193                        dep_name,
194                        "Dependency name",
195                    )?;
196                }
197
198                let language = config.parse_language()?;
199                let relative_path = package_path
200                    .strip_prefix(packages_dir.as_ref())
201                    .map(|p| p.to_path_buf())
202                    .unwrap_or_else(|_| package_path.to_path_buf());
203
204                let mut package_tasks = config.to_tasks();
205
206                for task in &package_tasks {
207                    crate::command_validator::CommandValidator::validate_identifier(
208                        &task.name,
209                        "Task name",
210                    )?;
211                }
212
213                if let Some(ref ws_config) = workspace_config.as_ref() {
214                    let workspace_tasks = ws_config.to_tasks();
215                    for workspace_task in workspace_tasks {
216                        crate::command_validator::CommandValidator::validate_identifier(
217                            &workspace_task.name,
218                            "Workspace task name",
219                        )?;
220                        if !package_tasks.iter().any(|t| t.name == workspace_task.name) {
221                            package_tasks.push(workspace_task);
222                        }
223                    }
224                }
225
226                Ok(Package::new(
227                    config.name,
228                    language,
229                    config.public,
230                    relative_path,
231                    config.deps.internal,
232                    package_tasks,
233                ))
234            })
235            .collect();
236
237        let mut packages = packages?;
238        packages.sort_unstable_by(|a, b| a.name.cmp(&b.name));
239        Ok(packages)
240    }
241
242    pub fn scan_as_map(&mut self) -> Result<FxHashMap<String, Package>> {
243        let packages = self.scan()?;
244        let mut map = FxHashMap::with_capacity_and_hasher(packages.len(), Default::default());
245        for p in packages {
246            map.insert(p.name.clone(), p);
247        }
248        Ok(map)
249    }
250}