polykit_core/
scanner.rs

1//! Repository scanner for discovering packages.
2
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7use rustc_hash::FxHashMap;
8use jwalk::WalkDir as JWalkDir;
9use memmap2::Mmap;
10use std::fs::File;
11
12use crate::cache::Cache;
13use crate::config::{Config, WorkspaceConfig};
14use crate::error::Result;
15use crate::package::Package;
16use crate::simd_utils;
17
18fn get_default_cache_dir() -> std::path::PathBuf {
19    dirs::cache_dir()
20        .map(|d| d.join("polykit"))
21        .unwrap_or_else(|| std::env::temp_dir().join("polykit-cache"))
22}
23
24/// Scans a directory for packages.
25///
26/// Looks for `polykit.toml` files and parses them into `Package` structures.
27/// Uses caching for fast incremental scans.
28pub struct Scanner {
29    packages_dir: PathBuf,
30    cache: Option<Cache>,
31    workspace_config: Option<WorkspaceConfig>,
32}
33
34impl Scanner {
35    fn load_workspace_config(packages_dir: &Path) -> Option<WorkspaceConfig> {
36        let mut current_dir = packages_dir.parent()?;
37
38        loop {
39            let workspace_toml = current_dir.join("polykit.toml");
40            if workspace_toml.exists() {
41                let content = std::fs::read_to_string(&workspace_toml).ok()?;
42                let mut table: toml::Value = toml::from_str(&content).ok()?;
43                let workspace_table = table.get_mut("workspace")?.as_table_mut()?;
44
45                let mut config = WorkspaceConfig {
46                    cache_dir: workspace_table
47                        .get("cache_dir")
48                        .and_then(|v| v.as_str())
49                        .map(|s| s.to_string()),
50                    default_parallel: workspace_table
51                        .get("default_parallel")
52                        .and_then(|v| v.as_integer())
53                        .map(|i| i as usize),
54                    workspace_config_path: Some(workspace_toml),
55                    tasks: FxHashMap::default(),
56                    remote_cache: None,
57                };
58
59                if let Some(tasks_table) = workspace_table.get("tasks").and_then(|v| v.as_table()) {
60                    config.tasks = crate::config::parse_tasks_from_toml_map(tasks_table);
61                }
62
63                return Some(config);
64            }
65
66            if current_dir.join(".git").exists() {
67                break;
68            }
69
70            match current_dir.parent() {
71                Some(parent) => {
72                    if parent == current_dir {
73                        break;
74                    }
75                    current_dir = parent;
76                }
77                None => break,
78            }
79        }
80
81        None
82    }
83
84    pub fn new(packages_dir: impl AsRef<Path>) -> Self {
85        let packages_dir = packages_dir.as_ref().to_path_buf();
86        let workspace_config = Self::load_workspace_config(&packages_dir);
87        Self {
88            packages_dir,
89            cache: None,
90            workspace_config,
91        }
92    }
93
94    pub fn with_default_cache(packages_dir: impl AsRef<Path>) -> Self {
95        let packages_dir = packages_dir.as_ref().to_path_buf();
96        let workspace_config = Self::load_workspace_config(&packages_dir);
97        let cache_dir = workspace_config
98            .as_ref()
99            .and_then(|wc| {
100                wc.cache_dir.as_ref().map(|cache_dir_str| {
101                    let cache_path = PathBuf::from(cache_dir_str);
102                    if cache_path.is_absolute() {
103                        cache_path
104                    } else {
105                        wc.workspace_config_path
106                            .as_ref()
107                            .and_then(|config_path| config_path.parent())
108                            .map(|config_dir| config_dir.join(&cache_path))
109                            .unwrap_or_else(|| PathBuf::from(cache_dir_str))
110                    }
111                })
112            })
113            .unwrap_or_else(get_default_cache_dir);
114        Self {
115            packages_dir,
116            cache: Some(Cache::new(cache_dir)),
117            workspace_config,
118        }
119    }
120
121    pub fn with_cache(packages_dir: impl AsRef<Path>, cache_dir: impl AsRef<Path>) -> Self {
122        let packages_dir = packages_dir.as_ref().to_path_buf();
123        let workspace_config = Self::load_workspace_config(&packages_dir);
124        Self {
125            packages_dir,
126            cache: Some(Cache::new(cache_dir)),
127            workspace_config,
128        }
129    }
130
131    pub fn workspace_config(&self) -> Option<&WorkspaceConfig> {
132        self.workspace_config.as_ref()
133    }
134
135    pub fn cache_stats(&self) -> Option<&crate::cache::CacheStats> {
136        self.cache.as_ref().map(|c| c.stats())
137    }
138
139    pub fn scan(&mut self) -> Result<Vec<Package>> {
140        if let Some(ref mut cache) = self.cache {
141            if let Some(cached) = cache.load(&self.packages_dir)? {
142                return Ok(cached);
143            }
144        }
145
146        let packages = self.scan_internal()?;
147
148        if let Some(ref mut cache) = self.cache {
149            cache.save(&self.packages_dir, &packages)?;
150        }
151
152        Ok(packages)
153    }
154
155    #[inline]
156    fn scan_internal(&self) -> Result<Vec<Package>> {
157        let workspace_config = Arc::new(self.workspace_config.clone());
158        let packages_dir = Arc::new(self.packages_dir.clone());
159
160        let config_files: Vec<PathBuf> = JWalkDir::new(&self.packages_dir)
161            .max_depth(2)
162            .follow_links(false)
163            .parallelism(jwalk::Parallelism::RayonNewPool(rayon::current_num_threads()))
164            .into_iter()
165            .filter_map(|e| {
166                let entry = e.ok()?;
167                let name_bytes = entry.file_name().as_encoded_bytes();
168                if simd_utils::fast_str_eq(
169                    std::str::from_utf8(name_bytes).unwrap_or(""),
170                    "polykit.toml",
171                ) {
172                    Some(entry.path().to_path_buf())
173                } else {
174                    None
175                }
176            })
177            .collect();
178
179        let packages: Result<Vec<Package>> = config_files
180            .into_par_iter()
181            .map(|config_path| {
182                let package_path = config_path
183                    .parent()
184                    .ok_or_else(|| crate::error::Error::ConfigNotFound(config_path.clone()))?;
185
186                let config = Self::read_config_mmap(&config_path)?;
187
188                crate::command_validator::CommandValidator::validate_identifier(
189                    &config.name,
190                    "Package name",
191                )?;
192
193                for dep_name in &config.deps.internal {
194                    crate::command_validator::CommandValidator::validate_identifier(
195                        dep_name,
196                        "Dependency name",
197                    )?;
198                }
199
200                let language = config.parse_language()?;
201                let relative_path = package_path
202                    .strip_prefix(packages_dir.as_ref())
203                    .map(|p| p.to_path_buf())
204                    .unwrap_or_else(|_| package_path.to_path_buf());
205
206                let mut package_tasks = config.to_tasks();
207
208                for task in &package_tasks {
209                    crate::command_validator::CommandValidator::validate_identifier(
210                        &task.name,
211                        "Task name",
212                    )?;
213                }
214
215                if let Some(ref ws_config) = workspace_config.as_ref() {
216                    let workspace_tasks = ws_config.to_tasks();
217                    for workspace_task in workspace_tasks {
218                        crate::command_validator::CommandValidator::validate_identifier(
219                            &workspace_task.name,
220                            "Workspace task name",
221                        )?;
222                        if !package_tasks.iter().any(|t| t.name == workspace_task.name) {
223                            package_tasks.push(workspace_task);
224                        }
225                    }
226                }
227
228                Ok(Package::new(
229                    config.name,
230                    language,
231                    config.public,
232                    relative_path,
233                    config.deps.internal,
234                    package_tasks,
235                ))
236            })
237            .collect();
238
239        let mut packages = packages?;
240        packages.sort_unstable_by(|a, b| a.name.cmp(&b.name));
241        Ok(packages)
242    }
243
244    pub fn scan_as_map(&mut self) -> Result<FxHashMap<String, Package>> {
245        let packages = self.scan()?;
246        let mut map = FxHashMap::with_capacity_and_hasher(packages.len(), Default::default());
247        for p in packages {
248            map.insert(p.name.clone(), p);
249        }
250        Ok(map)
251    }
252
253    fn read_config_mmap(path: &Path) -> Result<Config> {
254        let file = File::open(path)?;
255        let metadata = file.metadata()?;
256
257        if metadata.len() > 4096 {
258            let mmap = unsafe { Mmap::map(&file).map_err(crate::error::Error::Io)? };
259            let s = std::str::from_utf8(&mmap)
260                .map_err(|e| crate::error::Error::Adapter {
261                    package: "scanner".to_string(),
262                    message: format!("Invalid UTF-8 in config file: {}", e),
263                })?;
264            Ok(toml::from_str(s)?)
265        } else {
266            let config_content = std::fs::read_to_string(path)?;
267            Ok(toml::from_str(&config_content)?)
268        }
269    }
270
271    /// Scans packages and returns both the packages and detected changes.
272    ///
273    /// Useful for incremental graph updates.
274    pub fn scan_with_changes(
275        &mut self,
276        old_packages: &FxHashMap<String, Package>,
277    ) -> Result<(Vec<Package>, crate::graph::GraphChange)> {
278        let new_packages = self.scan_as_map()?;
279        let change = detect_graph_changes(old_packages, &new_packages);
280        Ok((new_packages.values().cloned().collect(), change))
281    }
282}
283
284/// Detects changes between old and new package sets.
285pub fn detect_graph_changes(
286    old_packages: &FxHashMap<String, Package>,
287    new_packages: &FxHashMap<String, Package>,
288) -> crate::graph::GraphChange {
289    let mut change = crate::graph::GraphChange {
290        added: Vec::new(),
291        modified: Vec::new(),
292        removed: Vec::new(),
293        dependency_changes: Vec::new(),
294    };
295
296    for (name, new_pkg) in new_packages {
297        match old_packages.get(name) {
298            Some(old_pkg) => {
299                if old_pkg.deps != new_pkg.deps || old_pkg.tasks != new_pkg.tasks {
300                    change.modified.push(new_pkg.clone());
301                    if old_pkg.deps != new_pkg.deps {
302                        change.dependency_changes.push((
303                            name.clone(),
304                            new_pkg.deps.iter().cloned().collect(),
305                        ));
306                    }
307                }
308            }
309            None => change.added.push(new_pkg.clone()),
310        }
311    }
312
313    for name in old_packages.keys() {
314        if !new_packages.contains_key(name) {
315            change.removed.push(name.clone());
316        }
317    }
318
319    change
320}