bob/scan.rs
/*
 * Copyright (c) 2025 Jonathan Perkin <jonathan@perkin.org.uk>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

//! Package dependency scanning and resolution.
//!
//! This module provides the [`Scan`] struct for discovering package dependencies
//! and building a directed acyclic graph (DAG) for build ordering.
//!
//! # Scan Process
//!
//! 1. Create a scan sandbox
//! 2. Run `make pbulk-index` on each package to discover dependencies
//! 3. Recursively discover all transitive dependencies
//! 4. Resolve dependency patterns to specific package versions
//! 5. Verify no circular dependencies exist
//! 6. Return buildable and skipped package lists
//!
//! # Skip Reasons
//!
//! Packages may be skipped for several reasons:
//!
//! - `PKG_SKIP_REASON` - Package explicitly marked to skip on this platform
//! - `PKG_FAIL_REASON` - Package expected to fail on this platform
//! - Unresolved dependencies - Required dependency not found
//! - Circular dependencies - Package has a dependency cycle
//!
//! # Example
//!
//! ```no_run
//! use bob::{Config, Database, RunContext, Scan};
//! use pkgsrc::PkgPath;
//! use std::sync::Arc;
//! use std::sync::atomic::AtomicBool;
//!
//! let config = Config::load(None, false)?;
//! let db_path = config.logdir().join("bob").join("bob.db");
//! let db = Database::open(&db_path)?;
//! let mut scan = Scan::new(&config);
//!
//! scan.add(&PkgPath::new("mail/mutt")?);
//! scan.add(&PkgPath::new("www/curl")?);
//!
//! let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
//! scan.start(&ctx, &db)?; // Discover dependencies
//! let result = scan.resolve(&db)?;
//!
//! println!("Buildable: {}", result.buildable.len());
//! println!("Skipped: {}", result.skipped.len());
//! # Ok::<(), anyhow::Error>(())
//! ```

use crate::tui::MultiProgress;
use crate::{Config, RunContext, Sandbox};
use anyhow::{Context, Result, bail};
use indexmap::IndexMap;
use petgraph::graphmap::DiGraphMap;
use pkgsrc::{Depend, PkgName, PkgPath, ScanIndex};
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::io::BufReader;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use tracing::{debug, error, info, trace};

/// Reason why a package was excluded from the build.
///
/// Packages with skip or fail reasons set in pkgsrc are not built.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub enum SkipReason {
    /// Package has `PKG_SKIP_REASON` set.
    ///
    /// This typically indicates the package cannot be built on the current
    /// platform (e.g., architecture-specific code, missing dependencies).
    PkgSkipReason(String),
    /// Package has `PKG_FAIL_REASON` set.
    ///
    /// This indicates the package is known to fail on the current platform
    /// and should not be attempted.
    PkgFailReason(String),
    /// A dependency could not be resolved to any known package.
    ///
    /// Contains the dependency pattern that could not be matched.
    UnresolvedDependency(String),
}

/// Information about a package that was skipped during scanning.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct SkippedPackage {
    /// Package name with version.
    pub pkgname: PkgName,
    /// Package path in pkgsrc.
    pub pkgpath: Option<PkgPath>,
    /// Reason the package was skipped.
    pub reason: SkipReason,
}

/// Information about a package that failed to scan.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ScanFailure {
    /// Package path in pkgsrc (e.g., `games/plib`).
    pub pkgpath: PkgPath,
    /// Error message from the scan failure.
    pub error: String,
}

/// A resolved package index entry with dependency information.
///
/// This extends [`ScanIndex`] with resolved dependencies (`depends`).
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ResolvedIndex {
    /// The underlying scan index data.
    pub index: ScanIndex,
    /// Resolved dependencies as package names.
    pub depends: Vec<PkgName>,
    /// True if this package has an unresolved dependency.
    #[serde(default)]
    pub has_unresolved_dep: bool,
}

impl ResolvedIndex {
    /// Create from a ScanIndex with empty depends.
    pub fn from_scan_index(index: ScanIndex) -> Self {
        Self { index, depends: Vec::new(), has_unresolved_dep: false }
    }
}

impl std::ops::Deref for ResolvedIndex {
    type Target = ScanIndex;
    fn deref(&self) -> &Self::Target {
        &self.index
    }
}

impl std::ops::DerefMut for ResolvedIndex {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.index
    }
}

impl std::fmt::Display for ResolvedIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.index)?;
        // Only output DEPENDS= if there are dependencies and no unresolved deps.
        if !self.depends.is_empty() && !self.has_unresolved_dep {
            write!(f, "DEPENDS=")?;
            for (i, d) in self.depends.iter().enumerate() {
                if i > 0 {
                    write!(f, " ")?;
                }
                write!(f, "{d}")?;
            }
            writeln!(f)?;
        }
        Ok(())
    }
}

/// Result of scanning and resolving packages.
///
/// Returned by [`Scan::resolve`]; contains the packages that can be built
/// and those that were skipped.
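///
/// # Example
///
/// An illustrative sketch of iterating a `ScanResult`, reusing the same
/// setup as the module-level example (not run):
///
/// ```no_run
/// # use bob::{Config, Database, RunContext, Scan};
/// # use pkgsrc::PkgPath;
/// # use std::sync::Arc;
/// # use std::sync::atomic::AtomicBool;
/// # fn example() -> anyhow::Result<()> {
/// # let config = Config::load(None, false)?;
/// # let db_path = config.logdir().join("bob").join("bob.db");
/// # let db = Database::open(&db_path)?;
/// # let mut scan = Scan::new(&config);
/// # scan.add(&PkgPath::new("www/curl")?);
/// # let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
/// # scan.start(&ctx, &db)?;
/// let result = scan.resolve(&db)?;
/// for (pkgname, index) in &result.buildable {
///     println!("{}: {} dependencies", pkgname.pkgname(), index.depends.len());
/// }
/// for pkg in &result.skipped {
///     println!("skipped {}: {:?}", pkg.pkgname.pkgname(), pkg.reason);
/// }
/// # Ok(())
/// # }
/// ```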
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
pub struct ScanResult {
    /// Packages that can be built, indexed by package name.
    ///
    /// These packages have all dependencies resolved and no skip/fail reasons.
    /// Uses IndexMap to preserve insertion order from the original scan.
    pub buildable: IndexMap<PkgName, ResolvedIndex>,
    /// Packages that were skipped due to skip/fail reasons.
    pub skipped: Vec<SkippedPackage>,
    /// Packages that failed to scan (bmake pbulk-index failed).
    pub scan_failed: Vec<ScanFailure>,
    /// All packages in original order with their skip reason (if any).
    /// Used for presolve output that needs to preserve original ordering.
    pub all_ordered: Vec<(ResolvedIndex, Option<SkipReason>)>,
    /// Unresolved dependency errors.
    /// Callers can check this and config.strict_scan() to decide if fatal.
    pub errors: Vec<String>,
}

/// Package dependency scanner.
///
/// Discovers all dependencies for a set of packages and resolves them into
/// a buildable set with proper ordering.
///
/// # Usage
///
/// 1. Create a `Scan` with [`Scan::new`]
/// 2. Add packages to scan with [`Scan::add`]
/// 3. Run the scan with [`Scan::start`]
/// 4. Resolve dependencies with [`Scan::resolve`]
///
/// # Example
///
/// ```no_run
/// # use bob::{Config, Database, RunContext, Scan};
/// # use pkgsrc::PkgPath;
/// # use std::sync::Arc;
/// # use std::sync::atomic::AtomicBool;
/// # fn example() -> anyhow::Result<()> {
/// let config = Config::load(None, false)?;
/// let db_path = config.logdir().join("bob").join("bob.db");
/// let db = Database::open(&db_path)?;
/// let mut scan = Scan::new(&config);
///
/// scan.add(&PkgPath::new("mail/mutt")?);
/// let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
/// scan.start(&ctx, &db)?;
///
/// let result = scan.resolve(&db)?;
/// println!("Found {} buildable packages", result.buildable.len());
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Default)]
pub struct Scan {
    config: Config,
    sandbox: Sandbox,
    incoming: HashSet<PkgPath>,
    /// Pkgpaths we've completed scanning (in this session).
    done: HashSet<PkgPath>,
    resolved: IndexMap<PkgName, ResolvedIndex>,
    /// Full tree scan - discover all packages, skip recursive dependency discovery.
    /// Defaults to true; set to false when packages are explicitly added.
    full_tree: bool,
    /// A previous full tree scan completed successfully.
    full_scan_complete: bool,
    /// Packages that failed to scan (pkgpath, error message).
    scan_failures: Vec<(PkgPath, String)>,
}

impl Scan {
    pub fn new(config: &Config) -> Scan {
        let sandbox = Sandbox::new(config);
        debug!(pkgsrc = %config.pkgsrc().display(),
            make = %config.make().display(),
            scan_threads = config.scan_threads(),
            "Created new Scan instance"
        );
        Scan {
            config: config.clone(),
            sandbox,
            full_tree: true,
            ..Default::default()
        }
    }

    pub fn add(&mut self, pkgpath: &PkgPath) {
        info!(pkgpath = %pkgpath.as_path().display(), "Adding package to scan queue");
        self.full_tree = false;
        self.incoming.insert(pkgpath.clone());
    }

    /// Returns true if this is a full tree scan.
    pub fn is_full_tree(&self) -> bool {
        self.full_tree
    }

    /// Mark that a previous full tree scan completed successfully.
    pub fn set_full_scan_complete(&mut self) {
        self.full_scan_complete = true;
    }

    /// Initialize scan from database, checking what's already scanned.
    /// Returns (cached_count, pending_deps_count) where pending_deps_count is the
    /// number of dependencies discovered but not yet scanned (from interrupted scans).
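    ///
    /// # Example
    ///
    /// An illustrative sketch (not run), using the same database path
    /// convention as the module-level example:
    ///
    /// ```no_run
    /// # use bob::{Config, Database, Scan};
    /// # fn example() -> anyhow::Result<()> {
    /// let config = Config::load(None, false)?;
    /// let db_path = config.logdir().join("bob").join("bob.db");
    /// let db = Database::open(&db_path)?;
    /// let mut scan = Scan::new(&config);
    /// let (cached, pending) = scan.init_from_db(&db)?;
    /// println!("{cached} cached, {pending} pending");
    /// # Ok(())
    /// # }
    /// ```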
    pub fn init_from_db(
        &mut self,
        db: &crate::db::Database,
    ) -> Result<(usize, usize)> {
        let scanned = db.get_scanned_pkgpaths()?;
        let cached_count = scanned.len();
        let mut pending_count = 0;

        if cached_count > 0 {
            info!(
                cached_count = cached_count,
                "Found cached scan results in database"
            );

            // For full tree scans with full_scan_complete, we'll skip scanning
            // For limited scans, remove already-scanned from incoming
            if !self.full_tree {
                self.incoming.retain(|p| !scanned.contains(&p.to_string()));
            }

            // Add scanned pkgpaths to done set
            for pkgpath_str in &scanned {
                if let Ok(pkgpath) = PkgPath::new(pkgpath_str) {
                    self.done.insert(pkgpath);
                }
            }

            // Check for dependencies that were discovered but not yet scanned.
            // This handles the case where a scan was interrupted partway through.
            let unscanned = db.get_unscanned_dependencies()?;
            if !unscanned.is_empty() {
                info!(
                    unscanned_count = unscanned.len(),
                    "Found unscanned dependencies from interrupted scan"
                );
                for pkgpath_str in unscanned {
                    if let Ok(pkgpath) = PkgPath::new(&pkgpath_str) {
                        if !self.done.contains(&pkgpath) {
                            self.incoming.insert(pkgpath);
                            pending_count += 1;
                        }
                    }
                }
            }
        }

        Ok((cached_count, pending_count))
    }

    /// Discover all packages in pkgsrc tree.
    fn discover_packages(&mut self) -> anyhow::Result<()> {
        println!("Discovering packages...");
        let pkgsrc = self.config.pkgsrc().display();
        let make = self.config.make().display();

        // Get top-level SUBDIR (categories + USER_ADDITIONAL_PKGS)
        let script = format!(
            "cd {} && {} show-subdir-var VARNAME=SUBDIR\n",
            pkgsrc, make
        );
        let child = self.sandbox.execute_script(0, &script, vec![])?;
        let output = child
            .wait_with_output()
            .context("Failed to run show-subdir-var")?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            bail!("Failed to get categories: {}", stderr);
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        let entries: Vec<&str> = stdout.split_whitespace().collect();

        for entry in entries {
            if entry.contains('/') {
                // USER_ADDITIONAL_PKGS - add directly as pkgpath
                if let Ok(pkgpath) = PkgPath::new(entry) {
                    self.incoming.insert(pkgpath);
                }
            } else {
                // Category - get packages within it
                let script = format!(
                    "cd {}/{} && {} show-subdir-var VARNAME=SUBDIR\n",
                    pkgsrc, entry, make
                );
                let child = self.sandbox.execute_script(0, &script, vec![])?;
                let cat_output = child.wait_with_output();

                match cat_output {
                    Ok(o) if o.status.success() => {
                        let pkgs = String::from_utf8_lossy(&o.stdout);
                        for pkg in pkgs.split_whitespace() {
                            let path = format!("{}/{}", entry, pkg);
                            if let Ok(pkgpath) = PkgPath::new(&path) {
                                self.incoming.insert(pkgpath);
                            }
                        }
                    }
                    Ok(o) => {
                        let stderr = String::from_utf8_lossy(&o.stderr);
                        debug!(category = entry, stderr = %stderr,
                            "Failed to get packages for category");
                    }
                    Err(e) => {
                        debug!(category = entry, error = %e,
                            "Failed to run make in category");
                    }
                }
            }
        }

        info!(discovered = self.incoming.len(), "Package discovery complete");
        println!("Discovered {} package paths", self.incoming.len());

        Ok(())
    }

    pub fn start(
        &mut self,
        ctx: &RunContext,
        db: &crate::db::Database,
    ) -> anyhow::Result<bool> {
        info!(
            incoming_count = self.incoming.len(),
            sandbox_enabled = self.sandbox.enabled(),
            "Starting package scan"
        );

        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(self.config.scan_threads())
            .build()
            .context("Failed to build scan thread pool")?;

        let shutdown_flag = Arc::clone(&ctx.shutdown);

        /*
         * Only a single sandbox is required; 'make pbulk-index' can safely
         * be run in parallel inside one sandbox.
         */
        let script_envs = self.config.script_env();

        // For full tree scans where a previous scan completed, all packages
        // are already cached - nothing to do.
        if self.full_tree && self.full_scan_complete && !self.done.is_empty() {
            println!("All {} package paths already scanned", self.done.len());
            return Ok(false);
        }

        // For non-full-tree scans, prune already-cached packages from incoming
        // before sandbox creation to avoid unnecessary setup/teardown.
        if !self.full_tree {
            self.incoming.retain(|p| !self.done.contains(p));
            if self.incoming.is_empty() {
                if !self.done.is_empty() {
                    println!(
                        "All {} package paths already scanned",
                        self.done.len()
                    );
                }
                return Ok(false);
            }
        }

        if self.sandbox.enabled() {
            println!("Creating sandbox...");
            if let Err(e) = self.sandbox.create(0) {
                if let Err(destroy_err) = self.sandbox.destroy(0) {
                    eprintln!(
                        "Warning: failed to destroy sandbox: {}",
                        destroy_err
                    );
                }
                return Err(e);
            }

            // Run pre-build script if defined
            if let Some(pre_build) = self.config.script("pre-build") {
                debug!("Running pre-build script");
                let child = self.sandbox.execute(
                    0,
                    pre_build,
                    script_envs.clone(),
                    None,
                    None,
                )?;
                let output = child
                    .wait_with_output()
                    .context("Failed to wait for pre-build")?;
                if !output.status.success() {
                    let stderr = String::from_utf8_lossy(&output.stderr);
                    error!(exit_code = ?output.status.code(), stderr = %stderr, "pre-build script failed");
                }
            }
        }

        // For full tree scans, always discover all packages
        if self.full_tree {
            self.discover_packages()?;
            self.incoming.retain(|p| !self.done.contains(p));
        }

        // Nothing to scan - all packages are cached
        if self.incoming.is_empty() {
            if !self.done.is_empty() {
                println!(
                    "All {} package paths already scanned",
                    self.done.len()
                );
            }

            if self.sandbox.enabled() {
                self.cleanup_sandbox(script_envs)?;
            }

            return Ok(false);
        }

        // Clear resolved dependencies since we're scanning new packages
        db.clear_resolved_depends()?;

        println!("Scanning packages...");

        // Set up multi-line progress display using ratatui inline viewport
        // Include cached packages in total so progress shows full picture
        let cached_count = self.done.len();
        let total_count = cached_count + self.incoming.len();
        let progress = Arc::new(Mutex::new(
            MultiProgress::new(
                "Scanning",
                "Scanned",
                total_count,
                self.config.scan_threads(),
            )
            .expect("Failed to initialize progress display"),
        ));

        // Mark cached packages
        if cached_count > 0 {
            if let Ok(mut p) = progress.lock() {
                p.state_mut().cached = cached_count;
            }
        }

        // Flag to stop the refresh thread
        let stop_refresh = Arc::new(AtomicBool::new(false));

        // Spawn a thread to periodically refresh the display (for timer updates)
        let progress_refresh = Arc::clone(&progress);
        let stop_flag = Arc::clone(&stop_refresh);
        let shutdown_for_refresh = Arc::clone(&shutdown_flag);
        let refresh_thread = std::thread::spawn(move || {
            while !stop_flag.load(Ordering::Relaxed)
                && !shutdown_for_refresh.load(Ordering::SeqCst)
            {
                if let Ok(mut p) = progress_refresh.lock() {
                    // Check for keyboard events (Ctrl+C raises SIGINT)
                    let _ = p.poll_events();
                    let _ = p.render();
                }
                std::thread::sleep(Duration::from_millis(50));
            }
        });

        // Start transaction for all writes
        db.begin_transaction()?;

        let mut interrupted = false;

        // Borrow config and sandbox separately for use in scanner thread,
        // allowing main thread to mutate self.done, self.incoming, etc.
        let config = &self.config;
        let sandbox = &self.sandbox;

        /*
         * Continuously iterate over incoming queue, moving to done once
         * processed, and adding any dependencies to incoming to be processed
         * next.
         */
        loop {
            // Check for shutdown signal
            if shutdown_flag.load(Ordering::Relaxed) {
                stop_refresh.store(true, Ordering::Relaxed);
                if let Ok(mut p) = progress.lock() {
                    let _ = p.finish_interrupted();
                }
                interrupted = true;
                break;
            }

            /*
             * Convert the incoming HashSet into a Vec for parallel processing.
             */
            let pkgpaths: Vec<PkgPath> = self.incoming.drain().collect();
            if pkgpaths.is_empty() {
                break;
            }

            // Create bounded channel for streaming results
            const CHANNEL_BUFFER_SIZE: usize = 128;
            let (tx, rx) = std::sync::mpsc::sync_channel::<(
                PkgPath,
                Result<Vec<ScanIndex>>,
            )>(CHANNEL_BUFFER_SIZE);

            let mut new_incoming: HashSet<PkgPath> = HashSet::new();

            std::thread::scope(|s| {
                // Spawn scanning thread
                let progress_clone = Arc::clone(&progress);
                let shutdown_clone = Arc::clone(&shutdown_flag);
                let pool_ref = &pool;

                s.spawn(move || {
                    pool_ref.install(|| {
                        pkgpaths.par_iter().for_each(|pkgpath| {
                            // Check for shutdown before starting
                            if shutdown_clone.load(Ordering::Relaxed) {
                                return;
                            }

                            let pathname =
                                pkgpath.as_path().to_string_lossy().to_string();
                            let thread_id =
                                rayon::current_thread_index().unwrap_or(0);

                            // Update progress - show current package
                            if let Ok(mut p) = progress_clone.lock() {
                                p.state_mut()
                                    .set_worker_active(thread_id, &pathname);
                            }

                            let result = Self::scan_pkgpath_with(
                                config, sandbox, pkgpath,
                            );

                            // Update progress counter
                            if let Ok(mut p) = progress_clone.lock() {
                                p.state_mut().set_worker_idle(thread_id);
                                if result.is_ok() {
                                    p.state_mut().increment_completed();
                                } else {
                                    p.state_mut().increment_failed();
                                }
                            }

                            // Send result (blocks if buffer full = backpressure)
                            let _ = tx.send((pkgpath.clone(), result));
                        });
                    });
                    drop(tx);
                });

                // Check if we were interrupted during parallel processing
                let was_interrupted = shutdown_flag.load(Ordering::Relaxed);

                /*
                 * Process results - write to DB and extract dependencies.
                 */
                for (pkgpath, result) in rx {
                    let scanpkgs = match result {
                        Ok(pkgs) => pkgs,
                        Err(e) => {
                            self.scan_failures
                                .push((pkgpath.clone(), e.to_string()));
                            self.done.insert(pkgpath);
                            continue;
                        }
                    };
                    self.done.insert(pkgpath.clone());

                    // Save to database
                    if !scanpkgs.is_empty() {
                        if let Err(e) = db
                            .store_scan_pkgpath(&pkgpath.to_string(), &scanpkgs)
                        {
                            error!(error = %e, "Failed to store scan results");
                        }
                    }

                    // Skip dependency discovery for full tree scans (all
                    // packages already discovered) or if interrupted
                    if self.full_tree || was_interrupted {
                        continue;
                    }

                    // Discover dependencies not yet seen
                    for pkg in &scanpkgs {
                        if let Some(ref all_deps) = pkg.all_depends {
                            for dep in all_deps {
                                let dep_path = dep.pkgpath();
                                if self.done.contains(dep_path)
                                    || new_incoming.contains(dep_path)
                                {
                                    continue;
                                }
                                // Check database for cached dependency
                                match db
                                    .is_pkgpath_scanned(&dep_path.to_string())
                                {
                                    Ok(true) => {
                                        self.done.insert(dep_path.clone());
                                        if let Ok(mut p) = progress.lock() {
                                            p.state_mut().total += 1;
                                            p.state_mut().cached += 1;
                                        }
                                    }
                                    Ok(false) => {
                                        new_incoming.insert(dep_path.clone());
                                        if let Ok(mut p) = progress.lock() {
                                            p.state_mut().total += 1;
                                        }
                                    }
                                    Err(_) => {}
                                }
                            }
                        }
                    }
                }
            });

            // Check for interruption after batch
            if shutdown_flag.load(Ordering::Relaxed) {
                stop_refresh.store(true, Ordering::Relaxed);
                if let Ok(mut p) = progress.lock() {
                    let _ = p.finish_interrupted();
                }
                interrupted = true;
                break;
            }

            /*
             * We're finished with the current incoming, replace it with the
             * new incoming list. If it is empty then we've already processed
             * all known PKGPATHs and are done.
             */
            self.incoming = new_incoming;
        }

        // Commit transaction (partial on interrupt, full on success)
        db.commit()?;

        // Stop the refresh thread and print final summary
        stop_refresh.store(true, Ordering::Relaxed);
        let _ = refresh_thread.join();

        // Only call finish() for normal completion; finish_interrupted()
        // was already called immediately when interrupt was detected
        if !interrupted {
            if let Ok(mut p) = progress.lock() {
                let _ = p.finish();
            }
        }

        if self.sandbox.enabled() {
            self.cleanup_sandbox(script_envs)?;
        }

        if interrupted {
            return Ok(true);
        }

        Ok(false)
    }

    /// Run post-build cleanup and destroy the scan sandbox.
    fn cleanup_sandbox(
        &self,
        envs: Vec<(String, String)>,
    ) -> anyhow::Result<()> {
        if let Some(post_build) = self.config.script("post-build") {
            debug!("Running post-build script");
            let child =
                self.sandbox.execute(0, post_build, envs, None, None)?;
            let output = child
                .wait_with_output()
                .context("Failed to wait for post-build")?;
            if !output.status.success() {
                let stderr = String::from_utf8_lossy(&output.stderr);
                error!(exit_code = ?output.status.code(), stderr = %stderr, "post-build script failed");
            }
        }
        self.sandbox.destroy(0)
    }

    /// Returns scan failures as formatted error strings.
    pub fn scan_errors(&self) -> impl Iterator<Item = &str> {
        self.scan_failures.iter().map(|(_, e)| e.as_str())
    }

    /// Returns scan failures with pkgpath information.
    pub fn scan_failures(&self) -> &[(PkgPath, String)] {
        &self.scan_failures
    }

    /**
     * Scan a single PKGPATH, returning a [`Vec`] of [`ScanIndex`] results,
     * as multi-version packages may return multiple results.
     */
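    /// # Example
    ///
    /// An illustrative sketch (not run); assumes a configured pkgsrc tree:
    ///
    /// ```no_run
    /// # use bob::{Config, Scan};
    /// # use pkgsrc::PkgPath;
    /// # fn example() -> anyhow::Result<()> {
    /// let config = Config::load(None, false)?;
    /// let scan = Scan::new(&config);
    /// for pkg in scan.scan_pkgpath(&PkgPath::new("www/curl")?)? {
    ///     println!("{}", pkg.pkgname.pkgname());
    /// }
    /// # Ok(())
    /// # }
    /// ```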
    pub fn scan_pkgpath(
        &self,
        pkgpath: &PkgPath,
    ) -> anyhow::Result<Vec<ScanIndex>> {
        Self::scan_pkgpath_with(&self.config, &self.sandbox, pkgpath)
    }

    /// Scan a single PKGPATH using provided config and sandbox references.
    /// This allows scanning without borrowing all of `self`.
    fn scan_pkgpath_with(
        config: &Config,
        sandbox: &Sandbox,
        pkgpath: &PkgPath,
    ) -> anyhow::Result<Vec<ScanIndex>> {
        let pkgpath_str = pkgpath.as_path().display().to_string();
        debug!(pkgpath = %pkgpath_str, "Scanning package");

        let bmake = config.make().display().to_string();
        let pkgsrcdir = config.pkgsrc().display().to_string();
        let script = format!(
            "cd {}/{} && {} pbulk-index\n",
            pkgsrcdir, pkgpath_str, bmake
        );

        let scan_env = config.scan_env();
        trace!(pkgpath = %pkgpath_str,
            script = %script,
            scan_env = ?scan_env,
            "Executing pkg-scan"
        );
        let child = sandbox.execute_script(0, &script, scan_env)?;
        let output = child.wait_with_output()?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            error!(pkgpath = %pkgpath_str,
                exit_code = ?output.status.code(),
                stderr = %stderr,
                "pkg-scan script failed"
            );
            let stderr = stderr.trim();
            let msg = if stderr.is_empty() {
                format!("Scan failed for {}", pkgpath_str)
            } else {
                format!("Scan failed for {}: {}", pkgpath_str, stderr)
            };
            bail!(msg);
        }

        let stdout_str = String::from_utf8_lossy(&output.stdout);
        trace!(pkgpath = %pkgpath_str,
            stdout_len = stdout_str.len(),
            stdout = %stdout_str,
            "pkg-scan script output"
        );

        let reader = BufReader::new(&output.stdout[..]);
        let all_results: Vec<ScanIndex> =
            ScanIndex::from_reader(reader).collect::<Result<_, _>>()?;

        /*
         * Filter to keep only the first occurrence of each PKGNAME.
         * For multi-version packages, pbulk-index returns the *_DEFAULT
         * version first, which is the one we want.
         */
        let mut seen_pkgnames = HashSet::new();
        let mut index: Vec<ScanIndex> = Vec::new();
        for pkg in all_results {
            if seen_pkgnames.insert(pkg.pkgname.clone()) {
                index.push(pkg);
            }
        }

        info!(pkgpath = %pkgpath_str,
            packages_found = index.len(),
            "Scan complete for pkgpath"
        );

        /*
         * Set PKGPATH (PKG_LOCATION), as for some reason pbulk-index
         * doesn't set it itself.
         */
        for pkg in &mut index {
            pkg.pkg_location = Some(pkgpath.clone());
            debug!(pkgpath = %pkgpath_str,
                pkgname = %pkg.pkgname.pkgname(),
                skip_reason = ?pkg.pkg_skip_reason,
                fail_reason = ?pkg.pkg_fail_reason,
                depends_count = pkg.all_depends.as_ref().map_or(0, |v| v.len()),
                "Found package in scan"
            );
        }

        Ok(index)
    }

    /**
     * Resolve the list of scanned packages by ensuring that every [`Depend`]
     * pattern in `all_depends` matches a found package and that there are no
     * circular dependencies. The best match for each pattern is stored in
     * `depends` for the package in question.
     *
     * Returns a [`ScanResult`] containing buildable and skipped packages.
     *
     * Also stores resolved dependencies in the database for fast reverse
     * lookups.
     */
    pub fn resolve(&mut self, db: &crate::db::Database) -> Result<ScanResult> {
        info!(
            done_pkgpaths = self.done.len(),
            "Starting dependency resolution"
        );

        // Load all scan data in one query
        let all_scan_data = db.get_all_scan_indexes()?;

        /*
         * Populate the resolved hash with ALL packages first, including those
         * with skip/fail reasons. This allows us to resolve dependencies for
         * all packages before separating them.
         */
        let mut pkgnames: indexmap::IndexSet<PkgName> =
            indexmap::IndexSet::new();

        // Track which packages have skip/fail reasons
        let mut skip_reasons: HashMap<PkgName, SkipReason> = HashMap::new();

        // Track package_id for storing resolved dependencies
        let mut pkgname_to_id: HashMap<PkgName, i64> = HashMap::new();

        // Process all scan data, consuming to avoid clones
        for (pkg_id, pkg) in all_scan_data {
            debug!(pkgpath = ?pkg.pkg_location,
                pkgname = %pkg.pkgname.pkgname(),
                "Processing package"
            );

            // Skip duplicate PKGNAMEs - keep only the first (preferred)
            // variant for multi-version packages.
            if pkgnames.contains(&pkg.pkgname) {
                debug!(pkgname = %pkg.pkgname.pkgname(),
                    multi_version = ?pkg.multi_version,
                    "Skipping duplicate PKGNAME"
                );
                continue;
            }

            // Track skip/fail reasons but still add to resolved
            if let Some(reason) = &pkg.pkg_skip_reason {
                if !reason.is_empty() {
                    info!(pkgname = %pkg.pkgname.pkgname(),
                        reason = %reason,
                        "Package has PKG_SKIP_REASON"
                    );
                    skip_reasons.insert(
                        pkg.pkgname.clone(),
                        SkipReason::PkgSkipReason(reason.clone()),
                    );
                }
            }
            if let Some(reason) = &pkg.pkg_fail_reason {
                if !reason.is_empty()
                    && !skip_reasons.contains_key(&pkg.pkgname)
                {
                    info!(pkgname = %pkg.pkgname.pkgname(),
                        reason = %reason,
                        "Package has PKG_FAIL_REASON"
                    );
                    skip_reasons.insert(
                        pkg.pkgname.clone(),
                        SkipReason::PkgFailReason(reason.clone()),
                    );
                }
            }

            pkgname_to_id.insert(pkg.pkgname.clone(), pkg_id);
            debug!(pkgname = %pkg.pkgname.pkgname(),
                "Adding package to resolved set"
            );
            pkgnames.insert(pkg.pkgname.clone());
            self.resolved.insert(
                pkg.pkgname.clone(),
                ResolvedIndex::from_scan_index(pkg),
            );
        }

        info!(
            resolved_count = self.resolved.len(),
            skip_reasons_count = skip_reasons.len(),
            "Initial resolution complete"
        );

        /*
         * Build a hashmap of pkgbase -> Vec<&PkgName> for efficient lookups.
         * For Dewey patterns with a known pkgbase, we can directly look up
         * candidates instead of iterating through all packages.
         */
        let pkgbase_map: HashMap<&str, Vec<&PkgName>> = {
            let mut map: HashMap<&str, Vec<&PkgName>> = HashMap::new();
            for pkgname in &pkgnames {
                map.entry(pkgname.pkgbase()).or_default().push(pkgname);
            }
            map
        };

        /*
         * Keep a cache of best Depend => PkgName matches we've already seen
         * as it's likely the same patterns will be used in multiple places.
         */
        let mut match_cache: HashMap<Depend, PkgName> = HashMap::new();

        /*
         * Track packages to skip due to skipped dependencies, and
         * unresolved dependency errors (callers decide if these are fatal).
         */
        let mut skip_due_to_dep: HashMap<PkgName, String> = HashMap::new();
        let mut errors: Vec<String> = Vec::new();

        // Helper to check if a dependency pattern is already satisfied
        let is_satisfied = |depends: &[PkgName], pattern: &pkgsrc::Pattern| {
            depends.iter().any(|existing| pattern.matches(existing.pkgname()))
        };

        for pkg in self.resolved.values_mut() {
            let all_deps = match pkg.all_depends.take() {
                Some(deps) => deps,
                None => continue,
            };
            for depend in all_deps.iter() {
                // Check for cached DEPENDS match first. If found, use it
                // (but only add if the pattern isn't already satisfied).
                if let Some(pkgname) = match_cache.get(depend) {
                    if !is_satisfied(&pkg.depends, depend.pattern())
                        && !pkg.depends.contains(pkgname)
                    {
                        pkg.depends.push(pkgname.clone());
                    }
                    continue;
                }
                /*
                 * Find best DEPENDS match out of all known PKGNAME.
                 * Collect all candidates that match the pattern.
                 *
                 * Use pkgbase hashmap for efficient lookups when pattern
                 * has a known pkgbase, otherwise fall back to full scan.
                 */
                let candidates: Vec<&PkgName> = if let Some(base) =
                    depend.pattern().pkgbase()
                {
                    match pkgbase_map.get(base) {
                        Some(v) => v
                            .iter()
                            .filter(|c| depend.pattern().matches(c.pkgname()))
                            .copied()
                            .collect(),
                        None => Vec::new(),
                    }
                } else {
                    pkgnames
                        .iter()
                        .filter(|c| depend.pattern().matches(c.pkgname()))
                        .collect()
                };

                // Find best match among all candidates using pbulk algorithm:
                // higher version wins, larger name on tie.
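                // For example, if the pattern were "foo>=1.0" and both
                // "foo-1.2" and "foo-2.0" were known, "foo-2.0" would be
                // selected (hypothetical package names, for illustration).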
                let mut best: Option<&PkgName> = None;
                let mut match_error: Option<pkgsrc::PatternError> = None;
                for candidate in candidates {
                    best = match best {
                        None => Some(candidate),
                        Some(current) => {
                            match depend.pattern().best_match_pbulk(
                                current.pkgname(),
                                candidate.pkgname(),
                            ) {
                                Ok(Some(m)) if m == candidate.pkgname() => {
                                    Some(candidate)
                                }
                                Ok(_) => Some(current),
                                Err(e) => {
                                    match_error = Some(e);
                                    break;
                                }
                            }
                        }
                    };
                }
                if let Some(e) = match_error {
                    let reason = format!(
                        "pattern error for {}: {}",
                        depend.pattern().pattern(),
                        e
                    );
                    errors.push(format!(
                        "{} in {}",
                        reason,
                        pkg.pkgname.pkgname()
                    ));
                    if !skip_reasons.contains_key(&pkg.pkgname) {
                        pkg.pkg_fail_reason = Some(format!("\"{}\"", reason));
                        skip_reasons.insert(
                            pkg.pkgname.clone(),
                            SkipReason::PkgFailReason(reason),
                        );
                    }
                    continue;
                }
                // If found, save to cache and add to depends (if not already satisfied)
                if let Some(pkgname) = best {
                    if !is_satisfied(&pkg.depends, depend.pattern())
                        && !pkg.depends.contains(pkgname)
                    {
                        pkg.depends.push(pkgname.clone());
                    }
                    match_cache.insert(depend.clone(), pkgname.clone());
                } else {
                    // No matching package exists
                    let pattern = depend.pattern().pattern().to_string();
                    pkg.has_unresolved_dep = true;
                    errors.push(format!(
                        "No match found for {} in {}",
                        pattern,
                        pkg.pkgname.pkgname()
                    ));
                    if !skip_reasons.contains_key(&pkg.pkgname) {
                        let reason = format!(
                            "could not resolve dependency \"{}\"",
                            pattern
                        );
                        pkg.pkg_fail_reason = Some(format!("\"{}\"", reason));
                        skip_reasons.insert(
                            pkg.pkgname.clone(),
                            SkipReason::UnresolvedDependency(pattern),
                        );
                    }
                }
            }
            // Restore all_depends for output formatting
            pkg.all_depends = Some(all_deps);
        }

        /*
         * Iteratively propagate skips: if A depends on B, and B is now
         * marked to skip, then A should also be skipped.
         */
        loop {
            let mut new_skips: HashMap<PkgName, String> = HashMap::new();

            for pkg in self.resolved.values() {
                if skip_due_to_dep.contains_key(&pkg.pkgname)
                    || skip_reasons.contains_key(&pkg.pkgname)
                {
                    continue;
                }
                for dep in &pkg.depends {
                    if skip_due_to_dep.contains_key(dep)
                        || skip_reasons.contains_key(dep)
                    {
                        // Our dependency is being skipped
                        new_skips.insert(
                            pkg.pkgname.clone(),
                            format!("Dependency {} skipped", dep.pkgname()),
                        );
                        break;
                    }
                }
            }

            if new_skips.is_empty() {
                break;
            }
            skip_due_to_dep.extend(new_skips);
        }

        // Merge skip_due_to_dep into skip_reasons
        for (pkgname, reason) in skip_due_to_dep.iter() {
            if !skip_reasons.contains_key(pkgname) {
                skip_reasons.insert(
                    pkgname.clone(),
                    SkipReason::PkgSkipReason(reason.clone()),
                );
            }
        }

        // Build all_ordered first to preserve original order, then separate
        let mut all_ordered: Vec<(ResolvedIndex, Option<SkipReason>)> =
            Vec::new();
        let mut buildable: IndexMap<PkgName, ResolvedIndex> = IndexMap::new();
        let mut skipped: Vec<SkippedPackage> = Vec::new();

        for (pkgname, index) in std::mem::take(&mut self.resolved) {
            let reason = skip_reasons.remove(&pkgname);
            if let Some(r) = reason {
                // Skipped: extract metadata, then move index to all_ordered
                skipped.push(SkippedPackage {
                    pkgname: index.pkgname.clone(),
                    pkgpath: index.pkg_location.clone(),
                    reason: r.clone(),
                });
                all_ordered.push((index, Some(r)));
            } else {
                // Buildable: clone for all_ordered, move to buildable
                all_ordered.push((index.clone(), None));
                buildable.insert(pkgname, index);
            }
        }

        /*
         * Verify that the graph is acyclic (only for buildable packages).
         */
        debug!(
            buildable_count = buildable.len(),
            "Checking for circular dependencies"
        );
        let mut graph = DiGraphMap::new();
        for (pkgname, index) in &buildable {
            for dep in &index.depends {
                graph.add_edge(dep.pkgname(), pkgname.pkgname(), ());
            }
        }
        let cycle_error = find_cycle(&graph).map(|cycle| {
            let mut err = "Circular dependencies detected:\n".to_string();
            for n in cycle.iter().rev() {
                err.push_str(&format!("\t{}\n", n));
            }
            err.push_str(&format!("\t{}", cycle.last().unwrap()));
            error!(cycle = ?cycle, "Circular dependency detected");
            err
        });

        info!(
            buildable_count = buildable.len(),
            skipped_count = skipped.len(),
            "Resolution complete"
        );

        // Log all buildable packages
        for pkgname in buildable.keys() {
            debug!(pkgname = %pkgname.pkgname(), "Package is buildable");
        }

        // Convert scan failures to ScanFailure structs
        let scan_failed: Vec<ScanFailure> = self
            .scan_failures
            .iter()
            .map(|(pkgpath, error)| ScanFailure {
                pkgpath: pkgpath.clone(),
                error: error.clone(),
            })
            .collect();

        // Log errors but don't bail - let callers decide how to handle them
        for err in &errors {
            error!(error = %err, "Unresolved dependency");
        }

        let result =
            ScanResult { buildable, skipped, scan_failed, all_ordered, errors };

        if let Some(err) = cycle_error {
            bail!(err);
        }

        // Store resolved dependencies in database for fast reverse lookups
        let mut resolved_deps: Vec<(i64, i64)> = Vec::new();
        for (pkgname, index) in &result.buildable {
            if let Some(&pkg_id) = pkgname_to_id.get(pkgname) {
                for dep in &index.depends {
                    if let Some(&dep_id) = pkgname_to_id.get(dep) {
                        resolved_deps.push((pkg_id, dep_id));
                    }
                }
            }
        }
        if !resolved_deps.is_empty() {
            db.store_resolved_dependencies_batch(&resolved_deps)?;
            debug!(count = resolved_deps.len(), "Stored resolved dependencies");
        }

        Ok(result)
    }
}

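/// Search `graph` for a dependency cycle, returning the nodes that form the
/// first cycle found (in stack order), or `None` if the graph is acyclic.
///
/// Edges are expected to point from dependency to dependent, as built in
/// [`Scan::resolve`].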
pub fn find_cycle<'a>(
    graph: &'a DiGraphMap<&'a str, ()>,
) -> Option<Vec<&'a str>> {
    let mut visited = HashSet::new();
    let mut in_stack = HashSet::new();
    let mut stack = Vec::new();

    for node in graph.nodes() {
        if visited.contains(&node) {
            continue;
        }
        if let Some(cycle) =
            dfs(graph, node, &mut visited, &mut stack, &mut in_stack)
        {
            return Some(cycle);
        }
    }
    None
}

fn dfs<'a>(
    graph: &'a DiGraphMap<&'a str, ()>,
    node: &'a str,
    visited: &mut HashSet<&'a str>,
    stack: &mut Vec<&'a str>,
    in_stack: &mut HashSet<&'a str>,
) -> Option<Vec<&'a str>> {
    visited.insert(node);
    stack.push(node);
    in_stack.insert(node);
    for neighbor in graph.neighbors(node) {
        if in_stack.contains(neighbor) {
            if let Some(pos) = stack.iter().position(|&n| n == neighbor) {
                return Some(stack[pos..].to_vec());
            }
        } else if !visited.contains(neighbor) {
            let cycle = dfs(graph, neighbor, visited, stack, in_stack);
            if cycle.is_some() {
                return cycle;
            }
        }
    }
    stack.pop();
    in_stack.remove(node);
    None
}
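
// A minimal sanity check for the cycle detector above. This is a sketch
// added for illustration; it only exercises find_cycle() with hand-built
// graphs using the DiGraphMap API already imported in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_simple_cycle() {
        // a -> b -> c -> a forms a three-node cycle.
        let mut graph = DiGraphMap::new();
        graph.add_edge("a", "b", ());
        graph.add_edge("b", "c", ());
        graph.add_edge("c", "a", ());
        let cycle = find_cycle(&graph).expect("cycle should be detected");
        assert_eq!(cycle.len(), 3);
    }

    #[test]
    fn acyclic_graph_has_no_cycle() {
        // a -> b -> c is a simple chain with no back edges.
        let mut graph = DiGraphMap::new();
        graph.add_edge("a", "b", ());
        graph.add_edge("b", "c", ());
        assert!(find_cycle(&graph).is_none());
    }
}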