bob/
scan.rs

1/*
2 * Copyright (c) 2025 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17//! Package dependency scanning and resolution.
18//!
19//! This module provides the [`Scan`] struct for discovering package dependencies
20//! and building a directed acyclic graph (DAG) for build ordering.
21//!
22//! # Scan Process
23//!
24//! 1. Create a scan sandbox
25//! 2. Run `make pbulk-index` on each package to discover dependencies
26//! 3. Recursively discover all transitive dependencies
27//! 4. Resolve dependency patterns to specific package versions
28//! 5. Verify no circular dependencies exist
29//! 6. Return buildable and skipped package lists
30//!
31//! # Skip Reasons
32//!
33//! Packages may be skipped for several reasons:
34//!
35//! - `PKG_SKIP_REASON` - Package explicitly marked to skip on this platform
36//! - `PKG_FAIL_REASON` - Package expected to fail on this platform
37//! - Unresolved dependencies - Required dependency not found
38//! - Circular dependencies - Package has a dependency cycle
39//!
40//! # Example
41//!
42//! ```no_run
43//! use bob::{Config, Database, RunContext, Scan};
44//! use pkgsrc::PkgPath;
45//! use std::sync::Arc;
46//! use std::sync::atomic::AtomicBool;
47//!
48//! let config = Config::load(None, false)?;
49//! let db_path = config.logdir().join("bob").join("bob.db");
50//! let db = Database::open(&db_path)?;
51//! let mut scan = Scan::new(&config);
52//!
53//! scan.add(&PkgPath::new("mail/mutt")?);
54//! scan.add(&PkgPath::new("www/curl")?);
55//!
56//! let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
57//! scan.start(&ctx, &db)?;  // Discover dependencies
58//! let result = scan.resolve(&db)?;
59//!
60//! println!("Buildable: {}", result.buildable.len());
61//! println!("Skipped: {}", result.skipped.len());
62//! # Ok::<(), anyhow::Error>(())
63//! ```
64
65use crate::tui::MultiProgress;
66use crate::{Config, RunContext, Sandbox};
67use anyhow::{Context, Result, bail};
68use indexmap::IndexMap;
69use petgraph::graphmap::DiGraphMap;
70use pkgsrc::{Depend, PkgName, PkgPath, ScanIndex};
71use rayon::prelude::*;
72use std::collections::{HashMap, HashSet};
73use std::io::BufReader;
74use std::sync::atomic::{AtomicBool, Ordering};
75use std::sync::{Arc, Mutex};
76use std::time::{Duration, Instant};
77use tracing::{debug, error, info, trace, warn};
78
/// Reason why a package was excluded from the build.
///
/// Packages with skip or fail reasons set in pkgsrc are not built.
///
/// Each variant wraps a `String`.
/// NOTE(review): presumably this is the reason text reported by pkgsrc;
/// the construction site is not part of this type — confirm.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub enum SkipReason {
    /// Package has `PKG_SKIP_REASON` set.
    ///
    /// This typically indicates the package cannot be built on the current
    /// platform (e.g., architecture-specific code, missing dependencies).
    PkgSkipReason(String),
    /// Package has `PKG_FAIL_REASON` set.
    ///
    /// This indicates the package is known to fail on the current platform
    /// and should not be attempted.
    PkgFailReason(String),
}
95
/// Information about a package that was skipped during scanning.
///
/// Serializable so it can be stored or emitted alongside the rest of a
/// [`ScanResult`].
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct SkippedPackage {
    /// Package name with version.
    pub pkgname: PkgName,
    /// Package path in pkgsrc, if one is available for this package.
    pub pkgpath: Option<PkgPath>,
    /// Reason the package was skipped.
    pub reason: SkipReason,
    /// Full resolved index (for presolve output), if one is available.
    pub index: Option<ResolvedIndex>,
}
108
/// Information about a package that failed to scan.
///
/// Pairs the package path that was being scanned with the error text that
/// was produced for it.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ScanFailure {
    /// Package path in pkgsrc (e.g., `games/plib`).
    pub pkgpath: PkgPath,
    /// Error message from the scan failure.
    pub error: String,
}
117
/// A resolved package index entry with dependency information.
///
/// This extends [`ScanIndex`] with resolved dependencies (`depends`).
///
/// The type dereferences to the wrapped [`ScanIndex`], so the scan fields
/// can be read and modified directly on a `ResolvedIndex`.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ResolvedIndex {
    /// The underlying scan index data.
    pub index: ScanIndex,
    /// Resolved dependencies as package names.
    ///
    /// Empty when the entry was created with
    /// [`ResolvedIndex::from_scan_index`].
    pub depends: Vec<PkgName>,
}
128
129impl ResolvedIndex {
130    /// Create from a ScanIndex with empty depends.
131    pub fn from_scan_index(index: ScanIndex) -> Self {
132        Self { index, depends: Vec::new() }
133    }
134}
135
/// Read-only access to the wrapped [`ScanIndex`], so its fields and methods
/// can be used directly on a `ResolvedIndex`.
impl std::ops::Deref for ResolvedIndex {
    type Target = ScanIndex;
    /// Borrow the underlying scan index.
    fn deref(&self) -> &Self::Target {
        &self.index
    }
}
142
/// Mutable access to the wrapped [`ScanIndex`], so its fields can be
/// modified directly through a `ResolvedIndex`.
impl std::ops::DerefMut for ResolvedIndex {
    /// Mutably borrow the underlying scan index.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.index
    }
}
148
149impl std::fmt::Display for ResolvedIndex {
150    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
151        write!(f, "{}", self.index)?;
152        // Only output DEPENDS= if there are dependencies
153        if !self.depends.is_empty() {
154            write!(f, "DEPENDS=")?;
155            for (i, d) in self.depends.iter().enumerate() {
156                if i > 0 {
157                    write!(f, " ")?;
158                }
159                write!(f, "{d}")?;
160            }
161            writeln!(f)?;
162        }
163        Ok(())
164    }
165}
166
/// Result of scanning and resolving packages.
///
/// Returned by [`Scan::resolve`], contains the packages that can be built
/// and those that were skipped.
///
/// The derived `Default` yields a result with every collection empty.
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
pub struct ScanResult {
    /// Packages that can be built, indexed by package name.
    ///
    /// These packages have all dependencies resolved and no skip/fail reasons.
    /// Uses IndexMap to preserve insertion order from the original scan.
    pub buildable: IndexMap<PkgName, ResolvedIndex>,
    /// Packages that were skipped due to skip/fail reasons.
    pub skipped: Vec<SkippedPackage>,
    /// Packages that failed to scan (bmake pbulk-index failed).
    pub scan_failed: Vec<ScanFailure>,
    /// All packages in original order with their skip reason (if any).
    /// Used for presolve output that needs to preserve original ordering.
    /// The second tuple element is `None` for packages without a skip reason.
    pub all_ordered: Vec<(ResolvedIndex, Option<SkipReason>)>,
}
186
/// Package dependency scanner.
///
/// Discovers all dependencies for a set of packages and resolves them into
/// a buildable set with proper ordering.
///
/// # Usage
///
/// 1. Create a `Scan` with [`Scan::new`]
/// 2. Add packages to scan with [`Scan::add`]
/// 3. Run the scan with [`Scan::start`]
/// 4. Resolve dependencies with [`Scan::resolve`]
///
/// # Example
///
/// ```no_run
/// # use bob::{Config, Database, RunContext, Scan};
/// # use pkgsrc::PkgPath;
/// # use std::sync::Arc;
/// # use std::sync::atomic::AtomicBool;
/// # fn example() -> anyhow::Result<()> {
/// let config = Config::load(None, false)?;
/// let db_path = config.logdir().join("bob").join("bob.db");
/// let db = Database::open(&db_path)?;
/// let mut scan = Scan::new(&config);
///
/// scan.add(&PkgPath::new("mail/mutt")?);
/// let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
/// scan.start(&ctx, &db)?;
///
/// let result = scan.resolve(&db)?;
/// println!("Found {} buildable packages", result.buildable.len());
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Default)]
pub struct Scan {
    /// Configuration, cloned from the caller in [`Scan::new`].
    config: Config,
    /// Sandbox built from the configuration; scan commands are executed
    /// through it.
    sandbox: Sandbox,
    /// Package paths queued to be scanned.
    incoming: HashSet<PkgPath>,
    /// Pkgpaths that need no further scanning: those processed by
    /// [`Scan::start`] (including ones whose scan failed) and those already
    /// recorded as scanned in the database.
    done: HashSet<PkgPath>,
    /// Resolved packages keyed by package name.
    /// NOTE(review): presumably populated by [`Scan::resolve`] — confirm.
    resolved: IndexMap<PkgName, ResolvedIndex>,
    /// Full tree scan: [`Scan::start`] discovers every package path in pkgsrc
    /// itself instead of relying on explicitly added paths.
    /// [`Scan::new`] sets this to true (the derived `Default` alone would
    /// leave it false); [`Scan::add`] sets it to false.
    full_tree: bool,
    /// A previous full tree scan completed successfully.
    full_scan_complete: bool,
    /// Packages that failed to scan (pkgpath, error message).
    scan_failures: Vec<(PkgPath, String)>,
}
237
238impl Scan {
239    pub fn new(config: &Config) -> Scan {
240        let sandbox = Sandbox::new(config);
241        debug!(pkgsrc = %config.pkgsrc().display(),
242            make = %config.make().display(),
243            scan_threads = config.scan_threads(),
244            "Created new Scan instance"
245        );
246        Scan {
247            config: config.clone(),
248            sandbox,
249            full_tree: true,
250            ..Default::default()
251        }
252    }
253
254    pub fn add(&mut self, pkgpath: &PkgPath) {
255        info!(pkgpath = %pkgpath.as_path().display(), "Adding package to scan queue");
256        self.full_tree = false;
257        self.incoming.insert(pkgpath.clone());
258    }
259
    /// Returns true if this is a full tree scan.
    ///
    /// This is the case for a scanner created with [`Scan::new`] until a
    /// package is added explicitly with [`Scan::add`].
    pub fn is_full_tree(&self) -> bool {
        self.full_tree
    }
264
    /// Mark that a previous full tree scan completed successfully.
    ///
    /// [`Scan::start`] consults this flag: in full-tree mode, with at least
    /// one pkgpath already known as scanned, it returns without scanning.
    pub fn set_full_scan_complete(&mut self) {
        self.full_scan_complete = true;
    }
269
270    /// Initialize scan from database, checking what's already scanned.
271    /// Returns (cached_count, pending_deps_count) where pending_deps_count is the
272    /// number of dependencies discovered but not yet scanned (from interrupted scans).
273    pub fn init_from_db(
274        &mut self,
275        db: &crate::db::Database,
276    ) -> Result<(usize, usize)> {
277        let scanned = db.get_scanned_pkgpaths()?;
278        let cached_count = scanned.len();
279        let mut pending_count = 0;
280
281        if cached_count > 0 {
282            info!(
283                cached_count = cached_count,
284                "Found cached scan results in database"
285            );
286
287            // For full tree scans with full_scan_complete, we'll skip scanning
288            // For limited scans, remove already-scanned from incoming
289            if !self.full_tree {
290                self.incoming.retain(|p| !scanned.contains(&p.to_string()));
291            }
292
293            // Add scanned pkgpaths to done set
294            for pkgpath_str in &scanned {
295                if let Ok(pkgpath) = PkgPath::new(pkgpath_str) {
296                    self.done.insert(pkgpath);
297                }
298            }
299
300            // Check for dependencies that were discovered but not yet scanned.
301            // This handles the case where a scan was interrupted partway through.
302            let unscanned = db.get_unscanned_dependencies()?;
303            if !unscanned.is_empty() {
304                info!(
305                    unscanned_count = unscanned.len(),
306                    "Found unscanned dependencies from interrupted scan"
307                );
308                for pkgpath_str in unscanned {
309                    if let Ok(pkgpath) = PkgPath::new(&pkgpath_str) {
310                        if !self.done.contains(&pkgpath) {
311                            self.incoming.insert(pkgpath);
312                            pending_count += 1;
313                        }
314                    }
315                }
316            }
317        }
318
319        Ok((cached_count, pending_count))
320    }
321
322    /// Discover all packages in pkgsrc tree.
323    fn discover_packages(&mut self) -> anyhow::Result<()> {
324        println!("Discovering packages...");
325        let pkgsrc = self.config.pkgsrc().display();
326        let make = self.config.make().display();
327
328        // Get top-level SUBDIR (categories + USER_ADDITIONAL_PKGS)
329        let script = format!(
330            "cd {} && {} show-subdir-var VARNAME=SUBDIR\n",
331            pkgsrc, make
332        );
333        let child = self.sandbox.execute_script(0, &script, vec![])?;
334        let output = child
335            .wait_with_output()
336            .context("Failed to run show-subdir-var")?;
337
338        if !output.status.success() {
339            let stderr = String::from_utf8_lossy(&output.stderr);
340            bail!("Failed to get categories: {}", stderr);
341        }
342
343        let stdout = String::from_utf8_lossy(&output.stdout);
344        let entries: Vec<&str> = stdout.split_whitespace().collect();
345
346        for entry in entries {
347            if entry.contains('/') {
348                // USER_ADDITIONAL_PKGS - add directly as pkgpath
349                if let Ok(pkgpath) = PkgPath::new(entry) {
350                    self.incoming.insert(pkgpath);
351                }
352            } else {
353                // Category - get packages within it
354                let script = format!(
355                    "cd {}/{} && {} show-subdir-var VARNAME=SUBDIR\n",
356                    pkgsrc, entry, make
357                );
358                let child = self.sandbox.execute_script(0, &script, vec![])?;
359                let cat_output = child.wait_with_output();
360
361                match cat_output {
362                    Ok(o) if o.status.success() => {
363                        let pkgs = String::from_utf8_lossy(&o.stdout);
364                        for pkg in pkgs.split_whitespace() {
365                            let path = format!("{}/{}", entry, pkg);
366                            if let Ok(pkgpath) = PkgPath::new(&path) {
367                                self.incoming.insert(pkgpath);
368                            }
369                        }
370                    }
371                    Ok(o) => {
372                        let stderr = String::from_utf8_lossy(&o.stderr);
373                        debug!(category = entry, stderr = %stderr,
374                            "Failed to get packages for category");
375                    }
376                    Err(e) => {
377                        debug!(category = entry, error = %e,
378                            "Failed to run make in category");
379                    }
380                }
381            }
382        }
383
384        info!(discovered = self.incoming.len(), "Package discovery complete");
385        println!("Discovered {} package paths", self.incoming.len());
386
387        Ok(())
388    }
389
390    pub fn start(
391        &mut self,
392        ctx: &RunContext,
393        db: &crate::db::Database,
394    ) -> anyhow::Result<bool> {
395        info!(
396            incoming_count = self.incoming.len(),
397            sandbox_enabled = self.sandbox.enabled(),
398            "Starting package scan"
399        );
400
401        let pool = rayon::ThreadPoolBuilder::new()
402            .num_threads(self.config.scan_threads())
403            .build()
404            .context("Failed to build scan thread pool")?;
405
406        let shutdown_flag = Arc::clone(&ctx.shutdown);
407        let stats = ctx.stats.clone();
408
409        /*
410         * Only a single sandbox is required, 'make pbulk-index' can safely be
411         * run in parallel inside one sandbox.
412         */
413        let script_envs = self.config.script_env();
414
415        // For full tree scans where a previous scan completed, all packages
416        // are already cached - nothing to do.
417        if self.full_tree && self.full_scan_complete && !self.done.is_empty() {
418            println!("All {} package paths already scanned", self.done.len());
419            return Ok(false);
420        }
421
422        // For non-full-tree scans, prune already-cached packages from incoming
423        // before sandbox creation to avoid unnecessary setup/teardown.
424        if !self.full_tree {
425            self.incoming.retain(|p| !self.done.contains(p));
426            if self.incoming.is_empty() {
427                if !self.done.is_empty() {
428                    println!(
429                        "All {} package paths already scanned",
430                        self.done.len()
431                    );
432                }
433                return Ok(false);
434            }
435        }
436
437        if self.sandbox.enabled() {
438            println!("Creating sandbox...");
439            if let Err(e) = self.sandbox.create(0) {
440                if let Err(destroy_err) = self.sandbox.destroy(0) {
441                    eprintln!(
442                        "Warning: failed to destroy sandbox: {}",
443                        destroy_err
444                    );
445                }
446                return Err(e);
447            }
448
449            // Run pre-build script if defined
450            if let Some(pre_build) = self.config.script("pre-build") {
451                debug!("Running pre-build script");
452                let child = self.sandbox.execute(
453                    0,
454                    pre_build,
455                    script_envs.clone(),
456                    None,
457                    None,
458                )?;
459                let output = child
460                    .wait_with_output()
461                    .context("Failed to wait for pre-build")?;
462                if !output.status.success() {
463                    let stderr = String::from_utf8_lossy(&output.stderr);
464                    error!(exit_code = ?output.status.code(), stderr = %stderr, "pre-build script failed");
465                }
466            }
467        }
468
469        // For full tree scans, always discover all packages
470        if self.full_tree {
471            self.discover_packages()?;
472            self.incoming.retain(|p| !self.done.contains(p));
473        }
474
475        // Nothing to scan - all packages are cached
476        if self.incoming.is_empty() {
477            if !self.done.is_empty() {
478                println!(
479                    "All {} package paths already scanned",
480                    self.done.len()
481                );
482            }
483
484            if self.sandbox.enabled() {
485                self.cleanup_sandbox(script_envs)?;
486            }
487
488            return Ok(false);
489        }
490
491        // Clear resolved dependencies since we're scanning new packages
492        db.clear_resolved_depends()?;
493
494        println!("Scanning packages...");
495
496        // Set up multi-line progress display using ratatui inline viewport
497        // Include cached packages in total so progress shows full picture
498        let cached_count = self.done.len();
499        let total_count = cached_count + self.incoming.len();
500        let progress = Arc::new(Mutex::new(
501            MultiProgress::new(
502                "Scanning",
503                "Scanned",
504                total_count,
505                self.config.scan_threads(),
506            )
507            .expect("Failed to initialize progress display"),
508        ));
509
510        // Mark cached packages
511        if cached_count > 0 {
512            if let Ok(mut p) = progress.lock() {
513                p.state_mut().cached = cached_count;
514            }
515        }
516
517        // Flag to stop the refresh thread
518        let stop_refresh = Arc::new(AtomicBool::new(false));
519
520        // Spawn a thread to periodically refresh the display (for timer updates)
521        let progress_refresh = Arc::clone(&progress);
522        let stop_flag = Arc::clone(&stop_refresh);
523        let shutdown_for_refresh = Arc::clone(&shutdown_flag);
524        let refresh_thread = std::thread::spawn(move || {
525            while !stop_flag.load(Ordering::Relaxed)
526                && !shutdown_for_refresh.load(Ordering::SeqCst)
527            {
528                if let Ok(mut p) = progress_refresh.lock() {
529                    // Check for keyboard events (Ctrl+C raises SIGINT)
530                    let _ = p.poll_events();
531                    let _ = p.render();
532                }
533                std::thread::sleep(Duration::from_millis(50));
534            }
535        });
536
537        /*
538         * Continuously iterate over incoming queue, moving to done once
539         * processed, and adding any dependencies to incoming to be processed
540         * next.
541         */
542        let mut interrupted = false;
543        loop {
544            // Check for shutdown signal
545            if shutdown_flag.load(Ordering::Relaxed) {
546                // Immediately show interrupted message
547                stop_refresh.store(true, Ordering::Relaxed);
548                if let Ok(mut p) = progress.lock() {
549                    let _ = p.finish_interrupted();
550                }
551                interrupted = true;
552                break;
553            }
554
555            /*
556             * Convert the incoming HashSet into a Vec for parallel processing.
557             */
558            let mut parpaths: Vec<(PkgPath, Result<Vec<ScanIndex>>)> =
559                self.incoming.iter().map(|p| (p.clone(), Ok(vec![]))).collect();
560
561            let progress_clone = Arc::clone(&progress);
562            let shutdown_clone = Arc::clone(&shutdown_flag);
563            let stats_clone = stats.clone();
564            pool.install(|| {
565                parpaths.par_iter_mut().for_each(|pkg| {
566                    // Check for shutdown before starting each package
567                    if shutdown_clone.load(Ordering::Relaxed) {
568                        return;
569                    }
570
571                    let (pkgpath, result) = pkg;
572                    let pathname =
573                        pkgpath.as_path().to_string_lossy().to_string();
574
575                    // Get rayon thread index for progress tracking
576                    let thread_id = rayon::current_thread_index().unwrap_or(0);
577
578                    // Update progress - show current package for this thread
579                    if let Ok(mut p) = progress_clone.lock() {
580                        p.state_mut().set_worker_active(thread_id, &pathname);
581                    }
582
583                    let scan_start = Instant::now();
584                    *result = self.scan_pkgpath(pkgpath);
585                    let scan_duration = scan_start.elapsed();
586
587                    // Record stats if enabled
588                    if let Some(ref s) = stats_clone {
589                        s.scan(&pathname, scan_duration, result.is_ok());
590                    }
591
592                    // Update counter immediately after each package
593                    if let Ok(mut p) = progress_clone.lock() {
594                        p.state_mut().set_worker_idle(thread_id);
595                        if result.is_ok() {
596                            p.state_mut().increment_completed();
597                        } else {
598                            p.state_mut().increment_failed();
599                        }
600                    }
601                });
602            });
603
604            // Check if we were interrupted during parallel processing
605            let was_interrupted = shutdown_flag.load(Ordering::Relaxed);
606            if was_interrupted {
607                stop_refresh.store(true, Ordering::Relaxed);
608                if let Ok(mut p) = progress.lock() {
609                    let _ = p.finish_interrupted();
610                }
611                interrupted = true;
612            }
613
614            /*
615             * Process results - always save completed scans, even if
616             * interrupted, so progress is preserved on restart.
617             */
618            let mut new_incoming: HashSet<PkgPath> = HashSet::new();
619            for (pkgpath, scanpkgs) in parpaths.drain(..) {
620                let scanpkgs = match scanpkgs {
621                    Ok(pkgs) => pkgs,
622                    Err(e) => {
623                        self.scan_failures
624                            .push((pkgpath.clone(), e.to_string()));
625                        self.done.insert(pkgpath.clone());
626                        continue;
627                    }
628                };
629                self.done.insert(pkgpath.clone());
630                // Save immediately to database
631                if !scanpkgs.is_empty() {
632                    db.store_scan_pkgpath(&pkgpath.to_string(), &scanpkgs)?;
633                }
634
635                // Skip dependency discovery if interrupted
636                if was_interrupted {
637                    continue;
638                }
639
640                // Discover dependencies not yet seen
641                for pkg in scanpkgs {
642                    if let Some(ref all_deps) = pkg.all_depends {
643                        for dep in all_deps {
644                            let dep_path = dep.pkgpath();
645                            if self.done.contains(dep_path)
646                                || self.incoming.contains(dep_path)
647                                || new_incoming.contains(dep_path)
648                            {
649                                continue;
650                            }
651                            // Check database for cached dependency
652                            if db.is_pkgpath_scanned(&dep_path.to_string())? {
653                                self.done.insert(dep_path.clone());
654                                if let Ok(mut p) = progress.lock() {
655                                    p.state_mut().total += 1;
656                                    p.state_mut().cached += 1;
657                                }
658                            } else {
659                                new_incoming.insert(dep_path.clone());
660                                if let Ok(mut p) = progress.lock() {
661                                    p.state_mut().total += 1;
662                                }
663                            }
664                        }
665                    }
666                }
667            }
668
669            // Exit after saving results if interrupted
670            if was_interrupted {
671                break;
672            }
673
674            /*
675             * We're finished with the current incoming, replace it with the
676             * new incoming list.  If it is empty then we've already processed
677             * all known PKGPATHs and are done.
678             */
679            self.incoming = new_incoming;
680            if self.incoming.is_empty() {
681                break;
682            }
683        }
684
685        // Stop the refresh thread and print final summary
686        stop_refresh.store(true, Ordering::Relaxed);
687        let _ = refresh_thread.join();
688
689        // Only call finish() for normal completion; finish_interrupted()
690        // was already called immediately when interrupt was detected
691        if !interrupted {
692            if let Ok(mut p) = progress.lock() {
693                let _ = p.finish();
694            }
695        }
696
697        if self.sandbox.enabled() {
698            self.cleanup_sandbox(script_envs)?;
699        }
700
701        if interrupted {
702            return Ok(true);
703        }
704
705        Ok(false)
706    }
707
708    /// Run post-build cleanup and destroy the scan sandbox.
709    fn cleanup_sandbox(
710        &self,
711        envs: Vec<(String, String)>,
712    ) -> anyhow::Result<()> {
713        if let Some(post_build) = self.config.script("post-build") {
714            debug!("Running post-build script");
715            let child =
716                self.sandbox.execute(0, post_build, envs, None, None)?;
717            let output = child
718                .wait_with_output()
719                .context("Failed to wait for post-build")?;
720            if !output.status.success() {
721                let stderr = String::from_utf8_lossy(&output.stderr);
722                error!(exit_code = ?output.status.code(), stderr = %stderr, "post-build script failed");
723            }
724        }
725        self.sandbox.destroy(0)
726    }
727
728    /// Returns scan failures as formatted error strings.
729    pub fn scan_errors(&self) -> impl Iterator<Item = &str> {
730        self.scan_failures.iter().map(|(_, e)| e.as_str())
731    }
732
733    /// Returns scan failures with pkgpath information.
734    pub fn scan_failures(&self) -> &[(PkgPath, String)] {
735        &self.scan_failures
736    }
737
738    /**
739     * Scan a single PKGPATH, returning a [`Vec`] of [`ScanIndex`] results,
740     * as multi-version packages may return multiple results.
741     */
742    pub fn scan_pkgpath(
743        &self,
744        pkgpath: &PkgPath,
745    ) -> anyhow::Result<Vec<ScanIndex>> {
746        let pkgpath_str = pkgpath.as_path().display().to_string();
747        debug!(pkgpath = %pkgpath_str, "Scanning package");
748
749        let bmake = self.config.make().display().to_string();
750        let pkgsrcdir = self.config.pkgsrc().display().to_string();
751        let script = format!(
752            "cd {}/{} && {} pbulk-index\n",
753            pkgsrcdir, pkgpath_str, bmake
754        );
755
756        let scan_env = self.config.scan_env();
757        trace!(pkgpath = %pkgpath_str,
758            script = %script,
759            scan_env = ?scan_env,
760            "Executing pkg-scan"
761        );
762        let child = self.sandbox.execute_script(0, &script, scan_env)?;
763        let output = child.wait_with_output()?;
764
765        if !output.status.success() {
766            let stderr = String::from_utf8_lossy(&output.stderr);
767            error!(pkgpath = %pkgpath_str,
768                exit_code = ?output.status.code(),
769                stderr = %stderr,
770                "pkg-scan script failed"
771            );
772            let stderr = stderr.trim();
773            let msg = if stderr.is_empty() {
774                format!("Scan failed for {}", pkgpath_str)
775            } else {
776                format!("Scan failed for {}: {}", pkgpath_str, stderr)
777            };
778            bail!(msg);
779        }
780
781        let stdout_str = String::from_utf8_lossy(&output.stdout);
782        trace!(pkgpath = %pkgpath_str,
783            stdout_len = stdout_str.len(),
784            stdout = %stdout_str,
785            "pkg-scan script output"
786        );
787
788        let reader = BufReader::new(&output.stdout[..]);
789        let mut index: Vec<ScanIndex> =
790            ScanIndex::from_reader(reader).collect::<Result<_, _>>()?;
791
792        info!(pkgpath = %pkgpath_str,
793            packages_found = index.len(),
794            "Scan complete for pkgpath"
795        );
796
797        /*
798         * Set PKGPATH (PKG_LOCATION) as for some reason pbulk-index doesn't.
799         */
800        for pkg in &mut index {
801            pkg.pkg_location = Some(pkgpath.clone());
802            debug!(pkgpath = %pkgpath_str,
803                pkgname = %pkg.pkgname.pkgname(),
804                skip_reason = ?pkg.pkg_skip_reason,
805                fail_reason = ?pkg.pkg_fail_reason,
806                depends_count = pkg.all_depends.as_ref().map_or(0, |v| v.len()),
807                "Found package in scan"
808            );
809        }
810
811        Ok(index)
812    }
813
814    /**
815     * Resolve the list of scanned packages, by ensuring all of the [`Depend`]
816     * patterns in `all_depends` match a found package, and that there are no
817     * circular dependencies.  The best match for each is stored in the
818     * `depends` for the package in question.
819     *
820     * Return a [`ScanResult`] containing buildable packages and skipped packages.
821     *
822     * Also stores resolved dependencies in the database for fast reverse lookups.
823     */
824    pub fn resolve(&mut self, db: &crate::db::Database) -> Result<ScanResult> {
825        info!(
826            done_pkgpaths = self.done.len(),
827            "Starting dependency resolution"
828        );
829
830        // Load all packages from database
831        let all_packages = db.get_all_packages()?;
832
833        /*
834         * Populate the resolved hash with ALL packages first, including those
835         * with skip/fail reasons. This allows us to resolve dependencies for
836         * all packages before separating them.
837         */
838        let mut pkgnames: indexmap::IndexSet<PkgName> =
839            indexmap::IndexSet::new();
840
841        // Track which packages have skip/fail reasons
842        let mut skip_reasons: HashMap<PkgName, SkipReason> = HashMap::new();
843
844        // Track package_id for storing resolved dependencies
845        let mut pkgname_to_id: HashMap<PkgName, i64> = HashMap::new();
846
847        // Load full scan data for each package
848        for pkg_row in &all_packages {
849            let pkg = match db.get_full_scan_index(pkg_row.id) {
850                Ok(p) => p,
851                Err(e) => {
852                    warn!(pkgname = %pkg_row.pkgname, error = %e, "Failed to load scan data");
853                    continue;
854                }
855            };
856
857            pkgname_to_id.insert(pkg.pkgname.clone(), pkg_row.id);
858
859            debug!(pkgpath = %pkg_row.pkgpath,
860                pkgname = %pkg.pkgname.pkgname(),
861                "Processing package"
862            );
863
864            // Skip duplicate PKGNAMEs - keep only the first (preferred)
865            // variant for multi-version packages.
866            if pkgnames.contains(&pkg.pkgname) {
867                debug!(pkgname = %pkg.pkgname.pkgname(),
868                    multi_version = ?pkg.multi_version,
869                    "Skipping duplicate PKGNAME"
870                );
871                continue;
872            }
873
874            // Track skip/fail reasons but still add to resolved
875            if let Some(reason) = &pkg.pkg_skip_reason {
876                if !reason.is_empty() {
877                    info!(pkgname = %pkg.pkgname.pkgname(),
878                        reason = %reason,
879                        "Package has PKG_SKIP_REASON"
880                    );
881                    skip_reasons.insert(
882                        pkg.pkgname.clone(),
883                        SkipReason::PkgSkipReason(reason.clone()),
884                    );
885                }
886            }
887            if let Some(reason) = &pkg.pkg_fail_reason {
888                if !reason.is_empty()
889                    && !skip_reasons.contains_key(&pkg.pkgname)
890                {
891                    info!(pkgname = %pkg.pkgname.pkgname(),
892                        reason = %reason,
893                        "Package has PKG_FAIL_REASON"
894                    );
895                    skip_reasons.insert(
896                        pkg.pkgname.clone(),
897                        SkipReason::PkgFailReason(reason.clone()),
898                    );
899                }
900            }
901
902            debug!(pkgname = %pkg.pkgname.pkgname(),
903                "Adding package to resolved set"
904            );
905            pkgnames.insert(pkg.pkgname.clone());
906            self.resolved.insert(
907                pkg.pkgname.clone(),
908                ResolvedIndex::from_scan_index(pkg.clone()),
909            );
910        }
911
912        info!(
913            resolved_count = self.resolved.len(),
914            skip_reasons_count = skip_reasons.len(),
915            "Initial resolution complete"
916        );
917
918        /*
919         * Keep a cache of best Depend => PkgName matches we've already seen
920         * as it's likely the same patterns will be used in multiple places.
921         */
922        let mut match_cache: HashMap<Depend, PkgName> = HashMap::new();
923
924        /*
925         * Track packages to skip due to skipped dependencies, and truly
926         * unresolved dependencies (errors).
927         */
928        let mut skip_due_to_dep: HashMap<PkgName, String> = HashMap::new();
929        let mut errors: Vec<(PkgName, String)> = Vec::new();
930
931        // Helper to check if a dependency pattern is already satisfied
932        let is_satisfied = |depends: &[PkgName], pattern: &pkgsrc::Pattern| {
933            depends.iter().any(|existing| pattern.matches(existing.pkgname()))
934        };
935
936        for pkg in self.resolved.values_mut() {
937            let all_deps = match pkg.all_depends.clone() {
938                Some(deps) => deps,
939                None => continue,
940            };
941            for depend in &all_deps {
942                // Check for cached DEPENDS match first. If found, use it
943                // (but only add if the pattern isn't already satisfied).
944                if let Some(pkgname) = match_cache.get(depend) {
945                    if !is_satisfied(&pkg.depends, depend.pattern())
946                        && !pkg.depends.contains(pkgname)
947                    {
948                        pkg.depends.push(pkgname.clone());
949                    }
950                    continue;
951                }
952                /*
953                 * Find best DEPENDS match out of all known PKGNAME.
954                 * Collect all candidates that match the pattern.
955                 */
956                let mut candidates: Vec<&PkgName> = Vec::new();
957                for candidate in &pkgnames {
958                    if depend.pattern().matches(candidate.pkgname()) {
959                        candidates.push(candidate);
960                    }
961                }
962
963                // Find best match among all candidates using pbulk algorithm:
964                // higher version wins, larger name on tie.
965                let mut best: Option<&PkgName> = None;
966                let mut match_error: Option<pkgsrc::PatternError> = None;
967                for candidate in candidates {
968                    best = match best {
969                        None => Some(candidate),
970                        Some(current) => {
971                            match depend.pattern().best_match_pbulk(
972                                current.pkgname(),
973                                candidate.pkgname(),
974                            ) {
975                                Ok(Some(m)) if m == candidate.pkgname() => {
976                                    Some(candidate)
977                                }
978                                Ok(_) => Some(current),
979                                Err(e) => {
980                                    match_error = Some(e);
981                                    break;
982                                }
983                            }
984                        }
985                    };
986                }
987                if let Some(e) = match_error {
988                    errors.push((
989                        pkg.index.pkgname.clone(),
990                        format!(
991                            "Pattern error for {} in {}: {}",
992                            depend.pattern().pattern(),
993                            pkg.index.pkgname.pkgname(),
994                            e
995                        ),
996                    ));
997                    continue;
998                }
999                // If found, save to cache and add to depends (if not already satisfied)
1000                if let Some(pkgname) = best {
1001                    if !is_satisfied(&pkg.depends, depend.pattern())
1002                        && !pkg.depends.contains(pkgname)
1003                    {
1004                        pkg.depends.push(pkgname.clone());
1005                    }
1006                    match_cache.insert(depend.clone(), pkgname.clone());
1007                } else {
1008                    // No matching package exists
1009                    errors.push((
1010                        pkg.index.pkgname.clone(),
1011                        format!(
1012                            "No match found for {} in {}",
1013                            depend.pattern().pattern(),
1014                            pkg.index.pkgname.pkgname()
1015                        ),
1016                    ));
1017                }
1018            }
1019        }
1020
1021        /*
1022         * Iteratively propagate skips: if A depends on B, and B is now
1023         * marked to skip, then A should also be skipped.
1024         */
1025        loop {
1026            let mut new_skips: HashMap<PkgName, String> = HashMap::new();
1027
1028            for pkg in self.resolved.values() {
1029                if skip_due_to_dep.contains_key(&pkg.pkgname)
1030                    || skip_reasons.contains_key(&pkg.pkgname)
1031                {
1032                    continue;
1033                }
1034                for dep in &pkg.depends {
1035                    if skip_due_to_dep.contains_key(dep)
1036                        || skip_reasons.contains_key(dep)
1037                    {
1038                        // Our dependency is being skipped
1039                        new_skips.insert(
1040                            pkg.pkgname.clone(),
1041                            format!("Dependency {} skipped", dep.pkgname()),
1042                        );
1043                        break;
1044                    }
1045                }
1046            }
1047
1048            if new_skips.is_empty() {
1049                break;
1050            }
1051            skip_due_to_dep.extend(new_skips);
1052        }
1053
1054        // Merge skip_due_to_dep into skip_reasons
1055        for (pkgname, reason) in skip_due_to_dep.iter() {
1056            if !skip_reasons.contains_key(pkgname) {
1057                skip_reasons.insert(
1058                    pkgname.clone(),
1059                    SkipReason::PkgSkipReason(reason.clone()),
1060                );
1061            }
1062        }
1063
1064        // Filter out errors for packages that are being skipped anyway
1065        let errors: Vec<String> = errors
1066            .into_iter()
1067            .filter(|(pkgname, _)| !skip_reasons.contains_key(pkgname))
1068            .map(|(_, message)| message)
1069            .collect();
1070
1071        // Build all_ordered first to preserve original order, then separate
1072        let mut all_ordered: Vec<(ResolvedIndex, Option<SkipReason>)> =
1073            Vec::new();
1074        let mut buildable: IndexMap<PkgName, ResolvedIndex> = IndexMap::new();
1075        let mut skipped: Vec<SkippedPackage> = Vec::new();
1076
1077        for (pkgname, index) in std::mem::take(&mut self.resolved) {
1078            let reason = skip_reasons.remove(&pkgname);
1079            all_ordered.push((index.clone(), reason.clone()));
1080            if let Some(r) = reason {
1081                skipped.push(SkippedPackage {
1082                    pkgname: index.pkgname.clone(),
1083                    pkgpath: index.pkg_location.clone(),
1084                    reason: r,
1085                    index: Some(index),
1086                });
1087            } else {
1088                buildable.insert(pkgname, index);
1089            }
1090        }
1091
1092        /*
1093         * Verify that the graph is acyclic (only for buildable packages).
1094         */
1095        debug!(
1096            buildable_count = buildable.len(),
1097            "Checking for circular dependencies"
1098        );
1099        let mut graph = DiGraphMap::new();
1100        for (pkgname, index) in &buildable {
1101            for dep in &index.depends {
1102                graph.add_edge(dep.pkgname(), pkgname.pkgname(), ());
1103            }
1104        }
1105        let cycle_error = find_cycle(&graph).map(|cycle| {
1106            let mut err = "Circular dependencies detected:\n".to_string();
1107            for n in cycle.iter().rev() {
1108                err.push_str(&format!("\t{}\n", n));
1109            }
1110            err.push_str(&format!("\t{}", cycle.last().unwrap()));
1111            error!(cycle = ?cycle, "Circular dependency detected");
1112            err
1113        });
1114
1115        info!(
1116            buildable_count = buildable.len(),
1117            skipped_count = skipped.len(),
1118            "Resolution complete"
1119        );
1120
1121        // Log all buildable packages
1122        for pkgname in buildable.keys() {
1123            debug!(pkgname = %pkgname.pkgname(), "Package is buildable");
1124        }
1125
1126        // Convert scan failures to ScanFailure structs
1127        let scan_failed: Vec<ScanFailure> = self
1128            .scan_failures
1129            .iter()
1130            .map(|(pkgpath, error)| ScanFailure {
1131                pkgpath: pkgpath.clone(),
1132                error: error.clone(),
1133            })
1134            .collect();
1135
1136        let result =
1137            ScanResult { buildable, skipped, scan_failed, all_ordered };
1138
1139        // Now check for errors
1140        if !errors.is_empty() {
1141            for err in &errors {
1142                error!(error = %err, "Unresolved dependency");
1143            }
1144            bail!("Unresolved dependencies:\n  {}", errors.join("\n  "));
1145        }
1146
1147        if let Some(err) = cycle_error {
1148            bail!(err);
1149        }
1150
1151        // Store resolved dependencies in database for fast reverse lookups
1152        let mut resolved_deps: Vec<(i64, i64)> = Vec::new();
1153        for (pkgname, index) in &result.buildable {
1154            if let Some(&pkg_id) = pkgname_to_id.get(pkgname) {
1155                for dep in &index.depends {
1156                    if let Some(&dep_id) = pkgname_to_id.get(dep) {
1157                        resolved_deps.push((pkg_id, dep_id));
1158                    }
1159                }
1160            }
1161        }
1162        if !resolved_deps.is_empty() {
1163            db.store_resolved_dependencies_batch(&resolved_deps)?;
1164            debug!(count = resolved_deps.len(), "Stored resolved dependencies");
1165        }
1166
1167        Ok(result)
1168    }
1169}
1170
1171pub fn find_cycle<'a>(
1172    graph: &'a DiGraphMap<&'a str, ()>,
1173) -> Option<Vec<&'a str>> {
1174    let mut visited = HashSet::new();
1175    let mut in_stack = HashSet::new();
1176    let mut stack = Vec::new();
1177
1178    for node in graph.nodes() {
1179        if visited.contains(&node) {
1180            continue;
1181        }
1182        if let Some(cycle) =
1183            dfs(graph, node, &mut visited, &mut stack, &mut in_stack)
1184        {
1185            return Some(cycle);
1186        }
1187    }
1188    None
1189}
1190
1191fn dfs<'a>(
1192    graph: &'a DiGraphMap<&'a str, ()>,
1193    node: &'a str,
1194    visited: &mut HashSet<&'a str>,
1195    stack: &mut Vec<&'a str>,
1196    in_stack: &mut HashSet<&'a str>,
1197) -> Option<Vec<&'a str>> {
1198    visited.insert(node);
1199    stack.push(node);
1200    in_stack.insert(node);
1201    for neighbor in graph.neighbors(node) {
1202        if in_stack.contains(neighbor) {
1203            if let Some(pos) = stack.iter().position(|&n| n == neighbor) {
1204                return Some(stack[pos..].to_vec());
1205            }
1206        } else if !visited.contains(neighbor) {
1207            let cycle = dfs(graph, neighbor, visited, stack, in_stack);
1208            if cycle.is_some() {
1209                return cycle;
1210            }
1211        }
1212    }
1213    stack.pop();
1214    in_stack.remove(node);
1215    None
1216}