bob/
scan.rs

1/*
2 * Copyright (c) 2025 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17//! Package dependency scanning and resolution.
18//!
19//! This module provides the [`Scan`] struct for discovering package dependencies
20//! and building a directed acyclic graph (DAG) for build ordering.
21//!
22//! # Scan Process
23//!
24//! 1. Create a scan sandbox
25//! 2. Run `make pbulk-index` on each package to discover dependencies
26//! 3. Recursively discover all transitive dependencies
27//! 4. Resolve dependency patterns to specific package versions
28//! 5. Verify no circular dependencies exist
29//! 6. Return buildable and skipped package lists
30//!
31//! # Skip Reasons
32//!
33//! Packages may be skipped for several reasons:
34//!
35//! - `PKG_SKIP_REASON` - Package explicitly marked to skip on this platform
36//! - `PKG_FAIL_REASON` - Package expected to fail on this platform
37//! - Unresolved dependencies - Required dependency not found
38//! - Circular dependencies - Package has a dependency cycle
39//!
40//! # Example
41//!
42//! ```no_run
43//! use bob::{Config, Database, RunContext, Scan};
44//! use pkgsrc::PkgPath;
45//! use std::sync::Arc;
46//! use std::sync::atomic::AtomicBool;
47//!
48//! let config = Config::load(None, false)?;
49//! let db_path = config.logdir().join("bob").join("bob.db");
50//! let db = Database::open(&db_path)?;
51//! let mut scan = Scan::new(&config);
52//!
53//! scan.add(&PkgPath::new("mail/mutt")?);
54//! scan.add(&PkgPath::new("www/curl")?);
55//!
56//! let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
57//! scan.start(&ctx, &db)?;  // Discover dependencies
58//! let result = scan.resolve(&db)?;
59//!
60//! println!("Buildable: {}", result.buildable.len());
61//! println!("Skipped: {}", result.skipped.len());
62//! # Ok::<(), anyhow::Error>(())
63//! ```
64
65use crate::tui::MultiProgress;
66use crate::{Config, RunContext, Sandbox};
67use anyhow::{Context, Result, bail};
68use indexmap::IndexMap;
69use petgraph::graphmap::DiGraphMap;
70use pkgsrc::{Depend, PkgName, PkgPath, ScanIndex};
71use rayon::prelude::*;
72use std::collections::{HashMap, HashSet};
73use std::io::BufReader;
74use std::sync::atomic::{AtomicBool, Ordering};
75use std::sync::{Arc, Mutex};
76use std::time::Duration;
77use tracing::{debug, error, info, trace};
78
79/// Reason why a package was excluded from the build.
80///
81/// Packages with skip or fail reasons set in pkgsrc are not built.
82#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
83pub enum SkipReason {
84    /// Package has `PKG_SKIP_REASON` set.
85    ///
86    /// This typically indicates the package cannot be built on the current
87    /// platform (e.g., architecture-specific code, missing dependencies).
88    PkgSkipReason(String),
89    /// Package has `PKG_FAIL_REASON` set.
90    ///
91    /// This indicates the package is known to fail on the current platform
92    /// and should not be attempted.
93    PkgFailReason(String),
94    /// A dependency could not be resolved to any known package.
95    ///
96    /// Contains the dependency pattern that could not be matched.
97    UnresolvedDependency(String),
98}
99
100/// Information about a package that was skipped during scanning.
101#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
102pub struct SkippedPackage {
103    /// Package name with version.
104    pub pkgname: PkgName,
105    /// Package path in pkgsrc.
106    pub pkgpath: Option<PkgPath>,
107    /// Reason the package was skipped.
108    pub reason: SkipReason,
109}
110
111/// Information about a package that failed to scan.
112#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
113pub struct ScanFailure {
114    /// Package path in pkgsrc (e.g., `games/plib`).
115    pub pkgpath: PkgPath,
116    /// Error message from the scan failure.
117    pub error: String,
118}
119
120/// A resolved package index entry with dependency information.
121///
122/// This extends [`ScanIndex`] with resolved dependencies (`depends`).
123#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
124pub struct ResolvedIndex {
125    /// The underlying scan index data.
126    pub index: ScanIndex,
127    /// Resolved dependencies as package names.
128    pub depends: Vec<PkgName>,
129    /// True if this package has an unresolved dependency.
130    #[serde(default)]
131    pub has_unresolved_dep: bool,
132}
133
134impl ResolvedIndex {
135    /// Create from a ScanIndex with empty depends.
136    pub fn from_scan_index(index: ScanIndex) -> Self {
137        Self { index, depends: Vec::new(), has_unresolved_dep: false }
138    }
139}
140
141impl std::ops::Deref for ResolvedIndex {
142    type Target = ScanIndex;
143    fn deref(&self) -> &Self::Target {
144        &self.index
145    }
146}
147
148impl std::ops::DerefMut for ResolvedIndex {
149    fn deref_mut(&mut self) -> &mut Self::Target {
150        &mut self.index
151    }
152}
153
154impl std::fmt::Display for ResolvedIndex {
155    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156        write!(f, "{}", self.index)?;
157        // Only output DEPENDS= if there are dependencies and no unresolved deps.
158        if !self.depends.is_empty() && !self.has_unresolved_dep {
159            write!(f, "DEPENDS=")?;
160            for (i, d) in self.depends.iter().enumerate() {
161                if i > 0 {
162                    write!(f, " ")?;
163                }
164                write!(f, "{d}")?;
165            }
166            writeln!(f)?;
167        }
168        Ok(())
169    }
170}
171
172/// Result of scanning and resolving packages.
173///
174/// Returned by [`Scan::resolve`], contains the packages that can be built
175/// and those that were skipped.
176#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
177pub struct ScanResult {
178    /// Packages that can be built, indexed by package name.
179    ///
180    /// These packages have all dependencies resolved and no skip/fail reasons.
181    /// Uses IndexMap to preserve insertion order from the original scan.
182    pub buildable: IndexMap<PkgName, ResolvedIndex>,
183    /// Packages that were skipped due to skip/fail reasons.
184    pub skipped: Vec<SkippedPackage>,
185    /// Packages that failed to scan (bmake pbulk-index failed).
186    pub scan_failed: Vec<ScanFailure>,
187    /// All packages in original order with their skip reason (if any).
188    /// Used for presolve output that needs to preserve original ordering.
189    pub all_ordered: Vec<(ResolvedIndex, Option<SkipReason>)>,
190    /// Unresolved dependency errors.
191    /// Callers can check this and config.strict_scan() to decide if fatal.
192    pub errors: Vec<String>,
193}
194
195/// Package dependency scanner.
196///
197/// Discovers all dependencies for a set of packages and resolves them into
198/// a buildable set with proper ordering.
199///
200/// # Usage
201///
202/// 1. Create a `Scan` with [`Scan::new`]
203/// 2. Add packages to scan with [`Scan::add`]
204/// 3. Run the scan with [`Scan::start`]
205/// 4. Resolve dependencies with [`Scan::resolve`]
206///
207/// # Example
208///
209/// ```no_run
210/// # use bob::{Config, Database, RunContext, Scan};
211/// # use pkgsrc::PkgPath;
212/// # use std::sync::Arc;
213/// # use std::sync::atomic::AtomicBool;
214/// # fn example() -> anyhow::Result<()> {
215/// let config = Config::load(None, false)?;
216/// let db_path = config.logdir().join("bob").join("bob.db");
217/// let db = Database::open(&db_path)?;
218/// let mut scan = Scan::new(&config);
219///
220/// scan.add(&PkgPath::new("mail/mutt")?);
221/// let ctx = RunContext::new(Arc::new(AtomicBool::new(false)));
222/// scan.start(&ctx, &db)?;
223///
224/// let result = scan.resolve(&db)?;
225/// println!("Found {} buildable packages", result.buildable.len());
226/// # Ok(())
227/// # }
228/// ```
229#[derive(Debug, Default)]
230pub struct Scan {
231    config: Config,
232    sandbox: Sandbox,
233    incoming: HashSet<PkgPath>,
234    /// Pkgpaths we've completed scanning (in this session).
235    done: HashSet<PkgPath>,
236    resolved: IndexMap<PkgName, ResolvedIndex>,
237    /// Full tree scan - discover all packages, skip recursive dependency discovery.
238    /// Defaults to true; set to false when packages are explicitly added.
239    full_tree: bool,
240    /// A previous full tree scan completed successfully.
241    full_scan_complete: bool,
242    /// Packages that failed to scan (pkgpath, error message).
243    scan_failures: Vec<(PkgPath, String)>,
244}
245
246impl Scan {
247    pub fn new(config: &Config) -> Scan {
248        let sandbox = Sandbox::new(config);
249        debug!(pkgsrc = %config.pkgsrc().display(),
250            make = %config.make().display(),
251            scan_threads = config.scan_threads(),
252            "Created new Scan instance"
253        );
254        Scan {
255            config: config.clone(),
256            sandbox,
257            full_tree: true,
258            ..Default::default()
259        }
260    }
261
262    pub fn add(&mut self, pkgpath: &PkgPath) {
263        info!(pkgpath = %pkgpath.as_path().display(), "Adding package to scan queue");
264        self.full_tree = false;
265        self.incoming.insert(pkgpath.clone());
266    }
267
268    /// Returns true if this is a full tree scan.
269    pub fn is_full_tree(&self) -> bool {
270        self.full_tree
271    }
272
273    /// Mark that a previous full tree scan completed successfully.
274    pub fn set_full_scan_complete(&mut self) {
275        self.full_scan_complete = true;
276    }
277
278    /// Initialize scan from database, checking what's already scanned.
279    /// Returns (cached_count, pending_deps_count) where pending_deps_count is the
280    /// number of dependencies discovered but not yet scanned (from interrupted scans).
281    pub fn init_from_db(
282        &mut self,
283        db: &crate::db::Database,
284    ) -> Result<(usize, usize)> {
285        let scanned = db.get_scanned_pkgpaths()?;
286        let cached_count = scanned.len();
287        let mut pending_count = 0;
288
289        if cached_count > 0 {
290            info!(
291                cached_count = cached_count,
292                "Found cached scan results in database"
293            );
294
295            // For full tree scans with full_scan_complete, we'll skip scanning
296            // For limited scans, remove already-scanned from incoming
297            if !self.full_tree {
298                self.incoming.retain(|p| !scanned.contains(&p.to_string()));
299            }
300
301            // Add scanned pkgpaths to done set
302            for pkgpath_str in &scanned {
303                if let Ok(pkgpath) = PkgPath::new(pkgpath_str) {
304                    self.done.insert(pkgpath);
305                }
306            }
307
308            // Check for dependencies that were discovered but not yet scanned.
309            // This handles the case where a scan was interrupted partway through.
310            let unscanned = db.get_unscanned_dependencies()?;
311            if !unscanned.is_empty() {
312                info!(
313                    unscanned_count = unscanned.len(),
314                    "Found unscanned dependencies from interrupted scan"
315                );
316                for pkgpath_str in unscanned {
317                    if let Ok(pkgpath) = PkgPath::new(&pkgpath_str) {
318                        if !self.done.contains(&pkgpath) {
319                            self.incoming.insert(pkgpath);
320                            pending_count += 1;
321                        }
322                    }
323                }
324            }
325        }
326
327        Ok((cached_count, pending_count))
328    }
329
330    /// Discover all packages in pkgsrc tree.
331    fn discover_packages(&mut self) -> anyhow::Result<()> {
332        println!("Discovering packages...");
333        let pkgsrc = self.config.pkgsrc().display();
334        let make = self.config.make().display();
335
336        // Get top-level SUBDIR (categories + USER_ADDITIONAL_PKGS)
337        let script = format!(
338            "cd {} && {} show-subdir-var VARNAME=SUBDIR\n",
339            pkgsrc, make
340        );
341        let child = self.sandbox.execute_script(0, &script, vec![])?;
342        let output = child
343            .wait_with_output()
344            .context("Failed to run show-subdir-var")?;
345
346        if !output.status.success() {
347            let stderr = String::from_utf8_lossy(&output.stderr);
348            bail!("Failed to get categories: {}", stderr);
349        }
350
351        let stdout = String::from_utf8_lossy(&output.stdout);
352        let entries: Vec<&str> = stdout.split_whitespace().collect();
353
354        for entry in entries {
355            if entry.contains('/') {
356                // USER_ADDITIONAL_PKGS - add directly as pkgpath
357                if let Ok(pkgpath) = PkgPath::new(entry) {
358                    self.incoming.insert(pkgpath);
359                }
360            } else {
361                // Category - get packages within it
362                let script = format!(
363                    "cd {}/{} && {} show-subdir-var VARNAME=SUBDIR\n",
364                    pkgsrc, entry, make
365                );
366                let child = self.sandbox.execute_script(0, &script, vec![])?;
367                let cat_output = child.wait_with_output();
368
369                match cat_output {
370                    Ok(o) if o.status.success() => {
371                        let pkgs = String::from_utf8_lossy(&o.stdout);
372                        for pkg in pkgs.split_whitespace() {
373                            let path = format!("{}/{}", entry, pkg);
374                            if let Ok(pkgpath) = PkgPath::new(&path) {
375                                self.incoming.insert(pkgpath);
376                            }
377                        }
378                    }
379                    Ok(o) => {
380                        let stderr = String::from_utf8_lossy(&o.stderr);
381                        debug!(category = entry, stderr = %stderr,
382                            "Failed to get packages for category");
383                    }
384                    Err(e) => {
385                        debug!(category = entry, error = %e,
386                            "Failed to run make in category");
387                    }
388                }
389            }
390        }
391
392        info!(discovered = self.incoming.len(), "Package discovery complete");
393        println!("Discovered {} package paths", self.incoming.len());
394
395        Ok(())
396    }
397
398    pub fn start(
399        &mut self,
400        ctx: &RunContext,
401        db: &crate::db::Database,
402    ) -> anyhow::Result<bool> {
403        info!(
404            incoming_count = self.incoming.len(),
405            sandbox_enabled = self.sandbox.enabled(),
406            "Starting package scan"
407        );
408
409        let pool = rayon::ThreadPoolBuilder::new()
410            .num_threads(self.config.scan_threads())
411            .build()
412            .context("Failed to build scan thread pool")?;
413
414        let shutdown_flag = Arc::clone(&ctx.shutdown);
415
416        /*
417         * Only a single sandbox is required, 'make pbulk-index' can safely be
418         * run in parallel inside one sandbox.
419         */
420        let script_envs = self.config.script_env();
421
422        // For full tree scans where a previous scan completed, all packages
423        // are already cached - nothing to do.
424        if self.full_tree && self.full_scan_complete && !self.done.is_empty() {
425            println!("All {} package paths already scanned", self.done.len());
426            return Ok(false);
427        }
428
429        // For non-full-tree scans, prune already-cached packages from incoming
430        // before sandbox creation to avoid unnecessary setup/teardown.
431        if !self.full_tree {
432            self.incoming.retain(|p| !self.done.contains(p));
433            if self.incoming.is_empty() {
434                if !self.done.is_empty() {
435                    println!(
436                        "All {} package paths already scanned",
437                        self.done.len()
438                    );
439                }
440                return Ok(false);
441            }
442        }
443
444        if self.sandbox.enabled() {
445            println!("Creating sandbox...");
446            if let Err(e) = self.sandbox.create(0) {
447                if let Err(destroy_err) = self.sandbox.destroy(0) {
448                    eprintln!(
449                        "Warning: failed to destroy sandbox: {}",
450                        destroy_err
451                    );
452                }
453                return Err(e);
454            }
455
456            // Run pre-build script if defined
457            if let Some(pre_build) = self.config.script("pre-build") {
458                debug!("Running pre-build script");
459                let child = self.sandbox.execute(
460                    0,
461                    pre_build,
462                    script_envs.clone(),
463                    None,
464                    None,
465                )?;
466                let output = child
467                    .wait_with_output()
468                    .context("Failed to wait for pre-build")?;
469                if !output.status.success() {
470                    let stderr = String::from_utf8_lossy(&output.stderr);
471                    error!(exit_code = ?output.status.code(), stderr = %stderr, "pre-build script failed");
472                }
473            }
474        }
475
476        // For full tree scans, always discover all packages
477        if self.full_tree {
478            self.discover_packages()?;
479            self.incoming.retain(|p| !self.done.contains(p));
480        }
481
482        // Nothing to scan - all packages are cached
483        if self.incoming.is_empty() {
484            if !self.done.is_empty() {
485                println!(
486                    "All {} package paths already scanned",
487                    self.done.len()
488                );
489            }
490
491            if self.sandbox.enabled() {
492                self.cleanup_sandbox(script_envs)?;
493            }
494
495            return Ok(false);
496        }
497
498        // Clear resolved dependencies since we're scanning new packages
499        db.clear_resolved_depends()?;
500
501        println!("Scanning packages...");
502
503        // Set up multi-line progress display using ratatui inline viewport
504        // Include cached packages in total so progress shows full picture
505        let cached_count = self.done.len();
506        let total_count = cached_count + self.incoming.len();
507        let progress = Arc::new(Mutex::new(
508            MultiProgress::new(
509                "Scanning",
510                "Scanned",
511                total_count,
512                self.config.scan_threads(),
513            )
514            .expect("Failed to initialize progress display"),
515        ));
516
517        // Mark cached packages
518        if cached_count > 0 {
519            if let Ok(mut p) = progress.lock() {
520                p.state_mut().cached = cached_count;
521            }
522        }
523
524        // Flag to stop the refresh thread
525        let stop_refresh = Arc::new(AtomicBool::new(false));
526
527        // Spawn a thread to periodically refresh the display (for timer updates)
528        let progress_refresh = Arc::clone(&progress);
529        let stop_flag = Arc::clone(&stop_refresh);
530        let shutdown_for_refresh = Arc::clone(&shutdown_flag);
531        let refresh_thread = std::thread::spawn(move || {
532            while !stop_flag.load(Ordering::Relaxed)
533                && !shutdown_for_refresh.load(Ordering::SeqCst)
534            {
535                if let Ok(mut p) = progress_refresh.lock() {
536                    // Check for keyboard events (Ctrl+C raises SIGINT)
537                    let _ = p.poll_events();
538                    let _ = p.render();
539                }
540                std::thread::sleep(Duration::from_millis(50));
541            }
542        });
543
544        // Start transaction for all writes
545        db.begin_transaction()?;
546
547        let mut interrupted = false;
548
549        // Borrow config and sandbox separately for use in scanner thread,
550        // allowing main thread to mutate self.done, self.incoming, etc.
551        let config = &self.config;
552        let sandbox = &self.sandbox;
553
554        /*
555         * Continuously iterate over incoming queue, moving to done once
556         * processed, and adding any dependencies to incoming to be processed
557         * next.
558         */
559        loop {
560            // Check for shutdown signal
561            if shutdown_flag.load(Ordering::Relaxed) {
562                stop_refresh.store(true, Ordering::Relaxed);
563                if let Ok(mut p) = progress.lock() {
564                    let _ = p.finish_interrupted();
565                }
566                interrupted = true;
567                break;
568            }
569
570            /*
571             * Convert the incoming HashSet into a Vec for parallel processing.
572             */
573            let pkgpaths: Vec<PkgPath> = self.incoming.drain().collect();
574            if pkgpaths.is_empty() {
575                break;
576            }
577
578            // Create bounded channel for streaming results
579            const CHANNEL_BUFFER_SIZE: usize = 128;
580            let (tx, rx) = std::sync::mpsc::sync_channel::<(
581                PkgPath,
582                Result<Vec<ScanIndex>>,
583            )>(CHANNEL_BUFFER_SIZE);
584
585            let mut new_incoming: HashSet<PkgPath> = HashSet::new();
586
587            std::thread::scope(|s| {
588                // Spawn scanning thread
589                let progress_clone = Arc::clone(&progress);
590                let shutdown_clone = Arc::clone(&shutdown_flag);
591                let pool_ref = &pool;
592
593                s.spawn(move || {
594                    pool_ref.install(|| {
595                        pkgpaths.par_iter().for_each(|pkgpath| {
596                            // Check for shutdown before starting
597                            if shutdown_clone.load(Ordering::Relaxed) {
598                                return;
599                            }
600
601                            let pathname =
602                                pkgpath.as_path().to_string_lossy().to_string();
603                            let thread_id =
604                                rayon::current_thread_index().unwrap_or(0);
605
606                            // Update progress - show current package
607                            if let Ok(mut p) = progress_clone.lock() {
608                                p.state_mut()
609                                    .set_worker_active(thread_id, &pathname);
610                            }
611
612                            let result = Self::scan_pkgpath_with(
613                                config, sandbox, pkgpath,
614                            );
615
616                            // Update progress counter
617                            if let Ok(mut p) = progress_clone.lock() {
618                                p.state_mut().set_worker_idle(thread_id);
619                                if result.is_ok() {
620                                    p.state_mut().increment_completed();
621                                } else {
622                                    p.state_mut().increment_failed();
623                                }
624                            }
625
626                            // Send result (blocks if buffer full = backpressure)
627                            let _ = tx.send((pkgpath.clone(), result));
628                        });
629                    });
630                    drop(tx);
631                });
632
633                // Check if we were interrupted during parallel processing
634                let was_interrupted = shutdown_flag.load(Ordering::Relaxed);
635
636                /*
637                 * Process results - write to DB and extract dependencies.
638                 */
639                for (pkgpath, result) in rx {
640                    let scanpkgs = match result {
641                        Ok(pkgs) => pkgs,
642                        Err(e) => {
643                            self.scan_failures
644                                .push((pkgpath.clone(), e.to_string()));
645                            self.done.insert(pkgpath);
646                            continue;
647                        }
648                    };
649                    self.done.insert(pkgpath.clone());
650
651                    // Save to database
652                    if !scanpkgs.is_empty() {
653                        if let Err(e) = db
654                            .store_scan_pkgpath(&pkgpath.to_string(), &scanpkgs)
655                        {
656                            error!(error = %e, "Failed to store scan results");
657                        }
658                    }
659
660                    // Skip dependency discovery for full tree scans (all
661                    // packages already discovered) or if interrupted
662                    if self.full_tree || was_interrupted {
663                        continue;
664                    }
665
666                    // Discover dependencies not yet seen
667                    for pkg in &scanpkgs {
668                        if let Some(ref all_deps) = pkg.all_depends {
669                            for dep in all_deps {
670                                let dep_path = dep.pkgpath();
671                                if self.done.contains(dep_path)
672                                    || new_incoming.contains(dep_path)
673                                {
674                                    continue;
675                                }
676                                // Check database for cached dependency
677                                match db
678                                    .is_pkgpath_scanned(&dep_path.to_string())
679                                {
680                                    Ok(true) => {
681                                        self.done.insert(dep_path.clone());
682                                        if let Ok(mut p) = progress.lock() {
683                                            p.state_mut().total += 1;
684                                            p.state_mut().cached += 1;
685                                        }
686                                    }
687                                    Ok(false) => {
688                                        new_incoming.insert(dep_path.clone());
689                                        if let Ok(mut p) = progress.lock() {
690                                            p.state_mut().total += 1;
691                                        }
692                                    }
693                                    Err(_) => {}
694                                }
695                            }
696                        }
697                    }
698                }
699            });
700
701            // Check for interruption after batch
702            if shutdown_flag.load(Ordering::Relaxed) {
703                stop_refresh.store(true, Ordering::Relaxed);
704                if let Ok(mut p) = progress.lock() {
705                    let _ = p.finish_interrupted();
706                }
707                interrupted = true;
708                break;
709            }
710
711            /*
712             * We're finished with the current incoming, replace it with the
713             * new incoming list.  If it is empty then we've already processed
714             * all known PKGPATHs and are done.
715             */
716            self.incoming = new_incoming;
717        }
718
719        // Commit transaction (partial on interrupt, full on success)
720        db.commit()?;
721
722        // Stop the refresh thread and print final summary
723        stop_refresh.store(true, Ordering::Relaxed);
724        let _ = refresh_thread.join();
725
726        // Only call finish() for normal completion; finish_interrupted()
727        // was already called immediately when interrupt was detected
728        if !interrupted {
729            if let Ok(mut p) = progress.lock() {
730                let _ = p.finish();
731            }
732        }
733
734        if self.sandbox.enabled() {
735            self.cleanup_sandbox(script_envs)?;
736        }
737
738        if interrupted {
739            return Ok(true);
740        }
741
742        Ok(false)
743    }
744
745    /// Run post-build cleanup and destroy the scan sandbox.
746    fn cleanup_sandbox(
747        &self,
748        envs: Vec<(String, String)>,
749    ) -> anyhow::Result<()> {
750        if let Some(post_build) = self.config.script("post-build") {
751            debug!("Running post-build script");
752            let child =
753                self.sandbox.execute(0, post_build, envs, None, None)?;
754            let output = child
755                .wait_with_output()
756                .context("Failed to wait for post-build")?;
757            if !output.status.success() {
758                let stderr = String::from_utf8_lossy(&output.stderr);
759                error!(exit_code = ?output.status.code(), stderr = %stderr, "post-build script failed");
760            }
761        }
762        self.sandbox.destroy(0)
763    }
764
765    /// Returns scan failures as formatted error strings.
766    pub fn scan_errors(&self) -> impl Iterator<Item = &str> {
767        self.scan_failures.iter().map(|(_, e)| e.as_str())
768    }
769
770    /// Returns scan failures with pkgpath information.
771    pub fn scan_failures(&self) -> &[(PkgPath, String)] {
772        &self.scan_failures
773    }
774
775    /**
776     * Scan a single PKGPATH, returning a [`Vec`] of [`ScanIndex`] results,
777     * as multi-version packages may return multiple results.
778     */
779    pub fn scan_pkgpath(
780        &self,
781        pkgpath: &PkgPath,
782    ) -> anyhow::Result<Vec<ScanIndex>> {
783        Self::scan_pkgpath_with(&self.config, &self.sandbox, pkgpath)
784    }
785
786    /// Scan a single PKGPATH using provided config and sandbox references.
787    /// This allows scanning without borrowing all of `self`.
788    fn scan_pkgpath_with(
789        config: &Config,
790        sandbox: &Sandbox,
791        pkgpath: &PkgPath,
792    ) -> anyhow::Result<Vec<ScanIndex>> {
793        let pkgpath_str = pkgpath.as_path().display().to_string();
794        debug!(pkgpath = %pkgpath_str, "Scanning package");
795
796        let bmake = config.make().display().to_string();
797        let pkgsrcdir = config.pkgsrc().display().to_string();
798        let script = format!(
799            "cd {}/{} && {} pbulk-index\n",
800            pkgsrcdir, pkgpath_str, bmake
801        );
802
803        let scan_env = config.scan_env();
804        trace!(pkgpath = %pkgpath_str,
805            script = %script,
806            scan_env = ?scan_env,
807            "Executing pkg-scan"
808        );
809        let child = sandbox.execute_script(0, &script, scan_env)?;
810        let output = child.wait_with_output()?;
811
812        if !output.status.success() {
813            let stderr = String::from_utf8_lossy(&output.stderr);
814            error!(pkgpath = %pkgpath_str,
815                exit_code = ?output.status.code(),
816                stderr = %stderr,
817                "pkg-scan script failed"
818            );
819            let stderr = stderr.trim();
820            let msg = if stderr.is_empty() {
821                format!("Scan failed for {}", pkgpath_str)
822            } else {
823                format!("Scan failed for {}: {}", pkgpath_str, stderr)
824            };
825            bail!(msg);
826        }
827
828        let stdout_str = String::from_utf8_lossy(&output.stdout);
829        trace!(pkgpath = %pkgpath_str,
830            stdout_len = stdout_str.len(),
831            stdout = %stdout_str,
832            "pkg-scan script output"
833        );
834
835        let reader = BufReader::new(&output.stdout[..]);
836        let all_results: Vec<ScanIndex> =
837            ScanIndex::from_reader(reader).collect::<Result<_, _>>()?;
838
839        /*
840         * Filter to keep only the first occurrence of each PKGNAME.
841         * For multi-version packages, pbulk-index returns the *_DEFAULT
842         * version first, which is the one we want.
843         */
844        let mut seen_pkgnames = HashSet::new();
845        let mut index: Vec<ScanIndex> = Vec::new();
846        for pkg in all_results {
847            if seen_pkgnames.insert(pkg.pkgname.clone()) {
848                index.push(pkg);
849            }
850        }
851
852        info!(pkgpath = %pkgpath_str,
853            packages_found = index.len(),
854            "Scan complete for pkgpath"
855        );
856
857        /*
858         * Set PKGPATH (PKG_LOCATION) as for some reason pbulk-index doesn't.
859         */
860        for pkg in &mut index {
861            pkg.pkg_location = Some(pkgpath.clone());
862            debug!(pkgpath = %pkgpath_str,
863                pkgname = %pkg.pkgname.pkgname(),
864                skip_reason = ?pkg.pkg_skip_reason,
865                fail_reason = ?pkg.pkg_fail_reason,
866                depends_count = pkg.all_depends.as_ref().map_or(0, |v| v.len()),
867                "Found package in scan"
868            );
869        }
870
871        Ok(index)
872    }
873
    /**
     * Resolve the scanned packages loaded from `db`.
     *
     * Every [`Depend`] pattern in a package's `all_depends` is matched
     * against the known PKGNAMEs, and the best match is appended to that
     * package's `depends`.  A package is skipped when it carries a
     * non-empty skip or fail reason, when one of its patterns cannot be
     * matched (or fails to compare), or when it depends, directly or
     * transitively, on a skipped package.  The remaining packages are
     * buildable and are checked for circular dependencies.
     *
     * Return a [`ScanResult`] containing the buildable packages, the
     * skipped packages, the recorded scan failures, every package paired
     * with its optional skip reason, and the resolution error strings.
     * Those error strings are logged but do not by themselves fail the call.
     *
     * Side effects: `self.resolved` is filled and then taken with
     * `std::mem::take`, and the resolved dependency pairs of buildable
     * packages are stored in the database for fast reverse lookups.
     *
     * # Errors
     *
     * Fails if loading the scan data or storing the resolved dependencies
     * fails, or if a circular dependency is found among the buildable
     * packages.
     */
    pub fn resolve(&mut self, db: &crate::db::Database) -> Result<ScanResult> {
        info!(
            done_pkgpaths = self.done.len(),
            "Starting dependency resolution"
        );

        // Load all scan data in one query
        let all_scan_data = db.get_all_scan_indexes()?;

        /*
         * Populate the resolved hash with ALL packages first, including those
         * with skip/fail reasons. This allows us to resolve dependencies for
         * all packages before separating them.
         */
        let mut pkgnames: indexmap::IndexSet<PkgName> =
            indexmap::IndexSet::new();

        // Track which packages have skip/fail reasons
        let mut skip_reasons: HashMap<PkgName, SkipReason> = HashMap::new();

        // Track package_id for storing resolved dependencies
        let mut pkgname_to_id: HashMap<PkgName, i64> = HashMap::new();

        // Process all scan data, consuming to avoid clones
        for (pkg_id, pkg) in all_scan_data {
            debug!(pkgpath = ?pkg.pkg_location,
                pkgname = %pkg.pkgname.pkgname(),
                "Processing package"
            );

            // Skip duplicate PKGNAMEs - keep only the first (preferred)
            // variant for multi-version packages.
            if pkgnames.contains(&pkg.pkgname) {
                debug!(pkgname = %pkg.pkgname.pkgname(),
                    multi_version = ?pkg.multi_version,
                    "Skipping duplicate PKGNAME"
                );
                continue;
            }

            // Track skip/fail reasons but still add to resolved.
            // An empty reason string is treated as "no reason".
            if let Some(reason) = &pkg.pkg_skip_reason {
                if !reason.is_empty() {
                    info!(pkgname = %pkg.pkgname.pkgname(),
                        reason = %reason,
                        "Package has PKG_SKIP_REASON"
                    );
                    skip_reasons.insert(
                        pkg.pkgname.clone(),
                        SkipReason::PkgSkipReason(reason.clone()),
                    );
                }
            }
            // A skip reason recorded just above takes precedence over a
            // fail reason for the same package.
            if let Some(reason) = &pkg.pkg_fail_reason {
                if !reason.is_empty()
                    && !skip_reasons.contains_key(&pkg.pkgname)
                {
                    info!(pkgname = %pkg.pkgname.pkgname(),
                        reason = %reason,
                        "Package has PKG_FAIL_REASON"
                    );
                    skip_reasons.insert(
                        pkg.pkgname.clone(),
                        SkipReason::PkgFailReason(reason.clone()),
                    );
                }
            }

            pkgname_to_id.insert(pkg.pkgname.clone(), pkg_id);
            debug!(pkgname = %pkg.pkgname.pkgname(),
                "Adding package to resolved set"
            );
            pkgnames.insert(pkg.pkgname.clone());
            self.resolved.insert(
                pkg.pkgname.clone(),
                ResolvedIndex::from_scan_index(pkg),
            );
        }

        info!(
            resolved_count = self.resolved.len(),
            skip_reasons_count = skip_reasons.len(),
            "Initial resolution complete"
        );

        /*
         * Build a hashmap of pkgbase -> Vec<&PkgName> for efficient lookups.
         * When a pattern reports a pkgbase, we can directly look up
         * candidates instead of iterating through all packages.
         */
        let pkgbase_map: HashMap<&str, Vec<&PkgName>> = {
            let mut map: HashMap<&str, Vec<&PkgName>> = HashMap::new();
            for pkgname in &pkgnames {
                map.entry(pkgname.pkgbase()).or_default().push(pkgname);
            }
            map
        };

        /*
         * Keep a cache of best Depend => PkgName matches we've already seen
         * as it's likely the same patterns will be used in multiple places.
         * Only successful matches are cached.
         */
        let mut match_cache: HashMap<Depend, PkgName> = HashMap::new();

        /*
         * Track packages to skip due to skipped dependencies, and
         * unresolved dependency errors (callers decide if these are fatal).
         */
        let mut skip_due_to_dep: HashMap<PkgName, String> = HashMap::new();
        let mut errors: Vec<String> = Vec::new();

        // Helper to check if a dependency pattern is already satisfied
        // by any package already present in `depends`.
        let is_satisfied = |depends: &[PkgName], pattern: &pkgsrc::Pattern| {
            depends.iter().any(|existing| pattern.matches(existing.pkgname()))
        };

        for pkg in self.resolved.values_mut() {
            // Temporarily move all_depends out so it can be iterated while
            // other fields of `pkg` are mutated; it is put back after the
            // loop.  Packages without any all_depends are left untouched.
            let all_deps = match pkg.all_depends.take() {
                Some(deps) => deps,
                None => continue,
            };
            for depend in all_deps.iter() {
                // Check for cached DEPENDS match first. If found, use it
                // (but only add if the pattern isn't already satisfied).
                if let Some(pkgname) = match_cache.get(depend) {
                    if !is_satisfied(&pkg.depends, depend.pattern())
                        && !pkg.depends.contains(pkgname)
                    {
                        pkg.depends.push(pkgname.clone());
                    }
                    continue;
                }
                /*
                 * Find best DEPENDS match out of all known PKGNAME.
                 * Collect all candidates that match the pattern.
                 *
                 * Use pkgbase hashmap for efficient lookups when pattern
                 * has a known pkgbase, otherwise fall back to full scan.
                 */
                let candidates: Vec<&PkgName> = if let Some(base) =
                    depend.pattern().pkgbase()
                {
                    match pkgbase_map.get(base) {
                        Some(v) => v
                            .iter()
                            .filter(|c| depend.pattern().matches(c.pkgname()))
                            .copied()
                            .collect(),
                        None => Vec::new(),
                    }
                } else {
                    pkgnames
                        .iter()
                        .filter(|c| depend.pattern().matches(c.pkgname()))
                        .collect()
                };

                // Find best match among all candidates using pbulk algorithm:
                // higher version wins, larger name on tie.
                let mut best: Option<&PkgName> = None;
                let mut match_error: Option<pkgsrc::PatternError> = None;
                for candidate in candidates {
                    best = match best {
                        None => Some(candidate),
                        Some(current) => {
                            match depend.pattern().best_match_pbulk(
                                current.pkgname(),
                                candidate.pkgname(),
                            ) {
                                // The candidate replaces the current best
                                // only when the comparison names it.
                                Ok(Some(m)) if m == candidate.pkgname() => {
                                    Some(candidate)
                                }
                                Ok(_) => Some(current),
                                // Stop comparing on the first error; it is
                                // reported below.
                                Err(e) => {
                                    match_error = Some(e);
                                    break;
                                }
                            }
                        }
                    };
                }
                // A comparison error marks the package as failed (unless it
                // already has a reason) and moves on to the next pattern.
                if let Some(e) = match_error {
                    let reason = format!(
                        "pattern error for {}: {}",
                        depend.pattern().pattern(),
                        e
                    );
                    errors.push(format!(
                        "{} in {}",
                        reason,
                        pkg.pkgname.pkgname()
                    ));
                    if !skip_reasons.contains_key(&pkg.pkgname) {
                        pkg.pkg_fail_reason = Some(format!("\"{}\"", reason));
                        skip_reasons.insert(
                            pkg.pkgname.clone(),
                            SkipReason::PkgFailReason(reason),
                        );
                    }
                    continue;
                }
                // If found, save to cache and add to depends (if not already satisfied)
                if let Some(pkgname) = best {
                    if !is_satisfied(&pkg.depends, depend.pattern())
                        && !pkg.depends.contains(pkgname)
                    {
                        pkg.depends.push(pkgname.clone());
                    }
                    match_cache.insert(depend.clone(), pkgname.clone());
                } else {
                    // No matching package exists.  The error is always
                    // recorded; the skip reason only if none exists yet.
                    let pattern = depend.pattern().pattern().to_string();
                    pkg.has_unresolved_dep = true;
                    errors.push(format!(
                        "No match found for {} in {}",
                        pattern,
                        pkg.pkgname.pkgname()
                    ));
                    if !skip_reasons.contains_key(&pkg.pkgname) {
                        let reason = format!(
                            "could not resolve dependency \"{}\"",
                            pattern
                        );
                        pkg.pkg_fail_reason = Some(format!("\"{}\"", reason));
                        skip_reasons.insert(
                            pkg.pkgname.clone(),
                            SkipReason::UnresolvedDependency(pattern),
                        );
                    }
                }
            }
            // Restore all_depends for output formatting
            pkg.all_depends = Some(all_deps);
        }

        /*
         * Iteratively propagate skips: if A depends on B, and B is now
         * marked to skip, then A should also be skipped.  Each pass only
         * looks at direct dependencies, so passes repeat until one adds
         * nothing new.
         */
        loop {
            let mut new_skips: HashMap<PkgName, String> = HashMap::new();

            for pkg in self.resolved.values() {
                // Already skipped for some reason; nothing to add.
                if skip_due_to_dep.contains_key(&pkg.pkgname)
                    || skip_reasons.contains_key(&pkg.pkgname)
                {
                    continue;
                }
                for dep in &pkg.depends {
                    if skip_due_to_dep.contains_key(dep)
                        || skip_reasons.contains_key(dep)
                    {
                        // Our dependency is being skipped; the first such
                        // dependency in `depends` is the one reported.
                        new_skips.insert(
                            pkg.pkgname.clone(),
                            format!("Dependency {} skipped", dep.pkgname()),
                        );
                        break;
                    }
                }
            }

            if new_skips.is_empty() {
                break;
            }
            skip_due_to_dep.extend(new_skips);
        }

        // Merge skip_due_to_dep into skip_reasons without overwriting a
        // reason that is already recorded.
        for (pkgname, reason) in skip_due_to_dep.iter() {
            if !skip_reasons.contains_key(pkgname) {
                skip_reasons.insert(
                    pkgname.clone(),
                    SkipReason::PkgSkipReason(reason.clone()),
                );
            }
        }

        // Build all_ordered first to preserve original order, then separate
        let mut all_ordered: Vec<(ResolvedIndex, Option<SkipReason>)> =
            Vec::new();
        let mut buildable: IndexMap<PkgName, ResolvedIndex> = IndexMap::new();
        let mut skipped: Vec<SkippedPackage> = Vec::new();

        // Taking self.resolved moves every entry out of it.
        for (pkgname, index) in std::mem::take(&mut self.resolved) {
            let reason = skip_reasons.remove(&pkgname);
            if let Some(r) = reason {
                // Skipped: extract metadata, then move index to all_ordered
                skipped.push(SkippedPackage {
                    pkgname: index.pkgname.clone(),
                    pkgpath: index.pkg_location.clone(),
                    reason: r.clone(),
                });
                all_ordered.push((index, Some(r)));
            } else {
                // Buildable: clone for all_ordered, move to buildable
                all_ordered.push((index.clone(), None));
                buildable.insert(pkgname, index);
            }
        }

        /*
         * Verify that the graph is acyclic (only for buildable packages).
         * Edges point from a dependency to the package that depends on it.
         */
        debug!(
            buildable_count = buildable.len(),
            "Checking for circular dependencies"
        );
        let mut graph = DiGraphMap::new();
        for (pkgname, index) in &buildable {
            for dep in &index.depends {
                graph.add_edge(dep.pkgname(), pkgname.pkgname(), ());
            }
        }
        // Format the cycle in reverse order and repeat its last node at the
        // end, so the printed chain starts and ends on the same package.
        let cycle_error = find_cycle(&graph).map(|cycle| {
            let mut err = "Circular dependencies detected:\n".to_string();
            for n in cycle.iter().rev() {
                err.push_str(&format!("\t{}\n", n));
            }
            err.push_str(&format!("\t{}", cycle.last().unwrap()));
            error!(cycle = ?cycle, "Circular dependency detected");
            err
        });

        info!(
            buildable_count = buildable.len(),
            skipped_count = skipped.len(),
            "Resolution complete"
        );

        // Log all buildable packages
        for pkgname in buildable.keys() {
            debug!(pkgname = %pkgname.pkgname(), "Package is buildable");
        }

        // Convert scan failures to ScanFailure structs
        let scan_failed: Vec<ScanFailure> = self
            .scan_failures
            .iter()
            .map(|(pkgpath, error)| ScanFailure {
                pkgpath: pkgpath.clone(),
                error: error.clone(),
            })
            .collect();

        // Log errors but don't bail - let callers decide how to handle them
        for err in &errors {
            error!(error = %err, "Unresolved dependency");
        }

        let result =
            ScanResult { buildable, skipped, scan_failed, all_ordered, errors };

        // A cycle is fatal: the assembled result is dropped and only the
        // formatted cycle description is returned as the error.
        if let Some(err) = cycle_error {
            bail!(err);
        }

        // Store resolved dependencies in database for fast reverse lookups.
        // Pairs are only emitted when both package ids are known.
        let mut resolved_deps: Vec<(i64, i64)> = Vec::new();
        for (pkgname, index) in &result.buildable {
            if let Some(&pkg_id) = pkgname_to_id.get(pkgname) {
                for dep in &index.depends {
                    if let Some(&dep_id) = pkgname_to_id.get(dep) {
                        resolved_deps.push((pkg_id, dep_id));
                    }
                }
            }
        }
        if !resolved_deps.is_empty() {
            db.store_resolved_dependencies_batch(&resolved_deps)?;
            debug!(count = resolved_deps.len(), "Stored resolved dependencies");
        }

        Ok(result)
    }
1259}
1260
1261pub fn find_cycle<'a>(
1262    graph: &'a DiGraphMap<&'a str, ()>,
1263) -> Option<Vec<&'a str>> {
1264    let mut visited = HashSet::new();
1265    let mut in_stack = HashSet::new();
1266    let mut stack = Vec::new();
1267
1268    for node in graph.nodes() {
1269        if visited.contains(&node) {
1270            continue;
1271        }
1272        if let Some(cycle) =
1273            dfs(graph, node, &mut visited, &mut stack, &mut in_stack)
1274        {
1275            return Some(cycle);
1276        }
1277    }
1278    None
1279}
1280
1281fn dfs<'a>(
1282    graph: &'a DiGraphMap<&'a str, ()>,
1283    node: &'a str,
1284    visited: &mut HashSet<&'a str>,
1285    stack: &mut Vec<&'a str>,
1286    in_stack: &mut HashSet<&'a str>,
1287) -> Option<Vec<&'a str>> {
1288    visited.insert(node);
1289    stack.push(node);
1290    in_stack.insert(node);
1291    for neighbor in graph.neighbors(node) {
1292        if in_stack.contains(neighbor) {
1293            if let Some(pos) = stack.iter().position(|&n| n == neighbor) {
1294                return Some(stack[pos..].to_vec());
1295            }
1296        } else if !visited.contains(neighbor) {
1297            let cycle = dfs(graph, neighbor, visited, stack, in_stack);
1298            if cycle.is_some() {
1299                return cycle;
1300            }
1301        }
1302    }
1303    stack.pop();
1304    in_stack.remove(node);
1305    None
1306}