Skip to main content

bob/
scan.rs

1/*
2 * Copyright (c) 2026 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*!
18 * Package dependency scanning and resolution.
19 *
20 * This module provides the [`Scan`] struct for discovering package dependencies
21 * and building a directed acyclic graph (DAG) for build ordering.
22 *
23 * # Scan Process
24 *
25 * 1. Create a scan sandbox
26 * 2. Run `make pbulk-index` on each package to discover dependencies
27 * 3. Recursively discover all transitive dependencies
28 * 4. Resolve dependency patterns to specific package versions
29 * 5. Verify no circular dependencies exist
30 * 6. Return buildable and skipped package lists
31 *
32 * # Skip Reasons
33 *
34 * Packages may be skipped for several reasons:
35 *
36 * - `PKG_SKIP_REASON` - Package explicitly marked to skip on this platform
37 * - `PKG_FAIL_REASON` - Package expected to fail on this platform
38 * - Unresolved dependencies - Required dependency not found
39 * - Circular dependencies - Package has a dependency cycle
40 */
41
42use crate::config::{Pkgsrc, PkgsrcEnv};
43use crate::sandbox::{SandboxScope, wait_output_with_shutdown, wait_parse_with_shutdown};
44use crate::tui::format_duration;
45use crate::{Config, Interrupted, RunState, Sandbox};
46use crate::{PackageCounts, PackageState};
47use anyhow::{Context, Result, bail};
48use indexmap::IndexMap;
49use petgraph::algo::tarjan_scc;
50use petgraph::graph::DiGraph;
51use pkgsrc::{Pattern, PatternCache, PkgName, PkgPath, ScanIndex};
52use rayon::prelude::*;
53use std::collections::{HashMap, HashSet};
54use std::sync::Arc;
55use std::time::Instant;
56use tracing::{debug, error, info, info_span, trace, warn};
57
/// A successfully resolved package that is ready to build.
///
/// Pairs a [`ScanIndex`] with the [`PkgPath`] it belongs to.  Both fields
/// are public and the type is serde-serializable.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ResolvedPackage {
    /// The scan index data including resolved dependencies.
    pub index: ScanIndex,
    /// Package path.
    pub pkgpath: PkgPath,
}
66
67impl ResolvedPackage {
68    /// Returns the package name.
69    pub fn pkgname(&self) -> &PkgName {
70        &self.index.pkgname
71    }
72
73    /// Returns resolved dependencies.
74    pub fn depends(&self) -> &[PkgName] {
75        self.index.depends()
76    }
77
78    /// Whether this package is part of the pkgsrc bootstrap.
79    pub fn bootstrap_pkg(&self) -> bool {
80        self.index
81            .bootstrap_pkg
82            .as_ref()
83            .is_some_and(|b| b.is_bootstrap())
84    }
85
86    /// Returns usergroup_phase if set.
87    pub fn usergroup_phase(&self) -> Option<&str> {
88        self.index.usergroup_phase.as_deref()
89    }
90
91    /// Returns multi_version if set.
92    pub fn multi_version(&self) -> Option<&[String]> {
93        self.index.multi_version.as_deref()
94    }
95
96    /// Returns PBULK_WEIGHT, defaulting to 100 if missing.
97    pub fn pbulk_weight(&self) -> usize {
98        self.index.pbulk_weight.map_or(100, |w| w as usize)
99    }
100}
101
102impl std::fmt::Display for ResolvedPackage {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        write!(f, "{}", self.index.presolve())
105    }
106}
107
/// Result of scanning/resolving a single package.
///
/// Every variant carries the package path it refers to; for
/// [`ScanResult::Buildable`] it lives inside the [`ResolvedPackage`].
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub enum ScanResult {
    /// Package is buildable.
    Buildable(ResolvedPackage),
    /// Package was skipped for a reason.
    Skipped {
        /// Package path.
        pkgpath: PkgPath,
        /// Package state.
        state: PackageState,
        /// Human-readable reason.  Populated for [`PackageState::Unresolved`]
        /// (the multi-line list of unresolvable dependency patterns); `None`
        /// otherwise.  Other skip kinds source their reason from the
        /// `scan_index` table (`pkg_skip_reason` / `pkg_fail_reason`).
        reason: Option<String>,
        /// Scan index if available (present for most skipped packages).
        /// `index.resolved_depends` holds the resolved deps for the package,
        /// including partial resolutions when `state` is `Unresolved`.
        index: Option<ScanIndex>,
    },
    /// Package failed to scan (bmake pbulk-index failed).
    ScanFail {
        /// Package path.
        pkgpath: PkgPath,
        /// Error message.
        error: String,
    },
}
137
138impl ScanResult {
139    /// Returns the package path.
140    pub fn pkgpath(&self) -> &PkgPath {
141        match self {
142            ScanResult::Buildable(pkg) => &pkg.pkgpath,
143            ScanResult::Skipped { pkgpath, .. } => pkgpath,
144            ScanResult::ScanFail { pkgpath, .. } => pkgpath,
145        }
146    }
147
148    /// Returns the package name if available.
149    pub fn pkgname(&self) -> Option<&PkgName> {
150        match self {
151            ScanResult::Buildable(pkg) => Some(pkg.pkgname()),
152            ScanResult::Skipped { index, .. } => index.as_ref().map(|i| &i.pkgname),
153            ScanResult::ScanFail { .. } => None,
154        }
155    }
156
157    /// Returns the resolved package if buildable.
158    pub fn as_buildable(&self) -> Option<&ResolvedPackage> {
159        match self {
160            ScanResult::Buildable(pkg) => Some(pkg),
161            _ => None,
162        }
163    }
164
165    /// Returns resolved dependencies.
166    pub fn depends(&self) -> &[PkgName] {
167        match self {
168            ScanResult::Buildable(pkg) => pkg.depends(),
169            ScanResult::Skipped {
170                index: Some(idx), ..
171            } => idx.depends(),
172            _ => &[],
173        }
174    }
175}
176
177impl std::fmt::Display for ScanResult {
178    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
179        match self {
180            ScanResult::Buildable(pkg) => write!(f, "{}", pkg),
181            ScanResult::Skipped { index, pkgpath, .. } => {
182                if let Some(idx) = index {
183                    write!(f, "{}", idx.presolve())?;
184                } else {
185                    writeln!(f, "PKGPATH={}", pkgpath)?;
186                }
187                Ok(())
188            }
189            ScanResult::ScanFail { pkgpath, .. } => {
190                writeln!(f, "PKGPATH={}", pkgpath)
191            }
192        }
193    }
194}
195
/// Result of scanning and resolving packages.
///
/// Returned by [`Scan::resolve`], contains all scanned packages with their outcomes.
/// The derived `Default` is an empty summary (zero pkgpaths, no packages).
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
pub struct ScanSummary {
    /// Number of unique package paths scanned.
    pub pkgpaths: usize,
    /// All packages in scan order with their outcomes.
    pub packages: Vec<ScanResult>,
}
206
/// Counts of packages by state, plus buildable and scanfail totals.
///
/// Produced by [`ScanSummary::counts`]: each [`ScanResult`] contributes to
/// exactly one of the three fields depending on its variant.
#[derive(Clone, Debug, Default)]
pub struct ScanCounts {
    /// Packages that are buildable.
    pub buildable: usize,
    /// Counts by [`PackageState`] variant.
    pub states: PackageCounts,
    /// Packages that failed to scan.
    pub scanfail: usize,
}
217
218impl ScanSummary {
219    /// Compute all counts in a single pass.
220    pub fn counts(&self) -> ScanCounts {
221        let mut c = ScanCounts::default();
222        for p in &self.packages {
223            match p {
224                ScanResult::Buildable(_) => c.buildable += 1,
225                ScanResult::Skipped { state, .. } => c.states.add(*state),
226                ScanResult::ScanFail { .. } => c.scanfail += 1,
227            }
228        }
229        c
230    }
231
232    /// Iterator over buildable packages.
233    pub fn buildable(&self) -> impl Iterator<Item = &ResolvedPackage> {
234        self.packages.iter().filter_map(|p| p.as_buildable())
235    }
236
237    /// Scan failures and unresolved dependency errors.
238    pub fn errors(&self) -> impl Iterator<Item = &str> {
239        self.packages.iter().filter_map(|p| match p {
240            ScanResult::ScanFail { error, .. } => Some(error.as_str()),
241            ScanResult::Skipped {
242                state: PackageState::Unresolved,
243                reason: Some(reason),
244                ..
245            } => Some(reason.as_str()),
246            _ => None,
247        })
248    }
249
250    /// Print the "Resolved N total packages..." line.
251    pub fn print_resolved(&self) {
252        println!(
253            "Resolved {} total packages from {} package paths",
254            self.packages.len(),
255            self.pkgpaths
256        );
257    }
258
259    /**
260     * Print package counts.
261     *
262     * If `up_to_date` is provided (i.e. the up-to-date check has run),
263     * the pending count is split into `pending` and `up-to-date`.
264     * Otherwise every buildable package is still in `Pending` by the
265     * state machine, so only `pending` is shown.
266     */
267    pub fn print_counts(&self, up_to_date: Option<usize>) {
268        use crate::PackageState;
269        use std::fmt::Write as _;
270        let c = self.counts();
271        let s = &c.states;
272        let pending_count = match up_to_date {
273            Some(n) => c.buildable.saturating_sub(n),
274            None => c.buildable,
275        };
276        let mut line = String::new();
277        let mut append = |n: usize, label: &str| {
278            if !line.is_empty() {
279                line.push_str(", ");
280            }
281            let _ = write!(line, "{n} {label}");
282        };
283        append(pending_count, PackageState::Pending.as_str());
284        if let Some(n) = up_to_date {
285            append(n, PackageState::UpToDate.as_str());
286        }
287        append(s.count(PackageState::is_skipped), "skipped");
288        append(s.count(PackageState::is_blocked), "blocked");
289        append(
290            s[PackageState::Unresolved],
291            PackageState::Unresolved.as_str(),
292        );
293        println!("{line}");
294    }
295}
296
297/**
298 * Package dependency scanner.
299 *
300 * Discovers packages and their dependencies by running `make pbulk-index`
301 * in each package directory, then resolves dependency patterns to specific
302 * package versions.
303 *
304 * Supports two modes:
305 * - **Full tree**: scans all packages in the pkgsrc tree (default).
306 * - **Limited**: scans only explicitly added packages and their transitive
307 *   dependencies, matching pbulk's `presolve` behaviour.
308 *
309 * Results are cached in the [`Database`](crate::Database) for resumable
310 * operation after interruption.
311 */
312#[derive(Debug, Default)]
313pub struct Scan {
314    config: Config,
315    sandbox: Sandbox,
316    incoming: HashSet<PkgPath>,
317    /// Pkgpaths we've completed scanning (in this session).
318    done: HashSet<PkgPath>,
319    /// Number of pkgpaths loaded from cache at start of scan.
320    initial_cached: usize,
321    /// Number of pkgpaths discovered as cached during dependency discovery.
322    discovered_cached: usize,
323    /// Full tree scan - discover all packages, skip recursive dependency discovery.
324    /// Defaults to true; set to false when packages are explicitly added.
325    full_tree: bool,
326    /// A previous full tree scan completed successfully.
327    full_scan_complete: bool,
328    /// Packages that failed to scan (pkgpath, error message).
329    scan_failures: Vec<(PkgPath, String)>,
330    /// Initial pkgpaths from limited_list (for deferred dependency discovery).
331    /// Only set for non-full-tree scans.
332    initial_pkgpaths: HashSet<PkgPath>,
333    /// Verbosity level for resolution warnings (0=quiet, 1=location, 2=multi).
334    verbosity: u8,
335    /// Sandbox ID allocated by the scope, set by `start()`.
336    sandbox_id: Option<usize>,
337}
338
339impl Scan {
340    pub fn new(config: &Config, pkgsrc: Option<&Pkgsrc>) -> Scan {
341        let sandbox = Sandbox::new(config, pkgsrc);
342        debug!(
343            scan_threads = config.scan_threads(),
344            "Created new Scan instance"
345        );
346        Scan {
347            config: config.clone(),
348            sandbox,
349            incoming: HashSet::new(),
350            done: HashSet::new(),
351            initial_cached: 0,
352            discovered_cached: 0,
353            full_tree: true,
354            full_scan_complete: false,
355            scan_failures: Vec::new(),
356            initial_pkgpaths: HashSet::new(),
357            verbosity: 0,
358            sandbox_id: None,
359        }
360    }
361
/// Set the verbosity level for resolution warnings
/// (0=quiet, 1=location, 2=multi).
pub fn set_verbosity(&mut self, v: u8) {
    self.verbosity = v;
}
365
366    pub fn add(&mut self, pkgpath: &PkgPath) {
367        debug!(pkgpath = %pkgpath.as_path().display(), "Adding package to scan queue");
368        self.full_tree = false;
369        self.incoming.insert(pkgpath.clone());
370        self.initial_pkgpaths.insert(pkgpath.clone());
371    }
372
/// Returns true if this is a full tree scan.
///
/// This is the mode set by [`Scan::new`]; calling [`Scan::add`] turns it off.
pub fn is_full_tree(&self) -> bool {
    self.full_tree
}
377
/// Mark that a previous full tree scan completed successfully.
///
/// Only ever sets the flag; there is no way to clear it through this method.
pub fn set_full_scan_complete(&mut self) {
    self.full_scan_complete = true;
}
382
383    /// Initialize scan from database, checking what's already scanned.
384    /// Returns (cached_count, pending_deps_count) where pending_deps_count is the
385    /// number of dependencies discovered but not yet scanned (from interrupted scans).
386    pub fn init_from_db(&mut self, db: &crate::db::Database) -> Result<(usize, usize)> {
387        let scanned = db.get_scanned_pkgpaths()?;
388        let cached_count = scanned.len();
389        let mut pending_count = 0;
390
391        if cached_count > 0 {
392            info!(cached_count, "Found cached scan results in database");
393
394            // For full tree scans with full_scan_complete, we'll skip scanning
395            // For limited scans, remove already-scanned from incoming
396            if !self.full_tree {
397                self.incoming.retain(|p| !scanned.contains(&p.to_string()));
398            }
399
400            // Add scanned pkgpaths to done set
401            for pkgpath_str in &scanned {
402                if let Ok(pkgpath) = PkgPath::new(pkgpath_str) {
403                    self.done.insert(pkgpath);
404                }
405            }
406
407            /*
408             * For full tree scans, check for dependencies that were
409             * discovered but not yet scanned.  This handles resume
410             * after interrupt.
411             *
412             * For limited scans, the early-return check in start()
413             * calls find_missing_pkgpaths() instead, ensuring we only
414             * scan dependencies of active packages.
415             */
416            if self.full_tree {
417                let unscanned = db.get_unscanned_dependencies()?;
418                if !unscanned.is_empty() {
419                    info!(
420                        unscanned_count = unscanned.len(),
421                        "Found unscanned dependencies from interrupted scan"
422                    );
423                    for pkgpath_str in unscanned {
424                        if let Ok(pkgpath) = PkgPath::new(&pkgpath_str)
425                            && !self.done.contains(&pkgpath)
426                        {
427                            self.incoming.insert(pkgpath);
428                            pending_count += 1;
429                        }
430                    }
431                }
432            }
433        }
434
435        Ok((cached_count, pending_count))
436    }
437
438    /// Discover all packages in pkgsrc tree.
439    fn discover_packages(
440        &mut self,
441        pool: &rayon::ThreadPool,
442        shutdown: &RunState,
443        pkgsrc: &Pkgsrc,
444    ) -> anyhow::Result<()> {
445        println!("Discovering packages...");
446        let basedir = pkgsrc.basedir.display().to_string();
447
448        // Get top-level SUBDIR (categories + USER_ADDITIONAL_PKGS)
449        let child = self.sandbox.execute_command(
450            self.sandbox_id,
451            &pkgsrc.make,
452            ["-C", &basedir, "show-subdir-var", "VARNAME=SUBDIR"],
453            vec![],
454        )?;
455        let output =
456            wait_output_with_shutdown(child, shutdown).context("Failed to run show-subdir-var")?;
457
458        if !output.status.success() {
459            let stderr = String::from_utf8_lossy(&output.stderr);
460            bail!("Failed to get categories: {}", stderr);
461        }
462
463        let stdout = String::from_utf8_lossy(&output.stdout);
464        let entries: Vec<&str> = stdout.split_whitespace().collect();
465
466        // Separate USER_ADDITIONAL_PKGS (contain '/') from categories
467        let mut categories: Vec<&str> = Vec::new();
468        for entry in entries {
469            if entry.contains('/') {
470                if let Ok(pkgpath) = PkgPath::new(entry) {
471                    self.incoming.insert(pkgpath);
472                }
473            } else {
474                categories.push(entry);
475            }
476        }
477
478        // Process categories in parallel
479        let make = &pkgsrc.make;
480        let sandbox = &self.sandbox;
481        let sandbox_id = self.sandbox_id;
482        let discovered: Vec<PkgPath> = pool.install(|| {
483            categories
484                .par_iter()
485                .flat_map(|category| {
486                    let workdir = format!("{}/{}", basedir, category);
487                    let result = sandbox
488                        .execute_command(
489                            sandbox_id,
490                            make,
491                            [
492                                "-C",
493                                &workdir,
494                                "show-subdir-var",
495                                "VARNAME=SUBDIR",
496                            ],
497                            vec![],
498                        )
499                        .and_then(|c| wait_output_with_shutdown(c, shutdown));
500
501                    match result {
502                        Ok(o) if o.status.success() => {
503                            let pkgs = String::from_utf8_lossy(&o.stdout);
504                            pkgs.split_whitespace()
505                                .filter_map(|pkg| {
506                                    let path = format!("{}/{}", category, pkg);
507                                    PkgPath::new(&path).ok()
508                                })
509                                .collect::<Vec<_>>()
510                        }
511                        Ok(o) => {
512                            let stderr = String::from_utf8_lossy(&o.stderr);
513                            debug!(category = *category, %stderr, "Failed to get packages for category");
514                            vec![]
515                        }
516                        Err(e) => {
517                            debug!(category = *category, error = format!("{e:#}"), "Failed to run make in category");
518                            vec![]
519                        }
520                    }
521                })
522                .collect()
523        });
524
525        self.incoming.extend(discovered);
526
527        info!(
528            discovered = self.incoming.len(),
529            "Package discovery complete"
530        );
531        println!("Discovered {} package paths", self.incoming.len());
532
533        Ok(())
534    }
535
536    pub fn start(
537        &mut self,
538        db: &crate::db::Database,
539        scope: &mut SandboxScope,
540        pkgsrc: &Pkgsrc,
541    ) -> anyhow::Result<()> {
542        /*
543         * Adopt the scope's sandbox so the pkgsrc cell is shared.  After
544         * this, set_pkgsrc_env() on either sandbox is visible from both,
545         * which lets the scope's Drop run a correct post_build cleanup
546         * if scan exits via an error path.
547         */
548        self.sandbox = scope.sandbox().clone();
549
550        info!(
551            incoming_count = self.incoming.len(),
552            sandbox_enabled = self.sandbox.enabled(),
553            "Starting package scan"
554        );
555
556        let pool = rayon::ThreadPoolBuilder::new()
557            .num_threads(self.config.scan_threads())
558            .thread_name(|i| format!("scan-{i}"))
559            .build()
560            .context("Failed to build scan thread pool")?;
561
562        let shutdown_flag = scope.state().clone();
563
564        // For full tree scans where a previous scan completed, all packages
565        // are already cached - nothing to do.
566        if self.full_tree && self.full_scan_complete && !self.done.is_empty() {
567            println!("All {} package paths already scanned", self.done.len());
568            return Ok(());
569        }
570
571        /*
572         * For non-full-tree scans, prune already-cached packages from
573         * incoming before sandbox creation to avoid unnecessary sandbox
574         * create/destroy work.  If all initial packages are cached, check
575         * for unscanned dependencies (resume after interrupt) before
576         * deciding there's nothing to do.
577         */
578        if !self.full_tree {
579            self.incoming.retain(|p| !self.done.contains(p));
580            if self.incoming.is_empty() {
581                if let Ok(deps) = self.unscanned_deps(db) {
582                    self.incoming = deps;
583                }
584                if self.incoming.is_empty() {
585                    if !self.done.is_empty() {
586                        println!("All {} package paths already scanned", self.done.len());
587                    }
588                    return Ok(());
589                }
590            }
591        }
592
593        /*
594         * Only a single sandbox is required, 'make pbulk-index' can safely be
595         * run in parallel inside one sandbox.
596         *
597         * Ensure a sandbox exists. The caller manages overall lifecycle.
598         */
599        if scope.enabled() {
600            crate::print_status("Creating sandbox");
601            let start = Instant::now();
602            let result = scope.ensure(1).and_then(|ids| {
603                self.sandbox_id = ids.first().copied();
604                self.sandbox
605                    .run_pre_build(self.sandbox_id)
606                    .context("pre-build failed")?;
607                Ok(())
608            });
609            match result {
610                Ok(()) => crate::print_elapsed("Creating sandbox", start.elapsed()),
611                Err(e) => {
612                    crate::print_failed("Creating sandbox", start.elapsed());
613                    return Err(e);
614                }
615            }
616        }
617
618        let env = match db.load_pkgsrc_env() {
619            Ok(env) => env,
620            Err(_) => {
621                let env = PkgsrcEnv::fetch(pkgsrc, &self.sandbox, self.sandbox_id)?;
622                db.store_pkgsrc_env(&env)?;
623                let mut vcs_info = crate::vcs::VcsInfo::from_path(&pkgsrc.basedir);
624                if let Some(branch) = self.config.report_branch() {
625                    vcs_info.remote_branch = Some(branch.to_string());
626                }
627                db.store_vcs_info(&vcs_info)?;
628                env
629            }
630        };
631        self.sandbox.set_pkgsrc_env(env);
632
633        // For full tree scans, always discover all packages
634        if self.full_tree {
635            self.discover_packages(&pool, &shutdown_flag, pkgsrc)?;
636            self.incoming.retain(|p| !self.done.contains(p));
637        }
638
639        // Nothing to scan - all packages are cached
640        if self.incoming.is_empty() {
641            if !self.done.is_empty() {
642                println!("All {} package paths already scanned", self.done.len());
643            }
644
645            if scope.enabled() {
646                self.run_post_build();
647            }
648            return Ok(());
649        }
650
651        // Clear resolved dependencies since we're scanning new packages
652        db.clear_resolved_depends()?;
653
654        println!("Scanning packages...");
655
656        // Track initial cached count for final summary
657        self.initial_cached = self.done.len();
658
659        // Set up multi-line progress display using ratatui inline viewport
660        // Note: finished_title is unused since we print our own summary
661        let total_count = self.initial_cached + self.incoming.len();
662        let (progress, refresh) = crate::tui::start_progress(
663            "scan-refresh",
664            "Scanning",
665            "",
666            total_count,
667            self.config.scan_threads(),
668            self.config.tui(),
669            self.initial_cached,
670            0,
671            &shutdown_flag,
672        );
673
674        let mut db_error: Option<anyhow::Error> = None;
675
676        // Borrow config and sandbox separately for use in scanner thread,
677        // allowing main thread to mutate self.done, self.incoming, etc.
678        let sandbox = &self.sandbox;
679        let sandbox_id = self.sandbox_id;
680        let scan_env = self.scan_env();
681
682        /*
683         * For limited scans, prime incoming with any missing dependencies.
684         * This handles resume after interrupt where initial packages are
685         * already scanned but their dependencies are not.
686         */
687        if !self.full_tree
688            && self.incoming.is_empty()
689            && let Ok(deps) = self.unscanned_deps(db)
690        {
691            for pkgpath in deps {
692                self.incoming.insert(pkgpath);
693                if let Ok(mut p) = progress.lock() {
694                    p.state_mut().total += 1;
695                }
696            }
697        }
698
699        /*
700         * Continuously iterate over incoming queue, moving to done once
701         * processed, and adding any dependencies to incoming to be processed
702         * next.
703         */
704        let mut scanned_count: usize = 0;
705
706        loop {
707            // Check for interrupt (stop or shutdown).
708            if shutdown_flag.interrupted() {
709                break;
710            }
711
712            /*
713             * Convert the incoming HashSet into a Vec for parallel processing.
714             */
715            let pkgpaths: Vec<PkgPath> = self.incoming.drain().collect();
716            if pkgpaths.is_empty() {
717                break;
718            }
719
720            // Create bounded channel for streaming results
721            const CHANNEL_BUFFER_SIZE: usize = 128;
722            let (tx, rx) = std::sync::mpsc::sync_channel::<(PkgPath, Result<Vec<ScanIndex>>)>(
723                CHANNEL_BUFFER_SIZE,
724            );
725
726            let mut new_incoming: HashSet<PkgPath> = HashSet::new();
727
728            std::thread::scope(|s| {
729                // Spawn scanning thread
730                let progress_clone = Arc::clone(&progress);
731                let shutdown_clone = shutdown_flag.clone();
732                let pool_ref = &pool;
733                let scan_env_ref = &scan_env;
734
735                std::thread::Builder::new()
736                    .name("scan-dispatch".to_string())
737                    .spawn_scoped(s, move || {
738                        pool_ref.install(|| {
739                            pkgpaths.par_iter().for_each(|pkgpath| {
740                                // Check for interrupt before starting
741                                if shutdown_clone.interrupted() {
742                                    return;
743                                }
744
745                                let pathname = pkgpath.as_path().to_string_lossy().to_string();
746                                let thread_id = rayon::current_thread_index().unwrap_or(0);
747
748                                // Update progress - show current package
749                                if let Ok(mut p) = progress_clone.lock() {
750                                    p.state_mut().set_worker_active(thread_id, &pathname);
751                                    p.state_mut().increment_dispatched();
752                                }
753
754                                let result = Self::scan_pkgpath_with(
755                                    pkgsrc,
756                                    sandbox,
757                                    sandbox_id,
758                                    pkgpath,
759                                    scan_env_ref,
760                                    &shutdown_clone,
761                                );
762
763                                // Update progress counter
764                                if let Ok(mut p) = progress_clone.lock() {
765                                    p.state_mut().set_worker_idle(thread_id);
766                                    if result.is_ok() {
767                                        p.state_mut().increment_completed();
768                                    } else {
769                                        p.state_mut().increment_failed();
770                                    }
771                                }
772
773                                // Send result (blocks if buffer full = backpressure)
774                                let _ = tx.send((pkgpath.clone(), result));
775                            });
776                        });
777                        drop(tx);
778                    })
779                    .expect("failed to spawn thread");
780
781                /*
782                 * Process results and write to DB.
783                 */
784                for (pkgpath, result) in rx {
785                    scanned_count += 1;
786                    if let Ok(mut p) = progress.lock() {
787                        let total = p.state_mut().total.saturating_sub(p.state_mut().cached);
788                        p.print_progress_dot(scanned_count, total);
789                    }
790
791                    let scanpkgs = match result {
792                        Ok(pkgs) => pkgs,
793                        Err(e) => {
794                            self.scan_failures.push((pkgpath.clone(), e.to_string()));
795                            self.done.insert(pkgpath);
796                            continue;
797                        }
798                    };
799                    self.done.insert(pkgpath.clone());
800
801                    // Save to database
802                    if !scanpkgs.is_empty()
803                        && let Err(e) = db.store_scan_pkgpath(&pkgpath.to_string(), &scanpkgs)
804                    {
805                        error!(error = format!("{e:#}"), "Failed to store scan results");
806                        if db_error.is_none() {
807                            db_error = Some(e);
808                        }
809                    }
810                }
811            });
812
813            if let Ok(mut p) = progress.lock() {
814                let total = p.state_mut().total.saturating_sub(p.state_mut().cached);
815                p.flush_progress_dots(scanned_count, total);
816            }
817
818            // Check for interrupt after batch completes.
819            if shutdown_flag.interrupted() {
820                break;
821            }
822
823            // Don't start new waves if database writes are failing
824            if db_error.is_some() {
825                break;
826            }
827
828            /*
829             * We're finished with the current incoming, replace it with the
830             * new incoming list.  If it is empty then we've already processed
831             * all known PKGPATHs and are done.
832             *
833             * Filter out any pkgpaths that were already scanned this wave.
834             * This handles a race where dependency discovery finds a pkgpath
835             * before its parallel scan completes and adds it to done.
836             */
837            new_incoming.retain(|p| !self.done.contains(p));
838
839            /*
840             * For limited scans, check for missing dependency pkgpaths by
841             * doing a resolution pass. This matches pbulk's iterative
842             * approach where dependencies are only scanned if needed.
843             */
844            if !self.full_tree && new_incoming.is_empty() {
845                match self.unscanned_deps(db) {
846                    Ok(deps) if !deps.is_empty() => {
847                        let count = deps.len();
848                        for pkgpath in deps {
849                            new_incoming.insert(pkgpath);
850                            if let Ok(mut p) = progress.lock() {
851                                p.state_mut().total += 1;
852                            }
853                        }
854                        debug!(
855                            missing_count = count,
856                            "Discovered missing dependency pkgpaths"
857                        );
858                    }
859                    Err(e) => {
860                        warn!(error = format!("{e:#}"), "Failed to find missing pkgpaths");
861                    }
862                    _ => {}
863                }
864            }
865
866            self.incoming = new_incoming;
867        }
868
869        // Stop the refresh thread and print final summary
870        refresh.stop();
871
872        if !shutdown_flag.interrupted() {
873            // Get elapsed time and clean up TUI without printing generic summary
874            let elapsed = if let Ok(mut p) = progress.lock() {
875                p.finish_silent().ok()
876            } else {
877                None
878            };
879
880            // Print scan-specific summary from source of truth
881            // total = initial_cached + discovered_cached + actually_scanned
882            // where actually_scanned = succeeded + failed
883            let total = self.done.len();
884            let cached = self.initial_cached + self.discovered_cached;
885            let failed = self.scan_failures.len();
886            let succeeded = total.saturating_sub(cached).saturating_sub(failed);
887
888            let elapsed_str = elapsed
889                .map(format_duration)
890                .unwrap_or_else(|| "?".to_string());
891
892            if cached > 0 {
893                println!(
894                    "Scanned {} package paths in {} ({} scanned, {} cached, {} failed)",
895                    total, elapsed_str, succeeded, cached, failed
896                );
897            } else {
898                println!(
899                    "Scanned {} package paths in {} ({} succeeded, {} failed)",
900                    total, elapsed_str, succeeded, failed
901                );
902            }
903        }
904
905        if scope.enabled() {
906            self.run_post_build();
907        }
908
909        if shutdown_flag.interrupted() {
910            return Err(Interrupted.into());
911        }
912
913        if let Some(e) = db_error {
914            return Err(e.context("Failed to persist scan results to database"));
915        }
916
917        Ok(())
918    }
919
920    /// Run post-build operations (hook destroy actions + prefix cleanup).
921    fn run_post_build(&self) {
922        if let Err(e) = self.sandbox.run_post_build(self.sandbox_id) {
923            warn!(error = format!("{e:#}"), "post-build error");
924        }
925    }
926
927    /// Returns scan failures as formatted error strings.
928    pub fn scan_errors(&self) -> impl Iterator<Item = &str> {
929        self.scan_failures.iter().map(|(_, e)| e.as_str())
930    }
931
932    fn scan_env(&self) -> Vec<(String, String)> {
933        self.sandbox
934            .pkgsrc_env()
935            .map(|e| {
936                e.cachevars
937                    .iter()
938                    .map(|(k, v)| (k.clone(), v.clone()))
939                    .collect()
940            })
941            .unwrap_or_default()
942    }
943
944    fn unscanned_deps(&self, db: &crate::db::Database) -> Result<HashSet<PkgPath>> {
945        let missing = self.find_missing_pkgpaths(db)?;
946        Ok(missing
947            .into_iter()
948            .filter(|p| !self.done.contains(p))
949            .collect())
950    }
951
952    /*
953     * Scan a single PKGPATH using provided config and sandbox references.
954     * This allows scanning without borrowing all of `self`.
955     */
956    fn scan_pkgpath_with(
957        pkgsrc: &Pkgsrc,
958        sandbox: &Sandbox,
959        sandbox_id: Option<usize>,
960        pkgpath: &PkgPath,
961        scan_env: &[(String, String)],
962        shutdown: &RunState,
963    ) -> anyhow::Result<Vec<ScanIndex>> {
964        let pkgpath_str = pkgpath.as_path().display().to_string();
965        let span = info_span!("scan", pkgpath = %pkgpath_str);
966        let _guard = span.enter();
967        debug!("Scanning package");
968
969        let pkgsrcdir = pkgsrc.basedir.display().to_string();
970        let workdir = format!("{}/{}", pkgsrcdir, pkgpath_str);
971
972        trace!(%workdir, ?scan_env, "Executing pkg-scan");
973        let child = sandbox.execute_command(
974            sandbox_id,
975            &pkgsrc.make,
976            ["-C", &workdir, "pbulk-index"],
977            scan_env.to_vec(),
978        )?;
979
980        /*
981         * Parse output as the child produces it, keeping only the first
982         * occurrence of each PKGNAME.  For multi-version packages,
983         * pbulk-index returns the *_DEFAULT version first, which is the
984         * one we want.  Set PKGPATH (PKG_LOCATION) as for some reason
985         * pbulk-index doesn't.
986         */
987        let parse_pkgpath = pkgpath.clone();
988        let parse_span = tracing::Span::current();
989        let (status, index, stderr) = wait_parse_with_shutdown(child, shutdown, move |stdout| {
990            let _guard = parse_span.enter();
991            let mut seen_pkgnames = HashSet::new();
992            let mut index: Vec<ScanIndex> = Vec::new();
993            for pkg in ScanIndex::from_reader(stdout) {
994                let mut pkg = pkg?;
995                if !seen_pkgnames.insert(pkg.pkgname.clone()) {
996                    continue;
997                }
998                pkg.pkg_location = Some(parse_pkgpath.clone());
999                debug!(
1000                    pkgname = %pkg.pkgname.pkgname(),
1001                    skip_reason = ?pkg.pkg_skip_reason,
1002                    fail_reason = ?pkg.pkg_fail_reason,
1003                    depends_count = pkg.all_depends.as_ref().map_or(0, |v| v.iter().count()),
1004                    "Found package in scan"
1005                );
1006                index.push(pkg);
1007            }
1008            anyhow::Ok(index)
1009        })?;
1010
1011        if !status.success() {
1012            error!(exit_code = ?status.code(), %stderr, "pkg-scan script failed");
1013            let stderr = stderr.trim();
1014            let msg = if stderr.is_empty() {
1015                format!("Scan failed for {}", pkgpath_str)
1016            } else {
1017                format!("Scan failed for {}: {}", pkgpath_str, stderr)
1018            };
1019            bail!(msg);
1020        }
1021
1022        let index = index?;
1023        debug!(packages_found = index.len(), "Scan complete");
1024
1025        Ok(index)
1026    }
1027
1028    /**
1029     * Find dependency pkgpaths that need to be scanned to resolve all
1030     * dependencies.
1031     *
1032     * This is used in deferred dependency discovery mode. It does a
1033     * lightweight pass through scanned packages to find dependencies that
1034     * have no match yet. Returns the set of pkgpaths to scan next.
1035     *
1036     * Only packages from initial_pkgpaths (and their transitive dependencies
1037     * that have already been scanned) are considered.
1038     */
1039    fn find_missing_pkgpaths(&self, db: &crate::db::Database) -> Result<HashSet<PkgPath>> {
1040        /*
1041         * Build set of available pkgnames (first occurrence only, like
1042         * resolve), then iteratively expand an "active" set starting from
1043         * initial_pkgpaths. For each active package, try to match its
1044         * dependencies. If no match exists, add the dependency's pkgpath
1045         * to the missing set. If a match exists, add it to the active set.
1046         * Continue until no new packages are activated.
1047         */
1048        let mut packages: IndexMap<PkgName, ScanIndex> = IndexMap::new();
1049        db.with_scan_data(crate::db::ScanIndexFields::Resolve, |pull| {
1050            while let Some(pkg) = pull()? {
1051                if !packages.contains_key(&pkg.pkgname) {
1052                    packages.insert(pkg.pkgname.clone(), pkg);
1053                }
1054            }
1055            Ok(())
1056        })?;
1057
1058        let names: Vec<PkgName> = packages.keys().cloned().collect();
1059        let pkgbase_map = Self::build_pkgbase_map(&names);
1060
1061        let mut active_pkgnames: HashSet<PkgName> = HashSet::new();
1062        for pkg in packages.values() {
1063            if let Some(ref loc) = pkg.pkg_location
1064                && self.initial_pkgpaths.contains(loc)
1065            {
1066                active_pkgnames.insert(pkg.pkgname.clone());
1067            }
1068        }
1069
1070        let mut missing_pkgpaths: HashSet<PkgPath> = HashSet::new();
1071        let mut changed = true;
1072
1073        while changed {
1074            changed = false;
1075            let current_active: Vec<PkgName> = active_pkgnames.iter().cloned().collect();
1076
1077            for active_pkgname in current_active {
1078                let Some(pkg) = packages.get(&active_pkgname) else {
1079                    continue;
1080                };
1081                let Some(ref all_deps) = pkg.all_depends else {
1082                    continue;
1083                };
1084
1085                for depend in all_deps.depends() {
1086                    let depend = match depend {
1087                        Ok(d) => d,
1088                        Err(e) => {
1089                            warn!(
1090                                pkg = %pkg.pkgname.pkgname(),
1091                                error = format!("{e:#}"),
1092                                "Malformed dependency"
1093                            );
1094                            continue;
1095                        }
1096                    };
1097                    let candidates = Self::find_candidates(depend.pattern(), &pkgbase_map, &names);
1098
1099                    if candidates.is_empty() {
1100                        let dep_path = depend.pkgpath();
1101                        if !self.done.contains(dep_path) {
1102                            missing_pkgpaths.insert(dep_path.clone());
1103                        }
1104                    } else {
1105                        for &candidate in &candidates {
1106                            if !active_pkgnames.contains(&names[candidate]) {
1107                                active_pkgnames.insert(names[candidate].clone());
1108                                changed = true;
1109                            }
1110                        }
1111                    }
1112                }
1113            }
1114        }
1115
1116        debug!(
1117            missing_count = missing_pkgpaths.len(),
1118            active_count = active_pkgnames.len(),
1119            "Found missing dependency pkgpaths"
1120        );
1121
1122        Ok(missing_pkgpaths)
1123    }
1124
1125    /**
1126     * Build a map from pkgbase to matching PkgNames for efficient lookups.
1127     */
1128    fn build_pkgbase_map(names: &[PkgName]) -> HashMap<&str, Vec<usize>> {
1129        let mut map: HashMap<&str, Vec<usize>> = HashMap::new();
1130        for (id, pkgname) in names.iter().enumerate() {
1131            map.entry(pkgname.pkgbase()).or_default().push(id);
1132        }
1133        map
1134    }
1135
1136    /**
1137     * Find all packages matching a dependency pattern, as indices into
1138     * `names`.
1139     *
1140     * Uses pkgbase for efficient O(1) lookup when available, falling back to
1141     * iteration over all packages for patterns without a pkgbase (e.g., `p5-*`).
1142     */
1143    fn find_candidates(
1144        pattern: &Pattern,
1145        pkgbase_map: &HashMap<&str, Vec<usize>>,
1146        names: &[PkgName],
1147    ) -> Vec<usize> {
1148        if let Some(bases) = pattern.pkgbases() {
1149            let mut out = Vec::new();
1150            for base in bases {
1151                if let Some(v) = pkgbase_map.get(base) {
1152                    out.extend(
1153                        v.iter()
1154                            .filter(|&&id| pattern.matches(names[id].pkgname()))
1155                            .copied(),
1156                    );
1157                }
1158            }
1159            out
1160        } else {
1161            (0..names.len())
1162                .filter(|&id| pattern.matches(names[id].pkgname()))
1163                .collect()
1164        }
1165    }
1166
1167    /**
1168     * Find the best matching package for a dependency pattern.
1169     *
1170     * Uses pkgbase for efficient lookup when available, falling back
1171     * to all packages for patterns without a known base.  Matching
1172     * and version comparison are handled by a pbulk
1173     * [`BestMatch`](pkgsrc::pattern::BestMatch) accumulator.
1174     *
1175     * Returns:
1176     * - `Ok(Some(id))` - index into `names` of the best matching package
1177     * - `Ok(None)` - no candidates match the pattern
1178     * - `Err(e)` - version comparison error (malformed version)
1179     */
1180    fn find_best_match(
1181        pattern: &Pattern,
1182        pkgbase_map: &HashMap<&str, Vec<usize>>,
1183        names: &[PkgName],
1184    ) -> Result<Option<usize>, pkgsrc::PatternError> {
1185        let mut matcher = pattern.best_matcher_pbulk();
1186        let mut best_id: Option<usize> = None;
1187        if let Some(bases) = pattern.pkgbases() {
1188            for base in bases {
1189                if let Some(candidates) = pkgbase_map.get(base) {
1190                    for &id in candidates {
1191                        if matcher.consider(names[id].pkgname())? {
1192                            best_id = Some(id);
1193                        }
1194                    }
1195                }
1196            }
1197        } else {
1198            for (id, candidate) in names.iter().enumerate() {
1199                if matcher.consider(candidate.pkgname())? {
1200                    best_id = Some(id);
1201                }
1202            }
1203        }
1204        Ok(best_id)
1205    }
1206
1207    /**
1208     * Propagate failures through the dependency graph.
1209     *
1210     * If package A depends on B, and B has a skip reason, then A gets an
1211     * indirect skip reason matching the dependency's category:
1212     * - preskipped dep → indirect-preskipped
1213     * - prefailed dep → indirect-prefailed
1214     * - unresolved dep → indirect-unresolved
1215     *
1216     * Priority: prefailed > unresolved > preskipped (we want to report the
1217     * most severe blocker). Iterates until no new entries are added.
1218     */
1219    fn propagate_failures(depends: &[Vec<usize>], skip_reasons: &mut [Option<PackageState>]) {
1220        loop {
1221            let mut new_skip_reasons: Vec<(usize, PackageState)> = Vec::new();
1222            for (id, pkg_depends) in depends.iter().enumerate() {
1223                if skip_reasons[id].is_some() {
1224                    continue;
1225                }
1226                let mut blocking_reason: Option<PackageState> = None;
1227                for &dep in pkg_depends {
1228                    if let Some(dep_reason) = skip_reasons[dep] {
1229                        let indirect = dep_reason.indirect();
1230                        use PackageState::*;
1231                        let dominated = match blocking_reason {
1232                            None | Some(IndirectPreSkipped) => true,
1233                            Some(IndirectUnresolved) if indirect == IndirectPreFailed => true,
1234                            _ => false,
1235                        };
1236                        if dominated {
1237                            blocking_reason = Some(indirect);
1238                        }
1239                        if blocking_reason == Some(IndirectPreFailed) {
1240                            break;
1241                        }
1242                    }
1243                }
1244                if let Some(reason) = blocking_reason {
1245                    new_skip_reasons.push((id, reason));
1246                }
1247            }
1248            if new_skip_reasons.is_empty() {
1249                break;
1250            }
1251            for (id, reason) in new_skip_reasons {
1252                skip_reasons[id] = Some(reason);
1253            }
1254        }
1255    }
1256
1257    /**
1258     * Check for circular dependencies in buildable packages.
1259     *
1260     * Edges are `(dep, dependent)` pairs of indices into `names`.  Any
1261     * strongly connected group of packages, or a package depending on
1262     * itself, is an error listing every package in each group.
1263     */
1264    fn check_circular_deps(names: &[PkgName], edges: &[(u32, u32)]) -> Result<()> {
1265        let graph = DiGraph::<(), ()>::from_edges(edges.iter().copied());
1266        let mut groups: Vec<Vec<&PkgName>> = Vec::new();
1267        for scc in tarjan_scc(&graph) {
1268            if scc.len() > 1 || graph.find_edge(scc[0], scc[0]).is_some() {
1269                let mut group: Vec<&PkgName> = scc.iter().map(|n| &names[n.index()]).collect();
1270                group.sort_by(|a, b| a.pkgname().cmp(b.pkgname()));
1271                groups.push(group);
1272            }
1273        }
1274        if groups.is_empty() {
1275            return Ok(());
1276        }
1277        error!(?groups, "Circular dependencies detected");
1278        let blocks: Vec<String> = groups
1279            .iter()
1280            .map(|g| {
1281                g.iter()
1282                    .map(|n| format!("\t{}", n))
1283                    .collect::<Vec<_>>()
1284                    .join("\n")
1285            })
1286            .collect();
1287        bail!("Circular dependencies detected:\n{}", blocks.join("\n\n"));
1288    }
1289
1290    /**
1291     * Resolve dependency patterns to available package names.
1292     *
1293     * Takes scanned package data (from `make pbulk-index`) and resolves
1294     * dependency patterns like "perl>=5.0" to specific packages like
1295     * "perl-5.38.0". Returns a [`ScanSummary`] classifying each package as
1296     * Buildable, Skipped, or ScanFail.
1297     *
1298     * # Algorithm
1299     *
1300     * **Phase 1 - Load and classify**: Load all scan indexes from the
1301     * database. For each package, record any PKG_SKIP_REASON or
1302     * PKG_FAIL_REASON as a skip reason. For limited scans (non-full-tree),
1303     * seed the "active" set with packages from initial_pkgpaths.
1304     *
1305     * **Phase 2 - Setup lookups**: Build a pkgbase map for O(1) candidate
1306     * lookup by package base name (e.g., "perl" -> [perl-5.38.0, perl-5.36.0]).
1307     * Initialize a match cache to memoize resolved patterns.
1308     *
1309     * **Phase 3 - Resolution loop**: For each package (active packages only
1310     * for limited scans), resolve each dependency pattern:
1311     *   - Check the cache for a previous match
1312     *   - Find candidates via pkgbase map (fast) or full scan (for wildcards)
1313     *   - Select the best match using pbulk's version comparison rules
1314     *   - Record unresolved dependencies as skip reasons
1315     *   - For limited scans, activate matched dependencies and iterate until
1316     *     no new packages become active
1317     *
1318     * **Phase 4 - Propagate failures**: Walk the dependency graph to mark
1319     * packages with failed/skipped dependencies as IndirectFail/IndirectSkip.
1320     *
1321     * **Phase 5 - Check cycles**: Error if any buildable packages form a
1322     * circular dependency group.
1323     *
1324     * **Phase 6 - Build results**: Transform the packages into a
1325     * `Vec<ScanResult>`, filtering inactive packages for limited scans,
1326     * and return the summary.
1327     *
1328     * # Limited vs Full Tree Scans
1329     *
1330     * Full tree scans resolve all packages in pkgsrc. Limited scans (when
1331     * packages are explicitly added via `add()`) only resolve packages from
1332     * initial_pkgpaths and their transitive dependencies, matching pbulk's
1333     * presolve behavior. This avoids scanning/resolving thousands of unneeded
1334     * packages when building a small subset.
1335     */
1336    pub fn resolve<I>(&mut self, scan_data: I) -> Result<ScanSummary>
1337    where
1338        I: IntoIterator<Item = Result<ScanIndex>>,
1339    {
1340        info!(
1341            done_pkgpaths = self.done.len(),
1342            "Starting dependency resolution"
1343        );
1344
1345        /*
1346         * Packages are stored in arrival order and every resolver
1347         * structure is keyed by position.  `names` mirrors each
1348         * package's pkgname for matching while `indexes` is mutated.
1349         */
1350        let mut names: Vec<PkgName> = Vec::new();
1351        let mut indexes: Vec<ScanIndex> = Vec::new();
1352        let mut name_index: HashMap<PkgName, usize> = HashMap::new();
1353        let mut skip_reasons: Vec<Option<PackageState>> = Vec::new();
1354        let mut unresolved_reasons: HashMap<usize, Vec<String>> = HashMap::new();
1355        let mut depends: Vec<Vec<usize>> = Vec::new();
1356        let mut active: Vec<bool> = Vec::new();
1357        let use_active_filter = !self.full_tree && !self.initial_pkgpaths.is_empty();
1358
1359        for pkg in scan_data {
1360            let pkg = pkg?;
1361            if name_index.contains_key(&pkg.pkgname) {
1362                debug!(pkgname = %pkg.pkgname.pkgname(), "Skipping duplicate PKGNAME");
1363                continue;
1364            }
1365
1366            let mut skip = None;
1367            if let Some(reason) = &pkg.pkg_skip_reason
1368                && !reason.is_empty()
1369            {
1370                info!(pkgname = %pkg.pkgname.pkgname(), %reason, "PKG_SKIP_REASON");
1371                skip = Some(PackageState::PreSkipped);
1372            }
1373
1374            if let Some(reason) = &pkg.pkg_fail_reason
1375                && !reason.is_empty()
1376                && skip.is_none()
1377            {
1378                info!(pkgname = %pkg.pkgname.pkgname(), %reason, "PKG_FAIL_REASON");
1379                skip = Some(PackageState::PreFailed);
1380            }
1381
1382            active.push(
1383                use_active_filter
1384                    && pkg
1385                        .pkg_location
1386                        .as_ref()
1387                        .is_some_and(|loc| self.initial_pkgpaths.contains(loc)),
1388            );
1389            skip_reasons.push(skip);
1390            name_index.insert(pkg.pkgname.clone(), names.len());
1391            names.push(pkg.pkgname.clone());
1392            depends.push(Vec::new());
1393            indexes.push(pkg);
1394        }
1395
1396        info!(packages = indexes.len(), "Loaded packages");
1397
1398        let pkgbase_map = Self::build_pkgbase_map(&names);
1399        let verbosity = self.verbosity;
1400        let pkg_locations: Vec<Option<PkgPath>> = if verbosity >= 1 {
1401            indexes.iter().map(|idx| idx.pkg_location.clone()).collect()
1402        } else {
1403            Vec::new()
1404        };
1405        let mut match_cache: HashMap<String, usize> = HashMap::new();
1406        let mut patterns = PatternCache::with_capacity(names.len());
1407        let names_ref = &names;
1408        let is_satisfied = |deps: &[usize], pattern: &Pattern| {
1409            deps.iter()
1410                .any(|&existing| pattern.matches(names_ref[existing].pkgname()))
1411        };
1412
1413        let mut resolved = vec![false; indexes.len()];
1414        loop {
1415            let mut new_active = false;
1416            for (id, pkg) in indexes.iter_mut().enumerate() {
1417                if use_active_filter && !active[id] {
1418                    continue;
1419                }
1420                if resolved[id] {
1421                    continue;
1422                }
1423                resolved[id] = true;
1424
1425                let all_deps = match pkg.all_depends.take() {
1426                    Some(deps) => deps,
1427                    None => continue,
1428                };
1429                let pkg_depends = &mut depends[id];
1430
1431                for dep in all_deps.iter() {
1432                    let dep = match dep {
1433                        Ok(d) => d,
1434                        Err(e) => {
1435                            warn!(
1436                                pkg = %pkg.pkgname.pkgname(),
1437                                error = format!("{e:#}"),
1438                                "Malformed dependency"
1439                            );
1440                            continue;
1441                        }
1442                    };
1443
1444                    let pattern = match patterns.compile(dep.pattern()) {
1445                        Ok(p) => p,
1446                        Err(e) => {
1447                            let reason = format!(
1448                                "{}: pattern error for {}: {}",
1449                                pkg.pkgname.pkgname(),
1450                                dep.pattern(),
1451                                e
1452                            );
1453                            if skip_reasons[id].is_none() {
1454                                if pkg.pkg_fail_reason.is_none() {
1455                                    pkg.pkg_fail_reason = Some(reason);
1456                                }
1457                                skip_reasons[id] = Some(PackageState::PreFailed);
1458                            }
1459                            continue;
1460                        }
1461                    };
1462
1463                    if let Some(&dep_id) = match_cache.get(dep.pattern()) {
1464                        if !is_satisfied(pkg_depends, pattern) && !pkg_depends.contains(&dep_id) {
1465                            pkg_depends.push(dep_id);
1466                        }
1467                        continue;
1468                    }
1469
1470                    if verbosity >= 2 {
1471                        let candidates = Self::find_candidates(pattern, &pkgbase_map, names_ref);
1472                        if candidates.len() > 1 {
1473                            for &c in &candidates {
1474                                eprintln!(
1475                                    "Multiple matches for dependency {} of package {}: {}",
1476                                    dep.pattern(),
1477                                    pkg.pkgname.pkgname(),
1478                                    names_ref[c].pkgname()
1479                                );
1480                            }
1481                        }
1482                    }
1483
1484                    match Self::find_best_match(pattern, &pkgbase_map, names_ref) {
1485                        Err(e) => {
1486                            let reason = format!(
1487                                "{}: version comparison error for {}: {}",
1488                                pkg.pkgname.pkgname(),
1489                                dep.pattern(),
1490                                e
1491                            );
1492                            if skip_reasons[id].is_none() {
1493                                if pkg.pkg_fail_reason.is_none() {
1494                                    pkg.pkg_fail_reason = Some(reason);
1495                                }
1496                                skip_reasons[id] = Some(PackageState::PreFailed);
1497                            }
1498                        }
1499                        Ok(Some(best)) => {
1500                            if verbosity >= 1
1501                                && let Some(loc) = pkg_locations.get(best).and_then(|l| l.as_ref())
1502                                && let Ok(dep_path) = PkgPath::new(dep.pkgpath())
1503                                && *loc != dep_path
1504                            {
1505                                eprintln!(
1506                                    "Best matching {} differs from location {} for dependency {} of package {}",
1507                                    names_ref[best].pkgname(),
1508                                    dep_path,
1509                                    dep.pattern(),
1510                                    pkg.pkgname.pkgname()
1511                                );
1512                            }
1513                            if !is_satisfied(pkg_depends, pattern) && !pkg_depends.contains(&best) {
1514                                pkg_depends.push(best);
1515                            }
1516                            match_cache.insert(dep.pattern().to_string(), best);
1517                            if use_active_filter && !active[best] {
1518                                active[best] = true;
1519                                new_active = true;
1520                            }
1521                        }
1522                        Ok(None) => {
1523                            let fail_reason =
1524                                format!("\"could not resolve dependency \"{}\"\"", dep.pattern());
1525                            pkg.pkg_fail_reason = Some(fail_reason);
1526                            let msg = format!(
1527                                "No match found for dependency {} of package {}",
1528                                dep.pattern(),
1529                                pkg.pkgname.pkgname()
1530                            );
1531                            if !matches!(
1532                                skip_reasons[id],
1533                                Some(PackageState::PreSkipped | PackageState::PreFailed)
1534                            ) {
1535                                skip_reasons[id] = Some(PackageState::Unresolved);
1536                                unresolved_reasons.entry(id).or_default().push(msg);
1537                            }
1538                        }
1539                    }
1540                }
1541                pkg.all_depends = Some(all_deps);
1542            }
1543            if !use_active_filter || !new_active {
1544                break;
1545            }
1546        }
1547
1548        /*
1549         * Release resolver-only caches before constructing the result
1550         * Vec, which otherwise doubles peak memory for large scans.
1551         */
1552        drop(match_cache);
1553        drop(patterns);
1554        drop(pkg_locations);
1555        drop(pkgbase_map);
1556        drop(name_index);
1557        drop(resolved);
1558
1559        Self::propagate_failures(&depends, &mut skip_reasons);
1560
1561        debug!("Checking for circular dependencies");
1562        let mut edges: Vec<(u32, u32)> = Vec::new();
1563        for (id, deps) in depends.iter().enumerate() {
1564            if (use_active_filter && !active[id])
1565                || skip_reasons[id].is_some()
1566                || indexes[id].pkg_location.is_none()
1567            {
1568                continue;
1569            }
1570            for &dep in deps {
1571                edges.push((dep as u32, id as u32));
1572            }
1573        }
1574        Self::check_circular_deps(&names, &edges)?;
1575        drop(edges);
1576
1577        let mut packages: Vec<ScanResult> = Vec::new();
1578        let mut count_filtered = 0;
1579
1580        for (id, mut index) in indexes.into_iter().enumerate() {
1581            if use_active_filter && !active[id] {
1582                count_filtered += 1;
1583                continue;
1584            }
1585
1586            let Some(pkgpath) = index.pkg_location.clone() else {
1587                error!(pkgname = %names[id], "Package missing PKG_LOCATION, skipping");
1588                continue;
1589            };
1590            let resolved_depends: Vec<PkgName> = std::mem::take(&mut depends[id])
1591                .into_iter()
1592                .map(|dep| names[dep].clone())
1593                .collect();
1594            let skip = skip_reasons[id].take();
1595            /*
1596             * pbulk compat: a directly-unresolvable package omits the
1597             * DEPENDS line entirely, so leave resolved_depends as None.
1598             */
1599            let complete = skip != Some(PackageState::Unresolved);
1600            if complete && !resolved_depends.is_empty() {
1601                index.resolved_depends = Some(resolved_depends);
1602            }
1603            let result = match skip {
1604                Some(state) => {
1605                    let reason = unresolved_reasons.remove(&id).map(|v| v.join("\n"));
1606                    ScanResult::Skipped {
1607                        pkgpath,
1608                        state,
1609                        reason,
1610                        index: Some(index),
1611                    }
1612                }
1613                None => ScanResult::Buildable(ResolvedPackage { index, pkgpath }),
1614            };
1615            packages.push(result);
1616        }
1617
1618        if count_filtered > 0 {
1619            debug!(
1620                count_filtered,
1621                "Filtered inactive packages (not needed for resolution)"
1622            );
1623        }
1624
1625        for (pkgpath, error) in &self.scan_failures {
1626            packages.push(ScanResult::ScanFail {
1627                pkgpath: pkgpath.clone(),
1628                error: error.clone(),
1629            });
1630        }
1631
1632        let pkgpaths = packages
1633            .iter()
1634            .map(|p| p.pkgpath())
1635            .collect::<HashSet<_>>()
1636            .len();
1637        let summary = ScanSummary { pkgpaths, packages };
1638
1639        let c = summary.counts();
1640        info!(
1641            buildable = c.buildable,
1642            preskip = c.states[PackageState::PreSkipped],
1643            prefail = c.states[PackageState::PreFailed],
1644            unresolved = c.states[PackageState::Unresolved],
1645            "Resolution complete"
1646        );
1647
1648        Ok(summary)
1649    }
1650
1651    /**
1652     * Resolve dependencies and report results.
1653     *
1654     * Loads scan data from database, resolves dependencies, stores resolved
1655     * dependencies back to database, and reports any unresolved dependency
1656     * errors. Optionally bails if `strict` is true.
1657     */
1658    pub fn resolve_with_report(
1659        &mut self,
1660        db: &crate::db::Database,
1661        strict: bool,
1662    ) -> Result<ScanSummary> {
1663        crate::print_status("Resolving dependencies");
1664        let start = std::time::Instant::now();
1665        let mut result = db.with_scan_data(crate::db::ScanIndexFields::Resolve, |pull| {
1666            self.resolve(std::iter::from_fn(|| pull().transpose()))
1667        })?;
1668        /*
1669         * Release ALL_DEPENDS now that resolution is done; the DB
1670         * writers below only need the resolved names, and keeping
1671         * the pattern strings alive through the write phase
1672         * measurably raises peak memory on large trees.
1673         */
1674        for pkg in &mut result.packages {
1675            match pkg {
1676                ScanResult::Buildable(resolved) => {
1677                    resolved.index.all_depends = None;
1678                }
1679                ScanResult::Skipped { index, .. } => {
1680                    if let Some(idx) = index {
1681                        idx.all_depends = None;
1682                    }
1683                }
1684                ScanResult::ScanFail { .. } => {}
1685            }
1686        }
1687        db.store_resolution(&result)?;
1688        db.store_pbulk_weights()?;
1689        crate::print_elapsed("Resolving dependencies", start.elapsed());
1690
1691        let errors: Vec<_> = result.errors().collect();
1692        if !errors.is_empty() {
1693            eprintln!("Scan/resolve errors:");
1694            for e in &errors {
1695                for line in e.lines() {
1696                    eprintln!("  {line}");
1697                }
1698            }
1699            if strict {
1700                bail!("Aborting due to scan/resolve errors (strict_scan enabled)");
1701            }
1702        }
1703
1704        Ok(result)
1705    }
1706}