bob/
db.rs

1/*
2 * Copyright (c) 2026 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*!
18 * SQLite database for caching scan and build results.
19 *
20 * This module provides optimized database access with:
21 * - Lazy loading to minimize memory usage
22 * - Indexed reverse dependency lookups for fast failure cascades
23 * - Normalized dependency tables for efficient queries
24 * - Hybrid storage: columns for hot fields, JSON for cold data
25 *
26 * # Schema
27 *
28 * - `packages` - Core package identity and status
29 * - `depends` - Raw dependency patterns from scans
30 * - `resolved_depends` - Resolved dependencies after pattern matching
31 * - `builds` - Build results with indexed outcome
32 * - `metadata` - Key-value store for flags and cached data
33 */
34
35use std::collections::{HashMap, HashSet};
36use std::path::{Path, PathBuf};
37use std::time::Duration;
38
39use anyhow::{Context, Result};
40use pkgsrc::{PkgName, PkgPath, ScanIndex};
41use rusqlite::{Connection, params};
42use tracing::{debug, warn};
43
44use crate::build::{BuildOutcome, BuildResult};
45use crate::config::PkgsrcEnv;
46use crate::scan::SkipReason;
47
48/**
49 * Schema version - update when schema changes.
50 */
51const SCHEMA_VERSION: i32 = 3;
52
53/**
54 * Lightweight package row without full scan data.
55 */
56#[derive(Clone, Debug)]
57pub struct PackageRow {
58    pub id: i64,
59    pub pkgname: String,
60    pub pkgpath: String,
61    pub skip_reason: Option<String>,
62    pub fail_reason: Option<String>,
63    pub is_bootstrap: bool,
64    pub pbulk_weight: i32,
65}
66
67/**
68 * SQLite database for scan and build caching.
69 */
70pub struct Database {
71    conn: Connection,
72}
73
74impl Database {
75    /**
76     * Open or create a database at the given path.
77     */
78    pub fn open(path: &Path) -> Result<Self> {
79        if let Some(parent) = path.parent() {
80            std::fs::create_dir_all(parent)
81                .context("Failed to create database directory")?;
82        }
83        let conn = Connection::open(path).context("Failed to open database")?;
84        let db = Self { conn };
85        db.configure_pragmas()?;
86        db.init()?;
87        Ok(db)
88    }
89
90    /**
91     * Begin a transaction.
92     */
93    pub fn begin_transaction(&self) -> Result<()> {
94        self.conn.execute("BEGIN TRANSACTION", [])?;
95        Ok(())
96    }
97
98    /**
99     * Commit the current transaction.
100     */
101    pub fn commit(&self) -> Result<()> {
102        self.conn.execute("COMMIT", [])?;
103        Ok(())
104    }
105
106    /**
107     * Configure SQLite for performance.
108     */
109    fn configure_pragmas(&self) -> Result<()> {
110        self.conn.execute_batch(
111            "PRAGMA journal_mode = WAL;
112             PRAGMA synchronous = NORMAL;
113             PRAGMA cache_size = -64000;
114             PRAGMA temp_store = MEMORY;
115             PRAGMA mmap_size = 268435456;
116             PRAGMA foreign_keys = ON;",
117        )?;
118        Ok(())
119    }
120
121    /**
122     * Initialize schema or fail if version mismatch.
123     */
124    fn init(&self) -> Result<()> {
125        // Check if schema_version table exists
126        let has_schema_version: bool = self.conn.query_row(
127            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_version'",
128            [],
129            |row| row.get::<_, i32>(0).map(|c| c > 0),
130        )?;
131
132        if !has_schema_version {
133            // Fresh database, create schema
134            self.create_schema()?;
135        } else {
136            // Check version matches
137            let version: i32 = self.conn.query_row(
138                "SELECT version FROM schema_version LIMIT 1",
139                [],
140                |row| row.get(0),
141            )?;
142
143            if version != SCHEMA_VERSION {
144                anyhow::bail!(
145                    "Schema mismatch: found v{}, expected v{}. \
146                     Run 'bob clean' to restart.",
147                    version,
148                    SCHEMA_VERSION
149                );
150            }
151        }
152
153        Ok(())
154    }
155
156    /**
157     * Create the database schema.
158     */
159    fn create_schema(&self) -> Result<()> {
160        self.conn.execute_batch(&format!(
161            "CREATE TABLE schema_version (version INTEGER NOT NULL);
162             INSERT INTO schema_version (version) VALUES ({});
163
164             CREATE TABLE packages (
165                 id INTEGER PRIMARY KEY AUTOINCREMENT,
166                 pkgname TEXT UNIQUE NOT NULL,
167                 pkgpath TEXT NOT NULL,
168                 skip_reason TEXT,
169                 fail_reason TEXT,
170                 is_bootstrap INTEGER DEFAULT 0,
171                 pbulk_weight INTEGER DEFAULT 100,
172                 scan_data TEXT
173             );
174
175             CREATE INDEX idx_packages_pkgpath ON packages(pkgpath);
176             CREATE INDEX idx_packages_status ON packages(skip_reason, fail_reason);
177
178             CREATE TABLE depends (
179                 id INTEGER PRIMARY KEY AUTOINCREMENT,
180                 package_id INTEGER NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
181                 depend_pattern TEXT NOT NULL,
182                 depend_pkgpath TEXT NOT NULL,
183                 UNIQUE(package_id, depend_pattern)
184             );
185
186             CREATE INDEX idx_depends_package ON depends(package_id);
187             CREATE INDEX idx_depends_pkgpath ON depends(depend_pkgpath);
188
189             CREATE TABLE resolved_depends (
190                 id INTEGER PRIMARY KEY AUTOINCREMENT,
191                 package_id INTEGER NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
192                 depends_on_id INTEGER NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
193                 UNIQUE(package_id, depends_on_id)
194             );
195
196             CREATE INDEX idx_resolved_depends_package ON resolved_depends(package_id);
197             CREATE INDEX idx_resolved_depends_depends_on ON resolved_depends(depends_on_id);
198
199             CREATE TABLE builds (
200                 id INTEGER PRIMARY KEY AUTOINCREMENT,
201                 package_id INTEGER NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
202                 outcome TEXT NOT NULL,
203                 outcome_detail TEXT,
204                 duration_ms INTEGER NOT NULL DEFAULT 0,
205                 log_dir TEXT,
206                 UNIQUE(package_id)
207             );
208
209             CREATE INDEX idx_builds_outcome ON builds(outcome);
210             CREATE INDEX idx_builds_package ON builds(package_id);
211
212             CREATE TABLE metadata (
213                 key TEXT PRIMARY KEY,
214                 value TEXT NOT NULL
215             );",
216            SCHEMA_VERSION
217        ))?;
218
219        debug!(version = SCHEMA_VERSION, "Created schema");
220        Ok(())
221    }
222
223    // ========================================================================
224    // PACKAGE QUERIES
225    // ========================================================================
226
227    /**
228     * Store a package from scan results.
229     */
230    pub fn store_package(
231        &self,
232        pkgpath: &str,
233        index: &ScanIndex,
234    ) -> Result<i64> {
235        let pkgname = index.pkgname.pkgname();
236
237        let skip_reason =
238            index.pkg_skip_reason.as_ref().filter(|s| !s.is_empty());
239        let fail_reason =
240            index.pkg_fail_reason.as_ref().filter(|s| !s.is_empty());
241        let is_bootstrap = index.bootstrap_pkg.as_deref() == Some("yes");
242        let pbulk_weight: i32 = index
243            .pbulk_weight
244            .as_ref()
245            .and_then(|s| s.parse().ok())
246            .unwrap_or(100);
247
248        let scan_data = serde_json::to_string(index)?;
249
250        {
251            let mut stmt = self.conn.prepare_cached(
252                "INSERT OR REPLACE INTO packages
253                 (pkgname, pkgpath, skip_reason, fail_reason,
254                  is_bootstrap, pbulk_weight, scan_data)
255                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
256            )?;
257            stmt.execute(params![
258                pkgname,
259                pkgpath,
260                skip_reason,
261                fail_reason,
262                is_bootstrap,
263                pbulk_weight,
264                scan_data
265            ])?;
266        }
267
268        let package_id = self.conn.last_insert_rowid();
269
270        // Store raw dependencies
271        if let Some(ref deps) = index.all_depends {
272            let mut stmt = self.conn.prepare_cached(
273                "INSERT OR IGNORE INTO depends (package_id, depend_pattern, depend_pkgpath)
274                 VALUES (?1, ?2, ?3)",
275            )?;
276            for dep in deps {
277                stmt.execute(params![
278                    package_id,
279                    dep.pattern().pattern(),
280                    dep.pkgpath().to_string()
281                ])?;
282            }
283        }
284
285        debug!(pkgname = pkgname, package_id = package_id, "Stored package");
286        Ok(package_id)
287    }
288
289    /**
290     * Store scan results for a pkgpath.
291     */
292    pub fn store_scan_pkgpath(
293        &self,
294        pkgpath: &str,
295        indexes: &[ScanIndex],
296    ) -> Result<()> {
297        for index in indexes {
298            self.store_package(pkgpath, index)?;
299        }
300        Ok(())
301    }
302
303    /**
304     * Get package by name.
305     */
306    pub fn get_package_by_name(
307        &self,
308        pkgname: &str,
309    ) -> Result<Option<PackageRow>> {
310        let result = self.conn.query_row(
311            "SELECT id, pkgname, pkgpath, skip_reason, fail_reason, is_bootstrap, pbulk_weight
312             FROM packages WHERE pkgname = ?1",
313            [pkgname],
314            |row| Ok(PackageRow {
315                id: row.get(0)?,
316                pkgname: row.get(1)?,
317                pkgpath: row.get(2)?,
318                skip_reason: row.get(3)?,
319                fail_reason: row.get(4)?,
320                is_bootstrap: row.get::<_, i32>(5)? != 0,
321                pbulk_weight: row.get(6)?,
322            }),
323        );
324
325        match result {
326            Ok(pkg) => Ok(Some(pkg)),
327            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
328            Err(e) => Err(e.into()),
329        }
330    }
331
332    /**
333     * Get package ID by name.
334     */
335    pub fn get_package_id(&self, pkgname: &str) -> Result<Option<i64>> {
336        let result = self.conn.query_row(
337            "SELECT id FROM packages WHERE pkgname = ?1",
338            [pkgname],
339            |row| row.get(0),
340        );
341
342        match result {
343            Ok(id) => Ok(Some(id)),
344            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
345            Err(e) => Err(e.into()),
346        }
347    }
348
349    /**
350     * Get pkgname by package ID.
351     */
352    pub fn get_pkgname(&self, package_id: i64) -> Result<String> {
353        self.conn
354            .query_row(
355                "SELECT pkgname FROM packages WHERE id = ?1",
356                [package_id],
357                |row| row.get(0),
358            )
359            .context("Package not found")
360    }
361
362    /**
363     * Get packages by pkgpath.
364     */
365    pub fn get_packages_by_path(
366        &self,
367        pkgpath: &str,
368    ) -> Result<Vec<PackageRow>> {
369        let mut stmt = self.conn.prepare(
370            "SELECT id, pkgname, pkgpath, skip_reason, fail_reason, is_bootstrap, pbulk_weight
371             FROM packages WHERE pkgpath = ?1"
372        )?;
373
374        let rows = stmt.query_map([pkgpath], |row| {
375            Ok(PackageRow {
376                id: row.get(0)?,
377                pkgname: row.get(1)?,
378                pkgpath: row.get(2)?,
379                skip_reason: row.get(3)?,
380                fail_reason: row.get(4)?,
381                is_bootstrap: row.get::<_, i32>(5)? != 0,
382                pbulk_weight: row.get(6)?,
383            })
384        })?;
385
386        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
387    }
388
389    /**
390     * Check if pkgpath is scanned.
391     */
392    pub fn is_pkgpath_scanned(&self, pkgpath: &str) -> Result<bool> {
393        let count: i32 = self.conn.query_row(
394            "SELECT COUNT(*) FROM packages WHERE pkgpath = ?1",
395            [pkgpath],
396            |row| row.get(0),
397        )?;
398        Ok(count > 0)
399    }
400
401    /**
402     * Get all scanned pkgpaths.
403     */
404    pub fn get_scanned_pkgpaths(&self) -> Result<HashSet<String>> {
405        let mut stmt =
406            self.conn.prepare("SELECT DISTINCT pkgpath FROM packages")?;
407        let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
408        rows.collect::<Result<HashSet<_>, _>>().map_err(Into::into)
409    }
410
411    /**
412     * Get pkgpaths that are referenced as dependencies but haven't been scanned
413     * yet. These are dependencies that were discovered during scanning but the
414     * scan was interrupted before they could be processed.
415     */
416    pub fn get_unscanned_dependencies(&self) -> Result<HashSet<String>> {
417        let mut stmt = self.conn.prepare(
418            "SELECT DISTINCT d.depend_pkgpath
419             FROM depends d
420             WHERE d.depend_pkgpath NOT IN (SELECT pkgpath FROM packages)",
421        )?;
422        let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
423        rows.collect::<Result<HashSet<_>, _>>().map_err(Into::into)
424    }
425
426    /**
427     * Count of scanned packages.
428     */
429    pub fn count_packages(&self) -> Result<i64> {
430        self.conn
431            .query_row("SELECT COUNT(*) FROM packages", [], |row| row.get(0))
432            .context("Failed to count packages")
433    }
434
435    /**
436     * Count of scanned pkgpaths.
437     */
438    pub fn count_scan(&self) -> Result<i64> {
439        self.conn
440            .query_row(
441                "SELECT COUNT(DISTINCT pkgpath) FROM packages",
442                [],
443                |row| row.get(0),
444            )
445            .context("Failed to count scan")
446    }
447
448    /**
449     * Get all packages (lightweight).
450     */
451    pub fn get_all_packages(&self) -> Result<Vec<PackageRow>> {
452        let mut stmt = self.conn.prepare(
453            "SELECT id, pkgname, pkgpath, skip_reason, fail_reason, is_bootstrap, pbulk_weight
454             FROM packages ORDER BY id"
455        )?;
456
457        let rows = stmt.query_map([], |row| {
458            Ok(PackageRow {
459                id: row.get(0)?,
460                pkgname: row.get(1)?,
461                pkgpath: row.get(2)?,
462                skip_reason: row.get(3)?,
463                fail_reason: row.get(4)?,
464                is_bootstrap: row.get::<_, i32>(5)? != 0,
465                pbulk_weight: row.get(6)?,
466            })
467        })?;
468
469        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
470    }
471
472    /**
473     * Get all buildable packages (no skip/fail reason).
474     */
475    pub fn get_buildable_packages(&self) -> Result<Vec<PackageRow>> {
476        let mut stmt = self.conn.prepare(
477            "SELECT id, pkgname, pkgpath, skip_reason, fail_reason, is_bootstrap, pbulk_weight
478             FROM packages WHERE skip_reason IS NULL AND fail_reason IS NULL"
479        )?;
480
481        let rows = stmt.query_map([], |row| {
482            Ok(PackageRow {
483                id: row.get(0)?,
484                pkgname: row.get(1)?,
485                pkgpath: row.get(2)?,
486                skip_reason: row.get(3)?,
487                fail_reason: row.get(4)?,
488                is_bootstrap: row.get::<_, i32>(5)? != 0,
489                pbulk_weight: row.get(6)?,
490            })
491        })?;
492
493        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
494    }
495
496    /**
497     * Load full ScanIndex for a package.
498     */
499    pub fn get_full_scan_index(&self, package_id: i64) -> Result<ScanIndex> {
500        let json: String = self.conn.query_row(
501            "SELECT scan_data FROM packages WHERE id = ?1",
502            [package_id],
503            |row| row.get(0),
504        )?;
505        serde_json::from_str(&json).context("Failed to deserialize scan data")
506    }
507
508    /**
509     * Load all ScanIndex data in one query.
510     */
511    pub fn get_all_scan_indexes(&self) -> Result<Vec<(i64, ScanIndex)>> {
512        let mut stmt = self
513            .conn
514            .prepare("SELECT id, scan_data FROM packages ORDER BY id")?;
515        let rows = stmt.query_map([], |row| {
516            let id: i64 = row.get(0)?;
517            let json: String = row.get(1)?;
518            Ok((id, json))
519        })?;
520        let mut results = Vec::new();
521        for row in rows {
522            let (id, json) = row?;
523            let index: ScanIndex =
524                serde_json::from_str(&json).with_context(|| {
525                    format!(
526                        "Failed to deserialize scan data for package {}",
527                        id
528                    )
529                })?;
530            results.push((id, index));
531        }
532        Ok(results)
533    }
534
535    /**
536     * Load full ScanIndex by pkgname.
537     */
538    pub fn get_scan_index_by_name(
539        &self,
540        pkgname: &str,
541    ) -> Result<Option<ScanIndex>> {
542        let result = self.conn.query_row(
543            "SELECT scan_data FROM packages WHERE pkgname = ?1",
544            [pkgname],
545            |row| row.get::<_, String>(0),
546        );
547
548        match result {
549            Ok(json) => {
550                let index: ScanIndex = serde_json::from_str(&json)
551                    .context("Failed to deserialize scan data")?;
552                Ok(Some(index))
553            }
554            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
555            Err(e) => Err(e.into()),
556        }
557    }
558
559    /**
560     * Clear all scan data.
561     */
562    pub fn clear_scan(&self) -> Result<()> {
563        self.conn.execute("DELETE FROM packages", [])?;
564        self.clear_full_scan_complete()?;
565        Ok(())
566    }
567
568    // ========================================================================
569    // DEPENDENCY QUERIES
570    // ========================================================================
571
572    /**
573     * Store resolved dependencies in batch.
574     */
575    pub fn store_resolved_dependencies_batch(
576        &self,
577        deps: &[(i64, i64)],
578    ) -> Result<()> {
579        self.conn.execute("BEGIN TRANSACTION", [])?;
580        let mut stmt = self.conn.prepare(
581            "INSERT OR IGNORE INTO resolved_depends (package_id, depends_on_id) VALUES (?1, ?2)",
582        )?;
583        for (package_id, depends_on_id) in deps {
584            stmt.execute(params![package_id, depends_on_id])?;
585        }
586        drop(stmt);
587        self.conn.execute("COMMIT", [])?;
588        Ok(())
589    }
590
591    /**
592     * Get all transitive reverse dependencies using recursive CTE.
593     */
594    pub fn get_transitive_reverse_deps(
595        &self,
596        package_id: i64,
597    ) -> Result<Vec<i64>> {
598        let mut stmt = self.conn.prepare(
599            "WITH RECURSIVE affected(id) AS (
600                SELECT ?1
601                UNION
602                SELECT rd.package_id
603                FROM resolved_depends rd
604                JOIN affected a ON rd.depends_on_id = a.id
605            )
606            SELECT id FROM affected WHERE id != ?1",
607        )?;
608        let rows = stmt.query_map([package_id], |row| row.get::<_, i64>(0))?;
609        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
610    }
611
612    /**
613     * Clear all resolved dependencies.
614     */
615    pub fn clear_resolved_depends(&self) -> Result<()> {
616        self.conn.execute("DELETE FROM resolved_depends", [])?;
617        Ok(())
618    }
619
620    // ========================================================================
621    // BUILD QUERIES
622    // ========================================================================
623
624    /**
625     * Store a build result by package ID.
626     */
627    pub fn store_build_result(
628        &self,
629        package_id: i64,
630        result: &BuildResult,
631    ) -> Result<()> {
632        let (outcome, detail) = build_outcome_to_db(&result.outcome);
633        let duration_ms = result.duration.as_millis() as i64;
634        let log_dir = result.log_dir.as_ref().map(|p| p.display().to_string());
635
636        self.conn.execute(
637            "INSERT OR REPLACE INTO builds
638             (package_id, outcome, outcome_detail, duration_ms, log_dir)
639             VALUES (?1, ?2, ?3, ?4, ?5)",
640            params![package_id, outcome, detail, duration_ms, log_dir],
641        )?;
642
643        debug!(
644            package_id = package_id,
645            outcome = outcome,
646            "Stored build result"
647        );
648        Ok(())
649    }
650
651    /**
652     * Store a build result by pkgname.
653     */
654    pub fn store_build_by_name(&self, result: &BuildResult) -> Result<()> {
655        if let Some(pkg) = self.get_package_by_name(result.pkgname.pkgname())? {
656            self.store_build_result(pkg.id, result)
657        } else {
658            warn!(pkgname = %result.pkgname.pkgname(), "Package not found in database for build result");
659            Ok(())
660        }
661    }
662
663    /**
664     * Get build result for a package.
665     */
666    pub fn get_build_result(
667        &self,
668        package_id: i64,
669    ) -> Result<Option<BuildResult>> {
670        let result = self.conn.query_row(
671            "SELECT p.pkgname, p.pkgpath, b.outcome, b.outcome_detail, b.duration_ms, b.log_dir
672             FROM builds b
673             JOIN packages p ON b.package_id = p.id
674             WHERE b.package_id = ?1",
675            [package_id],
676            |row| {
677                let pkgname: String = row.get(0)?;
678                let pkgpath: Option<String> = row.get(1)?;
679                let outcome: String = row.get(2)?;
680                let detail: Option<String> = row.get(3)?;
681                let duration_ms: i64 = row.get(4)?;
682                let log_dir: Option<String> = row.get(5)?;
683                Ok((pkgname, pkgpath, outcome, detail, duration_ms, log_dir))
684            },
685        );
686
687        match result {
688            Ok((pkgname, pkgpath, outcome, detail, duration_ms, log_dir)) => {
689                let build_outcome = db_outcome_to_build(&outcome, detail);
690                Ok(Some(BuildResult {
691                    pkgname: PkgName::new(&pkgname),
692                    pkgpath: pkgpath.and_then(|p| PkgPath::new(&p).ok()),
693                    outcome: build_outcome,
694                    duration: Duration::from_millis(duration_ms as u64),
695                    log_dir: log_dir.map(std::path::PathBuf::from),
696                }))
697            }
698            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
699            Err(e) => Err(e.into()),
700        }
701    }
702
703    /**
704     * Count of build results.
705     */
706    pub fn count_build(&self) -> Result<i64> {
707        self.conn
708            .query_row("SELECT COUNT(*) FROM builds", [], |row| row.get(0))
709            .context("Failed to count builds")
710    }
711
712    /**
713     * Delete build result for a pkgname.
714     */
715    pub fn delete_build_by_name(&self, pkgname: &str) -> Result<bool> {
716        let rows = self.conn.execute(
717            "DELETE FROM builds WHERE package_id IN (SELECT id FROM packages WHERE pkgname = ?1)",
718            params![pkgname],
719        )?;
720        Ok(rows > 0)
721    }
722
723    /**
724     * Delete build results by pkgpath.
725     */
726    pub fn delete_build_by_pkgpath(&self, pkgpath: &str) -> Result<usize> {
727        let rows = self.conn.execute(
728            "DELETE FROM builds WHERE package_id IN (SELECT id FROM packages WHERE pkgpath = ?1)",
729            params![pkgpath],
730        )?;
731        Ok(rows)
732    }
733
734    /**
735     * Clear all build results.
736     */
737    pub fn clear_builds(&self) -> Result<usize> {
738        let rows = self.conn.execute("DELETE FROM builds", [])?;
739        Ok(rows)
740    }
741
742    /**
743     * Get all build results from the database.
744     */
745    pub fn get_all_build_results(&self) -> Result<Vec<BuildResult>> {
746        let mut stmt = self.conn.prepare(
747            "SELECT p.pkgname, p.pkgpath, b.outcome, b.outcome_detail, b.duration_ms, b.log_dir
748             FROM builds b
749             JOIN packages p ON b.package_id = p.id
750             ORDER BY p.pkgname"
751        )?;
752
753        let rows = stmt.query_map([], |row| {
754            let pkgname: String = row.get(0)?;
755            let pkgpath: Option<String> = row.get(1)?;
756            let outcome: String = row.get(2)?;
757            let detail: Option<String> = row.get(3)?;
758            let duration_ms: i64 = row.get(4)?;
759            let log_dir: Option<String> = row.get(5)?;
760            Ok((pkgname, pkgpath, outcome, detail, duration_ms, log_dir))
761        })?;
762
763        let mut results = Vec::new();
764        for row in rows {
765            let (pkgname, pkgpath, outcome, detail, duration_ms, log_dir) =
766                row?;
767            let build_outcome = db_outcome_to_build(&outcome, detail);
768            results.push(BuildResult {
769                pkgname: PkgName::new(&pkgname),
770                pkgpath: pkgpath.and_then(|p| PkgPath::new(&p).ok()),
771                outcome: build_outcome,
772                duration: Duration::from_millis(duration_ms as u64),
773                log_dir: log_dir.map(std::path::PathBuf::from),
774            });
775        }
776
777        Ok(results)
778    }
779
780    /**
781     * Count how many packages are broken by each failed package.
782     * Returns a map from pkgname to the count of packages that depend on it.
783     */
784    pub fn count_breaks_for_failed(
785        &self,
786    ) -> Result<std::collections::HashMap<String, usize>> {
787        use std::collections::HashMap;
788
789        let mut counts: HashMap<String, usize> = HashMap::new();
790
791        // Get all failed package IDs and their names
792        let mut stmt = self.conn.prepare(
793            "SELECT p.id, p.pkgname FROM builds b
794             JOIN packages p ON b.package_id = p.id
795             WHERE b.outcome = 'failed'",
796        )?;
797
798        let failed: Vec<(i64, String)> = stmt
799            .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
800            .filter_map(|r| r.ok())
801            .collect();
802
803        // For each failed package, count indirect failures that reference it
804        for (_pkg_id, pkgname) in failed {
805            let count: i64 = self.conn.query_row(
806                "SELECT COUNT(*) FROM builds b
807                 JOIN packages p ON b.package_id = p.id
808                 WHERE b.outcome = 'indirect_failed'
809                 AND b.outcome_detail LIKE ?1",
810                params![format!("%{}", pkgname)],
811                |row| row.get(0),
812            )?;
813            counts.insert(pkgname, count as usize);
814        }
815
816        Ok(counts)
817    }
818
819    /**
820     * Get total build duration from all builds.
821     */
822    pub fn get_total_build_duration(&self) -> Result<Duration> {
823        let total_ms: i64 = self.conn.query_row(
824            "SELECT COALESCE(SUM(duration_ms), 0) FROM builds",
825            [],
826            |row| row.get(0),
827        )?;
828        Ok(Duration::from_millis(total_ms as u64))
829    }
830
831    /**
832     * Get pre-failed packages (those with skip_reason or fail_reason but no
833     * build result). Returns (pkgname, pkgpath, reason).
834     */
835    pub fn get_prefailed_packages(
836        &self,
837    ) -> Result<Vec<(String, Option<String>, String)>> {
838        let mut stmt = self.conn.prepare(
839            "SELECT p.pkgname, p.pkgpath,
840                    COALESCE(p.fail_reason, p.skip_reason) as reason
841             FROM packages p
842             WHERE (p.skip_reason IS NOT NULL OR p.fail_reason IS NOT NULL)
843               AND NOT EXISTS (SELECT 1 FROM builds b WHERE b.package_id = p.id)
844             ORDER BY p.pkgname",
845        )?;
846
847        let rows = stmt
848            .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?;
849
850        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
851    }
852
853    /**
854     * Get packages without build results that depend on failed packages.
855     * Returns (pkgname, pkgpath, failed_deps) where failed_deps is
856     * comma-separated. Excludes packages that have skip_reason or fail_reason
857     * (they're pre-failed). Only lists root failures (direct failures), not
858     * indirect failures.
859     */
860    pub fn get_indirect_failures(
861        &self,
862    ) -> Result<Vec<(String, Option<String>, String)>> {
863        // Find packages that:
864        // 1. Have no build result
865        // 2. Have no skip_reason or fail_reason (not pre-failed)
866        // 3. Depend (transitively) on a package with a direct failure
867        // Group by package and aggregate failed deps into comma-separated string
868        // Only 'failed' and 'prefailed' are root causes, not 'indirect_*'
869        let mut stmt = self.conn.prepare(
870            "WITH RECURSIVE
871             -- Only direct failures are root causes
872             failed_pkgs(id) AS (
873                 SELECT package_id FROM builds
874                 WHERE outcome IN ('failed', 'prefailed')
875             ),
876             -- Packages affected by failures (transitive closure)
877             affected(id, root_id) AS (
878                 SELECT id, id FROM failed_pkgs
879                 UNION
880                 SELECT rd.package_id, a.root_id
881                 FROM resolved_depends rd
882                 JOIN affected a ON rd.depends_on_id = a.id
883                 WHERE rd.package_id NOT IN (SELECT id FROM failed_pkgs)
884             )
885             SELECT p.pkgname, p.pkgpath, GROUP_CONCAT(DISTINCT fp.pkgname) as failed_deps
886             FROM affected a
887             JOIN packages p ON a.id = p.id
888             JOIN packages fp ON a.root_id = fp.id
889             WHERE a.id != a.root_id
890               AND NOT EXISTS (SELECT 1 FROM builds b WHERE b.package_id = a.id)
891               AND p.skip_reason IS NULL
892               AND p.fail_reason IS NULL
893             GROUP BY p.id, p.pkgname, p.pkgpath
894             ORDER BY p.pkgname",
895        )?;
896
897        let rows = stmt
898            .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?;
899
900        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
901    }
902
903    /**
904     * Mark a package and all its transitive reverse dependencies as failed.
905     * Returns the count of packages marked.
906     */
907    pub fn mark_failure_cascade(
908        &self,
909        package_id: i64,
910        reason: &str,
911        duration: Duration,
912    ) -> Result<usize> {
913        let pkgname = self.get_pkgname(package_id)?;
914
915        // Get all affected packages using recursive CTE
916        let mut stmt = self.conn.prepare(
917            "WITH RECURSIVE affected(id, depth) AS (
918                SELECT ?1, 0
919                UNION
920                SELECT rd.package_id, a.depth + 1
921                FROM resolved_depends rd
922                JOIN affected a ON rd.depends_on_id = a.id
923            )
924            SELECT id, depth FROM affected ORDER BY depth",
925        )?;
926
927        let affected: Vec<(i64, i32)> = stmt
928            .query_map([package_id], |row| {
929                Ok((row.get::<_, i64>(0)?, row.get::<_, i32>(1)?))
930            })?
931            .filter_map(|r| r.ok())
932            .collect();
933
934        // Batch insert failures
935        self.conn.execute("BEGIN TRANSACTION", [])?;
936
937        for (id, depth) in &affected {
938            let (outcome, detail, dur) = if *depth == 0 {
939                ("failed", reason.to_string(), duration.as_millis() as i64)
940            } else {
941                ("indirect_failed", format!("depends on failed {}", pkgname), 0)
942            };
943
944            self.conn.execute(
945                "INSERT OR REPLACE INTO builds
946                 (package_id, outcome, outcome_detail, duration_ms)
947                 VALUES (?1, ?2, ?3, ?4)",
948                params![id, outcome, detail, dur],
949            )?;
950        }
951
952        self.conn.execute("COMMIT", [])?;
953
954        debug!(
955            package_id = package_id,
956            affected_count = affected.len(),
957            "Marked failure cascade"
958        );
959        Ok(affected.len())
960    }
961
962    // ========================================================================
963    // METADATA
964    // ========================================================================
965
966    /**
967     * Check if a full tree scan has been completed.
968     */
969    pub fn full_scan_complete(&self) -> bool {
970        self.conn
971            .query_row(
972                "SELECT value FROM metadata WHERE key = 'full_scan_complete'",
973                [],
974                |row| row.get::<_, String>(0),
975            )
976            .map(|v| v == "true")
977            .unwrap_or(false)
978    }
979
980    /**
981     * Mark a full tree scan as complete.
982     */
983    pub fn set_full_scan_complete(&self) -> Result<()> {
984        self.conn.execute(
985            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('full_scan_complete', 'true')",
986            [],
987        )?;
988        Ok(())
989    }
990
991    /**
992     * Clear the full tree scan complete marker.
993     */
994    pub fn clear_full_scan_complete(&self) -> Result<()> {
995        self.conn.execute(
996            "DELETE FROM metadata WHERE key = 'full_scan_complete'",
997            [],
998        )?;
999        Ok(())
1000    }
1001
1002    /**
1003     * Store the pkgsrc environment to the database.  Errors if already present
1004     * as this should only ever be done once.
1005     */
1006    pub fn store_pkgsrc_env(&self, env: &PkgsrcEnv) -> Result<()> {
1007        let json = serde_json::json!({
1008            "packages": env.packages,
1009            "pkgtools": env.pkgtools,
1010            "prefix": env.prefix,
1011            "pkg_dbdir": env.pkg_dbdir,
1012            "pkg_refcount_dbdir": env.pkg_refcount_dbdir,
1013            "cachevars": env.cachevars,
1014        });
1015        self.conn.execute(
1016            "INSERT INTO metadata (key, value) VALUES ('pkgsrc_env', ?1)",
1017            params![json.to_string()],
1018        )?;
1019        Ok(())
1020    }
1021
1022    /**
1023     * Load the pkgsrc environment from the database.
1024     */
1025    pub fn load_pkgsrc_env(&self) -> Result<PkgsrcEnv> {
1026        let json_str: String = self
1027            .conn
1028            .query_row(
1029                "SELECT value FROM metadata WHERE key = 'pkgsrc_env'",
1030                [],
1031                |row| row.get(0),
1032            )
1033            .context("pkgsrc environment not found in database")?;
1034
1035        let json: serde_json::Value = serde_json::from_str(&json_str)
1036            .context("Invalid pkgsrc_env JSON")?;
1037
1038        let get_path = |key: &str| -> Result<PathBuf> {
1039            json.get(key)
1040                .and_then(|v| v.as_str())
1041                .map(PathBuf::from)
1042                .ok_or_else(|| anyhow::anyhow!("Missing {} in pkgsrc_env", key))
1043        };
1044
1045        let cachevars: HashMap<String, String> = json
1046            .get("cachevars")
1047            .and_then(|v| serde_json::from_value(v.clone()).ok())
1048            .unwrap_or_default();
1049
1050        Ok(PkgsrcEnv {
1051            packages: get_path("packages")?,
1052            pkgtools: get_path("pkgtools")?,
1053            prefix: get_path("prefix")?,
1054            pkg_dbdir: get_path("pkg_dbdir")?,
1055            pkg_refcount_dbdir: get_path("pkg_refcount_dbdir")?,
1056            cachevars,
1057        })
1058    }
1059
1060    /**
1061     * Get all package names with successful build outcomes.
1062     */
1063    pub fn get_successful_packages(&self) -> Result<Vec<String>> {
1064        let mut stmt = self.conn.prepare(
1065            "SELECT p.pkgname FROM builds b
1066             JOIN packages p ON b.package_id = p.id
1067             WHERE b.outcome IN ('success', 'up_to_date')
1068             ORDER BY p.pkgname",
1069        )?;
1070
1071        let pkgnames = stmt
1072            .query_map([], |row| row.get::<_, String>(0))?
1073            .collect::<std::result::Result<Vec<_>, _>>()?;
1074
1075        Ok(pkgnames)
1076    }
1077
1078    /**
1079     * Execute arbitrary SQL and print results.
1080     */
1081    pub fn execute_raw(&self, sql: &str) -> Result<()> {
1082        let mut stmt = self.conn.prepare(sql)?;
1083        let column_count = stmt.column_count();
1084
1085        if column_count == 0 {
1086            // Non-query statement (INSERT, UPDATE, DELETE, etc.)
1087            let affected = stmt.execute([])?;
1088            if affected > 0 {
1089                println!("{} row(s) affected", affected);
1090            }
1091        } else {
1092            // Query statement (SELECT, PRAGMA, etc.)
1093            let mut rows = stmt.query([])?;
1094
1095            while let Some(row) = rows.next()? {
1096                let values: Vec<String> = (0..column_count)
1097                    .map(|i| {
1098                        row.get_ref(i)
1099                            .map(|v| match v {
1100                                rusqlite::types::ValueRef::Null => {
1101                                    String::new()
1102                                }
1103                                rusqlite::types::ValueRef::Integer(i) => {
1104                                    i.to_string()
1105                                }
1106                                rusqlite::types::ValueRef::Real(f) => {
1107                                    f.to_string()
1108                                }
1109                                rusqlite::types::ValueRef::Text(s) => {
1110                                    String::from_utf8_lossy(s).to_string()
1111                                }
1112                                rusqlite::types::ValueRef::Blob(b) => {
1113                                    format!("<blob:{} bytes>", b.len())
1114                                }
1115                            })
1116                            .unwrap_or_default()
1117                    })
1118                    .collect();
1119                println!("{}", values.join("|"));
1120            }
1121        }
1122
1123        Ok(())
1124    }
1125}
1126
1127// ============================================================================
1128// HELPER FUNCTIONS
1129// ============================================================================
1130
1131/**
1132 * Convert BuildOutcome to database format.
1133 */
1134fn build_outcome_to_db(
1135    outcome: &BuildOutcome,
1136) -> (&'static str, Option<String>) {
1137    match outcome {
1138        BuildOutcome::Success => ("success", None),
1139        BuildOutcome::UpToDate => ("up_to_date", None),
1140        BuildOutcome::Failed(s) => ("failed", Some(s.clone())),
1141        BuildOutcome::Skipped(reason) => match reason {
1142            SkipReason::PkgSkip(s) => ("pkg_skip", Some(s.clone())),
1143            SkipReason::PkgFail(s) => ("pkg_fail", Some(s.clone())),
1144            SkipReason::IndirectSkip(s) => ("indirect_skip", Some(s.clone())),
1145            SkipReason::IndirectFail(s) => ("indirect_fail", Some(s.clone())),
1146            SkipReason::UnresolvedDep(s) => ("unresolved_dep", Some(s.clone())),
1147        },
1148    }
1149}
1150
1151/**
1152 * Convert database format to BuildOutcome.
1153 */
1154fn db_outcome_to_build(outcome: &str, detail: Option<String>) -> BuildOutcome {
1155    match outcome {
1156        "success" => BuildOutcome::Success,
1157        "up_to_date" => BuildOutcome::UpToDate,
1158        "failed" => BuildOutcome::Failed(detail.unwrap_or_default()),
1159        "pkg_skip" => BuildOutcome::Skipped(SkipReason::PkgSkip(
1160            detail.unwrap_or_default(),
1161        )),
1162        "pkg_fail" => BuildOutcome::Skipped(SkipReason::PkgFail(
1163            detail.unwrap_or_default(),
1164        )),
1165        "indirect_skip" => BuildOutcome::Skipped(SkipReason::IndirectSkip(
1166            detail.unwrap_or_default(),
1167        )),
1168        "indirect_fail" | "indirect_failed" => BuildOutcome::Skipped(
1169            SkipReason::IndirectFail(detail.unwrap_or_default()),
1170        ),
1171        "unresolved_dep" => BuildOutcome::Skipped(SkipReason::UnresolvedDep(
1172            detail.unwrap_or_default(),
1173        )),
1174        _ => BuildOutcome::Failed(format!("Unknown outcome: {}", outcome)),
1175    }
1176}