Skip to main content

modo/db/
maintenance.rs

1use crate::cron::{CronContext, CronHandler, FromCronContext};
2use crate::error::{Error, Result};
3use crate::extractor::Service;
4
5use super::Database;
6
7/// Database health metrics from PRAGMA introspection.
8///
9/// Contains page-level statistics useful for deciding whether to run
10/// `VACUUM`. Does **not** derive `Serialize` — these are internal
11/// infrastructure metrics that must not be exposed on unauthenticated
12/// endpoints.
13#[derive(Debug, Clone)]
14pub struct DbHealth {
15    /// Total number of pages in the database.
16    pub page_count: u64,
17    /// Number of pages on the freelist (reclaimable by VACUUM).
18    pub freelist_count: u64,
19    /// Size of each page in bytes.
20    pub page_size: u64,
21    /// Percentage of pages on the freelist (0.0–100.0).
22    pub free_percent: f64,
23    /// Total database file size in bytes (`page_count * page_size`).
24    pub total_size_bytes: u64,
25    /// Wasted space in bytes (`freelist_count * page_size`).
26    pub wasted_bytes: u64,
27}
28
29impl DbHealth {
30    /// Collect health metrics via `PRAGMA page_count`, `freelist_count`,
31    /// `page_size`. Computes derived fields from those three values.
32    ///
33    /// # Errors
34    ///
35    /// Returns an error if any PRAGMA query fails or returns an unexpected value.
36    pub async fn collect(conn: &libsql::Connection) -> Result<Self> {
37        let page_count = Self::pragma_u64(conn, "page_count").await?;
38        let freelist_count = Self::pragma_u64(conn, "freelist_count").await?;
39        let page_size = Self::pragma_u64(conn, "page_size").await?;
40
41        let free_percent = if page_count > 0 {
42            (freelist_count as f64 / page_count as f64) * 100.0
43        } else {
44            0.0
45        };
46
47        Ok(Self {
48            page_count,
49            freelist_count,
50            page_size,
51            free_percent,
52            total_size_bytes: page_count * page_size,
53            wasted_bytes: freelist_count * page_size,
54        })
55    }
56
57    /// Returns `true` if `free_percent >= threshold_percent`.
58    pub fn needs_vacuum(&self, threshold_percent: f64) -> bool {
59        self.free_percent >= threshold_percent
60    }
61
62    async fn pragma_u64(conn: &libsql::Connection, name: &str) -> Result<u64> {
63        let mut rows = conn
64            .query(&format!("PRAGMA {name}"), ())
65            .await
66            .map_err(Error::from)?;
67        let row = rows
68            .next()
69            .await
70            .map_err(Error::from)?
71            .ok_or_else(|| Error::internal(format!("PRAGMA {name} returned no rows")))?;
72        let val: i64 = row.get(0).map_err(Error::from)?;
73        u64::try_from(val)
74            .map_err(|_| Error::internal(format!("PRAGMA {name} returned negative value: {val}")))
75    }
76}
77
78/// Options for [`run_vacuum`].
79#[derive(Debug, Clone)]
80pub struct VacuumOptions {
81    /// Only vacuum if freelist exceeds this percentage. Default: `20.0`.
82    pub threshold_percent: f64,
83    /// Log-only mode — report health without running VACUUM. Default: `false`.
84    pub dry_run: bool,
85}
86
87impl Default for VacuumOptions {
88    fn default() -> Self {
89        Self {
90            threshold_percent: 20.0,
91            dry_run: false,
92        }
93    }
94}
95
96/// Result of a [`run_vacuum`] call.
97#[derive(Debug, Clone)]
98pub struct VacuumResult {
99    /// Health snapshot taken before the vacuum decision.
100    pub health_before: DbHealth,
101    /// Health snapshot taken after VACUUM. `None` if skipped or dry_run.
102    pub health_after: Option<DbHealth>,
103    /// Whether VACUUM actually executed.
104    pub vacuumed: bool,
105    /// Wall-clock duration of the full operation.
106    pub duration: std::time::Duration,
107}
108
109/// Run VACUUM with safety checks.
110///
111/// 1. Collects health metrics.
112/// 2. If `free_percent < threshold` or `dry_run`, returns early.
113/// 3. Executes `VACUUM`.
114/// 4. Collects health metrics again.
115///
116/// Logs before/after metrics at `debug` level.
117///
118/// # Errors
119///
120/// Returns an error if health collection or the `VACUUM` statement fails.
121pub async fn run_vacuum(conn: &libsql::Connection, opts: VacuumOptions) -> Result<VacuumResult> {
122    let start = std::time::Instant::now();
123    let health_before = DbHealth::collect(conn).await?;
124
125    tracing::debug!(
126        page_count = health_before.page_count,
127        freelist_count = health_before.freelist_count,
128        free_pct = health_before.free_percent,
129        wasted_bytes = health_before.wasted_bytes,
130        "vacuum: health before"
131    );
132
133    if opts.dry_run || !health_before.needs_vacuum(opts.threshold_percent) {
134        tracing::debug!(
135            free_pct = health_before.free_percent,
136            threshold = opts.threshold_percent,
137            dry_run = opts.dry_run,
138            "vacuum: skipped"
139        );
140        return Ok(VacuumResult {
141            health_before,
142            health_after: None,
143            vacuumed: false,
144            duration: start.elapsed(),
145        });
146    }
147
148    conn.execute("VACUUM", ())
149        .await
150        .map_err(|e| Error::internal("VACUUM failed").chain(e))?;
151
152    let health_after = DbHealth::collect(conn).await?;
153
154    tracing::debug!(
155        page_count = health_after.page_count,
156        freelist_count = health_after.freelist_count,
157        free_pct = health_after.free_percent,
158        wasted_bytes = health_after.wasted_bytes,
159        "vacuum: health after"
160    );
161
162    Ok(VacuumResult {
163        health_before,
164        health_after: Some(health_after),
165        vacuumed: true,
166        duration: start.elapsed(),
167    })
168}
169
170/// Shorthand: run [`run_vacuum`] with the given threshold and default options.
171///
172/// # Errors
173///
174/// Returns an error if health collection or the `VACUUM` statement fails.
175pub async fn vacuum_if_needed(
176    conn: &libsql::Connection,
177    threshold_percent: f64,
178) -> Result<VacuumResult> {
179    run_vacuum(
180        conn,
181        VacuumOptions {
182            threshold_percent,
183            ..Default::default()
184        },
185    )
186    .await
187}
188
189/// Cron handler that checks database health and vacuums if the freelist
190/// ratio exceeds the configured threshold. Logs results at `info` level.
191///
192/// Created by [`vacuum_handler`].
193#[derive(Clone)]
194pub struct VacuumHandler {
195    threshold_percent: f64,
196}
197
198impl CronHandler<(Service<Database>,)> for VacuumHandler {
199    async fn call(self, ctx: CronContext) -> Result<()> {
200        let Service(db) = Service::<Database>::from_cron_context(&ctx)?;
201
202        let result = run_vacuum(
203            db.conn(),
204            VacuumOptions {
205                threshold_percent: self.threshold_percent,
206                ..Default::default()
207            },
208        )
209        .await?;
210
211        if let Some(after) = result.health_after.as_ref() {
212            tracing::info!(
213                before_free_pct = result.health_before.free_percent,
214                after_free_pct = after.free_percent,
215                reclaimed_bytes = result
216                    .health_before
217                    .wasted_bytes
218                    .saturating_sub(after.wasted_bytes),
219                duration_ms = result.duration.as_millis(),
220                "vacuum completed"
221            );
222        } else {
223            tracing::info!(
224                free_pct = result.health_before.free_percent,
225                threshold = self.threshold_percent,
226                "vacuum skipped, below threshold"
227            );
228        }
229
230        Ok(())
231    }
232}
233
234/// Returns a cron handler that checks DB health and vacuums if needed.
235///
236/// The handler extracts [`Service<Database>`] from the cron context.
237/// Register the `Database` in the service registry before building the
238/// scheduler.
239///
240/// # Example
241///
242/// ```rust,no_run
243/// use modo::cron::Scheduler;
244/// use modo::db;
245/// use modo::service::Registry;
246///
247/// # async fn example() -> modo::Result<()> {
248/// let mut registry = Registry::new();
249/// // registry.add(db.clone());
250///
251/// let scheduler = Scheduler::builder(&registry)
252///     .job("0 3 * * 0", db::vacuum_handler(20.0))?
253///     .start()
254///     .await;
255/// # Ok(())
256/// # }
257/// ```
258pub fn vacuum_handler(threshold_percent: f64) -> VacuumHandler {
259    VacuumHandler { threshold_percent }
260}
261
262#[cfg(test)]
263mod tests {
264    use super::*;
265
266    async fn test_conn() -> libsql::Connection {
267        let db = libsql::Builder::new_local(":memory:")
268            .build()
269            .await
270            .unwrap();
271        db.connect().unwrap()
272    }
273
274    #[tokio::test]
275    async fn collect_returns_metrics_for_fresh_db() {
276        let conn = test_conn().await;
277        // Create a table to force page allocation in the in-memory database.
278        conn.execute("CREATE TABLE _health_probe (id INTEGER PRIMARY KEY)", ())
279            .await
280            .unwrap();
281        let health = DbHealth::collect(&conn).await.unwrap();
282
283        assert!(health.page_count > 0);
284        assert_eq!(health.freelist_count, 0);
285        assert!(health.page_size > 0);
286        assert_eq!(health.free_percent, 0.0);
287        assert_eq!(
288            health.total_size_bytes,
289            health.page_count * health.page_size
290        );
291        assert_eq!(health.wasted_bytes, 0);
292    }
293
294    #[tokio::test]
295    async fn needs_vacuum_threshold_logic() {
296        let health = DbHealth {
297            page_count: 100,
298            freelist_count: 25,
299            page_size: 4096,
300            free_percent: 25.0,
301            total_size_bytes: 100 * 4096,
302            wasted_bytes: 25 * 4096,
303        };
304
305        assert!(health.needs_vacuum(20.0));
306        assert!(health.needs_vacuum(25.0));
307        assert!(!health.needs_vacuum(30.0));
308    }
309
310    #[tokio::test]
311    async fn run_vacuum_skips_when_below_threshold() {
312        let conn = test_conn().await;
313
314        let result = run_vacuum(
315            &conn,
316            VacuumOptions {
317                threshold_percent: 20.0,
318                ..Default::default()
319            },
320        )
321        .await
322        .unwrap();
323
324        assert!(!result.vacuumed);
325        assert!(result.health_after.is_none());
326        assert_eq!(result.health_before.freelist_count, 0);
327    }
328
329    #[tokio::test]
330    async fn run_vacuum_skips_in_dry_run() {
331        let conn = test_conn().await;
332
333        let result = run_vacuum(
334            &conn,
335            VacuumOptions {
336                threshold_percent: 0.0, // would trigger on any freelist
337                dry_run: true,
338            },
339        )
340        .await
341        .unwrap();
342
343        assert!(!result.vacuumed);
344        assert!(result.health_after.is_none());
345    }
346
347    #[tokio::test]
348    async fn run_vacuum_executes_when_threshold_met() {
349        let conn = test_conn().await;
350
351        // Create a table, insert rows, delete them to produce freelist pages
352        conn.execute("CREATE TABLE bloat (id INTEGER PRIMARY KEY, data TEXT)", ())
353            .await
354            .unwrap();
355
356        // Insert enough data to create multiple pages
357        for i in 0..500 {
358            conn.execute(
359                "INSERT INTO bloat (id, data) VALUES (?1, ?2)",
360                libsql::params![i, "x".repeat(200)],
361            )
362            .await
363            .unwrap();
364        }
365
366        // Delete all rows — pages go to freelist
367        conn.execute("DELETE FROM bloat", ()).await.unwrap();
368
369        let health = DbHealth::collect(&conn).await.unwrap();
370        assert!(
371            health.freelist_count > 0,
372            "expected freelist pages after bulk delete"
373        );
374
375        // Run vacuum with a very low threshold so it triggers
376        let result = run_vacuum(
377            &conn,
378            VacuumOptions {
379                threshold_percent: 0.0,
380                ..Default::default()
381            },
382        )
383        .await
384        .unwrap();
385
386        assert!(result.vacuumed);
387        let after = result.health_after.unwrap();
388        assert!(
389            after.freelist_count < health.freelist_count,
390            "freelist should shrink after vacuum"
391        );
392    }
393
394    #[tokio::test]
395    async fn vacuum_if_needed_delegates_correctly() {
396        let conn = test_conn().await;
397
398        // Fresh DB, 0% free — should skip with threshold 20
399        let result = vacuum_if_needed(&conn, 20.0).await.unwrap();
400        assert!(!result.vacuumed);
401    }
402}