Skip to main content

adler_server/
state.rs

1//! Shared application state: registry, sites cache, HTTP client, scans.
2
3use std::collections::HashMap;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use adler_core::{Client, Registry, Site};
8use tokio::sync::RwLock;
9use tokio::task::JoinHandle;
10
11use crate::scan::{ScanHandle, ScanId};
12
13/// State shared across all axum handlers.
14///
15/// Cheap to clone — every field is an [`Arc`] or a small primitive.
16/// axum requires `State<T>` to be `Clone`, hence this design.
17#[derive(Clone)]
18pub struct AppState {
19    /// Pre-filtered site list (registry + workspace flags applied at
20    /// startup). Held as an `Arc<[Site]>` to avoid re-cloning the
21    /// 2.5k-entry vector on every scan dispatch.
22    pub sites: Arc<[Site]>,
23    /// Shared HTTP client (connection pool, throttle, etc.).
24    pub client: Arc<Client>,
25    /// In-flight + recently-finished scans, keyed by ID.
26    pub scans: Arc<RwLock<HashMap<ScanId, ScanHandle>>>,
27    /// Running-scan task handles, keyed by [`ScanId`]. Lets the
28    /// refilter endpoint cancel an in-flight scan via
29    /// [`JoinHandle::abort`] before spawning a successor with the new
30    /// filter. Entries are removed when their scan finishes naturally
31    /// (the task's last act before returning) or when the eviction
32    /// policy reaps them alongside the [`ScanHandle`].
33    pub scan_tasks: Arc<RwLock<HashMap<ScanId, JoinHandle<()>>>>,
34    /// Maximum number of scans retained in memory. Beyond this, the
35    /// oldest finished scan is evicted on the next insertion (a tiny
36    /// LRU — we never need more than ~dozens of recent scans in a
37    /// human-driven web session).
38    pub scan_capacity: usize,
39    /// Directory where finished scans are persisted as JSON. `None`
40    /// disables persistence (used by tests and ephemeral runs).
41    pub scans_dir: Option<Arc<PathBuf>>,
42}
43
44impl AppState {
45    /// Build initial state from a registry + a pre-built HTTP client.
46    ///
47    /// The full registry is filtered with the supplied predicate; the
48    /// result is materialised into an `Arc<[Site]>` once so handler
49    /// dispatch is a pointer copy. Persistence is off by default —
50    /// chain [`Self::with_scans_dir`] to enable.
51    #[must_use]
52    pub fn new(sites: Vec<Site>, client: Client, scan_capacity: usize) -> Self {
53        Self {
54            sites: Arc::from(sites.into_boxed_slice()),
55            client: Arc::new(client),
56            scans: Arc::new(RwLock::new(HashMap::new())),
57            scan_tasks: Arc::new(RwLock::new(HashMap::new())),
58            scan_capacity: scan_capacity.max(1),
59            scans_dir: None,
60        }
61    }
62
63    /// Convenience: build state from a [`Registry`] using the
64    /// "no filter, NSFW excluded" default. The web UI exposes
65    /// per-scan filters anyway, so the initial site list is the full
66    /// non-NSFW set.
67    #[must_use]
68    pub fn from_registry(registry: &Registry, client: Client, scan_capacity: usize) -> Self {
69        let sites = registry.filter(&[], &[], &[], &[], false);
70        Self::new(sites, client, scan_capacity)
71    }
72
73    /// Enable on-disk persistence of finished scans under `dir`. Files
74    /// are written as `<scan_id>.json` after each scan completes;
75    /// startup reads them back so history survives server restarts.
76    #[must_use]
77    pub fn with_scans_dir(mut self, dir: PathBuf) -> Self {
78        self.scans_dir = Some(Arc::new(dir));
79        self
80    }
81
82    /// Insert a fresh scan handle, evicting the oldest finished entry
83    /// (or the oldest entry overall, if none has finished) when we are
84    /// at capacity.
85    pub async fn insert_scan(&self, id: ScanId, handle: ScanHandle) {
86        let mut evicted: Option<ScanId> = None;
87        let mut scans = self.scans.write().await;
88        if scans.len() >= self.scan_capacity {
89            let mut finished_candidate: Option<(ScanId, std::time::Duration)> = None;
90            let mut any_candidate: Option<(ScanId, std::time::Duration)> = None;
91            for (k, v) in scans.iter() {
92                let age = v.elapsed();
93                if v.is_finished_now()
94                    && finished_candidate
95                        .as_ref()
96                        .is_none_or(|(_, prev)| age > *prev)
97                {
98                    finished_candidate = Some((k.clone(), age));
99                }
100                if any_candidate.as_ref().is_none_or(|(_, prev)| age > *prev) {
101                    any_candidate = Some((k.clone(), age));
102                }
103            }
104            if let Some((victim, _)) = finished_candidate.or(any_candidate) {
105                scans.remove(&victim);
106                evicted = Some(victim);
107            }
108        }
109        scans.insert(id, handle);
110        drop(scans);
111        if let Some(v) = evicted {
112            self.scan_tasks.write().await.remove(&v);
113        }
114    }
115
116    /// Register an in-flight scan task. The handle is stored so the
117    /// refilter endpoint can abort it before starting a successor.
118    pub async fn register_scan_task(&self, id: ScanId, task: JoinHandle<()>) {
119        self.scan_tasks.write().await.insert(id, task);
120    }
121
122    /// Remove an in-flight scan task entry. Used at the end of
123    /// `crate::scan::run` so the map doesn't accumulate completed tasks.
124    pub async fn forget_scan_task(&self, id: &ScanId) {
125        self.scan_tasks.write().await.remove(id);
126    }
127
128    /// Abort the running task for `id` (if any). Returns true when an
129    /// abort signal was actually sent; false when no live task was
130    /// recorded (already finished, or never started). Doesn't wait for
131    /// the task to observe the abort — `JoinHandle::abort` is
132    /// non-blocking and the caller continues immediately.
133    pub async fn abort_scan(&self, id: &ScanId) -> bool {
134        let task = self.scan_tasks.write().await.remove(id);
135        task.is_some_and(|t| {
136            t.abort();
137            true
138        })
139    }
140
141    /// Look up a scan by ID, cloning the handle (cheap — `Arc` inside).
142    pub async fn get_scan(&self, id: &ScanId) -> Option<ScanHandle> {
143        self.scans.read().await.get(id).cloned()
144    }
145}
146
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scan::{FinishedScan, Summary};

    /// Default-configured HTTP client. No scan is dispatched in this
    /// module, so it only has to exist.
    fn client() -> Client {
        Client::builder().build().expect("default client")
    }

    /// Turn a string literal into a [`ScanId`].
    fn scan_id(raw: &str) -> ScanId {
        ScanId::from(raw.to_owned())
    }

    #[tokio::test]
    async fn evicts_oldest_finished_when_over_capacity() {
        let state = AppState::new(Vec::new(), client(), 2);

        // Scan `a` has a result published before it is stored; scan
        // `b` is stored without one.
        let id_a = scan_id("aaaaaaaaaaaa");
        let finished = ScanHandle::new("a", 0, 4);
        let result = FinishedScan {
            summary: Summary::default(),
            outcomes: Vec::new(),
            elapsed_ms: 0,
        };
        finished.publish(result).await;
        state.insert_scan(id_a.clone(), finished).await;

        let id_b = scan_id("bbbbbbbbbbbb");
        let running = ScanHandle::new("b", 0, 4);
        state.insert_scan(id_b.clone(), running).await;

        // Two entries fit within a capacity of two.
        for id in [&id_a, &id_b] {
            assert!(state.get_scan(id).await.is_some());
        }

        // A third insertion must make room, and the published scan
        // (a) is the one expected to go rather than (b).
        let id_c = scan_id("cccccccccccc");
        let newcomer = ScanHandle::new("c", 0, 4);
        state.insert_scan(id_c.clone(), newcomer).await;

        assert!(
            state.get_scan(&id_a).await.is_none(),
            "finished scan should be evicted first"
        );
        for id in [&id_b, &id_c] {
            assert!(state.get_scan(id).await.is_some());
        }
    }
}