adler_server/state.rs
1//! Shared application state: registry, sites cache, HTTP client, scans.
2
3use std::collections::HashMap;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use adler_core::{Client, Registry, Site, SiteFilter};
8use tokio::sync::RwLock;
9use tokio::task::JoinHandle;
10
11use crate::scan::{ScanHandle, ScanId};
12
13/// State shared across all axum handlers.
14///
15/// Cheap to clone — every field is an [`Arc`] or a small primitive.
16/// axum requires `State<T>` to be `Clone`, hence this design.
17#[derive(Clone)]
18pub struct AppState {
19 /// Pre-filtered site list (registry + workspace flags applied at
20 /// startup). Held as an `Arc<[Site]>` to avoid re-cloning the
21 /// 2.5k-entry vector on every scan dispatch.
22 pub sites: Arc<[Site]>,
23 /// Startup-filtered catalogue including disabled/parked entries.
24 /// Scan handlers use [`Self::sites`]; catalogue and error-diagnostic
25 /// surfaces use this to explain why an otherwise matching site is not
26 /// currently scannable.
27 pub catalog: Arc<[Site]>,
28 /// Shared HTTP client (connection pool, throttle, etc.).
29 pub client: Arc<Client>,
30 /// In-flight + recently-finished scans, keyed by ID.
31 pub scans: Arc<RwLock<HashMap<ScanId, ScanHandle>>>,
32 /// Running-scan task handles, keyed by [`ScanId`]. Lets the
33 /// refilter endpoint cancel an in-flight scan via
34 /// [`JoinHandle::abort`] before spawning a successor with the new
35 /// filter. Entries are removed when their scan finishes naturally
36 /// (the task's last act before returning) or when the eviction
37 /// policy reaps them alongside the [`ScanHandle`].
38 pub scan_tasks: Arc<RwLock<HashMap<ScanId, JoinHandle<()>>>>,
39 /// Maximum number of scans retained in memory. Beyond this, the
40 /// oldest finished scan is evicted on the next insertion (a tiny
41 /// LRU — we never need more than ~dozens of recent scans in a
42 /// human-driven web session).
43 pub scan_capacity: usize,
44 /// Directory where finished scans are persisted as JSON. `None`
45 /// disables persistence (used by tests and ephemeral runs).
46 pub scans_dir: Option<Arc<PathBuf>>,
47}
48
49impl AppState {
50 /// Build initial state from a registry + a pre-built HTTP client.
51 ///
52 /// The full registry is filtered with the supplied predicate; the
53 /// result is materialised into an `Arc<[Site]>` once so handler
54 /// dispatch is a pointer copy. Persistence is off by default —
55 /// chain [`Self::with_scans_dir`] to enable.
56 #[must_use]
57 pub fn new(sites: Vec<Site>, client: Client, scan_capacity: usize) -> Self {
58 Self::with_catalog(sites.clone(), sites, client, scan_capacity)
59 }
60
61 /// Build initial state with separate scan and catalogue views.
62 /// `sites` must contain enabled entries only; `catalog` may include
63 /// disabled entries for diagnostics.
64 #[must_use]
65 pub fn with_catalog(
66 sites: Vec<Site>,
67 catalog: Vec<Site>,
68 client: Client,
69 scan_capacity: usize,
70 ) -> Self {
71 Self {
72 sites: Arc::from(sites.into_boxed_slice()),
73 catalog: Arc::from(catalog.into_boxed_slice()),
74 client: Arc::new(client),
75 scans: Arc::new(RwLock::new(HashMap::new())),
76 scan_tasks: Arc::new(RwLock::new(HashMap::new())),
77 scan_capacity: scan_capacity.max(1),
78 scans_dir: None,
79 }
80 }
81
82 /// Convenience: build state from a [`Registry`] using the
83 /// "no filter, NSFW excluded" default. The web UI exposes
84 /// per-scan filters anyway, so the initial site list is the full
85 /// non-NSFW set.
86 #[must_use]
87 pub fn from_registry(registry: &Registry, client: Client, scan_capacity: usize) -> Self {
88 let filter = SiteFilter::default();
89 let sites = registry.filter_with(&filter);
90 let catalog = registry.matches_with(&filter);
91 Self::with_catalog(sites, catalog, client, scan_capacity)
92 }
93
94 /// Enable on-disk persistence of finished scans under `dir`. Files
95 /// are written as `<scan_id>.json` after each scan completes;
96 /// startup reads them back so history survives server restarts.
97 #[must_use]
98 pub fn with_scans_dir(mut self, dir: PathBuf) -> Self {
99 self.scans_dir = Some(Arc::new(dir));
100 self
101 }
102
103 /// Insert a fresh scan handle, evicting the oldest finished entry
104 /// (or the oldest entry overall, if none has finished) when we are
105 /// at capacity.
106 pub async fn insert_scan(&self, id: ScanId, handle: ScanHandle) {
107 let mut evicted: Option<ScanId> = None;
108 let mut scans = self.scans.write().await;
109 if scans.len() >= self.scan_capacity {
110 let mut finished_candidate: Option<(ScanId, std::time::Duration)> = None;
111 let mut any_candidate: Option<(ScanId, std::time::Duration)> = None;
112 for (k, v) in scans.iter() {
113 let age = v.elapsed();
114 if v.is_finished_now()
115 && finished_candidate
116 .as_ref()
117 .is_none_or(|(_, prev)| age > *prev)
118 {
119 finished_candidate = Some((k.clone(), age));
120 }
121 if any_candidate.as_ref().is_none_or(|(_, prev)| age > *prev) {
122 any_candidate = Some((k.clone(), age));
123 }
124 }
125 if let Some((victim, _)) = finished_candidate.or(any_candidate) {
126 scans.remove(&victim);
127 evicted = Some(victim);
128 }
129 }
130 scans.insert(id, handle);
131 drop(scans);
132 if let Some(v) = evicted {
133 self.scan_tasks.write().await.remove(&v);
134 }
135 }
136
137 /// Register an in-flight scan task. The handle is stored so the
138 /// refilter endpoint can abort it before starting a successor.
139 pub async fn register_scan_task(&self, id: ScanId, task: JoinHandle<()>) {
140 self.scan_tasks.write().await.insert(id, task);
141 }
142
143 /// Remove an in-flight scan task entry. Used at the end of
144 /// `crate::scan::run` so the map doesn't accumulate completed tasks.
145 pub async fn forget_scan_task(&self, id: &ScanId) {
146 self.scan_tasks.write().await.remove(id);
147 }
148
149 /// Abort the running task for `id` (if any). Returns true when an
150 /// abort signal was actually sent; false when no live task was
151 /// recorded (already finished, or never started). Doesn't wait for
152 /// the task to observe the abort — `JoinHandle::abort` is
153 /// non-blocking and the caller continues immediately.
154 pub async fn abort_scan(&self, id: &ScanId) -> bool {
155 let task = self.scan_tasks.write().await.remove(id);
156 task.is_some_and(|t| {
157 t.abort();
158 true
159 })
160 }
161
162 /// Look up a scan by ID, cloning the handle (cheap — `Arc` inside).
163 pub async fn get_scan(&self, id: &ScanId) -> Option<ScanHandle> {
164 self.scans.read().await.get(id).cloned()
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scan::{FinishedScan, Summary};

    /// Default-configured HTTP client for building an [`AppState`].
    fn client() -> Client {
        Client::builder().build().expect("default client")
    }

    /// Wrap a string literal in a [`ScanId`].
    fn scan_id(raw: &str) -> ScanId {
        ScanId::from(raw.to_owned())
    }

    /// With capacity 2, a third insertion must evict the finished scan
    /// and keep the one that is still running.
    #[tokio::test]
    async fn evicts_oldest_finished_when_over_capacity() {
        let state = AppState::new(Vec::new(), client(), 2);

        // Scan "a" is marked finished before it is stored.
        let id_a = scan_id("aaaaaaaaaaaa");
        let handle_a = ScanHandle::new("a", 0, 4);
        let finished = FinishedScan {
            summary: Summary::default(),
            outcomes: Vec::new(),
            elapsed_ms: 0,
        };
        handle_a.publish(finished).await;
        state.insert_scan(id_a.clone(), handle_a).await;

        // Scan "b" is stored while still running.
        let id_b = scan_id("bbbbbbbbbbbb");
        let handle_b = ScanHandle::new("b", 0, 4);
        state.insert_scan(id_b.clone(), handle_b).await;

        // Two entries fit within the capacity of 2.
        assert!(state.get_scan(&id_a).await.is_some());
        assert!(state.get_scan(&id_b).await.is_some());

        // A third entry forces an eviction; the finished scan (a) goes
        // before the running one (b).
        let id_c = scan_id("cccccccccccc");
        let handle_c = ScanHandle::new("c", 0, 4);
        state.insert_scan(id_c.clone(), handle_c).await;

        assert!(
            state.get_scan(&id_a).await.is_none(),
            "finished scan should be evicted first"
        );
        assert!(state.get_scan(&id_b).await.is_some());
        assert!(state.get_scan(&id_c).await.is_some());
    }
}