Skip to main content

adler_core/
executor.rs

1//! Concurrent fan-out runner for site probes.
2//!
3//! Spawns one task per site and bounds the maximum in-flight count with a
4//! [`Semaphore`]. Tasks are independent — a panic or hang in one site never
5//! blocks results from the rest. Each task self-aborts when the global
6//! deadline (if any) is reached; remaining sites surface as
7//! [`MatchKind::Uncertain`].
8
9use std::num::NonZeroUsize;
10use std::sync::Arc;
11use std::time::Duration;
12
13use tokio::sync::Semaphore;
14use tokio::task::JoinSet;
15use tokio::time::{Instant as TokioInstant, timeout_at};
16
17use crate::check::{CheckOutcome, MatchKind};
18use crate::client::Client;
19use crate::site::Site;
20use crate::username::Username;
21
/// Number of simultaneous probes [`run`] allows when the caller does not
/// choose a limit.
///
/// Probes mostly target different hosts, so per-host throttling seldom
/// forces them to queue behind each other; what dominates is network
/// round-trip latency. Keeping 32 requests in flight saturates the link
/// while staying gentle on each individual host.
const DEFAULT_CONCURRENCY: NonZeroUsize = if let Some(limit) = NonZeroUsize::new(32) {
    limit
} else {
    // 32 is a non-zero literal, so `NonZeroUsize::new` cannot return `None`.
    unreachable!()
};
31
/// Knobs controlling how [`run`] schedules its probes.
#[must_use = "ExecutorOptions does nothing until passed to executor::run"]
#[derive(Debug, Clone)]
pub struct ExecutorOptions {
    /// Upper bound on the number of site probes allowed in flight at once.
    pub concurrency: NonZeroUsize,
    /// Wall-clock budget for the scan as a whole; `None` leaves the scan
    /// unbounded. Any site that is still being probed once the budget is
    /// spent is reported as [`MatchKind::Uncertain`].
    pub deadline: Option<Duration>,
}
42
43impl Default for ExecutorOptions {
44    fn default() -> Self {
45        Self {
46            concurrency: DEFAULT_CONCURRENCY,
47            deadline: None,
48        }
49    }
50}
51
52impl ExecutorOptions {
53    /// Override [`Self::concurrency`].
54    pub fn concurrency(mut self, n: NonZeroUsize) -> Self {
55        self.concurrency = n;
56        self
57    }
58
59    /// Set a total scan deadline.
60    pub fn deadline(mut self, d: Duration) -> Self {
61        self.deadline = Some(d);
62        self
63    }
64}
65
66/// Run a fan-out scan over `sites`, returning one outcome per site.
67///
68/// Results come back in completion order (not input order) — sort by name
69/// for stable presentation. A panicking site task is logged at `error` and
70/// silently dropped; transient HTTP failures already become
71/// [`MatchKind::Uncertain`] inside `Client::check`.
72pub async fn run(
73    client: &Client,
74    sites: &[Site],
75    username: &Username,
76    options: ExecutorOptions,
77) -> Vec<CheckOutcome> {
78    run_with_progress(client, sites, username, options, |_| {}).await
79}
80
81/// Variant of [`run`] that invokes `on_outcome` for each completed probe.
82///
83/// Useful for driving a live progress indicator or for emitting streaming
84/// output before the full scan finishes. The callback runs on the executor
85/// task between completions; long work inside it will throttle the loop.
86pub async fn run_with_progress<F>(
87    client: &Client,
88    sites: &[Site],
89    username: &Username,
90    options: ExecutorOptions,
91    mut on_outcome: F,
92) -> Vec<CheckOutcome>
93where
94    F: FnMut(&CheckOutcome),
95{
96    let semaphore = Arc::new(Semaphore::new(options.concurrency.get()));
97    let deadline_at = options.deadline.map(|d| TokioInstant::now() + d);
98    let mut set: JoinSet<CheckOutcome> = JoinSet::new();
99
100    for site in sites {
101        let site = site.clone();
102        let username = username.clone();
103        let client = client.clone();
104        let permits = Arc::clone(&semaphore);
105        set.spawn(async move {
106            let permit = match permits.acquire_owned().await {
107                Ok(p) => p,
108                Err(_closed) => {
109                    return CheckOutcome {
110                        site: site.name.clone(),
111                        url: site.url_for(&username),
112                        kind: MatchKind::Uncertain,
113                        reason: Some(crate::check::UncertainReason::SchedulerClosed),
114                        elapsed_ms: 0,
115                        enrichment: std::collections::BTreeMap::new(),
116                        evidence: Vec::new(),
117                    };
118                }
119            };
120            let probe = client.check(&site, &username);
121            let outcome = match deadline_at {
122                None => probe.await,
123                Some(at) => match timeout_at(at, probe).await {
124                    Ok(o) => o,
125                    Err(_elapsed) => CheckOutcome {
126                        site: site.name.clone(),
127                        url: site.url_for(&username),
128                        kind: MatchKind::Uncertain,
129                        reason: Some(crate::check::UncertainReason::Deadline),
130                        elapsed_ms: 0,
131                        enrichment: std::collections::BTreeMap::new(),
132                        evidence: Vec::new(),
133                    },
134                },
135            };
136            drop(permit);
137            outcome
138        });
139    }
140
141    let mut results = Vec::with_capacity(sites.len());
142    while let Some(joined) = set.join_next().await {
143        match joined {
144            Ok(outcome) => {
145                on_outcome(&outcome);
146                results.push(outcome);
147            }
148            Err(err) if err.is_cancelled() => {
149                tracing::warn!(error = %err, "check task cancelled");
150            }
151            Err(err) => {
152                tracing::error!(error = %err, "check task panicked");
153            }
154        }
155    }
156    results
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::site::{Signal, UrlTemplate};
    use wiremock::matchers::{any, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Build a site served by `server` at `/{segment}/{username}`.
    ///
    /// Every test site uses the same Found (200) / NotFound (404) status
    /// pair, matching how production sites.json migrates from Phase 1.
    fn site(server: &MockServer, name: &str, segment: &str) -> Site {
        let template = format!("{}/{}/{{username}}", server.uri(), segment);
        let signals = vec![
            Signal::StatusFound { codes: vec![200] },
            Signal::StatusNotFound { codes: vec![404] },
        ];
        Site {
            name: name.into(),
            url: UrlTemplate::new(template).unwrap(),
            signals,
            known_present: None,
            known_absent: None,
            extract: Vec::new(),
            tags: Vec::new(),
            request_headers: std::collections::BTreeMap::new(),
            regex_check: None,
            engine: None,
            strip_bad_char: None,
            request_method: crate::site::HttpMethod::Get,
            request_body: None,
            protection: Vec::new(),
            disabled: false,
            source: None,
            popularity: None,
            access: crate::AccessPolicy::default(),
        }
    }

    /// Client with a 5 s request timeout and no per-host throttle.
    fn fast_client() -> Client {
        // All mock sites live on 127.0.0.1; with throttling left on, the
        // client would serialise requests and the concurrency assertions
        // would no longer exercise the executor.
        let builder = Client::builder()
            .timeout(Duration::from_secs(5))
            .min_request_interval(Duration::ZERO);
        builder.build().unwrap()
    }

    /// Default options with the in-flight limit overridden to `n`.
    fn opts_with_concurrency(n: usize) -> ExecutorOptions {
        let limit = NonZeroUsize::new(n).unwrap();
        ExecutorOptions::default().concurrency(limit)
    }

    /// Register `response` on `server` for any request to exactly `route`.
    async fn mount(server: &MockServer, route: &str, response: ResponseTemplate) {
        Mock::given(any())
            .and(path(route))
            .respond_with(response)
            .mount(server)
            .await;
    }

    #[tokio::test]
    async fn runs_all_sites_concurrently() {
        let server = MockServer::start().await;
        mount(&server, "/a/alice", ResponseTemplate::new(200)).await;
        mount(&server, "/b/alice", ResponseTemplate::new(404)).await;
        mount(&server, "/c/alice", ResponseTemplate::new(200)).await;

        let sites = vec![
            site(&server, "A", "a"),
            site(&server, "B", "b"),
            site(&server, "C", "c"),
        ];
        let user = Username::new("alice").unwrap();
        let mut out = run(&fast_client(), &sites, &user, opts_with_concurrency(4)).await;
        // Completion order is arbitrary; sort by site name to get A, B, C.
        out.sort_by(|a, b| a.site.cmp(&b.site));

        assert_eq!(out.len(), 3);
        let expected = [MatchKind::Found, MatchKind::NotFound, MatchKind::Found];
        for (outcome, want) in out.iter().zip(expected) {
            assert_eq!(outcome.kind, want);
        }
    }

    #[tokio::test]
    async fn respects_concurrency_limit() {
        let server = MockServer::start().await;
        let mut sites: Vec<Site> = Vec::with_capacity(6);
        for i in 0..6 {
            let slow_ok = ResponseTemplate::new(200).set_delay(Duration::from_millis(50));
            mount(&server, &format!("/{i}/alice"), slow_ok).await;
            sites.push(site(&server, &format!("S{i}"), &i.to_string()));
        }
        let user = Username::new("alice").unwrap();

        let started = std::time::Instant::now();
        let out = run(&fast_client(), &sites, &user, opts_with_concurrency(2)).await;
        let elapsed = started.elapsed();

        assert_eq!(out.len(), 6);
        // 6 sites / 2 concurrent * 50 ms = 150 ms floor.
        assert!(
            elapsed >= Duration::from_millis(120),
            "expected ≥120 ms, got {elapsed:?}",
        );
    }

    #[tokio::test]
    async fn empty_input_returns_empty() {
        let user = Username::new("alice").unwrap();
        let no_sites: &[Site] = &[];
        let out = run(&fast_client(), no_sites, &user, opts_with_concurrency(4)).await;
        assert!(out.is_empty());
    }

    #[tokio::test]
    async fn run_with_progress_invokes_callback_per_outcome() {
        let server = MockServer::start().await;
        mount(&server, "/a/alice", ResponseTemplate::new(200)).await;
        mount(&server, "/b/alice", ResponseTemplate::new(404)).await;

        let sites = vec![site(&server, "A", "a"), site(&server, "B", "b")];
        let user = Username::new("alice").unwrap();
        // The callback is `FnMut`, so a plain local counter is enough.
        let mut calls = 0;
        let outcomes = run_with_progress(
            &fast_client(),
            &sites,
            &user,
            opts_with_concurrency(4),
            |_| calls += 1,
        )
        .await;

        assert_eq!(outcomes.len(), 2);
        assert_eq!(calls, 2);
    }

    #[tokio::test]
    async fn deadline_marks_slow_sites_uncertain() {
        let server = MockServer::start().await;
        let delayed = ResponseTemplate::new(200).set_delay(Duration::from_secs(2));
        mount(&server, "/slow/alice", delayed).await;
        mount(&server, "/fast/alice", ResponseTemplate::new(200)).await;

        let sites = vec![site(&server, "Slow", "slow"), site(&server, "Fast", "fast")];
        let user = Username::new("alice").unwrap();
        let options = ExecutorOptions::default()
            .concurrency(NonZeroUsize::new(4).unwrap())
            .deadline(Duration::from_millis(200));

        let started = std::time::Instant::now();
        let out = run(&fast_client(), &sites, &user, options).await;
        let elapsed = started.elapsed();

        assert_eq!(out.len(), 2);
        // Fast site completed; slow one hit the deadline.
        let by_name = |wanted: &str| out.iter().find(|o| o.site == wanted).unwrap();
        let fast = by_name("Fast");
        let slow = by_name("Slow");
        assert_eq!(fast.kind, MatchKind::Found);
        assert_eq!(slow.kind, MatchKind::Uncertain);
        assert_eq!(slow.reason, Some(crate::check::UncertainReason::Deadline));
        assert!(
            elapsed < Duration::from_millis(800),
            "scan should abort near the deadline, got {elapsed:?}",
        );
    }
}