Skip to main content

adler_core/
executor.rs

1//! Concurrent fan-out runner for site probes.
2//!
3//! Spawns one task per site and bounds the maximum in-flight count with a
4//! [`Semaphore`]. Tasks are independent — a panic or hang in one site never
5//! blocks results from the rest. Each task self-aborts when the global
6//! deadline (if any) is reached; remaining sites surface as
7//! [`MatchKind::Uncertain`].
8
9use std::num::NonZeroUsize;
10use std::sync::Arc;
11use std::time::Duration;
12
13use tokio::sync::Semaphore;
14use tokio::task::JoinSet;
15use tokio::time::{Instant as TokioInstant, timeout_at};
16
17use crate::check::{CheckOutcome, MatchKind};
18use crate::client::Client;
19use crate::site::Site;
20use crate::username::Username;
21
/// Default concurrency for [`run`].
///
/// Probes mostly go to different hosts, so the per-host throttle seldom
/// forces them into single file; what limits throughput is network
/// round-trip time. Keeping 32 probes in flight saturates the pipe without
/// leaning on any one host.
const DEFAULT_CONCURRENCY: NonZeroUsize = if let Some(limit) = NonZeroUsize::new(32) {
    limit
} else {
    // 32 is a non-zero literal, so this arm can never be selected.
    unreachable!()
};
31
/// Knobs that control how [`run`] schedules its probes.
#[must_use = "ExecutorOptions does nothing until passed to executor::run"]
#[derive(Clone, Debug)]
pub struct ExecutorOptions {
    /// Upper bound on how many site probes may be in flight at once.
    pub concurrency: NonZeroUsize,
    /// Wall-clock budget for the scan as a whole; `None` leaves it unbounded.
    /// Any site still in flight once the budget is spent is reported as
    /// [`MatchKind::Uncertain`].
    pub deadline: Option<Duration>,
}
42
43impl Default for ExecutorOptions {
44    fn default() -> Self {
45        Self {
46            concurrency: DEFAULT_CONCURRENCY,
47            deadline: None,
48        }
49    }
50}
51
52impl ExecutorOptions {
53    /// Override [`Self::concurrency`].
54    pub fn concurrency(mut self, n: NonZeroUsize) -> Self {
55        self.concurrency = n;
56        self
57    }
58
59    /// Set a total scan deadline.
60    pub fn deadline(mut self, d: Duration) -> Self {
61        self.deadline = Some(d);
62        self
63    }
64}
65
66/// Run a fan-out scan over `sites`, returning one outcome per site.
67///
68/// Results come back in completion order (not input order) — sort by name
69/// for stable presentation. A panicking site task is logged at `error` and
70/// silently dropped; transient HTTP failures already become
71/// [`MatchKind::Uncertain`] inside `Client::check`.
72pub async fn run(
73    client: &Client,
74    sites: &[Site],
75    username: &Username,
76    options: ExecutorOptions,
77) -> Vec<CheckOutcome> {
78    run_with_progress(client, sites, username, options, |_| {}).await
79}
80
81/// Variant of [`run`] that invokes `on_outcome` for each completed probe.
82///
83/// Useful for driving a live progress indicator or for emitting streaming
84/// output before the full scan finishes. The callback runs on the executor
85/// task between completions; long work inside it will throttle the loop.
86pub async fn run_with_progress<F>(
87    client: &Client,
88    sites: &[Site],
89    username: &Username,
90    options: ExecutorOptions,
91    mut on_outcome: F,
92) -> Vec<CheckOutcome>
93where
94    F: FnMut(&CheckOutcome),
95{
96    let semaphore = Arc::new(Semaphore::new(options.concurrency.get()));
97    let deadline_at = options.deadline.map(|d| TokioInstant::now() + d);
98    let mut set: JoinSet<CheckOutcome> = JoinSet::new();
99
100    for site in sites {
101        let site = site.clone();
102        let username = username.clone();
103        let client = client.clone();
104        let permits = Arc::clone(&semaphore);
105        set.spawn(async move {
106            let permit = match permits.acquire_owned().await {
107                Ok(p) => p,
108                Err(_closed) => {
109                    return CheckOutcome {
110                        site: site.name.clone(),
111                        url: site.url_for(&username),
112                        kind: MatchKind::Uncertain,
113                        reason: Some(crate::check::UncertainReason::SchedulerClosed),
114                        elapsed_ms: 0,
115                        enrichment: std::collections::BTreeMap::new(),
116                        evidence: Vec::new(),
117                    };
118                }
119            };
120            let probe = client.check(&site, &username);
121            let outcome = match deadline_at {
122                None => probe.await,
123                Some(at) => match timeout_at(at, probe).await {
124                    Ok(o) => o,
125                    Err(_elapsed) => CheckOutcome {
126                        site: site.name.clone(),
127                        url: site.url_for(&username),
128                        kind: MatchKind::Uncertain,
129                        reason: Some(crate::check::UncertainReason::Deadline),
130                        elapsed_ms: 0,
131                        enrichment: std::collections::BTreeMap::new(),
132                        evidence: Vec::new(),
133                    },
134                },
135            };
136            drop(permit);
137            outcome
138        });
139    }
140
141    let mut results = Vec::with_capacity(sites.len());
142    while let Some(joined) = set.join_next().await {
143        match joined {
144            Ok(outcome) => {
145                on_outcome(&outcome);
146                results.push(outcome);
147            }
148            Err(err) if err.is_cancelled() => {
149                tracing::warn!(error = %err, "check task cancelled");
150            }
151            Err(err) => {
152                tracing::error!(error = %err, "check task panicked");
153            }
154        }
155    }
156    results
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::site::{Signal, UrlTemplate};
    use wiremock::matchers::{any, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Test sites are uniformly defined with a Found/NotFound status pair,
    /// matching how production sites.json migrates from Phase 1.
    ///
    /// The probe URL is `<server>/<segment>/{username}`.
    fn site(server: &MockServer, name: &str, segment: &str) -> Site {
        Site {
            name: name.into(),
            url: UrlTemplate::new(format!("{}/{}/{{username}}", server.uri(), segment)).unwrap(),
            signals: vec![
                Signal::StatusFound { codes: vec![200] },
                Signal::StatusNotFound { codes: vec![404] },
            ],
            known_present: None,
            known_absent: None,
            extract: Vec::new(),
            tags: Vec::new(),
            request_headers: std::collections::BTreeMap::new(),
            regex_check: None,
            engine: None,
            strip_bad_char: None,
            request_method: crate::site::HttpMethod::Get,
            request_body: None,
            protection: Vec::new(),
            disabled: false,
            source: None,
            popularity: None,
        }
    }

    /// Client with a 5 s request timeout and no per-host throttling.
    fn fast_client() -> Client {
        Client::builder()
            .timeout(Duration::from_secs(5))
            // Tests share host 127.0.0.1 — disable throttling so concurrency
            // assertions actually exercise the executor.
            .min_request_interval(Duration::ZERO)
            .build()
            .unwrap()
    }

    /// Default options with the concurrency limit overridden to `n`.
    fn opts_with_concurrency(n: usize) -> ExecutorOptions {
        ExecutorOptions::default().concurrency(NonZeroUsize::new(n).unwrap())
    }

    /// Register a mock that answers any HTTP method on `route` with `response`.
    async fn mount(server: &MockServer, route: &str, response: ResponseTemplate) {
        Mock::given(any())
            .and(path(route))
            .respond_with(response)
            .mount(server)
            .await;
    }

    /// The username every test probes for.
    fn alice() -> Username {
        Username::new("alice").unwrap()
    }

    #[tokio::test]
    async fn runs_all_sites_concurrently() {
        let server = MockServer::start().await;
        mount(&server, "/a/alice", ResponseTemplate::new(200)).await;
        mount(&server, "/b/alice", ResponseTemplate::new(404)).await;
        mount(&server, "/c/alice", ResponseTemplate::new(200)).await;

        let sites = vec![
            site(&server, "A", "a"),
            site(&server, "B", "b"),
            site(&server, "C", "c"),
        ];
        let user = alice();
        let mut out = run(&fast_client(), &sites, &user, opts_with_concurrency(4)).await;
        // Outcomes arrive in completion order; sort so indices map to A, B, C.
        out.sort_by(|a, b| a.site.cmp(&b.site));

        assert_eq!(out.len(), 3);
        assert_eq!(out[0].kind, MatchKind::Found);
        assert_eq!(out[1].kind, MatchKind::NotFound);
        assert_eq!(out[2].kind, MatchKind::Found);
    }

    #[tokio::test]
    async fn respects_concurrency_limit() {
        let server = MockServer::start().await;
        for i in 0..6 {
            let delayed_ok = ResponseTemplate::new(200).set_delay(Duration::from_millis(50));
            mount(&server, &format!("/{i}/alice"), delayed_ok).await;
        }
        let sites: Vec<Site> = (0..6)
            .map(|i| site(&server, &format!("S{i}"), &i.to_string()))
            .collect();
        let user = alice();
        let started = std::time::Instant::now();
        let out = run(&fast_client(), &sites, &user, opts_with_concurrency(2)).await;
        let elapsed = started.elapsed();
        assert_eq!(out.len(), 6);
        // 6 sites / 2 concurrent * 50 ms = 150 ms floor; the threshold leaves
        // some slack below that.
        assert!(
            elapsed >= Duration::from_millis(120),
            "expected ≥120 ms, got {elapsed:?}",
        );
    }

    #[tokio::test]
    async fn empty_input_returns_empty() {
        let user = alice();
        let out = run(&fast_client(), &[], &user, opts_with_concurrency(4)).await;
        assert!(out.is_empty());
    }

    #[tokio::test]
    async fn run_with_progress_invokes_callback_per_outcome() {
        let server = MockServer::start().await;
        mount(&server, "/a/alice", ResponseTemplate::new(200)).await;
        mount(&server, "/b/alice", ResponseTemplate::new(404)).await;

        let sites = vec![site(&server, "A", "a"), site(&server, "B", "b")];
        let user = alice();
        // The callback is `FnMut`, so a plain counter borrowed mutably is
        // enough; the borrow ends once the awaited future is dropped.
        let mut calls = 0_usize;
        let outcomes = run_with_progress(
            &fast_client(),
            &sites,
            &user,
            opts_with_concurrency(4),
            |_| calls += 1,
        )
        .await;
        assert_eq!(outcomes.len(), 2);
        assert_eq!(calls, 2);
    }

    #[tokio::test]
    async fn deadline_marks_slow_sites_uncertain() {
        let server = MockServer::start().await;
        mount(
            &server,
            "/slow/alice",
            ResponseTemplate::new(200).set_delay(Duration::from_secs(2)),
        )
        .await;
        mount(&server, "/fast/alice", ResponseTemplate::new(200)).await;

        let sites = vec![site(&server, "Slow", "slow"), site(&server, "Fast", "fast")];
        let user = alice();
        let options = ExecutorOptions::default()
            .concurrency(NonZeroUsize::new(4).unwrap())
            .deadline(Duration::from_millis(200));
        let started = std::time::Instant::now();
        let out = run(&fast_client(), &sites, &user, options).await;
        let elapsed = started.elapsed();

        assert_eq!(out.len(), 2);
        // Fast site completed; slow one hit the deadline. Outcomes are looked
        // up by name, so their order does not matter here.
        let fast = out.iter().find(|o| o.site == "Fast").unwrap();
        let slow = out.iter().find(|o| o.site == "Slow").unwrap();
        assert_eq!(fast.kind, MatchKind::Found);
        assert_eq!(slow.kind, MatchKind::Uncertain);
        assert_eq!(slow.reason, Some(crate::check::UncertainReason::Deadline));
        assert!(
            elapsed < Duration::from_millis(800),
            "scan should abort near the deadline, got {elapsed:?}",
        );
    }
}