Skip to main content

adler_core/
executor.rs

1//! Concurrent fan-out runner for site probes.
2//!
3//! Spawns one task per site and bounds the maximum in-flight count with a
4//! [`Semaphore`]. Tasks are independent — a panic or hang in one site never
5//! blocks results from the rest. Each task self-aborts when the global
6//! deadline (if any) is reached; remaining sites surface as
7//! [`MatchKind::Uncertain`].
8
9use std::num::NonZeroUsize;
10use std::sync::Arc;
11use std::time::Duration;
12
13use tokio::sync::Semaphore;
14use tokio::task::JoinSet;
15use tokio::time::{Instant as TokioInstant, timeout_at};
16
17use crate::check::{CheckOutcome, MatchKind};
18use crate::client::Client;
19use crate::site::Site;
20use crate::username::Username;
21
/// Number of in-flight probes [`run`] uses when the caller does not override it.
///
/// Sites mostly live on distinct hosts, so the per-host throttle seldom
/// serialises requests; network round-trips dominate instead. Keeping 32
/// probes in flight fills the pipe without hammering any single host.
const DEFAULT_CONCURRENCY: NonZeroUsize = {
    // Evaluated at compile time: a zero literal here would fail const
    // evaluation instead of producing an invalid value.
    if let Some(limit) = NonZeroUsize::new(32) {
        limit
    } else {
        unreachable!()
    }
};
31
/// Knobs controlling how [`run`] schedules its probes.
#[derive(Clone, Debug)]
#[must_use = "ExecutorOptions does nothing until passed to executor::run"]
pub struct ExecutorOptions {
    /// Upper bound on the number of site probes in flight at any one time.
    pub concurrency: NonZeroUsize,
    /// Wall-clock budget for the whole scan, or `None` for no limit. Sites
    /// still in flight once it elapses are reported with
    /// [`MatchKind::Uncertain`] outcomes.
    pub deadline: Option<Duration>,
}
42
43impl Default for ExecutorOptions {
44    fn default() -> Self {
45        Self {
46            concurrency: DEFAULT_CONCURRENCY,
47            deadline: None,
48        }
49    }
50}
51
52impl ExecutorOptions {
53    /// Override [`Self::concurrency`].
54    pub fn concurrency(mut self, n: NonZeroUsize) -> Self {
55        self.concurrency = n;
56        self
57    }
58
59    /// Set a total scan deadline.
60    pub fn deadline(mut self, d: Duration) -> Self {
61        self.deadline = Some(d);
62        self
63    }
64}
65
66/// Run a fan-out scan over `sites`, returning one outcome per site.
67///
68/// Results come back in completion order (not input order) — sort by name
69/// for stable presentation. A panicking site task is logged at `error` and
70/// silently dropped; transient HTTP failures already become
71/// [`MatchKind::Uncertain`] inside `Client::check`.
72pub async fn run(
73    client: &Client,
74    sites: &[Site],
75    username: &Username,
76    options: ExecutorOptions,
77) -> Vec<CheckOutcome> {
78    run_with_progress(client, sites, username, options, |_| {}).await
79}
80
81/// Variant of [`run`] that invokes `on_outcome` for each completed probe.
82///
83/// Useful for driving a live progress indicator or for emitting streaming
84/// output before the full scan finishes. The callback runs on the executor
85/// task between completions; long work inside it will throttle the loop.
86pub async fn run_with_progress<F>(
87    client: &Client,
88    sites: &[Site],
89    username: &Username,
90    options: ExecutorOptions,
91    mut on_outcome: F,
92) -> Vec<CheckOutcome>
93where
94    F: FnMut(&CheckOutcome),
95{
96    let semaphore = Arc::new(Semaphore::new(options.concurrency.get()));
97    let deadline_at = options.deadline.map(|d| TokioInstant::now() + d);
98    let mut set: JoinSet<CheckOutcome> = JoinSet::new();
99
100    for site in sites {
101        let site = site.clone();
102        let username = username.clone();
103        let client = client.clone();
104        let permits = Arc::clone(&semaphore);
105        set.spawn(async move {
106            let permit = match permits.acquire_owned().await {
107                Ok(p) => p,
108                Err(_closed) => {
109                    return CheckOutcome {
110                        site: site.name.clone(),
111                        url: site.url_for(&username),
112                        kind: MatchKind::Uncertain,
113                        reason: Some(crate::check::UncertainReason::SchedulerClosed),
114                        elapsed_ms: 0,
115                        enrichment: std::collections::BTreeMap::new(),
116                        evidence: Vec::new(),
117                    };
118                }
119            };
120            let probe = client.check(&site, &username);
121            let outcome = match deadline_at {
122                None => probe.await,
123                Some(at) => match timeout_at(at, probe).await {
124                    Ok(o) => o,
125                    Err(_elapsed) => CheckOutcome {
126                        site: site.name.clone(),
127                        url: site.url_for(&username),
128                        kind: MatchKind::Uncertain,
129                        reason: Some(crate::check::UncertainReason::Deadline),
130                        elapsed_ms: 0,
131                        enrichment: std::collections::BTreeMap::new(),
132                        evidence: Vec::new(),
133                    },
134                },
135            };
136            drop(permit);
137            outcome
138        });
139    }
140
141    let mut results = Vec::with_capacity(sites.len());
142    while let Some(joined) = set.join_next().await {
143        match joined {
144            Ok(outcome) => {
145                on_outcome(&outcome);
146                results.push(outcome);
147            }
148            Err(err) if err.is_cancelled() => {
149                tracing::warn!(error = %err, "check task cancelled");
150            }
151            Err(err) => {
152                tracing::error!(error = %err, "check task panicked");
153            }
154        }
155    }
156    results
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::site::{Signal, UrlTemplate};
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Builds a site whose probe URL is `{server}/{segment}/{username}`.
    ///
    /// Test sites are uniformly defined with a Found/NotFound status pair,
    /// matching how production sites.json migrates from Phase 1.
    fn site(server: &MockServer, name: &str, segment: &str) -> Site {
        let template = format!("{}/{}/{{username}}", server.uri(), segment);
        Site {
            name: name.into(),
            url: UrlTemplate::new(template).unwrap(),
            signals: vec![
                Signal::StatusFound { codes: vec![200] },
                Signal::StatusNotFound { codes: vec![404] },
            ],
            known_present: None,
            known_absent: None,
            extract: Vec::new(),
            tags: Vec::new(),
            request_headers: std::collections::BTreeMap::new(),
            regex_check: None,
            engine: None,
        }
    }

    /// Client with a 5 s request timeout and no per-host throttling.
    fn fast_client() -> Client {
        // Every test site lives on 127.0.0.1; with throttling left on, the
        // concurrency assertions would not actually exercise the executor.
        let builder = Client::builder()
            .timeout(Duration::from_secs(5))
            .min_request_interval(Duration::ZERO);
        builder.build().unwrap()
    }

    /// Default options with the concurrency limit set to `n`.
    fn opts_with_concurrency(n: usize) -> ExecutorOptions {
        let limit = NonZeroUsize::new(n).unwrap();
        ExecutorOptions::default().concurrency(limit)
    }

    /// Mounts a mock on `server` answering `GET route` with `response`.
    async fn mount_get(server: &MockServer, route: &str, response: ResponseTemplate) {
        Mock::given(method("GET"))
            .and(path(route))
            .respond_with(response)
            .mount(server)
            .await;
    }

    #[tokio::test]
    async fn runs_all_sites_concurrently() {
        let server = MockServer::start().await;
        let routes = [("/a/alice", 200u16), ("/b/alice", 404), ("/c/alice", 200)];
        for (route, status) in routes {
            mount_get(&server, route, ResponseTemplate::new(status)).await;
        }

        let sites: Vec<Site> = [("A", "a"), ("B", "b"), ("C", "c")]
            .into_iter()
            .map(|(name, segment)| site(&server, name, segment))
            .collect();
        let user = Username::new("alice").unwrap();
        let mut out = run(&fast_client(), &sites, &user, opts_with_concurrency(4)).await;
        out.sort_by(|a, b| a.site.cmp(&b.site));

        assert_eq!(out.len(), 3);
        let expected = [MatchKind::Found, MatchKind::NotFound, MatchKind::Found];
        for (outcome, want) in out.iter().zip(expected) {
            assert_eq!(outcome.kind, want);
        }
    }

    #[tokio::test]
    async fn respects_concurrency_limit() {
        let server = MockServer::start().await;
        let mut sites = Vec::with_capacity(6);
        for i in 0..6 {
            let slow_ok = ResponseTemplate::new(200).set_delay(Duration::from_millis(50));
            mount_get(&server, &format!("/{i}/alice"), slow_ok).await;
            sites.push(site(&server, &format!("S{i}"), &i.to_string()));
        }
        let user = Username::new("alice").unwrap();

        let started = std::time::Instant::now();
        let out = run(&fast_client(), &sites, &user, opts_with_concurrency(2)).await;
        let elapsed = started.elapsed();

        assert_eq!(out.len(), 6);
        // 6 sites / 2 concurrent * 50 ms = 150 ms floor.
        assert!(
            elapsed >= Duration::from_millis(120),
            "expected ≥120 ms, got {elapsed:?}",
        );
    }

    #[tokio::test]
    async fn empty_input_returns_empty() {
        let no_sites: [Site; 0] = [];
        let user = Username::new("alice").unwrap();
        let out = run(&fast_client(), &no_sites, &user, opts_with_concurrency(4)).await;
        assert!(out.is_empty());
    }

    #[tokio::test]
    async fn run_with_progress_invokes_callback_per_outcome() {
        let server = MockServer::start().await;
        mount_get(&server, "/a/alice", ResponseTemplate::new(200)).await;
        mount_get(&server, "/b/alice", ResponseTemplate::new(404)).await;
        let sites = vec![site(&server, "A", "a"), site(&server, "B", "b")];
        let user = Username::new("alice").unwrap();

        // The callback is `FnMut`, so a plain local counter is enough.
        let mut calls = 0;
        let outcomes = run_with_progress(
            &fast_client(),
            &sites,
            &user,
            opts_with_concurrency(4),
            |_| calls += 1,
        )
        .await;

        assert_eq!(outcomes.len(), 2);
        assert_eq!(calls, 2);
    }

    #[tokio::test]
    async fn deadline_marks_slow_sites_uncertain() {
        let server = MockServer::start().await;
        let two_second_ok = ResponseTemplate::new(200).set_delay(Duration::from_secs(2));
        mount_get(&server, "/slow/alice", two_second_ok).await;
        mount_get(&server, "/fast/alice", ResponseTemplate::new(200)).await;
        let sites = vec![site(&server, "Slow", "slow"), site(&server, "Fast", "fast")];
        let user = Username::new("alice").unwrap();
        let options = opts_with_concurrency(4).deadline(Duration::from_millis(200));

        let started = std::time::Instant::now();
        let mut out = run(&fast_client(), &sites, &user, options).await;
        let elapsed = started.elapsed();
        out.sort_by(|a, b| a.site.cmp(&b.site));

        assert_eq!(out.len(), 2);
        // The fast site finishes normally; the slow one runs into the deadline.
        let fast = out.iter().find(|o| o.site == "Fast").unwrap();
        let slow = out.iter().find(|o| o.site == "Slow").unwrap();
        assert_eq!(fast.kind, MatchKind::Found);
        assert_eq!(slow.kind, MatchKind::Uncertain);
        assert_eq!(slow.reason, Some(crate::check::UncertainReason::Deadline));
        assert!(
            elapsed < Duration::from_millis(800),
            "scan should abort near the deadline, got {elapsed:?}",
        );
    }
}
331}