Skip to main content

adler_core/
check.rs

1//! Verdict types produced when a site is probed.
2
3use std::collections::BTreeMap;
4use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8/// Outcome of a single site probe.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
10#[serde(rename_all = "snake_case")]
11pub enum MatchKind {
12    /// The account exists on this site.
13    Found,
14    /// The account does not exist on this site.
15    NotFound,
16    /// The response was inconclusive (network error, unexpected status,
17    /// ambiguous content). Reported separately so the user can review them
18    /// rather than silently dropping signal.
19    Uncertain,
20}
21
22impl MatchKind {
23    /// True if the verdict represents a positive (existing) account.
24    pub const fn is_found(self) -> bool {
25        matches!(self, Self::Found)
26    }
27}
28
29/// Why a probe was inconclusive.
30///
31/// `Uncertain` outcomes carry a typed reason rather than a free-form string,
32/// so logic that reacts to specific cases (e.g. retry on a transient ban)
33/// matches an enum variant instead of a fragile string. The [`fmt::Display`]
34/// rendering is what the CLI prints; serialization is the externally-tagged
35/// default (unit variants → a `snake_case` string, detail-carrying variants →
36/// `{ "network": "…" }`).
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38#[serde(rename_all = "snake_case")]
39pub enum UncertainReason {
40    /// HTTP 429, or 503 with a `Retry-After` header.
41    RateLimited,
42    /// A Cloudflare interstitial / "checking your browser" page.
43    CloudflareChallenge,
44    /// A captcha gate.
45    Captcha,
46    /// The path is disallowed by the host's `robots.txt` (`--respect-robots`).
47    RobotsDisallowed,
48    /// The scan deadline elapsed before this site finished.
49    Deadline,
50    /// The executor's scheduler was closed (does not happen in practice).
51    SchedulerClosed,
52    /// A transport/network error while issuing the request.
53    Network(String),
54    /// An error reading the response body.
55    BodyRead(String),
56    /// A `bot-protected` site needed the browser backend but the per-scan
57    /// `--browser-budget` cap was already spent on earlier sites.
58    BrowserBudget,
59    /// The username doesn't satisfy the site's `regex_check`
60    /// (e.g. too short, contains forbidden characters). Reported
61    /// without issuing any HTTP request — saves both network and the
62    /// false-positive class where the site 404s on illegal usernames
63    /// in ways our signal can't tell apart from a missing account.
64    UsernameNotAllowed,
65    /// The browser backend itself failed (timeout, navigation error,
66    /// session drop, …) for a `bot-protected` site.
67    BrowserFailed(String),
68    /// The site's [`AccessPolicy`](crate::AccessPolicy) requires an
69    /// egress (country / IP type) that no configured proxy in the pool
70    /// satisfies, so the probe was skipped rather than fetched from the
71    /// wrong location. "Couldn't reach from the required geo" is not
72    /// "account absent" — hence `Uncertain`, never `NotFound`.
73    GeoUnavailable,
74    /// The site's [`AccessPolicy`](crate::AccessPolicy) names a session
75    /// (`access.session`) that wasn't supplied, so the probe was skipped
76    /// rather than sent unauthenticated into a login wall — which reads
77    /// the same for an existing and a missing account.
78    SessionRequired,
79    /// Any other reason (e.g. a `doctor` pre-flight skip).
80    Other(String),
81}
82
83impl fmt::Display for UncertainReason {
84    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85        match self {
86            Self::RateLimited => f.write_str("rate_limited"),
87            Self::CloudflareChallenge => f.write_str("cloudflare_challenge"),
88            Self::Captcha => f.write_str("captcha"),
89            Self::RobotsDisallowed => f.write_str("robots_disallowed"),
90            Self::Deadline => f.write_str("deadline reached"),
91            Self::SchedulerClosed => f.write_str("scheduler closed"),
92            Self::Network(detail) => write!(f, "request: {detail}"),
93            Self::BodyRead(detail) => write!(f, "body read: {detail}"),
94            Self::BrowserBudget => f.write_str("browser_budget_exceeded"),
95            Self::UsernameNotAllowed => f.write_str("username_not_allowed"),
96            Self::BrowserFailed(detail) => write!(f, "browser: {detail}"),
97            Self::GeoUnavailable => f.write_str("geo_unavailable"),
98            Self::SessionRequired => f.write_str("session_required"),
99            Self::Other(detail) => f.write_str(detail),
100        }
101    }
102}
103
104/// Result of probing a single site for a username.
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct CheckOutcome {
107    /// Site name (matches `Site::name`).
108    pub site: String,
109    /// Concrete URL that was requested.
110    pub url: String,
111    /// Verdict produced by the site's detection strategy.
112    pub kind: MatchKind,
113    /// Why the outcome is `Uncertain`, if it is. `None` for `Found` /
114    /// `NotFound`.
115    #[serde(default, skip_serializing_if = "Option::is_none")]
116    pub reason: Option<UncertainReason>,
117    /// Wall-clock duration of the probe.
118    pub elapsed_ms: u64,
119    /// Fields extracted from a `Found` profile when `--enrich` is active
120    /// (e.g. `name`, `bio`, `avatar`). Empty unless enrichment ran and the
121    /// site has extractor rules. Ordered by field name.
122    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
123    pub enrichment: BTreeMap<String, String>,
124    /// Human-readable descriptions of the signals that produced the verdict —
125    /// e.g. `"HTTP 404 (status_not_found)"`. Empty for `Uncertain` (no signal
126    /// fired). Surfaced by `--explain`; always present in JSON output.
127    #[serde(default, skip_serializing_if = "Vec::is_empty")]
128    pub evidence: Vec<String>,
129    /// Which transport produced this outcome (HTTP / impersonate / browser).
130    /// `None` only on outcomes from older persisted scans saved before this
131    /// field existed; live scans always populate it.
132    #[serde(default, skip_serializing_if = "Option::is_none")]
133    pub transport: Option<crate::escalation::TransportTier>,
134    /// Number of *automatic* escalations to a heavier transport beyond the
135    /// site's primary route — usually 0, at most 1 today (HTTP / impersonate
136    /// → browser on `Uncertain(CloudflareChallenge | RateLimited)`).
137    /// Stamped so the doctor can spot sites where the primary route
138    /// systematically fails and the registry should pre-tag them.
139    #[serde(default, skip_serializing_if = "is_zero_u8")]
140    pub escalations: u8,
141}
142
/// Returns `true` when `n` is zero; used as the `skip_serializing_if`
/// predicate for `CheckOutcome::escalations`.
// serde invokes `skip_serializing_if` predicates with a reference to the
// field, so the parameter has to be `&u8` even though `u8` is `Copy`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_zero_u8(n: &u8) -> bool {
    matches!(*n, 0)
}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an outcome for a fixed site/URL with every optional field left
    /// empty, so each test only spells out what it actually varies.
    fn outcome(kind: MatchKind, reason: Option<UncertainReason>, elapsed_ms: u64) -> CheckOutcome {
        CheckOutcome {
            site: "GitHub".into(),
            url: "https://github.com/alice".into(),
            kind,
            reason,
            elapsed_ms,
            enrichment: BTreeMap::new(),
            evidence: Vec::new(),
            transport: None,
            escalations: 0,
        }
    }

    #[test]
    fn match_kind_serialises_snake_case() {
        assert_eq!(
            serde_json::to_string(&MatchKind::Found).unwrap(),
            "\"found\""
        );
        assert_eq!(
            serde_json::to_string(&MatchKind::NotFound).unwrap(),
            "\"not_found\""
        );
        assert_eq!(
            serde_json::to_string(&MatchKind::Uncertain).unwrap(),
            "\"uncertain\""
        );
    }

    #[test]
    fn match_kind_is_found() {
        assert!(MatchKind::Found.is_found());
        assert!(!MatchKind::NotFound.is_found());
        assert!(!MatchKind::Uncertain.is_found());
    }

    #[test]
    fn outcome_skips_absent_reason() {
        let json = serde_json::to_string(&outcome(MatchKind::Found, None, 42)).unwrap();
        assert!(
            !json.contains("reason"),
            "reason field must be omitted when None"
        );
        assert!(
            !json.contains("enrichment"),
            "enrichment must be omitted when empty"
        );
        assert!(
            !json.contains("evidence"),
            "evidence must be omitted when empty"
        );
        assert!(
            !json.contains("transport"),
            "transport must be omitted when None"
        );
        assert!(
            !json.contains("escalations"),
            "escalations must be omitted when zero"
        );
        assert!(json.contains("\"kind\":\"found\""));
        assert!(json.contains("\"elapsed_ms\":42"));
    }

    #[test]
    fn outcome_deserialises_without_optional_fields() {
        // Shape of a persisted outcome that predates (or simply omits) every
        // optional field: each one must fall back to its default.
        let json = concat!(
            r#"{"site":"GitHub","url":"https://github.com/alice","#,
            r#""kind":"not_found","elapsed_ms":7}"#,
        );
        let parsed: CheckOutcome = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.site, "GitHub");
        assert_eq!(parsed.url, "https://github.com/alice");
        assert_eq!(parsed.kind, MatchKind::NotFound);
        assert_eq!(parsed.reason, None);
        assert_eq!(parsed.elapsed_ms, 7);
        assert!(parsed.enrichment.is_empty());
        assert!(parsed.evidence.is_empty());
        assert!(parsed.transport.is_none());
        assert_eq!(parsed.escalations, 0);
    }

    #[test]
    fn unit_reason_serialises_as_snake_case_string() {
        let subject = outcome(
            MatchKind::Uncertain,
            Some(UncertainReason::RateLimited),
            5_000,
        );
        let json = serde_json::to_string(&subject).unwrap();
        assert!(json.contains("\"reason\":\"rate_limited\""), "{json}");
    }

    #[test]
    fn detail_reason_serialises_as_tagged_object() {
        let json = serde_json::to_string(&UncertainReason::Network("refused".into())).unwrap();
        assert_eq!(json, "{\"network\":\"refused\"}");
    }

    #[test]
    fn reason_round_trips_through_json() {
        let reasons = [
            UncertainReason::RateLimited,
            UncertainReason::CloudflareChallenge,
            UncertainReason::Captcha,
            UncertainReason::RobotsDisallowed,
            UncertainReason::Deadline,
            UncertainReason::SchedulerClosed,
            UncertainReason::Network("refused".into()),
            UncertainReason::BodyRead("truncated".into()),
            UncertainReason::BrowserBudget,
            UncertainReason::UsernameNotAllowed,
            UncertainReason::BrowserFailed("timeout".into()),
            UncertainReason::GeoUnavailable,
            UncertainReason::SessionRequired,
            UncertainReason::Other("doctor skip".into()),
        ];
        for reason in &reasons {
            let json = serde_json::to_string(reason).unwrap();
            let back: UncertainReason = serde_json::from_str(&json).unwrap();
            assert_eq!(&back, reason, "{json}");
        }
    }

    #[test]
    fn reason_display_matches_legacy_note_text() {
        assert_eq!(UncertainReason::RateLimited.to_string(), "rate_limited");
        assert_eq!(UncertainReason::Deadline.to_string(), "deadline reached");
        assert_eq!(
            UncertainReason::Network("boom".into()).to_string(),
            "request: boom"
        );
    }

    #[test]
    fn reason_display_formats_detail_variants() {
        assert_eq!(
            UncertainReason::BodyRead("eof".into()).to_string(),
            "body read: eof"
        );
        assert_eq!(
            UncertainReason::BrowserFailed("timeout".into()).to_string(),
            "browser: timeout"
        );
        assert_eq!(
            UncertainReason::Other("doctor skip".into()).to_string(),
            "doctor skip"
        );
    }
}