adler_core/check.rs
1//! Verdict types produced when a site is probed.
2
3use std::collections::BTreeMap;
4use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8/// Outcome of a single site probe.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
10#[serde(rename_all = "snake_case")]
11pub enum MatchKind {
12 /// The account exists on this site.
13 Found,
14 /// The account does not exist on this site.
15 NotFound,
16 /// The response was inconclusive (network error, unexpected status,
17 /// ambiguous content). Reported separately so the user can review them
18 /// rather than silently dropping signal.
19 Uncertain,
20}
21
22impl MatchKind {
23 /// True if the verdict represents a positive (existing) account.
24 pub const fn is_found(self) -> bool {
25 matches!(self, Self::Found)
26 }
27}
28
29/// Why a probe was inconclusive.
30///
31/// `Uncertain` outcomes carry a typed reason rather than a free-form string,
32/// so logic that reacts to specific cases (e.g. retry on a transient ban)
33/// matches an enum variant instead of a fragile string. The [`fmt::Display`]
34/// rendering is what the CLI prints; serialization is the externally-tagged
35/// default (unit variants → a `snake_case` string, detail-carrying variants →
36/// `{ "network": "…" }`).
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38#[serde(rename_all = "snake_case")]
39pub enum UncertainReason {
40 /// HTTP 429, or 503 with a `Retry-After` header.
41 RateLimited,
42 /// A Cloudflare interstitial / "checking your browser" page.
43 CloudflareChallenge,
44 /// A captcha gate.
45 Captcha,
46 /// The path is disallowed by the host's `robots.txt` (`--respect-robots`).
47 RobotsDisallowed,
48 /// The scan deadline elapsed before this site finished.
49 Deadline,
50 /// The executor's scheduler was closed (does not happen in practice).
51 SchedulerClosed,
52 /// A transport/network error while issuing the request.
53 Network(String),
54 /// An error reading the response body.
55 BodyRead(String),
56 /// A `bot-protected` site needed the browser backend but the per-scan
57 /// `--browser-budget` cap was already spent on earlier sites.
58 BrowserBudget,
59 /// The username doesn't satisfy the site's `regex_check`
60 /// (e.g. too short, contains forbidden characters). Reported
61 /// without issuing any HTTP request — saves both network and the
62 /// false-positive class where the site 404s on illegal usernames
63 /// in ways our signal can't tell apart from a missing account.
64 UsernameNotAllowed,
65 /// The browser backend itself failed (timeout, navigation error,
66 /// session drop, …) for a `bot-protected` site.
67 BrowserFailed(String),
68 /// The site's [`AccessPolicy`](crate::AccessPolicy) requires an
69 /// egress (country / IP type) that no configured proxy in the pool
70 /// satisfies, so the probe was skipped rather than fetched from the
71 /// wrong location. "Couldn't reach from the required geo" is not
72 /// "account absent" — hence `Uncertain`, never `NotFound`.
73 GeoUnavailable,
74 /// The site's [`AccessPolicy`](crate::AccessPolicy) names a session
75 /// (`access.session`) that wasn't supplied, so the probe was skipped
76 /// rather than sent unauthenticated into a login wall — which reads
77 /// the same for an existing and a missing account.
78 SessionRequired,
79 /// Any other reason (e.g. a `doctor` pre-flight skip).
80 Other(String),
81}
82
83impl fmt::Display for UncertainReason {
84 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85 match self {
86 Self::RateLimited => f.write_str("rate_limited"),
87 Self::CloudflareChallenge => f.write_str("cloudflare_challenge"),
88 Self::Captcha => f.write_str("captcha"),
89 Self::RobotsDisallowed => f.write_str("robots_disallowed"),
90 Self::Deadline => f.write_str("deadline reached"),
91 Self::SchedulerClosed => f.write_str("scheduler closed"),
92 Self::Network(detail) => write!(f, "request: {detail}"),
93 Self::BodyRead(detail) => write!(f, "body read: {detail}"),
94 Self::BrowserBudget => f.write_str("browser_budget_exceeded"),
95 Self::UsernameNotAllowed => f.write_str("username_not_allowed"),
96 Self::BrowserFailed(detail) => write!(f, "browser: {detail}"),
97 Self::GeoUnavailable => f.write_str("geo_unavailable"),
98 Self::SessionRequired => f.write_str("session_required"),
99 Self::Other(detail) => f.write_str(detail),
100 }
101 }
102}
103
104/// Result of probing a single site for a username.
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct CheckOutcome {
107 /// Site name (matches `Site::name`).
108 pub site: String,
109 /// Concrete URL that was requested.
110 pub url: String,
111 /// Verdict produced by the site's detection strategy.
112 pub kind: MatchKind,
113 /// Why the outcome is `Uncertain`, if it is. `None` for `Found` /
114 /// `NotFound`.
115 #[serde(default, skip_serializing_if = "Option::is_none")]
116 pub reason: Option<UncertainReason>,
117 /// Wall-clock duration of the probe.
118 pub elapsed_ms: u64,
119 /// Fields extracted from a `Found` profile when `--enrich` is active
120 /// (e.g. `name`, `bio`, `avatar`). Empty unless enrichment ran and the
121 /// site has extractor rules. Ordered by field name.
122 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
123 pub enrichment: BTreeMap<String, String>,
124 /// Human-readable descriptions of the signals that produced the verdict —
125 /// e.g. `"HTTP 404 (status_not_found)"`. Empty for `Uncertain` (no signal
126 /// fired). Surfaced by `--explain`; always present in JSON output.
127 #[serde(default, skip_serializing_if = "Vec::is_empty")]
128 pub evidence: Vec<String>,
129 /// Which transport produced this outcome (HTTP / impersonate / browser).
130 /// `None` only on outcomes from older persisted scans saved before this
131 /// field existed; live scans always populate it.
132 #[serde(default, skip_serializing_if = "Option::is_none")]
133 pub transport: Option<crate::escalation::TransportTier>,
134 /// Number of *automatic* escalations to a heavier transport beyond the
135 /// site's primary route — usually 0, at most 1 today (HTTP / impersonate
136 /// → browser on `Uncertain(CloudflareChallenge | RateLimited)`).
137 /// Stamped so the doctor can spot sites where the primary route
138 /// systematically fails and the registry should pre-tag them.
139 #[serde(default, skip_serializing_if = "is_zero_u8")]
140 pub escalations: u8,
141}
142
/// Predicate for `#[serde(skip_serializing_if = "is_zero_u8")]`: returns
/// `true` when the value is zero, so a zero counter is left out of the
/// serialized output.
// Serde calls `skip_serializing_if` predicates with a reference to the field,
// so taking `&u8` is required here even though clippy would prefer by-value.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_zero_u8(n: &u8) -> bool {
    matches!(*n, 0)
}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an outcome for `https://github.com/alice` with every optional
    /// field left at its empty/zero value, so each test spells out only the
    /// parts it actually varies.
    fn outcome(kind: MatchKind, reason: Option<UncertainReason>, elapsed_ms: u64) -> CheckOutcome {
        CheckOutcome {
            site: "GitHub".into(),
            url: "https://github.com/alice".into(),
            kind,
            reason,
            elapsed_ms,
            enrichment: BTreeMap::new(),
            evidence: Vec::new(),
            transport: None,
            escalations: 0,
        }
    }

    #[test]
    fn match_kind_serialises_snake_case() {
        let expected = [
            (MatchKind::Found, "\"found\""),
            (MatchKind::NotFound, "\"not_found\""),
            (MatchKind::Uncertain, "\"uncertain\""),
        ];
        for (kind, json) in expected {
            assert_eq!(serde_json::to_string(&kind).unwrap(), json);
        }
    }

    #[test]
    fn match_kind_is_found() {
        assert!(MatchKind::Found.is_found());
        assert!(!MatchKind::NotFound.is_found());
        assert!(!MatchKind::Uncertain.is_found());
    }

    #[test]
    fn outcome_skips_absent_reason() {
        let json = serde_json::to_string(&outcome(MatchKind::Found, None, 42)).unwrap();
        assert!(
            !json.contains("reason"),
            "reason field must be omitted when None"
        );
        assert!(
            !json.contains("enrichment"),
            "enrichment must be omitted when empty"
        );
        // `evidence` carries the same `skip_serializing_if` treatment as the
        // other optional fields, so an empty list must not appear either.
        assert!(
            !json.contains("evidence"),
            "evidence must be omitted when empty"
        );
        assert!(
            !json.contains("transport"),
            "transport must be omitted when None"
        );
        assert!(
            !json.contains("escalations"),
            "escalations must be omitted when zero"
        );
        assert!(json.contains("\"kind\":\"found\""));
        assert!(json.contains("\"elapsed_ms\":42"));
    }

    /// Persisted scans written before the optional fields existed must still
    /// load, with every missing key falling back to its empty/zero value.
    #[test]
    fn outcome_deserialises_with_optional_fields_missing() {
        let json =
            r#"{"site":"GitHub","url":"https://github.com/alice","kind":"not_found","elapsed_ms":7}"#;
        let parsed: CheckOutcome = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.site, "GitHub");
        assert_eq!(parsed.url, "https://github.com/alice");
        assert_eq!(parsed.kind, MatchKind::NotFound);
        assert_eq!(parsed.reason, None);
        assert_eq!(parsed.elapsed_ms, 7);
        assert!(parsed.enrichment.is_empty());
        assert!(parsed.evidence.is_empty());
        assert!(parsed.transport.is_none());
        assert_eq!(parsed.escalations, 0);
    }

    #[test]
    fn unit_reason_serialises_as_snake_case_string() {
        let subject = outcome(
            MatchKind::Uncertain,
            Some(UncertainReason::RateLimited),
            5_000,
        );
        let json = serde_json::to_string(&subject).unwrap();
        assert!(json.contains("\"reason\":\"rate_limited\""), "{json}");
    }

    #[test]
    fn detail_reason_serialises_as_tagged_object() {
        let json = serde_json::to_string(&UncertainReason::Network("refused".into())).unwrap();
        assert_eq!(json, "{\"network\":\"refused\"}");
    }

    /// Both wire shapes (plain string and single-key object) must read back
    /// into the variant they were written from.
    #[test]
    fn reason_round_trips_through_json() {
        let samples = [
            UncertainReason::RateLimited,
            UncertainReason::Network("refused".into()),
            UncertainReason::Other("doctor skip".into()),
        ];
        for reason in samples {
            let json = serde_json::to_string(&reason).unwrap();
            let back: UncertainReason = serde_json::from_str(&json).unwrap();
            assert_eq!(back, reason, "{json}");
        }
    }

    #[test]
    fn reason_display_matches_legacy_note_text() {
        assert_eq!(UncertainReason::RateLimited.to_string(), "rate_limited");
        assert_eq!(UncertainReason::Deadline.to_string(), "deadline reached");
        assert_eq!(
            UncertainReason::Network("boom".into()).to_string(),
            "request: boom"
        );
    }
}