1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//! Verdict types produced when a site is probed.
use std::collections::BTreeMap;
use std::fmt;
use serde::{Deserialize, Serialize};
/// Outcome of a single site probe.
///
/// Serialized with `snake_case` variant names, i.e. `"found"`,
/// `"not_found"` and `"uncertain"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MatchKind {
/// The account exists on this site.
Found,
/// The account does not exist on this site.
NotFound,
/// The response was inconclusive (network error, unexpected status,
/// ambiguous content). Reported separately so the user can review such
/// outcomes rather than silently dropping signal.
Uncertain,
}
impl MatchKind {
    /// Reports whether this verdict is the positive one, i.e. the account
    /// exists on the probed site.
    ///
    /// Only [`MatchKind::Found`] yields `true`; both `NotFound` and
    /// `Uncertain` yield `false`.
    pub const fn is_found(self) -> bool {
        // Spelled out per variant so that adding a new verdict forces a
        // decision here instead of silently falling into a default.
        match self {
            Self::Found => true,
            Self::NotFound | Self::Uncertain => false,
        }
    }
}
/// Why a probe was inconclusive.
///
/// `Uncertain` outcomes carry a typed reason rather than a free-form string,
/// so logic that reacts to specific cases (e.g. retry on a transient ban)
/// matches an enum variant instead of a fragile string. The [`fmt::Display`]
/// rendering is what the CLI prints; serialization is the externally-tagged
/// default (unit variants → a `snake_case` string, detail-carrying variants →
/// `{ "network": "…" }`).
///
/// The two renderings are not interchangeable: `Display` uses its own text
/// for some variants (e.g. `Deadline` prints as `deadline reached`), so
/// `Display` output should not be parsed back into a variant.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UncertainReason {
/// HTTP 429, or 503 with a `Retry-After` header.
RateLimited,
/// A Cloudflare interstitial / "checking your browser" page.
CloudflareChallenge,
/// A captcha gate.
Captcha,
/// The path is disallowed by the host's `robots.txt` (`--respect-robots`).
RobotsDisallowed,
/// The scan deadline elapsed before this site finished.
Deadline,
/// The executor's scheduler was closed (does not happen in practice).
SchedulerClosed,
/// A transport/network error while issuing the request. The payload is
/// the error detail; `Display` prints it as `request: <detail>`.
Network(String),
/// An error reading the response body. The payload is the error detail;
/// `Display` prints it as `body read: <detail>`.
BodyRead(String),
/// A `bot-protected` site needed the browser backend but the per-scan
/// `--browser-budget` cap was already spent on earlier sites.
BrowserBudget,
/// The username doesn't satisfy the site's `regex_check`
/// (e.g. too short, contains forbidden characters). Reported
/// without issuing any HTTP request — saves both network and the
/// false-positive class where the site 404s on illegal usernames
/// in ways our signal can't tell apart from a missing account.
UsernameNotAllowed,
/// The browser backend itself failed (timeout, navigation error,
/// session drop, …) for a `bot-protected` site. The payload is the
/// failure detail; `Display` prints it as `browser: <detail>`.
BrowserFailed(String),
/// Any other reason (e.g. a `doctor` pre-flight skip). The payload is
/// printed verbatim by `Display`, with no prefix.
Other(String),
}
impl fmt::Display for UncertainReason {
    /// Writes the human-readable text for this reason.
    ///
    /// Each rendering is either a fixed label, a fixed prefix followed by the
    /// variant's detail string, or (for `Other`) the detail on its own. The
    /// text is written straight to the formatter, so width/fill flags have no
    /// effect on the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Break every variant into a static lead-in plus an optional detail
        // tail; unit variants simply have an empty tail.
        let (lead, tail): (&str, &str) = match self {
            Self::RateLimited => ("rate_limited", ""),
            Self::CloudflareChallenge => ("cloudflare_challenge", ""),
            Self::Captcha => ("captcha", ""),
            Self::RobotsDisallowed => ("robots_disallowed", ""),
            Self::Deadline => ("deadline reached", ""),
            Self::SchedulerClosed => ("scheduler closed", ""),
            Self::BrowserBudget => ("browser_budget_exceeded", ""),
            Self::UsernameNotAllowed => ("username_not_allowed", ""),
            Self::Network(text) => ("request: ", text.as_str()),
            Self::BodyRead(text) => ("body read: ", text.as_str()),
            Self::BrowserFailed(text) => ("browser: ", text.as_str()),
            Self::Other(text) => ("", text.as_str()),
        };
        f.write_str(lead)?;
        f.write_str(tail)
    }
}
/// Result of probing a single site for a username.
///
/// Optional parts (`reason`, `enrichment`, `evidence`) are left out of the
/// serialized form when they are `None`/empty and fall back to their
/// defaults when missing on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckOutcome {
/// Site name (matches `Site::name`).
pub site: String,
/// Concrete URL that was requested.
pub url: String,
/// Verdict produced by the site's detection strategy.
pub kind: MatchKind,
/// Why the outcome is `Uncertain`, if it is. `None` for `Found` /
/// `NotFound`. Omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reason: Option<UncertainReason>,
/// Wall-clock duration of the probe, in milliseconds.
pub elapsed_ms: u64,
/// Fields extracted from a `Found` profile when `--enrich` is active
/// (e.g. `name`, `bio`, `avatar`). Empty unless enrichment ran and the
/// site has extractor rules. Ordered by field name. Omitted from
/// serialized output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub enrichment: BTreeMap<String, String>,
/// Human-readable descriptions of the signals that produced the verdict —
/// e.g. `"HTTP 404 (status_not_found)"`. Empty for `Uncertain` (no signal
/// fired). Surfaced by `--explain`. Omitted from serialized output when
/// empty (`skip_serializing_if`), so consumers must treat a missing
/// `evidence` key as an empty list.
// NOTE(review): this doc previously said "always present in JSON output",
// which contradicts the `skip_serializing_if` attribute below — confirm
// which of the two reflects the intended contract.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub evidence: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a GitHub probe outcome with no enrichment and no evidence, so
    /// each test only states the fields it actually cares about.
    fn github_outcome(
        kind: MatchKind,
        reason: Option<UncertainReason>,
        elapsed_ms: u64,
    ) -> CheckOutcome {
        CheckOutcome {
            site: "GitHub".into(),
            url: "https://github.com/alice".into(),
            kind,
            reason,
            elapsed_ms,
            enrichment: BTreeMap::new(),
            evidence: Vec::new(),
        }
    }

    /// Every verdict serializes to its `snake_case` name as a JSON string.
    #[test]
    fn match_kind_serialises_snake_case() {
        let expectations = [
            (MatchKind::Found, "\"found\""),
            (MatchKind::NotFound, "\"not_found\""),
            (MatchKind::Uncertain, "\"uncertain\""),
        ];
        for (kind, expected) in expectations {
            assert_eq!(serde_json::to_string(&kind).unwrap(), expected);
        }
    }

    /// Only `Found` counts as a positive verdict.
    #[test]
    fn match_kind_is_found() {
        assert!(MatchKind::Found.is_found());
        assert!(!MatchKind::NotFound.is_found());
        assert!(!MatchKind::Uncertain.is_found());
    }

    /// A `None` reason and an empty enrichment map leave no trace in the JSON.
    #[test]
    fn outcome_skips_absent_reason() {
        let json = serde_json::to_string(&github_outcome(MatchKind::Found, None, 42)).unwrap();

        assert!(
            !json.contains("reason"),
            "reason field must be omitted when None"
        );
        assert!(
            !json.contains("enrichment"),
            "enrichment must be omitted when empty"
        );
        assert!(json.contains("\"kind\":\"found\""));
        assert!(json.contains("\"elapsed_ms\":42"));
    }

    /// Unit reasons appear as a plain `snake_case` string under `reason`.
    #[test]
    fn unit_reason_serialises_as_snake_case_string() {
        let outcome = github_outcome(
            MatchKind::Uncertain,
            Some(UncertainReason::RateLimited),
            5_000,
        );
        let json = serde_json::to_string(&outcome).unwrap();
        assert!(json.contains("\"reason\":\"rate_limited\""), "{json}");
    }

    /// Detail-carrying reasons use serde's externally-tagged object form.
    #[test]
    fn detail_reason_serialises_as_tagged_object() {
        let reason = UncertainReason::Network("refused".into());
        let json = serde_json::to_string(&reason).unwrap();
        assert_eq!(json, "{\"network\":\"refused\"}");
    }

    /// `Display` keeps the wording of the earlier free-form notes.
    #[test]
    fn reason_display_matches_legacy_note_text() {
        assert_eq!(UncertainReason::RateLimited.to_string(), "rate_limited");
        assert_eq!(UncertainReason::Deadline.to_string(), "deadline reached");

        let network = UncertainReason::Network("boom".into());
        assert_eq!(network.to_string(), "request: boom");
    }
}