Skip to main content

adler_core/
access.rs

1//! Per-site access policy and the egress (proxy) model.
2//!
3//! Access-engine phase 3: route the raw-HTTP probe path through a
4//! geo / IP-type-appropriate egress. A site declares what it needs via
5//! [`AccessPolicy`] (e.g. "only reachable from a Polish residential
6//! IP"); the client matches that against a configured pool of
7//! [`EgressSpec`]s. If the policy is unconstrained the request uses the
8//! client's default egress (direct, or the global `--proxy`); if it's
9//! constrained but nothing in the pool fits, the probe is reported as
10//! `Uncertain(GeoUnavailable)` — **never** a false `NotFound`, since
11//! "couldn't reach from the required location" is not "account absent".
12//!
13//! The browser transport keeps its backend's own egress; this phase
14//! routes the HTTP path only.
15
16use std::collections::{BTreeMap, HashMap};
17use std::fmt;
18use std::sync::Arc;
19
20use serde::{Deserialize, Serialize};
21
22use crate::transport::HttpFetcher;
23
24/// ISO-3166-1 alpha-2 country code, stored lowercased (e.g. `pl`, `de`).
25/// A newtype so a geo requirement can't be confused with an arbitrary
26/// string and is validated at the boundary.
27#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
28#[serde(try_from = "String", into = "String")]
29pub struct CountryCode([u8; 2]);
30
31impl CountryCode {
32    /// Parse a two-letter code, lowercasing ASCII. `None` for anything
33    /// that isn't exactly two ASCII letters.
34    #[must_use]
35    pub fn new(s: &str) -> Option<Self> {
36        let b = s.as_bytes();
37        if b.len() == 2 && b[0].is_ascii_alphabetic() && b[1].is_ascii_alphabetic() {
38            Some(Self([b[0].to_ascii_lowercase(), b[1].to_ascii_lowercase()]))
39        } else {
40            None
41        }
42    }
43
44    /// The lowercased two-letter code.
45    #[must_use]
46    pub fn as_str(&self) -> &str {
47        // Constructed only from ASCII letters, so this is always valid.
48        std::str::from_utf8(&self.0).unwrap_or("??")
49    }
50}
51
52impl TryFrom<String> for CountryCode {
53    type Error = String;
54    fn try_from(s: String) -> Result<Self, Self::Error> {
55        Self::new(&s).ok_or_else(|| format!("invalid country code: {s:?}"))
56    }
57}
58
59impl From<CountryCode> for String {
60    fn from(c: CountryCode) -> Self {
61        c.as_str().to_owned()
62    }
63}
64
65/// The kind of network an egress exits from.
66///
67/// A site's `ip_type` requirement is matched against this. (`Direct`
68/// isn't a kind here — the unproxied default egress is selected by an
69/// *unconstrained* policy, not by requesting a kind.)
70#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
71#[serde(rename_all = "kebab-case")]
72#[non_exhaustive]
73pub enum EgressKind {
74    /// A datacenter / hosting-provider IP (cheap, easily fingerprinted
75    /// and blocked). The default when a config entry omits `kind`.
76    #[default]
77    Datacenter,
78    /// A residential ISP IP (harder to block; what most "real users"
79    /// look like).
80    Residential,
81    /// A mobile-carrier IP (shared CGNAT ranges; highest trust on many
82    /// sites).
83    Mobile,
84    /// A Tor exit node.
85    Tor,
86}
87
88/// A configured egress (proxy) the client can route through.
89///
90/// Produced from CLI / config; the live client pairs each spec with its
91/// own HTTP client (reqwest bakes the proxy in at build time).
92/// Deserialises from the `[[egress]]` entries of a proxy-pool config
93/// file.
94#[derive(Debug, Clone, Deserialize)]
95pub struct EgressSpec {
96    /// Proxy URL — `http://`, `https://`, `socks5://`, or `socks5h://`.
97    pub url: String,
98    /// Country this egress exits from, if known.
99    #[serde(default)]
100    pub country: Option<CountryCode>,
101    /// Network kind this egress exits from (defaults to `datacenter`).
102    #[serde(default)]
103    pub kind: EgressKind,
104}
105
106/// What a site needs from its egress. The default (empty) means "no
107/// special routing" — the request uses the client's default egress.
108#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
109pub struct AccessPolicy {
110    /// Require an egress in one of these countries.
111    #[serde(default, skip_serializing_if = "Vec::is_empty")]
112    pub geo: Vec<CountryCode>,
113    /// Require an egress of this network kind.
114    #[serde(default, skip_serializing_if = "Option::is_none")]
115    pub ip_type: Option<EgressKind>,
116    /// Name of an operator-supplied session (see `--sessions`) whose
117    /// headers (cookies / auth tokens) this site's probes must carry.
118    /// The site is unreachable without it, so a missing session yields
119    /// `Uncertain(SessionRequired)` rather than a login-wall false
120    /// `NotFound`.
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub session: Option<String>,
123}
124
125impl AccessPolicy {
126    /// True when the policy imposes no constraint at all (the common
127    /// case). Drives `skip_serializing_if` so existing `sites.json`
128    /// entries serialise unchanged.
129    #[must_use]
130    pub fn is_default(&self) -> bool {
131        self.geo.is_empty() && self.ip_type.is_none() && self.session.is_none()
132    }
133}
134
/// An operator-supplied authenticated session for a site: a bag of HTTP
/// headers (typically `Cookie`, sometimes `Authorization` / CSRF
/// tokens) applied to probes for sites whose `access.session` names it.
///
/// This is "use a real account", not evasion — the operator brings a
/// session they're entitled to. Header *values* are secrets: the type
/// deliberately has no `Serialize` impl and its `Debug` output lists
/// header names only.
#[derive(Clone, Default)]
pub struct Session {
    /// Header name → value pairs to place on outgoing requests.
    headers: BTreeMap<String, String>,
}

impl Session {
    /// Build a session from plain header name→value pairs (e.g. parsed
    /// from a `--sessions` config file).
    #[must_use]
    pub fn from_headers(headers: BTreeMap<String, String>) -> Self {
        Self { headers }
    }

    /// Merge this session's headers over `base`, producing the header
    /// set for the outgoing request. `base` itself is left untouched.
    ///
    /// The session wins on conflict, and a conflict is decided
    /// case-insensitively: HTTP field names are case-insensitive, so a
    /// base `cookie` entry is replaced by a session `Cookie` entry
    /// instead of both being emitted. Should the session itself contain
    /// names that differ only by case, the one that sorts last in the
    /// map is the one that survives.
    pub(crate) fn apply(&self, base: &BTreeMap<String, String>) -> BTreeMap<String, String> {
        let mut out = base.clone();
        for (name, value) in &self.headers {
            // Drop any spelling of this header already present so the
            // request never carries two competing values for one field.
            out.retain(|existing, _| !existing.eq_ignore_ascii_case(name));
            out.insert(name.clone(), value.clone());
        }
        out
    }
}

impl fmt::Debug for Session {
    /// Print the header names only; values are withheld because they
    /// carry cookies / tokens.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let names: Vec<&String> = self.headers.keys().collect();
        f.debug_struct("Session")
            .field("headers", &names)
            .finish_non_exhaustive()
    }
}
174
175/// Named-session store, indexed by the name a site references via
176/// `access.session`. Empty by default → a no-op.
177#[derive(Clone, Default, Debug)]
178pub struct SessionStore {
179    sessions: HashMap<String, Session>,
180}
181
182impl SessionStore {
183    /// An empty store.
184    #[must_use]
185    pub fn new() -> Self {
186        Self::default()
187    }
188
189    /// Insert (or replace) a named session.
190    pub fn insert(&mut self, name: impl Into<String>, session: Session) {
191        self.sessions.insert(name.into(), session);
192    }
193
194    /// True when no session is configured.
195    #[must_use]
196    pub fn is_empty(&self) -> bool {
197        self.sessions.is_empty()
198    }
199
200    /// Number of configured sessions.
201    #[must_use]
202    pub fn len(&self) -> usize {
203        self.sessions.len()
204    }
205
206    pub(crate) fn get(&self, name: &str) -> Option<&Session> {
207        self.sessions.get(name)
208    }
209}
210
211/// One built egress: its match metadata plus the HTTP client that
212/// routes through it.
213struct EgressEntry {
214    country: Option<CountryCode>,
215    kind: EgressKind,
216    fetcher: Arc<HttpFetcher>,
217}
218
219/// Runtime pool of built egresses. Empty by default → every site uses
220/// the client's default egress, so an empty pool is a no-op.
221pub(crate) struct EgressPool {
222    entries: Vec<EgressEntry>,
223}
224
225/// Result of matching a site's [`AccessPolicy`] against the pool.
226pub(crate) enum EgressChoice {
227    /// Unconstrained policy → use the client's default egress.
228    Default,
229    /// Route through this egress's HTTP client.
230    Use(Arc<HttpFetcher>),
231    /// Constrained policy with no matching egress → honest
232    /// `Uncertain(GeoUnavailable)` rather than a false `NotFound`.
233    Unavailable,
234}
235
236impl EgressPool {
237    pub(crate) fn new(entries: Vec<(Option<CountryCode>, EgressKind, Arc<HttpFetcher>)>) -> Self {
238        Self {
239            entries: entries
240                .into_iter()
241                .map(|(country, kind, fetcher)| EgressEntry {
242                    country,
243                    kind,
244                    fetcher,
245                })
246                .collect(),
247        }
248    }
249
250    /// Pick an egress for `policy`. Unconstrained → [`EgressChoice::Default`].
251    /// Constrained → a random matching egress, or [`EgressChoice::Unavailable`]
252    /// when none fit (geo and/or kind don't match any pool entry).
253    pub(crate) fn select(&self, policy: &AccessPolicy) -> EgressChoice {
254        // Only geo / IP-type constrain the egress; a session-only policy
255        // (no geo, no ip_type) still uses the default egress.
256        if policy.geo.is_empty() && policy.ip_type.is_none() {
257            return EgressChoice::Default;
258        }
259        let matches: Vec<&EgressEntry> = self
260            .entries
261            .iter()
262            .filter(|e| {
263                let geo_ok = policy.geo.is_empty()
264                    || e.country.as_ref().is_some_and(|c| policy.geo.contains(c));
265                let kind_ok = policy.ip_type.is_none_or(|k| e.kind == k);
266                geo_ok && kind_ok
267            })
268            .collect();
269        match matches.len() {
270            0 => EgressChoice::Unavailable,
271            n => EgressChoice::Use(Arc::clone(&matches[fastrand::usize(0..n)].fetcher)),
272        }
273    }
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::transport::HttpFetcher;

    /// Parse a country code that the test author knows to be valid.
    fn cc(s: &str) -> CountryCode {
        CountryCode::new(s).expect("valid country code")
    }

    /// A fetcher backed by a stock reqwest client; the tests only check
    /// which variant `select` returns and never send a request.
    fn dummy_fetcher() -> Arc<HttpFetcher> {
        let client = reqwest::Client::new();
        Arc::new(HttpFetcher::new(client))
    }

    /// Two-entry pool: a Polish residential egress and a German
    /// datacenter egress.
    fn pool() -> EgressPool {
        let entries = vec![
            (Some(cc("pl")), EgressKind::Residential, dummy_fetcher()),
            (Some(cc("de")), EgressKind::Datacenter, dummy_fetcher()),
        ];
        EgressPool::new(entries)
    }

    /// A session-less policy with the given geo list and IP type.
    fn policy(geo: &[&str], ip_type: Option<EgressKind>) -> AccessPolicy {
        AccessPolicy {
            geo: geo.iter().copied().map(cc).collect(),
            ip_type,
            session: None,
        }
    }

    #[test]
    fn country_code_normalises_and_rejects() {
        assert_eq!(CountryCode::new("PL").unwrap().as_str(), "pl");
        for rejected in ["p", "pol", "p1"] {
            assert!(CountryCode::new(rejected).is_none());
        }
    }

    #[test]
    fn unconstrained_policy_uses_default_egress() {
        let unconstrained = AccessPolicy::default();
        assert!(matches!(
            pool().select(&unconstrained),
            EgressChoice::Default
        ));
    }

    #[test]
    fn geo_match_picks_an_egress() {
        let wanted = policy(&["pl"], None);
        assert!(matches!(pool().select(&wanted), EgressChoice::Use(_)));
    }

    #[test]
    fn ip_type_match_picks_an_egress() {
        let wanted = policy(&[], Some(EgressKind::Datacenter));
        assert!(matches!(pool().select(&wanted), EgressChoice::Use(_)));
    }

    #[test]
    fn geo_present_but_wrong_kind_is_unavailable() {
        // The pool's only PL egress is Residential, so a PL *Mobile*
        // request has to fail instead of quietly falling back.
        let wanted = policy(&["pl"], Some(EgressKind::Mobile));
        assert!(matches!(pool().select(&wanted), EgressChoice::Unavailable));
    }

    #[test]
    fn unknown_geo_is_unavailable() {
        let wanted = policy(&["jp"], None);
        assert!(matches!(pool().select(&wanted), EgressChoice::Unavailable));
    }

    #[test]
    fn empty_pool_with_constraint_is_unavailable() {
        let empty = EgressPool::new(Vec::new());
        let wanted = policy(&["pl"], None);
        assert!(matches!(empty.select(&wanted), EgressChoice::Unavailable));
    }

    #[test]
    fn session_apply_overrides_base_headers() {
        let base: BTreeMap<String, String> = [("X-IG-App-ID", "936"), ("Cookie", "old")]
            .into_iter()
            .map(|(name, value)| (name.to_string(), value.to_string()))
            .collect();
        let session_headers: BTreeMap<String, String> =
            std::iter::once(("Cookie".to_string(), "sessionid=real".to_string())).collect();
        let merged = Session::from_headers(session_headers).apply(&base);
        // The session's value replaces the conflicting base value, while
        // the unrelated base header survives.
        assert_eq!(merged.get("Cookie").unwrap(), "sessionid=real");
        assert_eq!(merged.get("X-IG-App-ID").unwrap(), "936");
    }

    #[test]
    fn session_store_insert_and_lookup() {
        let mut store = SessionStore::new();
        assert!(store.is_empty());
        store.insert("ig", Session::from_headers(BTreeMap::new()));
        assert!(!store.is_empty());
        assert!(store.get("ig").is_some());
        assert!(store.get("missing").is_none());
    }
}