Skip to main content

adler_core/
access.rs

//! Per-site access policy and the egress (proxy) model.
//!
//! Access-engine phase 3: route the raw-HTTP probe path through a
//! geo / IP-type-appropriate egress. A site declares what it needs via
//! [`AccessPolicy`] (e.g. "only reachable from a Polish residential
//! IP"); the client matches that against a configured pool of
//! [`EgressSpec`]s. If the policy is unconstrained the request uses the
//! client's default egress (direct, or the global `--proxy`); if it's
//! constrained but nothing in the pool fits, the probe is reported as
//! `Uncertain(GeoUnavailable)` — **never** a false `NotFound`, since
//! "couldn't reach from the required location" is not "account absent".
//!
//! The browser transport keeps its backend's own egress; this phase
//! routes the HTTP path only.
15
16use std::collections::{BTreeMap, HashMap};
17use std::fmt;
18use std::sync::Arc;
19
20use serde::{Deserialize, Serialize};
21
22use crate::transport::HttpFetcher;
23
24/// ISO-3166-1 alpha-2 country code, stored lowercased (e.g. `pl`, `de`).
25/// A newtype so a geo requirement can't be confused with an arbitrary
26/// string and is validated at the boundary.
27#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
28#[serde(try_from = "String", into = "String")]
29pub struct CountryCode([u8; 2]);
30
31impl CountryCode {
32    /// Parse a two-letter code, lowercasing ASCII. `None` for anything
33    /// that isn't exactly two ASCII letters.
34    #[must_use]
35    pub fn new(s: &str) -> Option<Self> {
36        let b = s.as_bytes();
37        if b.len() == 2 && b[0].is_ascii_alphabetic() && b[1].is_ascii_alphabetic() {
38            Some(Self([b[0].to_ascii_lowercase(), b[1].to_ascii_lowercase()]))
39        } else {
40            None
41        }
42    }
43
44    /// The lowercased two-letter code.
45    #[must_use]
46    pub fn as_str(&self) -> &str {
47        // Constructed only from ASCII letters, so this is always valid.
48        std::str::from_utf8(&self.0).unwrap_or("??")
49    }
50}
51
52impl TryFrom<String> for CountryCode {
53    type Error = String;
54    fn try_from(s: String) -> Result<Self, Self::Error> {
55        Self::new(&s).ok_or_else(|| format!("invalid country code: {s:?}"))
56    }
57}
58
59impl From<CountryCode> for String {
60    fn from(c: CountryCode) -> Self {
61        c.as_str().to_owned()
62    }
63}
64
65/// The kind of network an egress exits from.
66///
67/// A site's `ip_type` requirement is matched against this. (`Direct`
68/// isn't a kind here — the unproxied default egress is selected by an
69/// *unconstrained* policy, not by requesting a kind.)
70#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
71#[serde(rename_all = "kebab-case")]
72#[non_exhaustive]
73pub enum EgressKind {
74    /// A datacenter / hosting-provider IP (cheap, easily fingerprinted
75    /// and blocked). The default when a config entry omits `kind`.
76    #[default]
77    Datacenter,
78    /// A residential ISP IP (harder to block; what most "real users"
79    /// look like).
80    Residential,
81    /// A mobile-carrier IP (shared CGNAT ranges; highest trust on many
82    /// sites).
83    Mobile,
84    /// A Tor exit node.
85    Tor,
86}
87
88/// A configured egress (proxy) the client can route through.
89///
90/// Produced from CLI / config; the live client pairs each spec with its
91/// own HTTP client (reqwest bakes the proxy in at build time).
92/// Deserialises from the `[[egress]]` entries of a proxy-pool config
93/// file.
94#[derive(Debug, Clone, Deserialize)]
95pub struct EgressSpec {
96    /// Proxy URL — `http://`, `https://`, `socks5://`, or `socks5h://`.
97    pub url: String,
98    /// Country this egress exits from, if known.
99    #[serde(default)]
100    pub country: Option<CountryCode>,
101    /// Network kind this egress exits from (defaults to `datacenter`).
102    #[serde(default)]
103    pub kind: EgressKind,
104    /// Operator-supplied identifier for this egress — used by the web
105    /// UI's per-scan egress subset selection (and by any other call
106    /// site that needs to refer to a specific egress by stable name).
107    /// Optional: an unnamed egress still participates in policy-based
108    /// matching, it just can't be selected by name.
109    #[serde(default)]
110    pub name: Option<String>,
111}
112
113/// What a site needs from its egress. The default (empty) means "no
114/// special routing" — the request uses the client's default egress.
115#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
116pub struct AccessPolicy {
117    /// Require an egress in one of these countries.
118    #[serde(default, skip_serializing_if = "Vec::is_empty")]
119    pub geo: Vec<CountryCode>,
120    /// Require an egress of this network kind.
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub ip_type: Option<EgressKind>,
123    /// Name of an operator-supplied session (see `--sessions`) whose
124    /// headers (cookies / auth tokens) this site's probes must carry.
125    /// The site is unreachable without it, so a missing session yields
126    /// `Uncertain(SessionRequired)` rather than a login-wall false
127    /// `NotFound`.
128    #[serde(default, skip_serializing_if = "Option::is_none")]
129    pub session: Option<String>,
130}
131
132impl AccessPolicy {
133    /// True when the policy imposes no constraint at all (the common
134    /// case). Drives `skip_serializing_if` so existing `sites.json`
135    /// entries serialise unchanged.
136    #[must_use]
137    pub fn is_default(&self) -> bool {
138        self.geo.is_empty() && self.ip_type.is_none() && self.session.is_none()
139    }
140}
141
/// An authenticated session supplied by the operator for one site.
///
/// It is simply a set of HTTP headers (typically `Cookie`, sometimes
/// `Authorization` / CSRF tokens) that get applied to the probes of
/// every site whose `access.session` names this session.
///
/// This is "use a real account", not evasion — the operator brings a
/// session they're entitled to. Header *values* are secrets: the field
/// is private, the hand-written `Debug` impl prints header names only,
/// and the type deliberately has no `Serialize` impl.
#[derive(Clone, Default)]
pub struct Session {
    /// Header name → secret value.
    headers: BTreeMap<String, String>,
}
153
154impl Session {
155    /// Build a session from plain header name→value pairs (e.g. parsed
156    /// from a `--sessions` config file).
157    #[must_use]
158    pub fn from_headers(headers: BTreeMap<String, String>) -> Self {
159        Self { headers }
160    }
161
162    /// Merge this session's headers over `base` (the session wins on
163    /// conflict), producing the header set for the outgoing request.
164    pub(crate) fn apply(&self, base: &BTreeMap<String, String>) -> BTreeMap<String, String> {
165        let mut out = base.clone();
166        for (k, v) in &self.headers {
167            out.insert(k.clone(), v.clone());
168        }
169        out
170    }
171}
172
173impl fmt::Debug for Session {
174    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
175        // Redact values — session headers carry cookies / tokens.
176        f.debug_struct("Session")
177            .field("headers", &self.headers.keys().collect::<Vec<_>>())
178            .finish_non_exhaustive()
179    }
180}
181
182/// Named-session store, indexed by the name a site references via
183/// `access.session`. Empty by default → a no-op.
184#[derive(Clone, Default, Debug)]
185pub struct SessionStore {
186    sessions: HashMap<String, Session>,
187}
188
189impl SessionStore {
190    /// An empty store.
191    #[must_use]
192    pub fn new() -> Self {
193        Self::default()
194    }
195
196    /// Insert (or replace) a named session.
197    pub fn insert(&mut self, name: impl Into<String>, session: Session) {
198        self.sessions.insert(name.into(), session);
199    }
200
201    /// True when no session is configured.
202    #[must_use]
203    pub fn is_empty(&self) -> bool {
204        self.sessions.is_empty()
205    }
206
207    /// Number of configured sessions.
208    #[must_use]
209    pub fn len(&self) -> usize {
210        self.sessions.len()
211    }
212
213    pub(crate) fn get(&self, name: &str) -> Option<&Session> {
214        self.sessions.get(name)
215    }
216
217    /// Names of the configured sessions, sorted lexicographically for a
218    /// stable display order. Values stay private — by design the public
219    /// surface only ever leaks the keys an operator referenced via
220    /// `access.session`, never the cookie/token bytes themselves.
221    #[must_use]
222    pub fn names(&self) -> Vec<String> {
223        let mut names: Vec<String> = self.sessions.keys().cloned().collect();
224        names.sort();
225        names
226    }
227}
228
229/// Read-only metadata for one configured egress, surfaced via
230/// [`Client::egress_summary`](crate::Client::egress_summary).
231///
232/// Carries only the match-relevant facets (name + country + kind); the
233/// proxy URL is *deliberately omitted* — those typically embed
234/// credentials (`socks5://user:pass@host:1080`) that have no business
235/// landing in a JSON response served to a browser.
236#[derive(Debug, Clone, Serialize)]
237pub struct EgressSummary {
238    /// Operator-supplied name, if any. Used by per-scan egress subset
239    /// selection (`POST /api/scan` with `egress_names`).
240    #[serde(skip_serializing_if = "Option::is_none")]
241    pub name: Option<String>,
242    /// Country this egress exits from, if declared.
243    #[serde(skip_serializing_if = "Option::is_none")]
244    pub country: Option<CountryCode>,
245    /// Network kind (`datacenter` / `residential` / `mobile` / `tor`).
246    pub kind: EgressKind,
247}
248
249/// One built egress: its match metadata plus the HTTP client that
250/// routes through it.
251struct EgressEntry {
252    name: Option<String>,
253    country: Option<CountryCode>,
254    kind: EgressKind,
255    fetcher: Arc<HttpFetcher>,
256}
257
258/// Runtime pool of built egresses. Empty by default → every site uses
259/// the client's default egress, so an empty pool is a no-op.
260pub(crate) struct EgressPool {
261    entries: Vec<EgressEntry>,
262}
263
264/// Result of matching a site's [`AccessPolicy`] against the pool.
265pub(crate) enum EgressChoice {
266    /// Unconstrained policy → use the client's default egress.
267    Default,
268    /// Route through this egress's HTTP client.
269    Use(Arc<HttpFetcher>),
270    /// Constrained policy with no matching egress → honest
271    /// `Uncertain(GeoUnavailable)` rather than a false `NotFound`.
272    Unavailable,
273}
274
275/// Constructor tuple for [`EgressPool`]: one row per configured proxy
276/// carries its operator-supplied `name` (if any), its country and
277/// kind, and the already-built `reqwest`-backed fetcher.
278pub(crate) type EgressEntryTuple = (
279    Option<String>,
280    Option<CountryCode>,
281    EgressKind,
282    Arc<HttpFetcher>,
283);
284
285impl EgressPool {
286    pub(crate) fn new(entries: Vec<EgressEntryTuple>) -> Self {
287        Self {
288            entries: entries
289                .into_iter()
290                .map(|(name, country, kind, fetcher)| EgressEntry {
291                    name,
292                    country,
293                    kind,
294                    fetcher,
295                })
296                .collect(),
297        }
298    }
299
300    /// Read-only view of the pool — `(name, country, kind)` for every
301    /// configured egress, in the order they were registered. Used by the
302    /// `GET /api/access` endpoint so the SPA can show what's configured
303    /// without ever touching proxy URLs.
304    pub(crate) fn summary(&self) -> Vec<EgressSummary> {
305        self.entries
306            .iter()
307            .map(|e| EgressSummary {
308                name: e.name.clone(),
309                country: e.country.clone(),
310                kind: e.kind,
311            })
312            .collect()
313    }
314
315    /// Return a new pool containing only entries whose `name` matches
316    /// one of `names`. Entries without a name are excluded (they can't
317    /// be referenced by name). `names` being empty is treated as "no
318    /// filter" and a clone of the full pool is returned — that
319    /// preserves the policy-driven default for callers who didn't ask
320    /// for an explicit subset.
321    pub(crate) fn subset(&self, names: &[String]) -> Self {
322        if names.is_empty() {
323            return Self {
324                entries: self
325                    .entries
326                    .iter()
327                    .map(|e| EgressEntry {
328                        name: e.name.clone(),
329                        country: e.country.clone(),
330                        kind: e.kind,
331                        fetcher: Arc::clone(&e.fetcher),
332                    })
333                    .collect(),
334            };
335        }
336        let wanted: std::collections::HashSet<&str> = names.iter().map(String::as_str).collect();
337        Self {
338            entries: self
339                .entries
340                .iter()
341                .filter(|e| e.name.as_deref().is_some_and(|n| wanted.contains(n)))
342                .map(|e| EgressEntry {
343                    name: e.name.clone(),
344                    country: e.country.clone(),
345                    kind: e.kind,
346                    fetcher: Arc::clone(&e.fetcher),
347                })
348                .collect(),
349        }
350    }
351
352    /// Names of egresses configured in this pool, in registration
353    /// order. Used by the server to validate `egress_names` on
354    /// `POST /api/scan`.
355    pub(crate) fn names(&self) -> Vec<String> {
356        self.entries.iter().filter_map(|e| e.name.clone()).collect()
357    }
358
359    /// Pick an egress for `policy`. Unconstrained → [`EgressChoice::Default`].
360    /// Constrained → a random matching egress, or [`EgressChoice::Unavailable`]
361    /// when none fit (geo and/or kind don't match any pool entry).
362    pub(crate) fn select(&self, policy: &AccessPolicy) -> EgressChoice {
363        // Only geo / IP-type constrain the egress; a session-only policy
364        // (no geo, no ip_type) still uses the default egress.
365        if policy.geo.is_empty() && policy.ip_type.is_none() {
366            return EgressChoice::Default;
367        }
368        let matches: Vec<&EgressEntry> = self
369            .entries
370            .iter()
371            .filter(|e| {
372                let geo_ok = policy.geo.is_empty()
373                    || e.country.as_ref().is_some_and(|c| policy.geo.contains(c));
374                let kind_ok = policy.ip_type.is_none_or(|k| e.kind == k);
375                geo_ok && kind_ok
376            })
377            .collect();
378        match matches.len() {
379            0 => EgressChoice::Unavailable,
380            n => EgressChoice::Use(Arc::clone(&matches[fastrand::usize(0..n)].fetcher)),
381        }
382    }
383}
384
#[cfg(test)]
mod tests {
    use super::*;
    use crate::transport::HttpFetcher;

    /// Parse a country code that the test knows to be valid.
    fn cc(s: &str) -> CountryCode {
        CountryCode::new(s).expect("valid country code")
    }

    /// A fetcher backed by a plain, unproxied reqwest client.
    fn dummy_fetcher() -> Arc<HttpFetcher> {
        Arc::new(HttpFetcher::new(reqwest::Client::new()))
    }

    /// Two unnamed egresses: PL residential and DE datacenter.
    fn pool() -> EgressPool {
        let unnamed = |country: &str, kind: EgressKind| -> EgressEntryTuple {
            (None, Some(cc(country)), kind, dummy_fetcher())
        };
        EgressPool::new(vec![
            unnamed("pl", EgressKind::Residential),
            unnamed("de", EgressKind::Datacenter),
        ])
    }

    /// A session-less policy with the given geo list and IP type.
    fn policy(geo: &[&str], ip_type: Option<EgressKind>) -> AccessPolicy {
        AccessPolicy {
            geo: geo.iter().map(|code| cc(code)).collect(),
            ip_type,
            session: None,
        }
    }

    #[test]
    fn country_code_normalises_and_rejects() {
        assert_eq!(CountryCode::new("PL").unwrap().as_str(), "pl");
        for rejected in ["p", "pol", "p1"] {
            assert!(CountryCode::new(rejected).is_none());
        }
    }

    #[test]
    fn unconstrained_policy_uses_default_egress() {
        let unconstrained = AccessPolicy::default();
        assert!(matches!(pool().select(&unconstrained), EgressChoice::Default));
    }

    #[test]
    fn geo_match_picks_an_egress() {
        let choice = pool().select(&policy(&["pl"], None));
        assert!(matches!(choice, EgressChoice::Use(_)));
    }

    #[test]
    fn ip_type_match_picks_an_egress() {
        let choice = pool().select(&policy(&[], Some(EgressKind::Datacenter)));
        assert!(matches!(choice, EgressChoice::Use(_)));
    }

    #[test]
    fn geo_present_but_wrong_kind_is_unavailable() {
        // The pool has PL only as Residential, so a request for a PL
        // *Mobile* egress has to fail instead of falling back.
        let choice = pool().select(&policy(&["pl"], Some(EgressKind::Mobile)));
        assert!(matches!(choice, EgressChoice::Unavailable));
    }

    #[test]
    fn unknown_geo_is_unavailable() {
        let choice = pool().select(&policy(&["jp"], None));
        assert!(matches!(choice, EgressChoice::Unavailable));
    }

    #[test]
    fn empty_pool_with_constraint_is_unavailable() {
        let empty = EgressPool::new(Vec::new());
        let choice = empty.select(&policy(&["pl"], None));
        assert!(matches!(choice, EgressChoice::Unavailable));
    }

    #[test]
    fn session_apply_overrides_base_headers() {
        let base: BTreeMap<String, String> = [("X-IG-App-ID", "936"), ("Cookie", "old")]
            .into_iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect();
        let session_headers: BTreeMap<String, String> =
            std::iter::once(("Cookie".to_string(), "sessionid=real".to_string())).collect();

        let merged = Session::from_headers(session_headers).apply(&base);

        // The session's value replaces the conflicting one; the base
        // header that doesn't conflict is carried over.
        assert_eq!(merged.get("Cookie").unwrap(), "sessionid=real");
        assert_eq!(merged.get("X-IG-App-ID").unwrap(), "936");
    }

    #[test]
    fn session_store_insert_and_lookup() {
        let mut store = SessionStore::new();
        assert!(store.is_empty());

        store.insert("ig", Session::from_headers(BTreeMap::new()));

        assert!(!store.is_empty());
        assert!(store.get("ig").is_some());
        assert!(store.get("missing").is_none());
    }
}