Skip to main content

adler_core/
access.rs

1//! Per-site access policy and the egress (proxy) model.
2//!
3//! Access-engine phase 3: route the raw-HTTP probe path through a
4//! geo / IP-type-appropriate egress. A site declares what it needs via
5//! [`AccessPolicy`] (e.g. "only reachable from a Polish residential
6//! IP"); the client matches that against a configured pool of
7//! [`EgressSpec`]s. If the policy is unconstrained the request uses the
8//! client's default egress (direct, or the global `--proxy`); if it's
9//! constrained but nothing in the pool fits, the probe is reported as
10//! `Uncertain(GeoUnavailable)` — **never** a false `NotFound`, since
11//! "couldn't reach from the required location" is not "account absent".
12//!
13//! The browser transport keeps its backend's own egress; this phase
14//! routes the HTTP path only.
15
16use std::sync::Arc;
17
18use serde::{Deserialize, Serialize};
19
20use crate::transport::HttpFetcher;
21
/// ISO-3166-1 alpha-2 country code, stored lowercased (e.g. `pl`, `de`).
/// A newtype so a geo requirement can't be confused with an arbitrary
/// string and is validated at the boundary.
///
/// Serde goes through `String` in both directions: deserialising runs
/// the `TryFrom<String>` impl (so an invalid code is a deserialisation
/// error), and serialising uses `From<CountryCode> for String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(try_from = "String", into = "String")]
// The two bytes are the ASCII letters of the code; `CountryCode::new`
// lowercases them before storing.
pub struct CountryCode([u8; 2]);
28
29impl CountryCode {
30    /// Parse a two-letter code, lowercasing ASCII. `None` for anything
31    /// that isn't exactly two ASCII letters.
32    #[must_use]
33    pub fn new(s: &str) -> Option<Self> {
34        let b = s.as_bytes();
35        if b.len() == 2 && b[0].is_ascii_alphabetic() && b[1].is_ascii_alphabetic() {
36            Some(Self([b[0].to_ascii_lowercase(), b[1].to_ascii_lowercase()]))
37        } else {
38            None
39        }
40    }
41
42    /// The lowercased two-letter code.
43    #[must_use]
44    pub fn as_str(&self) -> &str {
45        // Constructed only from ASCII letters, so this is always valid.
46        std::str::from_utf8(&self.0).unwrap_or("??")
47    }
48}
49
50impl TryFrom<String> for CountryCode {
51    type Error = String;
52    fn try_from(s: String) -> Result<Self, Self::Error> {
53        Self::new(&s).ok_or_else(|| format!("invalid country code: {s:?}"))
54    }
55}
56
57impl From<CountryCode> for String {
58    fn from(c: CountryCode) -> Self {
59        c.as_str().to_owned()
60    }
61}
62
/// The kind of network an egress exits from.
///
/// A site's `ip_type` requirement is matched against this. (`Direct`
/// isn't a kind here — the unproxied default egress is selected by an
/// *unconstrained* policy, not by requesting a kind.)
///
/// Serialised in kebab-case, i.e. `datacenter`, `residential`,
/// `mobile`, `tor`. Marked `#[non_exhaustive]`, so more kinds may be
/// added later.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[non_exhaustive]
pub enum EgressKind {
    /// A datacenter / hosting-provider IP (cheap, easily fingerprinted
    /// and blocked). The default when a config entry omits `kind`.
    #[default]
    Datacenter,
    /// A residential ISP IP (harder to block; what most "real users"
    /// look like).
    Residential,
    /// A mobile-carrier IP (shared CGNAT ranges; highest trust on many
    /// sites).
    Mobile,
    /// A Tor exit node.
    Tor,
}
85
/// A configured egress (proxy) the client can route through.
///
/// Produced from CLI / config; the live client pairs each spec with its
/// own HTTP client (reqwest bakes the proxy in at build time).
/// Deserialises from the `[[egress]]` entries of a proxy-pool config
/// file. Only `url` is mandatory; `country` and `kind` fall back to
/// their defaults when omitted.
// NOTE(review): the derived `Debug` prints `url` verbatim. If proxy URLs
// can embed credentials (`user:pass@host`), logging a spec would expose
// them — confirm whether that matters for callers.
#[derive(Debug, Clone, Deserialize)]
pub struct EgressSpec {
    /// Proxy URL — `http://`, `https://`, `socks5://`, or `socks5h://`.
    /// Held as a plain `String`; this type does not validate the scheme.
    pub url: String,
    /// Country this egress exits from, if known. `None` when the config
    /// entry omits it.
    #[serde(default)]
    pub country: Option<CountryCode>,
    /// Network kind this egress exits from (defaults to `datacenter`).
    #[serde(default)]
    pub kind: EgressKind,
}
103
/// What a site needs from its egress. The default (empty) means "no
/// special routing" — the request uses the client's default egress.
///
/// When both fields are set, an egress must satisfy both to be chosen
/// by `EgressPool::select`. Each field is left out of serialised output
/// while it is empty / `None`, and defaults to that when absent on input.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct AccessPolicy {
    /// Require an egress in one of these countries. Empty means no
    /// country constraint.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub geo: Vec<CountryCode>,
    /// Require an egress of this network kind. `None` means any kind.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ip_type: Option<EgressKind>,
}
115
116impl AccessPolicy {
117    /// True when the policy imposes no egress constraint (the common
118    /// case). Drives `skip_serializing_if` so existing `sites.json`
119    /// entries serialise unchanged.
120    #[must_use]
121    pub fn is_default(&self) -> bool {
122        self.geo.is_empty() && self.ip_type.is_none()
123    }
124}
125
/// One built egress: its match metadata plus the HTTP client that
/// routes through it.
struct EgressEntry {
    /// Exit country, if known. An entry with `None` here never satisfies
    /// a policy that lists countries.
    country: Option<CountryCode>,
    /// Network kind, compared for equality against a policy's `ip_type`.
    kind: EgressKind,
    /// Shared handle to the HTTP client for this egress; cloned out of
    /// the pool when the entry is selected.
    fetcher: Arc<HttpFetcher>,
}
133
/// Runtime pool of built egresses. Empty by default → every site uses
/// the client's default egress, so an empty pool is a no-op.
pub(crate) struct EgressPool {
    /// Built egresses, in the order they were passed to `EgressPool::new`.
    entries: Vec<EgressEntry>,
}
139
/// Result of matching a site's [`AccessPolicy`] against the pool, as
/// returned by `EgressPool::select`.
pub(crate) enum EgressChoice {
    /// Unconstrained policy → use the client's default egress.
    Default,
    /// Route through this egress's HTTP client (a shared handle cloned
    /// from the matching pool entry).
    Use(Arc<HttpFetcher>),
    /// Constrained policy with no matching egress → honest
    /// `Uncertain(GeoUnavailable)` rather than a false `NotFound`.
    Unavailable,
}
150
151impl EgressPool {
152    pub(crate) fn new(entries: Vec<(Option<CountryCode>, EgressKind, Arc<HttpFetcher>)>) -> Self {
153        Self {
154            entries: entries
155                .into_iter()
156                .map(|(country, kind, fetcher)| EgressEntry {
157                    country,
158                    kind,
159                    fetcher,
160                })
161                .collect(),
162        }
163    }
164
165    /// Pick an egress for `policy`. Unconstrained → [`EgressChoice::Default`].
166    /// Constrained → a random matching egress, or [`EgressChoice::Unavailable`]
167    /// when none fit (geo and/or kind don't match any pool entry).
168    pub(crate) fn select(&self, policy: &AccessPolicy) -> EgressChoice {
169        if policy.is_default() {
170            return EgressChoice::Default;
171        }
172        let matches: Vec<&EgressEntry> = self
173            .entries
174            .iter()
175            .filter(|e| {
176                let geo_ok = policy.geo.is_empty()
177                    || e.country.as_ref().is_some_and(|c| policy.geo.contains(c));
178                let kind_ok = policy.ip_type.is_none_or(|k| e.kind == k);
179                geo_ok && kind_ok
180            })
181            .collect();
182        match matches.len() {
183            0 => EgressChoice::Unavailable,
184            n => EgressChoice::Use(Arc::clone(&matches[fastrand::usize(0..n)].fetcher)),
185        }
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use crate::transport::HttpFetcher;

    /// Shorthand for a country code that is known to be valid.
    fn cc(s: &str) -> CountryCode {
        CountryCode::new(s).expect("valid country code")
    }

    /// A fetcher over a stock client; only its presence in the pool
    /// matters, it is never used to send a request here.
    fn dummy_fetcher() -> Arc<HttpFetcher> {
        let client = reqwest::Client::new();
        Arc::new(HttpFetcher::new(client))
    }

    /// Two-entry pool: a Polish residential and a German datacenter egress.
    fn pool() -> EgressPool {
        let entries = vec![
            (Some(cc("pl")), EgressKind::Residential, dummy_fetcher()),
            (Some(cc("de")), EgressKind::Datacenter, dummy_fetcher()),
        ];
        EgressPool::new(entries)
    }

    /// Builds a policy from country strings and an optional kind.
    fn policy(geo: &[&str], ip_type: Option<EgressKind>) -> AccessPolicy {
        AccessPolicy {
            geo: geo.iter().copied().map(cc).collect(),
            ip_type,
        }
    }

    #[test]
    fn country_code_normalises_and_rejects() {
        let upper = CountryCode::new("PL").unwrap();
        assert_eq!(upper.as_str(), "pl");

        for bad in ["p", "pol", "p1"] {
            assert_eq!(CountryCode::new(bad), None);
        }
    }

    #[test]
    fn unconstrained_policy_uses_default_egress() {
        let unconstrained = AccessPolicy::default();
        assert!(matches!(pool().select(&unconstrained), EgressChoice::Default));
    }

    #[test]
    fn geo_match_picks_an_egress() {
        let wanted = policy(&["pl"], None);
        assert!(matches!(pool().select(&wanted), EgressChoice::Use(_)));
    }

    #[test]
    fn ip_type_match_picks_an_egress() {
        let wanted = policy(&[], Some(EgressKind::Datacenter));
        assert!(matches!(pool().select(&wanted), EgressChoice::Use(_)));
    }

    #[test]
    fn geo_present_but_wrong_kind_is_unavailable() {
        // The pool's only PL egress is Residential, so a PL *Mobile*
        // request has to be refused instead of silently downgraded.
        let wanted = policy(&["pl"], Some(EgressKind::Mobile));
        assert!(matches!(pool().select(&wanted), EgressChoice::Unavailable));
    }

    #[test]
    fn unknown_geo_is_unavailable() {
        let wanted = policy(&["jp"], None);
        assert!(matches!(pool().select(&wanted), EgressChoice::Unavailable));
    }

    #[test]
    fn empty_pool_with_constraint_is_unavailable() {
        let empty = EgressPool::new(Vec::new());
        let wanted = policy(&["pl"], None);
        assert!(matches!(empty.select(&wanted), EgressChoice::Unavailable));
    }
}