lead_oxide/
proxy.rs

1//! [`Proxy`][Proxy]s represent information about the proxies returned by
2//! [`Fetcher`][crate::fetcher::Fetcher].
3
4use std::{net::SocketAddrV4, time::Duration};
5
6use crate::{
7    constants::REPO_URI,
8    types::{Level, Protocol},
9};
10
11use chrono::NaiveDateTime;
12use iso_country::Country;
13use serde::{de::Deserializer, Deserialize};
14
15/// Internal
16#[derive(Deserialize, Clone, Debug, PartialEq)]
17struct Response {
18    pub data: Vec<RawProxy>,
19}
20
21/// Internal
22#[derive(Deserialize, Clone, Debug, PartialEq)]
23struct RawProxy {
24    #[serde(rename = "ipPort")]
25    socket: SocketAddrV4,
26    #[serde(deserialize_with = "ignore_bad_countries")]
27    country: Country,
28    last_checked: String,
29    #[serde(rename = "proxy_level")]
30    level: Level,
31    #[serde(rename = "type")]
32    protocol: Protocol,
33    #[serde(rename = "speed")]
34    time_to_connect: String,
35    #[serde(rename = "support")]
36    supports: RawSupports,
37}
38
39// Sometimes country codes other than iso 3166-1 are returned so switch those to unspecified
40/// Internal
41fn ignore_bad_countries<'de, D>(deserializer: D) -> Result<Country, D::Error>
42where
43    D: Deserializer<'de>,
44{
45    Deserialize::deserialize(deserializer).or(Ok(Country::Unspecified))
46}
47
48/// Internal
49#[derive(Deserialize, Clone, Copy, Debug, PartialEq)]
50struct RawSupports {
51    https: Option<u8>,
52    get: Option<u8>,
53    post: Option<u8>,
54    cookies: Option<u8>,
55    referer: Option<u8>,
56    #[serde(rename = "user_agent")]
57    forwards_user_agent: Option<u8>,
58    #[serde(rename = "google")]
59    connects_to_google: Option<u8>,
60}
61
62/// All the information representing a proxy.
63///
64/// Typically most people will likely only use the `socket` value, but this contains all the
65/// information on a proxy.
66#[derive(Clone, Debug, PartialEq)]
67pub struct Proxy {
68    pub socket: SocketAddrV4,
69    pub country: Country,
70    pub last_checked: NaiveDateTime,
71    pub level: Level,
72    pub protocol: Protocol,
73    pub time_to_connect: Duration,
74    pub supports: Supports,
75}
76
77impl From<RawProxy> for Proxy {
78    fn from(raw: RawProxy) -> Self {
79        let last_checked = NaiveDateTime::parse_from_str(&raw.last_checked, "%F %T")
80            .unwrap_or_else(|_| {
81                panic!(
82                    "The API returned an invalid time. Please raise an issue to address this at {}",
83                    REPO_URI
84                )
85            });
86
87        let secs_to_connect = raw.time_to_connect.parse().unwrap_or_else(|_| {
88            panic!(
89                "The API returned an invalid int. Please raise an issue to address this at {}",
90                REPO_URI
91            )
92        });
93        let time_to_connect = Duration::from_secs(secs_to_connect);
94
95        Self {
96            socket: raw.socket,
97            country: raw.country,
98            last_checked,
99            level: raw.level,
100            protocol: raw.protocol,
101            time_to_connect,
102            supports: Supports::from(raw.supports),
103        }
104    }
105}
106
107/// Internal
108pub(crate) fn proxies_from_json(json: &str) -> Result<Vec<Proxy>, serde_json::Error> {
109    let resp: Response = serde_json::from_str(json)?;
110    Ok(resp
111        .data
112        .into_iter()
113        .map(Proxy::from)
114        // Just to play it safe we filter out any results with an incorrect country field. We could
115        // be smarter and only use this in the presence of a blocklist if this causes issues. Just
116        // to note this is typically less than 10% or responses.
117        .filter(|&Proxy { country, .. }| country != Country::Unspecified)
118        .collect())
119}
120
/// Represents all the attributes that the [`Proxy`][Proxy] supports.
///
/// Each field mirrors one flag of the API's `support` object. A flag is `true` only when the API
/// reported it as `1`; missing (`null`) flags are treated as `false`.
///
/// The type is a plain set of booleans, so it implements `Eq` and `Hash` in addition to
/// `PartialEq` and can be used as a map or set key.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct Supports {
    /// Set from the API's `https` flag.
    pub https: bool,
    /// Set from the API's `get` flag.
    pub get: bool,
    /// Set from the API's `post` flag.
    pub post: bool,
    /// Set from the API's `cookies` flag.
    pub cookies: bool,
    /// Set from the API's `referer` flag.
    pub referer: bool,
    /// Set from the API's `user_agent` flag.
    pub forwards_user_agent: bool,
    /// Set from the API's `google` flag.
    pub connects_to_google: bool,
}
132
133impl From<RawSupports> for Supports {
134    fn from(raw: RawSupports) -> Self {
135        let parse_field = |field| match field {
136            Some(val) => val == 1,
137            // null is assumed to be false just to play it safe
138            None => false,
139        };
140
141        Self {
142            https: parse_field(raw.https),
143            get: parse_field(raw.get),
144            post: parse_field(raw.post),
145            cookies: parse_field(raw.cookies),
146            referer: parse_field(raw.referer),
147            forwards_user_agent: parse_field(raw.forwards_user_agent),
148            connects_to_google: parse_field(raw.connects_to_google),
149        }
150    }
151}
152
#[cfg(test)]
mod test {
    use super::*;

    use chrono::NaiveDate;
    use std::{fs, path::Path};

    /// Parses the sample response under `tests/samples` and compares the result with
    /// hand-written expectations.
    #[test]
    fn deserialization() -> Result<(), serde_json::Error> {
        // Load the fixture
        let fixture = Path::new("tests").join("samples").join("response.json");
        let json = fs::read_to_string(&fixture).expect("Can't open the response file");

        // Run the code under test
        let parsed = proxies_from_json(&json)?;

        // Every sample entry was checked on the same day
        let day = NaiveDate::from_ymd(2020, 12, 13);

        // Values shared by most of the expected entries; each entry overrides what differs
        let template = Proxy {
            socket: "1.2.3.4:1234".parse().unwrap(),
            country: Country::US,
            last_checked: day.and_hms(0, 0, 0),
            level: Level::Elite,
            protocol: Protocol::Http,
            time_to_connect: Duration::from_secs(0),
            supports: Supports {
                https: false,
                get: true,
                post: true,
                cookies: true,
                referer: true,
                forwards_user_agent: true,
                connects_to_google: false,
            },
        };

        // The proxy with an empty country field got filtered out
        let first = Proxy {
            socket: "67.225.164.154:80".parse().unwrap(),
            last_checked: day.and_hms(20, 6, 41),
            time_to_connect: Duration::from_secs(10),
            ..template
        };
        let second = Proxy {
            socket: "35.181.4.4:80".parse().unwrap(),
            last_checked: day.and_hms(20, 10, 11),
            time_to_connect: Duration::from_secs(1),
            supports: Supports {
                forwards_user_agent: true,
                ..Supports::default()
            },
            ..template
        };
        let third = Proxy {
            socket: "89.24.76.185:32842".parse().unwrap(),
            country: Country::CZ,
            last_checked: day.and_hms(20, 1, 52),
            protocol: Protocol::Socks5,
            time_to_connect: Duration::from_secs(18),
            ..template
        };
        let fourth = Proxy {
            socket: "125.99.120.166:40390".parse().unwrap(),
            country: Country::IN,
            last_checked: day.and_hms(20, 10, 11),
            protocol: Protocol::Socks4,
            time_to_connect: Duration::from_secs(14),
            ..template
        };
        let expected = vec![first, second, third, fourth];

        // Compare entry by entry first so a failure names the offending index
        for (i, (got, want)) in parsed.iter().zip(&expected).enumerate() {
            eprintln!("Checking proxy {}", i);
            assert_eq!(got, want);
        }
        // Then compare the whole lists, which also catches a length mismatch
        assert_eq!(parsed, expected);

        Ok(())
    }
}