misaki_core/
lib.rs

1mod flaresolverr;
2
3use crate::flaresolverr::FlareSolverrClient;
4use anyhow::Result;
5use async_stream::stream;
6use bon::bon;
7use futures::Stream;
8use reqwest::{Client, StatusCode};
9use serde::{Deserialize, Serialize};
10use std::time::Duration;
11
/// Request timeout applied to the HTTP client that `LinkChecker` builds
/// when the caller does not supply one.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
13
14/// Represents the status of a URL, including its HTTP status code if available.
15#[derive(Serialize, Deserialize, Debug, Clone)]
16pub struct UrlStatus {
17    /// The URL as a string.
18    pub url: String,
19    /// The HTTP status code returned for the URL, if available.
20    pub status: Option<u16>,
21}
22
23/// A utility for checking the availability and HTTP status of URLs, with optional
24/// integration for FlareSolverr to bypass anti-bot challenges.
25#[derive(Clone, Debug)]
26pub struct LinkChecker {
27    /// The HTTP client used for making requests.
28    client: Client,
29    /// An optional client for interacting with the FlareSolverr service.
30    /// If `None`, FlareSolverr integration is disabled.
31    flaresolverr: Option<FlareSolverrClient>,
32}
33
34impl Default for LinkChecker {
35    /// Initializes a `LinkChecker` with a default `Client` configured
36    /// with a 10-second timeout and no FlareSolverr integration.
37    fn default() -> Self {
38        Self {
39            client: Client::builder().timeout(DEFAULT_TIMEOUT).build().unwrap(),
40            flaresolverr: None,
41        }
42    }
43}
44
45/// Implements the core functionality for `LinkChecker`.
46#[bon]
47impl LinkChecker {
48    /// Creates a new `LinkChecker` instance.
49    #[builder]
50    pub async fn new(
51        /// The HTTP client to be used for all requests.
52        client: Option<Client>,
53        /// An optional URL for the FlareSolverr service.
54        ///
55        /// If provided, FlareSolverr integration is enabled using this URL,
56        /// and a new session will be established. Otherwise, FlareSolverr
57        /// will not be used for link checking.
58        flaresolverr: Option<String>,
59    ) -> Result<Self> {
60        let client = client
61            .map(Ok)
62            .unwrap_or_else(|| Client::builder().timeout(DEFAULT_TIMEOUT).build())?;
63        let flaresolverr = if let Some(url) = flaresolverr {
64            Some(FlareSolverrClient::new(client.clone(), 60, url).await?)
65        } else {
66            None
67        };
68        Ok(Self {
69            client,
70            flaresolverr,
71        })
72    }
73
74    /// An internal asynchronous helper function to perform a single URL check.
75    async fn checker(
76        url: String,
77        client: Client,
78        flaresolverr: Option<FlareSolverrClient>,
79    ) -> UrlStatus {
80        let result = &client.get(&url).send().await;
81
82        match result {
83            Ok(response) => {
84                let code = response.status();
85                // If a 403 Forbidden status is received, try with FlareSolverr if available
86                if code == StatusCode::FORBIDDEN {
87                    if let Some(solver) = flaresolverr {
88                        solver.check(&url).await
89                    } else {
90                        // If no FlareSolverr, return the 403 status directly
91                        UrlStatus {
92                            url,
93                            status: Some(code.as_u16()),
94                        }
95                    }
96                } else {
97                    // For any other status code, return it directly
98                    UrlStatus {
99                        url,
100                        status: Some(code.as_u16()),
101                    }
102                }
103            }
104            // If the direct request fails (e.g., network error), return UrlStatus with no status code
105            Err(_) => UrlStatus { url, status: None },
106        }
107    }
108
109    /// Checks the status of a single URL.
110    ///
111    /// # Arguments
112    ///
113    /// * `url` - The URL to check.
114    ///
115    /// # Returns
116    ///
117    /// The status of the checked URL.
118    pub async fn check(&self, url: impl Into<String>) -> UrlStatus {
119        Self::checker(url.into(), self.client.clone(), self.flaresolverr.clone()).await
120    }
121
122    /// Checks the status of multiple URLs concurrently.
123    ///
124    /// # Arguments
125    ///
126    /// * `urls` - An iterator over items that can be converted into `String`.
127    ///
128    /// # Returns
129    ///
130    /// A vector containing the `UrlStatus` for each unique URL provided.
131    pub async fn check_all<I, S>(&self, urls: I) -> impl Stream<Item = UrlStatus>
132    where
133        I: IntoIterator<Item = S>,
134        S: Into<String>,
135    {
136        let mut tasks = tokio::task::JoinSet::new();
137        // Spawn a new task for each URL check
138        for url in urls {
139            tasks.spawn(Self::checker(
140                url.into(),
141                self.client.clone(),
142                self.flaresolverr.clone(),
143            ));
144        }
145        // Wait for all tasks to complete and collect their results
146        stream! {
147            while let Some(task) = tasks.join_next().await {
148            if let Ok(status) = task {
149                yield status;
150            }
151        }
152        }
153    }
154
155    /// Close the `LinkChecker` instance, specifically destroying the FlareSolverr
156    /// session if one was active.
157    ///
158    /// Each instance of `LinkChecker` establishes a new session with the FlareSolverr service.
159    /// Sessions must be explicitly destroyed using [`close`](Self::close) when no longer needed.
160    /// Accumulating too many active sessions can degrade FlareSolverr performance.
161    ///
162    /// # Returns
163    ///
164    /// An [`anyhow::Result`] indicating success or an error if the FlareSolverr
165    /// session could not be destroyed.
166    pub async fn close(self) -> Result<()> {
167        // If a FlareSolverr client exists, close its session
168        if let Some(solverr) = self.flaresolverr {
169            solverr.close().await?
170        }
171        Ok(())
172    }
173}