misaki_core/
lib.rs

1mod flaresolverr;
2
3use crate::flaresolverr::FlareSolverrClient;
4use anyhow::Result;
5use async_stream::stream;
6use bon::bon;
7use futures::Stream;
8use reqwest::{Client, StatusCode};
9use serde::{Deserialize, Serialize};
10use std::time::Duration;
11
/// Request timeout applied to the HTTP client that `LinkChecker` builds itself
/// when the caller does not supply one.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
13
14/// Represents the status of a URL, including its HTTP status code if available.
15#[derive(Serialize, Deserialize, Debug, Clone)]
16pub struct UrlStatus {
17    /// The URL as a string.
18    pub url: String,
19    /// The HTTP status code returned for the URL, if available.
20    pub status: Option<u16>,
21}
22
23/// A utility for checking the availability and HTTP status of URLs, with optional
24/// integration for FlareSolverr to bypass anti-bot challenges.
25#[derive(Clone, Debug)]
26pub struct LinkChecker {
27    /// The HTTP client used for making requests.
28    client: Client,
29    /// An optional client for interacting with the FlareSolverr service.
30    /// If `None`, FlareSolverr integration is disabled.
31    flaresolverr: Option<FlareSolverrClient>,
32}
33
34impl Default for LinkChecker {
35    /// Initializes a `LinkChecker` with a default `Client` configured
36    /// with a 10-second timeout and no FlareSolverr integration.
37    /// 
38    /// # Panics
39    /// Panics if the underlying [Client::builder] panics.
40    /// Use [Self::builder] if you want to handle the error.
41    fn default() -> Self {
42        Self {
43            client: Client::builder().timeout(DEFAULT_TIMEOUT).build().unwrap(),
44            flaresolverr: None,
45        }
46    }
47}
48
49/// Implements the core functionality for `LinkChecker`.
50#[bon]
51impl LinkChecker {
52    /// Creates a new `LinkChecker` instance.
53    #[builder]
54    pub async fn new(
55        /// The HTTP client to be used for all requests.
56        client: Option<Client>,
57        /// An optional URL for the FlareSolverr service.
58        ///
59        /// If provided, FlareSolverr integration is enabled using this URL,
60        /// and a new session will be established. Otherwise, FlareSolverr
61        /// will not be used for link checking.
62        flaresolverr: Option<String>,
63    ) -> Result<Self> {
64        let client = client
65            .map(Ok)
66            .unwrap_or_else(|| Client::builder().timeout(DEFAULT_TIMEOUT).build())?;
67        let flaresolverr = if let Some(url) = flaresolverr {
68            Some(FlareSolverrClient::new(client.clone(), 60, url).await?)
69        } else {
70            None
71        };
72        Ok(Self {
73            client,
74            flaresolverr,
75        })
76    }
77
78    /// An internal asynchronous helper function to perform a single URL check.
79    async fn checker(
80        url: String,
81        client: Client,
82        flaresolverr: Option<FlareSolverrClient>,
83    ) -> UrlStatus {
84        let result = &client.get(&url).send().await;
85
86        match result {
87            Ok(response) => {
88                let code = response.status();
89                // If a 403 Forbidden status is received, try with FlareSolverr if available
90                if code == StatusCode::FORBIDDEN {
91                    if let Some(solver) = flaresolverr {
92                        solver.check(&url).await
93                    } else {
94                        // If no FlareSolverr, return the 403 status directly
95                        UrlStatus {
96                            url,
97                            status: Some(code.as_u16()),
98                        }
99                    }
100                } else {
101                    // For any other status code, return it directly
102                    UrlStatus {
103                        url,
104                        status: Some(code.as_u16()),
105                    }
106                }
107            }
108            // If the direct request fails (e.g., network error), return UrlStatus with no status code
109            Err(_) => UrlStatus { url, status: None },
110        }
111    }
112
113    /// Checks the status of a single URL.
114    ///
115    /// # Arguments
116    ///
117    /// * `url` - The URL to check.
118    ///
119    /// # Returns
120    ///
121    /// The status of the checked URL.
122    pub async fn check(&self, url: impl Into<String>) -> UrlStatus {
123        Self::checker(url.into(), self.client.clone(), self.flaresolverr.clone()).await
124    }
125
126    /// Checks the status of multiple URLs concurrently.
127    ///
128    /// # Arguments
129    ///
130    /// * `urls` - An iterator over items that can be converted into `String`.
131    ///
132    /// # Returns
133    ///
134    /// A vector containing the `UrlStatus` for each unique URL provided.
135    pub async fn check_all<I, S>(&self, urls: I) -> impl Stream<Item = UrlStatus>
136    where
137        I: IntoIterator<Item = S>,
138        S: Into<String>,
139    {
140        let mut tasks = tokio::task::JoinSet::new();
141        // Spawn a new task for each URL check
142        for url in urls {
143            tasks.spawn(Self::checker(
144                url.into(),
145                self.client.clone(),
146                self.flaresolverr.clone(),
147            ));
148        }
149        // Wait for all tasks to complete and collect their results
150        stream! {
151            while let Some(task) = tasks.join_next().await {
152            if let Ok(status) = task {
153                yield status;
154            }
155        }
156        }
157    }
158
159    /// Close the `LinkChecker` instance, specifically destroying the FlareSolverr
160    /// session if one was active.
161    ///
162    /// Each instance of `LinkChecker` establishes a new session with the FlareSolverr service.
163    /// Sessions must be explicitly destroyed using [`close`](Self::close) when no longer needed.
164    /// Accumulating too many active sessions can degrade FlareSolverr performance.
165    ///
166    /// # Returns
167    ///
168    /// An [`anyhow::Result`] indicating success or an error if the FlareSolverr
169    /// session could not be destroyed.
170    pub async fn close(self) -> Result<()> {
171        // If a FlareSolverr client exists, close its session
172        if let Some(solverr) = self.flaresolverr {
173            solverr.close().await?
174        }
175        Ok(())
176    }
177}