misaki_core/lib.rs
1mod flaresolverr;
2
3use crate::flaresolverr::FlareSolverrClient;
4use anyhow::Result;
5use async_stream::stream;
6use bon::bon;
7use futures::Stream;
8use reqwest::{Client, StatusCode};
9use serde::{Deserialize, Serialize};
10use std::time::Duration;
11
/// Request timeout (10 seconds) applied to every HTTP client this module
/// builds on its own, i.e. when the caller does not supply a client.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
13
14/// Represents the status of a URL, including its HTTP status code if available.
15#[derive(Serialize, Deserialize, Debug, Clone)]
16pub struct UrlStatus {
17 /// The URL as a string.
18 pub url: String,
19 /// The HTTP status code returned for the URL, if available.
20 pub status: Option<u16>,
21}
22
23/// A utility for checking the availability and HTTP status of URLs, with optional
24/// integration for FlareSolverr to bypass anti-bot challenges.
25#[derive(Clone, Debug)]
26pub struct LinkChecker {
27 /// The HTTP client used for making requests.
28 client: Client,
29 /// An optional client for interacting with the FlareSolverr service.
30 /// If `None`, FlareSolverr integration is disabled.
31 flaresolverr: Option<FlareSolverrClient>,
32}
33
34impl Default for LinkChecker {
35 /// Initializes a `LinkChecker` with a default `Client` configured
36 /// with a 10-second timeout and no FlareSolverr integration.
37 fn default() -> Self {
38 Self {
39 client: Client::builder().timeout(DEFAULT_TIMEOUT).build().unwrap(),
40 flaresolverr: None,
41 }
42 }
43}
44
45/// Implements the core functionality for `LinkChecker`.
46#[bon]
47impl LinkChecker {
48 /// Creates a new `LinkChecker` instance.
49 #[builder]
50 pub async fn new(
51 /// The HTTP client to be used for all requests.
52 client: Option<Client>,
53 /// An optional URL for the FlareSolverr service.
54 ///
55 /// If provided, FlareSolverr integration is enabled using this URL,
56 /// and a new session will be established. Otherwise, FlareSolverr
57 /// will not be used for link checking.
58 flaresolverr: Option<String>,
59 ) -> Result<Self> {
60 let client = client
61 .map(Ok)
62 .unwrap_or_else(|| Client::builder().timeout(DEFAULT_TIMEOUT).build())?;
63 let flaresolverr = if let Some(url) = flaresolverr {
64 Some(FlareSolverrClient::new(client.clone(), 60, url).await?)
65 } else {
66 None
67 };
68 Ok(Self {
69 client,
70 flaresolverr,
71 })
72 }
73
74 /// An internal asynchronous helper function to perform a single URL check.
75 async fn checker(
76 url: String,
77 client: Client,
78 flaresolverr: Option<FlareSolverrClient>,
79 ) -> UrlStatus {
80 let result = &client.get(&url).send().await;
81
82 match result {
83 Ok(response) => {
84 let code = response.status();
85 // If a 403 Forbidden status is received, try with FlareSolverr if available
86 if code == StatusCode::FORBIDDEN {
87 if let Some(solver) = flaresolverr {
88 solver.check(&url).await
89 } else {
90 // If no FlareSolverr, return the 403 status directly
91 UrlStatus {
92 url,
93 status: Some(code.as_u16()),
94 }
95 }
96 } else {
97 // For any other status code, return it directly
98 UrlStatus {
99 url,
100 status: Some(code.as_u16()),
101 }
102 }
103 }
104 // If the direct request fails (e.g., network error), return UrlStatus with no status code
105 Err(_) => UrlStatus { url, status: None },
106 }
107 }
108
109 /// Checks the status of a single URL.
110 ///
111 /// # Arguments
112 ///
113 /// * `url` - The URL to check.
114 ///
115 /// # Returns
116 ///
117 /// The status of the checked URL.
118 pub async fn check(&self, url: impl Into<String>) -> UrlStatus {
119 Self::checker(url.into(), self.client.clone(), self.flaresolverr.clone()).await
120 }
121
122 /// Checks the status of multiple URLs concurrently.
123 ///
124 /// # Arguments
125 ///
126 /// * `urls` - An iterator over items that can be converted into `String`.
127 ///
128 /// # Returns
129 ///
130 /// A vector containing the `UrlStatus` for each unique URL provided.
131 pub async fn check_all<I, S>(&self, urls: I) -> impl Stream<Item = UrlStatus>
132 where
133 I: IntoIterator<Item = S>,
134 S: Into<String>,
135 {
136 let mut tasks = tokio::task::JoinSet::new();
137 // Spawn a new task for each URL check
138 for url in urls {
139 tasks.spawn(Self::checker(
140 url.into(),
141 self.client.clone(),
142 self.flaresolverr.clone(),
143 ));
144 }
145 // Wait for all tasks to complete and collect their results
146 stream! {
147 while let Some(task) = tasks.join_next().await {
148 if let Ok(status) = task {
149 yield status;
150 }
151 }
152 }
153 }
154
155 /// Close the `LinkChecker` instance, specifically destroying the FlareSolverr
156 /// session if one was active.
157 ///
158 /// Each instance of `LinkChecker` establishes a new session with the FlareSolverr service.
159 /// Sessions must be explicitly destroyed using [`close`](Self::close) when no longer needed.
160 /// Accumulating too many active sessions can degrade FlareSolverr performance.
161 ///
162 /// # Returns
163 ///
164 /// An [`anyhow::Result`] indicating success or an error if the FlareSolverr
165 /// session could not be destroyed.
166 pub async fn close(self) -> Result<()> {
167 // If a FlareSolverr client exists, close its session
168 if let Some(solverr) = self.flaresolverr {
169 solverr.close().await?
170 }
171 Ok(())
172 }
173}