retina_fetch/
fetch.rs

1// Copyright (C) 2023 Tristan Gerritsen <tristan@thewoosh.org>
2// All Rights Reserved.
3
4use std::{sync::Arc, path::Path};
5
6use log::{warn, trace};
7use tokio::{runtime::Runtime, sync::mpsc::channel};
8use url::Url;
9
10use crate::{
11    Error,
12    FetchPromise,
13    FetchResponse,
14    InternalError,
15    NetworkError,
16    Request,
17    RequestReferrer,
18    Response,
19};
20
21type HyperConnector = hyper_tls::HttpsConnector<hyper::client::HttpConnector>;
22type HyperClient = hyper::client::Client<HyperConnector>;
23
24/// This struct contains the [Fetch] object, which can be used to fetch
25/// resources using the internet. This object can be safely shared across
26/// components and threads.
27#[derive(Clone, Debug)]
28pub struct Fetch {
29    client: HyperClient,
30    runtime: Arc<tokio::runtime::Runtime>,
31    user_agent_product: Arc<str>,
32}
33
34impl Fetch {
35    /// Create a new FetchPromise that resolves instantaneously.
36    pub fn create_instantaneous_response(&self, request: Arc<Request>, response: FetchResponse) -> FetchPromise {
37        let (sender, receiver) = channel(1);
38        self.runtime.spawn(async move {
39            sender.send(response).await.unwrap();
40        });
41        FetchPromise {
42            request,
43            receiver
44        }
45    }
46
47    /// Create a new [Fetch] object.
48    pub fn new() -> Self {
49        Self::with_user_agent("Mozilla/5.0 Retina-Fetch")
50    }
51
52    /// Create a new [Fetch] object.
53    pub fn with_user_agent<S>(user_agent: S) -> Self
54            where S: Into<Arc<str>> {
55        let connector = HyperConnector::new();
56        let client = hyper::client::Client::builder().build::<_, hyper::Body>(connector);
57
58        let runtime = tokio::runtime::Builder::new_multi_thread()
59            .enable_all()
60            .build()
61            .unwrap();
62
63        let runtime = Arc::new(runtime);
64
65        spawn_runtime_stopper(Arc::clone(&runtime));
66
67        Self {
68            client,
69            runtime,
70            user_agent_product: user_agent.into(),
71        }
72    }
73
74    /// Load the resource associated with the [`request`][Request]
75    /// asynchronously.
76    pub fn fetch(&self, request: Request) -> FetchPromise {
77        let request = Arc::new(request);
78
79        match request.url.scheme() {
80            "file" => self.fetch_file(request),
81            "http" | "https" => self.fetch_http(request),
82            _ => self.fetch_unknown_scheme(request),
83        }
84    }
85
86    /// Fetch a document, given the [`url`][Url] and optionally a
87    /// [referrer][RequestReferrer].
88    pub fn fetch_document(&self, url: Url, referrer: RequestReferrer) -> FetchPromise {
89        if url.scheme() == "about" {
90            return self.fetch_document_about(url);
91        }
92
93        if url.scheme() == "file" {
94            return self.fetch_document_file(url);
95        }
96
97        self.fetch(Request::get_document(url, referrer))
98    }
99
100    /// Fetch a `about` document.
101    fn fetch_document_about(&self, url: Url) -> FetchPromise {
102        let body = match url.path() {
103            // https://fetch.spec.whatwg.org/#scheme-fetch
104            "blank" => "",
105            _ => "", // TODO
106        };
107
108        let request = Arc::new(Request::get_document(url, RequestReferrer::default()));
109
110        self.create_instantaneous_response(
111            Arc::clone(&request),
112            Ok(Response::new_about(request, body)),
113        )
114    }
115
116    /// Fetch a file from the filesystem.
117    fn fetch_file(&self, request: Arc<Request>) -> FetchPromise {
118        let (sender, receiver) = channel(1);
119
120        let task_request = Arc::clone(&request);
121        self.runtime.spawn(async move {
122            let request = task_request;
123            let mut path = request.url.path();
124
125            if cfg!(windows) && path.starts_with('/') {
126                path = &path[1..];
127            }
128
129            let path = Path::new(path);
130            if !path.exists() {
131                sender.send(Err(Error::NetworkError(NetworkError::LocalFileNotFound))).await.unwrap();
132                return;
133            }
134
135            let file = tokio::fs::File::open(path).await.unwrap();
136            let decoder = tokio_util::codec::BytesCodec::new();
137
138            let file = tokio_util::codec::FramedRead::new(file, decoder);
139            sender.send(Ok(Response::new_file(request, file))).await.unwrap();
140        });
141
142        FetchPromise {
143            request,
144            receiver,
145        }
146    }
147
148    /// Fetch using the HTTP protocol, this also includes the TLS-wrapped HTTPS.
149    fn fetch_http(&self, request: Arc<Request>) -> FetchPromise {
150        let task_client = self.client.clone();
151        let task_request = Arc::clone(&request);
152
153        let (sender, receiver) = channel(1);
154
155        let user_agent = Arc::clone(&self.user_agent_product);
156        self.runtime.spawn(async move {
157            let client = task_client;
158            let request = task_request;
159
160            let mut hyper_request = hyper::Request::builder()
161                .uri(request.url.as_str())
162                .method(&request.method)
163                .header(http::header::ACCEPT, request.accept_header_value())
164                .header(http::header::CONNECTION, "keep-alive")
165                .header(http::header::USER_AGENT, user_agent.as_ref())
166                .header("Sec-Fetch-Dest", request.destination.as_str())
167                .header("Sec-Fetch-Mode", request.mode.as_str())
168            ;
169
170            // TODO follow <https://w3c.github.io/webappsec-referrer-policy/#determine-requests-referrer>
171            if let RequestReferrer::Url(url) = &request.referrer {
172                hyper_request = hyper_request.header(http::header::REFERER, url.to_string());
173            }
174
175            let hyper_request = hyper_request
176                .body(hyper::Body::empty());
177
178            let hyper_request = match hyper_request {
179                Ok(request) => request,
180                Err(e) => {
181                    log::warn!("Failed to build request: {e}");
182                    sender.send(Err(Error::InternalError(InternalError::HyperError))).await.unwrap();
183                    return;
184                }
185            };
186
187            let response = match client.request(hyper_request).await {
188                Ok(response) => {
189                    if response.status().is_redirection() {
190                        trace!("Redirection from {}", request.url.as_str());
191                    }
192
193                    if response.status().is_client_error() || response.status().is_server_error() {
194                        warn!("Failed to fetch \"{}\": {}", request.url.as_ref(), response.status());
195                        warn!("Response Headers: {:#?}", response.headers());
196                    }
197
198                    Ok((request, response).into())
199                }
200                Err(e) => Err(e.into()),
201            };
202
203            sender.send(response).await.unwrap();
204        });
205
206        FetchPromise {
207            request,
208            receiver,
209        }
210    }
211
212    /// Handle a fetch to an unknown scheme.
213    fn fetch_unknown_scheme(&self, request: Arc<Request>) -> FetchPromise {
214        warn!("Unknown scheme: \"{}\" for URL: {}", request.url().scheme(), request.url().as_str());
215        warn!("{:#?}", request.url());
216        if request.destination == crate::RequestDestination::Document {
217            self.fetch_unknown_scheme_document(request)
218        } else {
219            self.fetch_unknown_scheme_asset(request)
220        }
221    }
222
223    /// Asset in the sense of non-documents
224    fn fetch_unknown_scheme_asset(&self, request: Arc<Request>) -> FetchPromise {
225        self.create_instantaneous_response(
226            Arc::clone(&request),
227            Ok(Response::new_about(request, "")),
228        )
229    }
230
231    fn fetch_unknown_scheme_document(&self, request: Arc<Request>) -> FetchPromise {
232        self.create_instantaneous_response(
233            Arc::clone(&request),
234            Ok(Response::new_about(request, "Unknown URL scheme.")),
235        )
236    }
237
238    fn fetch_document_file(&self, url: Url) -> FetchPromise {
239        self.fetch_file(Arc::new(Request::get_document(url, RequestReferrer::default())))
240    }
241}
242
243fn spawn_runtime_stopper(runtime: Arc<Runtime>) {
244    std::thread::spawn(move || {
245        loop {
246            std::thread::sleep(std::time::Duration::from_secs(30));
247
248            if Arc::weak_count(&runtime) != 0 {
249                continue;
250            }
251
252            if Arc::strong_count(&runtime) == 1 {
253                return;
254            }
255        }
256    });
257}