actix_prerender/
middleware.rs

1use std::rc::Rc;
2
3use actix_service::Service;
4use actix_web::body::{EitherBody, MessageBody};
5use actix_web::dev::{ServiceRequest, ServiceResponse};
6use actix_web::http::header::ContentEncoding;
7use actix_web::http::uri::PathAndQuery;
8use actix_web::http::{header, Method};
9use actix_web::{Error, HttpResponse};
10use futures_util::future::LocalBoxFuture;
11use futures_util::TryFutureExt;
12use log::trace;
13use reqwest::header::HeaderMap;
14use reqwest::Client;
15use url::Url;
16
17use crate::error::PrerenderError;
18use crate::{IGNORED_EXTENSIONS, USER_AGENTS};
19
20pub struct Inner {
21    pub(crate) inner_client: Client,
22
23    pub(crate) forward_headers: bool,
24    pub(crate) before_render_fn: Option<fn(&ServiceRequest, &mut HeaderMap)>,
25
26    pub(crate) prerender_service_url: Url,
27    pub(crate) prerender_token: Option<String>,
28}
29
30pub(crate) fn prerender_url() -> Url {
31    Url::parse("https://service.prerender.io").unwrap()
32}
33
34/// Decides if should prerender the page or not.
35///
36/// Will NOT prerender on the following cases:
37/// * HTTP is not GET or HEAD
38/// * User agent is NOT crawler
39/// * Is requesting a resource on `IGNORED_EXTENSIONS`
40pub(crate) fn should_prerender(req: &ServiceRequest) -> bool {
41    let request_headers = req.headers();
42    let mut is_crawler = false;
43
44    if ![Method::GET, Method::HEAD].contains(req.method()) {
45        return false;
46    }
47
48    let req_ua_lowercase = if let Some(user_agent) = request_headers.get(header::USER_AGENT) {
49        let user_agent = user_agent.to_str().map(str::to_lowercase);
50        if let Ok(ua) = user_agent {
51            ua
52        } else {
53            return false;
54        }
55    } else {
56        return false;
57    };
58
59    if USER_AGENTS
60        .iter()
61        .any(|crawler_ua| req_ua_lowercase.contains(&*crawler_ua.to_lowercase()))
62    {
63        is_crawler = true;
64    }
65
66    // check for ignored extensions
67    let is_ignored_extension_url = req.uri().path_and_query().map_or_else(
68        || false,
69        |path_query| IGNORED_EXTENSIONS.iter().any(|ext| path_query.as_str().contains(ext)),
70    );
71    if is_ignored_extension_url {
72        return false;
73    }
74
75    is_crawler
76}
77
78#[derive()]
79pub struct PrerenderMiddleware<S> {
80    pub(crate) service: S,
81    pub(crate) inner: Rc<Inner>,
82}
83
84impl<S> PrerenderMiddleware<S> {
85    pub fn prepare_build_api_url(service_url: &Url, req: &ServiceRequest) -> String {
86        let req_uri = req.uri();
87        let req_headers = req.headers();
88
89        let mut scheme = req.uri().scheme_str().unwrap_or("http");
90
91        // handle visitors using Cloudflare Flexible SSL
92        if let Some(Ok(hdr_value)) = req_headers.get("cf-visitor").map(|val| val.to_str()) {
93            let index = hdr_value.rmatch_indices("http").collect::<Vec<_>>().remove(0).0;
94            scheme = &hdr_value[index..hdr_value.len() - 1];
95        }
96
97        if let Some(Ok(hdr_value)) = req_headers.get("X-Forwarded-Proto").map(|val| val.to_str()) {
98            scheme = hdr_value.split(',').collect::<Vec<_>>().remove(0);
99        }
100
101        let host = req
102            .uri()
103            .host()
104            .or_else(|| req_headers.get("X-Forwarded-Host").and_then(|hdr| hdr.to_str().ok()))
105            .or_else(|| req_headers.get(header::HOST).and_then(|hdr| hdr.to_str().ok()))
106            .unwrap();
107
108        let url_path_query = req_uri.path_and_query().map(PathAndQuery::as_str).unwrap();
109        format!("{}render?url={}://{}{}", service_url, scheme, host, url_path_query)
110    }
111
112    pub async fn get_rendered_response(inner: &Inner, req: ServiceRequest) -> Result<ServiceResponse, PrerenderError> {
113        let mut prerender_headers = HeaderMap::new();
114
115        // we forward every header, with the exception of `HOST`
116        if inner.forward_headers {
117            req.headers().iter().for_each(|a| {
118                prerender_headers.append(a.0, a.1.to_str().unwrap().parse().unwrap());
119            });
120            prerender_headers.remove(header::HOST);
121        }
122
123        prerender_headers.insert(header::ACCEPT_ENCODING, ContentEncoding::Gzip.to_header_value());
124        if let Some(token) = &inner.prerender_token {
125            prerender_headers.append("X-Prerender-Token", token.parse().unwrap());
126        }
127
128        if let Some(before_render_fn) = &inner.before_render_fn {
129            before_render_fn(&req, &mut prerender_headers);
130        }
131
132        let url_to_request = Self::prepare_build_api_url(&inner.prerender_service_url, &req);
133        trace!("sending request to: {}", &url_to_request);
134
135        let prerender_response = inner
136            .inner_client
137            .get(url_to_request)
138            .headers(prerender_headers)
139            .send()
140            .and_then(reqwest::Response::bytes)
141            .await?;
142
143        let http_response = HttpResponse::Ok().content_type("text/html").body(prerender_response);
144        Ok(req.into_response(http_response))
145    }
146}
147
148impl<S, B> Service<ServiceRequest> for PrerenderMiddleware<S>
149where
150    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
151    S::Future: 'static,
152    B: MessageBody,
153{
154    type Response = ServiceResponse<EitherBody<B>>;
155    type Error = Error;
156    type Future = LocalBoxFuture<'static, Result<ServiceResponse<EitherBody<B>>, Error>>;
157
158    actix_service::forward_ready!(service);
159
160    fn call(&self, req: ServiceRequest) -> Self::Future {
161        // life goes on
162        if !should_prerender(&req) {
163            let fut = self.service.call(req);
164            return Box::pin(async move { fut.await.map(ServiceResponse::map_into_left_body) });
165        }
166
167        let inner = Rc::clone(&self.inner);
168        Box::pin(async move {
169            Self::get_rendered_response(&inner, req)
170                .await
171                .map(ServiceResponse::map_into_right_body)
172                .map_err(Into::into)
173        })
174    }
175}
176
#[cfg(test)]
mod tests {
    use actix_web::dev::ServiceRequest;
    use actix_web::http::header;
    use actix_web::middleware::Compat;
    use actix_web::test::TestRequest;
    use actix_web::App;
    use reqwest::header::HeaderMap;
    use url::Url;

    use crate::builder::Prerender;
    use crate::middleware::{prerender_url, should_prerender, PrerenderMiddleware};

    /// User agent of a regular browser.
    const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0";
    /// User agent of the Google crawler.
    const GOOGLEBOT_UA: &str = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
    /// User agent of the LinkedIn crawler.
    const LINKEDIN_BOT_UA: &str =
        "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)";

    /// Prerender service used by the URL-building tests.
    const LOCAL_SERVICE_URL: &str = "http://localhost:5000";
    /// Plain-HTTP page requested by the scheme-override tests.
    const MARKET_URL: &str = "http://mercadoskin.com.br/market/csgo";
    /// Service URL expected once the scheme of `MARKET_URL` is upgraded to HTTPS.
    const MARKET_URL_AS_HTTPS_API_URL: &str =
        "http://localhost:5000/render?url=https://mercadoskin.com.br/market/csgo";

    fn _init_logger() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    fn _create_middleware() -> Prerender {
        Prerender::build().use_prerender_io("".to_string())
    }

    /// Sets the user agent and the request URI on a request builder.
    fn with_user_agent(builder: TestRequest, user_agent: &str, uri: &str) -> TestRequest {
        builder.insert_header((header::USER_AGENT, user_agent)).uri(uri)
    }

    /// Builds the service URL for `req` against `LOCAL_SERVICE_URL`.
    fn local_api_url(req: &ServiceRequest) -> String {
        let service_url = Url::parse(LOCAL_SERVICE_URL).unwrap();
        PrerenderMiddleware::<()>::prepare_build_api_url(&service_url, req)
    }

    #[actix_web::test]
    async fn compat_compat() {
        App::new().wrap(Compat::new(Prerender::build().use_prerender_io("".to_string())));
    }

    #[actix_web::test]
    async fn test_human_valid_resource() {
        let req = with_user_agent(
            TestRequest::get(),
            FIREFOX_UA,
            "http://yourserver.com/clothes/tshirts?query=xl",
        )
        .to_srv_request();

        assert!(!should_prerender(&req));
    }

    #[actix_web::test]
    async fn test_crawler_valid_resource() {
        let req = with_user_agent(
            TestRequest::get(),
            GOOGLEBOT_UA,
            "http://yourserver.com/clothes/tshirts?query=xl",
        )
        .to_srv_request();

        assert!(should_prerender(&req));
    }

    #[actix_web::test]
    async fn test_crawler_ignored_resource() {
        let req = with_user_agent(
            TestRequest::get(),
            LINKEDIN_BOT_UA,
            "http://yourserver.com/clothes/tshirts/blue.jpg",
        )
        .to_srv_request();

        assert!(!should_prerender(&req));
    }

    #[actix_web::test]
    async fn test_crawler_wrong_http_method() {
        let req = with_user_agent(
            TestRequest::post(),
            LINKEDIN_BOT_UA,
            "http://yourserver.com/clothes/tshirts/red-dotted",
        )
        .to_srv_request();

        let render = should_prerender(&req);
        assert!(!render);
    }

    #[actix_web::test]
    async fn test_url_common() {
        let req_url = "http://yourserver.com/clothes/tshirts/red-dotted";
        let req = with_user_agent(TestRequest::post(), LINKEDIN_BOT_UA, req_url).to_srv_request();

        // hosted prerender.io service
        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&prerender_url(), &req),
            format!("{}render?url={}", prerender_url(), req_url)
        );

        // self-hosted service
        assert_eq!(
            local_api_url(&req),
            format!("http://localhost:5000/render?url={}", req_url)
        );
    }

    #[actix_web::test]
    async fn test_url_https() {
        let req_url = "https://mercadoskin.com.br/market/csgo";
        let req = with_user_agent(TestRequest::get(), LINKEDIN_BOT_UA, req_url).to_srv_request();

        assert_eq!(
            local_api_url(&req),
            format!("http://localhost:5000/render?url={}", req_url)
        );
    }

    #[actix_web::test]
    async fn test_url_x_forwarded_proto_single() {
        let req = with_user_agent(TestRequest::get(), LINKEDIN_BOT_UA, MARKET_URL)
            .insert_header(("X-Forwarded-Proto", "https"))
            .to_srv_request();

        assert_eq!(local_api_url(&req), MARKET_URL_AS_HTTPS_API_URL.to_string());
    }

    #[actix_web::test]
    async fn test_url_x_forwarded_proto_double() {
        let req = with_user_agent(TestRequest::get(), LINKEDIN_BOT_UA, MARKET_URL)
            .insert_header(("X-Forwarded-Proto", "https,http"))
            .to_srv_request();

        assert_eq!(local_api_url(&req), MARKET_URL_AS_HTTPS_API_URL.to_string());
    }

    #[actix_web::test]
    async fn test_url_cf_visitor() {
        let req = with_user_agent(TestRequest::get(), LINKEDIN_BOT_UA, MARKET_URL)
            .insert_header(("cf-visitor", r#""scheme":"https""#))
            .to_srv_request();

        assert_eq!(local_api_url(&req), MARKET_URL_AS_HTTPS_API_URL.to_string());
    }

    #[actix_web::test]
    async fn test_forward_cookies() {
        fn pointer_fn(_req: &ServiceRequest, _map: &mut HeaderMap) {}

        let _req = with_user_agent(TestRequest::get(), LINKEDIN_BOT_UA, MARKET_URL)
            .insert_header(("cf-visitor", r#""scheme":"https""#))
            .to_srv_request();

        // Only checks that a hook and a custom service URL can be configured together.
        let _middleware = Prerender::build()
            .set_before_render_fn(pointer_fn)
            .use_custom_prerender_url("https://localhost:3001")
            .unwrap();
    }
}