actix_prerender/
middleware.rs1use std::rc::Rc;
2
3use actix_service::Service;
4use actix_web::body::{EitherBody, MessageBody};
5use actix_web::dev::{ServiceRequest, ServiceResponse};
6use actix_web::http::header::ContentEncoding;
7use actix_web::http::uri::PathAndQuery;
8use actix_web::http::{header, Method};
9use actix_web::{Error, HttpResponse};
10use futures_util::future::LocalBoxFuture;
11use futures_util::TryFutureExt;
12use log::trace;
13use reqwest::header::HeaderMap;
14use reqwest::Client;
15use url::Url;
16
17use crate::error::PrerenderError;
18use crate::{IGNORED_EXTENSIONS, USER_AGENTS};
19
20pub struct Inner {
21 pub(crate) inner_client: Client,
22
23 pub(crate) forward_headers: bool,
24 pub(crate) before_render_fn: Option<fn(&ServiceRequest, &mut HeaderMap)>,
25
26 pub(crate) prerender_service_url: Url,
27 pub(crate) prerender_token: Option<String>,
28}
29
30pub(crate) fn prerender_url() -> Url {
31 Url::parse("https://service.prerender.io").unwrap()
32}
33
34pub(crate) fn should_prerender(req: &ServiceRequest) -> bool {
41 let request_headers = req.headers();
42 let mut is_crawler = false;
43
44 if ![Method::GET, Method::HEAD].contains(req.method()) {
45 return false;
46 }
47
48 let req_ua_lowercase = if let Some(user_agent) = request_headers.get(header::USER_AGENT) {
49 let user_agent = user_agent.to_str().map(str::to_lowercase);
50 if let Ok(ua) = user_agent {
51 ua
52 } else {
53 return false;
54 }
55 } else {
56 return false;
57 };
58
59 if USER_AGENTS
60 .iter()
61 .any(|crawler_ua| req_ua_lowercase.contains(&*crawler_ua.to_lowercase()))
62 {
63 is_crawler = true;
64 }
65
66 let is_ignored_extension_url = req.uri().path_and_query().map_or_else(
68 || false,
69 |path_query| IGNORED_EXTENSIONS.iter().any(|ext| path_query.as_str().contains(ext)),
70 );
71 if is_ignored_extension_url {
72 return false;
73 }
74
75 is_crawler
76}
77
78#[derive()]
79pub struct PrerenderMiddleware<S> {
80 pub(crate) service: S,
81 pub(crate) inner: Rc<Inner>,
82}
83
84impl<S> PrerenderMiddleware<S> {
85 pub fn prepare_build_api_url(service_url: &Url, req: &ServiceRequest) -> String {
86 let req_uri = req.uri();
87 let req_headers = req.headers();
88
89 let mut scheme = req.uri().scheme_str().unwrap_or("http");
90
91 if let Some(Ok(hdr_value)) = req_headers.get("cf-visitor").map(|val| val.to_str()) {
93 let index = hdr_value.rmatch_indices("http").collect::<Vec<_>>().remove(0).0;
94 scheme = &hdr_value[index..hdr_value.len() - 1];
95 }
96
97 if let Some(Ok(hdr_value)) = req_headers.get("X-Forwarded-Proto").map(|val| val.to_str()) {
98 scheme = hdr_value.split(',').collect::<Vec<_>>().remove(0);
99 }
100
101 let host = req
102 .uri()
103 .host()
104 .or_else(|| req_headers.get("X-Forwarded-Host").and_then(|hdr| hdr.to_str().ok()))
105 .or_else(|| req_headers.get(header::HOST).and_then(|hdr| hdr.to_str().ok()))
106 .unwrap();
107
108 let url_path_query = req_uri.path_and_query().map(PathAndQuery::as_str).unwrap();
109 format!("{}render?url={}://{}{}", service_url, scheme, host, url_path_query)
110 }
111
112 pub async fn get_rendered_response(inner: &Inner, req: ServiceRequest) -> Result<ServiceResponse, PrerenderError> {
113 let mut prerender_headers = HeaderMap::new();
114
115 if inner.forward_headers {
117 req.headers().iter().for_each(|a| {
118 prerender_headers.append(a.0, a.1.to_str().unwrap().parse().unwrap());
119 });
120 prerender_headers.remove(header::HOST);
121 }
122
123 prerender_headers.insert(header::ACCEPT_ENCODING, ContentEncoding::Gzip.to_header_value());
124 if let Some(token) = &inner.prerender_token {
125 prerender_headers.append("X-Prerender-Token", token.parse().unwrap());
126 }
127
128 if let Some(before_render_fn) = &inner.before_render_fn {
129 before_render_fn(&req, &mut prerender_headers);
130 }
131
132 let url_to_request = Self::prepare_build_api_url(&inner.prerender_service_url, &req);
133 trace!("sending request to: {}", &url_to_request);
134
135 let prerender_response = inner
136 .inner_client
137 .get(url_to_request)
138 .headers(prerender_headers)
139 .send()
140 .and_then(reqwest::Response::bytes)
141 .await?;
142
143 let http_response = HttpResponse::Ok().content_type("text/html").body(prerender_response);
144 Ok(req.into_response(http_response))
145 }
146}
147
148impl<S, B> Service<ServiceRequest> for PrerenderMiddleware<S>
149where
150 S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
151 S::Future: 'static,
152 B: MessageBody,
153{
154 type Response = ServiceResponse<EitherBody<B>>;
155 type Error = Error;
156 type Future = LocalBoxFuture<'static, Result<ServiceResponse<EitherBody<B>>, Error>>;
157
158 actix_service::forward_ready!(service);
159
160 fn call(&self, req: ServiceRequest) -> Self::Future {
161 if !should_prerender(&req) {
163 let fut = self.service.call(req);
164 return Box::pin(async move { fut.await.map(ServiceResponse::map_into_left_body) });
165 }
166
167 let inner = Rc::clone(&self.inner);
168 Box::pin(async move {
169 Self::get_rendered_response(&inner, req)
170 .await
171 .map(ServiceResponse::map_into_right_body)
172 .map_err(Into::into)
173 })
174 }
175}
176
#[cfg(test)]
mod tests {

    use actix_web::dev::ServiceRequest;
    use actix_web::http::header;
    use actix_web::middleware::Compat;
    use actix_web::test::TestRequest;
    use actix_web::App;
    use reqwest::header::HeaderMap;
    use url::Url;

    use crate::builder::Prerender;
    use crate::middleware::{prerender_url, should_prerender, PrerenderMiddleware};

    /// User agent of a regular (human-operated) browser.
    const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0";
    /// User agent of the Google crawler.
    const GOOGLEBOT_UA: &str = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
    /// User agent of the LinkedIn crawler.
    const LINKEDIN_BOT_UA: &str =
        "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)";

    fn _init_logger() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    fn _create_middleware() -> Prerender {
        Prerender::build().use_prerender_io(String::new())
    }

    /// Finishes `builder` into a service request for `uri` sent by `user_agent`.
    fn request_with_agent(builder: TestRequest, user_agent: &'static str, uri: &str) -> ServiceRequest {
        builder
            .insert_header((header::USER_AGENT, user_agent))
            .uri(uri)
            .to_srv_request()
    }

    /// Finishes `builder` into a service request for `uri` sent by the LinkedIn crawler.
    fn linkedin_request(builder: TestRequest, uri: &str) -> ServiceRequest {
        request_with_agent(builder, LINKEDIN_BOT_UA, uri)
    }

    /// URL of a locally hosted prerender service.
    fn local_service_url() -> Url {
        Url::parse("http://localhost:5000").unwrap()
    }

    #[actix_web::test]
    async fn compat_compat() {
        App::new().wrap(Compat::new(Prerender::build().use_prerender_io(String::new())));
    }

    #[actix_web::test]
    async fn test_human_valid_resource() {
        let req = request_with_agent(
            TestRequest::get(),
            FIREFOX_UA,
            "http://yourserver.com/clothes/tshirts?query=xl",
        );

        assert!(!should_prerender(&req));
    }

    #[actix_web::test]
    async fn test_crawler_valid_resource() {
        let req = request_with_agent(
            TestRequest::get(),
            GOOGLEBOT_UA,
            "http://yourserver.com/clothes/tshirts?query=xl",
        );

        assert!(should_prerender(&req));
    }

    #[actix_web::test]
    async fn test_crawler_ignored_resource() {
        let req = linkedin_request(TestRequest::get(), "http://yourserver.com/clothes/tshirts/blue.jpg");

        assert!(!should_prerender(&req));
    }

    #[actix_web::test]
    async fn test_crawler_wrong_http_method() {
        let req = linkedin_request(TestRequest::post(), "http://yourserver.com/clothes/tshirts/red-dotted");

        let render = should_prerender(&req);
        assert!(!render);
    }

    #[actix_web::test]
    async fn test_url_common() {
        let req_url = "http://yourserver.com/clothes/tshirts/red-dotted";
        let req = linkedin_request(TestRequest::post(), req_url);

        // Hosted prerender.io service.
        let hosted_expected = format!("{}render?url={}", prerender_url(), req_url);
        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&prerender_url(), &req),
            hosted_expected
        );

        // Self-hosted service.
        let local_expected = format!("http://localhost:5000/render?url={}", req_url);
        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&local_service_url(), &req),
            local_expected
        );
    }

    #[actix_web::test]
    async fn test_url_https() {
        let req_url = "https://mercadoskin.com.br/market/csgo";
        let req = linkedin_request(TestRequest::get(), req_url);

        let expected = format!("http://localhost:5000/render?url={}", req_url);
        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&local_service_url(), &req),
            expected
        );
    }

    #[actix_web::test]
    async fn test_url_x_forwarded_proto_single() {
        let req_url = "http://mercadoskin.com.br/market/csgo";
        let req = linkedin_request(
            TestRequest::get().insert_header(("X-Forwarded-Proto", "https")),
            req_url,
        );

        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&local_service_url(), &req),
            "http://localhost:5000/render?url=https://mercadoskin.com.br/market/csgo".to_string()
        );
    }

    #[actix_web::test]
    async fn test_url_x_forwarded_proto_double() {
        let req_url = "http://mercadoskin.com.br/market/csgo";
        let req = linkedin_request(
            TestRequest::get().insert_header(("X-Forwarded-Proto", "https,http")),
            req_url,
        );

        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&local_service_url(), &req),
            "http://localhost:5000/render?url=https://mercadoskin.com.br/market/csgo".to_string()
        );
    }

    #[actix_web::test]
    async fn test_url_cf_visitor() {
        let req_url = "http://mercadoskin.com.br/market/csgo";
        let req = linkedin_request(
            TestRequest::get().insert_header(("cf-visitor", r#""scheme":"https""#)),
            req_url,
        );

        assert_eq!(
            PrerenderMiddleware::<()>::prepare_build_api_url(&local_service_url(), &req),
            "http://localhost:5000/render?url=https://mercadoskin.com.br/market/csgo".to_string()
        );
    }

    #[actix_web::test]
    async fn test_forward_cookies() {
        let req_url = "http://mercadoskin.com.br/market/csgo";

        fn pointer_fn(_req: &ServiceRequest, _map: &mut HeaderMap) {}

        let _req = linkedin_request(
            TestRequest::get().insert_header(("cf-visitor", r#""scheme":"https""#)),
            req_url,
        );

        let _middleware = Prerender::build()
            .set_before_render_fn(pointer_fn)
            .use_custom_prerender_url("https://localhost:3001")
            .unwrap();
    }
}