Skip to content

Commit e885d20

Browse files
committed
feat: first commit
Signed-off-by: Martin <martin@hotmail.com.br>
0 parents  commit e885d20

File tree

6 files changed

+338
-0
lines changed

6 files changed

+338
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/target
2+
Cargo.lock
3+
/.idea/

Cargo.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[package]
2+
name = "actix-prerender"
3+
version = "0.1.0"
4+
edition = "2021"
5+
authors = ["Martin Mariano <contato@martinmariano.com>"]
6+
description = "Actix middleware that sends requests to Prerender.io or a custom Prerender service URL."
7+
repository = ""
8+
license = "MIT"
9+
10+
[dependencies]
11+
actix-service = "2"
12+
actix-utils = "3"
13+
actix-web = { version = "4", default-features = false }
14+
smartstring = "1"
15+
16+
futures-util = "0.3"
17+
log = "0.4"
18+
19+
[dev-dependencies]
20+
actix-web = { version = "4", default-features = false, features = ["macros"] }
21+
env_logger = "0.9"

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# actix-prerender
2+
3+
A very simple middleware that sends requests that come from common crawler
4+
user-agents to be pre-rendered via "prerender".
5+
6+
It accepts the external service provided by `prerender.io`, or a custom external
7+
`prerender_service_url`.

src/consts.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/// Substrings that identify crawler and link-preview user agents.
///
/// Entries are stored in mixed case exactly as listed here, so any comparison
/// against a request's `User-Agent` header has to be case-insensitive.
pub const USER_AGENTS: &[&str; 35] = &[
    // Search engines
    "googlebot",
    "Yahoo! Slurp",
    "bingbot",
    "yandex",
    "baiduspider",
    // Social networks, messengers and link-preview fetchers
    "facebookexternalhit",
    "twitterbot",
    "rogerbot",
    "linkedinbot",
    "embedly",
    "quora link preview",
    "showyoubot",
    "outbrain",
    "pinterest/0.",
    "developers.google.com/+/web/snippet",
    "slackbot",
    "vkShare",
    "W3C_Validator",
    "redditbot",
    "Applebot",
    "WhatsApp",
    "flipboard",
    "tumblr",
    "bitlybot",
    "SkypeUriPreview",
    "nuzzel",
    "Discordbot",
    "Google Page Speed",
    "Qwantify",
    "pinterestbot",
    "Bitrix link preview",
    "XING-contenttabreceiver",
    "Chrome-Lighthouse",
    "TelegramBot",
    "SeznamBot",
];
38+
39+
/// File extensions of static resources that are never prerendered.
///
/// Every entry is lowercase and includes the leading dot. Each extension is
/// listed exactly once.
pub const IGNORED_EXTENSIONS: &[&str; 40] = &[
    ".css",
    ".xml",
    ".less",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".pdf",
    ".doc",
    ".txt",
    ".ico",
    ".rss",
    ".zip",
    ".mp3",
    ".rar",
    ".exe",
    ".wmv",
    ".avi",
    ".ppt",
    ".mpg",
    ".mpeg",
    ".tif",
    ".wav",
    ".mov",
    ".psd",
    ".ai",
    ".xls",
    ".mp4",
    ".m4a",
    ".swf",
    ".dat",
    ".dmg",
    ".iso",
    ".flv",
    ".m4v",
    ".torrent",
    ".woff",
    ".ttf",
    ".svg",
    ".webmanifest",
];

src/lib.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//! Prerender for Actix Web
2+
3+
#![forbid(unsafe_code)]
4+
#![deny(nonstandard_style)]
5+
#![allow(clippy::must_use_candidate)]
6+
#![warn(future_incompatible, missing_debug_implementations)]
7+
#![doc(html_logo_url = "https://actix.rs/img/logo.png")]
8+
#![doc(html_favicon_url = "https://actix.rs/favicon.ico")]
9+
10+
use consts::{IGNORED_EXTENSIONS, USER_AGENTS};
11+
12+
mod consts;
13+
pub mod middleware;
14+
15+
// impl<S, B> Service<ServiceRequest> for PrerenderMiddleWare
16+
// where
17+
// S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
18+
// S::Future: 'static,
19+
// B: MessageBody + 'static,
20+
// {
21+
// type Response = ServiceResponse<EitherBody<B>>;
22+
// type Error = Error;
23+
// type Future = LocalBoxFuture<'static, Result<ServiceResponse<EitherBody<B>>, Error>>;
24+
//
25+
// actix_service::forward_ready!(service);
26+
//
27+
// fn call(&self, req: ServiceRequest) -> Self::Future {
28+
// todo!()
29+
// }
30+
// }

src/middleware.rs

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
use crate::{IGNORED_EXTENSIONS, USER_AGENTS};
2+
use actix_web::dev::ServiceRequest;
3+
use actix_web::http::header::HeaderMap;
4+
use actix_web::http::uri::PathAndQuery;
5+
use actix_web::http::{header, Method};
6+
7+
/// Configuration for forwarding crawler requests to a prerender service.
#[derive(Debug)]
struct PrerenderMiddleware {
    /// Base URL of the prerender service; the original request URL is
    /// appended to it when the prerender target is built.
    prerender_service_url: String,
}
11+
12+
impl PrerenderMiddleware {
13+
pub fn prepare_build_api_url(&self, req: &ServiceRequest) -> String {
14+
let req_uri = req.uri();
15+
let req_headers = req.headers();
16+
17+
// TODO: this.host?
18+
let host = req
19+
.uri()
20+
.host()
21+
.or_else(|| {
22+
req_headers
23+
.get("X-Forwarded-Host")
24+
.and_then(|hdr| hdr.to_str().ok())
25+
})
26+
.or_else(|| {
27+
req_headers
28+
.get(header::HOST)
29+
.and_then(|hdr| hdr.to_str().ok())
30+
})
31+
.unwrap();
32+
33+
let scheme = req.uri().scheme_str().unwrap_or("http");
34+
let url_path_query = req_uri.path_and_query().map(PathAndQuery::as_str).unwrap();
35+
36+
format!(
37+
"{}{}://{}{}",
38+
&*self.prerender_service_url, scheme, host, url_path_query
39+
)
40+
}
41+
}
42+
43+
/// Entry point for constructing a `PrerenderMiddleware`; carries no state.
#[derive(Debug)]
struct PrerenderMiddlewareBuilder {}
45+
46+
impl PrerenderMiddlewareBuilder {
47+
pub fn use_prerender_io() -> PrerenderMiddleware {
48+
PrerenderMiddleware {
49+
prerender_service_url: prerender_url().to_string(),
50+
}
51+
}
52+
53+
pub fn use_custom_prerender_url(prerender_service_url: &impl ToString) -> PrerenderMiddleware {
54+
PrerenderMiddleware {
55+
prerender_service_url: prerender_service_url.to_string(),
56+
}
57+
}
58+
}
59+
60+
impl PrerenderMiddleware {
61+
pub fn builder() -> PrerenderMiddlewareBuilder {
62+
PrerenderMiddlewareBuilder {}
63+
}
64+
}
65+
66+
/// Decides if should prerender the page or not.
67+
///
68+
/// Will NOT prerender on the following cases:
69+
/// * HTTP is not GET or HEAD
70+
/// * User agent is NOT crawler
71+
/// * Is requesting a resource on `IGNORED_EXTENSIONS`
72+
pub fn should_prerender(req: &ServiceRequest) -> bool {
73+
let request_headers = req.headers();
74+
let mut is_crawler = false;
75+
76+
if ![Method::GET, Method::HEAD].contains(req.method()) {
77+
return false;
78+
}
79+
80+
let req_ua_lowercase = if let Some(user_agent) = request_headers.get(header::USER_AGENT) {
81+
let user_agent = user_agent.to_str();
82+
if let Ok(ua) = user_agent {
83+
ua.to_lowercase()
84+
} else {
85+
return false;
86+
}
87+
} else {
88+
return false;
89+
};
90+
91+
if USER_AGENTS
92+
.iter()
93+
.any(|crawler_ua| req_ua_lowercase.contains(&*crawler_ua.to_lowercase()))
94+
{
95+
is_crawler = true;
96+
}
97+
98+
// check for ignored extensions
99+
let is_ignored_extension_url = req.uri().path_and_query().map_or_else(
100+
|| false,
101+
|path_query| {
102+
IGNORED_EXTENSIONS
103+
.iter()
104+
.any(|ext| path_query.as_str().contains(ext))
105+
},
106+
);
107+
if is_ignored_extension_url {
108+
return false;
109+
}
110+
111+
is_crawler
112+
}
113+
114+
/// Prepares the headers for the request sent to the prerender service.
///
/// Work in progress: the header map is built but no request is issued yet and
/// nothing is returned.
///
/// When header forwarding is on (currently always), the incoming request's
/// headers are copied and `Host` is dropped. `Accept-Encoding` is then set to
/// exactly `gzip`, replacing whatever encodings the original client offered.
pub fn get_prerendered_response(req: ServiceRequest) {
    let forward_headers = true;

    let mut prerender_request_headers = if forward_headers {
        let mut forwarded = req.headers().clone();
        forwarded.remove(header::HOST);
        forwarded
    } else {
        HeaderMap::new()
    };

    // `insert` (not `append`) so a forwarded `Accept-Encoding` from the client
    // cannot leave encodings other than gzip on the outgoing request.
    prerender_request_headers.insert(
        header::ACCEPT_ENCODING,
        header::HeaderValue::from_static("gzip"),
    );

    // TODO: accept `X-Prerender-Token`
    // prerender_request_headers.insert("X-Prerender-Token", pre_render_token);
}
128+
129+
/// Returns the base URL of the hosted Prerender.io service, including the
/// trailing slash.
pub fn prerender_url() -> &'static str {
    "https://service.prerender.io/"
}
132+
133+
#[cfg(test)]
mod tests {
    use crate::middleware::should_prerender;
    use actix_web::http::{header, Method};
    use actix_web::test::TestRequest;

    const FIREFOX_UA: &str =
        "Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0";
    const GOOGLEBOT_UA: &str =
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
    const LINKEDIN_UA: &str =
        "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)";

    /// Installs the test logger; safe to call from every test because a
    /// repeated initialisation error is ignored.
    fn init_logger() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[test]
    fn test_human_valid_resource() {
        init_logger();
        let req = TestRequest::get()
            .insert_header((header::USER_AGENT, FIREFOX_UA))
            .uri("http://yourserver.com/clothes/tshirts?query=xl")
            .to_srv_request();

        assert!(!should_prerender(&req));
    }

    #[test]
    fn test_crawler_valid_resource() {
        init_logger();
        let req = TestRequest::get()
            .insert_header((header::USER_AGENT, GOOGLEBOT_UA))
            .uri("http://yourserver.com/clothes/tshirts?query=xl")
            .to_srv_request();

        assert!(should_prerender(&req));
    }

    #[test]
    fn test_crawler_head_request() {
        init_logger();
        let req = TestRequest::default()
            .method(Method::HEAD)
            .insert_header((header::USER_AGENT, GOOGLEBOT_UA))
            .uri("http://yourserver.com/clothes/tshirts")
            .to_srv_request();

        assert!(should_prerender(&req));
    }

    #[test]
    fn test_missing_user_agent() {
        init_logger();
        let req = TestRequest::get()
            .uri("http://yourserver.com/clothes/tshirts")
            .to_srv_request();

        assert!(!should_prerender(&req));
    }

    #[test]
    fn test_crawler_ignored_resource() {
        init_logger();
        let req = TestRequest::get()
            .insert_header((header::USER_AGENT, LINKEDIN_UA))
            .uri("http://yourserver.com/clothes/tshirts/blue.jpg")
            .to_srv_request();

        let render = should_prerender(&req);
        assert!(!render);
    }

    #[test]
    fn test_crawler_wrong_http_method() {
        init_logger();
        let req = TestRequest::post()
            .insert_header((header::USER_AGENT, LINKEDIN_UA))
            .uri("http://yourserver.com/clothes/tshirts/red-dotted")
            .to_srv_request();

        let render = should_prerender(&req);
        assert!(!render);
    }
}

0 commit comments

Comments
 (0)