1#![allow(dead_code)]
20
21mod oembed;
22mod parse;
23mod ssrf;
24
25use serde::Serialize;
26use worker::*;
27
28use oembed::TwitterCachePayload;
29use parse::OgCachePayload;
30
/// Cache lifetime for Open Graph previews: 10 days, in seconds (used as the
/// `max-age` value when the entry is written to the cache).
const CACHE_TTL_OG: u32 = 10 * 24 * 60 * 60;
/// Cache lifetime for Twitter embeds: 1 day, in seconds (used as the
/// `max-age` value when the entry is written to the cache).
const CACHE_TTL_TWITTER: u32 = 24 * 60 * 60;

/// JSON body built by the preview handler for a freshly fetched Open Graph page.
///
/// Optional fields are left out of the JSON entirely when they are `None`.
#[derive(Serialize)]
struct OgPreviewResponse {
    /// Response discriminator; the preview handler sets it to `"opengraph"`.
    r#type: &'static str,
    url: String,
    domain: String,
    favicon: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    title: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    image: Option<String>,
    /// Serialized under the camelCase key `siteName`.
    #[serde(rename = "siteName", skip_serializing_if = "Option::is_none")]
    site_name: Option<String>,
    /// `false` when this struct is built from a fresh fetch.
    cached: bool,
}
54
/// JSON body built by the preview handler for a freshly fetched Twitter embed.
#[derive(Serialize)]
struct TwitterEmbedResponse {
    /// Response discriminator; the preview handler sets it to `"twitter"`.
    r#type: &'static str,
    url: String,
    html: String,
    author_name: String,
    author_url: String,
    provider_name: String,
    /// `false` when this struct is built from a fresh fetch.
    cached: bool,
}
65
/// JSON error body of the form `{"error": "<message>"}` used for every
/// non-success response built in this file.
#[derive(Serialize)]
struct ErrorResponse {
    /// Human-readable description of what went wrong.
    error: String,
}
70
/// JSON body returned by the `/health` route. All fields are fixed strings
/// supplied by `handle_health`.
#[derive(Serialize)]
struct HealthResponse {
    status: &'static str,
    service: &'static str,
    runtime: &'static str,
}
77
/// JSON body returned by the `/stats` route. Both fields are fixed strings
/// supplied by `handle_stats`; no counters are computed here.
#[derive(Serialize)]
struct StatsResponse {
    cache: &'static str,
    note: &'static str,
}
83
/// Payload written to the cache by `put_to_cache`.
///
/// `#[serde(untagged)]` means the JSON carries no variant tag: each variant
/// serializes as the inner payload's own shape. When deserializing, serde
/// tries the variants in declaration order (`Twitter` first, then `Og`).
#[derive(Serialize, serde::Deserialize)]
#[serde(untagged)]
enum CachePayload {
    /// Cached result of a Twitter oEmbed lookup.
    Twitter(TwitterCachePayload),
    /// Cached result of an Open Graph fetch.
    Og(OgCachePayload),
}
91
92fn allowed_origin(env: &Env) -> String {
95 env.var("ALLOWED_ORIGIN")
96 .map(|v| v.to_string())
97 .unwrap_or_else(|_| "https://example.com".to_string())
98}
99
100fn cors_headers(env: &Env) -> Headers {
101 let headers = Headers::new();
102 let _ = headers.set("Access-Control-Allow-Origin", &allowed_origin(env));
103 let _ = headers.set("Access-Control-Allow-Methods", "GET, OPTIONS");
104 let _ = headers.set("Access-Control-Allow-Headers", "Content-Type");
105 let _ = headers.set("Access-Control-Max-Age", "86400");
106 headers
107}
108
/// Serializes `body` as JSON and returns it with the given HTTP `status`.
///
/// Thin wrapper around `json_response_extra` that passes no extra header.
fn json_response(body: &impl Serialize, status: u16, env: &Env) -> Result<Response> {
    json_response_extra(body, status, env, None)
}
112
113fn json_response_extra(
114 body: &impl Serialize,
115 status: u16,
116 env: &Env,
117 extra_headers: Option<(&str, &str)>,
118) -> Result<Response> {
119 let json = serde_json::to_string(body).map_err(|e| Error::RustError(e.to_string()))?;
120 let headers = cors_headers(env);
121 let _ = headers.set("Content-Type", "application/json");
122 if let Some((key, value)) = extra_headers {
123 let _ = headers.set(key, value);
124 }
125 Ok(Response::from_body(ResponseBody::Body(json.into_bytes()))?
126 .with_headers(headers)
127 .with_status(status))
128}
129
/// Percent-encodes `input` byte by byte.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied through unchanged;
/// every other byte (including each byte of a multi-byte UTF-8 sequence) is
/// written as `%XX` with uppercase hexadecimal digits.
pub(crate) fn percent_encode(input: &str) -> String {
    // Lookup table for hex digits; avoids building a temporary `String` with
    // `format!` for every escaped byte.
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case: every byte expands to three characters.
    let mut encoded = String::with_capacity(input.len() * 3);
    for byte in input.bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
146
147fn cache_key(target_url: &str) -> String {
150 format!(
151 "https://link-preview-cache.internal/v1?url={}",
152 percent_encode(target_url)
153 )
154}
155
156async fn get_from_cache(target_url: &str) -> Option<Response> {
157 let cache = Cache::default();
158 let key = cache_key(target_url);
159 cache.get(&key, false).await.ok().flatten()
160}
161
162async fn put_to_cache(target_url: &str, payload: &CachePayload, ttl: u32, env: &Env) {
163 let cache = Cache::default();
164 let key = cache_key(target_url);
165
166 let body = match serde_json::to_string(payload) {
167 Ok(b) => b,
168 Err(_) => return,
169 };
170
171 let headers = cors_headers(env);
172 let _ = headers.set("Content-Type", "application/json");
173 let _ = headers.set("Cache-Control", &format!("public, max-age={}", ttl));
174
175 if let Ok(response) =
176 Response::from_body(ResponseBody::Body(body.into_bytes())).map(|r| r.with_headers(headers))
177 {
178 let _ = cache.put(&key, response).await;
179 }
180}
181
182async fn handle_preview(req: &Request, env: &Env) -> Result<Response> {
185 let url = req.url()?;
186 let target_url = url
187 .query_pairs()
188 .find(|(k, _)| k == "url")
189 .map(|(_, v)| v.to_string());
190
191 let target_url = match target_url {
192 Some(u) => u,
193 None => {
194 return json_response(
195 &ErrorResponse {
196 error: "Missing url parameter".to_string(),
197 },
198 400,
199 env,
200 )
201 }
202 };
203
204 if Url::parse(&target_url).is_err() {
206 return json_response(
207 &ErrorResponse {
208 error: "Invalid URL".to_string(),
209 },
210 400,
211 env,
212 );
213 }
214
215 if ssrf::is_private_url(&target_url) {
217 return json_response(
218 &ErrorResponse {
219 error: "URL not allowed (private or internal address)".to_string(),
220 },
221 400,
222 env,
223 );
224 }
225
226 let is_twitter = oembed::is_twitter_url(&target_url);
227
228 if let Some(mut cached) = get_from_cache(&target_url).await {
230 if let Ok(text) = cached.text().await {
231 if let Ok(mut data) = serde_json::from_str::<serde_json::Value>(&text) {
232 data["cached"] = serde_json::Value::Bool(true);
233 return json_response_extra(&data, 200, env, Some(("X-Cache", "HIT")));
234 }
235 }
236 }
237
238 if is_twitter {
239 match oembed::fetch_twitter_embed(&target_url).await {
240 Ok(data) => {
241 let cache_payload = CachePayload::Twitter(data.clone());
242 put_to_cache(&target_url, &cache_payload, CACHE_TTL_TWITTER, env).await;
243
244 let response = TwitterEmbedResponse {
245 r#type: "twitter",
246 url: data.url,
247 html: data.html,
248 author_name: data.author_name,
249 author_url: data.author_url,
250 provider_name: data.provider_name,
251 cached: false,
252 };
253 json_response_extra(&response, 200, env, Some(("X-Cache", "MISS")))
254 }
255 Err(e) => json_response(
256 &ErrorResponse {
257 error: e.to_string(),
258 },
259 500,
260 env,
261 ),
262 }
263 } else {
264 match parse::fetch_open_graph_data(&target_url).await {
265 Ok(data) => {
266 let cache_payload = CachePayload::Og(data.clone());
267 put_to_cache(&target_url, &cache_payload, CACHE_TTL_OG, env).await;
268
269 let response = OgPreviewResponse {
270 r#type: "opengraph",
271 url: data.url,
272 domain: data.domain,
273 favicon: data.favicon,
274 title: data.title,
275 description: data.description,
276 image: data.image,
277 site_name: data.site_name,
278 cached: false,
279 };
280 json_response_extra(&response, 200, env, Some(("X-Cache", "MISS")))
281 }
282 Err(e) => json_response(
283 &ErrorResponse {
284 error: e.to_string(),
285 },
286 500,
287 env,
288 ),
289 }
290 }
291}
292
293fn handle_health(env: &Env) -> Result<Response> {
294 json_response(
295 &HealthResponse {
296 status: "ok",
297 service: "link-preview-api",
298 runtime: "workers-rs",
299 },
300 200,
301 env,
302 )
303}
304
305fn handle_stats(env: &Env) -> Result<Response> {
306 json_response(
307 &StatsResponse {
308 cache: "cf-cache-api",
309 note: "Per-key hit stats are available in Cloudflare Analytics dashboard",
310 },
311 200,
312 env,
313 )
314}
315
316#[event(fetch)]
319async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> {
320 if req.method() == Method::Options {
322 let headers = cors_headers(&env);
323 return Ok(Response::empty()?.with_headers(headers).with_status(204));
324 }
325
326 let ip = nostr_bbs_rate_limit::client_ip(&req);
328 if !nostr_bbs_rate_limit::check_rate_limit(&env, "RATE_LIMIT", &ip, 30, 60).await {
329 return json_response(
330 &ErrorResponse {
331 error: "Too many requests".to_string(),
332 },
333 429,
334 &env,
335 );
336 }
337
338 let url = req.url()?;
339 let path = url.path();
340
341 let result = match (req.method(), path) {
342 (Method::Get, "/preview") => handle_preview(&req, &env).await,
343 (Method::Get, "/health") => handle_health(&env),
344 (Method::Get, "/stats") => handle_stats(&env),
345 _ => json_response(
346 &ErrorResponse {
347 error: "Not found".to_string(),
348 },
349 404,
350 &env,
351 ),
352 };
353
354 match result {
355 Ok(resp) => Ok(resp),
356 Err(e) => {
357 console_error!("Worker error: {}", e);
358 json_response(
359 &ErrorResponse {
360 error: e.to_string(),
361 },
362 500,
363 &env,
364 )
365 }
366 }
367}
368
/// Handler for scheduled events. The body is empty, so a scheduled trigger
/// performs no work; all three arguments are unused.
#[event(scheduled)]
async fn scheduled(_event: ScheduledEvent, _env: Env, _ctx: ScheduleContext) {
}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// The same URL must always map to the same key under the internal prefix.
    #[test]
    fn cache_key_is_deterministic() {
        let url = "https://example.com/page";
        let first = cache_key(url);
        let second = cache_key(url);
        assert_eq!(first, second);
        assert!(first.starts_with("https://link-preview-cache.internal/v1?url="));
    }

    /// Distinct URLs must not collide on the same key.
    #[test]
    fn cache_keys_differ_for_different_urls() {
        assert_ne!(
            cache_key("https://example.com/a"),
            cache_key("https://example.com/b")
        );
    }

    /// Characters outside the unreserved set are written as `%XX`.
    #[test]
    fn encodes_special_chars() {
        let cases = [
            ("hello world", "hello%20world"),
            ("a=b&c=d", "a%3Db%26c%3Dd"),
        ];
        for (input, expected) in cases {
            assert_eq!(percent_encode(input), expected);
        }
    }

    /// Unreserved characters pass through unchanged.
    #[test]
    fn preserves_unreserved_chars() {
        for input in ["abc-_.~", "ABC123"] {
            assert_eq!(percent_encode(input), input);
        }
    }
}