Skip to main content

nostr_bbs_preview_worker/
lib.rs

1//! nostr-bbs link-preview-api Worker (Rust port)
2//!
3//! Proxies requests to fetch OpenGraph metadata, bypassing CORS.
4//! Replaces the TypeScript Cloudflare Workers implementation.
5//!
6//! ## Module structure
7//!
8//! - `ssrf` -- SSRF protection (private/internal URL blocking)
9//! - `parse` -- OpenGraph metadata extraction, HTML entity decoding
10//! - `oembed` -- Twitter/X oEmbed detection and fetching
11//! ## Endpoints
12//!
13//!   GET /preview?url=...  -- fetch OG metadata or Twitter oEmbed
14//!   GET /health           -- health check
15//!   GET /stats            -- cache statistics (CF Cache API)
16//!   OPTIONS               -- CORS preflight
17
18// Worker entry points are invoked via wasm-bindgen and appear unused in native builds.
19#![allow(dead_code)]
20
21mod oembed;
22mod parse;
23mod ssrf;
24
25use serde::Serialize;
26use worker::*;
27
28use oembed::TwitterCachePayload;
29use parse::OgCachePayload;
30
31// ── Constants ────────────────────────────────────────────────────────────────
32
/// Number of seconds in one day; base unit for the cache lifetimes below.
const SECONDS_PER_DAY: u32 = 24 * 60 * 60;
/// Cache lifetime for OpenGraph previews: 10 days, in seconds.
const CACHE_TTL_OG: u32 = 10 * SECONDS_PER_DAY;
/// Cache lifetime for Twitter/X oEmbed previews: 1 day, in seconds.
const CACHE_TTL_TWITTER: u32 = SECONDS_PER_DAY;
35
36// ── Response types ───────────────────────────────────────────────────────────
37
38#[derive(Serialize)]
39struct OgPreviewResponse {
40    r#type: &'static str,
41    url: String,
42    domain: String,
43    favicon: String,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    title: Option<String>,
46    #[serde(skip_serializing_if = "Option::is_none")]
47    description: Option<String>,
48    #[serde(skip_serializing_if = "Option::is_none")]
49    image: Option<String>,
50    #[serde(rename = "siteName", skip_serializing_if = "Option::is_none")]
51    site_name: Option<String>,
52    cached: bool,
53}
54
55#[derive(Serialize)]
56struct TwitterEmbedResponse {
57    r#type: &'static str,
58    url: String,
59    html: String,
60    author_name: String,
61    author_url: String,
62    provider_name: String,
63    cached: bool,
64}
65
66#[derive(Serialize)]
67struct ErrorResponse {
68    error: String,
69}
70
71#[derive(Serialize)]
72struct HealthResponse {
73    status: &'static str,
74    service: &'static str,
75    runtime: &'static str,
76}
77
78#[derive(Serialize)]
79struct StatsResponse {
80    cache: &'static str,
81    note: &'static str,
82}
83
84/// Unified cache payload for serialization/deserialization.
85#[derive(Serialize, serde::Deserialize)]
86#[serde(untagged)]
87enum CachePayload {
88    Twitter(TwitterCachePayload),
89    Og(OgCachePayload),
90}
91
92// ── CORS ─────────────────────────────────────────────────────────────────────
93
94fn allowed_origin(env: &Env) -> String {
95    env.var("ALLOWED_ORIGIN")
96        .map(|v| v.to_string())
97        .unwrap_or_else(|_| "https://example.com".to_string())
98}
99
100fn cors_headers(env: &Env) -> Headers {
101    let headers = Headers::new();
102    let _ = headers.set("Access-Control-Allow-Origin", &allowed_origin(env));
103    let _ = headers.set("Access-Control-Allow-Methods", "GET, OPTIONS");
104    let _ = headers.set("Access-Control-Allow-Headers", "Content-Type");
105    let _ = headers.set("Access-Control-Max-Age", "86400");
106    headers
107}
108
109fn json_response(body: &impl Serialize, status: u16, env: &Env) -> Result<Response> {
110    json_response_extra(body, status, env, None)
111}
112
113fn json_response_extra(
114    body: &impl Serialize,
115    status: u16,
116    env: &Env,
117    extra_headers: Option<(&str, &str)>,
118) -> Result<Response> {
119    let json = serde_json::to_string(body).map_err(|e| Error::RustError(e.to_string()))?;
120    let headers = cors_headers(env);
121    let _ = headers.set("Content-Type", "application/json");
122    if let Some((key, value)) = extra_headers {
123        let _ = headers.set(key, value);
124    }
125    Ok(Response::from_body(ResponseBody::Body(json.into_bytes()))?
126        .with_headers(headers)
127        .with_status(status))
128}
129
130// ── Percent encoding (inline to avoid extra crate) ───────────────────────────
131
/// Percent-encodes `input`, byte by byte.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied unchanged. Every
/// other byte, including each byte of a multi-byte UTF-8 sequence, becomes
/// `%XX` with uppercase hexadecimal digits.
pub(crate) fn percent_encode(input: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // `input.len()` is the exact size when nothing needs escaping; the buffer
    // grows on demand otherwise.
    let mut encoded = String::with_capacity(input.len());
    for byte in input.bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
            encoded.push(char::from(byte));
        } else {
            // Table lookup avoids allocating a temporary `String` per byte.
            encoded.push('%');
            encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
146
147// ── Cache helpers (CF Cache API) ─────────────────────────────────────────────
148
149fn cache_key(target_url: &str) -> String {
150    format!(
151        "https://link-preview-cache.internal/v1?url={}",
152        percent_encode(target_url)
153    )
154}
155
156async fn get_from_cache(target_url: &str) -> Option<Response> {
157    let cache = Cache::default();
158    let key = cache_key(target_url);
159    cache.get(&key, false).await.ok().flatten()
160}
161
162async fn put_to_cache(target_url: &str, payload: &CachePayload, ttl: u32, env: &Env) {
163    let cache = Cache::default();
164    let key = cache_key(target_url);
165
166    let body = match serde_json::to_string(payload) {
167        Ok(b) => b,
168        Err(_) => return,
169    };
170
171    let headers = cors_headers(env);
172    let _ = headers.set("Content-Type", "application/json");
173    let _ = headers.set("Cache-Control", &format!("public, max-age={}", ttl));
174
175    if let Ok(response) =
176        Response::from_body(ResponseBody::Body(body.into_bytes())).map(|r| r.with_headers(headers))
177    {
178        let _ = cache.put(&key, response).await;
179    }
180}
181
182// ── Handlers ─────────────────────────────────────────────────────────────────
183
184async fn handle_preview(req: &Request, env: &Env) -> Result<Response> {
185    let url = req.url()?;
186    let target_url = url
187        .query_pairs()
188        .find(|(k, _)| k == "url")
189        .map(|(_, v)| v.to_string());
190
191    let target_url = match target_url {
192        Some(u) => u,
193        None => {
194            return json_response(
195                &ErrorResponse {
196                    error: "Missing url parameter".to_string(),
197                },
198                400,
199                env,
200            )
201        }
202    };
203
204    // Validate URL
205    if Url::parse(&target_url).is_err() {
206        return json_response(
207            &ErrorResponse {
208                error: "Invalid URL".to_string(),
209            },
210            400,
211            env,
212        );
213    }
214
215    // SSRF check
216    if ssrf::is_private_url(&target_url) {
217        return json_response(
218            &ErrorResponse {
219                error: "URL not allowed (private or internal address)".to_string(),
220            },
221            400,
222            env,
223        );
224    }
225
226    let is_twitter = oembed::is_twitter_url(&target_url);
227
228    // Check CF Cache API
229    if let Some(mut cached) = get_from_cache(&target_url).await {
230        if let Ok(text) = cached.text().await {
231            if let Ok(mut data) = serde_json::from_str::<serde_json::Value>(&text) {
232                data["cached"] = serde_json::Value::Bool(true);
233                return json_response_extra(&data, 200, env, Some(("X-Cache", "HIT")));
234            }
235        }
236    }
237
238    if is_twitter {
239        match oembed::fetch_twitter_embed(&target_url).await {
240            Ok(data) => {
241                let cache_payload = CachePayload::Twitter(data.clone());
242                put_to_cache(&target_url, &cache_payload, CACHE_TTL_TWITTER, env).await;
243
244                let response = TwitterEmbedResponse {
245                    r#type: "twitter",
246                    url: data.url,
247                    html: data.html,
248                    author_name: data.author_name,
249                    author_url: data.author_url,
250                    provider_name: data.provider_name,
251                    cached: false,
252                };
253                json_response_extra(&response, 200, env, Some(("X-Cache", "MISS")))
254            }
255            Err(e) => json_response(
256                &ErrorResponse {
257                    error: e.to_string(),
258                },
259                500,
260                env,
261            ),
262        }
263    } else {
264        match parse::fetch_open_graph_data(&target_url).await {
265            Ok(data) => {
266                let cache_payload = CachePayload::Og(data.clone());
267                put_to_cache(&target_url, &cache_payload, CACHE_TTL_OG, env).await;
268
269                let response = OgPreviewResponse {
270                    r#type: "opengraph",
271                    url: data.url,
272                    domain: data.domain,
273                    favicon: data.favicon,
274                    title: data.title,
275                    description: data.description,
276                    image: data.image,
277                    site_name: data.site_name,
278                    cached: false,
279                };
280                json_response_extra(&response, 200, env, Some(("X-Cache", "MISS")))
281            }
282            Err(e) => json_response(
283                &ErrorResponse {
284                    error: e.to_string(),
285                },
286                500,
287                env,
288            ),
289        }
290    }
291}
292
293fn handle_health(env: &Env) -> Result<Response> {
294    json_response(
295        &HealthResponse {
296            status: "ok",
297            service: "link-preview-api",
298            runtime: "workers-rs",
299        },
300        200,
301        env,
302    )
303}
304
305fn handle_stats(env: &Env) -> Result<Response> {
306    json_response(
307        &StatsResponse {
308            cache: "cf-cache-api",
309            note: "Per-key hit stats are available in Cloudflare Analytics dashboard",
310        },
311        200,
312        env,
313    )
314}
315
316// ── Router ───────────────────────────────────────────────────────────────────
317
318#[event(fetch)]
319async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> {
320    // CORS preflight
321    if req.method() == Method::Options {
322        let headers = cors_headers(&env);
323        return Ok(Response::empty()?.with_headers(headers).with_status(204));
324    }
325
326    // Rate limit: 30 requests per 60 seconds per IP
327    let ip = nostr_bbs_rate_limit::client_ip(&req);
328    if !nostr_bbs_rate_limit::check_rate_limit(&env, "RATE_LIMIT", &ip, 30, 60).await {
329        return json_response(
330            &ErrorResponse {
331                error: "Too many requests".to_string(),
332            },
333            429,
334            &env,
335        );
336    }
337
338    let url = req.url()?;
339    let path = url.path();
340
341    let result = match (req.method(), path) {
342        (Method::Get, "/preview") => handle_preview(&req, &env).await,
343        (Method::Get, "/health") => handle_health(&env),
344        (Method::Get, "/stats") => handle_stats(&env),
345        _ => json_response(
346            &ErrorResponse {
347                error: "Not found".to_string(),
348            },
349            404,
350            &env,
351        ),
352    };
353
354    match result {
355        Ok(resp) => Ok(resp),
356        Err(e) => {
357            console_error!("Worker error: {}", e);
358            json_response(
359                &ErrorResponse {
360                    error: e.to_string(),
361                },
362                500,
363                &env,
364            )
365        }
366    }
367}
368
369// Cron keep-warm: prevents cold starts by running periodically
370#[event(scheduled)]
371async fn scheduled(_event: ScheduledEvent, _env: Env, _ctx: ScheduleContext) {
372    // No persistent storage to touch -- the cron itself keeps the isolate warm
373}
374
375// ── Tests ────────────────────────────────────────────────────────────────────
376
#[cfg(test)]
mod tests {
    use super::{cache_key, percent_encode};

    /// Prefix every cache key is expected to start with.
    const CACHE_KEY_PREFIX: &str = "https://link-preview-cache.internal/v1?url=";

    // Cache key tests

    #[test]
    fn cache_key_is_deterministic() {
        let target = "https://example.com/page";
        let first = cache_key(target);
        let second = cache_key(target);
        assert_eq!(first, second);
        assert!(first.starts_with(CACHE_KEY_PREFIX));
    }

    #[test]
    fn cache_keys_differ_for_different_urls() {
        assert_ne!(
            cache_key("https://example.com/a"),
            cache_key("https://example.com/b")
        );
    }

    // Percent encoding tests

    #[test]
    fn encodes_special_chars() {
        let cases = [
            ("hello world", "hello%20world"),
            ("a=b&c=d", "a%3Db%26c%3Dd"),
        ];
        for (input, expected) in cases {
            assert_eq!(percent_encode(input), expected);
        }
    }

    #[test]
    fn preserves_unreserved_chars() {
        for input in ["abc-_.~", "ABC123"] {
            assert_eq!(percent_encode(input), input);
        }
    }
}
409}