phantom-frame 0.2.11

A high-performance prerendering proxy engine with caching support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
// Compile-time guard: when both the `native-tls` and `rustls` Cargo features are
// enabled at once, the build is aborted with the message below.
#[cfg(all(feature = "native-tls", feature = "rustls"))]
compile_error!("Features `native-tls` and `rustls` are mutually exclusive — enable only one.");

pub mod cache;
pub mod compression;
pub mod config;
pub mod control;
pub mod path_matcher;
pub mod proxy;

use axum::{extract::Extension, Router};
use cache::{CacheHandle, CacheStore};
use proxy::ProxyState;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::mpsc;

/// Controls which backend responses are eligible for caching.
///
/// The per-response decision is made from the response content type by
/// [`CacheStrategy::allows_content_type`]. Variants are (de)serialized under
/// their `snake_case` names (e.g. `only_html`); [`CacheStrategy::All`] is the
/// default.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheStrategy {
    /// Cache every response that passes the existing path and method filters.
    #[default]
    All,
    /// Disable caching entirely, including 404 cache entries.
    None,
    /// Cache HTML documents only (`text/html` or `application/xhtml+xml`).
    OnlyHtml,
    /// Cache everything except image responses (`image/*`). Responses without
    /// a content type are still considered cacheable.
    NoImages,
    /// Cache image responses only (`image/*`).
    OnlyImages,
    /// Cache non-HTML static/application assets: images, fonts, CSS,
    /// JavaScript, JSON, web manifests, WebAssembly and XML.
    OnlyAssets,
}

impl CacheStrategy {
    /// Decide whether a response with the given `Content-Type` value may be cached
    /// under this strategy.
    ///
    /// Only the media type is considered: anything after the first `;` (such as a
    /// `charset` parameter) is dropped, surrounding whitespace is trimmed, and the
    /// comparison is ASCII case-insensitive. `None` means the response carried no
    /// content type; it is accepted by `All` and `NoImages` and rejected by every
    /// other strategy.
    pub fn allows_content_type(&self, content_type: Option<&str>) -> bool {
        // Reduce e.g. "Text/HTML; charset=utf-8" to "text/html".
        let normalized: Option<String> = match content_type {
            Some(raw) => raw
                .split(';')
                .next()
                .map(|essence| essence.trim().to_ascii_lowercase()),
            None => None,
        };
        let media_type = normalized.as_deref();

        // Several strategies hinge on "is this an image?", so answer it once.
        let is_image = media_type.is_some_and(|mime| mime.starts_with("image/"));

        match self {
            Self::All => true,
            Self::None => false,
            Self::OnlyHtml => matches!(
                media_type,
                Some("text/html" | "application/xhtml+xml")
            ),
            Self::NoImages => !is_image,
            Self::OnlyImages => is_image,
            Self::OnlyAssets => {
                is_image
                    || media_type.is_some_and(|mime| {
                        mime.starts_with("font/")
                            || matches!(
                                mime,
                                "text/css"
                                    | "text/javascript"
                                    | "application/javascript"
                                    | "application/x-javascript"
                                    | "application/json"
                                    | "application/manifest+json"
                                    | "application/wasm"
                                    | "application/xml"
                                    | "text/xml"
                            )
                    })
            }
        }
    }
}

impl std::fmt::Display for CacheStrategy {
    /// Writes the strategy's `snake_case` name (for example `only_html`).
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(match self {
            Self::All => "all",
            Self::None => "none",
            Self::OnlyHtml => "only_html",
            Self::NoImages => "no_images",
            Self::OnlyImages => "only_images",
            Self::OnlyAssets => "only_assets",
        })
    }
}

/// Controls how cacheable responses are stored in memory.
///
/// Variants are (de)serialized under their `snake_case` names (`none`,
/// `brotli`, `gzip`, `deflate`); [`CompressStrategy::Brotli`] is the default.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompressStrategy {
    /// Store cache entries without additional compression.
    None,
    /// Store cache entries with Brotli compression.
    #[default]
    Brotli,
    /// Store cache entries with gzip compression.
    Gzip,
    /// Store cache entries with deflate compression.
    Deflate,
}

impl std::fmt::Display for CompressStrategy {
    /// Writes the strategy's lowercase name (for example `brotli`).
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(match self {
            Self::None => "none",
            Self::Brotli => "brotli",
            Self::Gzip => "gzip",
            Self::Deflate => "deflate",
        })
    }
}

/// Controls where cached response bodies are stored.
///
/// Variants are (de)serialized as `memory` and `filesystem`;
/// [`CacheStorageMode::Memory`] is the default.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheStorageMode {
    /// Keep cached bodies in process memory.
    #[default]
    Memory,
    /// Persist cached bodies to the filesystem and load them on cache hits.
    Filesystem,
}

impl std::fmt::Display for CacheStorageMode {
    /// Writes the storage mode's lowercase name (`memory` or `filesystem`).
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(match self {
            Self::Memory => "memory",
            Self::Filesystem => "filesystem",
        })
    }
}

/// The type of a webhook — controls whether the webhook gates access or just receives a notification.
///
/// Variants are (de)serialized under their `snake_case` names (`blocking`,
/// `notify`, `cache_key`); [`WebhookType::Notify`] is the default.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WebhookType {
    /// The webhook call must complete with a 2xx response before the request is forwarded.
    /// A non-2xx response (or a timeout / network error) causes the request to be denied.
    /// A 3xx response is forwarded to the client as a redirect with the `Location` header intact.
    Blocking,
    /// The webhook call is dispatched in the background without blocking the request.
    #[default]
    Notify,
    /// The webhook response body (plain text) is used as the cache key for this request.
    /// On failure, a non-2xx response, or an empty body, the default cache key is used instead.
    /// Works in both Dynamic and PreGenerate modes.
    CacheKey,
}

/// Configuration for a single webhook attached to a server.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct WebhookConfig {
    /// The URL to POST the request metadata to.
    pub url: String,

    /// How this webhook participates in request handling: blocking (gates
    /// access), notify (fire-and-forget) or cache-key (supplies the cache key).
    /// See [`WebhookType`].
    ///
    /// Serialized under the key `type`; when the key is absent it falls back to
    /// the [`WebhookType`] default, which is `Notify`.
    #[serde(rename = "type", default)]
    pub webhook_type: WebhookType,

    /// Timeout in milliseconds for the webhook call (default: 5000 ms).
    /// On timeout, a blocking webhook denies the request with 503.
    ///
    /// Deserializes to `None` when the key is absent.
    #[serde(default)]
    pub timeout_ms: Option<u64>,
}

/// Controls the operating mode of the proxy.
///
/// [`ProxyMode::Dynamic`] is the `Default` value. Unlike the other
/// configuration enums in this file, this type does not derive
/// `Serialize`/`Deserialize`.
#[derive(Clone, Debug, Default)]
pub enum ProxyMode {
    /// Dynamic mode: every request is served from cache when available, or
    /// forwarded to the upstream backend on a cache miss (default).
    #[default]
    Dynamic,
    /// Pre-generate (SSG) mode: a fixed list of `paths` is fetched from the
    /// upstream server at startup and served exclusively from the cache.
    ///
    /// On a cache miss:
    /// - `fallthrough = false` (default): return 404 immediately.
    /// - `fallthrough = true`: fall through to the upstream backend.
    ///
    /// Use the [`CacheHandle`] returned by [`create_proxy`] to manage snapshots
    /// at runtime via `add_snapshot`, `refresh_snapshot`, `remove_snapshot`, and
    /// `refresh_all_snapshots`.
    PreGenerate {
        /// The paths to pre-generate at startup (e.g. `"/book/1"`).
        paths: Vec<String>,
        /// When `true`, cache misses fall through to the upstream backend.
        /// Defaults to `false`.
        fallthrough: bool,
    },
}

/// Information about an incoming request for cache key generation
///
/// Every field is borrowed for the lifetime `'a`, so building a `RequestInfo`
/// copies none of the request data. It is the argument passed to
/// `CreateProxyConfig::cache_key_fn`.
#[derive(Clone, Debug)]
pub struct RequestInfo<'a> {
    /// HTTP method (e.g., "GET", "POST", "PUT", "DELETE")
    pub method: &'a str,
    /// Request path (e.g., "/api/users")
    pub path: &'a str,
    /// Query string (e.g., "id=123&sort=asc"); an empty string means "no query".
    /// The default cache key function inserts the `?` separator itself.
    pub query: &'a str,
    /// Request headers (for custom cache key logic based on headers)
    pub headers: &'a axum::http::HeaderMap,
}

/// Configuration for creating a proxy
///
/// Build one with [`CreateProxyConfig::new`] and adjust it with the `with_*`
/// builder methods, then pass it to [`create_proxy`] or
/// [`create_proxy_with_handle`]. The defaults quoted below are the values set
/// by [`CreateProxyConfig::new`].
#[derive(Clone)]
pub struct CreateProxyConfig {
    /// The backend URL to proxy requests to
    pub proxy_url: String,

    /// Paths to include in caching (empty means include all)
    /// Supports wildcards and method prefixes: "/api/*", "POST /api/*", "GET /*/users", etc.
    pub include_paths: Vec<String>,

    /// Paths to exclude from caching (empty means exclude none)
    /// Supports wildcards and method prefixes: "/admin/*", "POST *", "PUT /api/*", etc.
    /// Exclude overrides include
    pub exclude_paths: Vec<String>,

    /// Enable WebSocket and protocol upgrade support (default: true)
    /// When enabled, requests with Connection: Upgrade headers will bypass
    /// the cache and establish a direct bidirectional TCP tunnel
    pub enable_websocket: bool,

    /// Only allow GET requests, reject all others (default: false)
    /// When true, only GET requests are processed; POST, PUT, DELETE, etc. return 405 Method Not Allowed
    /// Useful for static site prerendering where mutations shouldn't be allowed
    pub forward_get_only: bool,

    /// Custom cache key generator
    /// Takes request info and returns a cache key
    /// Default: `METHOD:path` when the query string is empty, otherwise
    /// `METHOD:path?query` (e.g. `GET:/api/users?id=123`)
    pub cache_key_fn: Arc<dyn Fn(&RequestInfo) -> String + Send + Sync>,
    /// Capacity for special 404 cache. When 0, 404 caching is disabled.
    /// (default: 100)
    pub cache_404_capacity: usize,

    /// When true, treat a response containing the meta tag `<meta name="phantom-404" content="true">` as a 404
    /// This is an optional performance-affecting fallback to detect framework-generated 404 pages.
    /// (default: false)
    pub use_404_meta: bool,

    /// Controls which responses should be cached after the backend responds.
    /// (default: `CacheStrategy::All`)
    pub cache_strategy: CacheStrategy,

    /// Controls how cached bodies are stored in memory.
    /// (default: `CompressStrategy::Brotli`)
    pub compress_strategy: CompressStrategy,

    /// Controls where cached response bodies are stored.
    /// (default: `CacheStorageMode::Memory`)
    pub cache_storage_mode: CacheStorageMode,

    /// Optional override for filesystem-backed cache bodies.
    /// (default: `None`)
    pub cache_directory: Option<PathBuf>,

    /// Controls the operating mode of the proxy (Dynamic vs PreGenerate/SSG).
    /// (default: `ProxyMode::Dynamic`)
    pub proxy_mode: ProxyMode,

    /// Webhooks called for every request before cache reads.
    /// Blocking webhooks gate access; notify webhooks are fire-and-forget.
    /// See [`WebhookType`] for the full list of webhook kinds. (default: empty)
    pub webhooks: Vec<WebhookConfig>,
}

impl CreateProxyConfig {
    /// Build a configuration that proxies to `proxy_url`, with every other
    /// option at its default: no include/exclude filters, WebSocket support on,
    /// all HTTP methods forwarded, a 404 cache of 100 entries, meta-tag 404
    /// detection off, the `All` cache strategy, Brotli compression, in-memory
    /// storage, no cache directory override, `Dynamic` mode and no webhooks.
    pub fn new(proxy_url: String) -> Self {
        Self {
            proxy_url,
            include_paths: Vec::new(),
            exclude_paths: Vec::new(),
            enable_websocket: true,
            forward_get_only: false,
            // Default key: `METHOD:path`, extended with `?query` only when the
            // request actually carries a query string.
            cache_key_fn: Arc::new(|req_info| match req_info.query {
                "" => format!("{}:{}", req_info.method, req_info.path),
                query => format!("{}:{}?{}", req_info.method, req_info.path, query),
            }),
            cache_404_capacity: 100,
            use_404_meta: false,
            cache_strategy: CacheStrategy::All,
            compress_strategy: CompressStrategy::Brotli,
            cache_storage_mode: CacheStorageMode::Memory,
            cache_directory: None,
            proxy_mode: ProxyMode::Dynamic,
            webhooks: Vec::new(),
        }
    }

    /// Replace the list of path patterns eligible for caching.
    pub fn with_include_paths(self, paths: Vec<String>) -> Self {
        Self {
            include_paths: paths,
            ..self
        }
    }

    /// Replace the list of path patterns excluded from caching.
    pub fn with_exclude_paths(self, paths: Vec<String>) -> Self {
        Self {
            exclude_paths: paths,
            ..self
        }
    }

    /// Turn WebSocket / protocol-upgrade support on or off.
    pub fn with_websocket_enabled(self, enabled: bool) -> Self {
        Self {
            enable_websocket: enabled,
            ..self
        }
    }

    /// When `enabled`, only GET requests are allowed and all others are rejected.
    pub fn with_forward_get_only(self, enabled: bool) -> Self {
        Self {
            forward_get_only: enabled,
            ..self
        }
    }

    /// Install a custom cache key function in place of the default one.
    pub fn with_cache_key_fn<F>(self, f: F) -> Self
    where
        F: Fn(&RequestInfo) -> String + Send + Sync + 'static,
    {
        Self {
            cache_key_fn: Arc::new(f),
            ..self
        }
    }

    /// Set the capacity of the 404 cache; a capacity of 0 disables 404 caching.
    pub fn with_cache_404_capacity(self, capacity: usize) -> Self {
        Self {
            cache_404_capacity: capacity,
            ..self
        }
    }

    /// Enable or disable treating pages with the special meta tag as 404 pages.
    pub fn with_use_404_meta(self, enabled: bool) -> Self {
        Self {
            use_404_meta: enabled,
            ..self
        }
    }

    /// Choose which response types are stored in the cache.
    pub fn with_cache_strategy(self, strategy: CacheStrategy) -> Self {
        Self {
            cache_strategy: strategy,
            ..self
        }
    }

    /// Fluent alias for [`CreateProxyConfig::with_cache_strategy`].
    pub fn caching_strategy(self, strategy: CacheStrategy) -> Self {
        self.with_cache_strategy(strategy)
    }

    /// Choose the compression applied to stored cache entries.
    pub fn with_compress_strategy(self, strategy: CompressStrategy) -> Self {
        Self {
            compress_strategy: strategy,
            ..self
        }
    }

    /// Fluent alias for [`CreateProxyConfig::with_compress_strategy`].
    pub fn compression_strategy(self, strategy: CompressStrategy) -> Self {
        self.with_compress_strategy(strategy)
    }

    /// Choose the backing store used for cached response bodies.
    pub fn with_cache_storage_mode(self, mode: CacheStorageMode) -> Self {
        Self {
            cache_storage_mode: mode,
            ..self
        }
    }

    /// Set the directory used for disk-backed cache bodies.
    pub fn with_cache_directory(self, directory: impl Into<PathBuf>) -> Self {
        Self {
            cache_directory: Some(directory.into()),
            ..self
        }
    }

    /// Choose the proxy operating mode.
    /// Pass `ProxyMode::PreGenerate { paths, fallthrough }` to enable SSG mode.
    pub fn with_proxy_mode(self, mode: ProxyMode) -> Self {
        Self {
            proxy_mode: mode,
            ..self
        }
    }

    /// Replace the webhooks configured for this server.
    /// Blocking webhooks gate access; notify webhooks are fire-and-forget.
    pub fn with_webhooks(self, webhooks: Vec<WebhookConfig>) -> Self {
        Self { webhooks, ..self }
    }
}

/// The main library entry point: builds a proxy from `config`.
///
/// Returns the axum [`Router`] whose fallback route is the proxy handler,
/// together with the [`CacheHandle`] for the cache created for it.
///
/// Background tasks are spawned with `tokio::spawn`: one that listens for
/// cache invalidation events and, in [`ProxyMode::PreGenerate`], a snapshot
/// worker that first pre-generates the configured paths and then serves
/// runtime snapshot requests sent through the returned handle.
///
/// # Panics
///
/// Panics if the shared upstream or webhook HTTP client cannot be built.
pub fn create_proxy(config: CreateProxyConfig) -> (Router, CacheHandle) {
    let upstream_client =
        proxy::build_upstream_client().expect("failed to build shared upstream HTTP client");
    let webhook_client =
        proxy::build_webhook_client().expect("failed to build shared webhook HTTP client");

    // Only PreGenerate mode gets a snapshot channel; its receiving half is
    // handed to the snapshot worker further down.
    let (handle, snapshot_rx) = match &config.proxy_mode {
        ProxyMode::PreGenerate { .. } => {
            let (snapshot_tx, snapshot_rx) = mpsc::channel(32);
            (
                CacheHandle::new_with_snapshots(snapshot_tx),
                Some(snapshot_rx),
            )
        }
        ProxyMode::Dynamic => (CacheHandle::new(), None),
    };

    let cache = CacheStore::with_storage(
        handle.clone(),
        config.cache_404_capacity,
        config.cache_storage_mode.clone(),
        config.cache_directory.clone(),
    );

    // Background task reacting to invalidation events.
    spawn_invalidation_listener(cache.clone());

    // Background snapshot worker (warm-up + runtime snapshot management),
    // PreGenerate mode only.
    if let Some(rx) = snapshot_rx {
        if let ProxyMode::PreGenerate { paths, .. } = &config.proxy_mode {
            let worker = SnapshotWorker {
                rx,
                cache: cache.clone(),
                upstream_client: upstream_client.clone(),
                proxy_url: config.proxy_url.clone(),
                compress_strategy: config.compress_strategy.clone(),
                cache_key_fn: config.cache_key_fn.clone(),
                snapshots: paths.clone(),
            };
            tokio::spawn(worker.run());
        }
    }

    let shared_state = Arc::new(ProxyState::new(
        cache,
        config,
        upstream_client,
        webhook_client,
    ));

    let router = Router::new()
        .fallback(proxy::proxy_handler)
        .layer(Extension(shared_state));

    (router, handle)
}

/// Build a proxy router on top of an already existing cache handle.
///
/// Sharing one handle between several proxy instances makes an invalidation
/// issued through it reach every one of their caches.
///
/// Note: no snapshot worker is started here, so snapshot operations
/// (PreGenerate mode warm-up) are not available through this variant — use
/// [`create_proxy`] for full PreGenerate support.
///
/// # Panics
///
/// Panics if the shared upstream or webhook HTTP client cannot be built.
pub fn create_proxy_with_handle(config: CreateProxyConfig, handle: CacheHandle) -> Router {
    let upstream =
        proxy::build_upstream_client().expect("failed to build shared upstream HTTP client");
    let webhooks =
        proxy::build_webhook_client().expect("failed to build shared webhook HTTP client");

    let store = CacheStore::with_storage(
        handle,
        config.cache_404_capacity,
        config.cache_storage_mode.clone(),
        config.cache_directory.clone(),
    );

    // Background task reacting to invalidation events sent through the handle.
    spawn_invalidation_listener(store.clone());

    let state = ProxyState::new(store, config, upstream, webhooks);
    let shared_state = Extension(Arc::new(state));

    Router::new()
        .fallback(proxy::proxy_handler)
        .layer(shared_state)
}

/// Spawn a background task to listen for cache invalidation events.
fn spawn_invalidation_listener(cache: CacheStore) {
    let mut receiver = cache.handle().subscribe();

    tokio::spawn(async move {
        loop {
            match receiver.recv().await {
                Ok(cache::InvalidationMessage::All) => {
                    tracing::debug!("Cache invalidation triggered: clearing all entries");
                    cache.clear().await;
                }
                Ok(cache::InvalidationMessage::Pattern(pattern)) => {
                    tracing::debug!(
                        "Cache invalidation triggered: clearing entries matching pattern '{}'",
                        pattern
                    );
                    cache.clear_by_pattern(&pattern).await;
                }
                Err(e) => {
                    tracing::error!("Invalidation channel error: {}", e);
                    break;
                }
            }
        }
    });
}

/// Background worker that handles snapshot warm-up and runtime snapshot operations
/// for `ProxyMode::PreGenerate`.
///
/// It is created and spawned by [`create_proxy`]; the sending half of `rx` is
/// given to the [`CacheHandle`] returned to the caller.
struct SnapshotWorker {
    /// Incoming runtime snapshot requests (add / refresh / remove / refresh-all).
    rx: mpsc::Receiver<cache::SnapshotRequest>,
    /// Cache that fetched snapshots are stored into and removed from.
    cache: CacheStore,
    /// HTTP client used to fetch snapshot paths from the upstream backend.
    upstream_client: reqwest::Client,
    /// Backend URL the snapshot paths are fetched from.
    proxy_url: String,
    /// Compression strategy passed along when a snapshot is stored.
    compress_strategy: CompressStrategy,
    /// Cache key function, used when storing snapshots and to derive the key
    /// cleared when a snapshot is removed.
    cache_key_fn: Arc<dyn Fn(&RequestInfo) -> String + Send + Sync>,
    /// Current snapshot list — grows/shrinks via add/remove operations.
    snapshots: Vec<String>,
}

impl SnapshotWorker {
    /// Run the worker until the request channel is closed.
    ///
    /// First every initial snapshot path is fetched and cached (warm-up), then
    /// runtime requests are processed one at a time in arrival order. A failed
    /// fetch is logged as a warning and never stops the worker. After each
    /// request, the `done` channel is signalled whether or not it succeeded.
    async fn run(mut self) {
        // Warm-up: pre-generate all initial snapshot paths before handling requests.
        // `fetch_and_store` only needs `&self`, so the list can be iterated in
        // place without cloning it.
        for path in &self.snapshots {
            if let Err(e) = self.fetch_and_store(path).await {
                tracing::warn!("Failed to pre-generate snapshot '{}': {}", path, e);
            }
        }

        // Process runtime snapshot requests.
        while let Some(req) = self.rx.recv().await {
            match req.op {
                cache::SnapshotOp::Add(path) => match self.fetch_and_store(&path).await {
                    Ok(()) => {
                        // Track each path once: adding an already-tracked path
                        // refreshes its cache entry but must not create a
                        // duplicate that `RefreshAll` would fetch repeatedly.
                        if !self.snapshots.contains(&path) {
                            self.snapshots.push(path);
                        }
                    }
                    Err(e) => tracing::warn!("add_snapshot '{}' failed: {}", path, e),
                },
                cache::SnapshotOp::Refresh(path) => {
                    if let Err(e) = self.fetch_and_store(&path).await {
                        tracing::warn!("refresh_snapshot '{}' failed: {}", path, e);
                    }
                }
                cache::SnapshotOp::Remove(path) => {
                    // Derive the cache key as for a plain GET of `path` with no
                    // query string and no headers, then clear what matches it.
                    let empty_headers = axum::http::HeaderMap::new();
                    let req_info = RequestInfo {
                        method: "GET",
                        path: &path,
                        query: "",
                        headers: &empty_headers,
                    };
                    let key = (self.cache_key_fn)(&req_info);
                    self.cache.clear_by_pattern(&key).await;
                    self.snapshots.retain(|s| s != &path);
                }
                cache::SnapshotOp::RefreshAll => {
                    for path in &self.snapshots {
                        if let Err(e) = self.fetch_and_store(path).await {
                            tracing::warn!("refresh_all_snapshots '{}' failed: {}", path, e);
                        }
                    }
                }
            }
            // Signal completion to the caller. A send error is ignored on purpose.
            let _ = req.done.send(());
        }
    }

    /// Fetch `path` from the upstream backend and store the response in the
    /// cache by delegating to `proxy::fetch_and_cache_snapshot` with this
    /// worker's client, backend URL, cache, compression strategy and cache key
    /// function.
    ///
    /// # Errors
    ///
    /// Returns whatever error `proxy::fetch_and_cache_snapshot` reports.
    async fn fetch_and_store(&self, path: &str) -> anyhow::Result<()> {
        proxy::fetch_and_cache_snapshot(
            path,
            &self.upstream_client,
            &self.proxy_url,
            &self.cache,
            &self.compress_strategy,
            &self.cache_key_fn,
        )
        .await
    }
}

// Unit tests for the public configuration types and the `create_proxy` entry point.
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises one accepting and one rejecting content type per strategy,
    // including a value with a `charset` parameter and a missing content type.
    #[test]
    fn test_cache_strategy_content_types() {
        assert!(CacheStrategy::All.allows_content_type(None));
        assert!(!CacheStrategy::None.allows_content_type(Some("text/html")));
        assert!(CacheStrategy::OnlyHtml.allows_content_type(Some("text/html; charset=utf-8")));
        assert!(!CacheStrategy::OnlyHtml.allows_content_type(Some("image/png")));
        assert!(CacheStrategy::NoImages.allows_content_type(Some("text/css")));
        assert!(!CacheStrategy::NoImages.allows_content_type(Some("image/webp")));
        assert!(CacheStrategy::OnlyImages.allows_content_type(Some("image/svg+xml")));
        assert!(!CacheStrategy::OnlyImages.allows_content_type(Some("application/javascript")));
        assert!(CacheStrategy::OnlyAssets.allows_content_type(Some("application/javascript")));
        assert!(CacheStrategy::OnlyAssets.allows_content_type(Some("image/png")));
        assert!(!CacheStrategy::OnlyAssets.allows_content_type(Some("text/html")));
        assert!(!CacheStrategy::OnlyAssets.allows_content_type(None));
    }

    // Pins the `Display` output of each compression strategy and checks that
    // the default strategy is Brotli.
    #[test]
    fn test_compress_strategy_display() {
        assert_eq!(CompressStrategy::default().to_string(), "brotli");
        assert_eq!(CompressStrategy::None.to_string(), "none");
        assert_eq!(CompressStrategy::Gzip.to_string(), "gzip");
        assert_eq!(CompressStrategy::Deflate.to_string(), "deflate");
    }

    // Smoke test: `create_proxy` spawns background tasks with `tokio::spawn`,
    // so this runs under `#[tokio::test]`. No request is sent to the backend URL.
    #[tokio::test]
    async fn test_create_proxy() {
        let config = CreateProxyConfig::new("http://localhost:8080".to_string());
        assert_eq!(config.compress_strategy, CompressStrategy::Brotli);
        let (_app, handle) = create_proxy(config);
        handle.invalidate_all();
        handle.invalidate("GET:/api/*");
        // Just ensure it compiles and runs without panic
    }
}