Skip to main content

synapse_pingora/
vhost.rs

1//! Virtual host matching for multi-site routing.
2//!
3//! This module provides hostname-based routing with support for exact matches
4//! and wildcard patterns (e.g., `*.example.com`).
5//!
6//! # Performance Optimizations (Phase 1)
7//! - Uses `unicase::Ascii` for zero-allocation case-insensitive matching
8//! - Uses `ahash::RandomState` for 2-3x faster HashMap lookups
9
10use crate::config::AccessControlConfig;
11use crate::headers::CompiledHeaderConfig;
12use crate::shadow::ShadowMirrorConfig;
13use ahash::RandomState;
14use regex::Regex;
15use std::collections::HashMap;
16use tracing::{debug, warn};
17use unicase::Ascii;
18
/// Configuration for a single virtual host site.
///
/// One value describes one routable site: the hostname pattern it answers to,
/// where its traffic is forwarded, and the per-site TLS, WAF, access-control,
/// header and shadow-mirroring settings.
#[derive(Debug, Clone)]
pub struct SiteConfig {
    /// Hostname pattern: an exact name, a wildcard such as `*.example.com`,
    /// or one of the default-site markers `_` / `default`.
    pub hostname: String,
    /// Upstream backend addresses (the YAML conversion stores them as
    /// `host:port` strings).
    pub upstreams: Vec<String>,
    /// Whether TLS is enabled for this site
    pub tls_enabled: bool,
    /// Path to TLS certificate (if TLS enabled)
    pub tls_cert: Option<String>,
    /// Path to TLS private key (if TLS enabled)
    pub tls_key: Option<String>,
    /// WAF threshold override (0-100, None uses global default)
    pub waf_threshold: Option<u8>,
    /// Whether WAF is enabled for this site (`true` in `SiteConfig::default()`)
    pub waf_enabled: bool,
    /// Access control configuration (optional)
    pub access_control: Option<AccessControlConfig>,
    /// Compiled header manipulation configuration (optional)
    pub headers: Option<CompiledHeaderConfig>,
    /// Shadow mirroring configuration for honeypot delivery (optional)
    pub shadow_mirror: Option<ShadowMirrorConfig>,
}
43
44impl Default for SiteConfig {
45    fn default() -> Self {
46        Self {
47            hostname: String::new(),
48            upstreams: Vec::new(),
49            tls_enabled: false,
50            tls_cert: None,
51            tls_key: None,
52            waf_threshold: None,
53            waf_enabled: true,
54            access_control: None,
55            headers: None,
56            shadow_mirror: None,
57        }
58    }
59}
60
61impl From<crate::config::SiteYamlConfig> for SiteConfig {
62    fn from(yaml: crate::config::SiteYamlConfig) -> Self {
63        Self {
64            hostname: yaml.hostname,
65            upstreams: yaml
66                .upstreams
67                .iter()
68                .map(|u| format!("{}:{}", u.host, u.port))
69                .collect(),
70            tls_enabled: yaml.tls.is_some(),
71            tls_cert: yaml.tls.as_ref().map(|t| t.cert_path.clone()),
72            tls_key: yaml.tls.as_ref().map(|t| t.key_path.clone()),
73            waf_threshold: yaml.waf.as_ref().and_then(|w| w.threshold),
74            waf_enabled: yaml.waf.as_ref().map(|w| w.enabled).unwrap_or(true),
75            access_control: yaml.access_control,
76            headers: yaml.headers.as_ref().map(|headers| headers.compile()),
77            shadow_mirror: yaml.shadow_mirror,
78        }
79    }
80}
81
/// Compiled wildcard pattern for hostname matching.
///
/// Built by `VhostMatcher::new` for every site whose hostname contains `*`.
#[derive(Debug)]
struct WildcardPattern {
    /// Lowercased pattern string as configured (e.g. `*.example.com`); also
    /// used to rank patterns by their number of `.` separators.
    pattern: String,
    /// Anchored regex produced from `pattern` and used for the actual match
    regex: Regex,
    /// Index of the owning site in `VhostMatcher::sites` (an index, not a reference)
    site_index: usize,
}
92
/// Virtual host matcher with O(1) exact matching and wildcard fallback.
///
/// Lookup order is: exact hostname, then wildcard patterns (most `.`
/// separators first), then the default site if one was configured.
///
/// Security features:
/// - Limits wildcard complexity (max 3 wildcards, 253 char limit)
/// - Sanitizes host headers (rejects null bytes, invalid chars)
/// - Case-insensitive matching via pre-normalization
///
/// Performance features (Phase 1):
/// - Uses `Ascii<String>` keys so exact-match comparison is case-insensitive
/// - Uses `ahash::RandomState` for 2-3x faster HashMap operations
#[derive(Debug)]
pub struct VhostMatcher {
    /// Exact hostname -> site index mapping (O(1) lookup with fast hashing)
    exact_matches: HashMap<Ascii<String>, usize, RandomState>,
    /// Wildcard patterns, checked in order; sorted at construction so that
    /// patterns with more `.` separators come first
    wildcard_patterns: Vec<WildcardPattern>,
    /// All site configurations; the indices stored elsewhere in this struct
    /// point into this vector
    sites: Vec<SiteConfig>,
    /// Index of the default site (hostname `_` or `default`), if any
    default_site: Option<usize>,
}
114
115impl VhostMatcher {
116    /// Maximum allowed wildcards in a pattern (prevents ReDoS).
117    const MAX_WILDCARDS: usize = 3;
118    /// Maximum hostname length per RFC 1035.
119    const MAX_HOSTNAME_LEN: usize = 253;
120
121    /// Creates a new VhostMatcher from site configurations.
122    ///
123    /// # Errors
124    /// Returns an error if:
125    /// - A wildcard pattern has too many wildcards
126    /// - A hostname exceeds the maximum length
127    /// - A wildcard pattern fails to compile
128    pub fn new(sites: Vec<SiteConfig>) -> Result<Self, VhostError> {
129        // Pre-allocate with capacity hint (PERF-P3-1)
130        let mut exact_matches = HashMap::with_capacity_and_hasher(sites.len(), RandomState::new());
131        let mut wildcard_patterns = Vec::with_capacity(sites.len() / 4); // ~25% wildcards typical
132        let mut default_site = None;
133
134        for (index, site) in sites.iter().enumerate() {
135            // Validate hostname length
136            if site.hostname.len() > Self::MAX_HOSTNAME_LEN {
137                return Err(VhostError::HostnameTooLong {
138                    hostname: site.hostname.clone(),
139                    max_len: Self::MAX_HOSTNAME_LEN,
140                });
141            }
142
143            // Normalize hostname - Ascii handles case-insensitive comparison
144            let normalized = site.hostname.to_lowercase();
145
146            // Check if this is a wildcard pattern
147            if normalized.contains('*') {
148                // Validate wildcard count
149                let wildcard_count = normalized.matches('*').count();
150                if wildcard_count > Self::MAX_WILDCARDS {
151                    return Err(VhostError::TooManyWildcards {
152                        pattern: site.hostname.clone(),
153                        count: wildcard_count,
154                        max: Self::MAX_WILDCARDS,
155                    });
156                }
157
158                // Convert wildcard pattern to regex
159                let regex_pattern = Self::wildcard_to_regex(&normalized);
160                let regex = Regex::new(&regex_pattern).map_err(|e| VhostError::InvalidPattern {
161                    pattern: site.hostname.clone(),
162                    reason: e.to_string(),
163                })?;
164
165                wildcard_patterns.push(WildcardPattern {
166                    pattern: normalized,
167                    regex,
168                    site_index: index,
169                });
170            } else if normalized == "_" || normalized == "default" {
171                // Special default site marker
172                default_site = Some(index);
173            } else {
174                // Exact match - wrap in Ascii for case-insensitive key (PERF-P0-1)
175                exact_matches.insert(Ascii::new(normalized), index);
176            }
177        }
178
179        // Sort wildcards by specificity (more specific patterns first)
180        wildcard_patterns.sort_by(|a, b| {
181            // More segments = more specific
182            let a_segments = a.pattern.matches('.').count();
183            let b_segments = b.pattern.matches('.').count();
184            b_segments.cmp(&a_segments)
185        });
186
187        Ok(Self {
188            exact_matches,
189            wildcard_patterns,
190            sites,
191            default_site,
192        })
193    }
194
195    /// Create an empty matcher with no sites.
196    pub fn empty() -> Self {
197        Self {
198            exact_matches: HashMap::with_hasher(RandomState::new()),
199            wildcard_patterns: Vec::new(),
200            sites: Vec::new(),
201            default_site: None,
202        }
203    }
204
205    /// Converts a wildcard pattern to a regex pattern.
206    fn wildcard_to_regex(pattern: &str) -> String {
207        let mut regex = String::from("^");
208        for ch in pattern.chars() {
209            match ch {
210                '*' => regex.push_str("[a-z0-9-]*"),
211                '.' => regex.push_str("\\."),
212                '-' => regex.push('-'),
213                c if c.is_ascii_alphanumeric() => regex.push(c),
214                _ => regex.push_str(&regex::escape(&ch.to_string())),
215            }
216        }
217        regex.push('$');
218        regex
219    }
220
221    /// Sanitizes and validates a host header value.
222    ///
223    /// # Security
224    /// - Rejects null bytes
225    /// - Rejects non-ASCII characters
226    /// - Strips port numbers
227    /// - Normalizes to lowercase
228    pub fn sanitize_host(host: &str) -> Result<String, VhostError> {
229        // Reject null bytes (potential injection)
230        if host.contains('\0') {
231            return Err(VhostError::InvalidHost {
232                host: host.to_string(),
233                reason: "contains null byte".to_string(),
234            });
235        }
236
237        // Reject non-printable or non-ASCII characters
238        if !host.chars().all(|c| c.is_ascii() && !c.is_control()) {
239            return Err(VhostError::InvalidHost {
240                host: host.to_string(),
241                reason: "contains invalid characters".to_string(),
242            });
243        }
244
245        // Strip port number if present
246        let hostname = host.split(':').next().unwrap_or(host);
247
248        // Validate hostname characters (RFC 1123)
249        if !hostname.is_empty() && !Self::is_valid_hostname(hostname) {
250            return Err(VhostError::InvalidHost {
251                host: host.to_string(),
252                reason: "invalid hostname characters".to_string(),
253            });
254        }
255
256        Ok(hostname.to_lowercase())
257    }
258
259    /// Validates that a hostname contains only valid DNS characters.
260    fn is_valid_hostname(hostname: &str) -> bool {
261        hostname
262            .chars()
263            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.')
264            && !hostname.starts_with('-')
265            && !hostname.ends_with('-')
266    }
267
268    /// Matches a host header to a site configuration.
269    ///
270    /// # Arguments
271    /// * `host` - The raw Host header value
272    ///
273    /// # Returns
274    /// The matching site configuration, or None if no match found.
275    ///
276    /// # Performance
277    /// Uses `Ascii::new()` for zero-allocation case-insensitive lookup (PERF-P0-1)
278    #[inline]
279    pub fn match_host(&self, host: &str) -> Option<&SiteConfig> {
280        // Sanitize the host header (returns lowercase)
281        let hostname = match Self::sanitize_host(host) {
282            Ok(h) => h,
283            Err(e) => {
284                warn!("Invalid host header: {}", e);
285                return self.default_site.map(|i| &self.sites[i]);
286            }
287        };
288
289        // Try exact match first (O(1)) - Ascii provides case-insensitive comparison (PERF-P0-1)
290        if let Some(&index) = self.exact_matches.get(&Ascii::new(hostname.clone())) {
291            debug!("Exact match for host '{}' -> site {}", hostname, index);
292            return Some(&self.sites[index]);
293        }
294
295        // Try wildcard patterns (O(n) where n = wildcard count)
296        for pattern in &self.wildcard_patterns {
297            if pattern.regex.is_match(&hostname) {
298                debug!(
299                    "Wildcard match for host '{}' -> pattern '{}' -> site {}",
300                    hostname, pattern.pattern, pattern.site_index
301                );
302                return Some(&self.sites[pattern.site_index]);
303            }
304        }
305
306        // Fall back to default site
307        if let Some(index) = self.default_site {
308            debug!("Using default site for host '{}'", hostname);
309            return Some(&self.sites[index]);
310        }
311
312        debug!("No match found for host '{}'", hostname);
313        None
314    }
315
316    /// Returns all configured sites.
317    pub fn sites(&self) -> &[SiteConfig] {
318        &self.sites
319    }
320
321    /// Returns the number of configured sites.
322    pub fn site_count(&self) -> usize {
323        self.sites.len()
324    }
325}
326
/// Errors that can occur while building a matcher or sanitizing a host header.
#[derive(Debug, thiserror::Error)]
pub enum VhostError {
    /// A configured hostname is longer than the allowed maximum
    /// (raised by `VhostMatcher::new`).
    #[error("hostname '{hostname}' exceeds maximum length of {max_len}")]
    HostnameTooLong { hostname: String, max_len: usize },

    /// A wildcard pattern contains more `*` characters than permitted
    /// (raised by `VhostMatcher::new`).
    #[error("pattern '{pattern}' has {count} wildcards, max is {max}")]
    TooManyWildcards {
        pattern: String,
        count: usize,
        max: usize,
    },

    /// The regex generated from a wildcard pattern failed to compile;
    /// `reason` carries the regex error text.
    #[error("invalid pattern '{pattern}': {reason}")]
    InvalidPattern { pattern: String, reason: String },

    /// An incoming host header was rejected by `VhostMatcher::sanitize_host`;
    /// `reason` names the rule that was violated.
    #[error("invalid host header '{host}': {reason}")]
    InvalidHost { host: String, reason: String },
}
346
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal site for `hostname` with one local upstream.
    fn make_site(hostname: &str) -> SiteConfig {
        SiteConfig {
            hostname: hostname.to_string(),
            upstreams: vec!["127.0.0.1:8080".to_string()],
            ..Default::default()
        }
    }

    /// Returns the configured hostname of whichever site `host` resolves to.
    fn matched_hostname<'a>(matcher: &'a VhostMatcher, host: &str) -> Option<&'a str> {
        matcher.match_host(host).map(|site| site.hostname.as_str())
    }

    #[test]
    fn test_exact_match() {
        let sites = vec![make_site("example.com"), make_site("api.example.com")];
        let matcher = VhostMatcher::new(sites).unwrap();

        assert_eq!(matched_hostname(&matcher, "example.com"), Some("example.com"));
        assert_eq!(
            matched_hostname(&matcher, "api.example.com"),
            Some("api.example.com")
        );
        assert!(matcher.match_host("other.com").is_none());
    }

    #[test]
    fn test_case_insensitive() {
        let sites = vec![make_site("Example.COM")];
        let matcher = VhostMatcher::new(sites).unwrap();

        // The stored site keeps its configured spelling; only matching is normalized.
        assert_eq!(matched_hostname(&matcher, "example.com"), Some("Example.COM"));
        assert_eq!(matched_hostname(&matcher, "EXAMPLE.COM"), Some("Example.COM"));
        assert_eq!(matched_hostname(&matcher, "Example.Com"), Some("Example.COM"));
    }

    #[test]
    fn test_wildcard_match() {
        let sites = vec![make_site("*.example.com"), make_site("example.com")];
        let matcher = VhostMatcher::new(sites).unwrap();

        // The bare domain hits the exact entry, subdomains hit the wildcard.
        assert_eq!(matched_hostname(&matcher, "example.com"), Some("example.com"));
        assert_eq!(
            matched_hostname(&matcher, "api.example.com"),
            Some("*.example.com")
        );
        assert_eq!(
            matched_hostname(&matcher, "www.example.com"),
            Some("*.example.com")
        );
        assert!(matcher.match_host("other.com").is_none());
    }

    #[test]
    fn test_port_stripping() {
        let sites = vec![make_site("example.com")];
        let matcher = VhostMatcher::new(sites).unwrap();

        assert_eq!(
            matched_hostname(&matcher, "example.com:8080"),
            Some("example.com")
        );
        assert_eq!(
            matched_hostname(&matcher, "example.com:443"),
            Some("example.com")
        );
    }

    #[test]
    fn test_default_site() {
        let sites = vec![make_site("example.com"), make_site("_")];
        let matcher = VhostMatcher::new(sites).unwrap();

        assert_eq!(matched_hostname(&matcher, "example.com"), Some("example.com"));
        // Falls back to default
        assert_eq!(matched_hostname(&matcher, "unknown.com"), Some("_"));
    }

    #[test]
    fn test_sanitize_null_byte() {
        let result = VhostMatcher::sanitize_host("example\0.com");
        assert!(matches!(result, Err(VhostError::InvalidHost { .. })));
    }

    #[test]
    fn test_sanitize_non_ascii() {
        let result = VhostMatcher::sanitize_host("例え.com");
        assert!(matches!(result, Err(VhostError::InvalidHost { .. })));
    }

    #[test]
    fn test_too_many_wildcards() {
        let sites = vec![make_site("*.*.*.*")];
        let result = VhostMatcher::new(sites);
        assert!(matches!(
            result,
            Err(VhostError::TooManyWildcards {
                count: 4,
                max: 3,
                ..
            })
        ));
    }

    #[test]
    fn test_hostname_too_long() {
        let long_hostname = "a".repeat(300);
        let sites = vec![make_site(&long_hostname)];
        let result = VhostMatcher::new(sites);
        assert!(matches!(result, Err(VhostError::HostnameTooLong { .. })));
    }

    #[test]
    fn test_wildcard_specificity() {
        let sites = vec![make_site("*.example.com"), make_site("*.api.example.com")];
        let matcher = VhostMatcher::new(sites).unwrap();

        // More specific pattern should match first
        let site = matcher.match_host("v1.api.example.com").unwrap();
        assert_eq!(site.hostname, "*.api.example.com");
    }
}
444}