Skip to main content

fbi_proxy/
routes.rs

1//! Rule-based routing engine for fbi-proxy.
2//!
3//! This module implements a configurable, placeholder-based rule system
4//! that replaces (eventually) the hardcoded `parse_host` logic in
5//! `rs/fbi-proxy.rs`. Routes are described declaratively (e.g. in YAML)
6//! as a `match` pattern + a `target` template + optional `headers`
//! templates. The engine compiles each rule into a regular expression
//! and, at request time, picks the rule whose pattern matches the
//! incoming host (longest matching `path` prefix first, ties broken
//! by declaration order), then expands the templates using the
//! captured placeholder values.
11//!
12//! # Placeholder syntax
13//!
14//! Placeholders in patterns and templates use brace syntax:
15//!
16//! * `{name}`       — matches one host segment: `[^.]+`
17//! * `{name:int}`   — matches one numeric segment: `\d+`
18//! * `{name:slug}`  — matches `[a-z0-9-]+`
19//! * `{name:multi}` — matches one or more dot-separated segments:
20//!                    `[^.]+(\.[^.]+)*`. Use this for DNS-passthrough
21//!                    patterns like `{upstream:multi}.{domain}` that
22//!                    need to capture e.g. `github.com` as one value.
23//!
24//! A given placeholder name can appear in both the `match` pattern
25//! (where it captures) and in the `target` / `headers` templates
26//! (where it is substituted from the corresponding capture).
27//!
//! Literal characters in patterns (dots, dashes, etc.) are escaped
//! with `regex::escape`, so they only match themselves; the whole
//! pattern is implicitly anchored with `^...$`.
31//!
//! # `{domain}` and multi-dot subdomain semantics
//!
//! `{domain}` **is** special-cased by this engine when it is written
//! without an explicit kind: instead of a single dot-free segment it
//! matches two or more dot-separated segments (e.g. `fbi.com`,
//! `fbi.example.com`), but never a bare segment like `com`. The
//! default `routes.yaml` uses `{domain}` by convention to mean "the
//! trailing fbi-proxy domain (e.g. `fbi.com`)". Giving it an explicit
//! kind (e.g. `{domain:slug}`) opts out of the special handling, and
//! every other untyped placeholder (`{host}`, `{prefix}`, etc.) still
//! matches exactly one dot-free segment.
//!
//! This means a pattern like `{prefix}.{host}.{domain}` is matched
//! left-to-right: `{prefix}` and `{host}` each take a single dot-free
//! segment and `{domain}` takes everything that remains. For a host
//! like `a.b.c.fbi.com` the result is `prefix=a`, `host=b`,
//! `domain=c.fbi.com`, which may not be what the rule author
//! intended. Conversely, `myserver.fbi.com` does not match this
//! pattern at all, because `{domain}` cannot shrink to the single
//! segment `com`.
//!
//! Callers that want to pin the proxy domain (e.g. `fbi.example.com`)
//! should either:
//!   1. Pass it as `default_domain` to `match_host_with_domain` /
//!      `match_request`, which reject hosts outside that domain, or
//!   2. Encode the multi-dot literal directly in the pattern
//!      (e.g. `{prefix}.{host}.fbi.example.com`).
//!
//! `match_host_with_domain(routes, host, Some("fbi.example.com"))` is
//! the convenience helper: it rejects any host that is neither
//! `fbi.example.com` itself nor ends with `.fbi.example.com`, and then
//! matches the **full** host (suffix included) against each pattern.
//! The suffix is not stripped; `{domain}` is captured by the regex
//! like any other placeholder.
59
60use regex::Regex;
61use serde::{Deserialize, Serialize};
62use std::collections::HashMap;
63use std::fmt;
64
/// How a placeholder matches host text. Each kind maps to exactly one
/// regex fragment (see [`PlaceholderKind::regex_fragment`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PlaceholderKind {
    /// `{name}` — exactly one dot-free host segment: `[^.]+`.
    Any,
    /// `{name:int}` — a run of digits: `\d+`.
    Int,
    /// `{name:slug}` — lowercase letters, digits and dashes: `[a-z0-9-]+`.
    Slug,
    /// `{name:multi}` — one or more dot-separated segments. Intended
    /// for DNS-passthrough patterns (e.g. `{upstream:multi}.fbi.com`
    /// capturing `github.com` as a single value).
    Multi,
}

impl PlaceholderKind {
    /// Regex source for this kind, without the surrounding capture
    /// group (the caller wraps it in `(?P<name>...)`).
    fn regex_fragment(self) -> &'static str {
        const ONE_SEGMENT: &str = "[^.]+";
        const DIGITS: &str = r"\d+";
        const SLUG: &str = "[a-z0-9-]+";
        const MANY_SEGMENTS: &str = r"[^.]+(?:\.[^.]+)*";

        match self {
            Self::Any => ONE_SEGMENT,
            Self::Int => DIGITS,
            Self::Slug => SLUG,
            Self::Multi => MANY_SEGMENTS,
        }
    }
}
90
/// Regex fragment override for placeholder names that must span more
/// than one dot-free segment.
///
/// Only `domain` is recognised today. Its fragment requires at least
/// two dot-separated labels (`fbi.com`, `fbi.example.com`) and so can
/// never collapse to a bare `com`. That keeps rule ordering
/// unambiguous: with `{prefix}.{host}.{domain}`, the host
/// `myserver.fbi.com` cannot be split as
/// `prefix=myserver, host=fbi, domain=com`.
///
/// Returns `None` for every other name (the comparison is
/// case-sensitive).
fn special_regex_fragment(name: &str) -> Option<&'static str> {
    const DOMAIN_FRAGMENT: &str = r"[a-zA-Z0-9\-]+(?:\.[a-zA-Z0-9\-]+)+";

    if name == "domain" {
        Some(DOMAIN_FRAGMENT)
    } else {
        None
    }
}
106
/// A single named placeholder captured by a compiled route.
#[derive(Debug, Clone)]
pub struct Placeholder {
    /// Placeholder name as written in the match pattern; it is also
    /// used as the name of the regex capture group generated for it.
    pub name: String,
    /// Kind parsed from the optional `:kind` suffix
    /// (`PlaceholderKind::Any` when no suffix is given).
    pub kind: PlaceholderKind,
}
113
/// User-supplied route configuration (e.g. from `routes.yaml`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct RouteConfig {
    /// Rule name. It appears in compile error messages and is
    /// reported back on a match as `RouteHit::route_name`.
    pub name: String,
    /// Pattern matched against the Host header (without port).
    /// E.g. `"{port:int}.{domain}"`.
    #[serde(rename = "match")]
    pub r#match: String,
    /// Optional path-prefix matcher. When set, the rule only matches
    /// requests whose path falls under this prefix; among host-matching
    /// rules, the longest matching prefix wins. The path is forwarded
    /// upstream as-is (never stripped).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,
    /// Target template, e.g. `"127.0.0.1:{port}"`.
    pub target: String,
    /// Header templates. The special key `"Host"` (case-insensitive)
    /// is surfaced separately on `RouteHit::host_header`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub headers: Option<HashMap<String, String>>,
}
135
/// Top-level shape of `routes.yaml`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RoutesFile {
    /// Schema version of the document; filled in by
    /// `default_version()` when the key is absent.
    #[serde(default = "default_version")]
    pub version: u32,
    /// Route rules, in the order they are written in the file.
    pub routes: Vec<RouteConfig>,
}
143
/// Schema version assumed when a routes document omits `version`.
fn default_version() -> u32 {
    const FALLBACK_VERSION: u32 = 1;
    FALLBACK_VERSION
}
147
148/// Parse a `routes.yaml`-style document.
149pub fn parse_yaml(src: &str) -> Result<RoutesFile, serde_yaml::Error> {
150    serde_yaml::from_str(src)
151}
152
/// A compiled route — regex + templates — ready to evaluate per request.
#[derive(Debug, Clone)]
pub struct CompiledRoute {
    /// Rule name copied from the source `RouteConfig`.
    pub name: String,
    /// Anchored (`^...$`) regex generated from the `match` pattern,
    /// with one named capture group per placeholder.
    pub pattern: Regex,
    /// Placeholders declared in the `match` pattern, in pattern order.
    pub placeholders: Vec<Placeholder>,
    /// Unexpanded `target` template; expanded per request from the
    /// captured placeholder values.
    pub target_template: String,
    /// Unexpanded header templates, keyed by header name.
    pub header_templates: HashMap<String, String>,
    /// Original (uncompiled) `match` pattern, retained so the admin API
    /// can report and round-trip the source rule.
    pub match_pattern: String,
    /// Normalized optional path prefix (e.g. `"/_vscode/"`). `None`
    /// matches any path (lowest path priority).
    pub path_prefix: Option<String>,
    /// Namespace this route belongs to — the conf.d fragment stem, or
    /// `"default"` for the bundled defaults. Used for `ps` grouping.
    pub namespace: String,
}
171
/// Result of a successful match: the winning rule's name plus its
/// templates expanded with the values captured from the host.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RouteHit {
    /// Name of the rule that matched.
    pub route_name: String,
    /// Expanded `target` template (e.g. `"api:3001"`).
    pub target: String,
    /// Expanded `Host` header from the `headers` map, if any.
    pub host_header: Option<String>,
    /// Other expanded headers, excluding `Host` (case-insensitive).
    pub other_headers: HashMap<String, String>,
}
183
/// Everything that can go wrong while compiling a route rule. Every
/// variant carries the name of the offending route so the message can
/// point the user at the right rule.
#[derive(Debug, Clone)]
pub enum CompileError {
    /// A placeholder spec was malformed, e.g. `{na me}` or `{:int}`.
    InvalidPlaceholder {
        route: String,
        placeholder: String,
        reason: String,
    },
    /// An unknown placeholder kind, e.g. `{name:foo}`.
    UnknownKind {
        route: String,
        name: String,
        kind: String,
    },
    /// The same placeholder name was declared twice in the same pattern.
    DuplicatePlaceholder { route: String, name: String },
    /// The generated regex failed to compile (very unlikely — usually
    /// an indication of weird literal characters that escaped wrong).
    InvalidRegex { route: String, source: String },
    /// A `{name}` appeared in the target/header template but was never
    /// declared in the match pattern.
    UndeclaredPlaceholder {
        route: String,
        name: String,
        location: String,
    },
    /// Unbalanced braces in pattern or template.
    UnbalancedBraces { route: String, location: String },
}

impl fmt::Display for CompileError {
    /// Renders a single-line, user-facing message prefixed with the
    /// route name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidPlaceholder { route, placeholder, reason } => write!(
                f,
                "route '{}': invalid placeholder '{{{}}}': {}",
                route, placeholder, reason
            ),
            Self::UnknownKind { route, name, kind } => write!(
                f,
                "route '{}': unknown placeholder kind ':{}' for '{{{}}}' (expected int|slug|multi or none)",
                route, kind, name
            ),
            Self::DuplicatePlaceholder { route, name } => write!(
                f,
                "route '{}': placeholder '{{{}}}' declared twice in match pattern",
                route, name
            ),
            Self::InvalidRegex { route, source } => write!(
                f,
                "route '{}': internal regex compile error: {}",
                route, source
            ),
            Self::UndeclaredPlaceholder { route, name, location } => write!(
                f,
                "route '{}': placeholder '{{{}}}' used in {} but never declared in match pattern",
                route, name, location
            ),
            Self::UnbalancedBraces { route, location } => write!(
                f,
                "route '{}': unbalanced braces in {}",
                route, location
            ),
        }
    }
}

impl std::error::Error for CompileError {}
229
230// ---------------------------------------------------------------------------
231// Parsing helpers
232// ---------------------------------------------------------------------------
233
/// A token of a parsed pattern / template string.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// Text found outside any `{...}` group, kept verbatim.
    Literal(String),
    /// A `{name}` or `{name:kind}` group. `kind` is the raw, still
    /// unvalidated text after the first `:` (`None` when there is no
    /// `:` at all).
    Placeholder { name: String, kind: Option<String> },
}
240
241/// Tokenize a `{name[:kind]}`-style template. Returns the token list
242/// or `Err(UnbalancedBraces)` on malformed input.
243fn tokenize(s: &str, route: &str, location: &str) -> Result<Vec<Token>, CompileError> {
244    let mut out = Vec::new();
245    let mut buf = String::new();
246    let mut chars = s.chars().peekable();
247
248    while let Some(c) = chars.next() {
249        if c == '{' {
250            // flush literal
251            if !buf.is_empty() {
252                out.push(Token::Literal(std::mem::take(&mut buf)));
253            }
254            // collect until '}'
255            let mut spec = String::new();
256            let mut closed = false;
257            while let Some(&nc) = chars.peek() {
258                chars.next();
259                if nc == '}' {
260                    closed = true;
261                    break;
262                }
263                spec.push(nc);
264            }
265            if !closed {
266                return Err(CompileError::UnbalancedBraces {
267                    route: route.to_string(),
268                    location: location.to_string(),
269                });
270            }
271            // parse "name" or "name:kind"
272            let (name, kind) = match spec.split_once(':') {
273                Some((n, k)) => (n.to_string(), Some(k.to_string())),
274                None => (spec.clone(), None),
275            };
276            out.push(Token::Placeholder { name, kind });
277        } else if c == '}' {
278            return Err(CompileError::UnbalancedBraces {
279                route: route.to_string(),
280                location: location.to_string(),
281            });
282        } else {
283            buf.push(c);
284        }
285    }
286    if !buf.is_empty() {
287        out.push(Token::Literal(buf));
288    }
289    Ok(out)
290}
291
292fn parse_kind(route: &str, name: &str, kind: Option<&str>) -> Result<PlaceholderKind, CompileError> {
293    match kind {
294        None | Some("") => Ok(PlaceholderKind::Any),
295        Some("int") => Ok(PlaceholderKind::Int),
296        Some("slug") => Ok(PlaceholderKind::Slug),
297        Some("multi") => Ok(PlaceholderKind::Multi),
298        Some(other) => Err(CompileError::UnknownKind {
299            route: route.to_string(),
300            name: name.to_string(),
301            kind: other.to_string(),
302        }),
303    }
304}
305
306fn validate_name(route: &str, raw_spec: &str, name: &str) -> Result<(), CompileError> {
307    if name.is_empty() {
308        return Err(CompileError::InvalidPlaceholder {
309            route: route.to_string(),
310            placeholder: raw_spec.to_string(),
311            reason: "empty placeholder name".to_string(),
312        });
313    }
314    let first = name.chars().next().unwrap();
315    if !(first.is_ascii_alphabetic() || first == '_') {
316        return Err(CompileError::InvalidPlaceholder {
317            route: route.to_string(),
318            placeholder: raw_spec.to_string(),
319            reason: "name must start with a letter or '_'".to_string(),
320        });
321    }
322    for c in name.chars() {
323        if !(c.is_ascii_alphanumeric() || c == '_') {
324            return Err(CompileError::InvalidPlaceholder {
325                route: route.to_string(),
326                placeholder: raw_spec.to_string(),
327                reason: format!("name contains invalid character '{}'", c),
328            });
329        }
330    }
331    Ok(())
332}
333
334// ---------------------------------------------------------------------------
335// Compile
336// ---------------------------------------------------------------------------
337
338/// Compile a list of `RouteConfig`s into ready-to-use `CompiledRoute`s
339/// under the `"default"` namespace.
340///
341/// Returns the first error encountered.
342pub fn compile(routes: Vec<RouteConfig>) -> Result<Vec<CompiledRoute>, CompileError> {
343    compile_in_namespace(routes, "default")
344}
345
346/// Like [`compile`], but tags every produced route with `namespace`
347/// (the conf.d fragment stem). Used when merging multiple fragments.
348pub fn compile_in_namespace(
349    routes: Vec<RouteConfig>,
350    namespace: &str,
351) -> Result<Vec<CompiledRoute>, CompileError> {
352    let mut out = Vec::with_capacity(routes.len());
353    for r in routes {
354        out.push(compile_one(r, namespace)?);
355    }
356    Ok(out)
357}
358
/// Return `p` with a leading `/` guaranteed. A trailing slash is kept
/// exactly as the author wrote it, because it changes how the prefix
/// boundary is matched.
fn normalize_path_prefix(p: &str) -> String {
    let mut normalized = String::with_capacity(p.len() + 1);
    if !p.starts_with('/') {
        normalized.push('/');
    }
    normalized.push_str(p);
    normalized
}
368
369fn compile_one(cfg: RouteConfig, namespace: &str) -> Result<CompiledRoute, CompileError> {
370    let route_name = cfg.name.clone();
371    let match_pattern = cfg.r#match.clone();
372    let tokens = tokenize(&cfg.r#match, &route_name, "match pattern")?;
373
374    let mut declared: Vec<Placeholder> = Vec::new();
375    let mut regex_src = String::from("^");
376    for tok in &tokens {
377        match tok {
378            Token::Literal(lit) => {
379                regex_src.push_str(&regex::escape(lit));
380            }
381            Token::Placeholder { name, kind } => {
382                let raw_spec = match kind {
383                    Some(k) => format!("{}:{}", name, k),
384                    None => name.clone(),
385                };
386                validate_name(&route_name, &raw_spec, name)?;
387                let parsed_kind = parse_kind(&route_name, name, kind.as_deref())?;
388                if declared.iter().any(|p| p.name == *name) {
389                    return Err(CompileError::DuplicatePlaceholder {
390                        route: route_name,
391                        name: name.clone(),
392                    });
393                }
394                declared.push(Placeholder { name: name.clone(), kind: parsed_kind });
395                regex_src.push('(');
396                regex_src.push_str("?P<");
397                regex_src.push_str(name);
398                regex_src.push('>');
399                // If the user did not specify an explicit kind (e.g.
400                // `{domain}` not `{domain:slug}`) AND the name is one
401                // of the well-known multi-segment names, broaden the
402                // fragment to allow dots. This is what makes
403                // `{port:int}.{domain}` work for `3000.fbi.com`.
404                if kind.is_none() {
405                    if let Some(frag) = special_regex_fragment(name) {
406                        regex_src.push_str(frag);
407                    } else {
408                        regex_src.push_str(parsed_kind.regex_fragment());
409                    }
410                } else {
411                    regex_src.push_str(parsed_kind.regex_fragment());
412                }
413                regex_src.push(')');
414            }
415        }
416    }
417    regex_src.push('$');
418
419    let pattern = Regex::new(&regex_src).map_err(|e| CompileError::InvalidRegex {
420        route: route_name.clone(),
421        source: e.to_string(),
422    })?;
423
424    // Validate target template references known placeholders only.
425    let target_tokens = tokenize(&cfg.target, &route_name, "target template")?;
426    for tok in &target_tokens {
427        if let Token::Placeholder { name, .. } = tok {
428            validate_name(&route_name, name, name)?;
429            if !declared.iter().any(|p| p.name == *name) {
430                return Err(CompileError::UndeclaredPlaceholder {
431                    route: route_name,
432                    name: name.clone(),
433                    location: "target template".to_string(),
434                });
435            }
436        }
437    }
438
439    let mut header_templates: HashMap<String, String> = HashMap::new();
440    if let Some(headers) = cfg.headers {
441        for (k, v) in headers {
442            let header_tokens = tokenize(&v, &route_name, &format!("header '{}'", k))?;
443            for tok in &header_tokens {
444                if let Token::Placeholder { name, .. } = tok {
445                    validate_name(&route_name, name, name)?;
446                    if !declared.iter().any(|p| p.name == *name) {
447                        return Err(CompileError::UndeclaredPlaceholder {
448                            route: route_name.clone(),
449                            name: name.clone(),
450                            location: format!("header '{}'", k),
451                        });
452                    }
453                }
454            }
455            header_templates.insert(k, v);
456        }
457    }
458
459    let path_prefix = cfg.path.as_deref().map(normalize_path_prefix);
460
461    Ok(CompiledRoute {
462        name: route_name,
463        pattern,
464        placeholders: declared,
465        target_template: cfg.target,
466        header_templates,
467        match_pattern,
468        path_prefix,
469        namespace: namespace.to_string(),
470    })
471}
472
473// ---------------------------------------------------------------------------
474// Match
475// ---------------------------------------------------------------------------
476
/// Strip a trailing `:port` from a host string. Used for normalization
/// before matching.
///
/// Everything from the last `:` onward is removed, unless that colon
/// sits inside a bracketed IPv6 literal: `[::1]:8080` becomes `[::1]`,
/// while a bare `[::1]` (no port) is returned unchanged instead of
/// being cut down to `[:`. Hosts without any `:` are returned as-is.
fn strip_port(host: &str) -> &str {
    match host.rfind(':') {
        // A `]` after the last colon means the colon belongs to the
        // IPv6 address itself, not to a port separator.
        Some(i) if !host[i..].contains(']') => &host[..i],
        _ => host,
    }
}
485
/// Drop a single trailing `/` if there is one (some clients send it);
/// otherwise return the input untouched.
fn strip_trailing_slash(host: &str) -> &str {
    if host.ends_with('/') {
        // '/' is one byte, so this cut is on a char boundary.
        &host[..host.len() - 1]
    } else {
        host
    }
}
490
491fn normalize(host: &str) -> String {
492    // Host header is case-insensitive per RFC 7230 §5.4.
493    strip_trailing_slash(strip_port(host)).to_ascii_lowercase()
494}
495
/// Substitute every `{name}` / `{name:kind}` group in `template` with
/// the matching value from `captures`.
///
/// This is plain substitution with no validation, because templates
/// are already checked at compile time. A group whose name has no
/// capture expands to nothing, and an unclosed `{` treats the rest of
/// the template as the group.
fn expand(template: &str, captures: &HashMap<String, String>) -> String {
    let mut out = String::with_capacity(template.len());
    let mut rest = template;

    while let Some(open) = rest.find('{') {
        // Literal text before the group is copied verbatim.
        out.push_str(&rest[..open]);

        let after_open = &rest[open + 1..];
        let (spec, tail) = match after_open.find('}') {
            Some(close) => (&after_open[..close], &after_open[close + 1..]),
            None => (after_open, ""),
        };

        // Ignore an optional `:kind` suffix.
        let name = spec.split_once(':').map_or(spec, |(n, _)| n);
        if let Some(value) = captures.get(name) {
            out.push_str(value);
        }

        rest = tail;
    }

    out.push_str(rest);
    out
}
529
/// Whether `req_path` falls under `prefix` (expected to be normalized,
/// i.e. to start with `/`).
///
/// The prefix `/` matches every path. Otherwise the match is
/// boundary-aware: ignoring one trailing slash on the prefix,
/// `req_path` must equal the prefix or continue with a `/` right
/// after it. So `"/_vscode/"` matches `/_vscode` and `/_vscode/...`
/// but NOT `/_vscodex`.
fn path_matches(prefix: &str, req_path: &str) -> bool {
    if prefix == "/" {
        return true;
    }

    let base = prefix.strip_suffix('/').unwrap_or(prefix);
    req_path
        .strip_prefix(base)
        .map_or(false, |rest| rest.is_empty() || rest.starts_with('/'))
}
543
544/// Try to match a host against the compiled routes. Returns the first
545/// match (top-to-bottom order in the config).
546pub fn match_host(routes: &[CompiledRoute], host: &str) -> Option<RouteHit> {
547    match_host_with_domain(routes, host, None)
548}
549
550/// Like `match_host`, but if `default_domain` is `Some("fbi.com")`,
551/// the host must end with `.fbi.com` (or be exactly `fbi.com`),
552/// otherwise no match is produced. The full host (including the
553/// domain suffix) is then matched against each compiled route's
554/// pattern, so `{domain}` in the pattern naturally captures the
555/// multi-dot suffix.
556///
557/// If `default_domain` is `None`, the host is matched as-is.
558pub fn match_host_with_domain(
559    routes: &[CompiledRoute],
560    host: &str,
561    default_domain: Option<&str>,
562) -> Option<RouteHit> {
563    match_request(routes, host, "/", default_domain)
564}
565
566/// Match a host **and** request path against the compiled routes.
567///
568/// Among all routes whose host pattern matches (and whose `path_prefix`
569/// matches `req_path`, if any), the one with the **longest matching path
570/// prefix** wins; ties are broken by declaration order (earliest wins).
571/// A route with no `path_prefix` has the lowest path priority, so an
572/// explicit `path: /` rule still beats a path-less rule for the same
573/// host.
574pub fn match_request(
575    routes: &[CompiledRoute],
576    host: &str,
577    req_path: &str,
578    default_domain: Option<&str>,
579) -> Option<RouteHit> {
580    let host = normalize(host);
581
582    if let Some(domain) = default_domain {
583        if !domain.is_empty() {
584            let domain_lc = domain.to_ascii_lowercase();
585            if host != domain_lc && !host.ends_with(&format!(".{}", domain_lc)) {
586                return None;
587            }
588        }
589    }
590
591    // Select the best candidate by path-prefix length. `priority` is the
592    // prefix byte length, or 0 for a path-less route. We require a
593    // strictly-greater priority to replace the current best, so the
594    // earliest declaration wins on ties.
595    let mut best_idx: Option<usize> = None;
596    let mut best_priority: i64 = -1;
597    for (i, route) in routes.iter().enumerate() {
598        if !route.pattern.is_match(&host) {
599            continue;
600        }
601        let priority: i64 = match &route.path_prefix {
602            None => 0,
603            Some(prefix) => {
604                if !path_matches(prefix, req_path) {
605                    continue;
606                }
607                prefix.len() as i64
608            }
609        };
610        if priority > best_priority {
611            best_priority = priority;
612            best_idx = Some(i);
613        }
614    }
615
616    let route = routes.get(best_idx?)?;
617    let caps = route.pattern.captures(&host)?;
618    let mut values: HashMap<String, String> = HashMap::new();
619    for p in &route.placeholders {
620        if let Some(m) = caps.name(&p.name) {
621            values.insert(p.name.clone(), m.as_str().to_string());
622        }
623    }
624
625    let target = expand(&route.target_template, &values);
626
627    let mut host_header: Option<String> = None;
628    let mut other_headers: HashMap<String, String> = HashMap::new();
629    for (k, tmpl) in &route.header_templates {
630        let v = expand(tmpl, &values);
631        if k.eq_ignore_ascii_case("host") {
632            host_header = Some(v);
633        } else {
634            other_headers.insert(k.clone(), v);
635        }
636    }
637
638    Some(RouteHit {
639        route_name: route.name.clone(),
640        target,
641        host_header,
642        other_headers,
643    })
644}
645
646// ---------------------------------------------------------------------------
647// Tests
648// ---------------------------------------------------------------------------
649
650#[cfg(test)]
651mod tests {
652    use super::*;
653
654    fn default_routes() -> Vec<CompiledRoute> {
655        let configs = vec![
656            RouteConfig {
657                name: "port-as-host".into(),
658                r#match: "{port:int}.{domain}".into(),
659                path: None,
660                target: "127.0.0.1:{port}".into(),
661                headers: None,
662            },
663            RouteConfig {
664                name: "host-double-dash-port".into(),
665                r#match: "{host}--{port:int}.{domain}".into(),
666                path: None,
667                target: "{host}:{port}".into(),
668                headers: Some({
669                    let mut h = HashMap::new();
670                    h.insert("Host".into(), "{host}".into());
671                    h
672                }),
673            },
674            RouteConfig {
675                name: "subdomain-hoisting".into(),
676                r#match: "{prefix}.{host}.{domain}".into(),
677                path: None,
678                target: "{host}:80".into(),
679                headers: Some({
680                    let mut h = HashMap::new();
681                    h.insert("Host".into(), "{prefix}".into());
682                    h
683                }),
684            },
685            RouteConfig {
686                name: "direct-forward".into(),
687                r#match: "{host}.{domain}".into(),
688                path: None,
689                target: "{host}:80".into(),
690                headers: Some({
691                    let mut h = HashMap::new();
692                    h.insert("Host".into(), "{host}".into());
693                    h
694                }),
695            },
696        ];
697        compile(configs).expect("compile default routes")
698    }
699
    /// All default-rule tests use the `fbi.com` domain filter, which
    /// is the way these rules are intended to be used: the filter
    /// rejects hosts outside `fbi.com`, and the rules then match the
    /// full host, with `{domain}` capturing the `fbi.com` suffix.
704    fn m(routes: &[CompiledRoute], host: &str) -> Option<RouteHit> {
705        match_host_with_domain(routes, host, Some("fbi.com"))
706    }
707
708    #[test]
709    fn empty_routes_no_match() {
710        let hit = match_host(&[], "anything.fbi.com");
711        assert!(hit.is_none());
712    }
713
714    #[test]
715    fn port_as_host_matches() {
716        let routes = default_routes();
717        let hit = m(&routes, "3000.fbi.com").expect("should match");
718        assert_eq!(hit.route_name, "port-as-host");
719        assert_eq!(hit.target, "127.0.0.1:3000");
720        assert_eq!(hit.host_header, None);
721    }
722
723    #[test]
724    fn host_double_dash_port_matches() {
725        let routes = default_routes();
726        let hit = m(&routes, "api--3001.fbi.com").expect("should match");
727        assert_eq!(hit.route_name, "host-double-dash-port");
728        assert_eq!(hit.target, "api:3001");
729        assert_eq!(hit.host_header.as_deref(), Some("api"));
730    }
731
732    #[test]
733    fn subdomain_hoisting_matches() {
734        let routes = default_routes();
735        let hit = m(&routes, "admin.app.fbi.com").expect("should match");
736        assert_eq!(hit.route_name, "subdomain-hoisting");
737        assert_eq!(hit.target, "app:80");
738        assert_eq!(hit.host_header.as_deref(), Some("admin"));
739    }
740
741    #[test]
742    fn direct_forward_matches() {
743        let routes = default_routes();
744        let hit = m(&routes, "myserver.fbi.com").expect("should match");
745        assert_eq!(hit.route_name, "direct-forward");
746        assert_eq!(hit.target, "myserver:80");
747        assert_eq!(hit.host_header.as_deref(), Some("myserver"));
748    }
749
750    #[test]
751    fn port_in_host_is_stripped_before_match() {
752        let routes = default_routes();
753        let hit = m(&routes, "myserver.fbi.com:8080").expect("should match");
754        assert_eq!(hit.route_name, "direct-forward");
755        assert_eq!(hit.target, "myserver:80");
756    }
757
758    #[test]
759    fn trailing_slash_stripped() {
760        let routes = default_routes();
761        let hit = m(&routes, "3000.fbi.com/").expect("should match");
762        assert_eq!(hit.route_name, "port-as-host");
763    }
764
765    #[test]
766    fn host_header_is_case_insensitive() {
767        let routes = default_routes();
768        let hit = m(&routes, "API--3001.FBI.COM").expect("should match");
769        assert_eq!(hit.route_name, "host-double-dash-port");
770        assert_eq!(hit.target, "api:3001");
771    }
772
773    #[test]
774    fn multi_dot_subdomain_assigns_domain_greedily() {
775        // For `a.b.c.fbi.com` against `{prefix}.{host}.{domain}`, the
776        // regex anchors left-to-right: {prefix} and {host} each
777        // capture one dot-free segment, and {domain} (which has the
778        // special multi-dot fragment) captures the rest.
779        //
780        // So the match is: prefix=a, host=b, domain=c.fbi.com.
781        //
782        // This may or may not be what the user intends. Document this
783        // ambiguity: if the user wants `prefix=a.b.c, host=fbi,
784        // domain=com`, they need a different pattern (with explicit
785        // literals for the trailing domain).
786        let routes = default_routes();
787        let hit = match_host(&routes, "a.b.c.fbi.com").expect("should match");
788        assert_eq!(hit.route_name, "subdomain-hoisting");
789        // host=b, target={host}:80 = b:80
790        assert_eq!(hit.target, "b:80");
791        // Host header = {prefix} = "a"
792        assert_eq!(hit.host_header.as_deref(), Some("a"));
793    }
794
795    #[test]
796    fn multi_dot_subdomain_with_domain_filter_is_unambiguous() {
797        // When the caller passes the default-domain (`fbi.com`), the
798        // regex still matches the full host but {domain} is now
799        // constrained to exactly the trailing "fbi.com" suffix via
800        // the domain filter. Actually, the filter only validates the
801        // suffix — the regex itself is still greedy. But for the
802        // typical "this is my fbi-proxy domain" usage, the host shape
803        // is single-prefix.subdomain.{domain}, which works as
804        // expected.
805        let routes = default_routes();
806        // admin.app.fbi.com -> subdomain-hoisting (prefix=admin, host=app, domain=fbi.com)
807        let hit = match_host_with_domain(&routes, "admin.app.fbi.com", Some("fbi.com"))
808            .expect("should match");
809        assert_eq!(hit.route_name, "subdomain-hoisting");
810        assert_eq!(hit.target, "app:80");
811        assert_eq!(hit.host_header.as_deref(), Some("admin"));
812    }
813
814    #[test]
815    fn first_match_wins() {
816        let routes = compile(vec![
817            RouteConfig {
818                name: "first".into(),
819                r#match: "{x}.{y}".into(),
820                path: None,
821                target: "first-target".into(),
822                headers: None,
823            },
824            RouteConfig {
825                name: "second".into(),
826                r#match: "{x}.{y}".into(),
827                path: None,
828                target: "second-target".into(),
829                headers: None,
830            },
831        ])
832        .unwrap();
833        let hit = match_host(&routes, "a.b").expect("should match");
834        assert_eq!(hit.route_name, "first");
835        assert_eq!(hit.target, "first-target");
836    }
837
838    #[test]
839    fn unknown_placeholder_kind_errors() {
840        let err = compile(vec![RouteConfig {
841            name: "bad".into(),
842            r#match: "{port:zzz}.com".into(),
843            path: None,
844            target: "x".into(),
845            headers: None,
846        }])
847        .unwrap_err();
848        match err {
849            CompileError::UnknownKind { kind, .. } => assert_eq!(kind, "zzz"),
850            e => panic!("expected UnknownKind, got {:?}", e),
851        }
852    }
853
854    #[test]
855    fn unbalanced_braces_in_pattern_errors() {
856        let err = compile(vec![RouteConfig {
857            name: "bad".into(),
858            r#match: "{port".into(),
859            path: None,
860            target: "x".into(),
861            headers: None,
862        }])
863        .unwrap_err();
864        match err {
865            CompileError::UnbalancedBraces { location, .. } => {
866                assert!(location.contains("match"))
867            }
868            e => panic!("expected UnbalancedBraces, got {:?}", e),
869        }
870    }
871
872    #[test]
873    fn duplicate_placeholder_errors() {
874        let err = compile(vec![RouteConfig {
875            name: "bad".into(),
876            r#match: "{x}.{x}".into(),
877            path: None,
878            target: "y".into(),
879            headers: None,
880        }])
881        .unwrap_err();
882        match err {
883            CompileError::DuplicatePlaceholder { name, .. } => assert_eq!(name, "x"),
884            e => panic!("expected DuplicatePlaceholder, got {:?}", e),
885        }
886    }
887
888    #[test]
889    fn undeclared_placeholder_in_target_errors() {
890        let err = compile(vec![RouteConfig {
891            name: "bad".into(),
892            r#match: "{x}.{y}".into(),
893            path: None,
894            target: "{z}".into(),
895            headers: None,
896        }])
897        .unwrap_err();
898        match err {
899            CompileError::UndeclaredPlaceholder { name, location, .. } => {
900                assert_eq!(name, "z");
901                assert!(location.contains("target"));
902            }
903            e => panic!("expected UndeclaredPlaceholder, got {:?}", e),
904        }
905    }
906
907    #[test]
908    fn invalid_placeholder_name_errors() {
909        let err = compile(vec![RouteConfig {
910            name: "bad".into(),
911            r#match: "{1foo}".into(),
912            path: None,
913            target: "x".into(),
914            headers: None,
915        }])
916        .unwrap_err();
917        match err {
918            CompileError::InvalidPlaceholder { .. } => {}
919            e => panic!("expected InvalidPlaceholder, got {:?}", e),
920        }
921    }
922
923    #[test]
924    fn int_kind_rejects_non_numeric() {
925        let routes = default_routes();
926        // "abc.fbi.com" should NOT match port-as-host (because abc isn't \d+),
927        // but should fall through to direct-forward.
928        let hit = m(&routes, "abc.fbi.com").expect("should match");
929        assert_eq!(hit.route_name, "direct-forward");
930        assert_eq!(hit.target, "abc:80");
931    }
932
933    #[test]
934    fn match_host_with_domain_filter_accepts_matching() {
935        let routes = default_routes();
936        let hit = match_host_with_domain(&routes, "3000.fbi.com", Some("fbi.com"))
937            .expect("should match");
938        assert_eq!(hit.route_name, "port-as-host");
939        assert_eq!(hit.target, "127.0.0.1:3000");
940    }
941
942    #[test]
943    fn match_host_with_domain_filter_rejects_non_matching() {
944        let routes = default_routes();
945        let hit = match_host_with_domain(&routes, "evil.example.com", Some("fbi.com"));
946        assert!(hit.is_none());
947    }
948
949    #[test]
950    fn match_host_with_multi_dot_domain() {
951        // The default-domain filter (`fbi.example.com`) only validates
952        // the suffix. The pattern itself still matches the full host,
953        // and {domain} naturally captures multi-segment trailing parts.
954        let routes = compile(vec![RouteConfig {
955            name: "direct".into(),
956            r#match: "{host}.{domain}".into(),
957            path: None,
958            target: "{host}:80".into(),
959            headers: None,
960        }])
961        .unwrap();
962        let hit =
963            match_host_with_domain(&routes, "myserver.fbi.example.com", Some("fbi.example.com"))
964                .expect("should match");
965        assert_eq!(hit.target, "myserver:80");
966    }
967
968    #[test]
969    fn match_host_with_multi_dot_domain_rejects_wrong_suffix() {
970        let routes = compile(vec![RouteConfig {
971            name: "direct".into(),
972            r#match: "{host}.{domain}".into(),
973            path: None,
974            target: "{host}:80".into(),
975            headers: None,
976        }])
977        .unwrap();
978        let hit = match_host_with_domain(&routes, "myserver.other.com", Some("fbi.example.com"));
979        assert!(hit.is_none());
980    }
981
982    #[test]
983    fn multi_kind_captures_multi_dot_segments() {
984        let routes = compile(vec![RouteConfig {
985            name: "dns-passthrough".into(),
986            r#match: "{upstream:multi}.fbi.com".into(),
987            path: None,
988            target: "{upstream}:80".into(),
989            headers: None,
990        }])
991        .unwrap();
992
993        let hit = match_host(&routes, "github.com.fbi.com").unwrap();
994        assert_eq!(hit.target, "github.com:80");
995
996        let hit = match_host(&routes, "api.example.org.fbi.com").unwrap();
997        assert_eq!(hit.target, "api.example.org:80");
998
999        // Single segment still matches (one-or-more).
1000        let hit = match_host(&routes, "single.fbi.com").unwrap();
1001        assert_eq!(hit.target, "single:80");
1002    }
1003
1004    #[test]
1005    fn multi_kind_with_host_header_rewrite() {
1006        let routes = compile(vec![RouteConfig {
1007            name: "dns-with-host".into(),
1008            r#match: "{upstream:multi}.fbi.com".into(),
1009            path: None,
1010            target: "{upstream}:443".into(),
1011            headers: Some(HashMap::from([("Host".into(), "{upstream}".into())])),
1012        }])
1013        .unwrap();
1014        let hit = match_host(&routes, "api.example.com.fbi.com").unwrap();
1015        assert_eq!(hit.target, "api.example.com:443");
1016        assert_eq!(hit.host_header.as_deref(), Some("api.example.com"));
1017    }
1018
1019    #[test]
1020    fn multi_kind_with_routes_yaml() {
1021        let yaml = r#"
1022routes:
1023  - name: dns-passthrough
1024    match: "{upstream:multi}.{domain}"
1025    target: "{upstream}:80"
1026"#;
1027        let parsed = parse_yaml(yaml).unwrap();
1028        let routes = compile(parsed.routes).unwrap();
1029        let hit = match_host(&routes, "github.com.fbi.com").unwrap();
1030        assert_eq!(hit.target, "github.com:80");
1031    }
1032
1033    #[test]
1034    fn slug_kind_accepts_lowercase_and_dashes() {
1035        let routes = compile(vec![RouteConfig {
1036            name: "slugged".into(),
1037            r#match: "{name:slug}.example".into(),
1038            path: None,
1039            target: "{name}".into(),
1040            headers: None,
1041        }])
1042        .unwrap();
1043        assert!(match_host(&routes, "my-service.example").is_some());
1044        // Uppercase normalized to lowercase by `normalize`, so it matches.
1045        assert!(match_host(&routes, "MY-SERVICE.example").is_some());
1046        // Underscores not allowed in slug.
1047        assert!(match_host(&routes, "my_service.example").is_none());
1048    }
1049
1050    #[test]
1051    fn parse_yaml_default_routes() {
1052        let yaml = r#"
1053version: 1
1054routes:
1055  - name: port-as-host
1056    match: "{port:int}.{domain}"
1057    target: "127.0.0.1:{port}"
1058  - name: direct-forward
1059    match: "{host}.{domain}"
1060    target: "{host}:80"
1061    headers:
1062      Host: "{host}"
1063"#;
1064        let parsed = parse_yaml(yaml).expect("yaml should parse");
1065        assert_eq!(parsed.version, 1);
1066        assert_eq!(parsed.routes.len(), 2);
1067        assert_eq!(parsed.routes[0].name, "port-as-host");
1068        assert_eq!(parsed.routes[0].r#match, "{port:int}.{domain}");
1069        assert_eq!(parsed.routes[1].headers.as_ref().unwrap()["Host"], "{host}");
1070
1071        let compiled = compile(parsed.routes).unwrap();
1072        let hit = match_host(&compiled, "3000.fbi.com").unwrap();
1073        assert_eq!(hit.target, "127.0.0.1:3000");
1074    }
1075
1076    #[test]
1077    fn expand_passes_through_unknown_placeholders_silently() {
1078        // expand() is internal but exercised here as a sanity check:
1079        // a template referencing an unknown name returns the template
1080        // minus the placeholder. (compile() rejects this, so users
1081        // can't hit it; this just guards against panics in expand.)
1082        let mut caps = HashMap::new();
1083        caps.insert("a".to_string(), "X".to_string());
1084        assert_eq!(expand("{a}-{b}", &caps), "X-");
1085    }
1086
1087    // ----- path-prefix matching (web-code use case) -----
1088
1089    fn web_code_routes() -> Vec<CompiledRoute> {
1090        compile_in_namespace(
1091            vec![
1092                RouteConfig {
1093                    name: "root".into(),
1094                    r#match: "fbi.com".into(),
1095                    path: Some("/".into()),
1096                    target: "localhost:3001".into(),
1097                    headers: None,
1098                },
1099                RouteConfig {
1100                    name: "vscode".into(),
1101                    r#match: "fbi.com".into(),
1102                    path: Some("/_vscode/".into()),
1103                    target: "localhost:9999".into(),
1104                    headers: None,
1105                },
1106            ],
1107            "web-code",
1108        )
1109        .expect("compile web-code routes")
1110    }
1111
1112    #[test]
1113    fn path_prefix_longest_wins() {
1114        let routes = web_code_routes();
1115        let hit = match_request(&routes, "fbi.com", "/_vscode/", Some("fbi.com")).unwrap();
1116        assert_eq!(hit.target, "localhost:9999");
1117        let hit = match_request(&routes, "fbi.com", "/_vscode/stable/x.js", Some("fbi.com")).unwrap();
1118        assert_eq!(hit.target, "localhost:9999");
1119        let hit = match_request(&routes, "fbi.com", "/", Some("fbi.com")).unwrap();
1120        assert_eq!(hit.target, "localhost:3001");
1121        let hit = match_request(&routes, "fbi.com", "/owner/repo/tree/main", Some("fbi.com")).unwrap();
1122        assert_eq!(hit.target, "localhost:3001");
1123    }
1124
1125    #[test]
1126    fn path_prefix_boundary_not_substring() {
1127        let routes = web_code_routes();
1128        let hit = match_request(&routes, "fbi.com", "/_vscodex", Some("fbi.com")).unwrap();
1129        assert_eq!(hit.target, "localhost:3001");
1130        let hit = match_request(&routes, "fbi.com", "/_vscode", Some("fbi.com")).unwrap();
1131        assert_eq!(hit.target, "localhost:9999");
1132    }
1133
1134    #[test]
1135    fn explicit_root_path_beats_pathless() {
1136        let routes = compile(vec![
1137            RouteConfig {
1138                name: "pathless".into(),
1139                r#match: "fbi.com".into(),
1140                path: None,
1141                target: "localhost:1".into(),
1142                headers: None,
1143            },
1144            RouteConfig {
1145                name: "rooted".into(),
1146                r#match: "fbi.com".into(),
1147                path: Some("/".into()),
1148                target: "localhost:2".into(),
1149                headers: None,
1150            },
1151        ])
1152        .unwrap();
1153        let hit = match_request(&routes, "fbi.com", "/anything", None).unwrap();
1154        assert_eq!(hit.target, "localhost:2");
1155    }
1156
1157    #[test]
1158    fn namespace_is_tagged_on_compiled_route() {
1159        let routes = web_code_routes();
1160        assert!(routes.iter().all(|r| r.namespace == "web-code"));
1161        let bundled = compile(vec![RouteConfig {
1162            name: "x".into(),
1163            r#match: "{host}".into(),
1164            path: None,
1165            target: "{host}:80".into(),
1166            headers: None,
1167        }])
1168        .unwrap();
1169        assert_eq!(bundled[0].namespace, "default");
1170    }
1171}