Skip to main content

fbi_proxy/
routes.rs

1//! Rule-based routing engine for fbi-proxy.
2//!
3//! This module implements a configurable, placeholder-based rule system
4//! that replaces (eventually) the hardcoded `parse_host` logic in
5//! `rs/fbi-proxy.rs`. Routes are described declaratively (e.g. in YAML)
6//! as a `match` pattern + a `target` template + optional `headers`
7//! templates. The engine compiles each rule into a regular expression
8//! and, at request time, picks the first rule whose pattern matches
9//! the incoming host, then expands the templates using the captured
10//! placeholder values.
11//!
12//! # Placeholder syntax
13//!
14//! Placeholders in patterns and templates use brace syntax:
15//!
16//! * `{name}`       — matches one host segment: `[^.]+`
17//! * `{name:int}`   — matches one numeric segment: `\d+`
18//! * `{name:slug}`  — matches `[a-z0-9-]+`
19//! * `{name:multi}` — matches one or more dot-separated segments:
20//!                    `[^.]+(\.[^.]+)*`. Use this for DNS-passthrough
21//!                    patterns like `{upstream:multi}.{domain}` that
22//!                    need to capture e.g. `github.com` as one value.
23//!
24//! A given placeholder name can appear in both the `match` pattern
25//! (where it captures) and in the `target` / `headers` templates
26//! (where it is substituted from the corresponding capture).
27//!
28//! Literal characters in patterns (dots, dashes, etc.) are anchored
29//! by Rust's `regex` crate after escaping; the whole pattern is
30//! implicitly anchored with `^...$`.
31//!
//! # `{domain}` and multi-dot subdomain semantics
//!
//! A bare `{domain}` placeholder (written without an explicit `:kind`)
//! **is** special-cased by this engine: instead of a single segment it
//! matches two or more dot-separated segments (e.g. `fbi.com` or
//! `fbi.example.com`), but never a lone segment such as `com`. Every
//! other placeholder name (`{host}`, `{port}`, ...) matches according
//! to its kind only. Writing an explicit kind, e.g. `{domain:slug}`,
//! disables the special case.
//!
//! In a pattern like `{prefix}.{host}.{domain}`, `{prefix}` and
//! `{host}` each match exactly one dot-free segment and the trailing
//! `{domain}` captures everything that remains. For a host like
//! `a.b.c.fbi.com` the result is therefore `prefix=a`, `host=b`,
//! `domain=c.fbi.com`, which may or may not be what was intended.
//!
//! Callers that need the trailing domain pinned to one specific value
//! should encode it as a literal directly in the pattern
//! (e.g. `{prefix}.{host}.fbi.example.com`).
//!
//! `match_host_with_domain(routes, host, Some("fbi.example.com"))` is
//! an additional filter: it rejects every host that is neither
//! `fbi.example.com` itself nor ends with `.fbi.example.com`. It does
//! not strip the suffix; the full host is still matched against each
//! route pattern, so `{domain}` captures whatever the regex assigns
//! to it.
59
60use regex::Regex;
61use serde::Deserialize;
62use std::collections::HashMap;
63use std::fmt;
64
/// Matching class of a placeholder; decides which regex fragment is
/// emitted for it when a route pattern is compiled.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PlaceholderKind {
    /// Written `{name}`: a single dot-free host segment (`[^.]+`).
    Any,
    /// Written `{name:int}`: one or more digits (`\d+`).
    Int,
    /// Written `{name:slug}`: lowercase letters, digits and dashes
    /// (`[a-z0-9-]+`).
    Slug,
    /// Written `{name:multi}`: one or more dot-separated segments, so a
    /// value such as `github.com` can be captured as a whole (handy for
    /// DNS-passthrough patterns like `{upstream:multi}.fbi.com`).
    Multi,
}

impl PlaceholderKind {
    /// Regex source for a single dot-free segment.
    const ANY_FRAGMENT: &'static str = "[^.]+";
    /// Regex source for a run of digits.
    const INT_FRAGMENT: &'static str = r"\d+";
    /// Regex source for a lowercase slug.
    const SLUG_FRAGMENT: &'static str = "[a-z0-9-]+";
    /// Regex source for one or more dot-separated segments.
    const MULTI_FRAGMENT: &'static str = r"[^.]+(?:\.[^.]+)*";

    /// Returns the (unanchored, ungrouped) regex source that matches a
    /// value of this kind.
    fn regex_fragment(self) -> &'static str {
        match self {
            Self::Any => Self::ANY_FRAGMENT,
            Self::Int => Self::INT_FRAGMENT,
            Self::Slug => Self::SLUG_FRAGMENT,
            Self::Multi => Self::MULTI_FRAGMENT,
        }
    }
}
90
/// Looks up the regex fragment for placeholder names that must match
/// more than one dot-free segment.
///
/// Only `domain` is recognised today. Its fragment requires at least
/// two dot-separated segments (`fbi.com`, `fbi.example.com`) and so
/// never matches a lone segment such as `com`. That keeps the default
/// rule ordering unambiguous: in `{prefix}.{host}.{domain}` the
/// trailing `{domain}` takes the multi-segment suffix, so
/// `myserver.fbi.com` cannot be mis-read as
/// `prefix=myserver, host=fbi, domain=com`.
///
/// Returns `None` for every other name.
fn special_regex_fragment(name: &str) -> Option<&'static str> {
    const DOMAIN_FRAGMENT: &str = r"[a-zA-Z0-9\-]+(?:\.[a-zA-Z0-9\-]+)+";

    if name == "domain" {
        Some(DOMAIN_FRAGMENT)
    } else {
        None
    }
}
106
107/// A single named placeholder captured by a compiled route.
108#[derive(Debug, Clone)]
109pub struct Placeholder {
110    pub name: String,
111    pub kind: PlaceholderKind,
112}
113
114/// User-supplied route configuration (e.g. from `routes.yaml`).
115#[derive(Debug, Clone, Deserialize)]
116pub struct RouteConfig {
117    pub name: String,
118    /// Pattern matched against the Host header (without port).
119    /// E.g. `"{port:int}.{domain}"`.
120    #[serde(rename = "match")]
121    pub r#match: String,
122    /// Target template, e.g. `"127.0.0.1:{port}"`.
123    pub target: String,
124    /// Header templates. The special key `"Host"` (case-insensitive)
125    /// is surfaced separately on `RouteHit::host_header`.
126    #[serde(default)]
127    pub headers: Option<HashMap<String, String>>,
128}
129
130/// Top-level shape of `routes.yaml`.
131#[derive(Debug, Clone, Deserialize)]
132pub struct RoutesFile {
133    #[serde(default = "default_version")]
134    pub version: u32,
135    pub routes: Vec<RouteConfig>,
136}
137
/// Schema version assumed when a routes document has no `version` key.
fn default_version() -> u32 {
    const IMPLICIT_VERSION: u32 = 1;
    IMPLICIT_VERSION
}
141
142/// Parse a `routes.yaml`-style document.
143pub fn parse_yaml(src: &str) -> Result<RoutesFile, serde_yaml::Error> {
144    serde_yaml::from_str(src)
145}
146
147/// A compiled route — regex + templates — ready to evaluate per request.
148#[derive(Debug, Clone)]
149pub struct CompiledRoute {
150    pub name: String,
151    pub pattern: Regex,
152    pub placeholders: Vec<Placeholder>,
153    pub target_template: String,
154    pub header_templates: HashMap<String, String>,
155}
156
/// Outcome of matching a host against the compiled routes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RouteHit {
    /// Name of the route that matched.
    pub route_name: String,
    /// The route's `target` template after expansion, e.g. `"api:3001"`.
    pub target: String,
    /// Expanded value of the `Host` entry of the route's headers, when
    /// the route defines one.
    pub host_header: Option<String>,
    /// All remaining expanded headers; `Host` (in any letter case) is
    /// never included here.
    pub other_headers: HashMap<String, String>,
}
168
/// Everything that can go wrong while compiling a route in `compile`.
#[derive(Clone, Debug)]
pub enum CompileError {
    /// The placeholder spec is malformed, such as `{na me}` or `{:int}`.
    InvalidPlaceholder { route: String, placeholder: String, reason: String },
    /// The `:kind` suffix is not one of the supported kinds, such as
    /// `{name:foo}`.
    UnknownKind { route: String, name: String, kind: String },
    /// One match pattern declares the same placeholder name twice.
    DuplicatePlaceholder { route: String, name: String },
    /// The regex generated from the pattern was rejected by the regex
    /// engine. This should be rare and usually points at odd literal
    /// characters in the pattern.
    InvalidRegex { route: String, source: String },
    /// A target or header template uses a placeholder that the match
    /// pattern never declares.
    UndeclaredPlaceholder { route: String, name: String, location: String },
    /// A pattern or template contains an unmatched `{` or `}`.
    UnbalancedBraces { route: String, location: String },
}

impl fmt::Display for CompileError {
    /// Renders a one-line, human-readable message that always starts
    /// with the name of the offending route.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::InvalidPlaceholder { route, placeholder, reason } => {
                format!("route '{}': invalid placeholder '{{{}}}': {}", route, placeholder, reason)
            }
            Self::UnknownKind { route, name, kind } => {
                format!("route '{}': unknown placeholder kind ':{}' for '{{{}}}' (expected int|slug|multi or none)", route, kind, name)
            }
            Self::DuplicatePlaceholder { route, name } => {
                format!("route '{}': placeholder '{{{}}}' declared twice in match pattern", route, name)
            }
            Self::InvalidRegex { route, source } => {
                format!("route '{}': internal regex compile error: {}", route, source)
            }
            Self::UndeclaredPlaceholder { route, name, location } => {
                format!("route '{}': placeholder '{{{}}}' used in {} but never declared in match pattern", route, name, location)
            }
            Self::UnbalancedBraces { route, location } => {
                format!("route '{}': unbalanced braces in {}", route, location)
            }
        };
        f.write_str(&message)
    }
}

impl std::error::Error for CompileError {}
214
215// ---------------------------------------------------------------------------
216// Parsing helpers
217// ---------------------------------------------------------------------------
218
/// One lexical piece of a pattern or template string.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A run of text outside of any `{...}`.
    Literal(String),
    /// A `{name}` or `{name:kind}` reference; `kind` holds the text
    /// after the first `:` when one is present.
    Placeholder { name: String, kind: Option<String> },
}
225
226/// Tokenize a `{name[:kind]}`-style template. Returns the token list
227/// or `Err(UnbalancedBraces)` on malformed input.
228fn tokenize(s: &str, route: &str, location: &str) -> Result<Vec<Token>, CompileError> {
229    let mut out = Vec::new();
230    let mut buf = String::new();
231    let mut chars = s.chars().peekable();
232
233    while let Some(c) = chars.next() {
234        if c == '{' {
235            // flush literal
236            if !buf.is_empty() {
237                out.push(Token::Literal(std::mem::take(&mut buf)));
238            }
239            // collect until '}'
240            let mut spec = String::new();
241            let mut closed = false;
242            while let Some(&nc) = chars.peek() {
243                chars.next();
244                if nc == '}' {
245                    closed = true;
246                    break;
247                }
248                spec.push(nc);
249            }
250            if !closed {
251                return Err(CompileError::UnbalancedBraces {
252                    route: route.to_string(),
253                    location: location.to_string(),
254                });
255            }
256            // parse "name" or "name:kind"
257            let (name, kind) = match spec.split_once(':') {
258                Some((n, k)) => (n.to_string(), Some(k.to_string())),
259                None => (spec.clone(), None),
260            };
261            out.push(Token::Placeholder { name, kind });
262        } else if c == '}' {
263            return Err(CompileError::UnbalancedBraces {
264                route: route.to_string(),
265                location: location.to_string(),
266            });
267        } else {
268            buf.push(c);
269        }
270    }
271    if !buf.is_empty() {
272        out.push(Token::Literal(buf));
273    }
274    Ok(out)
275}
276
277fn parse_kind(route: &str, name: &str, kind: Option<&str>) -> Result<PlaceholderKind, CompileError> {
278    match kind {
279        None | Some("") => Ok(PlaceholderKind::Any),
280        Some("int") => Ok(PlaceholderKind::Int),
281        Some("slug") => Ok(PlaceholderKind::Slug),
282        Some("multi") => Ok(PlaceholderKind::Multi),
283        Some(other) => Err(CompileError::UnknownKind {
284            route: route.to_string(),
285            name: name.to_string(),
286            kind: other.to_string(),
287        }),
288    }
289}
290
291fn validate_name(route: &str, raw_spec: &str, name: &str) -> Result<(), CompileError> {
292    if name.is_empty() {
293        return Err(CompileError::InvalidPlaceholder {
294            route: route.to_string(),
295            placeholder: raw_spec.to_string(),
296            reason: "empty placeholder name".to_string(),
297        });
298    }
299    let first = name.chars().next().unwrap();
300    if !(first.is_ascii_alphabetic() || first == '_') {
301        return Err(CompileError::InvalidPlaceholder {
302            route: route.to_string(),
303            placeholder: raw_spec.to_string(),
304            reason: "name must start with a letter or '_'".to_string(),
305        });
306    }
307    for c in name.chars() {
308        if !(c.is_ascii_alphanumeric() || c == '_') {
309            return Err(CompileError::InvalidPlaceholder {
310                route: route.to_string(),
311                placeholder: raw_spec.to_string(),
312                reason: format!("name contains invalid character '{}'", c),
313            });
314        }
315    }
316    Ok(())
317}
318
319// ---------------------------------------------------------------------------
320// Compile
321// ---------------------------------------------------------------------------
322
323/// Compile a list of `RouteConfig`s into ready-to-use `CompiledRoute`s.
324///
325/// Returns the first error encountered.
326pub fn compile(routes: Vec<RouteConfig>) -> Result<Vec<CompiledRoute>, CompileError> {
327    let mut out = Vec::with_capacity(routes.len());
328    for r in routes {
329        out.push(compile_one(r)?);
330    }
331    Ok(out)
332}
333
334fn compile_one(cfg: RouteConfig) -> Result<CompiledRoute, CompileError> {
335    let route_name = cfg.name.clone();
336    let tokens = tokenize(&cfg.r#match, &route_name, "match pattern")?;
337
338    let mut declared: Vec<Placeholder> = Vec::new();
339    let mut regex_src = String::from("^");
340    for tok in &tokens {
341        match tok {
342            Token::Literal(lit) => {
343                regex_src.push_str(&regex::escape(lit));
344            }
345            Token::Placeholder { name, kind } => {
346                let raw_spec = match kind {
347                    Some(k) => format!("{}:{}", name, k),
348                    None => name.clone(),
349                };
350                validate_name(&route_name, &raw_spec, name)?;
351                let parsed_kind = parse_kind(&route_name, name, kind.as_deref())?;
352                if declared.iter().any(|p| p.name == *name) {
353                    return Err(CompileError::DuplicatePlaceholder {
354                        route: route_name,
355                        name: name.clone(),
356                    });
357                }
358                declared.push(Placeholder { name: name.clone(), kind: parsed_kind });
359                regex_src.push('(');
360                regex_src.push_str("?P<");
361                regex_src.push_str(name);
362                regex_src.push('>');
363                // If the user did not specify an explicit kind (e.g.
364                // `{domain}` not `{domain:slug}`) AND the name is one
365                // of the well-known multi-segment names, broaden the
366                // fragment to allow dots. This is what makes
367                // `{port:int}.{domain}` work for `3000.fbi.com`.
368                if kind.is_none() {
369                    if let Some(frag) = special_regex_fragment(name) {
370                        regex_src.push_str(frag);
371                    } else {
372                        regex_src.push_str(parsed_kind.regex_fragment());
373                    }
374                } else {
375                    regex_src.push_str(parsed_kind.regex_fragment());
376                }
377                regex_src.push(')');
378            }
379        }
380    }
381    regex_src.push('$');
382
383    let pattern = Regex::new(&regex_src).map_err(|e| CompileError::InvalidRegex {
384        route: route_name.clone(),
385        source: e.to_string(),
386    })?;
387
388    // Validate target template references known placeholders only.
389    let target_tokens = tokenize(&cfg.target, &route_name, "target template")?;
390    for tok in &target_tokens {
391        if let Token::Placeholder { name, .. } = tok {
392            validate_name(&route_name, name, name)?;
393            if !declared.iter().any(|p| p.name == *name) {
394                return Err(CompileError::UndeclaredPlaceholder {
395                    route: route_name,
396                    name: name.clone(),
397                    location: "target template".to_string(),
398                });
399            }
400        }
401    }
402
403    let mut header_templates: HashMap<String, String> = HashMap::new();
404    if let Some(headers) = cfg.headers {
405        for (k, v) in headers {
406            let header_tokens = tokenize(&v, &route_name, &format!("header '{}'", k))?;
407            for tok in &header_tokens {
408                if let Token::Placeholder { name, .. } = tok {
409                    validate_name(&route_name, name, name)?;
410                    if !declared.iter().any(|p| p.name == *name) {
411                        return Err(CompileError::UndeclaredPlaceholder {
412                            route: route_name.clone(),
413                            name: name.clone(),
414                            location: format!("header '{}'", k),
415                        });
416                    }
417                }
418            }
419            header_templates.insert(k, v);
420        }
421    }
422
423    Ok(CompiledRoute {
424        name: route_name,
425        pattern,
426        placeholders: declared,
427        target_template: cfg.target,
428        header_templates,
429    })
430}
431
432// ---------------------------------------------------------------------------
433// Match
434// ---------------------------------------------------------------------------
435
/// Removes a trailing `:port` from a host string so that matching sees
/// the bare host name.
///
/// The text after the last `:` is treated as a port only when it
/// consists solely of ASCII digits. An empty port (`host:`) and a port
/// followed by a single `/` (`host:8080/`) are accepted too; in the
/// latter case the slash is dropped together with the port.
///
/// Bracketed IPv6 literals are handled: `[::1]:8080` becomes `[::1]`,
/// while `[::1]` (no port) is returned unchanged instead of being cut
/// at one of the colons inside the brackets. Hosts that contain further
/// colons without being bracketed are returned unchanged as well, since
/// the last colon cannot be told apart from an address separator.
fn strip_port(host: &str) -> &str {
    let (name, port) = match host.rsplit_once(':') {
        Some(parts) => parts,
        None => return host,
    };

    // `normalize` removes a trailing slash only after this function has
    // run, so a slash may still sit behind the port here.
    let port = port.strip_suffix('/').unwrap_or(port);
    let port_is_numeric = port.bytes().all(|b| b.is_ascii_digit());

    // More colons left of the separator are only legitimate inside a
    // bracketed IPv6 literal, which must then end right before the port.
    let name_is_unambiguous = !name.contains(':') || name.ends_with(']');

    if port_is_numeric && name_is_unambiguous {
        name
    } else {
        host
    }
}
444
/// Drops a single trailing `/` from the host, if there is one (some
/// clients send it).
fn strip_trailing_slash(host: &str) -> &str {
    match host.strip_suffix('/') {
        Some(trimmed) => trimmed,
        None => host,
    }
}
449
450fn normalize(host: &str) -> String {
451    // Host header is case-insensitive per RFC 7230 §5.4.
452    strip_trailing_slash(strip_port(host)).to_ascii_lowercase()
453}
454
/// Substitutes every `{name}` / `{name:kind}` in `template` with the
/// matching value from `captures`.
///
/// Text outside of braces is copied verbatim. A placeholder whose name
/// is not in `captures` expands to nothing; `compile()` has already
/// checked that templates only reference declared placeholders, so no
/// error is reported here. An unclosed `{` consumes the rest of the
/// template as its spec.
fn expand(template: &str, captures: &HashMap<String, String>) -> String {
    let mut out = String::with_capacity(template.len());
    let mut rest = template;

    while let Some(open_at) = rest.find('{') {
        out.push_str(&rest[..open_at]);

        let after_open = &rest[open_at + 1..];
        let (spec, tail) = match after_open.find('}') {
            Some(close_at) => (&after_open[..close_at], &after_open[close_at + 1..]),
            None => (after_open, ""),
        };

        // The optional `:kind` suffix plays no role during expansion.
        let name = match spec.split_once(':') {
            Some((n, _)) => n,
            None => spec,
        };
        if let Some(value) = captures.get(name) {
            out.push_str(value);
        }

        rest = tail;
    }

    out.push_str(rest);
    out
}
488
489/// Try to match a host against the compiled routes. Returns the first
490/// match (top-to-bottom order in the config).
491pub fn match_host(routes: &[CompiledRoute], host: &str) -> Option<RouteHit> {
492    match_host_with_domain(routes, host, None)
493}
494
495/// Like `match_host`, but if `default_domain` is `Some("fbi.com")`,
496/// the host must end with `.fbi.com` (or be exactly `fbi.com`),
497/// otherwise no match is produced. The full host (including the
498/// domain suffix) is then matched against each compiled route's
499/// pattern, so `{domain}` in the pattern naturally captures the
500/// multi-dot suffix.
501///
502/// If `default_domain` is `None`, the host is matched as-is.
503pub fn match_host_with_domain(
504    routes: &[CompiledRoute],
505    host: &str,
506    default_domain: Option<&str>,
507) -> Option<RouteHit> {
508    let host = normalize(host);
509
510    if let Some(domain) = default_domain {
511        if !domain.is_empty() {
512            let domain_lc = domain.to_ascii_lowercase();
513            if host != domain_lc && !host.ends_with(&format!(".{}", domain_lc)) {
514                return None;
515            }
516        }
517    }
518
519    for route in routes {
520        if let Some(caps) = route.pattern.captures(&host) {
521            let mut values: HashMap<String, String> = HashMap::new();
522            for p in &route.placeholders {
523                if let Some(m) = caps.name(&p.name) {
524                    values.insert(p.name.clone(), m.as_str().to_string());
525                }
526            }
527
528            let target = expand(&route.target_template, &values);
529
530            let mut host_header: Option<String> = None;
531            let mut other_headers: HashMap<String, String> = HashMap::new();
532            for (k, tmpl) in &route.header_templates {
533                let v = expand(tmpl, &values);
534                if k.eq_ignore_ascii_case("host") {
535                    host_header = Some(v);
536                } else {
537                    other_headers.insert(k.clone(), v);
538                }
539            }
540
541            return Some(RouteHit {
542                route_name: route.name.clone(),
543                target,
544                host_header,
545                other_headers,
546            });
547        }
548    }
549    None
550}
551
552// ---------------------------------------------------------------------------
553// Tests
554// ---------------------------------------------------------------------------
555
556#[cfg(test)]
557mod tests {
558    use super::*;
559
560    fn default_routes() -> Vec<CompiledRoute> {
561        let configs = vec![
562            RouteConfig {
563                name: "port-as-host".into(),
564                r#match: "{port:int}.{domain}".into(),
565                target: "127.0.0.1:{port}".into(),
566                headers: None,
567            },
568            RouteConfig {
569                name: "host-double-dash-port".into(),
570                r#match: "{host}--{port:int}.{domain}".into(),
571                target: "{host}:{port}".into(),
572                headers: Some({
573                    let mut h = HashMap::new();
574                    h.insert("Host".into(), "{host}".into());
575                    h
576                }),
577            },
578            RouteConfig {
579                name: "subdomain-hoisting".into(),
580                r#match: "{prefix}.{host}.{domain}".into(),
581                target: "{host}:80".into(),
582                headers: Some({
583                    let mut h = HashMap::new();
584                    h.insert("Host".into(), "{prefix}".into());
585                    h
586                }),
587            },
588            RouteConfig {
589                name: "direct-forward".into(),
590                r#match: "{host}.{domain}".into(),
591                target: "{host}:80".into(),
592                headers: Some({
593                    let mut h = HashMap::new();
594                    h.insert("Host".into(), "{host}".into());
595                    h
596                }),
597            },
598        ];
599        compile(configs).expect("compile default routes")
600    }
601
602    /// All default-rule tests use the `fbi.com` domain filter, which
603    /// is the way these rules are intended to be used (domain stripping
604    /// is handled by the filter; the rules then route the remaining
605    /// prefix).
606    fn m(routes: &[CompiledRoute], host: &str) -> Option<RouteHit> {
607        match_host_with_domain(routes, host, Some("fbi.com"))
608    }
609
610    #[test]
611    fn empty_routes_no_match() {
612        let hit = match_host(&[], "anything.fbi.com");
613        assert!(hit.is_none());
614    }
615
616    #[test]
617    fn port_as_host_matches() {
618        let routes = default_routes();
619        let hit = m(&routes, "3000.fbi.com").expect("should match");
620        assert_eq!(hit.route_name, "port-as-host");
621        assert_eq!(hit.target, "127.0.0.1:3000");
622        assert_eq!(hit.host_header, None);
623    }
624
625    #[test]
626    fn host_double_dash_port_matches() {
627        let routes = default_routes();
628        let hit = m(&routes, "api--3001.fbi.com").expect("should match");
629        assert_eq!(hit.route_name, "host-double-dash-port");
630        assert_eq!(hit.target, "api:3001");
631        assert_eq!(hit.host_header.as_deref(), Some("api"));
632    }
633
634    #[test]
635    fn subdomain_hoisting_matches() {
636        let routes = default_routes();
637        let hit = m(&routes, "admin.app.fbi.com").expect("should match");
638        assert_eq!(hit.route_name, "subdomain-hoisting");
639        assert_eq!(hit.target, "app:80");
640        assert_eq!(hit.host_header.as_deref(), Some("admin"));
641    }
642
643    #[test]
644    fn direct_forward_matches() {
645        let routes = default_routes();
646        let hit = m(&routes, "myserver.fbi.com").expect("should match");
647        assert_eq!(hit.route_name, "direct-forward");
648        assert_eq!(hit.target, "myserver:80");
649        assert_eq!(hit.host_header.as_deref(), Some("myserver"));
650    }
651
652    #[test]
653    fn port_in_host_is_stripped_before_match() {
654        let routes = default_routes();
655        let hit = m(&routes, "myserver.fbi.com:8080").expect("should match");
656        assert_eq!(hit.route_name, "direct-forward");
657        assert_eq!(hit.target, "myserver:80");
658    }
659
660    #[test]
661    fn trailing_slash_stripped() {
662        let routes = default_routes();
663        let hit = m(&routes, "3000.fbi.com/").expect("should match");
664        assert_eq!(hit.route_name, "port-as-host");
665    }
666
667    #[test]
668    fn host_header_is_case_insensitive() {
669        let routes = default_routes();
670        let hit = m(&routes, "API--3001.FBI.COM").expect("should match");
671        assert_eq!(hit.route_name, "host-double-dash-port");
672        assert_eq!(hit.target, "api:3001");
673    }
674
675    #[test]
676    fn multi_dot_subdomain_assigns_domain_greedily() {
677        // For `a.b.c.fbi.com` against `{prefix}.{host}.{domain}`, the
678        // regex anchors left-to-right: {prefix} and {host} each
679        // capture one dot-free segment, and {domain} (which has the
680        // special multi-dot fragment) captures the rest.
681        //
682        // So the match is: prefix=a, host=b, domain=c.fbi.com.
683        //
684        // This may or may not be what the user intends. Document this
685        // ambiguity: if the user wants `prefix=a.b.c, host=fbi,
686        // domain=com`, they need a different pattern (with explicit
687        // literals for the trailing domain).
688        let routes = default_routes();
689        let hit = match_host(&routes, "a.b.c.fbi.com").expect("should match");
690        assert_eq!(hit.route_name, "subdomain-hoisting");
691        // host=b, target={host}:80 = b:80
692        assert_eq!(hit.target, "b:80");
693        // Host header = {prefix} = "a"
694        assert_eq!(hit.host_header.as_deref(), Some("a"));
695    }
696
697    #[test]
698    fn multi_dot_subdomain_with_domain_filter_is_unambiguous() {
699        // When the caller passes the default-domain (`fbi.com`), the
700        // regex still matches the full host but {domain} is now
701        // constrained to exactly the trailing "fbi.com" suffix via
702        // the domain filter. Actually, the filter only validates the
703        // suffix — the regex itself is still greedy. But for the
704        // typical "this is my fbi-proxy domain" usage, the host shape
705        // is single-prefix.subdomain.{domain}, which works as
706        // expected.
707        let routes = default_routes();
708        // admin.app.fbi.com -> subdomain-hoisting (prefix=admin, host=app, domain=fbi.com)
709        let hit = match_host_with_domain(&routes, "admin.app.fbi.com", Some("fbi.com"))
710            .expect("should match");
711        assert_eq!(hit.route_name, "subdomain-hoisting");
712        assert_eq!(hit.target, "app:80");
713        assert_eq!(hit.host_header.as_deref(), Some("admin"));
714    }
715
716    #[test]
717    fn first_match_wins() {
718        let routes = compile(vec![
719            RouteConfig {
720                name: "first".into(),
721                r#match: "{x}.{y}".into(),
722                target: "first-target".into(),
723                headers: None,
724            },
725            RouteConfig {
726                name: "second".into(),
727                r#match: "{x}.{y}".into(),
728                target: "second-target".into(),
729                headers: None,
730            },
731        ])
732        .unwrap();
733        let hit = match_host(&routes, "a.b").expect("should match");
734        assert_eq!(hit.route_name, "first");
735        assert_eq!(hit.target, "first-target");
736    }
737
738    #[test]
739    fn unknown_placeholder_kind_errors() {
740        let err = compile(vec![RouteConfig {
741            name: "bad".into(),
742            r#match: "{port:zzz}.com".into(),
743            target: "x".into(),
744            headers: None,
745        }])
746        .unwrap_err();
747        match err {
748            CompileError::UnknownKind { kind, .. } => assert_eq!(kind, "zzz"),
749            e => panic!("expected UnknownKind, got {:?}", e),
750        }
751    }
752
753    #[test]
754    fn unbalanced_braces_in_pattern_errors() {
755        let err = compile(vec![RouteConfig {
756            name: "bad".into(),
757            r#match: "{port".into(),
758            target: "x".into(),
759            headers: None,
760        }])
761        .unwrap_err();
762        match err {
763            CompileError::UnbalancedBraces { location, .. } => {
764                assert!(location.contains("match"))
765            }
766            e => panic!("expected UnbalancedBraces, got {:?}", e),
767        }
768    }
769
770    #[test]
771    fn duplicate_placeholder_errors() {
772        let err = compile(vec![RouteConfig {
773            name: "bad".into(),
774            r#match: "{x}.{x}".into(),
775            target: "y".into(),
776            headers: None,
777        }])
778        .unwrap_err();
779        match err {
780            CompileError::DuplicatePlaceholder { name, .. } => assert_eq!(name, "x"),
781            e => panic!("expected DuplicatePlaceholder, got {:?}", e),
782        }
783    }
784
785    #[test]
786    fn undeclared_placeholder_in_target_errors() {
787        let err = compile(vec![RouteConfig {
788            name: "bad".into(),
789            r#match: "{x}.{y}".into(),
790            target: "{z}".into(),
791            headers: None,
792        }])
793        .unwrap_err();
794        match err {
795            CompileError::UndeclaredPlaceholder { name, location, .. } => {
796                assert_eq!(name, "z");
797                assert!(location.contains("target"));
798            }
799            e => panic!("expected UndeclaredPlaceholder, got {:?}", e),
800        }
801    }
802
803    #[test]
804    fn invalid_placeholder_name_errors() {
805        let err = compile(vec![RouteConfig {
806            name: "bad".into(),
807            r#match: "{1foo}".into(),
808            target: "x".into(),
809            headers: None,
810        }])
811        .unwrap_err();
812        match err {
813            CompileError::InvalidPlaceholder { .. } => {}
814            e => panic!("expected InvalidPlaceholder, got {:?}", e),
815        }
816    }
817
818    #[test]
819    fn int_kind_rejects_non_numeric() {
820        let routes = default_routes();
821        // "abc.fbi.com" should NOT match port-as-host (because abc isn't \d+),
822        // but should fall through to direct-forward.
823        let hit = m(&routes, "abc.fbi.com").expect("should match");
824        assert_eq!(hit.route_name, "direct-forward");
825        assert_eq!(hit.target, "abc:80");
826    }
827
828    #[test]
829    fn match_host_with_domain_filter_accepts_matching() {
830        let routes = default_routes();
831        let hit = match_host_with_domain(&routes, "3000.fbi.com", Some("fbi.com"))
832            .expect("should match");
833        assert_eq!(hit.route_name, "port-as-host");
834        assert_eq!(hit.target, "127.0.0.1:3000");
835    }
836
837    #[test]
838    fn match_host_with_domain_filter_rejects_non_matching() {
839        let routes = default_routes();
840        let hit = match_host_with_domain(&routes, "evil.example.com", Some("fbi.com"));
841        assert!(hit.is_none());
842    }
843
844    #[test]
845    fn match_host_with_multi_dot_domain() {
846        // The default-domain filter (`fbi.example.com`) only validates
847        // the suffix. The pattern itself still matches the full host,
848        // and {domain} naturally captures multi-segment trailing parts.
849        let routes = compile(vec![RouteConfig {
850            name: "direct".into(),
851            r#match: "{host}.{domain}".into(),
852            target: "{host}:80".into(),
853            headers: None,
854        }])
855        .unwrap();
856        let hit =
857            match_host_with_domain(&routes, "myserver.fbi.example.com", Some("fbi.example.com"))
858                .expect("should match");
859        assert_eq!(hit.target, "myserver:80");
860    }
861
862    #[test]
863    fn match_host_with_multi_dot_domain_rejects_wrong_suffix() {
864        let routes = compile(vec![RouteConfig {
865            name: "direct".into(),
866            r#match: "{host}.{domain}".into(),
867            target: "{host}:80".into(),
868            headers: None,
869        }])
870        .unwrap();
871        let hit = match_host_with_domain(&routes, "myserver.other.com", Some("fbi.example.com"));
872        assert!(hit.is_none());
873    }
874
875    #[test]
876    fn multi_kind_captures_multi_dot_segments() {
877        let routes = compile(vec![RouteConfig {
878            name: "dns-passthrough".into(),
879            r#match: "{upstream:multi}.fbi.com".into(),
880            target: "{upstream}:80".into(),
881            headers: None,
882        }])
883        .unwrap();
884
885        let hit = match_host(&routes, "github.com.fbi.com").unwrap();
886        assert_eq!(hit.target, "github.com:80");
887
888        let hit = match_host(&routes, "api.example.org.fbi.com").unwrap();
889        assert_eq!(hit.target, "api.example.org:80");
890
891        // Single segment still matches (one-or-more).
892        let hit = match_host(&routes, "single.fbi.com").unwrap();
893        assert_eq!(hit.target, "single:80");
894    }
895
896    #[test]
897    fn multi_kind_with_host_header_rewrite() {
898        let routes = compile(vec![RouteConfig {
899            name: "dns-with-host".into(),
900            r#match: "{upstream:multi}.fbi.com".into(),
901            target: "{upstream}:443".into(),
902            headers: Some(HashMap::from([("Host".into(), "{upstream}".into())])),
903        }])
904        .unwrap();
905        let hit = match_host(&routes, "api.example.com.fbi.com").unwrap();
906        assert_eq!(hit.target, "api.example.com:443");
907        assert_eq!(hit.host_header.as_deref(), Some("api.example.com"));
908    }
909
910    #[test]
911    fn multi_kind_with_routes_yaml() {
912        let yaml = r#"
913routes:
914  - name: dns-passthrough
915    match: "{upstream:multi}.{domain}"
916    target: "{upstream}:80"
917"#;
918        let parsed = parse_yaml(yaml).unwrap();
919        let routes = compile(parsed.routes).unwrap();
920        let hit = match_host(&routes, "github.com.fbi.com").unwrap();
921        assert_eq!(hit.target, "github.com:80");
922    }
923
924    #[test]
925    fn slug_kind_accepts_lowercase_and_dashes() {
926        let routes = compile(vec![RouteConfig {
927            name: "slugged".into(),
928            r#match: "{name:slug}.example".into(),
929            target: "{name}".into(),
930            headers: None,
931        }])
932        .unwrap();
933        assert!(match_host(&routes, "my-service.example").is_some());
934        // Uppercase normalized to lowercase by `normalize`, so it matches.
935        assert!(match_host(&routes, "MY-SERVICE.example").is_some());
936        // Underscores not allowed in slug.
937        assert!(match_host(&routes, "my_service.example").is_none());
938    }
939
940    #[test]
941    fn parse_yaml_default_routes() {
942        let yaml = r#"
943version: 1
944routes:
945  - name: port-as-host
946    match: "{port:int}.{domain}"
947    target: "127.0.0.1:{port}"
948  - name: direct-forward
949    match: "{host}.{domain}"
950    target: "{host}:80"
951    headers:
952      Host: "{host}"
953"#;
954        let parsed = parse_yaml(yaml).expect("yaml should parse");
955        assert_eq!(parsed.version, 1);
956        assert_eq!(parsed.routes.len(), 2);
957        assert_eq!(parsed.routes[0].name, "port-as-host");
958        assert_eq!(parsed.routes[0].r#match, "{port:int}.{domain}");
959        assert_eq!(parsed.routes[1].headers.as_ref().unwrap()["Host"], "{host}");
960
961        let compiled = compile(parsed.routes).unwrap();
962        let hit = match_host(&compiled, "3000.fbi.com").unwrap();
963        assert_eq!(hit.target, "127.0.0.1:3000");
964    }
965
966    #[test]
967    fn expand_passes_through_unknown_placeholders_silently() {
968        // expand() is internal but exercised here as a sanity check:
969        // a template referencing an unknown name returns the template
970        // minus the placeholder. (compile() rejects this, so users
971        // can't hit it; this just guards against panics in expand.)
972        let mut caps = HashMap::new();
973        caps.insert("a".to_string(), "X".to_string());
974        assert_eq!(expand("{a}-{b}", &caps), "X-");
975    }
976}