Skip to main content

rustio_admin/middleware/
locale.rs

1//! Per-request negotiated locale.
2//!
3//! Reads the inbound `Accept-Language` header, picks the operator's
4//! first preferred language tag, and stashes it in the request
5//! context so future code (the upcoming message-catalog surface;
6//! see `ROADMAP.md` § Internationalization & RTL) can render
7//! framework strings in that locale.
8//!
9//! ## Scope of v1
10//!
11//! This middleware is the *foundation* for locale-aware rendering.
12//! It does not translate anything yet — there is no message catalog
13//! shipped today. Adding `.middleware(middleware::locale)` to a
14//! project's router is free: it parses the header, stashes the
15//! result, and otherwise leaves rendering unchanged. When the
16//! catalog lands, templates / handlers read the locale via
17//! `req.ctx().get::<Locale>()` and pick the matching message.
18//!
19//! ## Placement in the middleware chain
20//!
21//! Order is irrelevant for locale negotiation — the value is read
22//! from the inbound header, not produced by any other middleware,
23//! and not consumed until a handler renders. Drop it anywhere
24//! after `logger` for cleanliness. The framework does not require
25//! this middleware to be registered; project apps that never
26//! consume the locale are free to leave it out.
27//!
28//! ## Parser scope
29//!
30//! [`parse_accept_language`] returns the **first** valid language
31//! tag in the header. Real `Accept-Language` headers carry quality
32//! values (`en-US,en;q=0.9,fr;q=0.8`) and the RFC 9110 negotiation
33//! rule sorts by `q` descending. The simplification is intentional:
34//! every browser sends the preferred locale **first** in practice,
35//! so first-wins matches client intent without the cost of a
36//! sorted-vector traversal on every request. When the message
37//! catalog ships, we can revisit if data shows real clients hitting
38//! the edge case.
39//!
40//! Tag shape is validated lightly — ASCII letters, digits, and `-`
41//! only, length-capped at 16 characters — so a malicious sender
42//! can't smuggle control bytes into a downstream consumer that
43//! later interpolates the value into a path or filename.
44//!
45//! ## Reading the locale from a handler
46//!
47//! ```ignore
48//! use rustio_admin::middleware::Locale;
49//!
50//! let locale = req
51//!     .ctx()
52//!     .get::<Locale>()
53//!     .map(|l| l.as_str().to_string())
54//!     .unwrap_or_else(|| "en".to_string());
55//! ```
56
57use crate::error::Result;
58use crate::http::{Request, Response};
59use crate::router::Next;
60
/// Locale tag used whenever negotiation yields nothing: the header is
/// absent, it holds no valid tag, or the project never mounted the
/// middleware and a consumer needs a value anyway. The framework's
/// built-in labels and buttons are all English today, hence `"en"`.
pub const DEFAULT_LOCALE: &str = "en";

/// Name of the request header consulted by the middleware. Spelled in
/// lower case, which is the HTTP/2 wire form and what browsers send in
/// practice.
const ACCEPT_LANGUAGE_HEADER: &str = "accept-language";

/// Upper bound, in bytes, on a tag the parser will keep. Realistic
/// tags are shorter (`en-US-x-foo-bar` is 15), so nothing legitimate
/// is rejected, while adversarially long values never reach
/// downstream consumers.
const MAX_TAG_LEN: usize = 16;
76
// public:
/// Negotiated locale carried in the request context; handlers fetch
/// it with `req.ctx().get::<Locale>()`.
///
/// The inner string is a language tag such as `"en"`, `"en-US"` or
/// `"ar-EG"`. The middleware only ever stores tags that passed its
/// validator (or the default locale); because the field is public,
/// values constructed elsewhere carry no such guarantee.
///
/// Equality and hashing compare the tag text exactly (case-sensitive),
/// so a `Locale` can be compared in tests or used as a map / set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Locale(pub String);
84
85impl Locale {
86    // public:
87    /// Borrow the underlying tag string.
88    pub fn as_str(&self) -> &str {
89        &self.0
90    }
91}
92
93// public:
94/// Middleware: parse the inbound `Accept-Language` header and
95/// stash the negotiated locale in the request context. Falls back
96/// to [`DEFAULT_LOCALE`] when the header is absent or contains no
97/// valid tag. Never fails — locale negotiation is best-effort by
98/// design, and a missing or malformed header collapses to the
99/// default rather than rejecting the request.
100pub async fn locale(mut req: Request, next: Next) -> Result<Response> {
101    let tag = parse_accept_language(req.header(ACCEPT_LANGUAGE_HEADER))
102        .unwrap_or_else(|| DEFAULT_LOCALE.to_string());
103    req.ctx_mut().insert(Locale(tag));
104    next.run(req).await
105}
106
107/// Parse an `Accept-Language` header value, returning the first
108/// language tag that passes the validator. Pulled out as a free
109/// function so the negotiation policy is unit-testable without a
110/// `Request`.
111///
112/// Returns:
113///
114/// - `Some(tag)` when the header contains at least one valid tag.
115///   Validation: ASCII letters / digits / hyphens only, length
116///   1..=[`MAX_TAG_LEN`].
117/// - `None` when the header is missing, empty after trimming, or
118///   contains no valid tag.
119pub fn parse_accept_language(header: Option<&str>) -> Option<String> {
120    let raw = header?.trim();
121    if raw.is_empty() {
122        return None;
123    }
124    for segment in raw.split(',') {
125        // Strip the `;q=…` weight if present; we don't sort by
126        // q-value (see module docs for the rationale).
127        let tag = segment.split(';').next().unwrap_or(segment).trim();
128        if is_valid_tag(tag) {
129            return Some(tag.to_string());
130        }
131    }
132    None
133}
134
135/// `true` when `tag` looks like a well-formed language tag.
136/// Conservative — rejects empty strings, oversized inputs, and
137/// anything outside ASCII letters / digits / hyphens.
138fn is_valid_tag(tag: &str) -> bool {
139    if tag.is_empty() || tag.len() > MAX_TAG_LEN {
140        return false;
141    }
142    tag.chars().all(|c| c.is_ascii_alphanumeric() || c == '-')
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: run the parser over a header that is present.
    fn negotiate(header: &str) -> Option<String> {
        parse_accept_language(Some(header))
    }

    #[test]
    fn missing_header_returns_none() {
        assert_eq!(parse_accept_language(None), None);
    }

    #[test]
    fn empty_header_returns_none() {
        // Both the empty string and a whitespace-only value count as blank.
        for blank in ["", "   "] {
            assert_eq!(negotiate(blank), None);
        }
    }

    #[test]
    fn single_tag_returned_as_is() {
        assert_eq!(negotiate("en").as_deref(), Some("en"));
        assert_eq!(negotiate("en-US").as_deref(), Some("en-US"));
    }

    #[test]
    fn first_tag_wins_in_comma_separated_list() {
        // Shape of a real browser header: the preferred locale leads.
        let negotiated = negotiate("en-US,en;q=0.9,fr;q=0.8,*;q=0.5");
        assert_eq!(negotiated.as_deref(), Some("en-US"));
    }

    #[test]
    fn q_value_is_stripped_from_the_tag() {
        // Only the tag is kept; the `;q=N` suffix must be gone.
        assert_eq!(negotiate("ar;q=0.9").as_deref(), Some("ar"));
    }

    #[test]
    fn invalid_first_tag_falls_through_to_next() {
        // The wildcard `*` is not a tag and `en\nbad` carries a control
        // byte, so both are skipped and `de` is the first valid entry.
        let negotiated = negotiate("*,en\nbad,de;q=0.8");
        assert_eq!(negotiated.as_deref(), Some("de"));
    }

    #[test]
    fn oversized_tag_is_rejected() {
        // A lone, overly long attacker-controlled value must not be
        // stored; with no other segment the result is `None`.
        let evil = "a".repeat(50);
        assert_eq!(negotiate(&evil), None);
    }

    #[test]
    fn header_with_only_invalid_tags_returns_none() {
        for junk in ["*,?,!!!", ",,,"] {
            assert_eq!(negotiate(junk), None);
        }
    }

    #[test]
    fn whitespace_between_tags_is_tolerated() {
        // Optional whitespace after commas is permitted, and browsers
        // such as Firefox and Safari send it.
        let negotiated = negotiate("en-US, en;q=0.9, fr;q=0.8");
        assert_eq!(negotiated.as_deref(), Some("en-US"));
    }

    #[test]
    fn arabic_and_cjk_tags_are_accepted() {
        // The framework self-hosts Arabic fonts (Tajawal, Noto Naskh
        // Arabic); the tags those fonts serve must negotiate cleanly.
        let cases = [
            ("ar-EG,ar;q=0.9,en;q=0.6", "ar-EG"),
            ("zh-Hans-CN,zh;q=0.9", "zh-Hans-CN"),
        ];
        for (header, expected) in cases {
            assert_eq!(negotiate(header).as_deref(), Some(expected));
        }
    }

    #[test]
    fn locale_as_str_round_trips() {
        let wrapped = Locale(String::from("en-US"));
        assert_eq!(wrapped.as_str(), "en-US");
    }
}