Skip to main content

keyhog_scanner/
jwt.rs

//! JWT structural validation.
//!
//! A bare JWT regex (three base64url segments separated by dots) catches an
//! enormous number of false positives - ETag headers, hash digests, opaque
//! session IDs, tracking pixels, etc. This module decodes the header +
//! payload and validates the JWT shape (`alg`/`typ`/`exp`) so we can:
//!
//!   1. Boost confidence on credentials that ARE real JWTs (correctly
//!      structured header + valid algorithm).
//!   2. Suppress credentials that LOOK like JWTs but aren't (random base64,
//!      malformed header).
//!   3. Surface metadata: `alg`, `iss`, `sub`, `aud`, `exp` as evidence in
//!      the finding output, helping responders rotate the right credential.
//!   4. Flag `alg=none` JWTs as a SECURITY ANOMALY - these are unsigned,
//!      forgeable, and almost always indicate a misconfiguration or active
//!      attack.
17
18#![deny(unsafe_code)]
19
20use serde::Deserialize;
21use std::collections::BTreeMap;
22
/// Result of a JWT structural check.
///
/// Produced by [`analyze`] from the decoded header and payload segments; the
/// signature segment is only shape-checked, never read.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JwtAnalysis {
    /// Header `alg` field (e.g. `RS256`, `HS256`, `none`). Holds the
    /// placeholder `<missing>` when the header carries no `alg`.
    pub alg: String,
    /// Header `typ` field when present (typically `JWT` or `at+jwt`).
    pub typ: Option<String>,
    /// Header `kid` field - useful for key-rotation forensics.
    pub kid: Option<String>,
    /// Payload `iss` claim - surfaces the issuer service.
    pub iss: Option<String>,
    /// Payload `sub` claim - subject (user/service identifier).
    pub sub: Option<String>,
    /// Payload `aud` claim - single audience or comma-joined list. Non-string
    /// entries of an array-valued `aud` are skipped.
    pub aud: Option<String>,
    /// Payload `exp` claim (seconds since the Unix epoch), if numeric.
    pub exp: Option<i64>,
    /// Whether the JWT has expired relative to `SystemTime::now` at analysis
    /// time. `None` when no usable `exp` was found.
    pub expired: Option<bool>,
    /// Anomalies detected during analysis. Non-empty implies a suspicious
    /// JWT that warrants higher reporting severity.
    pub anomalies: Vec<JwtAnomaly>,
}
46
/// A structural irregularity found while analysing a JWT.
///
/// Collected in [`JwtAnalysis::anomalies`] and rendered into finding metadata
/// by [`anomalies_to_metadata`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum JwtAnomaly {
    /// `alg = "none"` (matched case-insensitively) - unsigned token. Should
    /// never appear in production credentials; almost always a
    /// misconfiguration or active forgery attack. RFC 7519 §6 calls this out
    /// as risky.
    AlgNone,
    /// Algorithm not on the standard registry list. Legitimate JWTs use a
    /// well-known algorithm (RS256, HS256, ES256, …); custom values are
    /// rare and frequently indicate fake / handcrafted tokens. A header with
    /// no `alg` at all is reported here with the value `<missing>`.
    UnknownAlg(String),
    /// `typ` present but not in the standard set (`JWT`, `at+jwt`, `id+jwt`,
    /// `dpop+jwt`, `logout+jwt`).
    NonStandardTyp(String),
    /// Token already expired: `exp` is at or before the current time.
    Expired,
}
64
65/// Render anomalies into a `metadata` map suitable for SARIF properties or
66/// the text reporter. Returns `None` when there are no anomalies.
67pub fn anomalies_to_metadata(analysis: &JwtAnalysis) -> Option<BTreeMap<String, String>> {
68    if analysis.anomalies.is_empty() {
69        return None;
70    }
71    let mut out = BTreeMap::new();
72    for anomaly in &analysis.anomalies {
73        match anomaly {
74            JwtAnomaly::AlgNone => {
75                out.insert(
76                    "jwt.alg_none".to_string(),
77                    "true (unsigned token: RFC 7519 §6 risk)".to_string(),
78                );
79            }
80            JwtAnomaly::UnknownAlg(alg) => {
81                out.insert("jwt.unknown_alg".to_string(), alg.clone());
82            }
83            JwtAnomaly::NonStandardTyp(typ) => {
84                out.insert("jwt.non_standard_typ".to_string(), typ.clone());
85            }
86            JwtAnomaly::Expired => {
87                out.insert("jwt.expired".to_string(), "true".to_string());
88            }
89        }
90    }
91    Some(out)
92}
93
94/// Wire the structural analysis of `credential` into a finding's `metadata`
95/// map. Returns `None` when `credential` is not a parseable JWT (so non-JWT
96/// findings carry no JWT keys); otherwise returns the claim evidence the
97/// module doc promises - `jwt.alg`, and any of `jwt.iss` / `jwt.sub` /
98/// `jwt.aud` / `jwt.exp` that are present - PLUS every anomaly key from
99/// [`anomalies_to_metadata`] (notably `jwt.alg_none` for an unsigned forgery).
100///
101/// This is the single, shared bridge between the fully-built [`analyze`] and
102/// the scan output: the in-process finalize, the verify skip branch, and the
103/// daemon-route finalize all call it, so the JWT evidence reaches the operator
104/// regardless of route (no `jwt.alg_none` divergence between in-process and
105/// daemon). The keys use a `String`/`String` shape so a `VerifiedFinding`'s
106/// `HashMap<String, String>` metadata can absorb them directly.
107pub fn finding_metadata(credential: &str) -> Option<std::collections::HashMap<String, String>> {
108    let analysis = analyze(credential)?;
109    let mut meta = std::collections::HashMap::new();
110
111    // The algorithm is the primary structural evidence and is always present
112    // (`analyze` substitutes `<missing>` when the header omits it), so surface
113    // it unconditionally for any real JWT.
114    meta.insert("jwt.alg".to_string(), analysis.alg.clone());
115    if let Some(iss) = &analysis.iss {
116        meta.insert("jwt.iss".to_string(), iss.clone());
117    }
118    if let Some(sub) = &analysis.sub {
119        meta.insert("jwt.sub".to_string(), sub.clone());
120    }
121    if let Some(aud) = &analysis.aud {
122        meta.insert("jwt.aud".to_string(), aud.clone());
123    }
124    if let Some(exp) = analysis.exp {
125        meta.insert("jwt.exp".to_string(), exp.to_string());
126    }
127
128    // Anomaly keys (jwt.alg_none / jwt.unknown_alg / jwt.non_standard_typ /
129    // jwt.expired). The dedicated `alg=none` key is the load-bearing security
130    // signal: an unsigned, trivially forgeable token.
131    if let Some(anomalies) = anomalies_to_metadata(&analysis) {
132        for (k, v) in anomalies {
133            meta.insert(k, v);
134        }
135    }
136
137    Some(meta)
138}
139
140/// Returns `true` when `s` looks like a JWT (three base64url segments).
141/// Cheap shape check - does NOT decode.
142pub fn looks_like_jwt(s: &str) -> bool {
143    let s = s.trim();
144    const MAX_JWT_SEGMENT_LEN: usize = 16 * 1024; // 16KB limit per segment
145
146    let mut parts = s.split('.');
147    let (Some(h), Some(p), Some(sig), None) =
148        (parts.next(), parts.next(), parts.next(), parts.next())
149    else {
150        return false;
151    };
152
153    // Length gate to prevent quadratic DoS on pathological inputs (millions of dots)
154    if h.len() > MAX_JWT_SEGMENT_LEN
155        || p.len() > MAX_JWT_SEGMENT_LEN
156        || sig.len() > MAX_JWT_SEGMENT_LEN
157    {
158        return false;
159    }
160
161    !h.is_empty()
162        && !p.is_empty()
163        && !sig.is_empty()
164        && h.bytes().all(is_base64url_byte)
165        && p.bytes().all(is_base64url_byte)
166        && sig.bytes().all(is_base64url_byte)
167}
168
169/// Full structural analysis. Returns `None` if `s` is not a parseable JWT
170/// (missing dots, non-base64url header/payload, malformed JSON inside).
171///
172/// Signature verification is intentionally NOT performed - that requires
173/// the issuer's public key, which we don't have. Structural validation is
174/// the high-recall layer; the verifier crate handles cryptographic checks
175/// for services that expose them.
176pub fn analyze(s: &str) -> Option<JwtAnalysis> {
177    let s = s.trim();
178    if !looks_like_jwt(s) {
179        return None;
180    }
181    let mut parts = s.split('.');
182    let header_b64 = parts.next()?;
183    let payload_b64 = parts.next()?;
184    // We don't read the signature segment beyond the shape check.
185    let _signature_b64 = parts.next()?;
186
187    let header_json = decode_b64url(header_b64)?;
188    let payload_json = decode_b64url(payload_b64)?;
189
190    if !check_nesting_depth(&header_json, 15) || !check_nesting_depth(&payload_json, 15) {
191        return None;
192    }
193
194    let header: JwtHeader = serde_json::from_slice(&header_json).ok()?;
195    let mut payload: JwtPayload = serde_json::from_slice(&payload_json).ok()?;
196    let aud = payload.take_aud();
197    let iss = payload.iss.take();
198    let sub = payload.sub.take();
199
200    let mut anomalies = Vec::new();
201
202    let alg = header.alg.unwrap_or_else(|| "<missing>".to_string());
203    if alg.eq_ignore_ascii_case("none") {
204        anomalies.push(JwtAnomaly::AlgNone);
205    } else if !is_known_alg(&alg) {
206        anomalies.push(JwtAnomaly::UnknownAlg(alg.clone()));
207    }
208
209    if let Some(typ) = header.typ.as_deref() {
210        if !is_standard_typ(typ) {
211            anomalies.push(JwtAnomaly::NonStandardTyp(typ.to_string()));
212        }
213    }
214
215    let exp_val = payload.exp.take();
216    let exp = exp_val.and_then(|v| match v {
217        serde_json::Value::Number(n) => n.as_i64(),
218        _ => None,
219    });
220
221    let expired = exp.map(|exp_val| {
222        let now = std::time::SystemTime::now()
223            .duration_since(std::time::UNIX_EPOCH)
224            .map(|d| d.as_secs() as i64)
225            .unwrap_or(0);
226        let is_expired = now >= exp_val;
227        if is_expired {
228            anomalies.push(JwtAnomaly::Expired);
229        }
230        is_expired
231    });
232
233    Some(JwtAnalysis {
234        alg,
235        typ: header.typ,
236        kid: header.kid,
237        iss,
238        sub,
239        aud,
240        exp,
241        expired,
242        anomalies,
243    })
244}
245
/// Returns `true` for bytes allowed in a JWT segment by the shape check:
/// ASCII letters, ASCII digits, `-`, `_`, and the padding character `=`.
#[inline]
fn is_base64url_byte(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'='
    )
}
250
251fn decode_b64url(s: &str) -> Option<Vec<u8>> {
252    use base64::engine::general_purpose::URL_SAFE_NO_PAD;
253    use base64::Engine;
254    // Strip any padding the input might have (base64url is unpadded by spec).
255    let trimmed = s.trim_end_matches('=');
256    URL_SAFE_NO_PAD.decode(trimmed).ok()
257}
258
/// Returns `true` when `alg` is one of the well-known JWT signing
/// algorithms listed below. The comparison is exact (case-sensitive).
fn is_known_alg(alg: &str) -> bool {
    const KNOWN_ALGS: &[&str] = &[
        // RSASSA-PKCS1-v1_5
        "RS256", "RS384", "RS512",
        // HMAC
        "HS256", "HS384", "HS512",
        // ECDSA
        "ES256", "ES384", "ES512", "ES256K",
        // RSASSA-PSS
        "PS256", "PS384", "PS512",
        // Edwards-curve signatures
        "EdDSA",
    ];
    KNOWN_ALGS.contains(&alg)
}
278
/// Returns `true` when `typ` names one of the standard JWT media types:
/// `JWT`, `at+jwt`, `id+jwt`, `dpop+jwt` or `logout+jwt`.
///
/// The `typ` header parameter is a media type (RFC 7515 §4.1.9), so it is
/// compared case-insensitively and an explicit `application/` prefix is
/// accepted: `jwt`, `JWT` and `application/jwt` all denote the same type.
/// Treating those spellings as non-standard would flag legitimate tokens as
/// anomalous.
fn is_standard_typ(typ: &str) -> bool {
    const STANDARD_SUBTYPES: &[&str] = &["jwt", "at+jwt", "id+jwt", "dpop+jwt", "logout+jwt"];
    const APPLICATION_PREFIX: &str = "application/";

    // `get` returns `None` for inputs that are too short or whose prefix
    // would end inside a multi-byte character, so the slice below cannot
    // panic.
    let subtype = match typ.get(..APPLICATION_PREFIX.len()) {
        Some(prefix) if prefix.eq_ignore_ascii_case(APPLICATION_PREFIX) => {
            &typ[APPLICATION_PREFIX.len()..]
        }
        _ => typ,
    };

    STANDARD_SUBTYPES
        .iter()
        .any(|known| subtype.eq_ignore_ascii_case(known))
}
282
/// The header parameters that `analyze` reads from the decoded first JWT
/// segment. All of them are optional.
#[derive(Deserialize)]
struct JwtHeader {
    /// `alg` parameter; `analyze` substitutes `<missing>` when it is absent.
    alg: Option<String>,
    /// `typ` parameter, checked against the standard media-type set.
    typ: Option<String>,
    /// `kid` parameter, passed through to the analysis result unchanged.
    kid: Option<String>,
}
289
/// The claims that `analyze` reads from the decoded second JWT segment.
///
/// NOTE(review): `iss` and `sub` are typed as strings, so a payload carrying
/// e.g. a numeric `sub` presumably fails deserialization and the whole token
/// is then treated as "not a JWT" - confirm this strictness is intended.
#[derive(Deserialize)]
struct JwtPayload {
    /// `iss` claim, when present.
    iss: Option<String>,
    /// `sub` claim, when present.
    sub: Option<String>,
    /// `aud` claim kept as raw JSON because it may be a single string or an
    /// array; `take_aud` normalises it. Falls back to the default value when
    /// the claim is absent.
    #[serde(default)]
    aud: serde_json::Value,
    /// `exp` claim kept as raw JSON; `analyze` only uses it when it is a JSON
    /// number.
    exp: Option<serde_json::Value>,
}
298
299impl JwtPayload {
300    fn take_aud(&mut self) -> Option<String> {
301        match std::mem::take(&mut self.aud) {
302            serde_json::Value::String(s) if !s.is_empty() => Some(s),
303            serde_json::Value::Array(items) if !items.is_empty() => {
304                let joined: Vec<String> = items
305                    .into_iter()
306                    .filter_map(|v| match v {
307                        serde_json::Value::String(s) => Some(s),
308                        _ => None,
309                    })
310                    .collect();
311                if joined.is_empty() {
312                    None
313                } else {
314                    Some(joined.join(","))
315                }
316            }
317            _ => None,
318        }
319    }
320}
321
/// Scan raw JSON bytes and report whether object/array nesting stays within
/// `max_depth`.
///
/// This is a byte-level pre-check, not a parser: it tracks whether the
/// cursor is inside a string literal (honouring backslash escapes) and counts
/// `{`/`[` openers against `}`/`]` closers found outside strings. Returns
/// `false` as soon as the open-container count exceeds `max_depth`, `true`
/// otherwise. Surplus closers are ignored rather than treated as errors.
fn check_nesting_depth(json: &[u8], max_depth: usize) -> bool {
    let mut open_containers: usize = 0;
    let mut inside_string = false;
    let mut skip_next = false;

    for &byte in json {
        if skip_next {
            // This byte is the target of an escape inside a string.
            skip_next = false;
            continue;
        }
        match byte {
            // A backslash only escapes the following byte inside a string.
            b'\\' => skip_next = inside_string,
            b'"' => inside_string = !inside_string,
            b'{' | b'[' if !inside_string => {
                open_containers += 1;
                if open_containers > max_depth {
                    return false;
                }
            }
            b'}' | b']' if !inside_string => {
                open_containers = open_containers.saturating_sub(1);
            }
            _ => {}
        }
    }
    true
}