Skip to main content

keyhog_scanner/checksum/
gitlab.rs

1use super::{ChecksumResult, ChecksumValidator};
2
/// Validates GitLab token structure.
///
/// GitLab PATs: classic tokens are `glpat-` + 20 base64url chars, but the
/// routable tokens GitLab ships since 16.x (`glpat-`, `glrt-`, `glcbt-`, …)
/// are LONGER and embed their own CRC32 in a base64-encoded trailer. This
/// validator does structural checks only — it does not recompute the routable
/// CRC — so it must not claim a token is fabricated merely because its length
/// is not the classic 20: that false `Invalid` verdict makes the engine DROP
/// every modern GitLab token (the `atlantis-credentials` /
/// `gitlab-personal-access-token` contract regressions).
///
/// Only the `glpat-`, `glcbt-` and `glrt-` prefixes are handled; the part of
/// the credential after the prefix is the "body". The rule is:
///   - no handled prefix                                      → `NotApplicable`
///   - body contains a char a GitLab token cannot             → `Invalid`
///   - body is shorter than the floor for its prefix
///     (20 for `glpat-`, 16 for `glcbt-` / `glrt-`)            → `Invalid`
///   - body length is between that floor and 64 inclusive     → `Valid`
///   - body is longer than 64 (a shape we do not model)       → `NotApplicable`
///
/// The charset check runs before the length check, so an over-long body with a
/// forbidden character is still `Invalid`. We therefore DROP only on a
/// positively-malformed or truncated body, never on a longer-than-classic
/// length.
pub struct GitlabTokenValidator;
19
/// Minimum body length (bytes after the prefix) accepted for a `glpat-` token:
/// the classic PAT body is 20 chars. A shorter `glpat-` body is reported as
/// `Invalid`. Note that `glcbt-` / `glrt-` bodies use their own, lower floor
/// inside `validate` rather than this constant.
const GITLAB_BODY_MIN: usize = 20;
/// Maximum body length that is still reported as `Valid`, shared by every
/// handled prefix. Routable 16.x+ tokens run longer than the classic 20
/// (random + base64 CRC trailer), so 64 is a generous ceiling. Bodies above it
/// are not rejected: they yield `NotApplicable`, leaving the decision to other
/// gates.
const GITLAB_BODY_MAX: usize = 64;
25
/// Returns `true` when every character of `payload` is one a GitLab token body
/// may contain: ASCII letters, ASCII digits, `-`, `_` or `.`.
///
/// The first four classes are the base64url alphabet. `.` is admitted as well
/// because routable tokens (`glrt-t<n>_<body>.<suffix>` and the routable
/// `glpat-` / `glcbt-` variants) use it to separate the encoded body from the
/// short CRC suffix; refusing it would false-drop those tokens. Per the
/// original design note, classic tokens never contain `.` (their detector
/// regex forbids it), so allowing it here cannot make a classic token pass.
///
/// An empty `payload` trivially satisfies the check and returns `true`.
fn gitlab_body_charset_ok(payload: &str) -> bool {
    // Scanning bytes is equivalent to scanning chars here: every byte of a
    // multi-byte (non-ASCII) character is >= 0x80 and fails the predicate.
    payload
        .bytes()
        .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.'))
}
37
38impl ChecksumValidator for GitlabTokenValidator {
39    fn validator_id(&self) -> &str {
40        "gitlab-token"
41    }
42
43    fn validate(&self, credential: &str) -> ChecksumResult {
44        if let Some(payload) = credential.strip_prefix("glpat-") {
45            if !gitlab_body_charset_ok(payload) {
46                return ChecksumResult::Invalid;
47            }
48            return match payload.len() {
49                // classic 20 .. routable-token band: structural pass.
50                GITLAB_BODY_MIN..=GITLAB_BODY_MAX => ChecksumResult::Valid,
51                // a `glpat-` prefix with fewer than 20 body chars cannot be any
52                // real GitLab token: fabricated/truncated -> drop.
53                n if n < GITLAB_BODY_MIN => ChecksumResult::Invalid,
54                // implausibly long: a format we don't model. Don't false-drop a
55                // possible future token shape; let entropy/other gates decide.
56                _ => ChecksumResult::NotApplicable,
57            };
58        }
59        if let Some(payload) = credential
60            .strip_prefix("glcbt-")
61            .or_else(|| credential.strip_prefix("glrt-"))
62        {
63            if !gitlab_body_charset_ok(payload) {
64                return ChecksumResult::Invalid;
65            }
66            // CI-build / runner tokens have no fixed classic length; 16 is the
67            // floor below which the body is too short to be real.
68            return match payload.len() {
69                16..=GITLAB_BODY_MAX => ChecksumResult::Valid,
70                n if n < 16 => ChecksumResult::Invalid,
71                _ => ChecksumResult::NotApplicable,
72            };
73        }
74        ChecksumResult::NotApplicable
75    }
76}