use crate::segment::{BuiltinSegment, CharsetName, MatchCapture, Segment};
use aho_corasick::AhoCorasick;
use std::sync::{Arc, LazyLock};
/// A built-in structural definition: an identifier paired with the ordered
/// segments that a payload must contain, back to back, for the definition to match.
#[derive(Clone)]
pub(crate) struct StructuralDef {
// Identifier string of the definition (for example "anthropic").
pub(crate) identifier: String,
// Ordered segments, shared behind an `Arc` so that clones do not copy them.
pub(crate) segments: Arc<[Segment]>,
}
#[cfg(test)]
impl StructuralDef {
    /// Test-only helper: matches this definition's segments against `payload`
    /// starting at byte offset `pos`.
    ///
    /// Returns the end offset of the match together with the length consumed by
    /// each variable segment, or `None` when the segments do not match there.
    pub(crate) fn try_match(&self, payload: &[u8], pos: usize) -> Option<MatchCapture> {
        let mut variable_lengths = Vec::new();
        match_segments(payload, pos, &self.segments, &mut variable_lengths).map(|end| {
            MatchCapture {
                end,
                variable_lengths,
            }
        })
    }
}
/// Returns the end offset of `expected` when `payload` holds exactly those bytes
/// starting at `cur`; `None` on a mismatch or when the span does not fit.
fn exact_span_end(payload: &[u8], cur: usize, expected: &[u8]) -> Option<usize> {
    // `cur` ultimately comes from the caller, so the offset arithmetic must not
    // overflow (that would panic in debug builds and wrap in release builds).
    let end = cur.checked_add(expected.len())?;
    (payload.get(cur..end)? == expected).then_some(end)
}

/// Matches `segs` back to back against `payload`, starting at byte offset `cur`.
///
/// Literal and opaque segments must appear verbatim. A variable segment consumes
/// between `min` and `max` bytes that all belong to its charset; the longest
/// admissible length is tried first and shorter ones are tried only if the
/// remaining segments fail to match (backtracking).
///
/// Every variable-segment length taken on the successful path is appended to
/// `var_lens` in segment order; lengths pushed by attempts that are later
/// abandoned are removed again.
///
/// Returns the offset one past the last matched byte, or `None` when there is no
/// match at `cur`. Bytes following the final segment are not inspected.
fn match_segments(
    payload: &[u8],
    cur: usize,
    segs: &[Segment],
    var_lens: &mut Vec<usize>,
) -> Option<usize> {
    let Some((head, rest)) = segs.split_first() else {
        // Nothing left to match: the match ends where we currently stand.
        return Some(cur);
    };
    match head {
        Segment::Literal(bytes) => {
            let end = exact_span_end(payload, cur, bytes.as_slice())?;
            match_segments(payload, end, rest, var_lens)
        }
        Segment::Variable { charset, min, max } => {
            let cs = charset.resolve();
            let tail = payload.get(cur..)?;
            // Scan once for the longest charset run (capped at `max`). Every
            // admissible length is a prefix of that run, so there is no need to
            // re-validate the whole span for each candidate length.
            let run = tail
                .iter()
                .take(*max)
                .take_while(|&&b| cs.contains(b))
                .count();
            let saved = var_lens.len();
            for var_len in (*min..=run).rev() {
                var_lens.push(var_len);
                // `var_len <= run <= tail.len()`, so this addition cannot overflow.
                if let Some(result) = match_segments(payload, cur + var_len, rest, var_lens) {
                    return Some(result);
                }
                // Undo this attempt (and anything deeper) before trying a shorter length.
                var_lens.truncate(saved);
            }
            None
        }
        Segment::Opaque { value, .. } => {
            let end = exact_span_end(payload, cur, value.as_slice())?;
            match_segments(payload, end, rest, var_lens)
        }
    }
}
/// Segment layout of the `anthropic` pattern: the literal `sk-ant-api03-`,
/// exactly 93 `UrlSafeBase64` bytes, then the literal `AA`.
const ANTHROPIC_SEGS: [BuiltinSegment; 3] = [
    BuiltinSegment::Literal(b"sk-ant-api03-"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 93,
        max: 93,
    },
    BuiltinSegment::Literal(b"AA"),
];

/// Lazily built structural definition for the `anthropic` pattern.
static ANTHROPIC_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("anthropic"),
    segments: ANTHROPIC_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `openai_classic` pattern: the literal `sk-` followed by
/// exactly 48 `Alphanumeric` bytes.
const OPENAI_CLASSIC_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"sk-"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 48,
        max: 48,
    },
];

/// Lazily built structural definition for the `openai_classic` pattern.
static OPENAI_CLASSIC_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("openai_classic"),
    segments: OPENAI_CLASSIC_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `openai_project` pattern: the literal `sk-proj-`,
/// 58 to 74 `UrlSafeBase64` bytes, the literal `T3BlbkFJ`, then another
/// 58 to 74 `UrlSafeBase64` bytes.
const OPENAI_PROJECT_SEGS: [BuiltinSegment; 4] = [
    BuiltinSegment::Literal(b"sk-proj-"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 58,
        max: 74,
    },
    BuiltinSegment::Literal(b"T3BlbkFJ"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 58,
        max: 74,
    },
];

/// Lazily built structural definition for the `openai_project` pattern.
static OPENAI_PROJECT_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("openai_project"),
    segments: OPENAI_PROJECT_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `aws_akia` pattern: the literal `AKIA` followed by
/// exactly 16 `UppercaseAlphanumeric` bytes.
const AWS_AKIA_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"AKIA"),
    BuiltinSegment::Variable {
        charset: CharsetName::UppercaseAlphanumeric,
        min: 16,
        max: 16,
    },
];

/// Lazily built structural definition for the `aws_akia` pattern.
static AWS_AKIA_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("aws_akia"),
    segments: AWS_AKIA_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `aws_asia` pattern: the literal `ASIA` followed by
/// exactly 16 `UppercaseAlphanumeric` bytes.
const AWS_ASIA_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"ASIA"),
    BuiltinSegment::Variable {
        charset: CharsetName::UppercaseAlphanumeric,
        min: 16,
        max: 16,
    },
];

/// Lazily built structural definition for the `aws_asia` pattern.
static AWS_ASIA_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("aws_asia"),
    segments: AWS_ASIA_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `github_classic` pattern: the literal `ghp_` followed by
/// exactly 36 `Alphanumeric` bytes.
const GITHUB_CLASSIC_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"ghp_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 36,
        max: 36,
    },
];

/// Lazily built structural definition for the `github_classic` pattern.
static GITHUB_CLASSIC_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("github_classic"),
    segments: GITHUB_CLASSIC_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `github_fine_grained` pattern: the literal
/// `github_pat_`, exactly 22 `Alphanumeric` bytes, the literal `_`, then exactly
/// 59 `Alphanumeric` bytes.
const GITHUB_FG_SEGS: [BuiltinSegment; 4] = [
    BuiltinSegment::Literal(b"github_pat_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 22,
        max: 22,
    },
    BuiltinSegment::Literal(b"_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 59,
        max: 59,
    },
];

/// Lazily built structural definition for the `github_fine_grained` pattern.
static GITHUB_FG_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("github_fine_grained"),
    segments: GITHUB_FG_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `gcp` pattern: the literal `AIza` followed by exactly
/// 35 `UrlSafeBase64` bytes.
const GCP_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"AIza"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 35,
        max: 35,
    },
];

/// Lazily built structural definition for the `gcp` pattern.
static GCP_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("gcp"),
    segments: GCP_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `openrouter` pattern: the literal `sk-or-v1-` followed
/// by exactly 64 `HexLower` bytes.
const OPENROUTER_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"sk-or-v1-"),
    BuiltinSegment::Variable {
        charset: CharsetName::HexLower,
        min: 64,
        max: 64,
    },
];

/// Lazily built structural definition for the `openrouter` pattern.
static OPENROUTER_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("openrouter"),
    segments: OPENROUTER_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `openai_svcacct` pattern: the literal `sk-svcacct-`,
/// 58 to 74 `UrlSafeBase64` bytes, the literal `T3BlbkFJ`, then another
/// 58 to 74 `UrlSafeBase64` bytes.
const OPENAI_SVCACCT_SEGS: [BuiltinSegment; 4] = [
    BuiltinSegment::Literal(b"sk-svcacct-"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 58,
        max: 74,
    },
    BuiltinSegment::Literal(b"T3BlbkFJ"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 58,
        max: 74,
    },
];

/// Lazily built structural definition for the `openai_svcacct` pattern.
static OPENAI_SVCACCT_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("openai_svcacct"),
    segments: OPENAI_SVCACCT_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `google_oauth_secret` pattern: the literal `GOCSPX-`
/// followed by exactly 28 `UrlSafeBase64` bytes.
const GOOGLE_OAUTH_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"GOCSPX-"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 28,
        max: 28,
    },
];

/// Lazily built structural definition for the `google_oauth_secret` pattern.
static GOOGLE_OAUTH_SECRET_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("google_oauth_secret"),
    segments: GOOGLE_OAUTH_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `slack_bot` pattern: the literal `xoxb-`, 10 to 13
/// `Digits` bytes, the literal `-`, another 10 to 13 `Digits` bytes, the literal
/// `-`, then exactly 24 `Alphanumeric` bytes.
const SLACK_BOT_SEGS: [BuiltinSegment; 6] = [
    BuiltinSegment::Literal(b"xoxb-"),
    BuiltinSegment::Variable {
        charset: CharsetName::Digits,
        min: 10,
        max: 13,
    },
    BuiltinSegment::Literal(b"-"),
    BuiltinSegment::Variable {
        charset: CharsetName::Digits,
        min: 10,
        max: 13,
    },
    BuiltinSegment::Literal(b"-"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 24,
        max: 24,
    },
];

/// Lazily built structural definition for the `slack_bot` pattern.
static SLACK_BOT_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("slack_bot"),
    segments: SLACK_BOT_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `anthropic_admin01` pattern: the literal
/// `sk-ant-admin01-`, exactly 93 `UrlSafeBase64` bytes, then the literal `AA`.
const ANTHROPIC_ADMIN01_SEGS: [BuiltinSegment; 3] = [
    BuiltinSegment::Literal(b"sk-ant-admin01-"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 93,
        max: 93,
    },
    BuiltinSegment::Literal(b"AA"),
];

/// Lazily built structural definition for the `anthropic_admin01` pattern.
static ANTHROPIC_ADMIN01_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("anthropic_admin01"),
    segments: ANTHROPIC_ADMIN01_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `anthropic_admin03` pattern: the literal
/// `sk-ant-admin03-`, exactly 93 `UrlSafeBase64` bytes, then the literal `AA`.
const ANTHROPIC_ADMIN03_SEGS: [BuiltinSegment; 3] = [
    BuiltinSegment::Literal(b"sk-ant-admin03-"),
    BuiltinSegment::Variable {
        charset: CharsetName::UrlSafeBase64,
        min: 93,
        max: 93,
    },
    BuiltinSegment::Literal(b"AA"),
];

/// Lazily built structural definition for the `anthropic_admin03` pattern.
static ANTHROPIC_ADMIN03_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("anthropic_admin03"),
    segments: ANTHROPIC_ADMIN03_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `linear` pattern: the literal `lin_api_` followed by
/// exactly 40 `Alphanumeric` bytes.
const LINEAR_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"lin_api_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 40,
        max: 40,
    },
];

/// Lazily built structural definition for the `linear` pattern.
static LINEAR_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("linear"),
    segments: LINEAR_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `groq` pattern: the literal `gsk_` followed by exactly
/// 52 `Alphanumeric` bytes.
const GROQ_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"gsk_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 52,
        max: 52,
    },
];

/// Lazily built structural definition for the `groq` pattern.
static GROQ_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("groq"),
    segments: GROQ_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `perplexity` pattern: the literal `pplx-` followed by
/// exactly 48 `HexLower` bytes.
const PERPLEXITY_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"pplx-"),
    BuiltinSegment::Variable {
        charset: CharsetName::HexLower,
        min: 48,
        max: 48,
    },
];

/// Lazily built structural definition for the `perplexity` pattern.
static PERPLEXITY_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("perplexity"),
    segments: PERPLEXITY_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `cerebras` pattern: the literal `csk-` followed by
/// exactly 48 `Alphanumeric` bytes.
const CEREBRAS_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"csk-"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 48,
        max: 48,
    },
];

/// Lazily built structural definition for the `cerebras` pattern.
static CEREBRAS_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("cerebras"),
    segments: CEREBRAS_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `stripe_live` pattern: the literal `sk_live_` followed
/// by 24 to 32 `Alphanumeric` bytes.
const STRIPE_LIVE_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"sk_live_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 24,
        max: 32,
    },
];

/// Lazily built structural definition for the `stripe_live` pattern.
static STRIPE_LIVE_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("stripe_live"),
    segments: STRIPE_LIVE_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `stripe_test` pattern: the literal `sk_test_` followed
/// by 24 to 32 `Alphanumeric` bytes.
const STRIPE_TEST_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"sk_test_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 24,
        max: 32,
    },
];

/// Lazily built structural definition for the `stripe_test` pattern.
static STRIPE_TEST_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("stripe_test"),
    segments: STRIPE_TEST_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `clerk` pattern: the literal `sk_live_` followed by
/// 45 to 55 `Alphanumeric` bytes.
///
/// NOTE(review): the leading literal is the same as in `STRIPE_LIVE_SEGS`, and
/// `match_segments` does not look at bytes after the last segment, so a payload
/// matching this layout also satisfies the `stripe_live` layout. Confirm that
/// callers handle the overlap as intended.
const CLERK_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"sk_live_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 45,
        max: 55,
    },
];

/// Lazily built structural definition for the `clerk` pattern.
static CLERK_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("clerk"),
    segments: CLERK_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `svix` pattern: the literal `svix_` followed by 30 to
/// 50 `Alphanumeric` bytes.
const SVIX_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"svix_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 30,
        max: 50,
    },
];

/// Lazily built structural definition for the `svix` pattern.
static SVIX_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("svix"),
    segments: SVIX_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `chromatic` pattern: the literal `chpt_` followed by
/// 30 to 50 `Alphanumeric` bytes.
const CHROMATIC_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"chpt_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 30,
        max: 50,
    },
];

/// Lazily built structural definition for the `chromatic` pattern.
static CHROMATIC_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("chromatic"),
    segments: CHROMATIC_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `github_oauth` pattern: the literal `gho_` followed by
/// exactly 36 `Alphanumeric` bytes.
const GITHUB_OAUTH_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"gho_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 36,
        max: 36,
    },
];

/// Lazily built structural definition for the `github_oauth` pattern.
static GITHUB_OAUTH_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("github_oauth"),
    segments: GITHUB_OAUTH_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `github_app_server` pattern: the literal `ghs_`
/// followed by exactly 36 `Alphanumeric` bytes.
const GITHUB_APP_SERVER_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"ghs_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 36,
        max: 36,
    },
];

/// Lazily built structural definition for the `github_app_server` pattern.
static GITHUB_APP_SERVER_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("github_app_server"),
    segments: GITHUB_APP_SERVER_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `github_app_user` pattern: the literal `ghu_` followed
/// by exactly 36 `Alphanumeric` bytes.
const GITHUB_APP_USER_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"ghu_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 36,
        max: 36,
    },
];

/// Lazily built structural definition for the `github_app_user` pattern.
static GITHUB_APP_USER_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("github_app_user"),
    segments: GITHUB_APP_USER_SEGS.iter().map(Segment::from).collect(),
});
/// Segment layout of the `github_refresh` pattern: the literal `ghr_` followed
/// by 36 to 76 `Alphanumeric` bytes.
const GITHUB_REFRESH_SEGS: [BuiltinSegment; 2] = [
    BuiltinSegment::Literal(b"ghr_"),
    BuiltinSegment::Variable {
        charset: CharsetName::Alphanumeric,
        min: 36,
        max: 76,
    },
];

/// Lazily built structural definition for the `github_refresh` pattern.
static GITHUB_REFRESH_DEF: LazyLock<StructuralDef> = LazyLock::new(|| StructuralDef {
    identifier: String::from("github_refresh"),
    segments: GITHUB_REFRESH_SEGS.iter().map(Segment::from).collect(),
});
/// References to every built-in structural definition, in registration order.
///
/// Built on first access; each individual definition is forced (initialized if
/// necessary) in list order while the vector is assembled.
static ALL_STRUCTURAL_DEFS: LazyLock<Vec<&'static StructuralDef>> = LazyLock::new(|| {
    [
        &ANTHROPIC_DEF,
        &ANTHROPIC_ADMIN01_DEF,
        &ANTHROPIC_ADMIN03_DEF,
        &OPENAI_CLASSIC_DEF,
        &OPENAI_PROJECT_DEF,
        &OPENAI_SVCACCT_DEF,
        &AWS_AKIA_DEF,
        &AWS_ASIA_DEF,
        &GITHUB_CLASSIC_DEF,
        &GITHUB_FG_DEF,
        &GCP_DEF,
        &OPENROUTER_DEF,
        &GOOGLE_OAUTH_SECRET_DEF,
        &SLACK_BOT_DEF,
        &LINEAR_DEF,
        &GROQ_DEF,
        &PERPLEXITY_DEF,
        &CEREBRAS_DEF,
        &STRIPE_LIVE_DEF,
        &STRIPE_TEST_DEF,
        &CLERK_DEF,
        &SVIX_DEF,
        &CHROMATIC_DEF,
        &GITHUB_OAUTH_DEF,
        &GITHUB_APP_SERVER_DEF,
        &GITHUB_APP_USER_DEF,
        &GITHUB_REFRESH_DEF,
    ]
    .iter()
    .map(|&cell| LazyLock::force(cell))
    .collect()
});
/// Returns all built-in structural definitions as a slice, in the order they are
/// listed in `ALL_STRUCTURAL_DEFS`.
pub(crate) fn all_defs() -> &'static [&'static StructuralDef] {
    ALL_STRUCTURAL_DEFS.as_slice()
}
/// Builds an Aho-Corasick automaton over the fixed leading bytes of `patterns`.
///
/// Patterns for which [`Pattern::first_segment_bytes`] returns `None` contribute
/// nothing to the automaton.
///
/// NOTE(review): because such patterns are skipped, automaton pattern indices
/// line up with positions in `patterns` only when no pattern is skipped; confirm
/// how callers map automaton matches back to patterns.
///
/// # Panics
///
/// Panics if the automaton cannot be constructed.
pub(crate) fn build_ac_automaton(patterns: &[Pattern]) -> AhoCorasick {
    let leading_bytes = patterns
        .iter()
        .filter_map(|pattern| pattern.first_segment_bytes());
    AhoCorasick::new(leading_bytes).expect("AC build should not fail for valid patterns")
}
/// A matchable pattern: an identifier, the ordered segments to match, and an
/// optional digest check applied to the matched bytes.
#[derive(Clone)]
pub struct Pattern {
// Identifier string, exposed through `Pattern::id`.
pub(crate) identifier: String,
// Ordered segments that a payload must contain back to back.
pub(crate) segments: Arc<[Segment]>,
// Key passed to `crate::crypto::hmac_sha256` when `try_match` checks digests.
pub(crate) salt: [u8; 32],
// Accepted HMAC values for the matched bytes. When empty, `try_match` accepts
// any structural match without computing an HMAC.
pub(crate) digests: Vec<[u8; 32]>,
}
impl Pattern {
    /// Returns this pattern's identifier.
    pub fn id(&self) -> &str {
        self.identifier.as_str()
    }

    /// Reports whether the first segment is a `Segment::Literal`.
    ///
    /// Returns `false` for a pattern without segments.
    pub(crate) fn first_segment_is_literal(&self) -> bool {
        self.segments
            .first()
            .is_some_and(|segment| matches!(segment, Segment::Literal(_)))
    }

    /// Returns the fixed bytes of the first segment when it is a literal or an
    /// opaque segment; `None` for a variable first segment or an empty pattern.
    pub(crate) fn first_segment_bytes(&self) -> Option<&[u8]> {
        match self.segments.first()? {
            Segment::Literal(bytes) => Some(bytes),
            Segment::Opaque { value, .. } => Some(value),
            Segment::Variable { .. } => None,
        }
    }

    /// Tries to match this pattern against `payload` starting at offset `pos`.
    ///
    /// The segments must match structurally first. If the pattern carries
    /// digests, the HMAC-SHA256 of the matched bytes (keyed with `salt`) must
    /// additionally equal at least one of them.
    ///
    /// Returns the end offset and the variable-segment lengths of the match, or
    /// `None` when either check fails.
    pub(crate) fn try_match(&self, payload: &[u8], pos: usize) -> Option<MatchCapture> {
        use subtle::{Choice, ConstantTimeEq};

        let mut variable_lengths = Vec::new();
        let end = match_segments(payload, pos, &self.segments, &mut variable_lengths)?;
        let capture = MatchCapture {
            end,
            variable_lengths,
        };

        // No digests configured: the structural match alone is sufficient.
        if self.digests.is_empty() {
            return Some(capture);
        }

        let mac = crate::crypto::hmac_sha256(&self.salt, &payload[pos..end]);
        // Compare against every digest and OR the results together rather than
        // returning on the first hit, so the loop always visits all digests.
        let mut any_equal = Choice::from(0u8);
        for digest in &self.digests {
            any_equal = any_equal | mac.ct_eq(digest);
        }
        bool::from(any_equal).then_some(capture)
    }
}
use rand::RngCore;
use rand::rngs::OsRng;
fn random_salt() -> [u8; 32] {
let mut salt = [0u8; 32];
OsRng.fill_bytes(&mut salt);
salt
}
pub fn anthropic() -> Pattern {
Pattern {
identifier: ANTHROPIC_DEF.identifier.clone(),
segments: ANTHROPIC_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn anthropic_admin01() -> Pattern {
Pattern {
identifier: ANTHROPIC_ADMIN01_DEF.identifier.clone(),
segments: ANTHROPIC_ADMIN01_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn anthropic_admin03() -> Pattern {
Pattern {
identifier: ANTHROPIC_ADMIN03_DEF.identifier.clone(),
segments: ANTHROPIC_ADMIN03_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn openai_classic() -> Pattern {
Pattern {
identifier: OPENAI_CLASSIC_DEF.identifier.clone(),
segments: OPENAI_CLASSIC_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn openai_project() -> Pattern {
Pattern {
identifier: OPENAI_PROJECT_DEF.identifier.clone(),
segments: OPENAI_PROJECT_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn openai_svcacct() -> Pattern {
Pattern {
identifier: OPENAI_SVCACCT_DEF.identifier.clone(),
segments: OPENAI_SVCACCT_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn aws_akia() -> Pattern {
Pattern {
identifier: AWS_AKIA_DEF.identifier.clone(),
segments: AWS_AKIA_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn aws_asia() -> Pattern {
Pattern {
identifier: AWS_ASIA_DEF.identifier.clone(),
segments: AWS_ASIA_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn github_classic() -> Pattern {
Pattern {
identifier: GITHUB_CLASSIC_DEF.identifier.clone(),
segments: GITHUB_CLASSIC_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn github_fine_grained() -> Pattern {
Pattern {
identifier: GITHUB_FG_DEF.identifier.clone(),
segments: GITHUB_FG_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn gcp() -> Pattern {
Pattern {
identifier: GCP_DEF.identifier.clone(),
segments: GCP_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn openrouter() -> Pattern {
Pattern {
identifier: OPENROUTER_DEF.identifier.clone(),
segments: OPENROUTER_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn google_oauth_secret() -> Pattern {
Pattern {
identifier: GOOGLE_OAUTH_SECRET_DEF.identifier.clone(),
segments: GOOGLE_OAUTH_SECRET_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn slack_bot() -> Pattern {
Pattern {
identifier: SLACK_BOT_DEF.identifier.clone(),
segments: SLACK_BOT_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn linear() -> Pattern {
Pattern {
identifier: LINEAR_DEF.identifier.clone(),
segments: LINEAR_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn groq() -> Pattern {
Pattern {
identifier: GROQ_DEF.identifier.clone(),
segments: GROQ_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn perplexity() -> Pattern {
Pattern {
identifier: PERPLEXITY_DEF.identifier.clone(),
segments: PERPLEXITY_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn cerebras() -> Pattern {
Pattern {
identifier: CEREBRAS_DEF.identifier.clone(),
segments: CEREBRAS_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn stripe_live() -> Pattern {
Pattern {
identifier: STRIPE_LIVE_DEF.identifier.clone(),
segments: STRIPE_LIVE_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn stripe_test() -> Pattern {
Pattern {
identifier: STRIPE_TEST_DEF.identifier.clone(),
segments: STRIPE_TEST_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn clerk() -> Pattern {
Pattern {
identifier: CLERK_DEF.identifier.clone(),
segments: CLERK_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn svix() -> Pattern {
Pattern {
identifier: SVIX_DEF.identifier.clone(),
segments: SVIX_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn chromatic() -> Pattern {
Pattern {
identifier: CHROMATIC_DEF.identifier.clone(),
segments: CHROMATIC_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn github_oauth() -> Pattern {
Pattern {
identifier: GITHUB_OAUTH_DEF.identifier.clone(),
segments: GITHUB_OAUTH_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn github_app_server() -> Pattern {
Pattern {
identifier: GITHUB_APP_SERVER_DEF.identifier.clone(),
segments: GITHUB_APP_SERVER_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn github_app_user() -> Pattern {
Pattern {
identifier: GITHUB_APP_USER_DEF.identifier.clone(),
segments: GITHUB_APP_USER_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn github_refresh() -> Pattern {
Pattern {
identifier: GITHUB_REFRESH_DEF.identifier.clone(),
segments: GITHUB_REFRESH_DEF.segments.clone(),
salt: random_salt(),
digests: vec![],
}
}
pub fn all() -> Vec<Pattern> {
vec![
anthropic(),
anthropic_admin01(),
anthropic_admin03(),
openai_classic(),
openai_project(),
openai_svcacct(),
aws_akia(),
aws_asia(),
github_classic(),
github_fine_grained(),
gcp(),
openrouter(),
google_oauth_secret(),
slack_bot(),
linear(),
groq(),
perplexity(),
cerebras(),
stripe_live(),
stripe_test(),
clerk(),
svix(),
chromatic(),
github_oauth(),
github_app_server(),
github_app_user(),
github_refresh(),
]
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::segment::Segment;

    /// Concatenates byte-slice parts into one owned payload.
    fn payload_from(parts: &[&[u8]]) -> Vec<u8> {
        parts.concat()
    }

    /// Every expected leading literal is the first segment of some built-in pattern.
    #[test]
    fn test_structural_all_classes_present() {
        let patterns = all();
        let expected_prefixes: &[&[u8]] = &[
            b"sk-ant-api03-".as_slice(),
            b"sk-ant-admin01-",
            b"sk-ant-admin03-",
            b"sk-",
            b"sk-proj-",
            b"sk-svcacct-",
            b"AKIA",
            b"ASIA",
            b"ghp_",
            b"github_pat_",
            b"AIza",
            b"sk-or-v1-",
            b"GOCSPX-",
            b"xoxb-",
            b"lin_api_",
            b"gsk_",
            b"pplx-",
            b"csk-",
            b"sk_live_",
            b"sk_test_",
            b"svix_",
            b"chpt_",
            b"gho_",
            b"ghs_",
            b"ghu_",
            b"ghr_",
        ];
        for &prefix in expected_prefixes {
            let present = patterns.iter().any(|pattern| {
                matches!(
                    pattern.segments.first(),
                    Some(Segment::Literal(lit)) if lit.as_slice() == prefix
                )
            });
            assert!(
                present,
                "missing leading literal: {}",
                std::str::from_utf8(prefix).unwrap()
            );
        }
    }

    /// A match inside a longer payload reports the end offset and variable length.
    #[test]
    fn test_aws_akia_try_match_returns_capture() {
        let payload = b"access_key: AKIAIOSFODNN7EXAMPLE";
        let start = payload
            .windows(4)
            .position(|window| window == b"AKIA")
            .unwrap();
        let capture = AWS_AKIA_DEF.try_match(payload, start).unwrap();
        assert_eq!(capture.end, start + 20);
        assert_eq!(capture.variable_lengths, vec![16]);
    }

    /// A match at offset zero reports the full key length.
    #[test]
    fn test_gcp_key_try_match_returns_capture() {
        let payload = b"AIzaSyD-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
        let capture = GCP_DEF.try_match(payload, 0).unwrap();
        assert_eq!(capture.end, 39);
        assert_eq!(capture.variable_lengths, vec![35]);
    }

    /// Replacing the trailing two bytes of a matching key makes the match fail.
    #[test]
    fn test_anthropic_suffix_aa_enforced() {
        let good = b"sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
        assert!(ANTHROPIC_DEF.try_match(good, 0).is_some());

        let (body, _) = good.split_at(good.len() - 2);
        let bad = payload_from(&[body, b"BB"]);
        assert!(ANTHROPIC_DEF.try_match(&bad, 0).is_none());
    }

    /// The `T3BlbkFJ` literal between the two variable runs is mandatory.
    #[test]
    fn test_openai_project_requires_t3blbkfj() {
        let run = b"B".repeat(58);
        let good = payload_from(&[b"sk-proj-", &run, b"T3BlbkFJ", &run]);
        assert!(OPENAI_PROJECT_DEF.try_match(&good, 0).is_some());

        let bad = payload_from(&[b"sk-proj-", &b"B".repeat(124)]);
        assert!(OPENAI_PROJECT_DEF.try_match(&bad, 0).is_none());
    }

    /// The `_` literal between the two variable runs is mandatory.
    #[test]
    fn test_github_fg_requires_underscore_separator() {
        let good = payload_from(&[
            b"github_pat_",
            &b"A".repeat(22),
            b"_",
            &b"B".repeat(59),
        ]);
        assert!(GITHUB_FG_DEF.try_match(&good, 0).is_some());

        let bad = payload_from(&[b"github_pat_", &b"A".repeat(82)]);
        assert!(GITHUB_FG_DEF.try_match(&bad, 0).is_none());
    }

    /// The first two variable runs only accept digits.
    #[test]
    fn test_slack_requires_digit_segments() {
        let good = b"xoxb-1234567890-1234567890-AAAAAAAAAAAAAAAAAAAAAAAA";
        let bad = b"xoxb-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAAAAAAAAAAAAAAA";
        assert!(SLACK_BOT_DEF.try_match(good, 0).is_some());
        assert!(SLACK_BOT_DEF.try_match(bad, 0).is_none());
    }

    /// A payload without the leading literal does not match.
    #[test]
    fn test_prefix_mismatch_returns_none() {
        let payload = b"not-a-key";
        assert!(ANTHROPIC_DEF.try_match(payload, 0).is_none());
    }

    /// There are 27 built-in definitions and no identifier is used twice.
    #[test]
    fn test_all_defs_identifiers_unique() {
        let defs = all_defs();
        assert_eq!(defs.len(), 27, "must have 27 built-in structural defs");

        let distinct: std::collections::HashSet<&str> =
            defs.iter().map(|def| def.identifier.as_str()).collect();
        assert_eq!(distinct.len(), 27, "all identifiers must be unique");
    }

    /// `all_defs()` and `all()` have equal length and every def has a pattern.
    #[test]
    fn test_all_defs_matches_all_patterns() {
        let defs = all_defs();
        let patterns = all();
        assert_eq!(
            defs.len(),
            patterns.len(),
            "all_defs and patterns::all must have same count"
        );
        for def in defs {
            let listed = patterns
                .iter()
                .any(|pattern| pattern.identifier == def.identifier);
            assert!(
                listed,
                "all_defs entry {} must appear in patterns::all()",
                def.identifier
            );
        }
    }
}