#![allow(dead_code)]
use regex::{Captures, Regex};
/// One named alternative of the combined model-name pattern.
///
/// Every field is a `&'static str`, so values are cheap to clone and can live in
/// `static` tables. Equality and hashing compare all three fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PatternPart {
    /// Identifier of this alternative. It is interpolated as the capture-group name
    /// when the combined pattern is built, so it has to be acceptable to the regex
    /// engine as a group name.
    pub name: &'static str,
    /// Human-readable summary of what this alternative matches.
    pub description: &'static str,
    /// Regex source for this alternative, without the surrounding named group.
    pub pattern: &'static str,
}

impl PatternPart {
    /// Creates a part from its name, description and regex source.
    ///
    /// Declared `const` so parts can be used to initialise `static`/`const` items.
    pub const fn new(name: &'static str, description: &'static str, pattern: &'static str) -> Self {
        Self {
            name,
            description,
            pattern,
        }
    }
}
/// A single match of the combined model pattern inside an input string.
///
/// Both string slices borrow from the searched input, hence the `'a` lifetime.
#[derive(Debug, Clone)]
pub struct ModelMatch<'a> {
/// The complete input string that was searched.
pub input: &'a str,
/// The part of `input` covered by the overall match (capture group 0).
pub matched_text: &'a str,
/// The pattern part whose named capture group took part in the match.
pub part: &'static PatternPart,
}
/// Matcher for model names, pairing the compiled regex with its source text.
#[derive(Debug, Clone)]
pub struct ModelRegex {
/// Compiled form of `pattern`.
regex: Regex,
/// Source text of the combined pattern that `regex` was compiled from.
pattern: String,
}
/// Turns one set of regex captures into a `ModelMatch`.
///
/// The overall match (group 0) provides `matched_text`. The reported part is the
/// first entry of `ModelRegex::parts()` whose named group is present in `captures`.
/// Returns `None` if group 0 is absent or no part's group took part in the match.
fn model_match_from_captures<'a>(captures: Captures<'a>, input: &'a str) -> Option<ModelMatch<'a>> {
    let matched_text = captures.get(0)?.as_str();
    ModelRegex::parts()
        .iter()
        .find(|candidate| captures.name(candidate.name).is_some())
        .map(|part| ModelMatch {
            input,
            matched_text,
            part,
        })
}
impl ModelRegex {
    /// Builds the combined pattern from `ModelRegex::parts()` and compiles it.
    ///
    /// # Errors
    ///
    /// Returns the `regex::Error` reported when the combined pattern fails to compile.
    pub fn new() -> Result<Self, regex::Error> {
        let pattern = Self::build_pattern();
        Regex::new(&pattern).map(|regex| Self { regex, pattern })
    }

    /// Returns the source text of the combined pattern.
    pub fn pattern(&self) -> &str {
        self.pattern.as_str()
    }

    /// Returns the pattern parts in the order they appear in the combined pattern.
    pub fn parts() -> &'static [PatternPart] {
        &MODEL_PATTERN_PARTS
    }

    /// Renders a Markdown-style description of every part.
    ///
    /// Each part becomes a section with its name as a `##` heading, followed by its
    /// description and its regex source; sections are separated by a newline.
    pub fn explain() -> String {
        let mut sections = Vec::with_capacity(Self::parts().len());
        for part in Self::parts() {
            sections.push(format!(
                "## {}\n{}\n\nRegex:\n{}\n",
                part.name, part.description, part.pattern
            ));
        }
        sections.join("\n")
    }

    /// Reports whether `input` contains at least one match of the combined pattern.
    pub fn is_match(&self, input: &str) -> bool {
        self.regex.is_match(input)
    }

    /// Describes the first match in `input`, or returns `None` when nothing matches.
    pub fn explain_match<'a>(&self, input: &'a str) -> Option<ModelMatch<'a>> {
        self.regex
            .captures(input)
            .and_then(|captures| model_match_from_captures(captures, input))
    }

    /// Describes every match yielded by the regex's capture iterator over `input`,
    /// in the order they are found.
    pub fn explain_matches<'a>(&self, input: &'a str) -> Vec<ModelMatch<'a>> {
        let mut found = Vec::new();
        for captures in self.regex.captures_iter(input) {
            if let Some(model_match) = model_match_from_captures(captures, input) {
                found.push(model_match);
            }
        }
        found
    }

    /// Assembles the combined pattern source.
    ///
    /// Each part is wrapped in a capture group named after the part, the groups are
    /// joined with `|`, and the alternation is enclosed in `\b ... \b`.
    fn build_pattern() -> String {
        let mut alternatives = String::new();
        for (index, part) in Self::parts().iter().enumerate() {
            if index > 0 {
                alternatives.push('|');
            }
            alternatives.push_str(&format!("(?P<{}>{})", part.name, part.pattern));
        }
        format!(r"\b(?:{})\b", alternatives)
    }
}
/// Table of alternatives that make up the combined model-name pattern.
///
/// Order is significant: the alternatives are joined with `|` in this order and the
/// regex engine prefers the earliest alternative that can match at a given position.
/// That is why `codex` is listed before the generic `gpt` entry (so `gpt-5.3-codex`
/// is reported as a Codex model) and why `bare_alias` comes last.
static MODEL_PATTERN_PARTS: [PatternPart; 8] = [
    PatternPart::new(
        "claude_instant",
        "Matches Claude Instant models like `claude-instant-1`, `claude-instant-1.2`, or `claude-instant-1-100k`.",
        r"claude-instant-\d+(?:\.\d+)?(?:-\d+k)?",
    ),
    PatternPart::new(
        "claude_numbered_family",
        "Matches numbered Claude family models like `claude-3-sonnet`, `claude-3-5-haiku`, `claude-3-7-sonnet`, or `claude-4-opus-20241022-v2`.",
        r"claude-[234](?:-[357])?-(?:haiku|sonnet|opus)(?:-\d{8})?(?:-v\d+)?",
    ),
    PatternPart::new(
        "claude_family_first",
        "Matches Claude models where the family name comes before the version, like `claude-haiku`, `claude-haiku-3-5`, `claude-sonnet-3-7`, or `claude-opus-4`.",
        r"claude-(?:haiku|sonnet|opus)(?:-\d(?:-\d)?)?(?:-\d{8})?(?:-v\d+)?",
    ),
    PatternPart::new(
        "gemini",
        "Matches Gemini models like `gemini-1.5-flash`, `gemini-2.0-pro`, `gemini-2.5-flash-preview-05-20`, or similar dated/preview variants.",
        r"gemini-\d+(?:\.\d+)?-(?:flash|pro)(?:-[a-z0-9]+)*(?:-\d{2}-\d{2}|-\d{3}|-\d{4})?",
    ),
    PatternPart::new(
        "codex",
        "Matches Codex models like `codex-mini-latest`, `gpt-5.3-codex`, `gpt-5.3-codex-spark`, or similar Codex-specialized variants.",
        r"(?:codex-[a-z0-9]+(?:-[a-z0-9]+)*|gpt-\d+(?:\.\d+)?(?:-[a-z0-9]+)*-codex(?:-[a-z0-9]+)*)",
    ),
    // The optional single-letter suffix (the `o` in `gpt-4o`) must sit directly after
    // the version number. When it trailed the hyphenated segments, `gpt-4o-mini` and
    // `gpt-4o-2024-08-06` were cut short at `gpt-4o`. Dated suffixes such as
    // `-2024-08-06` are already covered by the repeated `-[a-z0-9]+` segments.
    PatternPart::new(
        "gpt",
        "Matches GPT models like `gpt-4`, `gpt-4o`, `gpt-4.1-mini`, `gpt-4-turbo`, or dated variants like `gpt-4-0125-preview`.",
        r"gpt-\d+(?:\.\d+)?[a-z]?(?:-[a-z0-9]+)*",
    ),
    PatternPart::new(
        "grok",
        "Matches Grok models like `grok-1`, `grok-2`, `grok-2-mini`, or `grok-3-beta`.",
        r"grok-\d+(?:-\d+)?(?:-[a-z0-9]+)*",
    ),
    PatternPart::new(
        "bare_alias",
        "Matches bare shorthand aliases like `sonnet`, `opus`, `haiku`, `gpt`, `grok`, or `codex`.",
        r"(?:sonnet|opus|haiku|gpt|grok|codex)",
    ),
];
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles the combined regex once and asserts that every input matches it.
    fn assert_all_match(inputs: &[&str]) {
        let models = ModelRegex::new().unwrap();
        for input in inputs {
            assert!(models.is_match(input), "expected `{}` to match", input);
        }
    }

    #[test]
    fn matches_claude_models() {
        assert_all_match(&[
            "claude-instant-1",
            "claude-instant-1.2",
            "claude-instant-1-100k",
            "claude-3-sonnet",
            "claude-3-5-haiku",
            "claude-3-7-sonnet",
            "claude-4-opus",
            "claude-4-opus-20241022-v2",
            "claude-haiku",
            "claude-haiku-3-5",
            "claude-sonnet-3-7",
            "claude-opus-4",
        ]);
    }

    #[test]
    fn matches_gemini_models() {
        assert_all_match(&[
            "gemini-1.5-flash",
            "gemini-2.0-pro",
            "gemini-2.5-flash-preview-05-20",
            "gemini-2.5-pro-preview-123",
            "gemini-2.5-flash-2025",
        ]);
    }

    #[test]
    fn matches_codex_models() {
        assert_all_match(&[
            "codex-mini-latest",
            "codex-cli",
            "gpt-5.3-codex",
            "gpt-5.3-codex-spark",
        ]);
    }

    #[test]
    fn matches_gpt_models() {
        assert_all_match(&[
            "gpt-4",
            "gpt-4o",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4-0125-preview",
            "gpt-4-2024-08-06",
        ]);
    }

    #[test]
    fn matches_grok_models() {
        assert_all_match(&["grok-1", "grok-2", "grok-2-mini", "grok-3-beta"]);
    }

    #[test]
    fn matches_bare_aliases() {
        assert_all_match(&["sonnet", "opus", "haiku", "gpt", "grok", "codex"]);
    }

    #[test]
    fn explains_first_match() {
        let models = ModelRegex::new().unwrap();
        let ModelMatch {
            matched_text, part, ..
        } = models
            .explain_match("please use claude-3-5-sonnet")
            .unwrap();
        assert_eq!(matched_text, "claude-3-5-sonnet");
        assert_eq!(part.name, "claude_numbered_family");
    }

    #[test]
    fn explains_codex_before_generic_gpt() {
        // The Codex alternative precedes the generic GPT one, so it must win here.
        let models = ModelRegex::new().unwrap();
        let ModelMatch {
            matched_text, part, ..
        } = models.explain_match("use gpt-5.3-codex-spark").unwrap();
        assert_eq!(matched_text, "gpt-5.3-codex-spark");
        assert_eq!(part.name, "codex");
    }

    #[test]
    fn explains_multiple_matches() {
        let models = ModelRegex::new().unwrap();
        let matched: Vec<&str> = models
            .explain_matches("use gpt-4o, gemini-1.5-flash, codex-mini-latest, and grok-2-mini")
            .into_iter()
            .map(|found| found.matched_text)
            .collect();
        assert_eq!(
            matched,
            [
                "gpt-4o",
                "gemini-1.5-flash",
                "codex-mini-latest",
                "grok-2-mini"
            ]
        );
    }
}