use serde::{Deserialize, Serialize};
use crate::interfaces::Model;
/// Identifies which rule set applies to a model.
///
/// [`pick_dialect`] maps a `Model` to one of these variants and [`rules_for`]
/// returns the rules that belong to it. No serde attributes are applied here,
/// so the derived `Serialize`/`Deserialize` impls use serde's default enum
/// representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Dialect {
/// Dialect whose rule set from [`rules_for`] does not contain [`Rule::JitBinding`].
Base,
/// Dialect whose rule set from [`rules_for`] additionally ends with [`Rule::JitBinding`].
LegacyBpe,
}
impl Dialect {
    /// Returns the fixed, human-readable name of this dialect.
    ///
    /// Each name is spelled exactly like its variant identifier
    /// (`"Base"` or `"LegacyBpe"`). The function is `const`, so it can be
    /// evaluated at compile time.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::LegacyBpe => "LegacyBpe",
            Self::Base => "Base",
        }
    }
}
/// A single rule that can be part of a dialect's rule set.
///
/// [`rules_for`] lists which rules belong to each [`Dialect`], and
/// [`Rule::name`] gives each rule's snake_case identifier.
// NOTE(review): what each rule actually does is not visible in this file —
// confirm the semantics against the spec before documenting them here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Rule {
/// Named `"arrow_chain"`; returned by [`rules_for`] for every dialect.
ArrowChain,
/// Named `"state_parens"`; returned by [`rules_for`] for every dialect.
StateParens,
/// Named `"subject_elision"`; returned by [`rules_for`] for every dialect.
SubjectElision,
/// Named `"typed_slots"`; returned by [`rules_for`] for every dialect.
TypedSlots,
/// Named `"jit_binding"`; returned by [`rules_for`] only for [`Dialect::LegacyBpe`].
JitBinding,
}
impl Rule {
    /// Returns the fixed snake_case identifier of this rule.
    ///
    /// The identifier is the variant name converted to snake_case, e.g.
    /// `Rule::ArrowChain` yields `"arrow_chain"`. The function is `const`,
    /// so it can be evaluated at compile time.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::JitBinding => "jit_binding",
            Self::TypedSlots => "typed_slots",
            Self::SubjectElision => "subject_elision",
            Self::StateParens => "state_parens",
            Self::ArrowChain => "arrow_chain",
        }
    }
}
/// Selects the [`Dialect`] to use for `model`.
///
/// `Gpt4`, `Gpt4o`, `Llama3Custom` and `Qwen3Custom` resolve to
/// [`Dialect::LegacyBpe`]; every other variant, including `Registered`,
/// resolves to [`Dialect::Base`]. The payload of the tuple variants is ignored.
///
/// The match is deliberately exhaustive (no `_` arm) so that adding a new
/// `Model` variant forces an explicit decision here at compile time.
#[must_use]
pub fn pick_dialect(model: &Model) -> Dialect {
    match model {
        Model::Gpt4
        | Model::Gpt4o
        | Model::Llama3Custom(_)
        | Model::Qwen3Custom(_) => Dialect::LegacyBpe,

        Model::ClaudeOpus47
        | Model::ClaudeSonnet47
        | Model::ClaudeHaiku47
        | Model::Gpt5
        | Model::Gemini25Ultra
        | Model::Gemini25Pro
        | Model::Grok4
        | Model::Registered(_) => Dialect::Base,
    }
}
/// Returns the ordered rule set for `dialect`.
///
/// [`Dialect::Base`] yields four rules: `ArrowChain`, `StateParens`,
/// `SubjectElision`, `TypedSlots`. [`Dialect::LegacyBpe`] yields the same
/// four in the same order, followed by `JitBinding`.
///
/// The returned slice borrows from a table with `'static` lifetime, so the
/// call never allocates.
#[must_use]
pub fn rules_for(dialect: Dialect) -> &'static [Rule] {
    static BASE_RULES: [Rule; 4] = [
        Rule::ArrowChain,
        Rule::StateParens,
        Rule::SubjectElision,
        Rule::TypedSlots,
    ];
    // Same leading four entries as `BASE_RULES`, plus the JIT binding rule.
    static LEGACY_BPE_RULES: [Rule; 5] = [
        Rule::ArrowChain,
        Rule::StateParens,
        Rule::SubjectElision,
        Rule::TypedSlots,
        Rule::JitBinding,
    ];

    match dialect {
        Dialect::LegacyBpe => LEGACY_BPE_RULES.as_slice(),
        Dialect::Base => BASE_RULES.as_slice(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every model in `models` resolves to `expected`.
    fn assert_all_pick(models: &[Model], expected: Dialect) {
        for model in models {
            assert_eq!(pick_dialect(model), expected);
        }
    }

    #[test]
    fn base_has_four_rules_never_including_jit() {
        let base = rules_for(Dialect::Base);
        assert_eq!(base.len(), 4);
        assert!(base.iter().all(|rule| *rule != Rule::JitBinding));
    }

    #[test]
    fn legacy_bpe_has_five_rules_including_jit() {
        let legacy = rules_for(Dialect::LegacyBpe);
        assert_eq!(legacy.len(), 5);
        assert!(legacy.iter().any(|rule| *rule == Rule::JitBinding));
    }

    #[test]
    fn modern_claude_maps_to_base() {
        assert_all_pick(
            &[
                Model::ClaudeOpus47,
                Model::ClaudeSonnet47,
                Model::ClaudeHaiku47,
            ],
            Dialect::Base,
        );
    }

    #[test]
    fn modern_gemini_maps_to_base() {
        assert_all_pick(&[Model::Gemini25Pro, Model::Gemini25Ultra], Dialect::Base);
    }

    #[test]
    fn legacy_openai_maps_to_legacy_bpe() {
        assert_all_pick(&[Model::Gpt4, Model::Gpt4o], Dialect::LegacyBpe);
    }

    #[test]
    fn open_weights_map_to_legacy_bpe() {
        assert_all_pick(
            &[
                Model::Llama3Custom("meta/70b".into()),
                Model::Qwen3Custom("qwen3-8b".into()),
            ],
            Dialect::LegacyBpe,
        );
    }

    #[test]
    fn registered_models_default_to_base_never_legacy() {
        let registered = Model::Registered("some-new-provider-2027".into());
        assert_all_pick(&[registered], Dialect::Base);
    }

    #[test]
    fn gpt5_modern_generation_is_base() {
        assert_all_pick(&[Model::Gpt5], Dialect::Base);
    }

    #[test]
    fn dialect_names_match_spec() {
        let expected = [(Dialect::Base, "Base"), (Dialect::LegacyBpe, "LegacyBpe")];
        for (dialect, label) in expected {
            assert_eq!(dialect.name(), label);
        }
    }

    #[test]
    fn rule_names_match_f_gram_identifiers() {
        let expected = [
            (Rule::ArrowChain, "arrow_chain"),
            (Rule::StateParens, "state_parens"),
            (Rule::SubjectElision, "subject_elision"),
            (Rule::TypedSlots, "typed_slots"),
            (Rule::JitBinding, "jit_binding"),
        ];
        for (rule, identifier) in expected {
            assert_eq!(rule.name(), identifier);
        }
    }

    #[test]
    fn rules_for_is_deterministic_and_order_preserving() {
        let first = rules_for(Dialect::LegacyBpe);
        let second = rules_for(Dialect::LegacyBpe);
        assert_eq!(first, second);

        // The legacy set must start with the base rules and then add JIT binding.
        let (shared, extra) = first.split_at(4);
        assert_eq!(shared, rules_for(Dialect::Base));
        assert_eq!(extra[0], Rule::JitBinding);
    }

    #[test]
    fn dialect_serde_round_trips() {
        for original in [Dialect::Base, Dialect::LegacyBpe] {
            let encoded = serde_json::to_string(&original).unwrap();
            let decoded: Dialect = serde_json::from_str(&encoded).unwrap();
            assert_eq!(original, decoded);
        }
    }
}