1use serde::{Deserialize, Serialize};
10
11use crate::interfaces::Model;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
20pub enum Dialect {
21 Base,
23 LegacyBpe,
25}
26
27impl Dialect {
28 #[must_use]
30 pub const fn name(&self) -> &'static str {
31 match self {
32 Self::Base => "Base",
33 Self::LegacyBpe => "LegacyBpe",
34 }
35 }
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
42pub enum Rule {
43 ArrowChain,
45 StateParens,
47 SubjectElision,
49 TypedSlots,
51 JitBinding,
53}
54
55impl Rule {
56 #[must_use]
58 pub const fn name(&self) -> &'static str {
59 match self {
60 Self::ArrowChain => "arrow_chain",
61 Self::StateParens => "state_parens",
62 Self::SubjectElision => "subject_elision",
63 Self::TypedSlots => "typed_slots",
64 Self::JitBinding => "jit_binding",
65 }
66 }
67}
68
69#[must_use]
80pub fn pick_dialect(model: &Model) -> Dialect {
81 match model {
82 Model::Gpt4 | Model::Gpt4o => Dialect::LegacyBpe,
84 Model::Llama3Custom(_) | Model::Qwen3Custom(_) => Dialect::LegacyBpe,
85
86 Model::ClaudeOpus47 | Model::ClaudeSonnet47 | Model::ClaudeHaiku47 => Dialect::Base,
88 Model::Gpt5 => Dialect::Base,
89 Model::Gemini25Ultra | Model::Gemini25Pro => Dialect::Base,
90 Model::Grok4 => Dialect::Base,
91
92 Model::Registered(_) => Dialect::Base,
94 }
95}
96
97#[must_use]
103pub fn rules_for(dialect: Dialect) -> &'static [Rule] {
104 const BASE: &[Rule] = &[
105 Rule::ArrowChain,
106 Rule::StateParens,
107 Rule::SubjectElision,
108 Rule::TypedSlots,
109 ];
110 const LEGACY_BPE: &[Rule] = &[
111 Rule::ArrowChain,
112 Rule::StateParens,
113 Rule::SubjectElision,
114 Rule::TypedSlots,
115 Rule::JitBinding,
116 ];
117 match dialect {
118 Dialect::Base => BASE,
119 Dialect::LegacyBpe => LEGACY_BPE,
120 }
121}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// The base dialect lists exactly four rules and none of them is `JitBinding`.
    #[test]
    fn base_has_four_rules_never_including_jit() {
        let base = rules_for(Dialect::Base);
        assert_eq!(base.len(), 4);
        assert!(base.iter().all(|rule| *rule != Rule::JitBinding));
    }

    /// The legacy dialect lists exactly five rules, one of which is `JitBinding`.
    #[test]
    fn legacy_bpe_has_five_rules_including_jit() {
        let legacy = rules_for(Dialect::LegacyBpe);
        assert_eq!(legacy.len(), 5);
        assert!(legacy.iter().any(|rule| *rule == Rule::JitBinding));
    }

    /// All three Claude variants resolve to the base dialect.
    #[test]
    fn modern_claude_maps_to_base() {
        for model in [Model::ClaudeOpus47, Model::ClaudeSonnet47, Model::ClaudeHaiku47] {
            assert_eq!(pick_dialect(&model), Dialect::Base);
        }
    }

    /// Both Gemini variants resolve to the base dialect.
    #[test]
    fn modern_gemini_maps_to_base() {
        for model in [Model::Gemini25Pro, Model::Gemini25Ultra] {
            assert_eq!(pick_dialect(&model), Dialect::Base);
        }
    }

    /// `Gpt4` and `Gpt4o` resolve to the legacy dialect.
    #[test]
    fn legacy_openai_maps_to_legacy_bpe() {
        for model in [Model::Gpt4, Model::Gpt4o] {
            assert_eq!(pick_dialect(&model), Dialect::LegacyBpe);
        }
    }

    /// Custom Llama 3 and Qwen 3 models resolve to the legacy dialect.
    #[test]
    fn open_weights_map_to_legacy_bpe() {
        let llama = Model::Llama3Custom("meta/70b".into());
        let qwen = Model::Qwen3Custom("qwen3-8b".into());
        assert_eq!(pick_dialect(&llama), Dialect::LegacyBpe);
        assert_eq!(pick_dialect(&qwen), Dialect::LegacyBpe);
    }

    /// A registered model resolves to the base dialect.
    #[test]
    fn registered_models_default_to_base_never_legacy() {
        let registered = Model::Registered("some-new-provider-2027".into());
        assert_eq!(pick_dialect(&registered), Dialect::Base);
    }

    /// `Gpt5` resolves to the base dialect.
    #[test]
    fn gpt5_modern_generation_is_base() {
        let chosen = pick_dialect(&Model::Gpt5);
        assert_eq!(chosen, Dialect::Base);
    }

    /// Dialect names are spelled exactly like their variants.
    #[test]
    fn dialect_names_match_spec() {
        let expected = [(Dialect::Base, "Base"), (Dialect::LegacyBpe, "LegacyBpe")];
        for (dialect, name) in expected {
            assert_eq!(dialect.name(), name);
        }
    }

    /// Rule names are the snake_case identifiers.
    #[test]
    fn rule_names_match_f_gram_identifiers() {
        let expected = [
            (Rule::ArrowChain, "arrow_chain"),
            (Rule::StateParens, "state_parens"),
            (Rule::SubjectElision, "subject_elision"),
            (Rule::TypedSlots, "typed_slots"),
            (Rule::JitBinding, "jit_binding"),
        ];
        for (rule, name) in expected {
            assert_eq!(rule.name(), name);
        }
    }

    /// Repeated calls agree, and the legacy list is the base list plus `JitBinding`.
    #[test]
    fn rules_for_is_deterministic_and_order_preserving() {
        let first = rules_for(Dialect::LegacyBpe);
        let second = rules_for(Dialect::LegacyBpe);
        assert_eq!(first, second);

        // The first four entries must equal the base list; the fifth is the extra rule.
        let (shared, extra) = first.split_at(4);
        assert_eq!(shared, rules_for(Dialect::Base));
        assert_eq!(extra[0], Rule::JitBinding);
    }

    /// Every dialect survives a JSON encode/decode round trip unchanged.
    #[test]
    fn dialect_serde_round_trips() {
        for original in [Dialect::Base, Dialect::LegacyBpe] {
            let encoded = serde_json::to_string(&original).unwrap();
            let decoded: Dialect = serde_json::from_str(&encoded).unwrap();
            assert_eq!(original, decoded);
        }
    }
}