1use crate::models::{Encoding, ModelCard, Pricing};
24
/// One row of the embedded model table:
/// `(model id, short abbreviation, tokenizer encoding, context length in tokens)`.
pub type EmbeddedModel = (&'static str, &'static str, Encoding, u32);
27
/// Compile-time table of known models, grouped by provider.
///
/// Each entry is `(id, abbreviation, encoding, context length)`; abbreviations
/// and ids must be unique across the table (enforced by the tests below).
pub static EMBEDDED_MODELS: &[EmbeddedModel] = &[
    // --- OpenAI ---
    ("openai/gpt-5.2", "og52", Encoding::O200kBase, 400000),
    ("openai/gpt-5.2-pro", "og52p", Encoding::O200kBase, 400000),
    ("openai/gpt-5.2-codex", "og52c", Encoding::O200kBase, 400000),
    ("openai/gpt-5.2-chat", "og52ch", Encoding::O200kBase, 128000),
    ("openai/gpt-5.1", "og51", Encoding::O200kBase, 400000),
    ("openai/gpt-5.1-codex", "og51c", Encoding::O200kBase, 400000),
    (
        "openai/gpt-5.1-codex-mini",
        "og51cm",
        Encoding::O200kBase,
        400000,
    ),
    ("openai/gpt-5", "og5", Encoding::O200kBase, 400000),
    ("openai/gpt-5-pro", "og5p", Encoding::O200kBase, 400000),
    ("openai/gpt-5-mini", "og5m", Encoding::O200kBase, 128000),
    ("openai/gpt-5-nano", "og5n", Encoding::O200kBase, 64000),
    ("openai/gpt-4.1", "og41", Encoding::O200kBase, 128000),
    ("openai/gpt-4.1-mini", "og41m", Encoding::O200kBase, 128000),
    ("openai/gpt-4.1-nano", "og41n", Encoding::O200kBase, 64000),
    ("openai/gpt-4o", "og4o", Encoding::O200kBase, 128000),
    ("openai/gpt-4o-mini", "og4om", Encoding::O200kBase, 128000),
    (
        "openai/gpt-4o-2024-11-20",
        "og4o1120",
        Encoding::O200kBase,
        128000,
    ),
    (
        "openai/gpt-4o-2024-08-06",
        "og4o0806",
        Encoding::O200kBase,
        128000,
    ),
    (
        "openai/gpt-4o-2024-05-13",
        "og4o0513",
        Encoding::O200kBase,
        128000,
    ),
    // gpt-4-turbo and older use the older cl100k encoding.
    ("openai/gpt-4-turbo", "og4t", Encoding::Cl100kBase, 128000),
    (
        "openai/gpt-4-turbo-preview",
        "og4tp",
        Encoding::Cl100kBase,
        128000,
    ),
    ("openai/gpt-4", "og4", Encoding::Cl100kBase, 8192),
    ("openai/gpt-4-32k", "og432k", Encoding::Cl100kBase, 32768),
    ("openai/gpt-3.5-turbo", "og35t", Encoding::Cl100kBase, 16385),
    (
        "openai/gpt-3.5-turbo-16k",
        "og35t16k",
        Encoding::Cl100kBase,
        16385,
    ),
    ("openai/o4-mini", "oo4m", Encoding::O200kBase, 200000),
    ("openai/o4-mini-high", "oo4mh", Encoding::O200kBase, 200000),
    ("openai/o3", "oo3", Encoding::O200kBase, 200000),
    ("openai/o3-pro", "oo3p", Encoding::O200kBase, 200000),
    ("openai/o3-mini", "oo3m", Encoding::O200kBase, 200000),
    ("openai/o3-mini-high", "oo3mh", Encoding::O200kBase, 200000),
    ("openai/o1", "oo1", Encoding::O200kBase, 200000),
    ("openai/o1-pro", "oo1p", Encoding::O200kBase, 200000),
    // --- Meta (Llama) ---
    (
        "meta-llama/llama-4-maverick",
        "ml4mav",
        Encoding::LlamaBpe,
        256000,
    ),
    (
        "meta-llama/llama-4-scout",
        "ml4sc",
        Encoding::LlamaBpe,
        256000,
    ),
    (
        "meta-llama/llama-guard-4-12b",
        "mlg412",
        Encoding::LlamaBpe,
        131072,
    ),
    (
        "meta-llama/llama-guard-3-8b",
        "mlg38",
        Encoding::LlamaBpe,
        131072,
    ),
    (
        "meta-llama/llama-3.3-70b",
        "ml3370",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.3-70b-instruct",
        "ml3370i",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.1-405b",
        "ml31405",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.1-405b-instruct",
        "ml31405i",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.1-70b",
        "ml3170",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.1-70b-instruct",
        "ml3170i",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.1-8b",
        "ml318",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "meta-llama/llama-3.1-8b-instruct",
        "ml318i",
        Encoding::LlamaBpe,
        128000,
    ),
    ("meta-llama/llama-3-70b", "ml370", Encoding::LlamaBpe, 8192),
    (
        "meta-llama/llama-3-70b-instruct",
        "ml370i",
        Encoding::LlamaBpe,
        8192,
    ),
    ("meta-llama/llama-3-8b", "ml38", Encoding::LlamaBpe, 8192),
    (
        "meta-llama/llama-3-8b-instruct",
        "ml38i",
        Encoding::LlamaBpe,
        8192,
    ),
    // --- Mistral ---
    (
        "mistralai/mistral-large-2512",
        "mim-l2512",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "mistralai/ministral-14b-2512",
        "mimin14",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "mistralai/ministral-8b-2512",
        "mimin8",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "mistralai/ministral-3b-2512",
        "mimin3",
        Encoding::LlamaBpe,
        131072,
    ),
    (
        "mistralai/devstral-2512",
        "midev2512",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "mistralai/devstral-medium",
        "midevmed",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "mistralai/devstral-small",
        "midevsm",
        Encoding::LlamaBpe,
        131072,
    ),
    (
        "mistralai/codestral-2508",
        "micod2508",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "mistralai/mistral-large",
        "mim-l",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "mistralai/mistral-large-latest",
        "mim-ll",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "mistralai/mistral-large-2411",
        "mim-l2411",
        Encoding::LlamaBpe,
        128000,
    ),
    (
        "mistralai/mistral-medium",
        "mim-m",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/mistral-small",
        "mim-s",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/mistral-small-latest",
        "mim-sl",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/mixtral-8x7b",
        "mimx87",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/mixtral-8x7b-instruct",
        "mimx87i",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/mixtral-8x22b",
        "mimx822",
        Encoding::LlamaBpe,
        65000,
    ),
    (
        "mistralai/mixtral-8x22b-instruct",
        "mimx822i",
        Encoding::LlamaBpe,
        65000,
    ),
    ("mistralai/mistral-7b", "mim7", Encoding::LlamaBpe, 32000),
    (
        "mistralai/mistral-7b-instruct",
        "mim7i",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/codestral-latest",
        "micodl",
        Encoding::LlamaBpe,
        32000,
    ),
    (
        "mistralai/codestral-mamba",
        "micodm",
        Encoding::LlamaBpe,
        256000,
    ),
    // --- DeepSeek ---
    (
        "deepseek/deepseek-v3.2",
        "ddv32",
        Encoding::Heuristic,
        163840,
    ),
    (
        "deepseek/deepseek-v3.2-speciale",
        "ddv32s",
        Encoding::Heuristic,
        163840,
    ),
    (
        "deepseek/deepseek-chat-v3.1",
        "ddv31",
        Encoding::Heuristic,
        163840,
    ),
    ("deepseek/deepseek-r1", "ddr1", Encoding::Heuristic, 163840),
    (
        "deepseek/deepseek-r1-0528",
        "ddr10528",
        Encoding::Heuristic,
        163840,
    ),
    // NOTE: the llama distill uses the Llama tokenizer, unlike the rest of DeepSeek.
    (
        "deepseek/deepseek-r1-distill-llama-70b",
        "ddr1dl70",
        Encoding::LlamaBpe,
        131072,
    ),
    (
        "deepseek/deepseek-r1-distill-qwen-32b",
        "ddr1dq32",
        Encoding::Heuristic,
        131072,
    ),
    ("deepseek/deepseek-v3", "ddv3", Encoding::Heuristic, 64000),
    (
        "deepseek/deepseek-chat",
        "ddchat",
        Encoding::Heuristic,
        64000,
    ),
    (
        "deepseek/deepseek-chat-v3-0324",
        "ddv30324",
        Encoding::Heuristic,
        64000,
    ),
    // --- Qwen ---
    (
        "qwen/qwen3-235b-a22b",
        "qq3235",
        Encoding::Heuristic,
        131072,
    ),
    (
        "qwen/qwen3-235b-a22b-2507",
        "qq32352507",
        Encoding::Heuristic,
        131072,
    ),
    ("qwen/qwen3-32b", "qq332", Encoding::Heuristic, 131072),
    ("qwen/qwen3-14b", "qq314", Encoding::Heuristic, 131072),
    ("qwen/qwen3-8b", "qq38", Encoding::Heuristic, 131072),
    ("qwen/qwen3-30b-a3b", "qq330a3", Encoding::Heuristic, 131072),
    (
        "qwen/qwen3-30b-a3b-instruct-2507",
        "qq330i",
        Encoding::Heuristic,
        131072,
    ),
    ("qwen/qwen3-coder", "qq3cod", Encoding::Heuristic, 262144),
    (
        "qwen/qwen3-coder-plus",
        "qq3codp",
        Encoding::Heuristic,
        262144,
    ),
    (
        "qwen/qwen3-coder-flash",
        "qq3codf",
        Encoding::Heuristic,
        131072,
    ),
    (
        "qwen/qwen3-vl-235b-a22b-instruct",
        "qq3vl235",
        Encoding::Heuristic,
        131072,
    ),
    (
        "qwen/qwen3-vl-32b-instruct",
        "qq3vl32",
        Encoding::Heuristic,
        131072,
    ),
    ("qwen/qwen-2.5-72b", "qq2572", Encoding::Heuristic, 131072),
    (
        "qwen/qwen-2.5-72b-instruct",
        "qq2572i",
        Encoding::Heuristic,
        131072,
    ),
    (
        "qwen/qwen-2.5-coder-32b-instruct",
        "qqc32i",
        Encoding::Heuristic,
        131072,
    ),
    (
        "qwen/qwen-2.5-7b-instruct",
        "qq257i",
        Encoding::Heuristic,
        131072,
    ),
    ("qwen/qwq-32b", "qqwq32", Encoding::Heuristic, 131072),
    // --- Nvidia ---
    (
        "nvidia/nemotron-3-nano-30b-a3b",
        "nn3nano30",
        Encoding::LlamaBpe,
        262144,
    ),
    (
        "nvidia/nemotron-nano-12b-v2-vl",
        "nnnano12vl",
        Encoding::LlamaBpe,
        131072,
    ),
    ("nvidia/nemotron-70b", "nn70", Encoding::LlamaBpe, 32768),
    (
        "nvidia/llama-3.1-nemotron-70b-instruct",
        "nnl3170i",
        Encoding::LlamaBpe,
        32768,
    ),
    // --- Google (Gemma) ---
    (
        "google/gemma-3-27b-it",
        "gg327",
        Encoding::Heuristic,
        131072,
    ),
    (
        "google/gemma-3-12b-it",
        "gg312",
        Encoding::Heuristic,
        131072,
    ),
    ("google/gemma-3-4b-it", "gg34", Encoding::Heuristic, 131072),
    (
        "google/gemma-3n-e4b-it",
        "gg3n4",
        Encoding::Heuristic,
        131072,
    ),
    ("google/gemma-2-27b-it", "gg227", Encoding::Heuristic, 8192),
    ("google/gemma-2-9b-it", "gg29", Encoding::Heuristic, 8192),
    // --- AllenAI (OLMo) ---
    (
        "allenai/olmo-3.1-32b-instruct",
        "aolmo3132i",
        Encoding::Heuristic,
        65536,
    ),
    (
        "allenai/olmo-3.1-32b-think",
        "aolmo3132t",
        Encoding::Heuristic,
        65536,
    ),
    (
        "allenai/olmo-3-32b-think",
        "aolmo332t",
        Encoding::Heuristic,
        65536,
    ),
    (
        "allenai/olmo-3-7b-instruct",
        "aolmo37i",
        Encoding::Heuristic,
        65536,
    ),
];
560
561pub fn get_embedded_models() -> Vec<ModelCard> {
563 EMBEDDED_MODELS
564 .iter()
565 .map(|(id, abbrev, encoding, ctx_len)| {
566 ModelCard::with_abbrev(*id, *abbrev)
567 .encoding(*encoding)
568 .context_length(*ctx_len)
569 })
570 .collect()
571}
572
573pub fn get_embedded_by_id(id: &str) -> Option<ModelCard> {
575 EMBEDDED_MODELS
576 .iter()
577 .find(|(model_id, _, _, _)| *model_id == id)
578 .map(|(id, abbrev, encoding, ctx_len)| {
579 ModelCard::with_abbrev(*id, *abbrev)
580 .encoding(*encoding)
581 .context_length(*ctx_len)
582 })
583}
584
585pub fn get_embedded_by_abbrev(abbrev: &str) -> Option<ModelCard> {
587 EMBEDDED_MODELS
588 .iter()
589 .find(|(_, model_abbrev, _, _)| *model_abbrev == abbrev)
590 .map(|(id, abbrev, encoding, ctx_len)| {
591 ModelCard::with_abbrev(*id, *abbrev)
592 .encoding(*encoding)
593 .context_length(*ctx_len)
594 })
595}
596
597pub fn get_pricing(model_id: &str) -> Option<Pricing> {
599 match model_id {
601 "openai/gpt-5.2" | "openai/gpt-5.2-codex" => Some(Pricing::from_per_million(1.75, 7.00)),
603 "openai/gpt-5.2-pro" => Some(Pricing::from_per_million(21.00, 84.00)),
604 "openai/gpt-5.1" | "openai/gpt-5.1-codex" => Some(Pricing::from_per_million(1.25, 5.00)),
605 "openai/gpt-5" => Some(Pricing::from_per_million(1.75, 7.00)),
606 "openai/gpt-5-pro" => Some(Pricing::from_per_million(15.00, 60.00)),
607 "openai/gpt-5-mini" => Some(Pricing::from_per_million(0.30, 1.20)),
608 "openai/gpt-5-nano" => Some(Pricing::from_per_million(0.10, 0.40)),
609
610 "openai/gpt-4.1" => Some(Pricing::from_per_million(2.00, 8.00)),
612 "openai/gpt-4.1-mini" => Some(Pricing::from_per_million(0.40, 1.60)),
613 "openai/gpt-4o" => Some(Pricing::from_per_million(2.50, 10.00)),
614 "openai/gpt-4o-mini" => Some(Pricing::from_per_million(0.15, 0.60)),
615 "openai/gpt-4-turbo" => Some(Pricing::from_per_million(10.00, 30.00)),
616 "openai/gpt-4" => Some(Pricing::from_per_million(30.00, 60.00)),
617 "openai/gpt-3.5-turbo" => Some(Pricing::from_per_million(0.50, 1.50)),
618
619 "openai/o4-mini" => Some(Pricing::from_per_million(1.10, 4.40)),
621 "openai/o3" => Some(Pricing::from_per_million(10.00, 40.00)),
622 "openai/o3-pro" => Some(Pricing::from_per_million(20.00, 80.00)),
623 "openai/o3-mini" => Some(Pricing::from_per_million(1.10, 4.40)),
624 "openai/o1" => Some(Pricing::from_per_million(15.00, 60.00)),
625 "openai/o1-pro" => Some(Pricing::from_per_million(150.00, 600.00)),
626
627 "deepseek/deepseek-v3.2" | "deepseek/deepseek-chat" => {
629 Some(Pricing::from_per_million(0.25, 1.00))
630 },
631 "deepseek/deepseek-r1" => Some(Pricing::from_per_million(0.55, 2.19)),
632
633 "mistralai/mistral-large-2512" => Some(Pricing::from_per_million(0.50, 1.50)),
635 "mistralai/ministral-8b-2512" => Some(Pricing::from_per_million(0.15, 0.15)),
636 "mistralai/devstral-2512" => Some(Pricing::from_per_million(0.05, 0.05)),
637
638 "qwen/qwen3-235b-a22b" => Some(Pricing::from_per_million(0.20, 0.60)),
640 "qwen/qwen3-coder" => Some(Pricing::from_per_million(0.14, 0.14)),
641
642 "meta-llama/llama-4-maverick" => Some(Pricing::from_per_million(0.20, 0.60)),
644 "meta-llama/llama-4-scout" => Some(Pricing::from_per_million(0.08, 0.30)),
645
646 "google/gemma-3-27b-it" => Some(Pricing::from_per_million(0.10, 0.10)),
648
649 _ => None,
650 }
651}
652
#[cfg(test)]
mod tests {
    use super::*;

    // Sanity floor on the table size so accidental truncation is caught.
    #[test]
    fn test_embedded_models_count() {
        assert!(
            EMBEDDED_MODELS.len() >= 80,
            "Expected at least 80 embedded models, got {}",
            EMBEDDED_MODELS.len()
        );
    }

    // Abbreviations are lookup keys (see get_embedded_by_abbrev); they must be unique.
    #[test]
    fn test_unique_abbreviations() {
        let mut seen = std::collections::HashSet::new();
        for (id, abbrev, _, _) in EMBEDDED_MODELS {
            assert!(
                seen.insert(*abbrev),
                "Duplicate abbreviation '{abbrev}' for model '{id}'"
            );
        }
    }

    // Model ids are lookup keys (see get_embedded_by_id); they must be unique.
    #[test]
    fn test_unique_ids() {
        let mut seen = std::collections::HashSet::new();
        for (id, _, _, _) in EMBEDDED_MODELS {
            assert!(seen.insert(*id), "Duplicate model ID '{id}'");
        }
    }

    // Spot-check a lookup by id against the table entry for gpt-4o.
    #[test]
    fn test_get_embedded_by_id() {
        let card = get_embedded_by_id("openai/gpt-4o").expect("Should find gpt-4o");
        assert_eq!(card.abbrev, "og4o");
        assert_eq!(card.encoding, Encoding::O200kBase);
    }

    // Spot-check a lookup by abbreviation against the table entry.
    #[test]
    fn test_get_embedded_by_abbrev() {
        let card = get_embedded_by_abbrev("ml3170i").expect("Should find llama-3.1-70b-instruct");
        assert_eq!(card.id, "meta-llama/llama-3.1-70b-instruct");
        assert_eq!(card.context_length, 128000);
    }

    // Pricing is stored per-token; scaling back up should recover $2.50/M input.
    #[test]
    fn test_pricing() {
        let pricing = get_pricing("openai/gpt-4o").expect("Should have pricing for gpt-4o");
        let cost = pricing.prompt * 1_000_000.0;
        assert!((cost - 2.50).abs() < 0.01);
    }

    // Every provider section of the table should survive the ModelCard conversion.
    #[test]
    fn test_all_providers_represented() {
        use crate::models::Provider;

        let models = get_embedded_models();
        let providers: std::collections::HashSet<_> = models.iter().map(|m| m.provider).collect();

        assert!(providers.contains(&Provider::OpenAI), "Missing OpenAI");
        assert!(providers.contains(&Provider::Meta), "Missing Meta");
        assert!(providers.contains(&Provider::Mistral), "Missing Mistral");
        assert!(providers.contains(&Provider::DeepSeek), "Missing DeepSeek");
        assert!(providers.contains(&Provider::Qwen), "Missing Qwen");
        assert!(providers.contains(&Provider::Nvidia), "Missing Nvidia");
        assert!(
            providers.contains(&Provider::Google),
            "Missing Google (Gemma)"
        );
    }
}