Skip to main content

token_count/tokenizers/google/
models.rs

1//! Google Gemini model definitions and metadata
2//!
3//! Note: Model names must match those supported by the gemini-tokenizer crate (v0.2.0).
4//! All Gemini models use the same Gemma 3 SentencePiece tokenizer.
5
6use crate::tokenizers::registry::ModelConfig;
7
8/// Get all Google Gemini model configurations
9///
10/// Returns a vector of ModelConfig for all supported Gemini models.
11/// Models are ordered by generation (GA first for stability) and tier (Pro > Flash > Lite).
12///
13/// Note: Only includes models supported by gemini-tokenizer v0.2.0. Additional models
14/// may be added as the upstream library expands support.
15pub fn google_models() -> Vec<ModelConfig> {
16    vec![
17        // Gemini 2.5 Series (GA, deprecated June 2026)
18        ModelConfig {
19            name: "gemini-2.5-pro".to_string(),
20            encoding: "gemini-gemma3".to_string(),
21            context_window: 1_000_000,
22            description: "Pro model, 1M context (GA, deprecated June 2026)".to_string(),
23            aliases: vec![
24                "gemini-pro".to_string(),
25                "gemini-2-pro".to_string(),
26                "gemini-2.5".to_string(),
27                "google/gemini-pro".to_string(),
28            ],
29        },
30        ModelConfig {
31            name: "gemini-2.5-flash".to_string(),
32            encoding: "gemini-gemma3".to_string(),
33            context_window: 1_000_000,
34            description: "Flash model, 1M context (GA, deprecated June 2026, default)".to_string(),
35            aliases: vec![
36                "gemini".to_string(),
37                "gemini-flash".to_string(),
38                "gemini-2-flash".to_string(),
39                "google/gemini".to_string(),
40                "google/gemini-flash".to_string(),
41            ],
42        },
43        ModelConfig {
44            name: "gemini-2.5-flash-lite".to_string(),
45            encoding: "gemini-gemma3".to_string(),
46            context_window: 1_000_000,
47            description: "Flash Lite model, 1M context (GA, deprecated June 2026, fastest)"
48                .to_string(),
49            aliases: vec![
50                "gemini-lite".to_string(),
51                "gemini-2-lite".to_string(),
52                "gemini-2.5-lite".to_string(),
53                "google/gemini-lite".to_string(),
54            ],
55        },
56        // Gemini 3.x Series (Preview)
57        ModelConfig {
58            name: "gemini-3-pro-preview".to_string(),
59            encoding: "gemini-gemma3".to_string(),
60            context_window: 1_000_000,
61            description: "Pro model, 1M context (Preview)".to_string(),
62            aliases: vec!["gemini-3-pro".to_string(), "gemini-3".to_string()],
63        },
64    ]
65}
66
#[cfg(test)]
mod tests {
    use super::*;

    /// The model list is expected to contain exactly four Gemini entries.
    #[test]
    fn test_google_models_count() {
        let models = google_models();
        assert_eq!(models.len(), 4, "Should have 4 Gemini models");
    }

    /// The bare `gemini` alias must be attached to `gemini-2.5-flash`.
    #[test]
    fn test_default_alias() {
        let models = google_models();
        // A missing entry is a broken invariant of the model list, so fail with
        // a message that names it instead of a bare `unwrap` panic.
        let flash = models
            .iter()
            .find(|m| m.name == "gemini-2.5-flash")
            .expect("gemini-2.5-flash should be present in google_models()");
        assert!(
            flash.aliases.iter().any(|alias| alias == "gemini"),
            "gemini alias should point to gemini-2.5-flash"
        );
    }

    /// Every listed model must declare a 1,000,000-token context window.
    #[test]
    fn test_all_have_1m_context() {
        let models = google_models();
        for model in &models {
            assert_eq!(model.context_window, 1_000_000, "All models should have 1M context");
        }
    }

    /// Every listed model must use the shared `gemini-gemma3` encoding.
    #[test]
    fn test_all_use_same_encoding() {
        let models = google_models();
        for model in &models {
            assert_eq!(
                model.encoding, "gemini-gemma3",
                "All models should use gemini-gemma3 encoding"
            );
        }
    }
}
106}