//! openai_models/lib.rs — model identifiers, pricing tables (USD per 1M
//! tokens), and published specification data for OpenAI-compatible models.
1use std::{collections::VecDeque, str::FromStr};
2
3use chrono::NaiveDate;
4use derive_more::derive::Display;
5use serde::{Deserialize, Serialize};
6
7pub mod error;
8pub mod llm;
9
/// Re-export of the `async_openai` crate under a stable local path, so
/// downstream code can reach the client types via `openai_models::openai::*`
/// without depending on `async_openai` directly.
pub mod openai {
    pub use async_openai::*;
}
13
/// General model identifiers; note a name might alias to a specific dated
/// model on the provider side. `Display` renders the wire-format model name
/// (e.g. `GPT4O` displays as "gpt-4o"), which is what gets sent in API requests.
#[derive(Debug, Clone, Serialize, Deserialize, Display)]
pub enum OpenAIModel {
    #[display("gpt-4o")]
    GPT4O,
    #[display("gpt-4o-mini")]
    GPT4OMINI,
    #[display("o1")]
    O1,
    #[display("o1-mini")]
    O1MINI,
    #[display("gpt-3.5-turbo")]
    GPT35TURBO,
    #[display("gpt-4")]
    GPT4,
    #[display("gpt-4-turbo")]
    GPT4TURBO,
    #[display("gpt-5-mini")]
    GPT5MINI,
    #[display("gpt-5-nano")]
    GPT5NANO,
    #[display("gpt-5.2")]
    GPT52,
    #[display("gpt-5")]
    GPT5,
    #[display("gpt-5.1")]
    GPT51,
    #[display("gpt-5-pro")]
    GPT5PRO,
    #[display("gpt-4.1")]
    GPT41,
    #[display("gpt-4.1-mini")]
    GPT41MINI,
    #[display("gpt-4.1-nano")]
    GPT41NANO,
    #[display("o3")]
    O3,
    #[display("o4-mini")]
    O4MINI,
    #[display("o3-mini")]
    O3MINI,
    #[display("o3-pro")]
    O3PRO,
    #[display("gemini-3-pro-preview")]
    GEMINI3PRO,
    #[display("gemini-3-flash-preview")]
    GEMINI3FLASH,
    #[display("gemini-2.5-pro")]
    GEMINI25PRO,
    #[display("gemini-2.5-flash")]
    GEMINI25FLASH,
    /// Any model not in the list above: raw model name plus caller-supplied
    /// pricing (see `FromStr` for the `"name,input,output[,cached]"` syntax).
    /// Displays as the bare model name.
    #[display("{_0}")]
    Other(String, PricingInfo),
}
68
69impl FromStr for OpenAIModel {
70    type Err = String;
71    fn from_str(s: &str) -> Result<Self, Self::Err> {
72        match s {
73            "gpt-4o" | "gpt4o" => Ok(Self::GPT4O),
74            "gpt-4" | "gpt" => Ok(Self::GPT4),
75            "gpt-4-turbo" | "gpt4turbo" => Ok(Self::GPT4TURBO),
76            "gpt-4o-mini" | "gpt4omini" => Ok(Self::GPT4OMINI),
77            "o1" => Ok(Self::O1),
78            "o1-mini" => Ok(Self::O1MINI),
79            "gpt-3.5-turbo" | "gpt3.5turbo" => Ok(Self::GPT35TURBO),
80            "gpt-5.2" => Ok(Self::GPT52),
81            "gpt-5-mini" | "gpt-5mini" => Ok(Self::GPT5MINI),
82            "gpt-5" => Ok(Self::GPT5),
83            "gpt-5-nano" | "gpt-5nano" => Ok(Self::GPT5NANO),
84            "gpt-5.1" => Ok(Self::GPT51),
85            "gpt-5-pro" | "gpt5pro" => Ok(Self::GPT5PRO),
86            "gpt-4.1" | "gpt41" => Ok(Self::GPT41),
87            "gpt-4.1-mini" | "gpt41mini" => Ok(Self::GPT41MINI),
88            "gpt-4.1-nano" | "gpt41nano" => Ok(Self::GPT41NANO),
89            "o3" => Ok(Self::O3),
90            "o4-mini" | "o4mini" => Ok(Self::O4MINI),
91            "o3-mini" | "o3mini" => Ok(Self::O3MINI),
92            "o3-pro" | "o3pro" => Ok(Self::O3PRO),
93            "gemini-3-pro-preview" | "gemini-3-pro" => Ok(Self::GEMINI3PRO),
94            "gemini-3-flash-preview" | "gemini-3-flash" => Ok(Self::GEMINI3FLASH),
95            "gemini-2.5-pro" => Ok(Self::GEMINI25PRO),
96            "gemini-2.5-flash" => Ok(Self::GEMINI25FLASH),
97            _ => {
98                if !s.contains(",") {
99                    return Ok(Self::Other(
100                        s.to_string(),
101                        PricingInfo {
102                            input_tokens: 0.0f64,
103                            output_tokens: 0.0f64,
104                            cached_input_tokens: None,
105                        },
106                    ));
107                }
108                let mut tks = s
109                    .split(",")
110                    .map(|t| t.to_string())
111                    .collect::<VecDeque<String>>();
112
113                if tks.len() >= 2 {
114                    let model = tks.pop_front().unwrap();
115                    let tks = tks
116                        .into_iter()
117                        .map(|t| f64::from_str(&t))
118                        .collect::<Result<Vec<f64>, _>>()
119                        .map_err(|e| e.to_string())?;
120
121                    let pricing = if tks.len() == 2 {
122                        PricingInfo {
123                            input_tokens: tks[0],
124                            output_tokens: tks[1],
125                            cached_input_tokens: None,
126                        }
127                    } else if tks.len() == 3 {
128                        PricingInfo {
129                            input_tokens: tks[0],
130                            output_tokens: tks[1],
131                            cached_input_tokens: Some(tks[2]),
132                        }
133                    } else {
134                        return Err("fail to parse pricing".to_string());
135                    };
136
137                    Ok(Self::Other(model, pricing))
138                } else {
139                    Err("unreconigized model".to_string())
140                }
141            }
142        }
143    }
144}
145
/// Token pricing in USD per 1M tokens.
/// From https://openai.com/api/pricing/
#[derive(Copy, Debug, Clone, Serialize, Deserialize)]
pub struct PricingInfo {
    /// Price per 1M (uncached) input tokens.
    pub input_tokens: f64,
    /// Price per 1M output tokens.
    pub output_tokens: f64,
    /// Price per 1M cached input tokens; `None` when the model has no
    /// published cached-input rate.
    pub cached_input_tokens: Option<f64>,
}
154
155impl FromStr for PricingInfo {
156    type Err = String;
157
158    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
159        let tks = s
160            .split(",")
161            .map(f64::from_str)
162            .collect::<Result<Vec<f64>, _>>()
163            .map_err(|e| e.to_string())?;
164
165        if tks.len() == 2 {
166            Ok(PricingInfo {
167                input_tokens: tks[0],
168                output_tokens: tks[1],
169                cached_input_tokens: None,
170            })
171        } else if tks.len() == 3 {
172            Ok(PricingInfo {
173                input_tokens: tks[0],
174                output_tokens: tks[1],
175                cached_input_tokens: Some(tks[2]),
176            })
177        } else {
178            Err("fail to parse pricing".to_string())
179        }
180    }
181}
182
/// Model specification info from https://developers.openai.com/api/docs/models
///
/// Per-model constants as published by the provider; see
/// [`OpenAIModel::info`] for the lookup table.
#[derive(Copy, Debug, Clone)]
pub struct ModelInfo {
    /// Context window size in tokens
    pub context_window: u64,
    /// Maximum output tokens
    pub max_output_tokens: u64,
    /// Knowledge cutoff date
    pub knowledge_cutoff: NaiveDate,
}
193
impl OpenAIModel {
    /// Standard (non-batch) pricing for this model, in USD per 1M tokens.
    ///
    /// Values are hand-maintained from the provider pricing pages — verify
    /// against https://openai.com/api/pricing/ before relying on them for
    /// billing. For `Other`, returns the pricing supplied at parse time.
    pub fn pricing(&self) -> PricingInfo {
        match self {
            Self::GPT4O => PricingInfo {
                input_tokens: 2.5,
                output_tokens: 10.00,
                cached_input_tokens: Some(1.25),
            },
            Self::GPT4OMINI => PricingInfo {
                input_tokens: 0.15,
                cached_input_tokens: Some(0.075),
                output_tokens: 0.6,
            },
            Self::O1 => PricingInfo {
                input_tokens: 15.00,
                cached_input_tokens: Some(7.5),
                output_tokens: 60.00,
            },
            Self::O1MINI => PricingInfo {
                input_tokens: 1.10,
                cached_input_tokens: Some(0.55),
                output_tokens: 4.40,
            },
            Self::GPT35TURBO => PricingInfo {
                input_tokens: 0.50,
                cached_input_tokens: None,
                output_tokens: 1.50,
            },
            Self::GPT4 => PricingInfo {
                input_tokens: 30.0,
                output_tokens: 60.0,
                cached_input_tokens: None,
            },
            Self::GPT4TURBO => PricingInfo {
                input_tokens: 10.0,
                output_tokens: 30.0,
                cached_input_tokens: None,
            },
            Self::GPT52 => PricingInfo {
                input_tokens: 1.75,
                output_tokens: 14.00,
                cached_input_tokens: Some(0.175),
            },
            Self::GPT5MINI => PricingInfo {
                input_tokens: 0.25,
                output_tokens: 2.00,
                cached_input_tokens: Some(0.025),
            },
            Self::GPT5NANO => PricingInfo {
                input_tokens: 0.05,
                output_tokens: 0.40,
                cached_input_tokens: Some(0.005),
            },
            Self::GPT5 => PricingInfo {
                input_tokens: 1.25,
                output_tokens: 10.0,
                cached_input_tokens: Some(0.125),
            },
            Self::GPT51 => PricingInfo {
                input_tokens: 1.25,
                output_tokens: 10.00,
                cached_input_tokens: Some(0.125),
            },
            Self::GPT5PRO => PricingInfo {
                input_tokens: 15.00,
                output_tokens: 120.00,
                cached_input_tokens: None,
            },
            Self::GPT41 => PricingInfo {
                input_tokens: 2.00,
                output_tokens: 8.00,
                cached_input_tokens: Some(0.50),
            },
            Self::GPT41MINI => PricingInfo {
                input_tokens: 0.40,
                output_tokens: 1.60,
                cached_input_tokens: Some(0.10),
            },
            Self::GPT41NANO => PricingInfo {
                input_tokens: 0.10,
                output_tokens: 0.40,
                cached_input_tokens: Some(0.025),
            },
            Self::O3 => PricingInfo {
                input_tokens: 2.00,
                output_tokens: 8.00,
                cached_input_tokens: Some(0.50),
            },
            Self::O4MINI => PricingInfo {
                input_tokens: 1.10,
                output_tokens: 4.40,
                cached_input_tokens: Some(0.275),
            },
            Self::O3MINI => PricingInfo {
                input_tokens: 1.10,
                output_tokens: 4.40,
                cached_input_tokens: Some(0.55),
            },
            Self::O3PRO => PricingInfo {
                input_tokens: 20.00,
                output_tokens: 80.00,
                cached_input_tokens: None,
            },
            // NOTE(review): Gemini pricing is tiered by prompt size; only the
            // low tier is encoded here — long-context requests will be
            // under-estimated until the TODOs below are addressed.
            Self::GEMINI3PRO => PricingInfo {
                input_tokens: 2.00,   // TODO: 4.00 for > 200k tokens
                output_tokens: 12.00, // TODO: 18.00 for > 200k tokens
                cached_input_tokens: None,
            },
            Self::GEMINI3FLASH => PricingInfo {
                input_tokens: 0.50,
                output_tokens: 3.0,
                cached_input_tokens: None,
            },
            Self::GEMINI25PRO => PricingInfo {
                input_tokens: 1.25,   // 2.50 for the larger-prompt tier — TODO confirm
                output_tokens: 10.00, // 15.00 for the larger-prompt tier — TODO confirm
                cached_input_tokens: None,
            },
            Self::GEMINI25FLASH => PricingInfo {
                input_tokens: 0.30,
                output_tokens: 2.50,
                cached_input_tokens: None,
            },
            // Custom models carry their own pricing, given at parse time.
            Self::Other(_, pricing) => *pricing,
        }
    }

    /// Batch API pricing in USD per 1M tokens, where recorded.
    ///
    /// Returns `None` for every model without an entry below — which may mean
    /// either that no batch tier exists or that it simply hasn't been added
    /// to this table yet.
    pub fn batch_pricing(&self) -> Option<PricingInfo> {
        match self {
            Self::GPT4O => Some(PricingInfo {
                input_tokens: 1.25,
                output_tokens: 5.00,
                cached_input_tokens: None,
            }),
            Self::GPT4OMINI => Some(PricingInfo {
                input_tokens: 0.075,
                output_tokens: 0.30,
                cached_input_tokens: None,
            }),
            Self::GPT41 => Some(PricingInfo {
                input_tokens: 1.00,
                output_tokens: 4.00,
                cached_input_tokens: Some(0.25),
            }),
            Self::GPT41MINI => Some(PricingInfo {
                input_tokens: 0.20,
                output_tokens: 0.80,
                cached_input_tokens: Some(0.05),
            }),
            Self::GPT41NANO => Some(PricingInfo {
                input_tokens: 0.05,
                output_tokens: 0.20,
                cached_input_tokens: Some(0.0125),
            }),
            Self::O3 => Some(PricingInfo {
                input_tokens: 1.00,
                output_tokens: 4.00,
                cached_input_tokens: Some(0.25),
            }),
            Self::O4MINI => Some(PricingInfo {
                input_tokens: 0.55,
                output_tokens: 2.20,
                cached_input_tokens: Some(0.1375),
            }),
            Self::O3MINI => Some(PricingInfo {
                input_tokens: 0.55,
                output_tokens: 2.20,
                cached_input_tokens: Some(0.275),
            }),
            _ => None,
        }
    }

    /// Model specification information from https://developers.openai.com/api/docs/models
    ///
    /// Returns `None` for models with no recorded specs (currently the Gemini
    /// variants and `Other`).
    pub fn info(&self) -> Option<ModelInfo> {
        match self {
            Self::GPT52 => Some(ModelInfo {
                context_window: 400_000,
                max_output_tokens: 128_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2025, 8, 31).unwrap(),
            }),
            Self::GPT51 => Some(ModelInfo {
                context_window: 400_000,
                max_output_tokens: 128_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 9, 30).unwrap(),
            }),
            Self::GPT5 => Some(ModelInfo {
                context_window: 400_000,
                max_output_tokens: 128_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 9, 30).unwrap(),
            }),
            Self::GPT5MINI => Some(ModelInfo {
                context_window: 400_000,
                max_output_tokens: 128_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 5, 31).unwrap(),
            }),
            Self::GPT5NANO => Some(ModelInfo {
                context_window: 400_000,
                max_output_tokens: 128_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 5, 31).unwrap(),
            }),
            Self::GPT5PRO => Some(ModelInfo {
                context_window: 400_000,
                max_output_tokens: 272_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 9, 30).unwrap(),
            }),
            Self::GPT41 => Some(ModelInfo {
                context_window: 1_047_576,
                max_output_tokens: 32_768,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
            }),
            Self::GPT41MINI => Some(ModelInfo {
                context_window: 1_047_576,
                max_output_tokens: 32_768,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
            }),
            Self::GPT41NANO => Some(ModelInfo {
                context_window: 1_047_576,
                max_output_tokens: 32_768,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
            }),
            Self::O3 => Some(ModelInfo {
                context_window: 200_000,
                max_output_tokens: 100_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
            }),
            Self::O4MINI => Some(ModelInfo {
                context_window: 200_000,
                max_output_tokens: 100_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
            }),
            Self::O3MINI => Some(ModelInfo {
                context_window: 200_000,
                max_output_tokens: 100_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
            }),
            Self::O3PRO => Some(ModelInfo {
                context_window: 200_000,
                max_output_tokens: 100_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
            }),
            Self::O1 => Some(ModelInfo {
                context_window: 200_000,
                max_output_tokens: 100_000,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
            }),
            Self::O1MINI => Some(ModelInfo {
                context_window: 128_000,
                max_output_tokens: 65_536,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
            }),
            Self::GPT4O => Some(ModelInfo {
                context_window: 128_000,
                max_output_tokens: 16_384,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
            }),
            Self::GPT4OMINI => Some(ModelInfo {
                context_window: 128_000,
                max_output_tokens: 16_384,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
            }),
            Self::GPT4 => Some(ModelInfo {
                context_window: 8_192,
                max_output_tokens: 8_192,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 12, 1).unwrap(),
            }),
            Self::GPT4TURBO => Some(ModelInfo {
                context_window: 128_000,
                max_output_tokens: 4_096,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 12, 1).unwrap(),
            }),
            Self::GPT35TURBO => Some(ModelInfo {
                context_window: 16_385,
                max_output_tokens: 4_096,
                knowledge_cutoff: NaiveDate::from_ymd_opt(2021, 9, 1).unwrap(),
            }),
            _ => None,
        }
    }
}
473}