//! Model identifiers, per-token pricing, and model-spec metadata for the
//! OpenAI-compatible APIs used by this crate.
use std::{collections::VecDeque, str::FromStr};

use chrono::NaiveDate;
use derive_more::derive::Display;
use serde::{Deserialize, Serialize};

pub mod error;
pub mod llm;
10pub mod openai {
11    pub use async_openai::*;
12}
13
14// General models, note might alias to a specific model
15#[derive(Debug, Clone, Serialize, Deserialize, Display)]
16pub enum OpenAIModel {
17    #[display("gpt-4o")]
18    GPT4O,
19    #[display("gpt-4o-mini")]
20    GPT4OMINI,
21    #[display("o1")]
22    O1,
23    #[display("o1-mini")]
24    O1MINI,
25    #[display("gpt-3.5-turbo")]
26    GPT35TURBO,
27    #[display("gpt-4")]
28    GPT4,
29    #[display("gpt-4-turbo")]
30    GPT4TURBO,
31    #[display("gpt-5-mini")]
32    GPT5MINI,
33    #[display("gpt-5-nano")]
34    GPT5NANO,
35    #[display("gpt-5.2")]
36    GPT52,
37    #[display("gpt-5")]
38    GPT5,
39    #[display("gpt-5.1")]
40    GPT51,
41    #[display("gpt-5-pro")]
42    GPT5PRO,
43    #[display("gpt-4.1")]
44    GPT41,
45    #[display("gpt-4.1-mini")]
46    GPT41MINI,
47    #[display("gpt-4.1-nano")]
48    GPT41NANO,
49    #[display("o3")]
50    O3,
51    #[display("o4-mini")]
52    O4MINI,
53    #[display("o3-mini")]
54    O3MINI,
55    #[display("o3-pro")]
56    O3PRO,
57    #[display("gemini-3-pro-preview")]
58    GEMINI3PRO,
59    #[display("gemini-3-flash-preview")]
60    GEMINI3FLASH,
61    #[display("gemini-2.5-pro")]
62    GEMINI25PRO,
63    #[display("gemini-2.5-flash")]
64    GEMINI25FLASH,
65    #[display("{_0}")]
66    Other(String, PricingInfo),
67}
68
69impl FromStr for OpenAIModel {
70    type Err = String;
71    fn from_str(s: &str) -> Result<Self, Self::Err> {
72        match s {
73            "gpt-4o" | "gpt4o" => Ok(Self::GPT4O),
74            "gpt-4" | "gpt" => Ok(Self::GPT4),
75            "gpt-4-turbo" | "gpt4turbo" => Ok(Self::GPT4TURBO),
76            "gpt-4o-mini" | "gpt4omini" => Ok(Self::GPT4OMINI),
77            "o1" => Ok(Self::O1),
78            "o1-mini" => Ok(Self::O1MINI),
79            "gpt-3.5-turbo" | "gpt3.5turbo" => Ok(Self::GPT35TURBO),
80            "gpt-5.2" => Ok(Self::GPT52),
81            "gpt-5-mini" | "gpt-5mini" => Ok(Self::GPT5MINI),
82            "gpt-5" => Ok(Self::GPT5),
83            "gpt-5-nano" | "gpt-5nano" => Ok(Self::GPT5NANO),
84            "gpt-5.1" => Ok(Self::GPT51),
85            "gpt-5-pro" | "gpt5pro" => Ok(Self::GPT5PRO),
86            "gpt-4.1" | "gpt41" => Ok(Self::GPT41),
87            "gpt-4.1-mini" | "gpt41mini" => Ok(Self::GPT41MINI),
88            "gpt-4.1-nano" | "gpt41nano" => Ok(Self::GPT41NANO),
89            "o3" => Ok(Self::O3),
90            "o4-mini" | "o4mini" => Ok(Self::O4MINI),
91            "o3-mini" | "o3mini" => Ok(Self::O3MINI),
92            "o3-pro" | "o3pro" => Ok(Self::O3PRO),
93            "gemini-3-pro-preview" | "gemini-3-pro" => Ok(Self::GEMINI3PRO),
94            "gemini-3-flash-preview" | "gemini-3-flash" => Ok(Self::GEMINI3FLASH),
95            "gemini-2.5-pro" => Ok(Self::GEMINI25PRO),
96            "gemini-2.5-flash" => Ok(Self::GEMINI25FLASH),
97            _ => {
98                if !s.contains(",") {
99                    log::info!("No valid model detected, assume not billed");
100                    return Ok(Self::Other(
101                        s.to_string(),
102                        PricingInfo {
103                            input_tokens: 0.0f64,
104                            output_tokens: 0.0f64,
105                            cached_input_tokens: None,
106                        },
107                    ));
108                }
109                let mut tks = s
110                    .split(",")
111                    .map(|t| t.to_string())
112                    .collect::<VecDeque<String>>();
113
114                if tks.len() >= 2 {
115                    let model = tks.pop_front().unwrap();
116                    let tks = tks
117                        .into_iter()
118                        .map(|t| f64::from_str(&t))
119                        .collect::<Result<Vec<f64>, _>>()
120                        .map_err(|e| e.to_string())?;
121
122                    let pricing = if tks.len() == 2 {
123                        PricingInfo {
124                            input_tokens: tks[0],
125                            output_tokens: tks[1],
126                            cached_input_tokens: None,
127                        }
128                    } else if tks.len() == 3 {
129                        PricingInfo {
130                            input_tokens: tks[0],
131                            output_tokens: tks[1],
132                            cached_input_tokens: Some(tks[2]),
133                        }
134                    } else {
135                        return Err("fail to parse pricing".to_string());
136                    };
137
138                    Ok(Self::Other(model, pricing))
139                } else {
140                    Err("unreconigized model".to_string())
141                }
142            }
143        }
144    }
145}
146
147// USD per 1M tokens
148// From https://openai.com/api/pricing/
149#[derive(Copy, Debug, Clone, Serialize, Deserialize)]
150pub struct PricingInfo {
151    pub input_tokens: f64,
152    pub output_tokens: f64,
153    pub cached_input_tokens: Option<f64>,
154}
155
156impl FromStr for PricingInfo {
157    type Err = String;
158
159    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
160        let tks = s
161            .split(",")
162            .map(f64::from_str)
163            .collect::<Result<Vec<f64>, _>>()
164            .map_err(|e| e.to_string())?;
165
166        if tks.len() == 2 {
167            Ok(PricingInfo {
168                input_tokens: tks[0],
169                output_tokens: tks[1],
170                cached_input_tokens: None,
171            })
172        } else if tks.len() == 3 {
173            Ok(PricingInfo {
174                input_tokens: tks[0],
175                output_tokens: tks[1],
176                cached_input_tokens: Some(tks[2]),
177            })
178        } else {
179            Err("fail to parse pricing".to_string())
180        }
181    }
182}
183
184/// Model specification info from https://developers.openai.com/api/docs/models
185#[derive(Copy, Debug, Clone)]
186pub struct ModelInfo {
187    /// Context window size in tokens
188    pub context_window: u64,
189    /// Maximum output tokens
190    pub max_output_tokens: u64,
191    /// Knowledge cutoff date
192    pub knowledge_cutoff: NaiveDate,
193}
194
195impl OpenAIModel {
196    pub fn pricing(&self) -> PricingInfo {
197        match self {
198            Self::GPT4O => PricingInfo {
199                input_tokens: 2.5,
200                output_tokens: 10.00,
201                cached_input_tokens: Some(1.25),
202            },
203            Self::GPT4OMINI => PricingInfo {
204                input_tokens: 0.15,
205                cached_input_tokens: Some(0.075),
206                output_tokens: 0.6,
207            },
208            Self::O1 => PricingInfo {
209                input_tokens: 15.00,
210                cached_input_tokens: Some(7.5),
211                output_tokens: 60.00,
212            },
213            Self::O1MINI => PricingInfo {
214                input_tokens: 1.10,
215                cached_input_tokens: Some(0.55),
216                output_tokens: 4.40,
217            },
218            Self::GPT35TURBO => PricingInfo {
219                input_tokens: 0.50,
220                cached_input_tokens: None,
221                output_tokens: 1.50,
222            },
223            Self::GPT4 => PricingInfo {
224                input_tokens: 30.0,
225                output_tokens: 60.0,
226                cached_input_tokens: None,
227            },
228            Self::GPT4TURBO => PricingInfo {
229                input_tokens: 10.0,
230                output_tokens: 30.0,
231                cached_input_tokens: None,
232            },
233            Self::GPT52 => PricingInfo {
234                input_tokens: 1.75,
235                output_tokens: 14.00,
236                cached_input_tokens: Some(0.175),
237            },
238            Self::GPT5MINI => PricingInfo {
239                input_tokens: 0.25,
240                output_tokens: 2.00,
241                cached_input_tokens: Some(0.025),
242            },
243            Self::GPT5NANO => PricingInfo {
244                input_tokens: 0.05,
245                output_tokens: 0.40,
246                cached_input_tokens: Some(0.005),
247            },
248            Self::GPT5 => PricingInfo {
249                input_tokens: 1.25,
250                output_tokens: 10.0,
251                cached_input_tokens: Some(0.125),
252            },
253            Self::GPT51 => PricingInfo {
254                input_tokens: 1.25,
255                output_tokens: 10.00,
256                cached_input_tokens: Some(0.125),
257            },
258            Self::GPT5PRO => PricingInfo {
259                input_tokens: 15.00,
260                output_tokens: 120.00,
261                cached_input_tokens: None,
262            },
263            Self::GPT41 => PricingInfo {
264                input_tokens: 2.00,
265                output_tokens: 8.00,
266                cached_input_tokens: Some(0.50),
267            },
268            Self::GPT41MINI => PricingInfo {
269                input_tokens: 0.40,
270                output_tokens: 1.60,
271                cached_input_tokens: Some(0.10),
272            },
273            Self::GPT41NANO => PricingInfo {
274                input_tokens: 0.10,
275                output_tokens: 0.40,
276                cached_input_tokens: Some(0.025),
277            },
278            Self::O3 => PricingInfo {
279                input_tokens: 2.00,
280                output_tokens: 8.00,
281                cached_input_tokens: Some(0.50),
282            },
283            Self::O4MINI => PricingInfo {
284                input_tokens: 1.10,
285                output_tokens: 4.40,
286                cached_input_tokens: Some(0.275),
287            },
288            Self::O3MINI => PricingInfo {
289                input_tokens: 1.10,
290                output_tokens: 4.40,
291                cached_input_tokens: Some(0.55),
292            },
293            Self::O3PRO => PricingInfo {
294                input_tokens: 20.00,
295                output_tokens: 80.00,
296                cached_input_tokens: None,
297            },
298            Self::GEMINI3PRO => PricingInfo {
299                input_tokens: 2.00,   // TODO: 4.00 for > 200k tokens
300                output_tokens: 12.00, // TODO: 18.00 for > 200k tokens
301                cached_input_tokens: None,
302            },
303            Self::GEMINI3FLASH => PricingInfo {
304                input_tokens: 0.50,
305                output_tokens: 3.0,
306                cached_input_tokens: None,
307            },
308            Self::GEMINI25PRO => PricingInfo {
309                input_tokens: 1.25,   // 2.50
310                output_tokens: 10.00, // 15.00
311                cached_input_tokens: None,
312            },
313            Self::GEMINI25FLASH => PricingInfo {
314                input_tokens: 0.30,
315                output_tokens: 2.50,
316                cached_input_tokens: None,
317            },
318            Self::Other(_, pricing) => *pricing,
319        }
320    }
321
322    pub fn batch_pricing(&self) -> Option<PricingInfo> {
323        match self {
324            Self::GPT4O => Some(PricingInfo {
325                input_tokens: 1.25,
326                output_tokens: 5.00,
327                cached_input_tokens: None,
328            }),
329            Self::GPT4OMINI => Some(PricingInfo {
330                input_tokens: 0.075,
331                output_tokens: 0.30,
332                cached_input_tokens: None,
333            }),
334            Self::GPT41 => Some(PricingInfo {
335                input_tokens: 1.00,
336                output_tokens: 4.00,
337                cached_input_tokens: Some(0.25),
338            }),
339            Self::GPT41MINI => Some(PricingInfo {
340                input_tokens: 0.20,
341                output_tokens: 0.80,
342                cached_input_tokens: Some(0.05),
343            }),
344            Self::GPT41NANO => Some(PricingInfo {
345                input_tokens: 0.05,
346                output_tokens: 0.20,
347                cached_input_tokens: Some(0.0125),
348            }),
349            Self::O3 => Some(PricingInfo {
350                input_tokens: 1.00,
351                output_tokens: 4.00,
352                cached_input_tokens: Some(0.25),
353            }),
354            Self::O4MINI => Some(PricingInfo {
355                input_tokens: 0.55,
356                output_tokens: 2.20,
357                cached_input_tokens: Some(0.1375),
358            }),
359            Self::O3MINI => Some(PricingInfo {
360                input_tokens: 0.55,
361                output_tokens: 2.20,
362                cached_input_tokens: Some(0.275),
363            }),
364            _ => None,
365        }
366    }
367
368    /// Model specification information from https://developers.openai.com/api/docs/models
369    pub fn info(&self) -> Option<ModelInfo> {
370        match self {
371            Self::GPT52 => Some(ModelInfo {
372                context_window: 400_000,
373                max_output_tokens: 128_000,
374                knowledge_cutoff: NaiveDate::from_ymd_opt(2025, 8, 31).unwrap(),
375            }),
376            Self::GPT51 => Some(ModelInfo {
377                context_window: 400_000,
378                max_output_tokens: 128_000,
379                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 9, 30).unwrap(),
380            }),
381            Self::GPT5 => Some(ModelInfo {
382                context_window: 400_000,
383                max_output_tokens: 128_000,
384                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 9, 30).unwrap(),
385            }),
386            Self::GPT5MINI => Some(ModelInfo {
387                context_window: 400_000,
388                max_output_tokens: 128_000,
389                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 5, 31).unwrap(),
390            }),
391            Self::GPT5NANO => Some(ModelInfo {
392                context_window: 400_000,
393                max_output_tokens: 128_000,
394                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 5, 31).unwrap(),
395            }),
396            Self::GPT5PRO => Some(ModelInfo {
397                context_window: 400_000,
398                max_output_tokens: 272_000,
399                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 9, 30).unwrap(),
400            }),
401            Self::GPT41 => Some(ModelInfo {
402                context_window: 1_047_576,
403                max_output_tokens: 32_768,
404                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
405            }),
406            Self::GPT41MINI => Some(ModelInfo {
407                context_window: 1_047_576,
408                max_output_tokens: 32_768,
409                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
410            }),
411            Self::GPT41NANO => Some(ModelInfo {
412                context_window: 1_047_576,
413                max_output_tokens: 32_768,
414                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
415            }),
416            Self::O3 => Some(ModelInfo {
417                context_window: 200_000,
418                max_output_tokens: 100_000,
419                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
420            }),
421            Self::O4MINI => Some(ModelInfo {
422                context_window: 200_000,
423                max_output_tokens: 100_000,
424                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
425            }),
426            Self::O3MINI => Some(ModelInfo {
427                context_window: 200_000,
428                max_output_tokens: 100_000,
429                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
430            }),
431            Self::O3PRO => Some(ModelInfo {
432                context_window: 200_000,
433                max_output_tokens: 100_000,
434                knowledge_cutoff: NaiveDate::from_ymd_opt(2024, 6, 1).unwrap(),
435            }),
436            Self::O1 => Some(ModelInfo {
437                context_window: 200_000,
438                max_output_tokens: 100_000,
439                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
440            }),
441            Self::O1MINI => Some(ModelInfo {
442                context_window: 128_000,
443                max_output_tokens: 65_536,
444                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
445            }),
446            Self::GPT4O => Some(ModelInfo {
447                context_window: 128_000,
448                max_output_tokens: 16_384,
449                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
450            }),
451            Self::GPT4OMINI => Some(ModelInfo {
452                context_window: 128_000,
453                max_output_tokens: 16_384,
454                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 10, 1).unwrap(),
455            }),
456            Self::GPT4 => Some(ModelInfo {
457                context_window: 8_192,
458                max_output_tokens: 8_192,
459                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 12, 1).unwrap(),
460            }),
461            Self::GPT4TURBO => Some(ModelInfo {
462                context_window: 128_000,
463                max_output_tokens: 4_096,
464                knowledge_cutoff: NaiveDate::from_ymd_opt(2023, 12, 1).unwrap(),
465            }),
466            Self::GPT35TURBO => Some(ModelInfo {
467                context_window: 16_385,
468                max_output_tokens: 4_096,
469                knowledge_cutoff: NaiveDate::from_ymd_opt(2021, 9, 1).unwrap(),
470            }),
471            _ => None,
472        }
473    }
474}