Skip to main content

gemini_tokenizer/
types.rs

1// <FILE>src/types.rs</FILE> - <DESC>Lightweight Gemini content types for token counting</DESC>
2// <VERS>VERSION: 0.2.0</VERS>
3// <WCTX>Ergonomics update to match Python SDK API</WCTX>
4// <CLOG>Add Contents, CountTokensResult, ComputeTokensResult, TokensInfo, CountTokensConfig</CLOG>
5
//! Lightweight types mirroring the Google Gemini API structures needed for token counting.
//!
//! These types support serde serialization/deserialization for JSON interop.
//! They are intentionally minimal — only the fields relevant to token counting
//! are included.
11
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15/// A content message in a conversation, containing a role and parts.
16#[derive(Debug, Clone, Serialize, Deserialize, Default)]
17pub struct Content {
18    /// The role of the content author (e.g., "user", "model").
19    #[serde(skip_serializing_if = "Option::is_none")]
20    pub role: Option<String>,
21
22    /// The parts that make up this content message.
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub parts: Option<Vec<Part>>,
25}
26
27/// A single part of a content message.
28///
29/// Each part contains exactly one of the possible content types:
30/// text, function_call, or function_response.
31#[derive(Debug, Clone, Serialize, Deserialize, Default)]
32pub struct Part {
33    /// Plain text content.
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub text: Option<String>,
36
37    /// A function call made by the model.
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub function_call: Option<FunctionCall>,
40
41    /// A response to a function call.
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub function_response: Option<FunctionResponse>,
44}
45
46/// A function call made by the model.
47#[derive(Debug, Clone, Serialize, Deserialize, Default)]
48pub struct FunctionCall {
49    /// The name of the function to call.
50    #[serde(skip_serializing_if = "Option::is_none")]
51    pub name: Option<String>,
52
53    /// The arguments to pass to the function, as a JSON-like map.
54    #[serde(skip_serializing_if = "Option::is_none")]
55    pub args: Option<HashMap<String, serde_json::Value>>,
56}
57
58/// A response from a function call.
59#[derive(Debug, Clone, Serialize, Deserialize, Default)]
60pub struct FunctionResponse {
61    /// The name of the function that was called.
62    #[serde(skip_serializing_if = "Option::is_none")]
63    pub name: Option<String>,
64
65    /// The response data from the function.
66    #[serde(skip_serializing_if = "Option::is_none")]
67    pub response: Option<HashMap<String, serde_json::Value>>,
68}
69
70/// A tool definition containing function declarations.
71#[derive(Debug, Clone, Serialize, Deserialize, Default)]
72pub struct Tool {
73    /// The function declarations that make up this tool.
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub function_declarations: Option<Vec<FunctionDeclaration>>,
76}
77
78/// A declaration of a function that the model can call.
79#[derive(Debug, Clone, Serialize, Deserialize, Default)]
80pub struct FunctionDeclaration {
81    /// The name of the function.
82    #[serde(skip_serializing_if = "Option::is_none")]
83    pub name: Option<String>,
84
85    /// A description of what the function does.
86    #[serde(skip_serializing_if = "Option::is_none")]
87    pub description: Option<String>,
88
89    /// The schema for the function's parameters.
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub parameters: Option<Schema>,
92
93    /// The schema for the function's response.
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub response: Option<Schema>,
96}
97
98/// A JSON Schema definition used to describe function parameters and responses.
99///
100/// This is a recursive type that can describe nested object structures.
101#[derive(Debug, Clone, Serialize, Deserialize, Default)]
102pub struct Schema {
103    /// The data type (e.g., "STRING", "NUMBER", "OBJECT", "ARRAY").
104    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
105    pub schema_type: Option<String>,
106
107    /// The format of the data (e.g., "int32", "date-time").
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub format: Option<String>,
110
111    /// A description of what this schema represents.
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub description: Option<String>,
114
115    /// The title of this schema.
116    #[serde(skip_serializing_if = "Option::is_none")]
117    pub title: Option<String>,
118
119    /// A default value for this schema.
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub default: Option<serde_json::Value>,
122
123    /// Allowed enum values for this field.
124    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
125    pub enum_values: Option<Vec<String>>,
126
127    /// Required property names (for object types).
128    #[serde(skip_serializing_if = "Option::is_none")]
129    pub required: Option<Vec<String>>,
130
131    /// Property ordering hints.
132    #[serde(skip_serializing_if = "Option::is_none")]
133    pub property_ordering: Option<Vec<String>>,
134
135    /// Schema for array items.
136    #[serde(skip_serializing_if = "Option::is_none")]
137    pub items: Option<Box<Schema>>,
138
139    /// Named properties (for object types).
140    #[serde(skip_serializing_if = "Option::is_none")]
141    pub properties: Option<HashMap<String, Schema>>,
142
143    /// An example value.
144    #[serde(skip_serializing_if = "Option::is_none")]
145    pub example: Option<serde_json::Value>,
146}
147
148/// Input contents, accepting either a text string or structured Content objects.
149///
150/// Matches the Python SDK's flexible content input where `count_tokens` and
151/// `compute_tokens` accept both plain strings and structured Content objects.
152///
153/// # From implementations
154///
155/// - `&str` → wraps as a single user Content with one text Part
156/// - `&[Content]` → uses the Content slice directly
157/// - `&Vec<Content>` → delegates to the slice implementation
158pub enum Contents<'a> {
159    /// Plain text input (will be wrapped as a user Content).
160    Text(&'a str),
161    /// Structured Content objects.
162    Structured(&'a [Content]),
163}
164
165impl<'a> From<&'a str> for Contents<'a> {
166    fn from(s: &'a str) -> Self {
167        Contents::Text(s)
168    }
169}
170
171impl<'a> From<&'a [Content]> for Contents<'a> {
172    fn from(c: &'a [Content]) -> Self {
173        Contents::Structured(c)
174    }
175}
176
177impl<'a> From<&'a Vec<Content>> for Contents<'a> {
178    fn from(c: &'a Vec<Content>) -> Self {
179        Contents::Structured(c.as_slice())
180    }
181}
182
/// Result of counting tokens, matching the Python SDK's `CountTokensResult`.
///
/// Results compare by value (`PartialEq`/`Eq`), so they can be used directly
/// in `assert_eq!`, and `Default` yields a result with zero tokens.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CountTokensResult {
    /// The total number of tokens.
    pub total_tokens: usize,
}

/// Formats the result as `total_tokens=<n>`.
impl std::fmt::Display for CountTokensResult {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "total_tokens={}", self.total_tokens)
    }
}
195
/// Information about tokens for a single content part,
/// matching the Python SDK's `TokensInfo`.
///
/// Values compare field-by-field (`PartialEq`/`Eq`), so they can be used
/// directly in `assert_eq!`; `Default` yields empty token lists and no role.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct TokensInfo {
    /// The token IDs in the vocabulary.
    pub token_ids: Vec<u32>,
    /// The token pieces as byte sequences (UTF-8 encoded, with SentencePiece
    /// space markers replaced by actual spaces).
    pub tokens: Vec<Vec<u8>>,
    /// The role of the content this part belongs to (e.g., "user", "model").
    pub role: Option<String>,
}
208
209/// Result of computing tokens, matching the Python SDK's `ComputeTokensResult`.
210#[derive(Debug, Clone)]
211pub struct ComputeTokensResult {
212    /// Token information for each content part.
213    pub tokens_info: Vec<TokensInfo>,
214}
215
216/// Configuration for `count_tokens`, matching the Python SDK's `CountTokensConfig`.
217///
218/// Provides optional tools, system instruction, and response schema that
219/// contribute additional tokens to the count.
220#[derive(Debug, Clone, Default)]
221pub struct CountTokensConfig {
222    /// Tool definitions whose declarations contribute tokens.
223    pub tools: Option<Vec<Tool>>,
224    /// System instruction content that contributes tokens.
225    pub system_instruction: Option<Content>,
226    /// Response schema that contributes tokens.
227    pub response_schema: Option<Schema>,
228}
229
230// <FILE>src/types.rs</FILE> - <DESC>Lightweight Gemini content types for token counting</DESC>
231// <VERS>END OF VERSION: 0.2.0</VERS>