outfox_openai/spec/
vector_store.rs

1use std::collections::HashMap;
2
3use derive_builder::Builder;
4use serde::{Deserialize, Serialize};
5
6use crate::error::OpenAIError;
7
8use super::StaticChunkingStrategy;
9
10#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
11#[builder(name = "CreateVectorStoreRequestBuilder")]
12#[builder(pattern = "mutable")]
13#[builder(setter(into, strip_option), default)]
14#[builder(derive(Debug))]
15#[builder(build_fn(error = "OpenAIError"))]
16pub struct CreateVectorStoreRequest {
17    /// A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access files.
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub file_ids: Option<Vec<String>>,
20    /// The name of the vector store.
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub name: Option<String>,
23
24    /// The expiration policy for a vector store.
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub expires_after: Option<VectorStoreExpirationAfter>,
27
28    /// The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only applicable if `file_ids` is non-empty.
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub chunking_strategy: Option<VectorStoreChunkingStrategy>,
31
32    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub metadata: Option<HashMap<String, serde_json::Value>>,
35}
36
37#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
38#[serde(tag = "type")]
39pub enum VectorStoreChunkingStrategy {
40    /// The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`.
41    #[default]
42    #[serde(rename = "auto")]
43    Auto,
44    #[serde(rename = "static")]
45    Static {
46        #[serde(rename = "static")]
47        config: StaticChunkingStrategy,
48    },
49}
50
51/// Vector store expiration policy
52#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
53pub struct VectorStoreExpirationAfter {
54    /// Anchor timestamp after which the expiration policy applies. Supported anchors: `last_active_at`.
55    pub anchor: String,
56    /// The number of days after the anchor time that the vector store will expire.
57    pub days: u16, // min: 1, max: 365
58}
59
60/// A vector store is a collection of processed files can be used by the `file_search` tool.
61#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
62pub struct VectorStoreObject {
63    /// The identifier, which can be referenced in API endpoints.
64    pub id: String,
65    /// The object type, which is always `vector_store`.
66    pub object: String,
67    /// The Unix timestamp (in seconds) for when the vector store was created.
68    pub created_at: u32,
69    /// The name of the vector store.
70    pub name: Option<String>,
71    /// The total number of bytes used by the files in the vector store.
72    pub usage_bytes: u64,
73    pub file_counts: VectorStoreFileCounts,
74    /// The status of the vector store, which can be either `expired`, `in_progress`, or `completed`. A status of `completed` indicates that the vector store is ready for use.
75    pub status: VectorStoreStatus,
76    pub expires_after: Option<VectorStoreExpirationAfter>,
77    /// The Unix timestamp (in seconds) for when the vector store will expire.
78    pub expires_at: Option<u32>,
79    /// The Unix timestamp (in seconds) for when the vector store was last active.
80    pub last_active_at: Option<u32>,
81
82    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
83    pub metadata: Option<HashMap<String, serde_json::Value>>,
84}
85
86#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
87#[serde(rename_all = "snake_case")]
88pub enum VectorStoreStatus {
89    Expired,
90    InProgress,
91    Completed,
92}
93
94#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
95pub struct VectorStoreFileCounts {
96    /// The number of files that are currently being processed.
97    pub in_progress: u32,
98    /// The number of files that have been successfully processed.
99    pub completed: u32,
100    /// The number of files that have failed to process.
101    pub failed: u32,
102    /// The number of files that were cancelled.
103    pub cancelled: u32,
104    /// The total number of files.
105    pub total: u32,
106}
107
108#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
109pub struct ListVectorStoresResponse {
110    pub object: String,
111    pub data: Vec<VectorStoreObject>,
112    pub first_id: Option<String>,
113    pub last_id: Option<String>,
114    pub has_more: bool,
115}
116
117#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
118pub struct DeleteVectorStoreResponse {
119    pub id: String,
120    pub object: String,
121    pub deleted: bool,
122}
123
124#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
125#[builder(name = "UpdateVectorStoreRequestBuilder")]
126#[builder(pattern = "mutable")]
127#[builder(setter(into, strip_option), default)]
128#[builder(derive(Debug))]
129#[builder(build_fn(error = "OpenAIError"))]
130pub struct UpdateVectorStoreRequest {
131    #[serde(skip_serializing_if = "Option::is_none")]
132    pub name: Option<String>,
133    #[serde(skip_serializing_if = "Option::is_none")]
134    pub expires_after: Option<VectorStoreExpirationAfter>,
135    #[serde(skip_serializing_if = "Option::is_none")]
136    pub metadata: Option<HashMap<String, serde_json::Value>>,
137}
138
139#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
140pub struct ListVectorStoreFilesResponse {
141    pub object: String,
142    pub data: Vec<VectorStoreFileObject>,
143    pub first_id: Option<String>,
144    pub last_id: Option<String>,
145    pub has_more: bool,
146}
147
148#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
149pub struct VectorStoreFileObject {
150    /// The identifier, which can be referenced in API endpoints.
151    pub id: String,
152    /// The object type, which is always `vector_store.file`.
153    pub object: String,
154    /// The total vector store usage in bytes. Note that this may be different from the original file size.
155    pub usage_bytes: u64,
156    /// The Unix timestamp (in seconds) for when the vector store file was created.
157    pub created_at: u32,
158    /// The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
159    pub vector_store_id: String,
160    /// The status of the vector store file, which can be either `in_progress`, `completed`, `cancelled`, or `failed`. The status `completed` indicates that the vector store file is ready for use.
161    pub status: VectorStoreFileStatus,
162    /// The last error associated with this vector store file. Will be `null` if there are no errors.
163    pub last_error: Option<VectorStoreFileError>,
164    /// The strategy used to chunk the file.
165    pub chunking_strategy: Option<VectorStoreFileObjectChunkingStrategy>,
166}
167
168#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
169#[serde(rename_all = "snake_case")]
170pub enum VectorStoreFileStatus {
171    InProgress,
172    Completed,
173    Cancelled,
174    Failed,
175}
176
177#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
178pub struct VectorStoreFileError {
179    pub code: VectorStoreFileErrorCode,
180    /// A human-readable description of the error.
181    pub message: String,
182}
183
184#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
185#[serde(rename_all = "snake_case")]
186pub enum VectorStoreFileErrorCode {
187    ServerError,
188    UnsupportedFile,
189    InvalidFile,
190}
191
192#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
193#[serde(tag = "type")]
194#[serde(rename_all = "lowercase")]
195pub enum VectorStoreFileObjectChunkingStrategy {
196    /// This is returned when the chunking strategy is unknown. Typically, this is because the file was indexed before the `chunking_strategy` concept was introduced in the API.
197    Other,
198    Static {
199        r#static: StaticChunkingStrategy,
200    },
201}
202
203#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
204#[builder(name = "CreateVectorStoreFileRequestBuilder")]
205#[builder(pattern = "mutable")]
206#[builder(setter(into, strip_option), default)]
207#[builder(derive(Debug))]
208#[builder(build_fn(error = "OpenAIError"))]
209pub struct CreateVectorStoreFileRequest {
210    /// A [File](https://platform.openai.com/docs/api-reference/files) ID that the vector store should use. Useful for tools like `file_search` that can access files.
211    pub file_id: String,
212    #[serde(skip_serializing_if = "Option::is_none")]
213    pub chunking_strategy: Option<VectorStoreChunkingStrategy>,
214    #[serde(skip_serializing_if = "Option::is_none")]
215    pub attributes: Option<HashMap<String, AttributeValue>>,
216}
217
218#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
219pub struct DeleteVectorStoreFileResponse {
220    pub id: String,
221    pub object: String,
222    pub deleted: bool,
223}
224
225#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
226#[builder(name = "CreateVectorStoreFileBatchRequestBuilder")]
227#[builder(pattern = "mutable")]
228#[builder(setter(into, strip_option), default)]
229#[builder(derive(Debug))]
230#[builder(build_fn(error = "OpenAIError"))]
231pub struct CreateVectorStoreFileBatchRequest {
232    /// A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access files.
233    pub file_ids: Vec<String>, // minItems: 1, maxItems: 500
234    pub chunking_strategy: Option<VectorStoreChunkingStrategy>,
235}
236
237#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
238#[serde(rename_all = "snake_case")]
239pub enum VectorStoreFileBatchStatus {
240    InProgress,
241    Completed,
242    Cancelled,
243    Failed,
244}
245
246#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
247pub struct VectorStoreFileBatchCounts {
248    /// The number of files that are currently being processed.
249    pub in_progress: u32,
250    /// The number of files that have been successfully processed.
251    pub completed: u32,
252    /// The number of files that have failed to process.
253    pub failed: u32,
254    /// The number of files that were cancelled.
255    pub cancelled: u32,
256    /// The total number of files.
257    pub total: u32,
258}
259
260///  A batch of files attached to a vector store.
261#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
262pub struct VectorStoreFileBatchObject {
263    /// The identifier, which can be referenced in API endpoints.
264    pub id: String,
265    /// The object type, which is always `vector_store.file_batch`.
266    pub object: String,
267    /// The Unix timestamp (in seconds) for when the vector store files batch was created.
268    pub created_at: u32,
269    /// The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
270    pub vector_store_id: String,
271    /// The status of the vector store files batch, which can be either `in_progress`, `completed`, `cancelled` or `failed`.
272    pub status: VectorStoreFileBatchStatus,
273    pub file_counts: VectorStoreFileBatchCounts,
274}
275
276/// Represents the parsed content of a vector store file.
277#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
278pub struct VectorStoreFileContentResponse {
279    /// The object type, which is always `vector_store.file_content.page`
280    pub object: String,
281
282    /// Parsed content of the file.
283    pub data: Vec<VectorStoreFileContentObject>,
284
285    /// Indicates if there are more content pages to fetch.
286    pub has_more: bool,
287
288    /// The token for the next page, if any.
289    pub next_page: Option<String>,
290}
291
292/// Represents the parsed content of a vector store file.
293#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
294pub struct VectorStoreFileContentObject {
295    /// The content type (currently only `"text"`)
296    #[serde(rename = "type")]
297    pub kind: String,
298
299    /// The text content
300    pub text: String,
301}
302
303#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
304#[builder(name = "VectorStoreSearchRequestBuilder")]
305#[builder(pattern = "mutable")]
306#[builder(setter(into, strip_option), default)]
307#[builder(derive(Debug))]
308#[builder(build_fn(error = "OpenAIError"))]
309pub struct VectorStoreSearchRequest {
310    /// A query string for a search.
311    pub query: VectorStoreSearchQuery,
312
313    /// Whether to rewrite the natural language query for vector search.
314    #[serde(skip_serializing_if = "Option::is_none")]
315    pub rewrite_query: Option<bool>,
316
317    /// The maximum number of results to return. This number should be between 1 and 50 inclusive.
318    #[serde(skip_serializing_if = "Option::is_none")]
319    pub max_num_results: Option<u8>,
320
321    /// A filter to apply based on file attributes.
322    #[serde(skip_serializing_if = "Option::is_none")]
323    pub filters: Option<VectorStoreSearchFilter>,
324
325    /// Ranking options for search.
326    #[serde(skip_serializing_if = "Option::is_none")]
327    pub ranking_options: Option<RankingOptions>,
328}
329
330#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
331#[serde(untagged)]
332pub enum VectorStoreSearchQuery {
333    /// A single query to search for.
334    Text(String),
335    /// A list of queries to search for.
336    Array(Vec<String>),
337}
338
339impl Default for VectorStoreSearchQuery {
340    fn default() -> Self {
341        Self::Text(String::new())
342    }
343}
344
345impl From<String> for VectorStoreSearchQuery {
346    fn from(query: String) -> Self {
347        Self::Text(query)
348    }
349}
350
351impl From<&str> for VectorStoreSearchQuery {
352    fn from(query: &str) -> Self {
353        Self::Text(query.to_string())
354    }
355}
356
357impl From<Vec<String>> for VectorStoreSearchQuery {
358    fn from(query: Vec<String>) -> Self {
359        Self::Array(query)
360    }
361}
362
363#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
364#[serde(untagged)]
365pub enum VectorStoreSearchFilter {
366    Comparison(ComparisonFilter),
367    Compound(CompoundFilter),
368}
369
370impl From<ComparisonFilter> for VectorStoreSearchFilter {
371    fn from(filter: ComparisonFilter) -> Self {
372        Self::Comparison(filter)
373    }
374}
375
376impl From<CompoundFilter> for VectorStoreSearchFilter {
377    fn from(filter: CompoundFilter) -> Self {
378        Self::Compound(filter)
379    }
380}
381
382/// A filter used to compare a specified attribute key to a given value using a defined comparison operation.
383#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
384pub struct ComparisonFilter {
385    /// Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
386    #[serde(rename = "type")]
387    pub kind: ComparisonType,
388
389    /// The key to compare against the value.
390    pub key: String,
391
392    /// The value to compare against the attribute key; supports string, number, or boolean types.
393    pub value: AttributeValue,
394}
395
396/// Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
397#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
398#[serde(rename_all = "lowercase")]
399pub enum ComparisonType {
400    Eq,
401    Ne,
402    Gt,
403    Gte,
404    Lt,
405    Lte,
406}
407
408/// The value to compare against the attribute key; supports string, number, or boolean types.
409#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
410#[serde(untagged)]
411pub enum AttributeValue {
412    String(String),
413    Number(i64),
414    Boolean(bool),
415}
416
417impl From<String> for AttributeValue {
418    fn from(value: String) -> Self {
419        Self::String(value)
420    }
421}
422
423impl From<i64> for AttributeValue {
424    fn from(value: i64) -> Self {
425        Self::Number(value)
426    }
427}
428
429impl From<bool> for AttributeValue {
430    fn from(value: bool) -> Self {
431        Self::Boolean(value)
432    }
433}
434
435impl From<&str> for AttributeValue {
436    fn from(value: &str) -> Self {
437        Self::String(value.to_string())
438    }
439}
440
441/// Ranking options for search.
442#[derive(Debug, Serialize, Default, Deserialize, Clone, PartialEq)]
443pub struct RankingOptions {
444    #[serde(skip_serializing_if = "Option::is_none")]
445    pub ranker: Option<Ranker>,
446
447    #[serde(skip_serializing_if = "Option::is_none")]
448    pub score_threshold: Option<f32>,
449}
450
451#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
452pub enum Ranker {
453    #[serde(rename = "auto")]
454    Auto,
455    #[serde(rename = "default-2024-11-15")]
456    Default20241115,
457}
458
459/// Combine multiple filters using `and` or `or`.
460#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
461pub struct CompoundFilter {
462    /// Type of operation: `and` or `or`.
463    #[serde(rename = "type")]
464    pub kind: CompoundFilterType,
465
466    /// Array of filters to combine. Items can be `ComparisonFilter` or `CompoundFilter`
467    pub filters: Vec<VectorStoreSearchFilter>,
468}
469
470/// Type of operation: `and` or `or`.
471#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
472#[serde(rename_all = "lowercase")]
473pub enum CompoundFilterType {
474    And,
475    Or,
476}
477
478#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
479pub struct VectorStoreSearchResultsPage {
480    /// The object type, which is always `vector_store.search_results.page`.
481    pub object: String,
482
483    /// The query used for this search.
484    pub search_query: Vec<String>,
485
486    /// The list of search result items.
487    pub data: Vec<VectorStoreSearchResultItem>,
488
489    /// Indicates if there are more results to fetch.
490    pub has_more: bool,
491
492    /// The token for the next page, if any.
493    pub next_page: Option<String>,
494}
495
496#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
497pub struct VectorStoreSearchResultItem {
498    /// The ID of the vector store file.
499    pub file_id: String,
500
501    /// The name of the vector store file.
502    pub filename: String,
503
504    /// The similarity score for the result.
505    pub score: f32, // minimum: 0, maximum: 1
506
507    /// Attributes of the vector store file.
508    pub attributes: HashMap<String, AttributeValue>,
509
510    /// Content chunks from the file.
511    pub content: Vec<VectorStoreSearchResultContentObject>,
512}
513
514#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
515pub struct VectorStoreSearchResultContentObject {
516    /// The type of content
517    #[serde(rename = "type")]
518    pub kind: String,
519
520    /// The text content returned from search.
521    pub text: String,
522}