Skip to main content

novel_openai/spec/vectorstores/
vector_store.rs

1use std::collections::HashMap;
2
3use derive_builder::Builder;
4use serde::{Deserialize, Serialize};
5
6use crate::error::OpenAIError;
7use crate::spec::Metadata;
8use crate::spec::vectorstores::{Filter, StaticChunkingStrategy};
9
10#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
11#[builder(name = "CreateVectorStoreRequestArgs")]
12#[builder(pattern = "mutable")]
13#[builder(setter(into, strip_option), default)]
14#[builder(derive(Debug))]
15#[builder(build_fn(error = "OpenAIError"))]
16pub struct CreateVectorStoreRequest {
17    /// A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access files.
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub file_ids: Option<Vec<String>>,
20    /// The name of the vector store.
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub name: Option<String>,
23    /// A description for the vector store. Can be used to describe the vector store's purpose.
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub description: Option<String>,
26
27    /// The expiration policy for a vector store.
28    #[serde(skip_serializing_if = "Option::is_none")]
29    pub expires_after: Option<VectorStoreExpirationAfter>,
30
31    /// The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
32    /// Only applicable if `file_ids` is non-empty.
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub chunking_strategy: Option<ChunkingStrategyRequestParam>,
35
36    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
37    /// additional information about the object in a structured format. Keys can be a maximum of 64
38    /// characters long and values can be a maximum of 512 characters long.
39    #[serde(skip_serializing_if = "Option::is_none")]
40    pub metadata: Option<Metadata>,
41}
42
43#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
44#[serde(tag = "type")]
45pub enum ChunkingStrategyRequestParam {
46    /// The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and
47    /// `chunk_overlap_tokens` of `400`.
48    #[default]
49    #[serde(rename = "auto")]
50    Auto,
51    #[serde(rename = "static")]
52    Static {
53        #[serde(rename = "static")]
54        config: StaticChunkingStrategy,
55    },
56}
57
58/// Vector store expiration policy
59#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
60pub struct VectorStoreExpirationAfter {
61    /// Anchor timestamp after which the expiration policy applies. Supported anchors:
62    /// `last_active_at`.
63    pub anchor: String,
64    /// The number of days after the anchor time that the vector store will expire.
65    pub days: u16, // min: 1, max: 365
66}
67
68/// A vector store is a collection of processed files can be used by the `file_search` tool.
69#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
70pub struct VectorStoreObject {
71    /// The identifier, which can be referenced in API endpoints.
72    pub id: String,
73    /// The object type, which is always `vector_store`.
74    pub object: String,
75    /// The Unix timestamp (in seconds) for when the vector store was created.
76    pub created_at: u64,
77    /// The name of the vector store.
78    pub name: Option<String>,
79    /// The total number of bytes used by the files in the vector store.
80    pub usage_bytes: u64,
81    pub file_counts: VectorStoreFileCounts,
82    /// The status of the vector store, which can be either `expired`, `in_progress`, or
83    /// `completed`. A status of `completed` indicates that the vector store is ready for use.
84    pub status: VectorStoreStatus,
85    pub expires_after: Option<VectorStoreExpirationAfter>,
86    /// The Unix timestamp (in seconds) for when the vector store will expire.
87    pub expires_at: Option<u64>,
88    /// The Unix timestamp (in seconds) for when the vector store was last active.
89    pub last_active_at: Option<u64>,
90
91    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
92    /// additional information about the object in a structured format. Keys can be a maximum of 64
93    /// characters long and values can be a maximum of 512 characters long.
94    pub metadata: Option<Metadata>,
95}
96
97#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
98#[serde(rename_all = "snake_case")]
99pub enum VectorStoreStatus {
100    Expired,
101    InProgress,
102    Completed,
103}
104
105#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
106pub struct VectorStoreFileCounts {
107    /// The number of files that are currently being processed.
108    pub in_progress: u32,
109    /// The number of files that have been successfully processed.
110    pub completed: u32,
111    /// The number of files that have failed to process.
112    pub failed: u32,
113    /// The number of files that were cancelled.
114    pub cancelled: u32,
115    /// The total number of files.
116    pub total: u32,
117}
118
119#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
120pub struct ListVectorStoresResponse {
121    pub object: String,
122    pub data: Vec<VectorStoreObject>,
123    pub first_id: Option<String>,
124    pub last_id: Option<String>,
125    pub has_more: bool,
126}
127
128#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
129pub struct DeleteVectorStoreResponse {
130    pub id: String,
131    pub object: String,
132    pub deleted: bool,
133}
134
135#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
136#[builder(name = "UpdateVectorStoreRequestArgs")]
137#[builder(pattern = "mutable")]
138#[builder(setter(into, strip_option), default)]
139#[builder(derive(Debug))]
140#[builder(build_fn(error = "OpenAIError"))]
141pub struct UpdateVectorStoreRequest {
142    #[serde(skip_serializing_if = "Option::is_none")]
143    pub name: Option<String>,
144    #[serde(skip_serializing_if = "Option::is_none")]
145    pub expires_after: Option<VectorStoreExpirationAfter>,
146    #[serde(skip_serializing_if = "Option::is_none")]
147    pub metadata: Option<Metadata>,
148}
149
150#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
151pub struct ListVectorStoreFilesResponse {
152    pub object: String,
153    pub data: Vec<VectorStoreFileObject>,
154    pub first_id: Option<String>,
155    pub last_id: Option<String>,
156    pub has_more: bool,
157}
158
159#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
160pub struct VectorStoreFileObject {
161    /// The identifier, which can be referenced in API endpoints.
162    pub id: String,
163    /// The object type, which is always `vector_store.file`.
164    pub object: String,
165    /// The total vector store usage in bytes. Note that this may be different from the original
166    /// file size.
167    pub usage_bytes: u64,
168    /// The Unix timestamp (in seconds) for when the vector store file was created.
169    pub created_at: u64,
170    /// The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
171    pub vector_store_id: String,
172    /// The status of the vector store file, which can be either `in_progress`, `completed`,
173    /// `cancelled`, or `failed`. The status `completed` indicates that the vector store file is
174    /// ready for use.
175    pub status: VectorStoreFileStatus,
176    /// The last error associated with this vector store file. Will be `null` if there are no
177    /// errors.
178    pub last_error: Option<VectorStoreFileError>,
179    /// The strategy used to chunk the file.
180    pub chunking_strategy: Option<ChunkingStrategyResponse>,
181    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
182    /// additional information about the object in a structured format, and querying for objects
183    /// via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values
184    /// are strings with a maximum length of 512 characters, booleans, or numbers.
185    pub attributes: Option<VectorStoreFileAttributes>,
186}
187
188#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
189#[serde(rename_all = "snake_case")]
190pub enum VectorStoreFileStatus {
191    InProgress,
192    Completed,
193    Cancelled,
194    Failed,
195}
196
197#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
198pub struct VectorStoreFileError {
199    pub code: VectorStoreFileErrorCode,
200    /// A human-readable description of the error.
201    pub message: String,
202}
203
204#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
205#[serde(rename_all = "snake_case")]
206pub enum VectorStoreFileErrorCode {
207    ServerError,
208    UnsupportedFile,
209    InvalidFile,
210}
211
212#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
213#[serde(tag = "type")]
214#[serde(rename_all = "lowercase")]
215pub enum ChunkingStrategyResponse {
216    /// This is returned when the chunking strategy is unknown. Typically, this is because the file
217    /// was indexed before the `chunking_strategy` concept was introduced in the API.
218    #[serde(rename = "other")]
219    Other,
220    #[serde(rename = "static")]
221    Static { r#static: StaticChunkingStrategy },
222}
223
224#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
225#[serde(transparent)]
226pub struct VectorStoreFileAttributes(pub HashMap<String, AttributeValue>);
227
228impl From<HashMap<String, AttributeValue>> for VectorStoreFileAttributes {
229    fn from(attributes: HashMap<String, AttributeValue>) -> Self {
230        Self(attributes)
231    }
232}
233
234#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
235#[builder(name = "CreateVectorStoreFileRequestArgs")]
236#[builder(pattern = "mutable")]
237#[builder(setter(into, strip_option), default)]
238#[builder(derive(Debug))]
239#[builder(build_fn(error = "OpenAIError"))]
240pub struct CreateVectorStoreFileRequest {
241    /// A [File](https://platform.openai.com/docs/api-reference/files) ID that the vector store should use. Useful for tools like `file_search` that can access files.
242    pub file_id: String,
243    #[serde(skip_serializing_if = "Option::is_none")]
244    pub chunking_strategy: Option<ChunkingStrategyRequestParam>,
245    #[serde(skip_serializing_if = "Option::is_none")]
246    pub attributes: Option<VectorStoreFileAttributes>,
247}
248
249#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
250pub struct DeleteVectorStoreFileResponse {
251    pub id: String,
252    pub object: String,
253    pub deleted: bool,
254}
255
256#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
257#[builder(name = "CreateVectorStoreFileBatchRequestArgs")]
258#[builder(pattern = "mutable")]
259#[builder(setter(into, strip_option), default)]
260#[builder(derive(Debug))]
261#[builder(build_fn(error = "OpenAIError"))]
262pub struct CreateVectorStoreFileBatchRequest {
263    /// A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store
264    /// should use. Useful for tools like `file_search` that can access files. If `attributes` or
265    /// `chunking_strategy` are provided, they will be applied to all files in the batch. Mutually
266    /// exclusive with `files`.
267    #[serde(skip_serializing_if = "Option::is_none")]
268    pub file_ids: Option<Vec<String>>, // minItems: 1, maxItems: 500
269    /// A list of objects that each include a `file_id` plus optional `attributes` or
270    /// `chunking_strategy`. Use this when you need to override metadata for specific files.
271    /// The global `attributes` or `chunking_strategy` will be ignored and must be specified
272    /// for each file. Mutually exclusive with `file_ids`.
273    #[serde(skip_serializing_if = "Option::is_none")]
274    pub files: Option<Vec<CreateVectorStoreFileRequest>>,
275    #[serde(skip_serializing_if = "Option::is_none")]
276    pub chunking_strategy: Option<ChunkingStrategyRequestParam>,
277    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
278    /// additional information about the object in a structured format, and querying for objects
279    /// via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values
280    /// are strings with a maximum length of 512 characters, booleans, or numbers.
281    #[serde(skip_serializing_if = "Option::is_none")]
282    pub attributes: Option<VectorStoreFileAttributes>,
283}
284
285#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
286#[serde(rename_all = "snake_case")]
287pub enum VectorStoreFileBatchStatus {
288    InProgress,
289    Completed,
290    Cancelled,
291    Failed,
292}
293
294#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
295pub struct VectorStoreFileBatchCounts {
296    /// The number of files that are currently being processed.
297    pub in_progress: u32,
298    /// The number of files that have been processed.
299    pub completed: u32,
300    /// The number of files that have failed to process.
301    pub failed: u32,
302    /// The number of files that were cancelled.
303    pub cancelled: u32,
304    /// The total number of files.
305    pub total: u32,
306}
307
308/// A batch of files attached to a vector store.
309#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
310pub struct VectorStoreFileBatchObject {
311    /// The identifier, which can be referenced in API endpoints.
312    pub id: String,
313    /// The object type, which is always `vector_store.files_batch`.
314    pub object: String,
315    /// The Unix timestamp (in seconds) for when the vector store files batch was created.
316    pub created_at: u64,
317    /// The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
318    pub vector_store_id: String,
319    /// The status of the vector store files batch, which can be either `in_progress`, `completed`,
320    /// `cancelled` or `failed`.
321    pub status: VectorStoreFileBatchStatus,
322    pub file_counts: VectorStoreFileBatchCounts,
323}
324
325/// Represents the parsed content of a vector store file.
326#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
327pub struct VectorStoreFileContentResponse {
328    /// The object type, which is always `vector_store.file_content.page`
329    pub object: String,
330
331    /// Parsed content of the file.
332    pub data: Vec<VectorStoreFileContentObject>,
333
334    /// Indicates if there are more content pages to fetch.
335    pub has_more: bool,
336
337    /// The token for the next page, if any.
338    pub next_page: Option<String>,
339}
340
341/// Represents the parsed content of a vector store file.
342#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
343pub struct VectorStoreFileContentObject {
344    /// The content type (currently only `"text"`)
345    pub kind: String,
346
347    /// The text content
348    pub text: String,
349}
350
351#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
352#[builder(name = "VectorStoreSearchRequestArgs")]
353#[builder(pattern = "mutable")]
354#[builder(setter(into, strip_option), default)]
355#[builder(derive(Debug))]
356#[builder(build_fn(error = "OpenAIError"))]
357pub struct VectorStoreSearchRequest {
358    /// A query string for a search.
359    pub query: VectorStoreSearchQuery,
360
361    /// Whether to rewrite the natural language query for vector search.
362    #[serde(skip_serializing_if = "Option::is_none")]
363    pub rewrite_query: Option<bool>,
364
365    /// The maximum number of results to return. This number should be between 1 and 50 inclusive.
366    #[serde(skip_serializing_if = "Option::is_none")]
367    pub max_num_results: Option<u8>,
368
369    /// A filter to apply based on file attributes.
370    #[serde(skip_serializing_if = "Option::is_none")]
371    pub filters: Option<Filter>,
372
373    /// Ranking options for search.
374    #[serde(skip_serializing_if = "Option::is_none")]
375    pub ranking_options: Option<RankingOptions>,
376}
377
378#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
379#[serde(untagged)]
380pub enum VectorStoreSearchQuery {
381    /// A single query to search for.
382    Text(String),
383    /// A list of queries to search for.
384    Array(Vec<String>),
385}
386
387impl Default for VectorStoreSearchQuery {
388    fn default() -> Self {
389        Self::Text(String::new())
390    }
391}
392
393impl From<String> for VectorStoreSearchQuery {
394    fn from(query: String) -> Self {
395        Self::Text(query)
396    }
397}
398
399impl From<&str> for VectorStoreSearchQuery {
400    fn from(query: &str) -> Self {
401        Self::Text(query.to_string())
402    }
403}
404
405impl From<Vec<String>> for VectorStoreSearchQuery {
406    fn from(query: Vec<String>) -> Self {
407        Self::Array(query)
408    }
409}
410
411/// The value to compare against the attribute key; supports string, number, or boolean types.
412#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
413#[serde(untagged)]
414pub enum AttributeValue {
415    String(String),
416    Number(i64),
417    Boolean(bool),
418}
419
420impl From<String> for AttributeValue {
421    fn from(value: String) -> Self {
422        Self::String(value)
423    }
424}
425
426impl From<i64> for AttributeValue {
427    fn from(value: i64) -> Self {
428        Self::Number(value)
429    }
430}
431
432impl From<bool> for AttributeValue {
433    fn from(value: bool) -> Self {
434        Self::Boolean(value)
435    }
436}
437
438impl From<&str> for AttributeValue {
439    fn from(value: &str) -> Self {
440        Self::String(value.to_string())
441    }
442}
443
444/// Ranking options for search.
445#[derive(Debug, Serialize, Default, Deserialize, Clone, PartialEq)]
446pub struct RankingOptions {
447    #[serde(skip_serializing_if = "Option::is_none")]
448    pub ranker: Option<Ranker>,
449
450    #[serde(skip_serializing_if = "Option::is_none")]
451    pub score_threshold: Option<f32>,
452}
453
454#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
455pub enum Ranker {
456    /// Enable re-ranking; set to `none` to disable, which can help reduce latency.
457    #[serde(rename = "none")]
458    None,
459    #[serde(rename = "auto")]
460    Auto,
461    #[serde(rename = "default-2024-11-15")]
462    Default20241115,
463}
464
465#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
466pub struct VectorStoreSearchResultsPage {
467    /// The object type, which is always `vector_store.search_results.page`.
468    pub object: String,
469
470    /// The query used for this search.
471    pub search_query: Vec<String>,
472
473    /// The list of search result items.
474    pub data: Vec<VectorStoreSearchResultItem>,
475
476    /// Indicates if there are more results to fetch.
477    pub has_more: bool,
478
479    /// The token for the next page, if any.
480    pub next_page: Option<String>,
481}
482
483#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
484pub struct VectorStoreSearchResultItem {
485    /// The ID of the vector store file.
486    pub file_id: String,
487
488    /// The name of the vector store file.
489    pub filename: String,
490
491    /// The similarity score for the result.
492    pub score: f32, // minimum: 0, maximum: 1
493
494    /// Attributes of the vector store file.
495    pub attributes: VectorStoreFileAttributes,
496
497    /// Content chunks from the file.
498    pub content: Vec<VectorStoreSearchResultContentObject>,
499}
500
501#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
502pub struct VectorStoreSearchResultContentObject {
503    /// The type of content
504    pub kind: String,
505
506    /// The text content returned from search.
507    pub text: String,
508}
509
510#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
511pub struct UpdateVectorStoreFileAttributesRequest {
512    pub attributes: VectorStoreFileAttributes,
513}