xai_openapi/
search.rs

//! Document search API types for the `/v1/documents/search` endpoint.
2
3use serde::{Deserialize, Serialize};
4
5use crate::prelude::*;
6
7/// `SearchRequest` defines the request to search for documents.
8#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
9pub struct SearchRequest {
10    /// The query to search for which will be embedded using the
11    /// same embedding model as the one used for the source to query.
12    pub query: String,
13
14    /// The source to query.
15    pub source: DocumentsSource,
16
17    /// User-defined instructions to be included in the search query.
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub instructions: Option<String>,
20
21    /// The number of chunks to return. Defaults to 10.
22    #[serde(skip_serializing_if = "Option::is_none")]
23    pub limit: Option<i32>,
24
25    /// Deprecated: Metric now comes from collection creation.
26    #[serde(skip_serializing_if = "Option::is_none")]
27    pub ranking_metric: Option<RankingMetric>,
28
29    /// How to perform the document search. Defaults to hybrid retrieval.
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub retrieval_mode: Option<RetrievalMode>,
32}
33
34/// `SearchResponse` defines the response to a search request.
35#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
36pub struct SearchResponse {
37    /// The search matches.
38    pub matches: Vec<SearchMatch>,
39}
40
41/// `SearchMatch` defines a single match from a search request.
42#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
43pub struct SearchMatch {
44    /// The document ID.
45    pub file_id: String,
46
47    /// The chunk ID.
48    pub chunk_id: String,
49
50    /// The chunk content.
51    pub chunk_content: String,
52
53    /// The relevance score.
54    pub score: f32,
55
56    /// The collection ID(s).
57    pub collection_ids: Vec<String>,
58
59    /// Metadata fields belonging to the document of this chunk.
60    #[serde(default)]
61    pub fields: HashMap<String, String>,
62}
63
64/// `DocumentsSource` defines the source of documents to search over.
65#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
66pub struct DocumentsSource {
67    /// The collection IDs to search in.
68    pub collection_ids: Vec<String>,
69}
70
71/// Parameters to control realtime data.
72#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
73pub struct SearchParameters {
74    /// Date from which to consider the results in ISO-8601 YYYY-MM-DD format.
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub from_date: Option<String>,
77
78    /// Date up to which to consider the results in ISO-8601 YYYY-MM-DD format.
79    #[serde(skip_serializing_if = "Option::is_none")]
80    pub to_date: Option<String>,
81
82    /// Maximum number of search results to use. Defaults to 15.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub max_search_results: Option<i32>,
85
86    /// Choose the mode to query realtime data: `off`, `on` (default), or `auto`.
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub mode: Option<String>,
89
90    /// Whether to return citations in the response or not. Defaults to true.
91    #[serde(skip_serializing_if = "Option::is_none")]
92    pub return_citations: Option<bool>,
93
94    /// List of sources to search in.
95    #[serde(skip_serializing_if = "Option::is_none")]
96    pub sources: Option<Vec<SearchSource>>,
97}
98
99/// Search source for realtime data.
100#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
101#[serde(tag = "type", rename_all = "lowercase")]
102pub enum SearchSource {
103    /// Search X (Twitter).
104    X {
105        /// List of X handles to exclude from the search results.
106        #[serde(skip_serializing_if = "Option::is_none")]
107        excluded_x_handles: Option<Vec<String>>,
108
109        /// X Handles of the users from whom to consider the posts.
110        #[serde(skip_serializing_if = "Option::is_none")]
111        included_x_handles: Option<Vec<String>>,
112
113        /// DEPRECATED: Use `included_x_handles` instead.
114        #[serde(skip_serializing_if = "Option::is_none")]
115        x_handles: Option<Vec<String>>,
116
117        /// The minimum favorite count of the X posts to consider.
118        #[serde(skip_serializing_if = "Option::is_none")]
119        post_favorite_count: Option<i32>,
120
121        /// The minimum view count of the X posts to consider.
122        #[serde(skip_serializing_if = "Option::is_none")]
123        post_view_count: Option<i32>,
124    },
125    /// Search the web.
126    Web {
127        /// List of websites to allow in the search results (whitelist).
128        #[serde(skip_serializing_if = "Option::is_none")]
129        allowed_websites: Option<Vec<String>>,
130
131        /// List of websites to exclude from the search results.
132        #[serde(skip_serializing_if = "Option::is_none")]
133        excluded_websites: Option<Vec<String>>,
134
135        /// ISO alpha-2 code of the country for filtering results.
136        #[serde(skip_serializing_if = "Option::is_none")]
137        country: Option<String>,
138
139        /// If set to true, mature content won't be considered. Defaults to true.
140        #[serde(skip_serializing_if = "Option::is_none")]
141        safe_search: Option<bool>,
142    },
143    /// Search news sources.
144    News {
145        /// ISO alpha-2 code of the country for filtering results.
146        #[serde(skip_serializing_if = "Option::is_none")]
147        country: Option<String>,
148
149        /// List of websites to exclude from the search results.
150        #[serde(skip_serializing_if = "Option::is_none")]
151        excluded_websites: Option<Vec<String>>,
152    },
153    /// Search knowledge bases.
154    Rss {
155        /// List of RSS feed URLs to search.
156        #[serde(skip_serializing_if = "Option::is_none")]
157        urls: Option<Vec<String>>,
158    },
159}
160
161impl Default for SearchSource {
162    fn default() -> Self {
163        SearchSource::Web {
164            allowed_websites: None,
165            excluded_websites: None,
166            country: None,
167            safe_search: None,
168        }
169    }
170}
171
172/// Deprecated: Metric now comes from collection creation.
173#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
174#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
175pub enum RankingMetric {
176    /// Unknown ranking metric.
177    #[default]
178    RankingMetricUnknown,
179    /// L2 distance metric.
180    RankingMetricL2Distance,
181    /// Cosine similarity metric.
182    RankingMetricCosineSimilarity,
183}
184
185/// Retrieval mode configuration for document search.
186#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
187#[serde(tag = "type", rename_all = "lowercase")]
188pub enum RetrievalMode {
189    /// Hybrid search combining keyword and semantic search.
190    Hybrid {
191        /// Which reranker to use to limit results to the desired value.
192        #[serde(skip_serializing_if = "Option::is_none")]
193        reranker: Option<HybridReranker>,
194
195        /// Additional multiplier to requested search limit. Valid range is [1, 100].
196        #[serde(skip_serializing_if = "Option::is_none")]
197        search_multiplier: Option<i32>,
198    },
199    /// Semantic search using dense embeddings.
200    Semantic {
201        /// Optional reranker, always used when doing search across multiple collections.
202        #[serde(skip_serializing_if = "Option::is_none")]
203        reranker: Option<RerankerModel>,
204    },
205    /// Keyword search using sparse embeddings.
206    Keyword {
207        /// Optional reranker, always used when doing search across multiple collections.
208        #[serde(skip_serializing_if = "Option::is_none")]
209        reranker: Option<RerankerModel>,
210    },
211}
212
213impl Default for RetrievalMode {
214    fn default() -> Self {
215        RetrievalMode::Hybrid {
216            reranker: None,
217            search_multiplier: None,
218        }
219    }
220}
221
222/// Reranker configuration for hybrid retrieval.
223#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
224#[serde(tag = "type", rename_all = "snake_case")]
225pub enum HybridReranker {
226    /// Use a reranker model to perform the reranking.
227    RerankerModel {
228        /// Instructions for the reranking model.
229        #[serde(skip_serializing_if = "Option::is_none")]
230        instructions: Option<String>,
231
232        /// The model to use for reranking.
233        #[serde(skip_serializing_if = "Option::is_none")]
234        model: Option<String>,
235    },
236    /// Use RRF (Reciprocal Rank Fusion) to perform the reranking.
237    Rrf {
238        /// Weight for embedding (dense) search results. Between 0 and 1, defaults to 0.5.
239        #[serde(skip_serializing_if = "Option::is_none")]
240        embedding_weight: Option<f32>,
241
242        /// Weight for keyword (sparse) search results. Between 0 and 1, defaults to 0.5.
243        #[serde(skip_serializing_if = "Option::is_none")]
244        text_weight: Option<f32>,
245
246        /// The RRF constant k used in the formula. Defaults to 60.
247        #[serde(skip_serializing_if = "Option::is_none")]
248        k: Option<i32>,
249    },
250}
251
252impl Default for HybridReranker {
253    fn default() -> Self {
254        HybridReranker::Rrf {
255            embedding_weight: None,
256            text_weight: None,
257            k: None,
258        }
259    }
260}
261
262/// Configuration for model-based reranking.
263#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
264pub struct RerankerModel {
265    /// Instructions for the reranking model.
266    #[serde(skip_serializing_if = "Option::is_none")]
267    pub instructions: Option<String>,
268
269    /// The model to use for reranking.
270    #[serde(skip_serializing_if = "Option::is_none")]
271    pub model: Option<String>,
272}
273
274/// Configuration for reciprocal rank fusion (RRF) reranking.
275#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
276pub struct ReciprocalRankFusion {
277    /// Weight for embedding (dense) search results. Between 0 and 1, defaults to 0.5.
278    #[serde(skip_serializing_if = "Option::is_none")]
279    pub embedding_weight: Option<f32>,
280
281    /// Weight for keyword (sparse) search results. Between 0 and 1, defaults to 0.5.
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub text_weight: Option<f32>,
284
285    /// The RRF constant k used in the reciprocal rank fusion formula. Defaults to 60.
286    #[serde(skip_serializing_if = "Option::is_none")]
287    pub k: Option<i32>,
288}