Skip to main content

dakera_client/
client.rs

1//! Dakera client implementation
2
3use reqwest::{Client, StatusCode};
4use std::sync::{Arc, Mutex};
5use std::time::Duration;
6use tracing::{debug, instrument};
7
8use serde::Deserialize;
9
10use crate::error::{ClientError, Result, ServerErrorCode};
11use crate::types::*;
12
13/// Default timeout for requests
14const DEFAULT_TIMEOUT_SECS: u64 = 30;
15
/// Dakera client for interacting with the vector database.
///
/// The type derives `Clone`; `last_rate_limit` lives behind an `Arc`, so every
/// clone reads and writes the same rate-limit slot.
#[derive(Debug, Clone)]
pub struct DakeraClient {
    /// HTTP client (`reqwest::Client`) used to send every request
    pub(crate) client: Client,
    /// Base URL of the Dakera server; endpoint paths are appended to it verbatim
    /// with `format!` (NOTE(review): presumably stored without a trailing slash — confirm in the builder)
    pub(crate) base_url: String,
    /// Retry configuration (wired into API call sites in a follow-up; suppressed until then)
    #[allow(dead_code)]
    pub(crate) retry_config: RetryConfig,
    /// OPS-1: last seen rate-limit headers (shared across clones)
    pub(crate) last_rate_limit: Arc<Mutex<Option<RateLimitHeaders>>>,
}
29
30impl DakeraClient {
31    /// Create a new client with the given base URL
32    ///
33    /// # Example
34    ///
35    /// ```rust,no_run
36    /// use dakera_client::DakeraClient;
37    ///
38    /// let client = DakeraClient::new("http://localhost:3000").unwrap();
39    /// ```
40    pub fn new(base_url: impl Into<String>) -> Result<Self> {
41        DakeraClientBuilder::new(base_url).build()
42    }
43
44    /// Create a new client builder for more configuration options
45    pub fn builder(base_url: impl Into<String>) -> DakeraClientBuilder {
46        DakeraClientBuilder::new(base_url)
47    }
48
49    // ========================================================================
50    // Health & Status
51    // ========================================================================
52
53    /// Check server health
54    #[instrument(skip(self))]
55    pub async fn health(&self) -> Result<HealthResponse> {
56        let url = format!("{}/health", self.base_url);
57        let response = self.client.get(&url).send().await?;
58
59        if response.status().is_success() {
60            Ok(response.json().await?)
61        } else {
62            // Health endpoint might return simple OK
63            Ok(HealthResponse {
64                healthy: true,
65                version: None,
66                uptime_seconds: None,
67            })
68        }
69    }
70
71    /// Check if server is ready
72    #[instrument(skip(self))]
73    pub async fn ready(&self) -> Result<ReadinessResponse> {
74        let url = format!("{}/health/ready", self.base_url);
75        let response = self.client.get(&url).send().await?;
76
77        if response.status().is_success() {
78            Ok(response.json().await?)
79        } else {
80            Ok(ReadinessResponse {
81                ready: false,
82                components: None,
83            })
84        }
85    }
86
87    /// Check if server is live
88    #[instrument(skip(self))]
89    pub async fn live(&self) -> Result<bool> {
90        let url = format!("{}/health/live", self.base_url);
91        let response = self.client.get(&url).send().await?;
92        Ok(response.status().is_success())
93    }
94
95    // ========================================================================
96    // Namespace Operations
97    // ========================================================================
98
99    /// List all namespaces
100    #[instrument(skip(self))]
101    pub async fn list_namespaces(&self) -> Result<Vec<String>> {
102        let url = format!("{}/v1/namespaces", self.base_url);
103        let response = self.client.get(&url).send().await?;
104        self.handle_response::<ListNamespacesResponse>(response)
105            .await
106            .map(|r| r.namespaces)
107    }
108
109    /// Get namespace information
110    #[instrument(skip(self))]
111    pub async fn get_namespace(&self, namespace: &str) -> Result<NamespaceInfo> {
112        let url = format!("{}/v1/namespaces/{}", self.base_url, namespace);
113        let response = self.client.get(&url).send().await?;
114        self.handle_response(response).await
115    }
116
117    /// Create a new namespace
118    #[instrument(skip(self, request))]
119    pub async fn create_namespace(
120        &self,
121        namespace: &str,
122        request: CreateNamespaceRequest,
123    ) -> Result<NamespaceInfo> {
124        let url = format!("{}/v1/namespaces/{}", self.base_url, namespace);
125        let response = self.client.post(&url).json(&request).send().await?;
126        self.handle_response(response).await
127    }
128
129    /// Create or update a namespace configuration (upsert semantics — v0.6.0).
130    ///
131    /// Creates the namespace if it does not exist, or updates its distance-metric
132    /// configuration if it already exists.  Dimension changes are rejected to
133    /// prevent silent data corruption.  Requires `Scope::Write`.
134    #[instrument(skip(self, request), fields(namespace = %namespace))]
135    pub async fn configure_namespace(
136        &self,
137        namespace: &str,
138        request: ConfigureNamespaceRequest,
139    ) -> Result<ConfigureNamespaceResponse> {
140        let url = format!("{}/v1/namespaces/{}", self.base_url, namespace);
141        let response = self.client.put(&url).json(&request).send().await?;
142        self.handle_response(response).await
143    }
144
145    // ========================================================================
146    // Vector Operations
147    // ========================================================================
148
149    /// Upsert vectors into a namespace
150    #[instrument(skip(self, request), fields(vector_count = request.vectors.len()))]
151    pub async fn upsert(&self, namespace: &str, request: UpsertRequest) -> Result<UpsertResponse> {
152        let url = format!("{}/v1/namespaces/{}/vectors", self.base_url, namespace);
153        debug!(
154            "Upserting {} vectors to {}",
155            request.vectors.len(),
156            namespace
157        );
158
159        let response = self.client.post(&url).json(&request).send().await?;
160        self.handle_response(response).await
161    }
162
163    /// Upsert a single vector (convenience method)
164    #[instrument(skip(self, vector))]
165    pub async fn upsert_one(&self, namespace: &str, vector: Vector) -> Result<UpsertResponse> {
166        self.upsert(namespace, UpsertRequest::single(vector)).await
167    }
168
169    /// Upsert vectors in column format (Turbopuffer-inspired)
170    ///
171    /// This format is more efficient for bulk upserts as it avoids repeating
172    /// field names for each vector. All arrays must have equal length.
173    ///
174    /// # Example
175    ///
176    /// ```rust,no_run
177    /// use dakera_client::{DakeraClient, ColumnUpsertRequest};
178    ///
179    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
180    /// let client = DakeraClient::new("http://localhost:3000")?;
181    ///
182    /// let request = ColumnUpsertRequest::new(
183    ///     vec!["id1".to_string(), "id2".to_string(), "id3".to_string()],
184    ///     vec![
185    ///         vec![0.1, 0.2, 0.3],
186    ///         vec![0.4, 0.5, 0.6],
187    ///         vec![0.7, 0.8, 0.9],
188    ///     ],
189    /// )
190    /// .with_attribute("category", vec![
191    ///     serde_json::json!("A"),
192    ///     serde_json::json!("B"),
193    ///     serde_json::json!("A"),
194    /// ]);
195    ///
196    /// let response = client.upsert_columns("my-namespace", request).await?;
197    /// println!("Upserted {} vectors", response.upserted_count);
198    /// # Ok(())
199    /// # }
200    /// ```
201    #[instrument(skip(self, request), fields(namespace = %namespace, count = request.ids.len()))]
202    pub async fn upsert_columns(
203        &self,
204        namespace: &str,
205        request: ColumnUpsertRequest,
206    ) -> Result<UpsertResponse> {
207        let url = format!(
208            "{}/v1/namespaces/{}/upsert-columns",
209            self.base_url, namespace
210        );
211        debug!(
212            "Upserting {} vectors in column format to {}",
213            request.ids.len(),
214            namespace
215        );
216
217        let response = self.client.post(&url).json(&request).send().await?;
218        self.handle_response(response).await
219    }
220
221    /// Query for similar vectors
222    #[instrument(skip(self, request), fields(top_k = request.top_k))]
223    pub async fn query(&self, namespace: &str, request: QueryRequest) -> Result<QueryResponse> {
224        let url = format!("{}/v1/namespaces/{}/query", self.base_url, namespace);
225        debug!(
226            "Querying namespace {} for top {} results",
227            namespace, request.top_k
228        );
229
230        let response = self.client.post(&url).json(&request).send().await?;
231        self.handle_response(response).await
232    }
233
234    /// Simple query with just a vector and top_k (convenience method)
235    #[instrument(skip(self, vector))]
236    pub async fn query_simple(
237        &self,
238        namespace: &str,
239        vector: Vec<f32>,
240        top_k: u32,
241    ) -> Result<QueryResponse> {
242        self.query(namespace, QueryRequest::new(vector, top_k))
243            .await
244    }
245
246    /// Execute multiple queries in a single request
247    ///
248    /// This allows executing multiple vector similarity queries in parallel,
249    /// which is more efficient than making separate requests.
250    ///
251    /// # Example
252    ///
253    /// ```rust,no_run
254    /// use dakera_client::{DakeraClient, BatchQueryRequest, BatchQueryItem};
255    ///
256    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
257    /// let client = DakeraClient::new("http://localhost:3000")?;
258    ///
259    /// let request = BatchQueryRequest::new(vec![
260    ///     BatchQueryItem::new(vec![0.1, 0.2, 0.3], 5).with_id("query1"),
261    ///     BatchQueryItem::new(vec![0.4, 0.5, 0.6], 10).with_id("query2"),
262    /// ]);
263    ///
264    /// let response = client.batch_query("my-namespace", request).await?;
265    /// println!("Executed {} queries in {}ms", response.query_count, response.total_latency_ms);
266    /// # Ok(())
267    /// # }
268    /// ```
269    #[instrument(skip(self, request), fields(namespace = %namespace, query_count = request.queries.len()))]
270    pub async fn batch_query(
271        &self,
272        namespace: &str,
273        request: BatchQueryRequest,
274    ) -> Result<BatchQueryResponse> {
275        let url = format!("{}/v1/namespaces/{}/batch-query", self.base_url, namespace);
276        debug!(
277            "Batch querying namespace {} with {} queries",
278            namespace,
279            request.queries.len()
280        );
281
282        let response = self.client.post(&url).json(&request).send().await?;
283        self.handle_response(response).await
284    }
285
286    /// Delete vectors by ID
287    #[instrument(skip(self, request), fields(id_count = request.ids.len()))]
288    pub async fn delete(&self, namespace: &str, request: DeleteRequest) -> Result<DeleteResponse> {
289        let url = format!(
290            "{}/v1/namespaces/{}/vectors/delete",
291            self.base_url, namespace
292        );
293        debug!("Deleting {} vectors from {}", request.ids.len(), namespace);
294
295        let response = self.client.post(&url).json(&request).send().await?;
296        self.handle_response(response).await
297    }
298
299    /// Delete a single vector by ID (convenience method)
300    #[instrument(skip(self))]
301    pub async fn delete_one(&self, namespace: &str, id: &str) -> Result<DeleteResponse> {
302        self.delete(namespace, DeleteRequest::single(id)).await
303    }
304
305    // ========================================================================
306    // Full-Text Search Operations
307    // ========================================================================
308
309    /// Index documents for full-text search
310    #[instrument(skip(self, request), fields(doc_count = request.documents.len()))]
311    pub async fn index_documents(
312        &self,
313        namespace: &str,
314        request: IndexDocumentsRequest,
315    ) -> Result<IndexDocumentsResponse> {
316        let url = format!(
317            "{}/v1/namespaces/{}/fulltext/index",
318            self.base_url, namespace
319        );
320        debug!(
321            "Indexing {} documents in {}",
322            request.documents.len(),
323            namespace
324        );
325
326        let response = self.client.post(&url).json(&request).send().await?;
327        self.handle_response(response).await
328    }
329
330    /// Index a single document (convenience method)
331    #[instrument(skip(self, document))]
332    pub async fn index_document(
333        &self,
334        namespace: &str,
335        document: Document,
336    ) -> Result<IndexDocumentsResponse> {
337        self.index_documents(
338            namespace,
339            IndexDocumentsRequest {
340                documents: vec![document],
341            },
342        )
343        .await
344    }
345
346    /// Perform full-text search
347    #[instrument(skip(self, request))]
348    pub async fn fulltext_search(
349        &self,
350        namespace: &str,
351        request: FullTextSearchRequest,
352    ) -> Result<FullTextSearchResponse> {
353        let url = format!(
354            "{}/v1/namespaces/{}/fulltext/search",
355            self.base_url, namespace
356        );
357        debug!("Full-text search in {} for: {}", namespace, request.query);
358
359        let response = self.client.post(&url).json(&request).send().await?;
360        self.handle_response(response).await
361    }
362
363    /// Simple full-text search (convenience method)
364    #[instrument(skip(self))]
365    pub async fn search_text(
366        &self,
367        namespace: &str,
368        query: &str,
369        top_k: u32,
370    ) -> Result<FullTextSearchResponse> {
371        self.fulltext_search(namespace, FullTextSearchRequest::new(query, top_k))
372            .await
373    }
374
375    /// Get full-text index statistics
376    #[instrument(skip(self))]
377    pub async fn fulltext_stats(&self, namespace: &str) -> Result<FullTextStats> {
378        let url = format!(
379            "{}/v1/namespaces/{}/fulltext/stats",
380            self.base_url, namespace
381        );
382        let response = self.client.get(&url).send().await?;
383        self.handle_response(response).await
384    }
385
386    /// Delete documents from full-text index
387    #[instrument(skip(self, request))]
388    pub async fn fulltext_delete(
389        &self,
390        namespace: &str,
391        request: DeleteRequest,
392    ) -> Result<DeleteResponse> {
393        let url = format!(
394            "{}/v1/namespaces/{}/fulltext/delete",
395            self.base_url, namespace
396        );
397        let response = self.client.post(&url).json(&request).send().await?;
398        self.handle_response(response).await
399    }
400
401    // ========================================================================
402    // Hybrid Search Operations
403    // ========================================================================
404
405    /// Perform hybrid search (vector + full-text)
406    #[instrument(skip(self, request), fields(top_k = request.top_k))]
407    pub async fn hybrid_search(
408        &self,
409        namespace: &str,
410        request: HybridSearchRequest,
411    ) -> Result<HybridSearchResponse> {
412        let url = format!("{}/v1/namespaces/{}/hybrid", self.base_url, namespace);
413        debug!(
414            "Hybrid search in {} with vector_weight={}",
415            namespace, request.vector_weight
416        );
417
418        let response = self.client.post(&url).json(&request).send().await?;
419        self.handle_response(response).await
420    }
421
422    // ========================================================================
423    // Multi-Vector Search Operations
424    // ========================================================================
425
426    /// Multi-vector search with positive/negative vectors and MMR
427    ///
428    /// This performs semantic search using multiple positive vectors (to search towards)
429    /// and optional negative vectors (to search away from). Supports MMR (Maximal Marginal
430    /// Relevance) for result diversity.
431    ///
432    /// # Example
433    ///
434    /// ```rust,no_run
435    /// use dakera_client::{DakeraClient, MultiVectorSearchRequest};
436    ///
437    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
438    /// let client = DakeraClient::new("http://localhost:3000")?;
439    ///
440    /// // Search towards multiple concepts, away from others
441    /// let request = MultiVectorSearchRequest::new(vec![
442    ///     vec![0.1, 0.2, 0.3],  // positive vector 1
443    ///     vec![0.4, 0.5, 0.6],  // positive vector 2
444    /// ])
445    /// .with_negative_vectors(vec![
446    ///     vec![0.7, 0.8, 0.9],  // negative vector
447    /// ])
448    /// .with_top_k(10)
449    /// .with_mmr(0.7);  // Enable MMR with lambda=0.7
450    ///
451    /// let response = client.multi_vector_search("my-namespace", request).await?;
452    /// for result in response.results {
453    ///     println!("ID: {}, Score: {}", result.id, result.score);
454    /// }
455    /// # Ok(())
456    /// # }
457    /// ```
458    #[instrument(skip(self, request), fields(namespace = %namespace))]
459    pub async fn multi_vector_search(
460        &self,
461        namespace: &str,
462        request: MultiVectorSearchRequest,
463    ) -> Result<MultiVectorSearchResponse> {
464        let url = format!("{}/v1/namespaces/{}/multi-vector", self.base_url, namespace);
465        debug!(
466            "Multi-vector search in {} with {} positive vectors",
467            namespace,
468            request.positive_vectors.len()
469        );
470
471        let response = self.client.post(&url).json(&request).send().await?;
472        self.handle_response(response).await
473    }
474
475    // ========================================================================
476    // Aggregation Operations
477    // ========================================================================
478
479    /// Aggregate vectors with grouping (Turbopuffer-inspired)
480    ///
481    /// This performs aggregation queries on vector metadata, supporting
482    /// count, sum, avg, min, and max operations with optional grouping.
483    ///
484    /// # Example
485    ///
486    /// ```rust,no_run
487    /// use dakera_client::{DakeraClient, AggregationRequest};
488    ///
489    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
490    /// let client = DakeraClient::new("http://localhost:3000")?;
491    ///
492    /// // Count all vectors and sum scores, grouped by category
493    /// let request = AggregationRequest::new()
494    ///     .with_count("total_count")
495    ///     .with_sum("total_score", "score")
496    ///     .with_avg("avg_score", "score")
497    ///     .with_group_by("category");
498    ///
499    /// let response = client.aggregate("my-namespace", request).await?;
500    /// if let Some(groups) = response.aggregation_groups {
501    ///     for group in groups {
502    ///         println!("Group: {:?}", group.group_key);
503    ///     }
504    /// }
505    /// # Ok(())
506    /// # }
507    /// ```
508    #[instrument(skip(self, request), fields(namespace = %namespace))]
509    pub async fn aggregate(
510        &self,
511        namespace: &str,
512        request: AggregationRequest,
513    ) -> Result<AggregationResponse> {
514        let url = format!("{}/v1/namespaces/{}/aggregate", self.base_url, namespace);
515        debug!(
516            "Aggregating in namespace {} with {} aggregations",
517            namespace,
518            request.aggregate_by.len()
519        );
520
521        let response = self.client.post(&url).json(&request).send().await?;
522        self.handle_response(response).await
523    }
524
525    // ========================================================================
526    // Unified Query Operations
527    // ========================================================================
528
529    /// Unified query with flexible ranking options (Turbopuffer-inspired)
530    ///
531    /// This provides a unified API for vector search (ANN/kNN), full-text search (BM25),
532    /// and attribute ordering. Supports combining multiple ranking functions with
533    /// Sum, Max, and Product operators.
534    ///
535    /// # Example
536    ///
537    /// ```rust,no_run
538    /// use dakera_client::{DakeraClient, UnifiedQueryRequest, SortDirection};
539    ///
540    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
541    /// let client = DakeraClient::new("http://localhost:3000")?;
542    ///
543    /// // Vector ANN search
544    /// let request = UnifiedQueryRequest::vector_search(vec![0.1, 0.2, 0.3], 10);
545    /// let response = client.unified_query("my-namespace", request).await?;
546    ///
547    /// // Full-text BM25 search
548    /// let request = UnifiedQueryRequest::fulltext_search("content", "hello world", 10);
549    /// let response = client.unified_query("my-namespace", request).await?;
550    ///
551    /// // Attribute ordering with filter
552    /// let request = UnifiedQueryRequest::attribute_order("timestamp", SortDirection::Desc, 10)
553    ///     .with_filter(serde_json::json!({"category": {"$eq": "science"}}));
554    /// let response = client.unified_query("my-namespace", request).await?;
555    ///
556    /// for result in response.results {
557    ///     println!("ID: {}, Score: {:?}", result.id, result.dist);
558    /// }
559    /// # Ok(())
560    /// # }
561    /// ```
562    #[instrument(skip(self, request), fields(namespace = %namespace))]
563    pub async fn unified_query(
564        &self,
565        namespace: &str,
566        request: UnifiedQueryRequest,
567    ) -> Result<UnifiedQueryResponse> {
568        let url = format!(
569            "{}/v1/namespaces/{}/unified-query",
570            self.base_url, namespace
571        );
572        debug!(
573            "Unified query in namespace {} with top_k={}",
574            namespace, request.top_k
575        );
576
577        let response = self.client.post(&url).json(&request).send().await?;
578        self.handle_response(response).await
579    }
580
581    /// Simple vector search using the unified query API (convenience method)
582    ///
583    /// This is a shortcut for `unified_query` with a vector ANN search.
584    #[instrument(skip(self, vector))]
585    pub async fn unified_vector_search(
586        &self,
587        namespace: &str,
588        vector: Vec<f32>,
589        top_k: usize,
590    ) -> Result<UnifiedQueryResponse> {
591        self.unified_query(namespace, UnifiedQueryRequest::vector_search(vector, top_k))
592            .await
593    }
594
595    /// Simple full-text search using the unified query API (convenience method)
596    ///
597    /// This is a shortcut for `unified_query` with a BM25 full-text search.
598    #[instrument(skip(self))]
599    pub async fn unified_text_search(
600        &self,
601        namespace: &str,
602        field: &str,
603        query: &str,
604        top_k: usize,
605    ) -> Result<UnifiedQueryResponse> {
606        self.unified_query(
607            namespace,
608            UnifiedQueryRequest::fulltext_search(field, query, top_k),
609        )
610        .await
611    }
612
613    // ========================================================================
614    // Query Explain Operations
615    // ========================================================================
616
617    /// Explain query execution plan (similar to SQL EXPLAIN)
618    ///
619    /// This provides detailed information about how a query will be executed,
620    /// including index selection, execution stages, cost estimates, and
621    /// performance recommendations.
622    ///
623    /// # Example
624    ///
625    /// ```rust,no_run
626    /// use dakera_client::{DakeraClient, QueryExplainRequest};
627    ///
628    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
629    /// let client = DakeraClient::new("http://localhost:3000")?;
630    ///
631    /// // Explain a vector search query
632    /// let request = QueryExplainRequest::vector_search(vec![0.1, 0.2, 0.3], 10)
633    ///     .with_verbose();
634    /// let plan = client.explain_query("my-namespace", request).await?;
635    ///
636    /// println!("Query plan: {}", plan.summary);
637    /// println!("Estimated time: {}ms", plan.cost_estimate.estimated_time_ms);
638    ///
639    /// for stage in &plan.stages {
640    ///     println!("Stage {}: {} - {}", stage.order, stage.name, stage.description);
641    /// }
642    ///
643    /// for rec in &plan.recommendations {
644    ///     println!("Recommendation ({}): {}", rec.priority, rec.description);
645    /// }
646    /// # Ok(())
647    /// # }
648    /// ```
649    #[instrument(skip(self, request), fields(namespace = %namespace))]
650    pub async fn explain_query(
651        &self,
652        namespace: &str,
653        request: QueryExplainRequest,
654    ) -> Result<QueryExplainResponse> {
655        let url = format!("{}/v1/namespaces/{}/explain", self.base_url, namespace);
656        debug!(
657            "Explaining query in namespace {} (query_type={:?}, top_k={})",
658            namespace, request.query_type, request.top_k
659        );
660
661        let response = self.client.post(&url).json(&request).send().await?;
662        self.handle_response(response).await
663    }
664
665    // ========================================================================
666    // Cache Warming Operations
667    // ========================================================================
668
669    /// Warm cache for vectors in a namespace
670    ///
671    /// This pre-loads vectors into cache tiers for faster subsequent access.
672    /// Supports priority levels and can run in the background.
673    ///
674    /// # Example
675    ///
676    /// ```rust,no_run
677    /// use dakera_client::{DakeraClient, WarmCacheRequest, WarmingPriority};
678    ///
679    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
680    /// let client = DakeraClient::new("http://localhost:3000")?;
681    ///
682    /// // Warm entire namespace with high priority
683    /// let response = client.warm_cache(
684    ///     WarmCacheRequest::new("my-namespace")
685    ///         .with_priority(WarmingPriority::High)
686    /// ).await?;
687    ///
688    /// println!("Warmed {} entries", response.entries_warmed);
689    /// # Ok(())
690    /// # }
691    /// ```
692    #[instrument(skip(self, request), fields(namespace = %request.namespace, priority = ?request.priority))]
693    pub async fn warm_cache(&self, request: WarmCacheRequest) -> Result<WarmCacheResponse> {
694        let url = format!(
695            "{}/v1/namespaces/{}/cache/warm",
696            self.base_url, request.namespace
697        );
698        debug!(
699            "Warming cache for namespace {} with priority {:?}",
700            request.namespace, request.priority
701        );
702
703        let response = self.client.post(&url).json(&request).send().await?;
704        self.handle_response(response).await
705    }
706
707    /// Warm specific vectors by ID (convenience method)
708    #[instrument(skip(self, vector_ids))]
709    pub async fn warm_vectors(
710        &self,
711        namespace: &str,
712        vector_ids: Vec<String>,
713    ) -> Result<WarmCacheResponse> {
714        self.warm_cache(WarmCacheRequest::new(namespace).with_vector_ids(vector_ids))
715            .await
716    }
717
718    // ========================================================================
719    // Export Operations
720    // ========================================================================
721
722    /// Export vectors from a namespace with pagination
723    ///
724    /// This exports all vectors from a namespace, supporting pagination for
725    /// large datasets. Use the `next_cursor` from the response to fetch
726    /// subsequent pages.
727    ///
728    /// # Example
729    ///
730    /// ```rust,no_run
731    /// use dakera_client::{DakeraClient, ExportRequest};
732    ///
733    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
734    /// let client = DakeraClient::new("http://localhost:3000")?;
735    ///
736    /// // Export first page of vectors
737    /// let mut request = ExportRequest::new().with_top_k(1000);
738    /// let response = client.export("my-namespace", request).await?;
739    ///
740    /// println!("Exported {} vectors", response.returned_count);
741    ///
742    /// // Fetch next page if available
743    /// if let Some(cursor) = response.next_cursor {
744    ///     let next_request = ExportRequest::new().with_cursor(cursor);
745    ///     let next_response = client.export("my-namespace", next_request).await?;
746    /// }
747    /// # Ok(())
748    /// # }
749    /// ```
750    #[instrument(skip(self, request), fields(namespace = %namespace))]
751    pub async fn export(&self, namespace: &str, request: ExportRequest) -> Result<ExportResponse> {
752        let url = format!("{}/v1/namespaces/{}/export", self.base_url, namespace);
753        debug!(
754            "Exporting vectors from namespace {} (top_k={}, cursor={:?})",
755            namespace, request.top_k, request.cursor
756        );
757
758        let response = self.client.post(&url).json(&request).send().await?;
759        self.handle_response(response).await
760    }
761
762    /// Export all vectors from a namespace (convenience method)
763    ///
764    /// This is a simple wrapper that exports with default settings.
765    #[instrument(skip(self))]
766    pub async fn export_all(&self, namespace: &str) -> Result<ExportResponse> {
767        self.export(namespace, ExportRequest::new()).await
768    }
769
770    // ========================================================================
771    // Operations
772    // ========================================================================
773
774    /// Get system diagnostics
775    #[instrument(skip(self))]
776    pub async fn diagnostics(&self) -> Result<SystemDiagnostics> {
777        let url = format!("{}/ops/diagnostics", self.base_url);
778        let response = self.client.get(&url).send().await?;
779        self.handle_response(response).await
780    }
781
782    /// List background jobs
783    #[instrument(skip(self))]
784    pub async fn list_jobs(&self) -> Result<Vec<JobInfo>> {
785        let url = format!("{}/ops/jobs", self.base_url);
786        let response = self.client.get(&url).send().await?;
787        self.handle_response(response).await
788    }
789
790    /// Get a specific job status
791    #[instrument(skip(self))]
792    pub async fn get_job(&self, job_id: &str) -> Result<Option<JobInfo>> {
793        let url = format!("{}/ops/jobs/{}", self.base_url, job_id);
794        let response = self.client.get(&url).send().await?;
795
796        if response.status() == StatusCode::NOT_FOUND {
797            return Ok(None);
798        }
799
800        self.handle_response(response).await.map(Some)
801    }
802
803    /// Trigger index compaction
804    #[instrument(skip(self, request))]
805    pub async fn compact(&self, request: CompactionRequest) -> Result<CompactionResponse> {
806        let url = format!("{}/ops/compact", self.base_url);
807        let response = self.client.post(&url).json(&request).send().await?;
808        self.handle_response(response).await
809    }
810
811    /// Request graceful shutdown
812    #[instrument(skip(self))]
813    pub async fn shutdown(&self) -> Result<()> {
814        let url = format!("{}/ops/shutdown", self.base_url);
815        let response = self.client.post(&url).send().await?;
816
817        if response.status().is_success() {
818            Ok(())
819        } else {
820            let status = response.status().as_u16();
821            let text = response.text().await.unwrap_or_default();
822            Err(ClientError::Server {
823                status,
824                message: text,
825                code: None,
826            })
827        }
828    }
829
830    // ========================================================================
831    // Fetch by ID
832    // ========================================================================
833
834    /// Fetch vectors by their IDs
835    #[instrument(skip(self, request), fields(id_count = request.ids.len()))]
836    pub async fn fetch(&self, namespace: &str, request: FetchRequest) -> Result<FetchResponse> {
837        let url = format!("{}/v1/namespaces/{}/fetch", self.base_url, namespace);
838        debug!("Fetching {} vectors from {}", request.ids.len(), namespace);
839        let response = self.client.post(&url).json(&request).send().await?;
840        self.handle_response(response).await
841    }
842
843    /// Fetch vectors by IDs (convenience method)
844    #[instrument(skip(self))]
845    pub async fn fetch_by_ids(&self, namespace: &str, ids: &[&str]) -> Result<Vec<Vector>> {
846        let request = FetchRequest::new(ids.iter().map(|s| s.to_string()).collect());
847        self.fetch(namespace, request).await.map(|r| r.vectors)
848    }
849
850    // ========================================================================
851    // Text Auto-Embedding Operations
852    // ========================================================================
853
854    /// Upsert text documents with automatic server-side embedding generation
855    #[instrument(skip(self, request), fields(doc_count = request.documents.len()))]
856    pub async fn upsert_text(
857        &self,
858        namespace: &str,
859        request: UpsertTextRequest,
860    ) -> Result<TextUpsertResponse> {
861        let url = format!("{}/v1/namespaces/{}/upsert-text", self.base_url, namespace);
862        debug!(
863            "Upserting {} text documents to {}",
864            request.documents.len(),
865            namespace
866        );
867        let response = self.client.post(&url).json(&request).send().await?;
868        self.handle_response(response).await
869    }
870
871    /// Query using natural language text with automatic server-side embedding
872    #[instrument(skip(self, request), fields(top_k = request.top_k))]
873    pub async fn query_text(
874        &self,
875        namespace: &str,
876        request: QueryTextRequest,
877    ) -> Result<TextQueryResponse> {
878        let url = format!("{}/v1/namespaces/{}/query-text", self.base_url, namespace);
879        debug!("Text query in {} for: {}", namespace, request.text);
880        let response = self.client.post(&url).json(&request).send().await?;
881        self.handle_response(response).await
882    }
883
884    /// Query text (convenience method)
885    #[instrument(skip(self))]
886    pub async fn query_text_simple(
887        &self,
888        namespace: &str,
889        text: &str,
890        top_k: u32,
891    ) -> Result<TextQueryResponse> {
892        self.query_text(namespace, QueryTextRequest::new(text, top_k))
893            .await
894    }
895
896    /// Execute multiple text queries with automatic embedding in a single request
897    #[instrument(skip(self, request), fields(query_count = request.queries.len()))]
898    pub async fn batch_query_text(
899        &self,
900        namespace: &str,
901        request: BatchQueryTextRequest,
902    ) -> Result<BatchQueryTextResponse> {
903        let url = format!(
904            "{}/v1/namespaces/{}/batch-query-text",
905            self.base_url, namespace
906        );
907        debug!(
908            "Batch text query in {} with {} queries",
909            namespace,
910            request.queries.len()
911        );
912        let response = self.client.post(&url).json(&request).send().await?;
913        self.handle_response(response).await
914    }
915
916    // ========================================================================
917    // Private Helpers
918    // ========================================================================
919
920    /// Rate-limit headers from the most recent API response (OPS-1).
921    ///
922    /// Returns `None` until the first successful request has been made.
923    pub fn last_rate_limit_headers(&self) -> Option<RateLimitHeaders> {
924        self.last_rate_limit.lock().ok()?.clone()
925    }
926
927    /// Handle response and deserialize JSON
928    pub(crate) async fn handle_response<T: serde::de::DeserializeOwned>(
929        &self,
930        response: reqwest::Response,
931    ) -> Result<T> {
932        let status = response.status();
933
934        // OPS-1: capture rate-limit headers before consuming the response body
935        if let Ok(mut guard) = self.last_rate_limit.lock() {
936            *guard = Some(RateLimitHeaders::from_response(&response));
937        }
938
939        if status.is_success() {
940            Ok(response.json().await?)
941        } else {
942            let status_code = status.as_u16();
943            // Extract Retry-After before consuming response
944            let retry_after = response
945                .headers()
946                .get("Retry-After")
947                .and_then(|v| v.to_str().ok())
948                .and_then(|s| s.parse::<u64>().ok());
949            let text = response.text().await.unwrap_or_default();
950
951            if status_code == 429 {
952                return Err(ClientError::RateLimitExceeded { retry_after });
953            }
954
955            #[derive(Deserialize)]
956            struct ErrorBody {
957                error: Option<String>,
958                code: Option<ServerErrorCode>,
959            }
960
961            let (message, code) = if let Ok(body) = serde_json::from_str::<ErrorBody>(&text) {
962                (body.error.unwrap_or_else(|| text.clone()), body.code)
963            } else {
964                (text, None)
965            };
966
967            match status_code {
968                401 => Err(ClientError::Server {
969                    status: 401,
970                    message,
971                    code,
972                }),
973                403 => Err(ClientError::Authorization {
974                    status: 403,
975                    message,
976                    code,
977                }),
978                404 => match &code {
979                    Some(ServerErrorCode::NamespaceNotFound) => {
980                        Err(ClientError::NamespaceNotFound(message))
981                    }
982                    Some(ServerErrorCode::VectorNotFound) => {
983                        Err(ClientError::VectorNotFound(message))
984                    }
985                    _ => Err(ClientError::Server {
986                        status: 404,
987                        message,
988                        code,
989                    }),
990                },
991                _ => Err(ClientError::Server {
992                    status: status_code,
993                    message,
994                    code,
995                }),
996            }
997        }
998    }
999
1000    /// Execute a fallible async operation with retry logic and exponential backoff.
1001    ///
1002    /// Retries on transient errors (5xx, rate-limit, connection/timeout).
1003    /// Respects the `Retry-After` header when the server returns HTTP 429.
1004    /// Does NOT retry on 4xx client errors (except 429).
1005    ///
1006    /// NOTE: API call-site wiring is deferred to a follow-up (infrastructure PR).
1007    #[allow(dead_code)]
1008    pub(crate) async fn execute_with_retry<F, Fut, T>(&self, f: F) -> Result<T>
1009    where
1010        F: Fn() -> Fut,
1011        Fut: std::future::Future<Output = Result<T>>,
1012    {
1013        let rc = &self.retry_config;
1014
1015        for attempt in 0..rc.max_retries {
1016            match f().await {
1017                Ok(v) => return Ok(v),
1018                Err(e) => {
1019                    let is_last = attempt == rc.max_retries - 1;
1020                    if is_last || !e.is_retryable() {
1021                        return Err(e);
1022                    }
1023
1024                    let wait = match &e {
1025                        ClientError::RateLimitExceeded {
1026                            retry_after: Some(secs),
1027                        } => Duration::from_secs(*secs),
1028                        _ => {
1029                            let base_ms = rc.base_delay.as_millis() as f64;
1030                            let backoff_ms = base_ms * 2f64.powi(attempt as i32);
1031                            let capped_ms = backoff_ms.min(rc.max_delay.as_millis() as f64);
1032                            let final_ms = if rc.jitter {
1033                                // Simple deterministic jitter: vary between 50% and 150%
1034                                let seed = (attempt as u64).wrapping_mul(6364136223846793005);
1035                                let factor = 0.5 + (seed % 1000) as f64 / 1000.0;
1036                                capped_ms * factor
1037                            } else {
1038                                capped_ms
1039                            };
1040                            Duration::from_millis(final_ms as u64)
1041                        }
1042                    };
1043
1044                    tokio::time::sleep(wait).await;
1045                }
1046            }
1047        }
1048
1049        // Unreachable: the loop always returns on the last attempt
1050        Err(ClientError::Config("retry loop exhausted".to_string()))
1051    }
1052}
1053
/// Builder for DakeraClient
///
/// Collects the settings that `build` turns into a configured
/// [`DakeraClient`].
#[derive(Debug)]
pub struct DakeraClientBuilder {
    /// Base URL as supplied by the caller; normalized and validated in `build`.
    base_url: String,
    /// Overall request timeout passed to the HTTP client.
    timeout: Duration,
    /// Connection-establishment timeout; `None` makes `build` fall back to `timeout`.
    connect_timeout: Option<Duration>,
    /// Retry configuration handed over to the built client.
    retry_config: RetryConfig,
    /// Custom user agent; `None` makes `build` use `dakera-client/<crate version>`.
    user_agent: Option<String>,
}
1063
1064impl DakeraClientBuilder {
1065    /// Create a new builder
1066    pub fn new(base_url: impl Into<String>) -> Self {
1067        Self {
1068            base_url: base_url.into(),
1069            timeout: Duration::from_secs(DEFAULT_TIMEOUT_SECS),
1070            connect_timeout: None,
1071            retry_config: RetryConfig::default(),
1072            user_agent: None,
1073        }
1074    }
1075
1076    /// Set the request timeout
1077    pub fn timeout(mut self, timeout: Duration) -> Self {
1078        self.timeout = timeout;
1079        self
1080    }
1081
1082    /// Set the request timeout in seconds
1083    pub fn timeout_secs(mut self, secs: u64) -> Self {
1084        self.timeout = Duration::from_secs(secs);
1085        self
1086    }
1087
1088    /// Set the connection establishment timeout (defaults to `timeout`).
1089    pub fn connect_timeout(mut self, timeout: Duration) -> Self {
1090        self.connect_timeout = Some(timeout);
1091        self
1092    }
1093
1094    /// Set fine-grained retry configuration.
1095    pub fn retry_config(mut self, config: RetryConfig) -> Self {
1096        self.retry_config = config;
1097        self
1098    }
1099
1100    /// Set the maximum number of retry attempts.
1101    pub fn max_retries(mut self, max_retries: u32) -> Self {
1102        self.retry_config.max_retries = max_retries;
1103        self
1104    }
1105
1106    /// Set a custom user agent
1107    pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
1108        self.user_agent = Some(user_agent.into());
1109        self
1110    }
1111
1112    /// Build the client
1113    pub fn build(self) -> Result<DakeraClient> {
1114        // Normalize base URL (remove trailing slash)
1115        let base_url = self.base_url.trim_end_matches('/').to_string();
1116
1117        // Validate URL
1118        if !base_url.starts_with("http://") && !base_url.starts_with("https://") {
1119            return Err(ClientError::InvalidUrl(
1120                "URL must start with http:// or https://".to_string(),
1121            ));
1122        }
1123
1124        let user_agent = self
1125            .user_agent
1126            .unwrap_or_else(|| format!("dakera-client/{}", env!("CARGO_PKG_VERSION")));
1127
1128        let connect_timeout = self.connect_timeout.unwrap_or(self.timeout);
1129
1130        let client = Client::builder()
1131            .timeout(self.timeout)
1132            .connect_timeout(connect_timeout)
1133            .user_agent(user_agent)
1134            .build()
1135            .map_err(|e| ClientError::Config(e.to_string()))?;
1136
1137        Ok(DakeraClient {
1138            client,
1139            base_url,
1140            retry_config: self.retry_config,
1141            last_rate_limit: Arc::new(Mutex::new(None)),
1142        })
1143    }
1144}
1145
1146// ============================================================================
1147// SSE Streaming (CE-1)
1148// ============================================================================
1149
1150impl DakeraClient {
1151    /// Subscribe to namespace-scoped SSE events.
1152    ///
1153    /// Opens a long-lived connection to `GET /v1/namespaces/{namespace}/events`
1154    /// and returns a [`tokio::sync::mpsc::Receiver`] that yields
1155    /// [`DakeraEvent`] results as they arrive.  The background task exits when
1156    /// the server closes the stream or the receiver is dropped.
1157    ///
1158    /// Requires a Read-scoped API key.
1159    ///
1160    /// # Example
1161    ///
1162    /// ```rust,no_run
1163    /// use dakera_client::DakeraClient;
1164    ///
1165    /// #[tokio::main]
1166    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
1167    ///     let client = DakeraClient::new("http://localhost:3000")?;
1168    ///     let mut rx = client.stream_namespace_events("my-ns").await?;
1169    ///     while let Some(result) = rx.recv().await {
1170    ///         println!("{:?}", result?);
1171    ///     }
1172    ///     Ok(())
1173    /// }
1174    /// ```
1175    pub async fn stream_namespace_events(
1176        &self,
1177        namespace: &str,
1178    ) -> Result<tokio::sync::mpsc::Receiver<Result<crate::events::DakeraEvent>>> {
1179        let url = format!(
1180            "{}/v1/namespaces/{}/events",
1181            self.base_url,
1182            urlencoding::encode(namespace)
1183        );
1184        self.stream_sse(url).await
1185    }
1186
1187    /// Subscribe to the global SSE event stream (all namespaces).
1188    ///
1189    /// Opens a long-lived connection to `GET /ops/events` and returns a
1190    /// [`tokio::sync::mpsc::Receiver`] that yields [`DakeraEvent`] results.
1191    ///
1192    /// Requires an Admin-scoped API key.
1193    pub async fn stream_global_events(
1194        &self,
1195    ) -> Result<tokio::sync::mpsc::Receiver<Result<crate::events::DakeraEvent>>> {
1196        let url = format!("{}/ops/events", self.base_url);
1197        self.stream_sse(url).await
1198    }
1199
1200    /// Subscribe to the memory lifecycle SSE event stream (DASH-B).
1201    ///
1202    /// Opens a long-lived connection to `GET /v1/events/stream` and returns a
1203    /// [`tokio::sync::mpsc::Receiver`] that yields [`MemoryEvent`] results as
1204    /// they arrive.  The background task exits when the server closes the stream
1205    /// or the receiver is dropped.
1206    ///
1207    /// Requires a Read-scoped API key.
1208    pub async fn stream_memory_events(
1209        &self,
1210    ) -> Result<tokio::sync::mpsc::Receiver<Result<crate::events::MemoryEvent>>> {
1211        let url = format!("{}/v1/events/stream", self.base_url);
1212        self.stream_sse(url).await
1213    }
1214
1215    /// Low-level generic SSE streaming helper.
1216    async fn stream_sse<T>(&self, url: String) -> Result<tokio::sync::mpsc::Receiver<Result<T>>>
1217    where
1218        T: serde::de::DeserializeOwned + Send + 'static,
1219    {
1220        use futures_util::StreamExt;
1221
1222        let response = self
1223            .client
1224            .get(&url)
1225            .header("Accept", "text/event-stream")
1226            .header("Cache-Control", "no-cache")
1227            .send()
1228            .await?;
1229
1230        if !response.status().is_success() {
1231            let status = response.status().as_u16();
1232            let body = response.text().await.unwrap_or_default();
1233            return Err(ClientError::Server {
1234                status,
1235                message: body,
1236                code: None,
1237            });
1238        }
1239
1240        let (tx, rx) = tokio::sync::mpsc::channel(64);
1241
1242        tokio::spawn(async move {
1243            let mut byte_stream = response.bytes_stream();
1244            let mut remaining = String::new();
1245            let mut data_lines: Vec<String> = Vec::new();
1246
1247            while let Some(chunk) = byte_stream.next().await {
1248                match chunk {
1249                    Ok(bytes) => {
1250                        remaining.push_str(&String::from_utf8_lossy(&bytes));
1251                        while let Some(pos) = remaining.find('\n') {
1252                            let raw = &remaining[..pos];
1253                            let line = raw.trim_end_matches('\r').to_string();
1254                            remaining = remaining[pos + 1..].to_string();
1255
1256                            if line.starts_with(':') {
1257                                // SSE comment / heartbeat — skip
1258                            } else if let Some(data) = line.strip_prefix("data:") {
1259                                data_lines.push(data.trim_start().to_string());
1260                            } else if line.is_empty() {
1261                                if !data_lines.is_empty() {
1262                                    let payload = data_lines.join("\n");
1263                                    data_lines.clear();
1264                                    let result = serde_json::from_str::<T>(&payload)
1265                                        .map_err(ClientError::Json);
1266                                    if tx.send(result).await.is_err() {
1267                                        return; // receiver dropped
1268                                    }
1269                                }
1270                            } else {
1271                                // Unrecognised field (e.g. "event:") — ignore
1272                            }
1273                        }
1274                    }
1275                    Err(e) => {
1276                        let _ = tx.send(Err(ClientError::Http(e))).await;
1277                        return;
1278                    }
1279                }
1280            }
1281        });
1282
1283        Ok(rx)
1284    }
1285}
1286
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU32, Ordering};
    use std::sync::Arc;

    /// Base URL shared by the tests; no test actually connects to it.
    const LOCAL_URL: &str = "http://localhost:3000";

    /// Builder already pointed at [`LOCAL_URL`].
    fn local_builder() -> DakeraClientBuilder {
        DakeraClient::builder(LOCAL_URL)
    }

    /// Server error with the given status and message and no error code.
    fn server_error(status: u16, message: &str) -> ClientError {
        ClientError::Server {
            status,
            message: message.to_string(),
            code: None,
        }
    }

    // =========================================================================
    // Client construction
    // =========================================================================

    #[test]
    fn test_client_builder() {
        assert!(DakeraClient::new(LOCAL_URL).is_ok());
    }

    #[test]
    fn test_client_builder_with_options() {
        let built = local_builder()
            .timeout_secs(60)
            .user_agent("test-client/1.0")
            .build();
        assert!(built.is_ok());
    }

    #[test]
    fn test_client_builder_invalid_url() {
        assert!(DakeraClient::new("invalid-url").is_err());
    }

    #[test]
    fn test_client_builder_trailing_slash() {
        let client = DakeraClient::new("http://localhost:3000/").unwrap();
        assert!(!client.base_url.ends_with('/'));
    }

    // =========================================================================
    // Request / value type builders
    // =========================================================================

    #[test]
    fn test_vector_creation() {
        let vector = Vector::new("test", vec![0.1, 0.2, 0.3]);
        assert_eq!(vector.id, "test");
        assert_eq!(vector.values.len(), 3);
        assert!(vector.metadata.is_none());
    }

    #[test]
    fn test_query_request_builder() {
        let filter = serde_json::json!({"category": "test"});
        let request = QueryRequest::new(vec![0.1, 0.2], 10)
            .with_filter(filter)
            .include_metadata(false);

        assert_eq!(request.top_k, 10);
        assert!(request.filter.is_some());
        assert!(!request.include_metadata);
    }

    #[test]
    fn test_hybrid_search_request() {
        let request = HybridSearchRequest::new(vec![0.1], "test query", 5).with_vector_weight(0.7);

        assert_eq!(request.vector_weight, 0.7);
        assert_eq!(request.text, "test query");
    }

    #[test]
    fn test_hybrid_search_weight_clamping() {
        // A weight above 1.0 should be clamped to 1.0
        let request = HybridSearchRequest::new(vec![0.1], "test", 5).with_vector_weight(1.5);

        assert_eq!(request.vector_weight, 1.0);
    }

    #[test]
    fn test_text_document_builder() {
        let document = TextDocument::new("doc1", "Hello world").with_ttl(3600);

        assert_eq!(document.id, "doc1");
        assert_eq!(document.text, "Hello world");
        assert_eq!(document.ttl_seconds, Some(3600));
        assert!(document.metadata.is_none());
    }

    #[test]
    fn test_upsert_text_request_builder() {
        let documents = vec![
            TextDocument::new("doc1", "Hello"),
            TextDocument::new("doc2", "World"),
        ];
        let request = UpsertTextRequest::new(documents).with_model(EmbeddingModel::BgeSmall);

        assert_eq!(request.documents.len(), 2);
        assert_eq!(request.model, Some(EmbeddingModel::BgeSmall));
    }

    #[test]
    fn test_query_text_request_builder() {
        let filter = serde_json::json!({"category": "docs"});
        let request = QueryTextRequest::new("semantic search query", 5)
            .with_filter(filter)
            .include_vectors(true)
            .with_model(EmbeddingModel::E5Small);

        assert_eq!(request.text, "semantic search query");
        assert_eq!(request.top_k, 5);
        assert!(request.filter.is_some());
        assert!(request.include_vectors);
        assert_eq!(request.model, Some(EmbeddingModel::E5Small));
    }

    #[test]
    fn test_fetch_request_builder() {
        let ids = vec!["id1".to_string(), "id2".to_string()];
        let request = FetchRequest::new(ids);

        assert_eq!(request.ids.len(), 2);
        assert!(request.include_values);
        assert!(request.include_metadata);
    }

    #[test]
    fn test_create_namespace_request_builder() {
        let request = CreateNamespaceRequest::new()
            .with_dimensions(384)
            .with_index_type("hnsw");

        assert_eq!(request.dimensions, Some(384));
        assert_eq!(request.index_type.as_deref(), Some("hnsw"));
    }

    #[test]
    fn test_batch_query_text_request() {
        let queries = vec!["query one".to_string(), "query two".to_string()];
        let request = BatchQueryTextRequest::new(queries, 10);

        assert_eq!(request.queries.len(), 2);
        assert_eq!(request.top_k, 10);
        assert!(!request.include_vectors);
        assert!(request.model.is_none());
    }

    // =========================================================================
    // RetryConfig tests
    // =========================================================================

    #[test]
    fn test_retry_config_defaults() {
        let defaults = RetryConfig::default();
        assert_eq!(defaults.max_retries, 3);
        assert_eq!(defaults.base_delay, Duration::from_millis(100));
        assert_eq!(defaults.max_delay, Duration::from_secs(60));
        assert!(defaults.jitter);
    }

    #[test]
    fn test_builder_connect_timeout() {
        let client = local_builder()
            .connect_timeout(Duration::from_secs(5))
            .timeout_secs(30)
            .build()
            .unwrap();
        // Building with a separate connect timeout succeeded
        assert!(client.base_url.starts_with("http"));
    }

    #[test]
    fn test_builder_max_retries() {
        let client = local_builder().max_retries(5).build().unwrap();
        assert_eq!(client.retry_config.max_retries, 5);
    }

    #[test]
    fn test_builder_retry_config() {
        let custom = RetryConfig {
            max_retries: 7,
            base_delay: Duration::from_millis(200),
            max_delay: Duration::from_secs(30),
            jitter: false,
        };
        let client = local_builder().retry_config(custom).build().unwrap();
        assert_eq!(client.retry_config.max_retries, 7);
        assert!(!client.retry_config.jitter);
    }

    #[test]
    fn test_rate_limit_error_retryable() {
        let error = ClientError::RateLimitExceeded { retry_after: None };
        assert!(error.is_retryable());
    }

    #[test]
    fn test_rate_limit_error_with_retry_after_zero() {
        // retry_after: Some(0) must stay Some(0) rather than count as missing
        let error = ClientError::RateLimitExceeded {
            retry_after: Some(0),
        };
        assert!(error.is_retryable());
        match &error {
            ClientError::RateLimitExceeded {
                retry_after: Some(secs),
            } => assert_eq!(*secs, 0u64),
            _ => panic!("unexpected variant"),
        }
    }

    // =========================================================================
    // execute_with_retry tests
    // =========================================================================

    #[tokio::test]
    async fn test_execute_with_retry_succeeds_immediately() {
        let client = local_builder().max_retries(3).build().unwrap();

        let calls = Arc::new(AtomicU32::new(0));
        let outcome = client
            .execute_with_retry(|| {
                let calls = Arc::clone(&calls);
                async move {
                    calls.fetch_add(1, Ordering::SeqCst);
                    Ok::<u32, ClientError>(42)
                }
            })
            .await;

        assert_eq!(outcome.unwrap(), 42);
        assert_eq!(calls.load(Ordering::SeqCst), 1);
    }

    #[tokio::test]
    async fn test_execute_with_retry_no_retry_on_4xx() {
        let client = local_builder().max_retries(3).build().unwrap();

        let calls = Arc::new(AtomicU32::new(0));
        let outcome = client
            .execute_with_retry(|| {
                let calls = Arc::clone(&calls);
                async move {
                    calls.fetch_add(1, Ordering::SeqCst);
                    Err::<u32, ClientError>(server_error(400, "bad request"))
                }
            })
            .await;

        assert!(outcome.is_err());
        // A 4xx must be returned after the first call, without retrying
        assert_eq!(calls.load(Ordering::SeqCst), 1);
    }

    #[tokio::test]
    async fn test_execute_with_retry_retries_on_5xx() {
        // Zero delays keep the test instantaneous.
        let no_wait = RetryConfig {
            max_retries: 3,
            base_delay: Duration::from_millis(0),
            max_delay: Duration::from_millis(0),
            jitter: false,
        };
        let client = local_builder().retry_config(no_wait).build().unwrap();

        let calls = Arc::new(AtomicU32::new(0));
        let outcome = client
            .execute_with_retry(|| {
                let calls = Arc::clone(&calls);
                async move {
                    // The first two calls fail with 503, the third succeeds.
                    let previous_calls = calls.fetch_add(1, Ordering::SeqCst);
                    if previous_calls < 2 {
                        Err::<u32, ClientError>(server_error(503, "unavailable"))
                    } else {
                        Ok::<u32, ClientError>(99)
                    }
                }
            })
            .await;

        assert_eq!(outcome.unwrap(), 99);
        assert_eq!(calls.load(Ordering::SeqCst), 3);
    }
}