vectorizer_sdk/client/vectors.rs
1//! Vector-level surface: get, batch-insert texts, embed.
2//!
3//! Single-vector retrieval, batch text insertion, and on-server
4//! embedding generation. Search lives in [`super::search`];
5//! collection-level CRUD in [`super::collections`].
6
7use super::VectorizerClient;
8use crate::error::{Result, VectorizerError};
9use crate::models::*;
10
11impl VectorizerClient {
12 /// Fetch one vector by id.
13 ///
14 /// **Server caveat (observed on `hivehub/vectorizer:3.0.x`):** the
15 /// `GET /collections/{c}/vectors/{id}` endpoint currently returns
16 /// HTTP 200 with a synthetic uniform-vector payload
17 /// (`[0.1, 0.1, …]`) even for ids that don't exist. Callers that
18 /// need real miss detection should probe via
19 /// [`VectorizerClient::list_vectors`] or search and not trust an
20 /// `Ok(Vector)` as proof of existence until the server fix ships.
21 pub async fn get_vector(&self, collection: &str, vector_id: &str) -> Result<Vector> {
22 let response = self
23 .make_request(
24 "GET",
25 &format!("/collections/{collection}/vectors/{vector_id}"),
26 None,
27 )
28 .await?;
29 let vector: Vector = serde_json::from_str(&response).map_err(|e| {
30 VectorizerError::server(format!("Failed to parse get vector response: {e}"))
31 })?;
32 Ok(vector)
33 }
34
35 /// Insert a batch of texts into a collection. The server embeds
36 /// each entry with the collection's configured provider (BM25 by
37 /// default; FastEmbed ONNX when selected in `config.yml`).
38 ///
39 /// Wire contract: the server's `POST /insert_texts` handler
40 /// expects `{ "collection": "<name>", "texts": [...] }` — the
41 /// collection is a top-level field in the JSON body, not a path
42 /// segment. The earlier `POST /collections/{c}/documents` path
43 /// this method used was never served (the 3.0.x server returns
44 /// 404 for it) and has been removed.
45 ///
46 /// Per-entry `id` field: the server **reassigns** every inserted
47 /// vector a server-generated UUID regardless of what the caller
48 /// sent. The original client id is stashed as `client_id` on the
49 /// response entry. Callers that need idempotency by client id
50 /// should key off the `client_id` round-trip, not the
51 /// server-assigned UUID.
52 pub async fn insert_texts(
53 &self,
54 collection: &str,
55 texts: Vec<BatchTextRequest>,
56 ) -> Result<BatchResponse> {
57 let payload = serde_json::json!({
58 "collection": collection,
59 "texts": texts,
60 });
61 let response = self
62 .make_request("POST", "/insert_texts", Some(payload))
63 .await?;
64 let mut batch_response: BatchResponse = serde_json::from_str(&response).map_err(|e| {
65 VectorizerError::server(format!("Failed to parse insert texts response: {e}"))
66 })?;
67 // v3 omits the pre-v3 `success` field and instead emits
68 // `inserted` / `failed` counts (aliased onto
69 // `successful_operations` / `failed_operations`). The struct
70 // doc-comment tells callers to derive the flag themselves; do
71 // that here once so existing consumers (and the SDK integration
72 // suite) keep working across the shape change.
73 if !batch_response.success
74 && batch_response.failed_operations == 0
75 && batch_response.successful_operations > 0
76 {
77 batch_response.success = true;
78 }
79 // v3 also drops the pre-v3 `operation` tag. The call site here
80 // unambiguously *is* an insert, so fill it in if the server
81 // didn't — callers that assert on the tag keep working.
82 if batch_response.operation.is_empty() {
83 batch_response.operation = "insert".to_string();
84 }
85 Ok(batch_response)
86 }
87
88 /// Generate an embedding for `text` using either the supplied
89 /// `model` name or the server default.
90 pub async fn embed_text(&self, text: &str, model: Option<&str>) -> Result<EmbeddingResponse> {
91 let mut payload = serde_json::Map::new();
92 payload.insert(
93 "text".to_string(),
94 serde_json::Value::String(text.to_string()),
95 );
96 if let Some(model) = model {
97 payload.insert(
98 "model".to_string(),
99 serde_json::Value::String(model.to_string()),
100 );
101 }
102 let response = self
103 .make_request("POST", "/embed", Some(serde_json::Value::Object(payload)))
104 .await?;
105 let embedding_response: EmbeddingResponse =
106 serde_json::from_str(&response).map_err(|e| {
107 VectorizerError::server(format!("Failed to parse embedding response: {e}"))
108 })?;
109 Ok(embedding_response)
110 }
111}