vectorizer_sdk/client/vectors.rs
1//! Vector-level surface: get, batch-insert texts, embed.
2//!
3//! Single-vector retrieval, batch text insertion, and on-server
4//! embedding generation. Search lives in [`super::search`];
5//! collection-level CRUD in [`super::collections`].
6
7use super::VectorizerClient;
8use crate::error::{Result, VectorizerError};
9use crate::models::*;
10
11impl VectorizerClient {
12 /// Fetch one vector by id.
13 ///
14 /// **Server caveat (observed on `hivehub/vectorizer:3.0.x`):** the
15 /// `GET /collections/{c}/vectors/{id}` endpoint currently returns
16 /// HTTP 200 with a synthetic uniform-vector payload
17 /// (`[0.1, 0.1, …]`) even for ids that don't exist. Callers that
18 /// need real miss detection should probe via
19 /// [`VectorizerClient::list_vectors`] or search and not trust an
20 /// `Ok(Vector)` as proof of existence until the server fix ships.
21 pub async fn get_vector(&self, collection: &str, vector_id: &str) -> Result<Vector> {
22 let response = self
23 .make_request(
24 "GET",
25 &format!("/collections/{collection}/vectors/{vector_id}"),
26 None,
27 )
28 .await?;
29 let vector: Vector = serde_json::from_str(&response).map_err(|e| {
30 VectorizerError::server(format!("Failed to parse get vector response: {e}"))
31 })?;
32 Ok(vector)
33 }
34
35 /// Insert a batch of texts into a collection. The server embeds
36 /// each entry with the collection's configured provider (BM25 by
37 /// default; FastEmbed ONNX when selected in `config.yml`).
38 ///
39 /// Wire contract: the server's `POST /insert_texts` handler
40 /// expects `{ "collection": "<name>", "texts": [...] }` — the
41 /// collection is a top-level field in the JSON body, not a path
42 /// segment. The earlier `POST /collections/{c}/documents` path
43 /// this method used was never served (the 3.0.x server returns
44 /// 404 for it) and has been removed.
45 ///
46 /// Per-entry `id` field: the server **reassigns** every inserted
47 /// vector a server-generated UUID regardless of what the caller
48 /// sent. The original client id is stashed as `client_id` on the
49 /// response entry. Callers that need idempotency by client id
50 /// should key off the `client_id` round-trip, not the
51 /// server-assigned UUID.
52 pub async fn insert_texts(
53 &self,
54 collection: &str,
55 texts: Vec<BatchTextRequest>,
56 ) -> Result<BatchResponse> {
57 let payload = serde_json::json!({
58 "collection": collection,
59 "texts": texts,
60 });
61 let response = self
62 .make_request("POST", "/insert_texts", Some(payload))
63 .await?;
64 let batch_response: BatchResponse = serde_json::from_str(&response).map_err(|e| {
65 VectorizerError::server(format!("Failed to parse insert texts response: {e}"))
66 })?;
67 Ok(batch_response)
68 }
69
70 /// Generate an embedding for `text` using either the supplied
71 /// `model` name or the server default.
72 pub async fn embed_text(&self, text: &str, model: Option<&str>) -> Result<EmbeddingResponse> {
73 let mut payload = serde_json::Map::new();
74 payload.insert(
75 "text".to_string(),
76 serde_json::Value::String(text.to_string()),
77 );
78 if let Some(model) = model {
79 payload.insert(
80 "model".to_string(),
81 serde_json::Value::String(model.to_string()),
82 );
83 }
84 let response = self
85 .make_request("POST", "/embed", Some(serde_json::Value::Object(payload)))
86 .await?;
87 let embedding_response: EmbeddingResponse =
88 serde_json::from_str(&response).map_err(|e| {
89 VectorizerError::server(format!("Failed to parse embedding response: {e}"))
90 })?;
91 Ok(embedding_response)
92 }
93}