1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
//! Vector-level surface: get, batch-insert texts, embed.
//!
//! Single-vector retrieval, batch text insertion, and on-server
//! embedding generation. Search lives in [`super::search`];
//! collection-level CRUD in [`super::collections`].
use super::VectorizerClient;
use crate::error::{Result, VectorizerError};
use crate::models::*;
impl VectorizerClient {
    /// Fetch one vector by id.
    ///
    /// Issues `GET /collections/{collection}/vectors/{vector_id}` and
    /// deserializes the response body into a [`Vector`]. Both path
    /// parameters are percent-encoded before being placed in the URL,
    /// so a collection name or id containing reserved characters
    /// (`/`, `?`, `#`, `%`, whitespace, …) stays inside its own path
    /// segment instead of changing the shape of the request path.
    ///
    /// **Server caveat (observed on `hivehub/vectorizer:3.0.x`):** the
    /// `GET /collections/{c}/vectors/{id}` endpoint currently returns
    /// HTTP 200 with a synthetic uniform-vector payload
    /// (`[0.1, 0.1, …]`) even for ids that don't exist. Callers that
    /// need real miss detection should probe via
    /// [`VectorizerClient::list_vectors`] or search and not trust an
    /// `Ok(Vector)` as proof of existence until the server fix ships.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying request, and
    /// returns an error built with `VectorizerError::server` when the
    /// response body cannot be deserialized into a [`Vector`].
    pub async fn get_vector(&self, collection: &str, vector_id: &str) -> Result<Vector> {
        // Encode each segment separately: the `/` separators written
        // here must survive, the ones inside caller data must not.
        let path = format!(
            "/collections/{}/vectors/{}",
            encode_path_segment(collection),
            encode_path_segment(vector_id),
        );
        let response = self.make_request("GET", &path, None).await?;

        serde_json::from_str(&response).map_err(|e| {
            VectorizerError::server(format!("Failed to parse get vector response: {e}"))
        })
    }

    /// Insert a batch of texts into a collection. The server embeds
    /// each entry with the collection's configured provider (BM25 by
    /// default; FastEmbed ONNX when selected in `config.yml`).
    ///
    /// Wire contract: the server's `POST /insert_texts` handler
    /// expects `{ "collection": "<name>", "texts": [...] }` — the
    /// collection is a top-level field in the JSON body, not a path
    /// segment. The earlier `POST /collections/{c}/documents` path
    /// this method used was never served (the 3.0.x server returns
    /// 404 for it) and has been removed.
    ///
    /// Per-entry `id` field: the server **reassigns** every inserted
    /// vector a server-generated UUID regardless of what the caller
    /// sent. The original client id is stashed as `client_id` on the
    /// response entry. Callers that need idempotency by client id
    /// should key off the `client_id` round-trip, not the
    /// server-assigned UUID.
    ///
    /// The returned [`BatchResponse`] is normalized before it is
    /// handed back: `success` is set to `true` when the server left it
    /// `false` but reported zero failures and at least one successful
    /// operation, and an empty `operation` tag is filled in with
    /// `"insert"`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying request, and
    /// returns an error built with `VectorizerError::server` when the
    /// response body cannot be deserialized into a [`BatchResponse`].
    pub async fn insert_texts(
        &self,
        collection: &str,
        texts: Vec<BatchTextRequest>,
    ) -> Result<BatchResponse> {
        let payload = serde_json::json!({
            "collection": collection,
            "texts": texts,
        });
        let response = self
            .make_request("POST", "/insert_texts", Some(payload))
            .await?;
        let mut batch_response: BatchResponse = serde_json::from_str(&response).map_err(|e| {
            VectorizerError::server(format!("Failed to parse insert texts response: {e}"))
        })?;

        // v3 omits the pre-v3 `success` field and instead emits
        // `inserted` / `failed` counts (aliased onto
        // `successful_operations` / `failed_operations`). The struct
        // doc-comment tells callers to derive the flag themselves; do
        // that here once so existing consumers (and the SDK integration
        // suite) keep working across the shape change.
        if !batch_response.success
            && batch_response.failed_operations == 0
            && batch_response.successful_operations > 0
        {
            batch_response.success = true;
        }

        // v3 also drops the pre-v3 `operation` tag. The call site here
        // unambiguously *is* an insert, so fill it in if the server
        // didn't — callers that assert on the tag keep working.
        if batch_response.operation.is_empty() {
            batch_response.operation = "insert".to_string();
        }

        Ok(batch_response)
    }

    /// Generate an embedding for `text` using either the supplied
    /// `model` name or the server default.
    ///
    /// Sends `POST /embed` with a JSON object holding `text` and, only
    /// when `model` is `Some`, a `model` key. When `model` is `None`
    /// the key is omitted entirely rather than sent as `null`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying request, and
    /// returns an error built with `VectorizerError::server` when the
    /// response body cannot be deserialized into an
    /// [`EmbeddingResponse`].
    pub async fn embed_text(&self, text: &str, model: Option<&str>) -> Result<EmbeddingResponse> {
        let mut payload = serde_json::json!({ "text": text });
        if let Some(model) = model {
            // Indexing a JSON object by a missing key inserts it.
            payload["model"] = serde_json::Value::from(model);
        }

        let response = self.make_request("POST", "/embed", Some(payload)).await?;

        serde_json::from_str(&response).map_err(|e| {
            VectorizerError::server(format!("Failed to parse embedding response: {e}"))
        })
    }
}

/// Percent-encode `segment` for use as a single URL path segment.
///
/// Every byte outside the RFC 3986 "unreserved" set (ASCII letters,
/// digits, `-`, `.`, `_`, `~`) is written as `%XX` with uppercase hex
/// digits. Non-ASCII text is encoded byte-by-byte from its UTF-8
/// representation. Input made only of unreserved characters is
/// returned unchanged.
fn encode_path_segment(segment: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(segment.len());
    for &byte in segment.as_bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~') {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}