// synaptic_lark/vector_store.rs
use async_trait::async_trait;
use serde_json::{json, Value};
use std::collections::HashMap;
use synaptic_core::{Document, Embeddings, SynapticError, VectorStore};

use crate::{auth::TokenCache, LarkConfig};
8pub struct LarkVectorStore {
14 token_cache: TokenCache,
15 base_url: String,
16 client: reqwest::Client,
17 dataset_id: String,
18}
19
20impl LarkVectorStore {
21 pub fn new(config: LarkConfig, dataset_id: impl Into<String>) -> Self {
23 let base_url = config.base_url.clone();
24 Self {
25 token_cache: config.token_cache(),
26 base_url,
27 client: reqwest::Client::new(),
28 dataset_id: dataset_id.into(),
29 }
30 }
31
32 pub fn dataset_id(&self) -> &str {
34 &self.dataset_id
35 }
36
37 fn check(body: &Value, ctx: &str) -> Result<(), SynapticError> {
38 if body["code"].as_i64().unwrap_or(-1) != 0 {
39 Err(SynapticError::VectorStore(format!(
40 "LarkVectorStore ({ctx}): {}",
41 body["msg"].as_str().unwrap_or("unknown")
42 )))
43 } else {
44 Ok(())
45 }
46 }
47}
48
49#[async_trait]
50impl VectorStore for LarkVectorStore {
51 async fn add_documents(
52 &self,
53 docs: Vec<Document>,
54 _embeddings: &dyn Embeddings,
55 ) -> Result<Vec<String>, SynapticError> {
56 let token = self.token_cache.get_token().await?;
57 let url = format!(
58 "{}/search/v2/datasets/{}/documents/batch_create",
59 self.base_url, self.dataset_id
60 );
61 let items: Vec<Value> = docs
62 .iter()
63 .map(|d| {
64 json!({
65 "id": d.id,
66 "title": d.metadata.get("title").and_then(|v| v.as_str()).unwrap_or(&d.id),
67 "body": d.content,
68 "meta": d.metadata,
69 })
70 })
71 .collect();
72
73 let body = json!({ "documents": items });
74 let resp = self
75 .client
76 .post(&url)
77 .bearer_auth(&token)
78 .json(&body)
79 .send()
80 .await
81 .map_err(|e| SynapticError::VectorStore(format!("add_documents: {e}")))?;
82 let rb: Value = resp
83 .json()
84 .await
85 .map_err(|e| SynapticError::VectorStore(format!("add_documents parse: {e}")))?;
86 Self::check(&rb, "add_documents")?;
87
88 Ok(docs.iter().map(|d| d.id.clone()).collect())
89 }
90
91 async fn similarity_search(
92 &self,
93 query: &str,
94 k: usize,
95 _embeddings: &dyn Embeddings,
96 ) -> Result<Vec<Document>, SynapticError> {
97 let token = self.token_cache.get_token().await?;
98 let url = format!(
99 "{}/search/v2/datasets/{}/search",
100 self.base_url, self.dataset_id
101 );
102 let body = json!({ "query": query, "page_size": k });
103 let resp = self
104 .client
105 .post(&url)
106 .bearer_auth(&token)
107 .json(&body)
108 .send()
109 .await
110 .map_err(|e| SynapticError::VectorStore(format!("similarity_search: {e}")))?;
111 let rb: Value = resp
112 .json()
113 .await
114 .map_err(|e| SynapticError::VectorStore(format!("similarity_search parse: {e}")))?;
115 Self::check(&rb, "similarity_search")?;
116
117 let items = rb["data"]["items"].as_array().cloned().unwrap_or_default();
118 Ok(items
119 .iter()
120 .map(|item| {
121 let id = item["id"].as_str().unwrap_or("").to_string();
122 let content = item["body"].as_str().unwrap_or("").to_string();
123 let mut metadata: HashMap<String, Value> = HashMap::new();
124 if let Some(m) = item["meta"].as_object() {
125 for (k, v) in m {
126 metadata.insert(k.clone(), v.clone());
127 }
128 }
129 if let Some(title) = item["title"].as_str() {
130 metadata.insert("title".to_string(), Value::String(title.to_string()));
131 }
132 Document {
133 id,
134 content,
135 metadata,
136 }
137 })
138 .collect())
139 }
140
141 async fn similarity_search_with_score(
142 &self,
143 query: &str,
144 k: usize,
145 embeddings: &dyn Embeddings,
146 ) -> Result<Vec<(Document, f32)>, SynapticError> {
147 let docs = self.similarity_search(query, k, embeddings).await?;
149 Ok(docs.into_iter().map(|d| (d, 1.0_f32)).collect())
150 }
151
152 async fn similarity_search_by_vector(
153 &self,
154 _embedding: &[f32],
155 _k: usize,
156 ) -> Result<Vec<Document>, SynapticError> {
157 Err(SynapticError::VectorStore(
158 "LarkVectorStore: similarity_search_by_vector not supported (Lark has no raw vector query)".to_string(),
159 ))
160 }
161
162 async fn delete(&self, ids: &[&str]) -> Result<(), SynapticError> {
163 let token = self.token_cache.get_token().await?;
164 let url = format!(
165 "{}/search/v2/datasets/{}/documents/batch_delete",
166 self.base_url, self.dataset_id
167 );
168 let body = json!({ "document_ids": ids });
169 let resp = self
170 .client
171 .delete(&url)
172 .bearer_auth(&token)
173 .json(&body)
174 .send()
175 .await
176 .map_err(|e| SynapticError::VectorStore(format!("delete: {e}")))?;
177 let rb: Value = resp
178 .json()
179 .await
180 .map_err(|e| SynapticError::VectorStore(format!("delete parse: {e}")))?;
181 Self::check(&rb, "delete")
182 }
183}