Skip to main content

synaptic_lark/
vector_store.rs

1use async_trait::async_trait;
2use serde_json::{json, Value};
3use std::collections::HashMap;
4use synaptic_core::{Document, Embeddings, SynapticError, VectorStore};
5
6use crate::{auth::TokenCache, LarkConfig};
7
8/// Vector store backed by Lark's Search API (dataset-based, server-side embedding).
9///
10/// `add_documents`: indexes raw text into a Lark Search dataset.
11/// `similarity_search`: calls Lark's natural-language search — no external embeddings needed.
12/// `similarity_search_by_vector`: NOT supported (Lark has no raw vector query endpoint).
13pub struct LarkVectorStore {
14    token_cache: TokenCache,
15    base_url: String,
16    client: reqwest::Client,
17    dataset_id: String,
18}
19
20impl LarkVectorStore {
21    /// Create a new `LarkVectorStore` targeting the given Lark Search dataset.
22    pub fn new(config: LarkConfig, dataset_id: impl Into<String>) -> Self {
23        let base_url = config.base_url.clone();
24        Self {
25            token_cache: config.token_cache(),
26            base_url,
27            client: reqwest::Client::new(),
28            dataset_id: dataset_id.into(),
29        }
30    }
31
32    /// Return the dataset ID this store is configured to use.
33    pub fn dataset_id(&self) -> &str {
34        &self.dataset_id
35    }
36
37    fn check(body: &Value, ctx: &str) -> Result<(), SynapticError> {
38        if body["code"].as_i64().unwrap_or(-1) != 0 {
39            Err(SynapticError::VectorStore(format!(
40                "LarkVectorStore ({ctx}): {}",
41                body["msg"].as_str().unwrap_or("unknown")
42            )))
43        } else {
44            Ok(())
45        }
46    }
47}
48
49#[async_trait]
50impl VectorStore for LarkVectorStore {
51    async fn add_documents(
52        &self,
53        docs: Vec<Document>,
54        _embeddings: &dyn Embeddings,
55    ) -> Result<Vec<String>, SynapticError> {
56        let token = self.token_cache.get_token().await?;
57        let url = format!(
58            "{}/search/v2/datasets/{}/documents/batch_create",
59            self.base_url, self.dataset_id
60        );
61        let items: Vec<Value> = docs
62            .iter()
63            .map(|d| {
64                json!({
65                    "id": d.id,
66                    "title": d.metadata.get("title").and_then(|v| v.as_str()).unwrap_or(&d.id),
67                    "body": d.content,
68                    "meta": d.metadata,
69                })
70            })
71            .collect();
72
73        let body = json!({ "documents": items });
74        let resp = self
75            .client
76            .post(&url)
77            .bearer_auth(&token)
78            .json(&body)
79            .send()
80            .await
81            .map_err(|e| SynapticError::VectorStore(format!("add_documents: {e}")))?;
82        let rb: Value = resp
83            .json()
84            .await
85            .map_err(|e| SynapticError::VectorStore(format!("add_documents parse: {e}")))?;
86        Self::check(&rb, "add_documents")?;
87
88        Ok(docs.iter().map(|d| d.id.clone()).collect())
89    }
90
91    async fn similarity_search(
92        &self,
93        query: &str,
94        k: usize,
95        _embeddings: &dyn Embeddings,
96    ) -> Result<Vec<Document>, SynapticError> {
97        let token = self.token_cache.get_token().await?;
98        let url = format!(
99            "{}/search/v2/datasets/{}/search",
100            self.base_url, self.dataset_id
101        );
102        let body = json!({ "query": query, "page_size": k });
103        let resp = self
104            .client
105            .post(&url)
106            .bearer_auth(&token)
107            .json(&body)
108            .send()
109            .await
110            .map_err(|e| SynapticError::VectorStore(format!("similarity_search: {e}")))?;
111        let rb: Value = resp
112            .json()
113            .await
114            .map_err(|e| SynapticError::VectorStore(format!("similarity_search parse: {e}")))?;
115        Self::check(&rb, "similarity_search")?;
116
117        let items = rb["data"]["items"].as_array().cloned().unwrap_or_default();
118        Ok(items
119            .iter()
120            .map(|item| {
121                let id = item["id"].as_str().unwrap_or("").to_string();
122                let content = item["body"].as_str().unwrap_or("").to_string();
123                let mut metadata: HashMap<String, Value> = HashMap::new();
124                if let Some(m) = item["meta"].as_object() {
125                    for (k, v) in m {
126                        metadata.insert(k.clone(), v.clone());
127                    }
128                }
129                if let Some(title) = item["title"].as_str() {
130                    metadata.insert("title".to_string(), Value::String(title.to_string()));
131                }
132                Document {
133                    id,
134                    content,
135                    metadata,
136                }
137            })
138            .collect())
139    }
140
141    async fn similarity_search_with_score(
142        &self,
143        query: &str,
144        k: usize,
145        embeddings: &dyn Embeddings,
146    ) -> Result<Vec<(Document, f32)>, SynapticError> {
147        // Lark search doesn't return explicit scores; use 1.0 as a placeholder
148        let docs = self.similarity_search(query, k, embeddings).await?;
149        Ok(docs.into_iter().map(|d| (d, 1.0_f32)).collect())
150    }
151
152    async fn similarity_search_by_vector(
153        &self,
154        _embedding: &[f32],
155        _k: usize,
156    ) -> Result<Vec<Document>, SynapticError> {
157        Err(SynapticError::VectorStore(
158            "LarkVectorStore: similarity_search_by_vector not supported (Lark has no raw vector query)".to_string(),
159        ))
160    }
161
162    async fn delete(&self, ids: &[&str]) -> Result<(), SynapticError> {
163        let token = self.token_cache.get_token().await?;
164        let url = format!(
165            "{}/search/v2/datasets/{}/documents/batch_delete",
166            self.base_url, self.dataset_id
167        );
168        let body = json!({ "document_ids": ids });
169        let resp = self
170            .client
171            .delete(&url)
172            .bearer_auth(&token)
173            .json(&body)
174            .send()
175            .await
176            .map_err(|e| SynapticError::VectorStore(format!("delete: {e}")))?;
177        let rb: Value = resp
178            .json()
179            .await
180            .map_err(|e| SynapticError::VectorStore(format!("delete parse: {e}")))?;
181        Self::check(&rb, "delete")
182    }
183}