Skip to main content

lance_context_api/
lib.rs

1use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
2use chrono::{DateTime, Utc};
3use serde::{Deserialize, Serialize};
4use serde_json::Value;
5use std::future::Future;
6
7// ---------------------------------------------------------------------------
8// Unified error
9// ---------------------------------------------------------------------------
10
11#[derive(Debug, thiserror::Error)]
12pub enum ContextError {
13    #[error("{0}")]
14    NotFound(String),
15    #[error("{0}")]
16    AlreadyExists(String),
17    #[error("{0}")]
18    InvalidRequest(String),
19    #[error("{0}")]
20    Internal(String),
21    #[error("Compaction already in progress")]
22    CompactionInProgress,
23}
24
25pub type ContextResult<T> = Result<T, ContextError>;
26
27// ---------------------------------------------------------------------------
28// Unified trait
29// ---------------------------------------------------------------------------
30
31pub trait ContextStoreApi {
32    fn add(
33        &mut self,
34        records: &[AddRecordRequest],
35    ) -> impl Future<Output = ContextResult<AddRecordsResponse>> + Send;
36
37    fn upsert(
38        &mut self,
39        request: &UpsertRecordRequest,
40    ) -> impl Future<Output = ContextResult<UpsertRecordResponse>> + Send;
41
42    fn upsert_many(
43        &mut self,
44        request: &UpsertRecordsRequest,
45    ) -> impl Future<Output = ContextResult<UpsertRecordsResponse>> + Send;
46
47    fn update(
48        &mut self,
49        request: &UpdateRecordRequest,
50    ) -> impl Future<Output = ContextResult<UpdateRecordResponse>> + Send;
51
52    fn get(&self, id: &str) -> impl Future<Output = ContextResult<Option<RecordDto>>> + Send;
53
54    fn get_by_external_id(
55        &self,
56        external_id: &str,
57    ) -> impl Future<Output = ContextResult<Option<RecordDto>>> + Send;
58
59    fn delete_by_id(
60        &mut self,
61        id: &str,
62    ) -> impl Future<Output = ContextResult<DeleteRecordResponse>> + Send;
63
64    fn delete_by_external_id(
65        &mut self,
66        external_id: &str,
67    ) -> impl Future<Output = ContextResult<DeleteRecordResponse>> + Send;
68
69    fn list(
70        &self,
71        limit: Option<usize>,
72        offset: Option<usize>,
73        filters: Option<Value>,
74        include_expired: bool,
75        include_retired: bool,
76    ) -> impl Future<Output = ContextResult<Vec<RecordDto>>> + Send;
77
78    fn related(
79        &self,
80        target_id: &str,
81        relation: Option<&str>,
82        limit: Option<usize>,
83        include_expired: bool,
84        include_retired: bool,
85    ) -> impl Future<Output = ContextResult<Vec<RecordDto>>> + Send;
86
87    fn search(
88        &self,
89        request: &SearchRequest,
90    ) -> impl Future<Output = ContextResult<Vec<SearchResultDto>>> + Send;
91
92    fn retrieve(
93        &self,
94        request: &RetrieveRequest,
95    ) -> impl Future<Output = ContextResult<Vec<RetrieveResultDto>>> + Send;
96
97    fn version(&self) -> u64;
98
99    fn checkout(&mut self, version: u64) -> impl Future<Output = ContextResult<()>> + Send;
100
101    fn compact(
102        &mut self,
103        options: Option<CompactRequest>,
104    ) -> impl Future<Output = ContextResult<CompactResponse>> + Send;
105
106    fn compaction_stats(&self) -> impl Future<Output = ContextResult<CompactStatsResponse>> + Send;
107}
108
109// ---------------------------------------------------------------------------
110// Context lifecycle
111// ---------------------------------------------------------------------------
112
113#[derive(Debug, Serialize, Deserialize)]
114pub struct CreateContextRequest {
115    pub name: String,
116    #[serde(default)]
117    pub storage_options: Option<std::collections::HashMap<String, String>>,
118    #[serde(default)]
119    pub id_index_type: Option<String>,
120    #[serde(default)]
121    pub blob_columns: Option<Vec<String>>,
122    #[serde(default)]
123    pub embedding_dim: Option<i32>,
124    #[serde(default)]
125    pub distance_metric: Option<String>,
126}
127
128#[derive(Debug, Serialize, Deserialize)]
129pub struct ContextInfo {
130    pub name: String,
131    pub uri: String,
132    pub version: u64,
133}
134
135#[derive(Debug, Serialize, Deserialize)]
136pub struct ListContextsResponse {
137    pub contexts: Vec<ContextInfo>,
138}
139
140// ---------------------------------------------------------------------------
141// Records
142// ---------------------------------------------------------------------------
143
144#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct StateMetadataDto {
146    #[serde(default, skip_serializing_if = "Option::is_none")]
147    pub step: Option<i32>,
148    #[serde(default, skip_serializing_if = "Option::is_none")]
149    pub active_plan_id: Option<String>,
150    #[serde(default, skip_serializing_if = "Option::is_none")]
151    pub tokens_used: Option<i32>,
152    #[serde(default, skip_serializing_if = "Option::is_none")]
153    pub custom: Option<String>,
154}
155
156#[derive(Debug, Clone, Default, Serialize, Deserialize)]
157pub struct RelationshipDto {
158    pub target_id: String,
159    pub relation: String,
160    #[serde(default, skip_serializing_if = "Option::is_none")]
161    pub weight: Option<f32>,
162}
163
164#[derive(Debug, Clone, Default, Serialize, Deserialize)]
165pub struct AddRecordRequest {
166    #[serde(default = "default_role")]
167    pub role: String,
168    #[serde(default = "default_content_type")]
169    pub content_type: String,
170    #[serde(default, skip_serializing_if = "Option::is_none")]
171    pub text_payload: Option<String>,
172    #[serde(
173        default,
174        skip_serializing_if = "Option::is_none",
175        serialize_with = "serialize_base64_opt",
176        deserialize_with = "deserialize_base64_opt"
177    )]
178    pub binary_payload: Option<Vec<u8>>,
179    #[serde(default, skip_serializing_if = "Option::is_none")]
180    pub embedding: Option<Vec<f32>>,
181    #[serde(default, skip_serializing_if = "Option::is_none")]
182    pub bot_id: Option<String>,
183    #[serde(default, skip_serializing_if = "Option::is_none")]
184    pub session_id: Option<String>,
185    #[serde(default, skip_serializing_if = "Option::is_none")]
186    pub tenant: Option<String>,
187    #[serde(default, skip_serializing_if = "Option::is_none")]
188    pub source: Option<String>,
189    #[serde(default, skip_serializing_if = "Option::is_none")]
190    pub external_id: Option<String>,
191    #[serde(default, skip_serializing_if = "Option::is_none")]
192    pub state_metadata: Option<StateMetadataDto>,
193    #[serde(default, skip_serializing_if = "Option::is_none")]
194    pub metadata: Option<Value>,
195    #[serde(default, skip_serializing_if = "Vec::is_empty")]
196    pub relationships: Vec<RelationshipDto>,
197    #[serde(default, skip_serializing_if = "Option::is_none")]
198    pub expires_at: Option<DateTime<Utc>>,
199    #[serde(default, skip_serializing_if = "Option::is_none")]
200    pub retention_policy: Option<String>,
201    #[serde(default, skip_serializing_if = "Option::is_none")]
202    pub supersedes_id: Option<String>,
203}
204
205#[derive(Debug, Serialize, Deserialize)]
206pub struct AddRecordsRequest {
207    pub records: Vec<AddRecordRequest>,
208}
209
210#[derive(Debug, Serialize, Deserialize)]
211pub struct AddRecordsResponse {
212    pub version: u64,
213    pub ids: Vec<String>,
214    pub count: usize,
215}
216
217#[derive(Debug, Serialize, Deserialize)]
218pub struct UpsertRecordRequest {
219    pub record: AddRecordRequest,
220    #[serde(default = "default_upsert_key")]
221    pub key: String,
222}
223
224#[derive(Debug, Serialize, Deserialize)]
225pub struct UpsertRecordResponse {
226    pub version: u64,
227    pub inserted: bool,
228    #[serde(default, skip_serializing_if = "Option::is_none")]
229    pub replaced_id: Option<String>,
230    pub record: RecordDto,
231}
232
233#[derive(Debug, Serialize, Deserialize)]
234pub struct UpsertRecordsRequest {
235    pub records: Vec<AddRecordRequest>,
236    #[serde(default = "default_upsert_key")]
237    pub key: String,
238}
239
240/// Per-record outcome of a batch upsert, in input order.
241#[derive(Debug, Serialize, Deserialize)]
242pub struct UpsertResultDto {
243    pub inserted: bool,
244    #[serde(default, skip_serializing_if = "Option::is_none")]
245    pub replaced_id: Option<String>,
246    pub record: RecordDto,
247}
248
249#[derive(Debug, Serialize, Deserialize)]
250pub struct UpsertRecordsResponse {
251    pub version: u64,
252    pub results: Vec<UpsertResultDto>,
253}
254
255#[derive(Debug, Clone, Default, Serialize, Deserialize)]
256pub struct RecordPatchDto {
257    #[serde(default, skip_serializing_if = "Option::is_none")]
258    pub bot_id: Option<String>,
259    #[serde(default, skip_serializing_if = "Option::is_none")]
260    pub session_id: Option<String>,
261    #[serde(default, skip_serializing_if = "Option::is_none")]
262    pub tenant: Option<String>,
263    #[serde(default, skip_serializing_if = "Option::is_none")]
264    pub source: Option<String>,
265    #[serde(default, skip_serializing_if = "Option::is_none")]
266    pub state_metadata: Option<StateMetadataDto>,
267    #[serde(default, skip_serializing_if = "Option::is_none")]
268    pub metadata: Option<Value>,
269    #[serde(default, skip_serializing_if = "Option::is_none")]
270    pub relationships: Option<Vec<RelationshipDto>>,
271    #[serde(default, skip_serializing_if = "Option::is_none")]
272    pub expires_at: Option<DateTime<Utc>>,
273    #[serde(default, skip_serializing_if = "Option::is_none")]
274    pub retention_policy: Option<String>,
275    #[serde(default, skip_serializing_if = "Option::is_none")]
276    pub lifecycle_status: Option<String>,
277    #[serde(default, skip_serializing_if = "Option::is_none")]
278    pub retired_at: Option<DateTime<Utc>>,
279    #[serde(default, skip_serializing_if = "Option::is_none")]
280    pub retired_reason: Option<String>,
281    #[serde(default, skip_serializing_if = "Option::is_none")]
282    pub embedding: Option<Vec<f32>>,
283}
284
285impl RecordPatchDto {
286    #[must_use]
287    pub fn is_empty(&self) -> bool {
288        self.bot_id.is_none()
289            && self.session_id.is_none()
290            && self.tenant.is_none()
291            && self.source.is_none()
292            && self.state_metadata.is_none()
293            && self.metadata.is_none()
294            && self.relationships.is_none()
295            && self.expires_at.is_none()
296            && self.retention_policy.is_none()
297            && self.lifecycle_status.is_none()
298            && self.retired_at.is_none()
299            && self.retired_reason.is_none()
300            && self.embedding.is_none()
301    }
302}
303
304#[derive(Debug, Serialize, Deserialize)]
305pub struct UpdateRecordRequest {
306    #[serde(default, skip_serializing_if = "Option::is_none")]
307    pub id: Option<String>,
308    #[serde(default, skip_serializing_if = "Option::is_none")]
309    pub external_id: Option<String>,
310    #[serde(default)]
311    pub patch: RecordPatchDto,
312}
313
314#[derive(Debug, Serialize, Deserialize)]
315pub struct UpdateRecordResponse {
316    pub version: u64,
317    pub updated: bool,
318    #[serde(default, skip_serializing_if = "Option::is_none")]
319    pub replaced_id: Option<String>,
320    #[serde(default, skip_serializing_if = "Option::is_none")]
321    pub record: Option<RecordDto>,
322}
323
324#[derive(Debug, Clone, Serialize, Deserialize)]
325pub struct RecordDto {
326    pub id: String,
327    #[serde(default, skip_serializing_if = "Option::is_none")]
328    pub external_id: Option<String>,
329    pub run_id: String,
330    #[serde(default, skip_serializing_if = "Option::is_none")]
331    pub bot_id: Option<String>,
332    #[serde(default, skip_serializing_if = "Option::is_none")]
333    pub session_id: Option<String>,
334    #[serde(default, skip_serializing_if = "Option::is_none")]
335    pub tenant: Option<String>,
336    #[serde(default, skip_serializing_if = "Option::is_none")]
337    pub source: Option<String>,
338    pub created_at: DateTime<Utc>,
339    pub role: String,
340    pub content_type: String,
341    #[serde(default, skip_serializing_if = "Option::is_none")]
342    pub text_payload: Option<String>,
343    #[serde(
344        default,
345        skip_serializing_if = "Option::is_none",
346        serialize_with = "serialize_base64_opt",
347        deserialize_with = "deserialize_base64_opt"
348    )]
349    pub binary_payload: Option<Vec<u8>>,
350    #[serde(default, skip_serializing_if = "Option::is_none")]
351    pub embedding: Option<Vec<f32>>,
352    #[serde(default, skip_serializing_if = "Option::is_none")]
353    pub state_metadata: Option<StateMetadataDto>,
354    #[serde(default, skip_serializing_if = "Option::is_none")]
355    pub metadata: Option<Value>,
356    #[serde(default, skip_serializing_if = "Vec::is_empty")]
357    pub relationships: Vec<RelationshipDto>,
358    #[serde(default, skip_serializing_if = "Option::is_none")]
359    pub expires_at: Option<DateTime<Utc>>,
360    #[serde(default, skip_serializing_if = "Option::is_none")]
361    pub retention_policy: Option<String>,
362    pub lifecycle_status: String,
363    #[serde(default, skip_serializing_if = "Option::is_none")]
364    pub retired_at: Option<DateTime<Utc>>,
365    #[serde(default, skip_serializing_if = "Option::is_none")]
366    pub retired_reason: Option<String>,
367    #[serde(default, skip_serializing_if = "Option::is_none")]
368    pub supersedes_id: Option<String>,
369    #[serde(default, skip_serializing_if = "Option::is_none")]
370    pub superseded_by_id: Option<String>,
371}
372
373#[derive(Debug, Serialize, Deserialize)]
374pub struct ListRecordsResponse {
375    pub records: Vec<RecordDto>,
376}
377
378// ---------------------------------------------------------------------------
379// Single record lookup
380// ---------------------------------------------------------------------------
381
382#[derive(Debug, Serialize, Deserialize)]
383pub struct GetRecordResponse {
384    pub record: Option<RecordDto>,
385}
386
387#[derive(Debug, Serialize, Deserialize)]
388pub struct DeleteRecordResponse {
389    pub deleted: bool,
390    pub version: u64,
391}
392
393// ---------------------------------------------------------------------------
394// Search
395// ---------------------------------------------------------------------------
396
397#[derive(Debug, Serialize, Deserialize)]
398pub struct SearchRequest {
399    pub query: Vec<f32>,
400    #[serde(default = "default_search_limit")]
401    pub limit: usize,
402    #[serde(default, skip_serializing_if = "Option::is_none")]
403    pub filters: Option<Value>,
404    #[serde(default)]
405    pub include_expired: bool,
406    #[serde(default)]
407    pub include_retired: bool,
408    #[serde(default)]
409    pub include_relationships: bool,
410}
411
412#[derive(Debug, Serialize, Deserialize)]
413pub struct SearchResultDto {
414    pub record: RecordDto,
415    pub distance: f32,
416}
417
418#[derive(Debug, Serialize, Deserialize)]
419pub struct SearchResponse {
420    pub results: Vec<SearchResultDto>,
421}
422
423// ---------------------------------------------------------------------------
424// Hybrid retrieval
425// ---------------------------------------------------------------------------
426
427#[derive(Debug, Serialize, Deserialize)]
428pub struct RetrieveRequest {
429    #[serde(default, skip_serializing_if = "Option::is_none")]
430    pub text: Option<String>,
431    #[serde(default, skip_serializing_if = "Option::is_none")]
432    pub vector: Option<Vec<f32>>,
433    #[serde(default = "default_search_limit")]
434    pub limit: usize,
435    #[serde(default, skip_serializing_if = "Option::is_none")]
436    pub filters: Option<Value>,
437    #[serde(default)]
438    pub include_expired: bool,
439    #[serde(default)]
440    pub include_retired: bool,
441    #[serde(default)]
442    pub include_relationships: bool,
443    #[serde(default = "default_retrieve_fusion")]
444    pub fusion: String,
445}
446
447#[derive(Debug, Serialize, Deserialize)]
448pub struct RetrieveResultDto {
449    pub record: RecordDto,
450    pub score: f32,
451    #[serde(default, skip_serializing_if = "Option::is_none")]
452    pub vector_distance: Option<f32>,
453    #[serde(default, skip_serializing_if = "Option::is_none")]
454    pub text_score: Option<f32>,
455    #[serde(default, skip_serializing_if = "Vec::is_empty")]
456    pub matched_channels: Vec<String>,
457}
458
459#[derive(Debug, Serialize, Deserialize)]
460pub struct RetrieveResponse {
461    pub results: Vec<RetrieveResultDto>,
462}
463
464// ---------------------------------------------------------------------------
465// Versioning
466// ---------------------------------------------------------------------------
467
468#[derive(Debug, Serialize, Deserialize)]
469pub struct VersionResponse {
470    pub version: u64,
471}
472
473#[derive(Debug, Serialize, Deserialize)]
474pub struct CheckoutRequest {
475    pub version: u64,
476}
477
478// ---------------------------------------------------------------------------
479// Compaction
480// ---------------------------------------------------------------------------
481
482#[derive(Debug, Default, Serialize, Deserialize)]
483pub struct CompactRequest {
484    #[serde(default, skip_serializing_if = "Option::is_none")]
485    pub target_rows_per_fragment: Option<usize>,
486    #[serde(default, skip_serializing_if = "Option::is_none")]
487    pub materialize_deletions: Option<bool>,
488}
489
490#[derive(Debug, Serialize, Deserialize)]
491pub struct CompactResponse {
492    pub fragments_removed: usize,
493    pub fragments_added: usize,
494    pub files_removed: usize,
495    pub files_added: usize,
496}
497
498#[derive(Debug, Serialize, Deserialize)]
499pub struct CompactStatsResponse {
500    pub total_fragments: usize,
501    pub is_compacting: bool,
502    #[serde(default, skip_serializing_if = "Option::is_none")]
503    pub last_compaction: Option<DateTime<Utc>>,
504    #[serde(default, skip_serializing_if = "Option::is_none")]
505    pub last_error: Option<String>,
506    pub total_compactions: u64,
507}
508
509// ---------------------------------------------------------------------------
510// Error
511// ---------------------------------------------------------------------------
512
513#[derive(Debug, Serialize, Deserialize)]
514pub struct ErrorBody {
515    pub code: String,
516    pub message: String,
517}
518
519#[derive(Debug, Serialize, Deserialize)]
520pub struct ErrorResponse {
521    pub error: ErrorBody,
522}
523
524// ---------------------------------------------------------------------------
525// Helpers
526// ---------------------------------------------------------------------------
527
/// Serde fallback for a missing `content_type`: `"text/plain"`.
fn default_content_type() -> String {
    String::from("text/plain")
}
531
/// Serde fallback for a missing `role`: `"user"`.
fn default_role() -> String {
    String::from("user")
}
535
/// Serde fallback for a missing upsert `key`: `"external_id"`.
fn default_upsert_key() -> String {
    String::from("external_id")
}
539
/// Serde fallback for a missing `limit` on search and retrieve requests.
fn default_search_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
543
/// Serde fallback for a missing `fusion` on retrieve requests: `"rrf"`.
fn default_retrieve_fusion() -> String {
    String::from("rrf")
}
547
548fn serialize_base64_opt<S>(data: &Option<Vec<u8>>, serializer: S) -> Result<S::Ok, S::Error>
549where
550    S: serde::Serializer,
551{
552    match data {
553        Some(bytes) => serializer.serialize_some(&BASE64.encode(bytes)),
554        None => serializer.serialize_none(),
555    }
556}
557
558fn deserialize_base64_opt<'de, D>(deserializer: D) -> Result<Option<Vec<u8>>, D::Error>
559where
560    D: serde::Deserializer<'de>,
561{
562    let opt: Option<String> = Option::deserialize(deserializer)?;
563    match opt {
564        Some(s) => BASE64
565            .decode(&s)
566            .map(Some)
567            .map_err(serde::de::Error::custom),
568        None => Ok(None),
569    }
570}
571
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `json` into a [`SearchRequest`], panicking if the payload is rejected.
    fn parse_search(json: &str) -> SearchRequest {
        serde_json::from_str(json).unwrap()
    }

    #[test]
    fn search_request_legacy_payload_defaults_filters_and_lifecycle() {
        // Clients written against the pre-#89 shape send only query/limit.
        let SearchRequest {
            query,
            limit,
            filters,
            include_expired,
            include_retired,
            include_relationships,
        } = parse_search(r#"{"query": [0.1, 0.2], "limit": 5}"#);

        assert_eq!(query, vec![0.1, 0.2]);
        assert_eq!(limit, 5);
        assert!(filters.is_none());
        assert!(!include_expired);
        assert!(!include_retired);
        assert!(!include_relationships);
    }

    #[test]
    fn search_request_defaults_limit_when_omitted() {
        let parsed = parse_search(r#"{"query": [1.0]}"#);
        assert_eq!(parsed.limit, default_search_limit());
    }

    #[test]
    fn search_request_parses_filters_and_lifecycle() {
        let parsed = parse_search(
            r#"{"query": [1.0], "filters": {"tenant": "acme"}, "include_expired": true, "include_retired": true}"#,
        );
        let expected_filters = serde_json::json!({"tenant": "acme"});

        assert_eq!(parsed.filters, Some(expected_filters));
        assert!(parsed.include_expired);
        assert!(parsed.include_retired);
    }
}