Skip to main content

lance_context_core/
record.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use serde_json::Value;
4use std::collections::HashMap;
5
6use crate::serde::CONTENT_TYPE_TOMBSTONE;
7
/// Lifecycle status string that `ContextRecord::has_non_default_lifecycle`
/// treats as the default state.
pub const LIFECYCLE_ACTIVE: &str = "active";
/// Lifecycle status string for a contradicted record. Like the active status,
/// it does not hide a record on its own (see
/// `ContextRecord::is_hidden_by_lifecycle`).
pub const LIFECYCLE_CONTRADICTED: &str = "contradicted";
10
/// Structured, optional state captured next to a context entry.
///
/// Every field is optional; `StateMetadata::default()` leaves all of them
/// unset.
#[derive(Clone, Debug, Default)]
pub struct StateMetadata {
    /// Step number, when recorded. NOTE(review): presumably the agent/run step
    /// index; confirm with the writers of this struct.
    pub step: Option<i32>,
    /// Identifier of the plan that was active, when recorded.
    pub active_plan_id: Option<String>,
    /// Token count, when recorded.
    pub tokens_used: Option<i32>,
    /// Free-form caller-defined string. NOTE(review): encoding (e.g. JSON) is
    /// not established in this file.
    pub custom: Option<String>,
}
19
20/// Directed relationship from this record to another graph node.
21#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
22pub struct Relationship {
23    pub target_id: String,
24    pub relation: String,
25    #[serde(default, skip_serializing_if = "Option::is_none")]
26    pub weight: Option<f32>,
27}
28
29/// User-facing representation of a context entry written to storage.
30#[derive(Debug, Clone)]
31pub struct ContextRecord {
32    pub id: String,
33    pub external_id: Option<String>,
34    pub run_id: String,
35    pub bot_id: Option<String>,
36    pub session_id: Option<String>,
37    pub tenant: Option<String>,
38    pub source: Option<String>,
39    pub created_at: DateTime<Utc>,
40    pub role: String,
41    pub state_metadata: Option<StateMetadata>,
42    pub metadata: Option<Value>,
43    pub relationships: Vec<Relationship>,
44    pub expires_at: Option<DateTime<Utc>>,
45    pub retention_policy: Option<String>,
46    pub lifecycle_status: String,
47    pub retired_at: Option<DateTime<Utc>>,
48    pub retired_reason: Option<String>,
49    pub supersedes_id: Option<String>,
50    pub superseded_by_id: Option<String>,
51    pub content_type: String,
52    pub text_payload: Option<String>,
53    pub binary_payload: Option<Vec<u8>>,
54    pub embedding: Option<Vec<f32>>,
55}
56
57impl ContextRecord {
58    #[must_use]
59    pub fn is_tombstone(&self) -> bool {
60        self.content_type == CONTENT_TYPE_TOMBSTONE
61    }
62
63    #[must_use]
64    pub fn is_expired_at(&self, now: DateTime<Utc>) -> bool {
65        self.expires_at.is_some_and(|expires_at| expires_at <= now)
66    }
67
68    #[must_use]
69    pub fn is_hidden_by_lifecycle(&self) -> bool {
70        if self.lifecycle_status == LIFECYCLE_ACTIVE
71            || self.lifecycle_status == LIFECYCLE_CONTRADICTED
72        {
73            return self.retired_at.is_some() || self.superseded_by_id.is_some();
74        }
75
76        true
77    }
78
79    #[must_use]
80    pub fn has_non_default_lifecycle(&self) -> bool {
81        self.expires_at.is_some()
82            || self.retention_policy.is_some()
83            || self.lifecycle_status != LIFECYCLE_ACTIVE
84            || self.retired_at.is_some()
85            || self.retired_reason.is_some()
86            || self.supersedes_id.is_some()
87            || self.superseded_by_id.is_some()
88    }
89}
90
91/// Query-time controls for lifecycle-aware retrieval.
92#[derive(Debug, Clone)]
93pub struct LifecycleQueryOptions {
94    pub include_expired: bool,
95    pub include_retired: bool,
96    pub reference_time: DateTime<Utc>,
97}
98
99impl Default for LifecycleQueryOptions {
100    fn default() -> Self {
101        Self {
102            include_expired: false,
103            include_retired: false,
104            reference_time: Utc::now(),
105        }
106    }
107}
108
109impl LifecycleQueryOptions {
110    #[must_use]
111    pub fn new(include_expired: bool, include_retired: bool) -> Self {
112        Self {
113            include_expired,
114            include_retired,
115            ..Self::default()
116        }
117    }
118
119    #[must_use]
120    pub fn is_visible(&self, record: &ContextRecord) -> bool {
121        !record.is_tombstone()
122            && (self.include_expired || !record.is_expired_at(self.reference_time))
123            && (self.include_retired || !record.is_hidden_by_lifecycle())
124    }
125}
126
127/// Result returned from a vector similarity search.
128#[derive(Debug, Clone)]
129pub struct SearchResult {
130    pub record: ContextRecord,
131    /// Distance score under the store's configured distance metric, always
132    /// ordered "smaller is better". Its scale is metric-dependent: L2 distance,
133    /// cosine distance (`1 - cosine_similarity`, in `0..=2`), or the negated dot
134    /// product for maximum-inner-product search.
135    pub distance: f32,
136}
137
138/// Result returned from hybrid retrieval over context records.
139#[derive(Debug, Clone)]
140pub struct RetrieveResult {
141    pub record: ContextRecord,
142    pub score: f32,
143    pub vector_distance: Option<f32>,
144    pub text_score: Option<f32>,
145    pub matched_channels: Vec<String>,
146}
147
148/// Result returned from insert-or-replace operations.
149#[derive(Debug, Clone)]
150pub struct UpsertResult {
151    pub record: ContextRecord,
152    pub inserted: bool,
153    pub replaced_id: Option<String>,
154    pub version: u64,
155}
156
157/// Mutable fields that can be patched without resupplying the payload.
158#[derive(Debug, Clone, Default)]
159pub struct RecordPatch {
160    pub bot_id: Option<String>,
161    pub session_id: Option<String>,
162    pub tenant: Option<String>,
163    pub source: Option<String>,
164    pub state_metadata: Option<StateMetadata>,
165    pub metadata: Option<Value>,
166    pub relationships: Option<Vec<Relationship>>,
167    pub expires_at: Option<DateTime<Utc>>,
168    pub retention_policy: Option<String>,
169    pub lifecycle_status: Option<String>,
170    pub retired_at: Option<DateTime<Utc>>,
171    pub retired_reason: Option<String>,
172    /// Vector embedding to attach to the record. Enables deferred embedding
173    /// workflows: append raw text first, then enrich with an embedding later.
174    pub embedding: Option<Vec<f32>>,
175}
176
177impl RecordPatch {
178    #[must_use]
179    pub fn is_empty(&self) -> bool {
180        self.bot_id.is_none()
181            && self.session_id.is_none()
182            && self.tenant.is_none()
183            && self.source.is_none()
184            && self.state_metadata.is_none()
185            && self.metadata.is_none()
186            && self.relationships.is_none()
187            && self.expires_at.is_none()
188            && self.retention_policy.is_none()
189            && self.lifecycle_status.is_none()
190            && self.retired_at.is_none()
191            && self.retired_reason.is_none()
192            && self.embedding.is_none()
193    }
194}
195
196/// Result returned from partial record update operations.
197#[derive(Debug, Clone)]
198pub struct UpdateResult {
199    pub record: ContextRecord,
200    pub replaced_id: String,
201    pub version: u64,
202}
203
204/// Metadata matching operation for filtered retrieval.
205#[derive(Debug, Clone, PartialEq)]
206pub enum MetadataFilter {
207    Equals(Value),
208    Contains(Value),
209}
210
211/// Filters applied to records before list pagination or search ranking.
212#[derive(Debug, Clone, Default, PartialEq)]
213pub struct RecordFilters {
214    pub bot_id: Option<String>,
215    pub session_id: Option<String>,
216    pub tenant: Option<String>,
217    pub source: Option<String>,
218    pub role: Option<String>,
219    pub content_type: Option<String>,
220    pub created_at_start: Option<DateTime<Utc>>,
221    pub created_at_end: Option<DateTime<Utc>>,
222    pub metadata: HashMap<String, MetadataFilter>,
223}
224
225impl RecordFilters {
226    pub fn from_json_value(value: Value) -> Result<Self, String> {
227        let Value::Object(object) = value else {
228            return Err("filters must be a JSON object".to_string());
229        };
230
231        let mut filters = RecordFilters::default();
232        for (key, value) in object {
233            match key.as_str() {
234                "bot_id" => filters.bot_id = filter_string(key.as_str(), value)?,
235                "session_id" => filters.session_id = filter_string(key.as_str(), value)?,
236                "tenant" => filters.tenant = filter_string(key.as_str(), value)?,
237                "source" => filters.source = filter_string(key.as_str(), value)?,
238                "role" => filters.role = filter_string(key.as_str(), value)?,
239                "content_type" => filters.content_type = filter_string(key.as_str(), value)?,
240                "created_at" => apply_created_at_filter(&mut filters, value)?,
241                "created_at_start" | "created_after" | "created_at_gte" => {
242                    filters.created_at_start = Some(parse_filter_datetime(&key, &value)?);
243                }
244                "created_at_end" | "created_before" | "created_at_lte" => {
245                    filters.created_at_end = Some(parse_filter_datetime(&key, &value)?);
246                }
247                _ => {
248                    let filter = match value {
249                        Value::Object(mut object)
250                            if object.len() == 1 && object.contains_key("contains") =>
251                        {
252                            MetadataFilter::Contains(object.remove("contains").unwrap())
253                        }
254                        value => MetadataFilter::Equals(value),
255                    };
256                    filters.metadata.insert(key, filter);
257                }
258            }
259        }
260
261        Ok(filters)
262    }
263
264    #[must_use]
265    pub fn is_empty(&self) -> bool {
266        self.bot_id.is_none()
267            && self.session_id.is_none()
268            && self.tenant.is_none()
269            && self.source.is_none()
270            && self.role.is_none()
271            && self.content_type.is_none()
272            && self.created_at_start.is_none()
273            && self.created_at_end.is_none()
274            && self.metadata.is_empty()
275    }
276
277    #[must_use]
278    pub fn matches(&self, record: &ContextRecord) -> bool {
279        if self
280            .bot_id
281            .as_deref()
282            .is_some_and(|value| record.bot_id.as_deref() != Some(value))
283        {
284            return false;
285        }
286        if self
287            .session_id
288            .as_deref()
289            .is_some_and(|value| record.session_id.as_deref() != Some(value))
290        {
291            return false;
292        }
293        if !matches_typed_or_metadata(record, "tenant", record.tenant.as_deref(), &self.tenant) {
294            return false;
295        }
296        if !matches_typed_or_metadata(record, "source", record.source.as_deref(), &self.source) {
297            return false;
298        }
299        if self
300            .role
301            .as_deref()
302            .is_some_and(|value| record.role != value)
303        {
304            return false;
305        }
306        if self
307            .content_type
308            .as_deref()
309            .is_some_and(|value| record.content_type != value)
310        {
311            return false;
312        }
313        if self
314            .created_at_start
315            .is_some_and(|start| record.created_at < start)
316        {
317            return false;
318        }
319        if self
320            .created_at_end
321            .is_some_and(|end| record.created_at > end)
322        {
323            return false;
324        }
325
326        self.metadata.iter().all(|(key, filter)| {
327            let Some(Value::Object(metadata)) = &record.metadata else {
328                return false;
329            };
330            let Some(value) = metadata.get(key) else {
331                return false;
332            };
333            match filter {
334                MetadataFilter::Equals(expected) => value == expected,
335                MetadataFilter::Contains(expected) => metadata_contains(value, expected),
336            }
337        })
338    }
339}
340
341fn filter_string(name: &str, value: Value) -> Result<Option<String>, String> {
342    match value {
343        Value::Null => Ok(None),
344        Value::String(value) => Ok(Some(value)),
345        _ => Err(format!("filter '{name}' must be a string or null")),
346    }
347}
348
349fn apply_created_at_filter(filters: &mut RecordFilters, value: Value) -> Result<(), String> {
350    let Value::Object(object) = value else {
351        return Err("filter 'created_at' must be an object with gte/lte bounds".to_string());
352    };
353
354    for (key, value) in object {
355        match key.as_str() {
356            "gte" | "start" | "after" => {
357                filters.created_at_start = Some(parse_filter_datetime(&key, &value)?);
358            }
359            "lte" | "end" | "before" => {
360                filters.created_at_end = Some(parse_filter_datetime(&key, &value)?);
361            }
362            other => {
363                return Err(format!("unsupported created_at filter operator '{other}'"));
364            }
365        }
366    }
367
368    Ok(())
369}
370
371fn parse_filter_datetime(name: &str, value: &Value) -> Result<DateTime<Utc>, String> {
372    let Some(value) = value.as_str() else {
373        return Err(format!(
374            "filter '{name}' must be an ISO-8601 timestamp string"
375        ));
376    };
377    DateTime::parse_from_rfc3339(value)
378        .map(|value| value.with_timezone(&Utc))
379        .map_err(|err| err.to_string())
380}
381
382fn metadata_contains(value: &Value, expected: &Value) -> bool {
383    match (value, expected) {
384        (Value::Array(items), expected) => items.iter().any(|item| item == expected),
385        (Value::String(value), Value::String(expected)) => value.contains(expected),
386        _ => false,
387    }
388}
389
390fn matches_typed_or_metadata(
391    record: &ContextRecord,
392    metadata_key: &str,
393    typed_value: Option<&str>,
394    expected: &Option<String>,
395) -> bool {
396    let Some(expected) = expected.as_deref() else {
397        return true;
398    };
399    if typed_value.is_some() {
400        return typed_value == Some(expected);
401    }
402    let Some(Value::Object(metadata)) = &record.metadata else {
403        return false;
404    };
405    metadata.get(metadata_key) == Some(&Value::String(expected.to_string()))
406}
407
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;
    use serde_json::json;

    /// Builds an active `text/plain` record with every filterable built-in
    /// field populated and a small metadata object.
    fn sample_record() -> ContextRecord {
        let created_at = Utc.with_ymd_and_hms(2026, 6, 9, 3, 0, 0).unwrap();
        let metadata = json!({
            "scope": "team",
            "tags": ["runbook", "ownership"],
            "confidence": 0.92
        });

        ContextRecord {
            id: String::from("rec-1"),
            external_id: None,
            run_id: String::from("run-1"),
            bot_id: Some(String::from("support-bot")),
            session_id: Some(String::from("incident-1")),
            tenant: Some(String::from("acme")),
            source: Some(String::from("memory")),
            created_at,
            role: String::from("assistant"),
            state_metadata: None,
            metadata: Some(metadata),
            relationships: Vec::new(),
            expires_at: None,
            retention_policy: None,
            lifecycle_status: String::from(LIFECYCLE_ACTIVE),
            retired_at: None,
            retired_reason: None,
            supersedes_id: None,
            superseded_by_id: None,
            content_type: String::from("text/plain"),
            text_payload: Some(String::from("hello")),
            binary_payload: None,
            embedding: None,
        }
    }

    /// All built-in, timestamp and metadata filters agree with the sample
    /// record; changing one built-in filter makes the match fail.
    #[test]
    fn filters_match_builtin_fields_timestamps_and_metadata() {
        let spec = json!({
            "bot_id": "support-bot",
            "session_id": "incident-1",
            "tenant": "acme",
            "source": "memory",
            "role": "assistant",
            "content_type": "text/plain",
            "created_at": {
                "gte": "2026-06-09T02:00:00Z",
                "lte": "2026-06-09T04:00:00Z"
            },
            "scope": "team",
            "tags": {"contains": "runbook"}
        });
        let mut filters = RecordFilters::from_json_value(spec).unwrap();
        let target = sample_record();

        assert!(filters.matches(&target));

        filters.session_id = Some(String::from("other"));
        assert!(!filters.matches(&target));
    }

    /// Records without typed tenant/source still match through same-named
    /// metadata strings.
    #[test]
    fn tenant_and_source_filters_fall_back_to_legacy_metadata() {
        let legacy = ContextRecord {
            tenant: None,
            source: None,
            metadata: Some(json!({
                "tenant": "acme",
                "source": "memory"
            })),
            ..sample_record()
        };

        let spec = json!({
            "tenant": "acme",
            "source": "memory"
        });
        let filters = RecordFilters::from_json_value(spec).unwrap();

        assert!(filters.matches(&legacy));
    }
}