semantic_memory/
types.rs

1#![allow(deprecated)]
2
3use crate::error::MemoryError;
4use chrono::{DateTime, Utc};
5use serde::{Deserialize, Serialize};
6use stack_ids::{
7    ClaimId, ClaimVersionId, EntityId, EnvelopeId, EpisodeId, RelationVersionId, ScopeKey,
8};
9
10/// Stable trace identifier used for cross-crate correlation and auditability.
11///
12/// ## Phase status: compatibility / migration-only
13///
14/// This is a crate-local `TraceId` retained for backward compatibility.
15/// The canonical replacement is `stack_ids::TraceCtx`. Use
16/// `TraceCtx::from_legacy_trace_id()` to convert.
17///
18/// **Removal condition**: removed when all internal usage migrates to `TraceCtx`.
19#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
20#[serde(transparent)]
21pub struct CompatTraceId(pub String);
22
23#[deprecated(since = "0.5.0", note = "Use stack_ids::TraceCtx instead")]
24pub type TraceId = CompatTraceId;
25
26impl CompatTraceId {
27    /// Create a trace ID from any owned string-like input.
28    pub fn new(value: impl Into<String>) -> Self {
29        Self(value.into())
30    }
31
32    /// Borrow the trace ID as a string slice.
33    pub fn as_str(&self) -> &str {
34        &self.0
35    }
36}
37
38impl std::fmt::Display for CompatTraceId {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        f.write_str(&self.0)
41    }
42}
43
44impl From<String> for CompatTraceId {
45    fn from(value: String) -> Self {
46        Self(value)
47    }
48}
49
50impl From<&str> for CompatTraceId {
51    fn from(value: &str) -> Self {
52        Self(value.to_string())
53    }
54}
55
56/// Role of a message in a conversation.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58#[serde(rename_all = "lowercase")]
59pub enum Role {
60    /// System prompt / instructions.
61    System,
62    /// User message.
63    User,
64    /// Assistant (LLM) response.
65    Assistant,
66    /// Tool call result.
67    Tool,
68}
69
70impl Role {
71    /// Convert to the string stored in SQLite.
72    pub fn as_str(&self) -> &'static str {
73        match self {
74            Role::System => "system",
75            Role::User => "user",
76            Role::Assistant => "assistant",
77            Role::Tool => "tool",
78        }
79    }
80
81    /// Parse from the string stored in SQLite.
82    pub fn from_str_value(s: &str) -> Option<Self> {
83        match s {
84            "system" => Some(Role::System),
85            "user" => Some(Role::User),
86            "assistant" => Some(Role::Assistant),
87            "tool" => Some(Role::Tool),
88            _ => None,
89        }
90    }
91}
92
93impl std::fmt::Display for Role {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        f.write_str(self.as_str())
96    }
97}
98
99impl std::str::FromStr for Role {
100    type Err = MemoryError;
101
102    fn from_str(s: &str) -> Result<Self, Self::Err> {
103        Self::from_str_value(s).ok_or_else(|| MemoryError::Other(format!("Unknown role: '{}'", s)))
104    }
105}
106
107/// Indicates whether a search result came from a fact, document chunk, message, or episode.
108#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
109#[serde(rename_all = "snake_case")]
110pub enum SearchSourceType {
111    /// Result is from the facts table.
112    Facts,
113    /// Result is from the chunks table.
114    Chunks,
115    /// Result is from the messages table.
116    Messages,
117    /// Result is from the episodes table.
118    Episodes,
119}
120
121/// Controls whether privacy-sensitive search inputs are retained for replay.
122#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
123#[serde(rename_all = "snake_case")]
124pub enum ReplayMode {
125    /// Keep only receipt digests; callers must supply inputs to replay.
126    #[default]
127    NoReplay,
128    /// Persist query text and filters alongside the durable receipt.
129    StoreInputs,
130}
131
132/// Controls whether search receipt metadata is produced.
133#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
134#[serde(rename_all = "snake_case")]
135pub enum ReceiptMode {
136    /// Do not produce receipt metadata.
137    #[default]
138    Disabled,
139    /// Produce receipt-ready metadata for explain/audit paths.
140    ExplainOnly,
141    /// Return receipt metadata to the caller.
142    ReturnReceipt,
143}
144
145/// Controls whether search should prefer exact reference scoring or allow approximate backends.
146#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
147#[serde(rename_all = "snake_case")]
148pub enum ExactnessProfile {
149    /// Use the configured default backend policy.
150    #[default]
151    Default,
152    /// Prefer exact brute-force f32 vector scoring over approximate sidecars.
153    PreferExact,
154    /// Permit approximate candidate generation, with exact rerank when configured.
155    AllowApproximate,
156}
157
158/// Explicit search execution context for deterministic replay and receipt generation.
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct SearchContext {
161    /// Timestamp used for time-sensitive scoring such as recency.
162    pub evaluation_time: DateTime<Utc>,
163    /// Receipt metadata mode.
164    pub receipt_mode: ReceiptMode,
165    /// Opt-in policy for storing privacy-sensitive inputs for complete replay.
166    #[serde(default)]
167    pub replay_mode: ReplayMode,
168    /// Exactness policy for vector candidate generation.
169    pub exactness_profile: ExactnessProfile,
170    /// Optional caller-provided request/receipt correlation ID.
171    pub request_id: Option<String>,
172    /// Optional distributed trace identifier supplied by the caller.
173    #[serde(default, skip_serializing_if = "Option::is_none")]
174    pub trace_id: Option<String>,
175    /// Optional family ID tying retries/attempts for the same logical request.
176    #[serde(default, skip_serializing_if = "Option::is_none")]
177    pub attempt_family_id: Option<String>,
178    /// Optional retry/attempt identifier supplied by the caller.
179    #[serde(default, skip_serializing_if = "Option::is_none")]
180    pub attempt_id: Option<String>,
181    /// Receipt ID this search is replaying, when applicable.
182    #[serde(default, skip_serializing_if = "Option::is_none")]
183    pub replay_of: Option<String>,
184    /// Digest of raw query text when the caller provides one.
185    #[serde(default, skip_serializing_if = "Option::is_none")]
186    pub query_text_digest: Option<String>,
187    /// Digest of raw or structured query input when supplied by the caller.
188    #[serde(default, skip_serializing_if = "Option::is_none")]
189    pub query_input_digest: Option<String>,
190    /// Digest of structured filters when the caller provides one.
191    #[serde(default, skip_serializing_if = "Option::is_none")]
192    pub filter_digest: Option<String>,
193    /// Redaction state label for explain/replay surfaces.
194    #[serde(default, skip_serializing_if = "Option::is_none")]
195    pub redaction_state: Option<String>,
196    /// Optional budget identity associated with the search.
197    #[serde(default, skip_serializing_if = "Option::is_none")]
198    pub budget_id: Option<String>,
199    /// Optional caller deadline associated with the search.
200    #[serde(default, skip_serializing_if = "Option::is_none")]
201    pub deadline_at: Option<DateTime<Utc>>,
202}
203
204impl SearchContext {
205    /// Build a context using the current wall clock at the API boundary.
206    pub fn default_now() -> Self {
207        Self {
208            evaluation_time: Utc::now(),
209            receipt_mode: ReceiptMode::Disabled,
210            replay_mode: ReplayMode::NoReplay,
211            exactness_profile: ExactnessProfile::Default,
212            request_id: None,
213            trace_id: None,
214            attempt_family_id: None,
215            attempt_id: None,
216            replay_of: None,
217            query_text_digest: None,
218            query_input_digest: None,
219            filter_digest: None,
220            redaction_state: None,
221            budget_id: None,
222            deadline_at: None,
223        }
224    }
225
226    /// Build a replay context with an explicit evaluation timestamp.
227    pub fn at(evaluation_time: DateTime<Utc>) -> Self {
228        Self {
229            evaluation_time,
230            ..Self::default_now()
231        }
232    }
233
234    /// Whether a receipt should be produced for this context.
235    pub fn receipts_enabled(&self) -> bool {
236        self.receipt_mode != ReceiptMode::Disabled
237    }
238}
239
240impl Default for SearchContext {
241    fn default() -> Self {
242        Self::default_now()
243    }
244}
245
246/// Receipt-ready vector/search execution metadata.
247#[derive(Debug, Clone, Serialize, Deserialize)]
248pub struct VectorSearchReceiptV1 {
249    /// Receipt schema version.
250    #[serde(default = "default_vector_search_receipt_schema")]
251    pub schema_version: String,
252    /// Digest of the canonical stored receipt payload, when persisted.
253    #[serde(default, skip_serializing_if = "Option::is_none")]
254    pub receipt_digest: Option<String>,
255    /// Receipt or request correlation ID.
256    pub receipt_id: String,
257    /// Timestamp used for deterministic scoring.
258    pub evaluation_time: DateTime<Utc>,
259    /// Optional distributed trace identifier supplied by the caller.
260    #[serde(default, skip_serializing_if = "Option::is_none")]
261    pub trace_id: Option<String>,
262    /// Optional family ID tying retries/attempts for the same logical request.
263    #[serde(default, skip_serializing_if = "Option::is_none")]
264    pub attempt_family_id: Option<String>,
265    /// Optional retry/attempt identifier supplied by the caller.
266    #[serde(default, skip_serializing_if = "Option::is_none")]
267    pub attempt_id: Option<String>,
268    /// Receipt ID this receipt replays, when applicable.
269    #[serde(default, skip_serializing_if = "Option::is_none")]
270    pub replay_of: Option<String>,
271    /// Stable BLAKE3 digest of the query embedding bytes, when available.
272    pub query_embedding_digest: Option<String>,
273    /// Digest of raw query text when supplied by the caller.
274    #[serde(default, skip_serializing_if = "Option::is_none")]
275    pub query_text_digest: Option<String>,
276    /// Digest of raw or structured query input when supplied by the caller.
277    #[serde(default, skip_serializing_if = "Option::is_none")]
278    pub query_input_digest: Option<String>,
279    /// Digest of structured filters when supplied by the caller.
280    #[serde(default, skip_serializing_if = "Option::is_none")]
281    pub filter_digest: Option<String>,
282    /// Redaction state label for explain/replay surfaces.
283    #[serde(default, skip_serializing_if = "Option::is_none")]
284    pub redaction_state: Option<String>,
285    /// Optional budget identity associated with the search.
286    #[serde(default, skip_serializing_if = "Option::is_none")]
287    pub budget_id: Option<String>,
288    /// Optional caller deadline associated with the search.
289    #[serde(default, skip_serializing_if = "Option::is_none")]
290    pub deadline_at: Option<DateTime<Utc>>,
291    /// Human-readable search profile.
292    pub search_profile: String,
293    /// Candidate backend used for vector retrieval.
294    pub candidate_backend: String,
295    /// Codec family used for derived vector artifacts, when applicable.
296    pub codec_family: Option<String>,
297    /// Codec profile digest used for derived vector artifacts, when applicable.
298    pub codec_profile_digest: Option<String>,
299    /// Alias for derived artifact profile digest used by v11-compatible hooks.
300    #[serde(default, skip_serializing_if = "Option::is_none")]
301    pub artifact_profile_digest: Option<String>,
302    /// Number of derived artifacts considered by the vector path.
303    #[serde(default, skip_serializing_if = "Option::is_none")]
304    pub artifact_count: Option<usize>,
305    /// Number of corrupt derived artifacts encountered by the vector path.
306    #[serde(default, skip_serializing_if = "Option::is_none")]
307    pub artifact_corruption_count: Option<usize>,
308    /// Number of missing derived artifacts encountered by the vector path.
309    #[serde(default, skip_serializing_if = "Option::is_none")]
310    pub artifact_missing_count: Option<usize>,
311    /// Manifest digest for the derived vector artifacts considered by the search.
312    #[serde(default, skip_serializing_if = "Option::is_none")]
313    pub vector_artifact_manifest_digest: Option<String>,
314    /// Active generation ID for derived vector artifacts, when used.
315    #[serde(default, skip_serializing_if = "Option::is_none")]
316    pub artifact_generation_id: Option<String>,
317    /// Number of derived artifacts scanned by approximate candidate generation.
318    #[serde(default, skip_serializing_if = "Option::is_none")]
319    pub approximate_scanned_count: Option<usize>,
320    /// Number of approximate candidates returned for exact f32 reranking.
321    #[serde(default, skip_serializing_if = "Option::is_none")]
322    pub approximate_returned_count: Option<usize>,
323    /// Number of authoritative raw f32 rows loaded during exact rerank.
324    #[serde(default, skip_serializing_if = "Option::is_none")]
325    pub raw_rows_loaded_count: Option<usize>,
326    /// Filter strategy used by approximate candidate generation.
327    #[serde(default, skip_serializing_if = "Option::is_none")]
328    pub filter_strategy: Option<String>,
329    /// Number of derived vector artifacts considered by the vector path.
330    #[serde(default, skip_serializing_if = "Option::is_none")]
331    pub vector_artifact_count: Option<usize>,
332    /// Number of missing derived vector artifacts encountered by the vector path.
333    #[serde(default, skip_serializing_if = "Option::is_none")]
334    pub vector_artifact_missing_count: Option<usize>,
335    /// Number of stale derived vector artifacts encountered by the vector path.
336    #[serde(default, skip_serializing_if = "Option::is_none")]
337    pub vector_artifact_stale_count: Option<usize>,
338    /// Number of candidates exact-reranked against authoritative f32 embeddings.
339    #[serde(default, skip_serializing_if = "Option::is_none")]
340    pub exact_rerank_count: Option<usize>,
341    /// Number of approximate candidates produced by the candidate backend.
342    #[serde(default, skip_serializing_if = "Option::is_none")]
343    pub approximate_candidate_count: Option<usize>,
344    /// Explicit fallback reason, mirrored from fallback for evidence readers.
345    #[serde(default, skip_serializing_if = "Option::is_none")]
346    pub fallback_reason: Option<String>,
347    /// Structured receipt for derived candidate backends such as proveKV pool.
348    #[serde(default, skip_serializing_if = "Option::is_none")]
349    pub derived_candidate: Option<DerivedCandidateReceiptV1>,
350    /// Whether approximate codec/index scoring contributed to candidate generation.
351    pub approximate: bool,
352    /// Number of vector candidates requested from the backend.
353    pub requested_candidates: usize,
354    /// Number of candidates returned by the backend before SQL post-filtering.
355    pub returned_candidates: usize,
356    /// Number of vector candidates remaining after SQL filters and exact rerank.
357    pub post_filter_candidates: usize,
358    /// Whether a genuine sparse retrieval lane participated in fusion.
359    #[serde(default)]
360    pub sparse_enabled: bool,
361    /// Configured sparse RRF weight when the lane was enabled.
362    #[serde(default, skip_serializing_if = "Option::is_none")]
363    pub sparse_weight: Option<f64>,
364    /// Number of sparse query entries used for dot-product retrieval.
365    #[serde(default, skip_serializing_if = "Option::is_none")]
366    pub sparse_query_nonzero_count: Option<usize>,
367    /// Number of filtered sparse candidates admitted to fusion.
368    #[serde(default, skip_serializing_if = "Option::is_none")]
369    pub sparse_candidate_count: Option<usize>,
370    /// Sparse representation labels observed among admitted candidates.
371    #[serde(default, skip_serializing_if = "Vec::is_empty")]
372    pub sparse_representations: Vec<String>,
373    /// Per-result sparse ranks, allowing the third signal to be audited.
374    #[serde(default, skip_serializing_if = "Vec::is_empty")]
375    pub sparse_result_ranks: Vec<SparseRankReceiptV1>,
376    /// Fallback path, if approximate retrieval degraded or was bypassed.
377    pub fallback: Option<String>,
378    /// Whether exact f32 rerank/reference scoring was used.
379    pub exact_rerank: bool,
380    /// Result IDs returned to the caller.
381    pub result_ids: Vec<String>,
382    /// Degradation notes visible to explain/audit paths.
383    pub degradations: Vec<String>,
384}
385
386/// Inspectable sparse rank recorded in a durable search receipt.
387#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
388pub struct SparseRankReceiptV1 {
389    /// Canonical result ID.
390    pub result_id: String,
391    /// One-based sparse rank.
392    pub rank: usize,
393}
394
395/// Stable generation-level manifest for derived vector acceleration artifacts.
396#[derive(Debug, Clone, Serialize, Deserialize)]
397pub struct DerivedVectorArtifactGenerationV1 {
398    /// Stable schema marker.
399    pub schema_version: String,
400    /// Generation UUID.
401    pub generation_id: String,
402    /// Derived codec family.
403    pub codec_family: String,
404    /// Digest of the codec profile.
405    pub codec_profile_digest: String,
406    /// Digest over authoritative source rows used to build the generation.
407    pub source_snapshot_digest: String,
408    /// Number of authoritative source rows scanned.
409    pub source_row_count: usize,
410    /// Number of artifacts produced.
411    pub artifact_count: usize,
412    /// Authoritative source tables included in the build.
413    pub source_tables: Vec<String>,
414    /// Embedding dimension.
415    pub dim: usize,
416    /// Artifact wire encoding.
417    pub encoding: String,
418    /// Build timestamp.
419    pub created_at: DateTime<Utc>,
420    /// Optional build receipt ID.
421    pub build_receipt_id: Option<String>,
422    /// Digest of the artifact manifest for this generation.
423    pub artifact_manifest_digest: String,
424    /// Generation state.
425    pub status: String,
426    /// Structured or human-readable degradation markers.
427    pub degradations: Vec<String>,
428}
429
430/// Stable generation-level manifest for derived proveKV/poly-kv pool candidate artifacts.
431#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
432pub struct ProveKvPoolGenerationV1 {
433    pub schema_version: String,
434    pub generation_id: String,
435    pub embedding_snapshot_digest: String,
436    pub source_digest: String,
437    pub pool_manifest_digest: String,
438    pub codec_family: String,
439    pub codec_profile: String,
440    pub vector_dim: usize,
441    pub item_count: usize,
442    pub payload_bytes: u64,
443    pub created_at: DateTime<Utc>,
444}
445
446/// Durable item-to-pool-index mapping for a proveKV/poly-kv pool generation.
447#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
448pub struct ProveKvPoolItemMapEntryV1 {
449    pub generation_id: String,
450    pub item_id: String,
451    pub source_type: String,
452    pub pool_index: usize,
453    pub embedding_digest: String,
454}
455
456/// Lifecycle status for a proveKV/poly-kv pool generation.
457#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
458#[serde(rename_all = "snake_case")]
459pub enum ProveKvPoolGenerationStatus {
460    Disabled,
461    Missing,
462    Building,
463    Ready,
464    Stale,
465    Failed,
466}
467
468/// Current proveKV/poly-kv pool artifact status.
469#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
470pub struct ProveKvPoolArtifactStatusV1 {
471    pub status: ProveKvPoolGenerationStatus,
472    pub generation_id: Option<String>,
473    pub embedding_snapshot_digest: Option<String>,
474    pub pool_manifest_digest: Option<String>,
475    pub item_count: usize,
476    pub payload_bytes: u64,
477    pub reason: Option<String>,
478}
479
480/// Receipt-like summary for rebuilding proveKV/poly-kv pool artifacts.
481#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
482pub struct ProveKvPoolArtifactBuildReceiptV1 {
483    pub schema_version: String,
484    pub generation_id: String,
485    pub embedding_snapshot_digest: String,
486    pub source_digest: String,
487    pub pool_manifest_digest: String,
488    pub codec_family: String,
489    pub codec_profile: String,
490    pub vector_dim: usize,
491    pub item_count: usize,
492    pub payload_bytes: u64,
493    pub exact_rerank_required: bool,
494    pub created_at: DateTime<Utc>,
495}
496
497/// Structured search receipt for derived candidate generation followed by exact f32 rerank.
498#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
499pub struct DerivedCandidateReceiptV1 {
500    pub candidate_backend: String,
501    pub codec_family: Option<String>,
502    pub generation_id: Option<String>,
503    pub embedding_snapshot_digest: Option<String>,
504    pub pool_manifest_digest: Option<String>,
505    pub exact_rerank: bool,
506    pub approximate: bool,
507    pub fallback: Option<String>,
508    pub raw_candidate_count: usize,
509    pub post_filter_count: usize,
510    pub final_result_count: usize,
511}
512
513/// Receipt-like summary for rebuilding derived vector acceleration artifacts.
514#[derive(Debug, Clone, Serialize, Deserialize)]
515pub struct VectorArtifactBuildReceiptV1 {
516    /// Stable schema marker.
517    pub schema_version: String,
518    /// Derived codec family.
519    pub codec_family: String,
520    /// Digest of the codec profile used for all artifacts in the build.
521    pub codec_profile_digest: String,
522    /// Number of authoritative embedding rows scanned.
523    pub source_row_count: usize,
524    /// Number of artifacts written.
525    pub artifact_count: usize,
526    /// Active generation ID produced by the rebuild.
527    #[serde(default, skip_serializing_if = "Option::is_none")]
528    pub generation_id: Option<String>,
529    /// Source snapshot digest used by the generation manifest.
530    #[serde(default, skip_serializing_if = "Option::is_none")]
531    pub source_snapshot_digest: Option<String>,
532    /// Artifact manifest digest for this generation.
533    #[serde(default, skip_serializing_if = "Option::is_none")]
534    pub artifact_manifest_digest: Option<String>,
535    /// ID of the build receipt itself (same value stored in the generation manifest).
536    #[serde(default, skip_serializing_if = "Option::is_none")]
537    pub build_receipt_id: Option<String>,
538    /// Number of rows skipped because authoritative embeddings were invalid.
539    pub skipped_row_count: usize,
540    /// Wall-clock build duration in milliseconds.
541    pub elapsed_ms: u128,
542    /// Build timestamp.
543    pub created_at: DateTime<Utc>,
544    /// Non-fatal build notes.
545    pub degradations: Vec<String>,
546}
547
/// Schema marker used for `VectorSearchReceiptV1::schema_version` when the
/// field is absent from deserialized input.
fn default_vector_search_receipt_schema() -> String {
    String::from("vector_search_receipt_v1")
}
551
552/// Product-facing answers derived from a search receipt.
553#[derive(Debug, Clone, Serialize, Deserialize)]
554pub struct SearchReceiptAnswersV1 {
555    /// Receipt or request correlation ID.
556    pub receipt_id: String,
557    /// Stable ID to attach to replay/audit logs.
558    pub replay_receipt_id: String,
559    /// Timestamp used for deterministic scoring.
560    pub evaluation_time: DateTime<Utc>,
561    /// Human-readable search profile.
562    pub search_profile: String,
563    /// Candidate backend used for retrieval.
564    pub candidate_backend: String,
565    /// Codec family used for derived vector artifacts, when applicable.
566    pub codec_family: Option<String>,
567    /// Codec profile digest used for derived vector artifacts, when applicable.
568    pub codec_profile_digest: Option<String>,
569    /// Exactness label suitable for UI/API surfaces.
570    pub exactness: String,
571    /// Whether approximate codec/index scoring contributed to candidate generation.
572    pub approximate: bool,
573    /// Whether exact f32 rerank/reference scoring was used.
574    pub exact_rerank: bool,
575    /// Fallback path, if approximate retrieval degraded or was bypassed.
576    pub fallback: Option<String>,
577    /// Whether degradations or fallback occurred.
578    pub degraded: bool,
579    /// Whether the receipt carries enough deterministic context for replay with the original query.
580    pub replay_ready: bool,
581    /// Whether derived vector/index artifacts can be rebuilt from authoritative rows and profiles.
582    pub rebuild_ready: bool,
583    /// Result IDs returned to the caller.
584    pub result_ids: Vec<String>,
585    /// Number of returned results.
586    pub result_count: usize,
587    /// Degradation notes visible to explain/audit paths.
588    pub degradations: Vec<String>,
589    /// Plain-language reasons results appeared.
590    pub why_results_appeared: Vec<String>,
591}
592
593impl VectorSearchReceiptV1 {
594    /// Convert low-level receipt metadata into answers for explain/replay UX.
595    pub fn answers(&self) -> SearchReceiptAnswersV1 {
596        let exactness = match (self.approximate, self.exact_rerank) {
597            (true, true) => "approximate_candidate_generation_with_exact_rerank",
598            (true, false) => "approximate",
599            (false, true) => "exact_reference_with_rerank",
600            (false, false) => "exact_reference",
601        }
602        .to_string();
603
604        let mut why_results_appeared = Vec::new();
605        why_results_appeared.push(format!(
606            "retrieval used candidate backend '{}'",
607            self.candidate_backend
608        ));
609        if self.exact_rerank {
610            why_results_appeared.push("final vector ordering used exact f32 scoring".to_string());
611        }
612        if self.sparse_enabled {
613            why_results_appeared.push(format!(
614                "sparse dot-product retrieval admitted {} candidates",
615                self.sparse_candidate_count.unwrap_or(0)
616            ));
617        }
618        if let Some(fallback) = &self.fallback {
619            why_results_appeared.push(format!("fallback path '{}' was used", fallback));
620        }
621        if let Some(codec_profile_digest) = &self.codec_profile_digest {
622            why_results_appeared.push(format!(
623                "derived vector artifacts used codec profile '{}'",
624                codec_profile_digest
625            ));
626        } else {
627            why_results_appeared.push("no derived codec profile was used".to_string());
628        }
629        if let Some(query_embedding_digest) = &self.query_embedding_digest {
630            why_results_appeared.push(format!(
631                "query embedding digest '{}' is recorded for replay checks",
632                query_embedding_digest
633            ));
634        }
635
636        SearchReceiptAnswersV1 {
637            receipt_id: self.receipt_id.clone(),
638            replay_receipt_id: self.receipt_id.clone(),
639            evaluation_time: self.evaluation_time,
640            search_profile: self.search_profile.clone(),
641            candidate_backend: self.candidate_backend.clone(),
642            codec_family: self.codec_family.clone(),
643            codec_profile_digest: self.codec_profile_digest.clone(),
644            exactness,
645            approximate: self.approximate,
646            exact_rerank: self.exact_rerank,
647            fallback: self.fallback.clone(),
648            degraded: self.fallback.is_some() || !self.degradations.is_empty(),
649            replay_ready: self.query_embedding_digest.is_some(),
650            rebuild_ready: self.query_embedding_digest.is_some()
651                && self.exact_rerank
652                && self.fallback.is_none()
653                && (self
654                    .vector_artifact_count
655                    .or(self.artifact_count)
656                    .is_some_and(|count| count > 0)
657                    || (self.codec_family.is_none()
658                        && self.candidate_backend.contains("brute_force_f32")
659                        && !self.result_ids.is_empty())),
660            result_ids: self.result_ids.clone(),
661            result_count: self.result_ids.len(),
662            degradations: self.degradations.clone(),
663            why_results_appeared,
664        }
665    }
666}
667
668/// Search response shape for context-aware APIs.
669#[derive(Debug, Clone, Serialize, Deserialize)]
670pub struct SearchResponse {
671    /// Search results.
672    pub results: Vec<SearchResult>,
673    /// Optional receipt metadata.
674    pub receipt: Option<VectorSearchReceiptV1>,
675}
676
677/// Caller-supplied chunk for manifest ingestion.
678///
679/// The external chunk ID is returned in the ingest mapping, but semantic-memory still
680/// owns the durable chunk primary key and generates its own `sm_chunk_id`.
681#[derive(Debug, Clone, Serialize, Deserialize)]
682pub struct ChunkManifestEntry {
683    /// Caller-owned chunk identifier.
684    pub external_chunk_id: String,
685    /// Already chunked content to embed and store.
686    pub content: String,
687    /// Optional caller-estimated token count.
688    #[serde(default, skip_serializing_if = "Option::is_none")]
689    pub token_count_estimate: Option<usize>,
690    /// Optional caller-computed content digest for verification by adapters.
691    #[serde(default, skip_serializing_if = "Option::is_none")]
692    pub content_digest: Option<String>,
693    /// Optional per-chunk metadata kept in the receipt mapping.
694    #[serde(default, skip_serializing_if = "Option::is_none")]
695    pub metadata: Option<serde_json::Value>,
696}
697
698/// Document-level options for chunk manifest ingestion.
699#[derive(Debug, Clone, Serialize, Deserialize)]
700pub struct ChunkManifestIngestOptions {
701    /// Document title.
702    pub title: String,
703    /// Namespace/notebook scope.
704    pub namespace: String,
705    /// Optional file path, URL, or caller source identifier.
706    #[serde(default, skip_serializing_if = "Option::is_none")]
707    pub source_path: Option<String>,
708    /// Optional document metadata stored with the semantic-memory document.
709    #[serde(default, skip_serializing_if = "Option::is_none")]
710    pub metadata: Option<serde_json::Value>,
711}
712
713/// Exact mapping returned for a single manifest chunk after a successful transaction.
714#[derive(Debug, Clone, Serialize, Deserialize)]
715pub struct ChunkManifestChunkMapping {
716    /// Caller-owned chunk identifier supplied in the manifest.
717    pub external_chunk_id: String,
718    /// semantic-memory document id that owns the chunk.
719    pub sm_document_id: String,
720    /// semantic-memory chunk id generated and stored in `chunks.id`.
721    pub sm_chunk_id: String,
722    /// Position in the supplied manifest.
723    pub chunk_index: usize,
724    /// Stored chunk content digest, when supplied by caller.
725    #[serde(default, skip_serializing_if = "Option::is_none")]
726    pub content_digest: Option<String>,
727    /// Optional caller metadata echoed for adapter receipt/audit use.
728    #[serde(default, skip_serializing_if = "Option::is_none")]
729    pub metadata: Option<serde_json::Value>,
730}
731
732/// Successful chunk-manifest ingest receipt.
733#[derive(Debug, Clone, Serialize, Deserialize)]
734pub struct ChunkManifestIngestResult {
735    /// semantic-memory document id generated for this manifest.
736    pub sm_document_id: String,
737    /// Namespace/notebook scope used for ingest.
738    pub namespace: String,
739    /// Receipt/request correlation id for adapters.
740    pub receipt_id: String,
741    /// Ordered external chunk to semantic-memory chunk mappings.
742    pub chunks: Vec<ChunkManifestChunkMapping>,
743}
744
745/// Explained search response shape for context-aware APIs.
746#[derive(Debug, Clone, Serialize, Deserialize)]
747pub struct ExplainedSearchResponse {
748    /// Search results with scoring breakdowns.
749    pub results: Vec<ExplainedResult>,
750    /// Optional receipt metadata.
751    pub receipt: Option<VectorSearchReceiptV1>,
752}
753
754/// Replay comparison for a durable search receipt.
755#[derive(Debug, Clone, Serialize, Deserialize)]
756pub struct SearchReplayReportV1 {
757    /// Durable receipt ID that was replayed.
758    pub receipt_id: String,
759    /// Newly generated receipt ID for the replay attempt.
760    pub replay_receipt_id: String,
761    /// Original durable receipt metadata.
762    pub original_receipt: VectorSearchReceiptV1,
763    /// Receipt produced by the replay attempt.
764    pub replay_receipt: VectorSearchReceiptV1,
765    /// Whether the caller-supplied query produced the same embedding digest.
766    pub query_embedding_digest_matches: bool,
767    /// Whether replay returned the same result IDs in the same order.
768    pub result_ids_match: bool,
769    /// Original result IDs missing from replay output.
770    pub missing_result_ids: Vec<String>,
771    /// Replay result IDs not present in the original receipt.
772    pub added_result_ids: Vec<String>,
773    /// Whether replay used the vector-only API family.
774    pub vector_only: bool,
775}
776
777/// Common filter surface for imported projection queries.
778#[derive(Debug, Clone, Serialize, Deserialize)]
779pub struct ProjectionQuery {
780    /// Full scope to enforce.
781    pub scope: ScopeKey,
782    /// Optional free-text query applied to the projection's searchable fields.
783    #[serde(default, skip_serializing_if = "Option::is_none")]
784    pub text_query: Option<String>,
785    /// Valid-time as-of filter for versioned projection rows.
786    #[serde(default, skip_serializing_if = "Option::is_none")]
787    pub valid_at: Option<String>,
788    /// Transaction-time cutoff for imported rows.
789    #[serde(default, skip_serializing_if = "Option::is_none")]
790    pub recorded_at_or_before: Option<String>,
791    /// Optional subject-entity filter for claim/relation queries.
792    #[serde(default, skip_serializing_if = "Option::is_none")]
793    pub subject_entity_id: Option<EntityId>,
794    /// Optional canonical-entity filter for alias queries.
795    #[serde(default, skip_serializing_if = "Option::is_none")]
796    pub canonical_entity_id: Option<EntityId>,
797    /// Optional claim-state filter for claim-version queries.
798    #[serde(default, skip_serializing_if = "Option::is_none")]
799    pub claim_state: Option<String>,
800    /// Optional claim filter for claim/evidence queries.
801    #[serde(default, skip_serializing_if = "Option::is_none")]
802    pub claim_id: Option<ClaimId>,
803    /// Optional claim-version filter for evidence queries.
804    #[serde(default, skip_serializing_if = "Option::is_none")]
805    pub claim_version_id: Option<ClaimVersionId>,
806    /// Final result limit.
807    pub limit: usize,
808}
809
810impl ProjectionQuery {
811    pub fn new(scope: ScopeKey) -> Self {
812        Self {
813            scope,
814            text_query: None,
815            valid_at: None,
816            recorded_at_or_before: None,
817            subject_entity_id: None,
818            canonical_entity_id: None,
819            claim_state: None,
820            claim_id: None,
821            claim_version_id: None,
822            limit: 10,
823        }
824    }
825}
826
827/// Public read shape for imported claim projection rows.
828#[derive(Debug, Clone, Serialize, Deserialize)]
829pub struct ProjectionClaimVersion {
830    pub claim_version_id: ClaimVersionId,
831    pub claim_id: ClaimId,
832    pub claim_state: String,
833    pub projection_family: String,
834    pub subject_entity_id: EntityId,
835    pub predicate: String,
836    pub object_anchor: serde_json::Value,
837    pub scope_key: ScopeKey,
838    pub valid_from: Option<String>,
839    pub valid_to: Option<String>,
840    pub recorded_at: String,
841    pub preferred_open: bool,
842    pub source_envelope_id: EnvelopeId,
843    pub source_authority: String,
844    pub trace_id: Option<String>,
845    pub freshness: String,
846    pub contradiction_status: String,
847    pub supersedes_claim_version_id: Option<ClaimVersionId>,
848    pub content: String,
849    pub confidence: f32,
850    pub metadata: Option<serde_json::Value>,
851    pub source_exported_at: Option<String>,
852    pub transformed_at: Option<String>,
853}
854
855/// Public read shape for imported relation projection rows.
856#[derive(Debug, Clone, Serialize, Deserialize)]
857pub struct ProjectionRelationVersion {
858    pub relation_version_id: RelationVersionId,
859    pub subject_entity_id: EntityId,
860    pub predicate: String,
861    pub object_anchor: serde_json::Value,
862    pub scope_key: ScopeKey,
863    pub claim_id: Option<ClaimId>,
864    pub source_episode_id: Option<EpisodeId>,
865    pub valid_from: Option<String>,
866    pub valid_to: Option<String>,
867    pub recorded_at: String,
868    pub preferred_open: bool,
869    pub supersedes_relation_version_id: Option<RelationVersionId>,
870    pub contradiction_status: String,
871    pub source_confidence: f32,
872    pub projection_family: String,
873    pub source_envelope_id: EnvelopeId,
874    pub source_authority: String,
875    pub trace_id: Option<String>,
876    pub freshness: String,
877    pub metadata: Option<serde_json::Value>,
878    pub source_exported_at: Option<String>,
879    pub transformed_at: Option<String>,
880}
881
882/// Public read shape for imported episode projection rows.
883#[derive(Debug, Clone, Serialize, Deserialize)]
884pub struct ProjectionEpisode {
885    pub episode_id: EpisodeId,
886    pub document_id: String,
887    pub cause_ids: Vec<String>,
888    pub effect_type: String,
889    pub outcome: String,
890    pub confidence: f32,
891    pub experiment_id: Option<String>,
892    pub scope_key: ScopeKey,
893    pub source_envelope_id: EnvelopeId,
894    pub source_authority: String,
895    pub trace_id: Option<String>,
896    pub recorded_at: String,
897    pub metadata: Option<serde_json::Value>,
898    pub source_exported_at: Option<String>,
899    pub transformed_at: Option<String>,
900}
901
902/// Public read shape for imported entity-alias rows.
903#[derive(Debug, Clone, Serialize, Deserialize)]
904pub struct ProjectionEntityAlias {
905    pub canonical_entity_id: EntityId,
906    pub alias_text: String,
907    pub alias_source: String,
908    pub match_evidence: Option<serde_json::Value>,
909    pub confidence: f32,
910    pub merge_decision: String,
911    pub scope_key: ScopeKey,
912    pub review_state: String,
913    pub is_human_confirmed: bool,
914    pub is_human_confirmed_final: bool,
915    pub superseded_by_entity_id: Option<EntityId>,
916    pub split_from_entity_id: Option<EntityId>,
917    pub source_envelope_id: EnvelopeId,
918    pub recorded_at: String,
919    pub source_exported_at: Option<String>,
920    pub transformed_at: Option<String>,
921}
922
923/// Public read shape for imported evidence-reference rows.
924#[derive(Debug, Clone, Serialize, Deserialize)]
925pub struct ProjectionEvidenceRef {
926    pub claim_id: ClaimId,
927    pub claim_version_id: Option<ClaimVersionId>,
928    pub fetch_handle: String,
929    pub source_authority: String,
930    pub source_envelope_id: EnvelopeId,
931    pub scope_key: ScopeKey,
932    pub recorded_at: String,
933    pub metadata: Option<serde_json::Value>,
934    pub source_exported_at: Option<String>,
935    pub transformed_at: Option<String>,
936}
937
938/// A conversation session.
939#[derive(Debug, Clone, Serialize, Deserialize)]
940pub struct Session {
941    /// UUID v4.
942    pub id: String,
943    /// Channel identifier (e.g. "repl", "telegram").
944    pub channel: String,
945    /// ISO 8601 timestamp.
946    pub created_at: String,
947    /// ISO 8601 timestamp.
948    pub updated_at: String,
949    /// Optional JSON metadata.
950    pub metadata: Option<serde_json::Value>,
951    /// Number of messages (populated on list queries).
952    pub message_count: u32,
953}
954
955/// A single message within a session.
956#[derive(Debug, Clone, Serialize, Deserialize)]
957pub struct Message {
958    /// Auto-increment ID.
959    pub id: i64,
960    /// Session this message belongs to.
961    pub session_id: String,
962    /// Role of the speaker.
963    pub role: Role,
964    /// Message text.
965    pub content: String,
966    /// Estimated token count (caller-provided).
967    pub token_count: Option<u32>,
968    /// ISO 8601 timestamp.
969    pub created_at: String,
970    /// Optional JSON metadata.
971    pub metadata: Option<serde_json::Value>,
972}
973
974/// A discrete fact in the knowledge store.
975#[derive(Debug, Clone, Serialize, Deserialize)]
976pub struct Fact {
977    /// UUID v4.
978    pub id: String,
979    /// Categorization namespace.
980    pub namespace: String,
981    /// The fact text.
982    pub content: String,
983    /// Where this fact came from.
984    pub source: Option<String>,
985    /// ISO 8601 timestamp.
986    pub created_at: String,
987    /// ISO 8601 timestamp.
988    pub updated_at: String,
989    /// Optional JSON metadata.
990    pub metadata: Option<serde_json::Value>,
991}
992
993/// A source document that has been chunked and embedded.
994#[derive(Debug, Clone, Serialize, Deserialize)]
995pub struct Document {
996    /// UUID v4.
997    pub id: String,
998    /// Document title.
999    pub title: String,
1000    /// File path, URL, or identifier.
1001    pub source_path: Option<String>,
1002    /// Categorization namespace.
1003    pub namespace: String,
1004    /// ISO 8601 timestamp.
1005    pub created_at: String,
1006    /// Optional JSON metadata.
1007    pub metadata: Option<serde_json::Value>,
1008    /// Number of chunks (populated on list queries).
1009    pub chunk_count: u32,
1010}
1011
1012/// A chunk produced by the text splitter.
1013#[derive(Debug, Clone, Serialize, Deserialize)]
1014pub struct TextChunk {
1015    /// Position in the original document (0-based).
1016    pub index: usize,
1017    /// The chunk text.
1018    pub content: String,
1019    /// Rough token estimate (chars / 4).
1020    pub token_count_estimate: usize,
1021}
1022
1023/// A single search result.
1024#[derive(Debug, Clone, Serialize, Deserialize)]
1025pub struct SearchResult {
1026    /// The matched text content.
1027    pub content: String,
1028
1029    /// Where this result came from.
1030    pub source: SearchSource,
1031
1032    /// Combined RRF score. Higher = more relevant.
1033    pub score: f64,
1034
1035    /// BM25 rank (1-based) if this result appeared in BM25 results.
1036    pub bm25_rank: Option<usize>,
1037
1038    /// Vector rank (1-based) if this result appeared in vector results.
1039    pub vector_rank: Option<usize>,
1040
1041    /// Cosine similarity score if computed.
1042    pub cosine_similarity: Option<f64>,
1043}
1044
1045/// Source information for a search result.
1046#[derive(Debug, Clone, Serialize, Deserialize)]
1047#[serde(rename_all = "snake_case")]
1048pub enum SearchSource {
1049    /// Result came from the facts table.
1050    Fact {
1051        /// Fact UUID.
1052        fact_id: String,
1053        /// Fact namespace.
1054        namespace: String,
1055    },
1056    /// Result came from a document chunk.
1057    Chunk {
1058        /// Chunk UUID.
1059        chunk_id: String,
1060        /// Parent document UUID.
1061        document_id: String,
1062        /// Parent document title.
1063        document_title: String,
1064        /// Position within the document (0-based).
1065        chunk_index: usize,
1066    },
1067    /// Result came from a conversation message.
1068    Message {
1069        /// Message auto-increment ID.
1070        message_id: i64,
1071        /// Session UUID.
1072        session_id: String,
1073        /// Message role (user, assistant, etc.).
1074        role: String,
1075    },
1076    /// Result came from an episode (causal record). SearchSource::Episode variant.
1077    Episode {
1078        /// First-class episode identity (V9+). Falls back to `document_id + "-ep0"`
1079        /// for legacy data.
1080        episode_id: String,
1081        /// Document ID the episode is attached to.
1082        document_id: String,
1083        /// Type of effect (e.g. "test_failure", "regression").
1084        effect_type: String,
1085        /// Current outcome.
1086        outcome: String,
1087    },
1088    /// Result came from an imported projection row.
1089    Projection {
1090        /// Projection row family, such as `claim_version` or `relation_version`.
1091        projection_kind: String,
1092        /// Stable projection-row identity.
1093        projection_id: String,
1094        /// Full scope carried by the imported row.
1095        scope_key: ScopeKey,
1096        /// Validity start for versioned projections, if any.
1097        valid_from: Option<String>,
1098        /// Validity end for versioned projections, if any.
1099        valid_to: Option<String>,
1100        /// Authoritative importer-assigned recorded_at.
1101        recorded_at: String,
1102        /// Source envelope provenance.
1103        source_envelope_id: String,
1104        /// Source authority provenance.
1105        source_authority: String,
1106    },
1107}
1108
1109impl SearchSource {
1110    /// Stable result ID used in receipts and replay logs.
1111    pub fn result_id(&self) -> String {
1112        match self {
1113            Self::Fact { fact_id, .. } => format!("fact:{fact_id}"),
1114            Self::Chunk { chunk_id, .. } => format!("chunk:{chunk_id}"),
1115            Self::Message { message_id, .. } => format!("msg:{message_id}"),
1116            Self::Episode { episode_id, .. } => format!("episode:{episode_id}"),
1117            Self::Projection { projection_id, .. } => format!("projection:{projection_id}"),
1118        }
1119    }
1120
1121    /// Source family label used by explain/receipt surfaces.
1122    pub fn source_kind(&self) -> &'static str {
1123        match self {
1124            Self::Fact { .. } => "fact",
1125            Self::Chunk { .. } => "chunk",
1126            Self::Message { .. } => "message",
1127            Self::Episode { .. } => "episode",
1128            Self::Projection { .. } => "projection",
1129        }
1130    }
1131
1132    /// Authoritative source row key without the receipt result prefix.
1133    pub fn source_id(&self) -> String {
1134        match self {
1135            Self::Fact { fact_id, .. } => fact_id.clone(),
1136            Self::Chunk { chunk_id, .. } => chunk_id.clone(),
1137            Self::Message { message_id, .. } => message_id.to_string(),
1138            Self::Episode { episode_id, .. } => episode_id.clone(),
1139            Self::Projection { projection_id, .. } => projection_id.clone(),
1140        }
1141    }
1142}
1143
1144// ─── Episode Types ─────────────────────────────────────────────
1145
1146/// Metadata for a causal episode (PRIMITIVES_CONTRACT §4).
1147#[derive(Debug, Clone, Serialize, Deserialize)]
1148pub struct EpisodeMeta {
1149    /// IDs of the facts/chunks/messages that caused this episode.
1150    pub cause_ids: Vec<String>,
1151    /// Type of effect (e.g. "test_failure", "regression", "improvement").
1152    pub effect_type: String,
1153    /// Current outcome assessment.
1154    pub outcome: EpisodeOutcome,
1155    /// Confidence in the causal link (0.0 to 1.0).
1156    pub confidence: f32,
1157    /// Verification status.
1158    pub verification_status: VerificationStatus,
1159    /// Links to an EvidenceBundle.run_id (if experimentally verified).
1160    pub experiment_id: Option<String>,
1161    /// Bitemporal valid time — when this episode fact was true in the domain.
1162    pub valid_time: Option<chrono::DateTime<chrono::Utc>>,
1163    /// Content-addressed digest of the episode fact payload (for supersession chain).
1164    pub fact_digest: Option<String>,
1165}
1166
1167/// Receipt for an as-of bitemporal episode query.
1168#[derive(Debug, Clone, Serialize, Deserialize)]
1169pub struct EpisodeAsOfReceiptV1 {
1170    pub query_id: String,
1171    pub as_of_valid: chrono::DateTime<chrono::Utc>,
1172    pub as_of_recorded: chrono::DateTime<chrono::Utc>,
1173    pub episode_count: usize,
1174    pub episode_ids: Vec<String>,
1175    pub excluded_superseded: usize,
1176}
1177
1178/// Outcome of an episode's causal hypothesis.
1179#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1180#[serde(rename_all = "lowercase")]
1181pub enum EpisodeOutcome {
1182    /// Causal link confirmed by experiment.
1183    Confirmed,
1184    /// Causal link refuted by experiment.
1185    Refuted,
1186    /// Evidence is inconclusive.
1187    Inconclusive,
1188    /// Not yet tested.
1189    Pending,
1190}
1191
1192impl EpisodeOutcome {
1193    /// Convert to the string stored in SQLite.
1194    pub fn as_str(&self) -> &'static str {
1195        match self {
1196            Self::Confirmed => "confirmed",
1197            Self::Refuted => "refuted",
1198            Self::Inconclusive => "inconclusive",
1199            Self::Pending => "pending",
1200        }
1201    }
1202
1203    /// Parse from the string stored in SQLite.
1204    pub fn from_str_value(s: &str) -> Option<Self> {
1205        match s {
1206            "confirmed" => Some(Self::Confirmed),
1207            "refuted" => Some(Self::Refuted),
1208            "inconclusive" => Some(Self::Inconclusive),
1209            "pending" => Some(Self::Pending),
1210            _ => None,
1211        }
1212    }
1213}
1214
1215impl std::fmt::Display for EpisodeOutcome {
1216    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1217        f.write_str(self.as_str())
1218    }
1219}
1220
1221/// Verification status for an episode.
1222#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1223#[serde(tag = "status", rename_all = "lowercase")]
1224pub enum VerificationStatus {
1225    /// Not yet verified.
1226    Unverified,
1227    /// Successfully verified.
1228    Verified {
1229        /// Method used for verification.
1230        method: String,
1231        /// When verification occurred (ISO 8601).
1232        at: String,
1233    },
1234    /// Verification attempt failed.
1235    Failed {
1236        /// Reason for failure.
1237        reason: String,
1238        /// When verification was attempted (ISO 8601).
1239        at: String,
1240    },
1241}
1242
1243// ─── Score Breakdown ───────────────────────────────────────────
1244
1245/// Detailed score breakdown for explainable search results.
1246#[derive(Debug, Clone, Serialize, Deserialize)]
1247pub struct ScoreBreakdown {
1248    /// Final fused RRF score.
1249    pub rrf_score: f64,
1250    /// Raw BM25 score reported by SQLite FTS5 (lower is better).
1251    pub bm25_score: Option<f64>,
1252    /// Raw vector similarity used for the final vector ordering.
1253    pub vector_score: Option<f64>,
1254    /// Raw sparse dot-product score used for sparse ordering.
1255    #[serde(default, skip_serializing_if = "Option::is_none")]
1256    pub sparse_score: Option<f64>,
1257    /// Recency contribution added during fusion.
1258    pub recency_score: Option<f64>,
1259    /// BM25 rank (1-based).
1260    pub bm25_rank: Option<usize>,
1261    /// Vector rank (1-based).
1262    pub vector_rank: Option<usize>,
1263    /// Sparse rank (1-based).
1264    #[serde(default, skip_serializing_if = "Option::is_none")]
1265    pub sparse_rank: Option<usize>,
1266    /// Rank from the underlying vector retrieval source before any exact rerank.
1267    pub vector_source_rank: Option<usize>,
1268    /// Similarity score from the underlying vector retrieval source before rerank.
1269    pub vector_source_score: Option<f64>,
1270    /// BM25 RRF contribution to the final score.
1271    pub bm25_contribution: Option<f64>,
1272    /// Vector RRF contribution to the final score.
1273    pub vector_contribution: Option<f64>,
1274    /// Sparse RRF contribution to the final score.
1275    #[serde(default, skip_serializing_if = "Option::is_none")]
1276    pub sparse_contribution: Option<f64>,
1277    /// Whether the vector ordering was reranked with exact f32 cosine similarity.
1278    pub vector_reranked_from_f32: bool,
1279    /// Configured BM25 fusion weight.
1280    pub bm25_weight: f64,
1281    /// Configured vector fusion weight.
1282    pub vector_weight: f64,
1283    /// Configured sparse fusion weight.
1284    #[serde(default)]
1285    pub sparse_weight: f64,
1286    /// Configured recency weight when recency is enabled.
1287    pub recency_weight: Option<f64>,
1288    /// Configured RRF decay constant.
1289    pub rrf_k: f64,
1290}
1291
1292/// Search result with full score explanation.
1293#[derive(Debug, Clone, Serialize, Deserialize)]
1294pub struct ExplainedResult {
1295    /// The search result.
1296    pub result: SearchResult,
1297    /// Score breakdown.
1298    pub breakdown: ScoreBreakdown,
1299}
1300
1301/// Product-facing answer for one explained result.
1302#[derive(Debug, Clone, Serialize, Deserialize)]
1303pub struct ExplainedResultAnswerV1 {
1304    /// Stable result ID used in receipts and replay logs.
1305    pub result_id: String,
1306    /// Source family label.
1307    pub source_kind: String,
1308    /// Authoritative source row key without the receipt result prefix.
1309    pub source_id: String,
1310    /// Plain-language reasons this result appeared.
1311    pub why_this_result: Vec<String>,
1312    /// Whether the result matched the text/BM25 lane.
1313    pub text_match: bool,
1314    /// Whether the result matched the vector lane.
1315    pub vector_match: bool,
1316    /// Whether recency contributed to the score.
1317    pub recency_applied: bool,
1318    /// Whether exact f32 rerank/reference scoring was used for the vector lane.
1319    pub exact_vector_rerank: bool,
1320    /// Final fused score.
1321    pub final_score: f64,
1322}
1323
1324impl ExplainedResult {
1325    /// Convert a detailed score breakdown into a practical "why this result" answer.
1326    pub fn answer(&self) -> ExplainedResultAnswerV1 {
1327        let text_match = self.breakdown.bm25_rank.is_some();
1328        let vector_match = self.breakdown.vector_rank.is_some();
1329        let recency_applied = self.breakdown.recency_score.is_some();
1330        let mut why_this_result = Vec::new();
1331
1332        if let Some(rank) = self.breakdown.bm25_rank {
1333            why_this_result.push(format!("text match rank {rank} contributed to fusion"));
1334        }
1335        if let Some(rank) = self.breakdown.vector_rank {
1336            why_this_result.push(format!("vector match rank {rank} contributed to fusion"));
1337        }
1338        if let Some(rank) = self.breakdown.sparse_rank {
1339            why_this_result.push(format!("sparse match rank {rank} contributed to fusion"));
1340        }
1341        if recency_applied {
1342            why_this_result.push("recency contributed to the fused score".to_string());
1343        }
1344        if self.breakdown.vector_reranked_from_f32 {
1345            why_this_result.push("vector score was checked with exact f32 rerank".to_string());
1346        }
1347        if why_this_result.is_empty() {
1348            why_this_result.push("result survived filtering and deterministic ranking".to_string());
1349        }
1350
1351        ExplainedResultAnswerV1 {
1352            result_id: self.result.source.result_id(),
1353            source_kind: self.result.source.source_kind().to_string(),
1354            source_id: self.result.source.source_id(),
1355            why_this_result,
1356            text_match,
1357            vector_match,
1358            recency_applied,
1359            exact_vector_rerank: self.breakdown.vector_reranked_from_f32,
1360            final_score: self.result.score,
1361        }
1362    }
1363}
1364
1365// ─── Graph Types (PRIMITIVES_CONTRACT §8) ──────────────────────
1366
1367/// Trait for querying the memory store as a graph.
1368pub trait GraphView: Send + Sync {
1369    /// Find neighboring nodes up to `max_depth` hops away.
1370    fn neighbors(
1371        &self,
1372        node_id: &str,
1373        direction: GraphDirection,
1374        max_depth: usize,
1375    ) -> Result<Vec<GraphEdge>, MemoryError>;
1376
1377    /// Find a path between two nodes (BFS, max depth).
1378    fn path(
1379        &self,
1380        from: &str,
1381        to: &str,
1382        max_depth: usize,
1383    ) -> Result<Option<Vec<String>>, MemoryError>;
1384}
1385
/// Which edges a graph traversal follows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GraphDirection {
    /// Traverse outgoing edges only.
    Outgoing,
    /// Traverse incoming edges only.
    Incoming,
    /// Traverse edges in either direction.
    Both,
}
1396
1397/// An edge in the memory graph.
1398#[derive(Debug, Clone, Serialize, Deserialize)]
1399pub struct GraphEdge {
1400    /// Source node ID.
1401    pub source: String,
1402    /// Target node ID.
1403    pub target: String,
1404    /// Type of relationship.
1405    pub edge_type: GraphEdgeType,
1406    /// Edge weight (interpretation depends on edge_type).
1407    pub weight: f64,
1408    /// Optional metadata.
1409    pub metadata: Option<serde_json::Value>,
1410}
1411
1412/// Type of relationship between graph nodes.
1413#[derive(Debug, Clone, Serialize, Deserialize)]
1414#[serde(rename_all = "snake_case")]
1415pub enum GraphEdgeType {
1416    /// Semantic similarity. GraphEdgeType::Semantic variant.
1417    Semantic {
1418        /// Cosine similarity between embeddings.
1419        cosine_similarity: f32,
1420    },
1421    /// Temporal proximity. GraphEdgeType::Temporal variant.
1422    Temporal {
1423        /// Time delta in seconds.
1424        delta_secs: u64,
1425    },
1426    /// Causal relationship. GraphEdgeType::Causal variant.
1427    Causal {
1428        /// Confidence in the causal link.
1429        confidence: f32,
1430        /// EvidenceBundle run_ids supporting this link.
1431        evidence_ids: Vec<String>,
1432    },
1433    /// Entity co-occurrence. GraphEdgeType::Entity variant.
1434    Entity {
1435        /// Relationship type (e.g. "mentions", "modifies").
1436        relation: String,
1437    },
1438}
1439
1440/// Embedding displacement between two text embeddings.
1441#[derive(Debug, Clone, Serialize, Deserialize)]
1442pub struct EmbeddingDisplacement {
1443    /// Cosine similarity between the two embeddings.
1444    pub cosine_similarity: f32,
1445    /// Euclidean distance between the two embeddings.
1446    pub euclidean_distance: f32,
1447    /// Magnitude of the first embedding.
1448    pub magnitude_a: f32,
1449    /// Magnitude of the second embedding.
1450    pub magnitude_b: f32,
1451}
1452
1453/// Database statistics.
1454#[derive(Debug, Clone, Serialize, Deserialize)]
1455pub struct MemoryStats {
1456    /// Total number of facts.
1457    pub total_facts: u64,
1458    /// Total number of documents.
1459    pub total_documents: u64,
1460    /// Total number of chunks across all documents.
1461    pub total_chunks: u64,
1462    /// Total number of conversation sessions.
1463    pub total_sessions: u64,
1464    /// Total number of messages across all sessions.
1465    pub total_messages: u64,
1466    /// Database file size in bytes.
1467    pub database_size_bytes: u64,
1468    /// Currently configured embedding model.
1469    pub embedding_model: Option<String>,
1470    /// Currently configured embedding dimensions.
1471    pub embedding_dimensions: Option<usize>,
1472}
1473
1474/// Per-surface deletion counts for namespace removal.
1475#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
1476pub struct NamespaceDeleteReport {
1477    /// Facts deleted from the namespace.
1478    pub facts: usize,
1479    /// Documents deleted from the namespace.
1480    pub documents: usize,
1481    /// Document chunks deleted from the namespace.
1482    pub chunks: usize,
1483    /// Messages deleted through namespaced sessions.
1484    pub messages: usize,
1485    /// Sessions deleted for the namespace.
1486    pub sessions: usize,
1487    /// Episodes deleted with namespaced documents.
1488    pub episodes: usize,
1489    /// Projection/import rows deleted or invalidated.
1490    pub projection_rows: usize,
1491    /// HNSW pending operations queued by the deletion.
1492    pub hnsw_ops: usize,
1493}
semantic_memory/types.rs

semantic_memory/
types.rs