Skip to main content

smos_application/types/
search_hit.rs

1//! Vector-search hit DTO.
2//!
//! Mirrors the POC `SearchHit` (`smos/storage.py:36`): id, document, metadata,
//! distance. In this port `distance` lives inside the metadata rather than at
//! the top level, and each hit also carries its `memory_key`. Metadata is
//! broken out into a typed sub-struct so downstream retrieval-planning logic
//! can read confidence / heat / validity without string-keyed lookups, while
//! the metadata itself stays round-trippable as JSON for adapter convenience.
8
9use serde::{Deserialize, Serialize};
10use smos_domain::{FactId, MemoryKey};
11
12/// One row returned by `FactRepository::search_similar`.
13#[derive(Debug, Clone, PartialEq)]
14pub struct SearchHit {
15    pub id: FactId,
16    pub document: String,
17    pub memory_key: MemoryKey,
18    pub metadata: SearchHitMetadata,
19}
20
21/// Strongly-typed view over the POC's `dict[str, object]` metadata bag.
22#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
23pub struct SearchHitMetadata {
24    /// `accepted` / `pending` / `rejected`.
25    pub status: String,
26    /// Stored confidence score in `[0.0, 1.0]`.
27    pub confidence: f32,
28    /// ISO-8601 string of the validity tombstone, or `None` if the fact is
29    /// still current. Stored as a string so the row stays self-describing
30    /// across adapters without binding to a specific datetime crate.
31    pub valid_until: Option<String>,
32    /// Heat base value `[0.0, 1.0]`; §7 decay uses this as the seed.
33    pub heat_base: f32,
34    /// Last-access unix timestamp in seconds. The field is typed as `f32` for
35    /// wire compatibility with downstream JSON consumers that emit fractional
36    /// seconds, but the SurrealStore adapter currently truncates to whole
37    /// seconds (`surreal_store::SearchSimilarRow::to_hit` parses an ISO
38    /// datetime and stores `ts.as_unix_secs() as f32`). Treat the value as
39    /// second-precision until the storage layer gains sub-second support.
40    pub last_access_at: f32,
41    /// Cosine distance reported by the vector store. Lower = more similar.
42    /// `None` when the underlying store did not surface a distance.
43    pub distance: Option<f32>,
44    /// ISO-8601 extraction timestamp. Populated by the adapter from the fact's
45    /// `extracted_at` column so the read-only search surface can report when
46    /// each memory was first observed without a second round-trip.
47    /// `#[serde(default)]` keeps the field optional for older rows / fakes
48    /// that never set it.
49    #[serde(default)]
50    pub created_at: Option<String>,
51    /// Fact ids this fact contradicts (the `conflicts_with` provenance set).
52    /// Populated by the adapter so the search surface can surface active
53    /// conflicts to the caller. `#[serde(default)]` deserialises missing
54    /// arrays as empty for backward compatibility.
55    #[serde(default)]
56    pub conflicts_with: Vec<String>,
57}
58
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline fixture: an accepted, still-current fact with a distance,
    /// an extraction timestamp and one conflict.
    fn sample_metadata() -> SearchHitMetadata {
        SearchHitMetadata {
            status: "accepted".into(),
            confidence: 0.85,
            valid_until: None,
            heat_base: 1.0,
            // A multiple of 128, hence exactly representable as `f32`.
            last_access_at: 1_700_000_000.0,
            distance: Some(0.12),
            created_at: Some("2025-06-18T12:00:00Z".into()),
            conflicts_with: vec!["fact_deadbeefdeadbee".into()],
        }
    }

    #[test]
    fn metadata_roundtrips_through_serde() {
        let meta = sample_metadata();
        let json = serde_json::to_string(&meta).unwrap();
        let back: SearchHitMetadata = serde_json::from_str(&json).unwrap();
        assert_eq!(meta, back);
    }

    #[test]
    fn metadata_serialises_optional_valid_until_as_null_when_absent() {
        let meta = sample_metadata();
        let v: serde_json::Value = serde_json::to_value(&meta).unwrap();
        assert_eq!(v["valid_until"], serde_json::Value::Null);
    }

    #[test]
    fn metadata_serialises_optional_distance_as_number_when_present() {
        let meta = sample_metadata();
        let v: serde_json::Value = serde_json::to_value(&meta).unwrap();
        // f32 → f64 widening can introduce tiny representation drift, so
        // compare with tolerance rather than strict equality.
        let got = v["distance"]
            .as_f64()
            .expect("`distance` should serialise as a JSON number");
        assert!((got - 0.12).abs() < 1e-5, "got {got}");
    }

    #[test]
    fn metadata_supports_tombstoned_fact() {
        let meta = SearchHitMetadata {
            status: "accepted".into(),
            confidence: 0.9,
            valid_until: Some("2027-01-01T00:00:00Z".into()),
            heat_base: 0.4,
            // Kept on the 128 s `f32` grid so the fixture holds the value it
            // spells out (an off-grid literal would be silently rounded).
            last_access_at: 1_700_000_128.0,
            distance: None,
            created_at: None,
            conflicts_with: Vec::new(),
        };
        let v: serde_json::Value = serde_json::to_value(&meta).unwrap();
        assert_eq!(v["valid_until"], "2027-01-01T00:00:00Z");
        assert_eq!(v["distance"], serde_json::Value::Null);
    }

    #[test]
    fn metadata_roundtrips_created_at_and_conflicts_with() {
        let meta = SearchHitMetadata {
            status: "accepted".into(),
            confidence: 0.9,
            valid_until: None,
            heat_base: 1.0,
            last_access_at: 1_700_000_000.0,
            distance: Some(0.05),
            created_at: Some("2025-06-18T12:00:00Z".into()),
            conflicts_with: vec![
                "fact_aaaaaaaaaaaaaaaa".into(),
                "fact_bbbbbbbbbbbbbbbb".into(),
            ],
        };
        let json = serde_json::to_string(&meta).unwrap();
        let back: SearchHitMetadata = serde_json::from_str(&json).unwrap();
        assert_eq!(back.created_at.as_deref(), Some("2025-06-18T12:00:00Z"));
        assert_eq!(
            back.conflicts_with,
            vec!["fact_aaaaaaaaaaaaaaaa", "fact_bbbbbbbbbbbbbbbb"]
        );
    }

    /// `#[serde(default)]` on the new fields keeps deserialisation backward
    /// compatible with rows / fakes emitted before the fields existed: a JSON
    /// object missing `created_at` and `conflicts_with` deserialises to
    /// `None` / `[]` instead of erroring.
    #[test]
    fn metadata_deserialises_legacy_payload_missing_new_fields() {
        let legacy = serde_json::json!({
            "status": "accepted",
            "confidence": 0.8,
            "valid_until": null,
            "heat_base": 1.0,
            "last_access_at": 1700000000.0,
            "distance": 0.1
        });
        let meta: SearchHitMetadata = serde_json::from_value(legacy).unwrap();
        assert!(meta.created_at.is_none());
        assert!(meta.conflicts_with.is_empty());
    }

    /// Pins the granularity of the `f32`-typed `last_access_at`: epoch values
    /// of this magnitude sit on a 128 s grid, so a 50 s offset is rounded
    /// away while a 128 s offset survives. Update or drop this test if the
    /// field is ever widened.
    #[test]
    fn last_access_at_is_quantised_to_f32_granularity() {
        let base = sample_metadata();
        let nudged = SearchHitMetadata {
            last_access_at: base.last_access_at + 50.0,
            ..sample_metadata()
        };
        let stepped = SearchHitMetadata {
            last_access_at: base.last_access_at + 128.0,
            ..sample_metadata()
        };
        assert_eq!(nudged, base);
        assert_ne!(stepped, base);
    }
}