eros-engine-store 0.6.1

Postgres + pgvector persistence layer for the eros-engine AI companion engine: chat history, two-layer long-term memory, affinity, and structured user insight.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
// SPDX-License-Identifier: AGPL-3.0-only
//! Companion insight storage + JSONB merge + training-level computation.
//!
//! `training_level` is a weighted score across known schema fields.
//! Weights ported verbatim from the gateway implementation.

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;

#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct CompanionInsightsRow {
    pub user_id: Uuid,
    pub insights: serde_json::Value,
    pub training_level: f64,
    pub updated_at: DateTime<Utc>,
}

/// Per-field weights summing to 1.0. Matches the gateway's WEIGHTS table.
const WEIGHTS: &[(&str, f64)] = &[
    ("city", 0.05),
    ("occupation", 0.05),
    ("interests", 0.10),
    ("mbti_guess", 0.15),
    ("love_values", 0.15),
    ("emotional_needs", 0.15),
    ("life_rhythm", 0.10),
    ("personality_traits", 0.15),
    ("matching_preferences", 0.10),
];

/// Compute a [0.0, 1.0] training level from the JSONB insights blob.
pub fn compute_training_level(insights: &serde_json::Value) -> f64 {
    let Some(obj) = insights.as_object() else {
        return 0.0;
    };
    let mut score = 0.0;
    for &(field, weight) in WEIGHTS {
        if let Some(val) = obj.get(field) {
            if is_populated(val) {
                score += weight;
            }
        }
    }
    ((score * 1000.0).round() / 1000.0).min(1.0)
}

fn is_populated(val: &serde_json::Value) -> bool {
    match val {
        serde_json::Value::Null => false,
        serde_json::Value::String(s) => !s.is_empty(),
        serde_json::Value::Array(arr) => !arr.is_empty(),
        serde_json::Value::Object(obj) => !obj.is_empty(),
        _ => true,
    }
}

fn merge_objects(mut base: serde_json::Value, patch: &serde_json::Value) -> serde_json::Value {
    if let (Some(base_obj), Some(patch_obj)) = (base.as_object_mut(), patch.as_object()) {
        for (k, v) in patch_obj {
            base_obj.insert(k.clone(), v.clone());
        }
    }
    base
}

pub struct InsightRepo<'a> {
    pub pool: &'a PgPool,
}

impl<'a> InsightRepo<'a> {
    pub async fn load(&self, user_id: Uuid) -> Result<Option<CompanionInsightsRow>, sqlx::Error> {
        sqlx::query_as::<_, CompanionInsightsRow>(
            "SELECT * FROM engine.companion_insights WHERE user_id = $1",
        )
        .bind(user_id)
        .fetch_optional(self.pool)
        .await
    }

    /// Merge `new_facts` into the user's stored JSONB, recompute
    /// `training_level`, upsert the row, and return the new state.
    pub async fn merge(
        &self,
        user_id: Uuid,
        new_facts: serde_json::Value,
    ) -> Result<CompanionInsightsRow, sqlx::Error> {
        let existing = self.load(user_id).await?;

        let merged = match existing {
            Some(prev) => merge_objects(prev.insights, &new_facts),
            None => new_facts,
        };
        let level = compute_training_level(&merged);

        let row = sqlx::query_as::<_, CompanionInsightsRow>(
            "INSERT INTO engine.companion_insights (user_id, insights, training_level) \
             VALUES ($1, $2, $3) \
             ON CONFLICT (user_id) DO UPDATE SET \
                 insights       = EXCLUDED.insights, \
                 training_level = EXCLUDED.training_level, \
                 updated_at     = now() \
             RETURNING *",
        )
        .bind(user_id)
        .bind(merged)
        .bind(level)
        .fetch_one(self.pool)
        .await?;
        Ok(row)
    }

    /// Append one snapshot row per companion_insights record at the given
    /// instant. Single server-side INSERT … SELECT; no per-user roundtrip.
    /// Returns the number of rows written.
    pub async fn snapshot_all_users(
        &self,
        captured_at: DateTime<Utc>,
    ) -> Result<usize, sqlx::Error> {
        let res = sqlx::query(
            "INSERT INTO engine.companion_insights_snapshot
                (user_id, insights, training_level, captured_at)
             SELECT user_id, insights, training_level, $1
               FROM engine.companion_insights",
        )
        .bind(captured_at)
        .execute(self.pool)
        .await?;
        Ok(res.rows_affected() as usize)
    }
}

/// One `companion_insights_events` row to insert. `payload` is the facts array
/// (`stage='facts'`) or the structured insight delta (`stage='structured'`), or
/// `None` on a parse error.
pub struct InsightEventInsert<'a> {
    pub run_id: Uuid,
    pub user_id: Uuid,
    pub session_id: Option<Uuid>,
    pub message_id: Option<Uuid>,
    pub stage: &'a str,
    pub status: &'a str,
    pub payload: Option<serde_json::Value>,
    pub meta: crate::OpenRouterCallMeta,
}

pub struct InsightEventRepo<'a> {
    pub pool: &'a PgPool,
}

impl<'a> InsightEventRepo<'a> {
    /// Append one audit row. Append-only; no FK on user_id (a row may precede
    /// the first companion_insights row on an empty-facts run).
    pub async fn record(&self, ev: InsightEventInsert<'_>) -> Result<(), sqlx::Error> {
        sqlx::query(
            "INSERT INTO engine.companion_insights_events \
               (run_id, user_id, session_id, message_id, stage, status, payload, \
                model, usage, generation_id) \
             VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)",
        )
        .bind(ev.run_id)
        .bind(ev.user_id)
        .bind(ev.session_id)
        .bind(ev.message_id)
        .bind(ev.stage)
        .bind(ev.status)
        .bind(ev.payload)
        .bind(ev.meta.model)
        .bind(ev.meta.usage)
        .bind(ev.meta.generation_id)
        .execute(self.pool)
        .await?;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn training_level_empty_is_zero() {
        let v = serde_json::json!({});
        assert!(compute_training_level(&v).abs() < 1e-6);
    }

    #[test]
    fn training_level_partial() {
        let v = serde_json::json!({
            "city": "Shanghai",
            "interests": ["coffee"],
        });
        // 0.05 + 0.10
        assert!((compute_training_level(&v) - 0.15).abs() < 1e-3);
    }

    #[test]
    fn training_level_full_caps_at_one() {
        let v = serde_json::json!({
            "city": "Shanghai",
            "occupation": "engineer",
            "interests": ["coffee"],
            "mbti_guess": "INFP",
            "love_values": "slow burn",
            "emotional_needs": "validation",
            "life_rhythm": "night owl",
            "personality_traits": ["curious"],
            "matching_preferences": { "preferred_gender": "any" },
        });
        let l = compute_training_level(&v);
        assert!((l - 1.0).abs() < 1e-3);
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn merge_creates_then_accumulates(pool: PgPool) {
        let repo = InsightRepo { pool: &pool };
        let user_id = Uuid::new_v4();

        // First merge → row created.
        let first = repo
            .merge(user_id, serde_json::json!({ "city": "Shanghai" }))
            .await
            .unwrap();
        assert_eq!(first.user_id, user_id);
        assert_eq!(first.insights["city"], "Shanghai");
        assert!((first.training_level - 0.05).abs() < 1e-3);

        // Second merge → adds field, level rises.
        let second = repo
            .merge(
                user_id,
                serde_json::json!({ "occupation": "engineer", "interests": ["coffee"] }),
            )
            .await
            .unwrap();
        assert_eq!(second.insights["city"], "Shanghai");
        assert_eq!(second.insights["occupation"], "engineer");
        assert!(second.training_level > first.training_level);
        assert!((second.training_level - 0.20).abs() < 1e-3);
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn merge_overwrites_same_key(pool: PgPool) {
        let repo = InsightRepo { pool: &pool };
        let user_id = Uuid::new_v4();

        repo.merge(user_id, serde_json::json!({ "city": "Shanghai" }))
            .await
            .unwrap();
        let updated = repo
            .merge(user_id, serde_json::json!({ "city": "Beijing" }))
            .await
            .unwrap();
        assert_eq!(updated.insights["city"], "Beijing");
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn load_returns_none_for_unknown_user(pool: PgPool) {
        let repo = InsightRepo { pool: &pool };
        let result = repo.load(Uuid::new_v4()).await.unwrap();
        assert!(result.is_none());
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn snapshot_all_users_writes_one_row_per_user_at_same_ts(pool: PgPool) {
        let repo = InsightRepo { pool: &pool };
        let u1 = Uuid::new_v4();
        let u2 = Uuid::new_v4();
        repo.merge(u1, serde_json::json!({ "city": "Shanghai" }))
            .await
            .unwrap();
        repo.merge(u2, serde_json::json!({ "occupation": "engineer" }))
            .await
            .unwrap();

        let t = DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap();
        let n = repo.snapshot_all_users(t).await.unwrap();
        assert_eq!(n, 2, "one row per companion_insights row");

        let rows: Vec<(Uuid, serde_json::Value, f64, DateTime<Utc>)> = sqlx::query_as(
            "SELECT user_id, insights, training_level, captured_at
               FROM engine.companion_insights_snapshot
              ORDER BY user_id",
        )
        .fetch_all(&pool)
        .await
        .unwrap();
        assert_eq!(rows.len(), 2);
        for (_, _, _, ts) in &rows {
            assert_eq!(*ts, t, "every row in the same fire shares captured_at");
        }
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn snapshot_all_users_with_empty_table_writes_nothing(pool: PgPool) {
        let repo = InsightRepo { pool: &pool };
        let t = DateTime::<Utc>::from_timestamp(1_700_000_000, 0).unwrap();
        let n = repo.snapshot_all_users(t).await.unwrap();
        assert_eq!(n, 0, "no companion_insights rows ⇒ no snapshot rows");

        let count: i64 =
            sqlx::query_scalar("SELECT COUNT(*) FROM engine.companion_insights_snapshot")
                .fetch_one(&pool)
                .await
                .unwrap();
        assert_eq!(count, 0);
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn insight_event_repo_records_rows_with_run_id_and_trio(pool: PgPool) {
        let repo = InsightEventRepo { pool: &pool };
        let run_id = Uuid::new_v4();
        let user_id = Uuid::new_v4();
        let session_id = Uuid::new_v4();
        let message_id = Uuid::new_v4();

        repo.record(InsightEventInsert {
            run_id,
            user_id,
            session_id: Some(session_id),
            message_id: Some(message_id),
            stage: "facts",
            status: "ok",
            payload: Some(serde_json::json!(["用户在深圳工作"])),
            meta: crate::OpenRouterCallMeta {
                generation_id: Some("gen-facts".into()),
                model: Some("ins/m".into()),
                usage: Some(serde_json::json!({"total_tokens": 7})),
            },
        })
        .await
        .unwrap();

        repo.record(InsightEventInsert {
            run_id,
            user_id,
            session_id: Some(session_id),
            message_id: Some(message_id),
            stage: "structured",
            status: "parse_error",
            payload: None,
            meta: crate::OpenRouterCallMeta::default(),
        })
        .await
        .unwrap();

        // Two rows, same run_id. Also round-trip the JSONB payload + usage so a
        // bind/column swap of the trio would be caught here, not just at the DB.
        #[allow(clippy::type_complexity)]
        let rows: Vec<(
            String,
            String,
            Option<String>,
            Option<serde_json::Value>,
            Option<serde_json::Value>,
        )> = sqlx::query_as(
            "SELECT stage, status, generation_id, payload, usage \
             FROM engine.companion_insights_events \
             WHERE run_id = $1 ORDER BY stage",
        )
        .bind(run_id)
        .fetch_all(&pool)
        .await
        .unwrap();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0].0, "facts");
        assert_eq!(rows[0].1, "ok");
        assert_eq!(rows[0].2.as_deref(), Some("gen-facts"));
        assert_eq!(rows[0].3, Some(serde_json::json!(["用户在深圳工作"])));
        assert_eq!(rows[0].4, Some(serde_json::json!({"total_tokens": 7})));
        assert_eq!(rows[1].0, "structured");
        assert_eq!(rows[1].1, "parse_error");
        assert_eq!(rows[1].2, None); // default meta ⇒ NULL generation_id
        assert_eq!(rows[1].3, None); // parse_error ⇒ NULL payload
        assert_eq!(rows[1].4, None); // default meta ⇒ NULL usage
    }

    #[sqlx::test(migrations = "./migrations")]
    async fn migration_0025_creates_events_table_and_affinity_audit_cols(pool: PgPool) {
        // companion_insights_events exists with every column.
        sqlx::query(
            "INSERT INTO engine.companion_insights_events \
               (run_id, user_id, session_id, message_id, stage, status, payload, model, usage, generation_id) \
             VALUES ($1,$2,$3,$4,'facts','ok','[]'::jsonb,'m','{}'::jsonb,'g')",
        )
        .bind(Uuid::new_v4())
        .bind(Uuid::new_v4())
        .bind(Uuid::new_v4())
        .bind(Uuid::new_v4())
        .execute(&pool)
        .await
        .expect("insert into companion_insights_events");

        let n: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM engine.companion_insights_events")
            .fetch_one(&pool)
            .await
            .unwrap();
        assert_eq!(n, 1);

        // companion_affinity_events now has the audit trio (select compiles ⇒ columns exist).
        let _ =
            sqlx::query("SELECT model, usage, generation_id FROM engine.companion_affinity_events")
                .fetch_all(&pool)
                .await
                .expect("affinity audit columns exist");
    }
}