smooai-smooth-operator-adapter-postgres 1.4.0

Postgres + pgvector StorageAdapter for smooth-operator — the dogfood backend (Postgres OLTP, PostgresCheckpointStore, pgvector hybrid retrieval).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
//! Persistent admin stores (Phase 12 follow-up) — Postgres-backed.
//!
//! The three management-console stores ship with process-local in-memory
//! implementations (`InMemoryConnectorConfigStore`, `InMemorySettingsStore`,
//! `InMemoryIndexingStore`) that lose everything on restart. This module makes
//! them durable against the same Postgres the rest of the adapter dogfoods,
//! preserving the in-memory semantics exactly:
//!
//! - [`PgConnectorConfigStore`] — org-scoped CRUD over `connector_configs`
//!   (PK `(org_id, id)`). `list` is sorted by `(name, id)`; cross-org `get` /
//!   `delete` never touch another org's row.
//! - [`PgSettingsStore`] — per-org `agent_settings` (PK `org_id`); `get` of an
//!   unset org returns [`AgentSettings::defaults`], `put` is an upsert.
//! - [`PgIndexingStore`] — the `indexing_runs` ledger (PK `id`). `record_run`
//!   upserts by id (so a `Running` row can be promoted to a terminal state),
//!   `list_runs` returns a connector's runs oldest-first, and `latest_cursor`
//!   is `max(cursor)` over **succeeded** runs only — a failed run never advances
//!   the cursor.
//!
//! ## Sync trait over an async pool
//!
//! All three store traits are **synchronous** (the engine / admin API call them
//! directly), but `deadpool` is async. We bridge with the same `run_blocking`
//! helper the knowledge base uses: `spawn` the async work onto a captured
//! runtime [`Handle`] (so its I/O makes progress on that runtime's reactor),
//! await the resulting `JoinHandle` on a throwaway OS thread, and block the
//! calling thread on a channel until that thread hands the result back — never
//! `Handle::block_on` on a runtime worker thread.

use std::future::Future;

use anyhow::{anyhow, Result};
use chrono::{DateTime, Utc};
use deadpool_postgres::Pool;
use tokio::runtime::Handle;

use smooth_operator::connector_config::{ConnectorConfig, ConnectorConfigStore, ConnectorKind};
use smooth_operator::settings::{AgentSettings, SettingsStore};
use smooth_operator_ingestion::indexing::{IndexingRun, IndexingRunStatus, IndexingStore};
use smooth_operator_ingestion::Timestamp;

/// Drive an async future to completion from a *synchronous* trait method.
///
/// Same bridging approach as `PgKnowledgeBase::run_blocking`: the future is
/// `spawn`ed onto the captured runtime so its async I/O makes progress on that
/// runtime's reactor, while a throwaway OS thread awaits the resulting
/// `JoinHandle` on a tiny current-thread runtime and hands the outcome back
/// over a channel. This never calls `Handle::block_on` on a runtime worker
/// thread (which panics "Cannot start a runtime from within a runtime").
///
/// The calling thread is blocked until the result arrives, so the captured
/// runtime needs some other thread free to drive the spawned task.
/// NOTE(review): presumably `handle` belongs to a multi-thread runtime —
/// calling this from the only thread of a current-thread runtime would never
/// complete; confirm against the adapter's construction site.
///
/// # Errors
///
/// Returns an error when the helper thread or its runtime cannot be created,
/// when the spawned task panics or is cancelled, when the result channel closes
/// before a value is sent, or when `fut` itself resolves to `Err`. If the helper
/// thread cannot be created the already-spawned task is detached, not aborted.
fn run_blocking<F, T>(handle: &Handle, fut: F) -> Result<T>
where
    F: Future<Output = Result<T>> + Send + 'static,
    T: Send + 'static,
{
    let join = handle.spawn(fut);
    let (tx, rx) = std::sync::mpsc::channel();
    // `Builder::spawn` reports a thread-creation failure as an `Err`, whereas
    // `std::thread::spawn` would panic the synchronous caller.
    std::thread::Builder::new()
        .name("pg-admin-store-bridge".to_string())
        .spawn(move || {
            let result = (|| -> Result<T> {
                // Awaiting a `JoinHandle` needs neither the I/O nor the timer
                // driver, so the helper runtime is built without them.
                let rt = tokio::runtime::Builder::new_current_thread().build()?;
                let joined = rt.block_on(join);
                joined.map_err(|e| anyhow!("admin store task panicked or was cancelled: {e}"))?
            })();
            // The receiver only disappears if the caller is already gone.
            let _ = tx.send(result);
        })
        .map_err(|e| anyhow!("failed to spawn admin store bridge thread: {e}"))?;
    rx.recv()
        .map_err(|e| anyhow!("admin store task channel closed: {e}"))?
}

// ---------------------------------------------------------------------------
// Connector config store
// ---------------------------------------------------------------------------

/// Postgres-backed [`ConnectorConfigStore`] over `connector_configs`.
///
/// The synchronous trait methods clone the store so the clone can be moved into
/// the `'static` future they hand to `run_blocking`.
#[derive(Clone)]
pub struct PgConnectorConfigStore {
    /// Async connection pool every query checks a client out of.
    pool: Pool,
    /// Runtime handle the synchronous trait methods spawn their async work onto.
    handle: Handle,
}

impl PgConnectorConfigStore {
    /// Build over the adapter's async pool + captured runtime handle.
    #[must_use]
    pub fn new(pool: Pool, handle: Handle) -> Self {
        Self { pool, handle }
    }

    async fn list_async(&self, org_id: String) -> Result<Vec<ConnectorConfig>> {
        let client = self.pool.get().await?;
        let rows = client
            .query(
                "SELECT id, org_id, name, kind, config, enabled, created_at, updated_at
                 FROM connector_configs
                 WHERE org_id = $1
                 ORDER BY name, id",
                &[&org_id],
            )
            .await?;
        rows.iter().map(row_to_connector).collect()
    }

    async fn get_async(&self, org_id: String, id: String) -> Result<Option<ConnectorConfig>> {
        let client = self.pool.get().await?;
        let row = client
            .query_opt(
                "SELECT id, org_id, name, kind, config, enabled, created_at, updated_at
                 FROM connector_configs
                 WHERE org_id = $1 AND id = $2",
                &[&org_id, &id],
            )
            .await?;
        row.as_ref().map(row_to_connector).transpose()
    }

    async fn upsert_async(&self, cfg: ConnectorConfig) -> Result<()> {
        let client = self.pool.get().await?;
        client
            .execute(
                "INSERT INTO connector_configs
                    (org_id, id, name, kind, config, enabled, created_at, updated_at)
                 VALUES ($1,$2,$3,$4,$5,$6,$7,$8)
                 ON CONFLICT (org_id, id) DO UPDATE SET
                    name       = EXCLUDED.name,
                    kind       = EXCLUDED.kind,
                    config     = EXCLUDED.config,
                    enabled    = EXCLUDED.enabled,
                    created_at = EXCLUDED.created_at,
                    updated_at = EXCLUDED.updated_at",
                &[
                    &cfg.org_id,
                    &cfg.id,
                    &cfg.name,
                    &cfg.kind.as_str(),
                    &cfg.config,
                    &cfg.enabled,
                    &cfg.created_at,
                    &cfg.updated_at,
                ],
            )
            .await?;
        Ok(())
    }

    async fn delete_async(&self, org_id: String, id: String) -> Result<bool> {
        let client = self.pool.get().await?;
        let n = client
            .execute(
                "DELETE FROM connector_configs WHERE org_id = $1 AND id = $2",
                &[&org_id, &id],
            )
            .await?;
        Ok(n > 0)
    }
}

/// Decode one `connector_configs` row into a [`ConnectorConfig`].
///
/// Columns are read with `try_get`, so a missing column, an unexpected NULL, or
/// a type mismatch surfaces as an `Err` rather than the panic `Row::get` raises.
///
/// # Errors
///
/// Returns an error when any column fails to decode, or when the stored `kind`
/// string is rejected by [`ConnectorKind::parse`].
fn row_to_connector(row: &tokio_postgres::Row) -> Result<ConnectorConfig> {
    let kind_str: String = row.try_get("kind")?;
    let kind = ConnectorKind::parse(&kind_str)
        .map_err(|bad| anyhow!("unknown connector kind '{bad}' in connector_configs row"))?;
    Ok(ConnectorConfig {
        id: row.try_get("id")?,
        org_id: row.try_get("org_id")?,
        name: row.try_get("name")?,
        kind,
        config: row.try_get("config")?,
        enabled: row.try_get("enabled")?,
        created_at: row.try_get("created_at")?,
        updated_at: row.try_get("updated_at")?,
    })
}

/// Synchronous [`ConnectorConfigStore`] facade: each method clones the store,
/// runs the matching `*_async` query through `run_blocking`, and collapses any
/// failure into the "nothing there" value for its return type, because the
/// trait signatures carry no error channel.
impl ConnectorConfigStore for PgConnectorConfigStore {
    /// All configs for `org_id`; an empty list when the query fails.
    fn list(&self, org_id: &str) -> Vec<ConnectorConfig> {
        let store = self.clone();
        let org = org_id.to_owned();
        match run_blocking(&self.handle, async move { store.list_async(org).await }) {
            Ok(configs) => configs,
            Err(_) => Vec::new(),
        }
    }

    /// The config `id` within `org_id`; `None` when absent or when the query
    /// fails.
    fn get(&self, org_id: &str, id: &str) -> Option<ConnectorConfig> {
        let store = self.clone();
        let org = org_id.to_owned();
        let key = id.to_owned();
        let looked_up = run_blocking(&self.handle, async move { store.get_async(org, key).await });
        looked_up.unwrap_or(None)
    }

    /// Insert or replace `config`; a failed write is dropped.
    fn upsert(&self, config: ConnectorConfig) {
        let store = self.clone();
        let _write_outcome =
            run_blocking(&self.handle, async move { store.upsert_async(config).await });
    }

    /// Delete the config `id` within `org_id`; `true` only when a row was
    /// removed, `false` when nothing matched or the query failed.
    fn delete(&self, org_id: &str, id: &str) -> bool {
        let store = self.clone();
        let org = org_id.to_owned();
        let key = id.to_owned();
        let outcome = run_blocking(&self.handle, async move { store.delete_async(org, key).await });
        matches!(outcome, Ok(true))
    }
}

// ---------------------------------------------------------------------------
// Settings store
// ---------------------------------------------------------------------------

/// Postgres-backed [`SettingsStore`] over `agent_settings`.
///
/// The synchronous trait methods clone the store so the clone can be moved into
/// the `'static` future they hand to `run_blocking`.
#[derive(Clone)]
pub struct PgSettingsStore {
    /// Async connection pool every query checks a client out of.
    pool: Pool,
    /// Runtime handle the synchronous trait methods spawn their async work onto.
    handle: Handle,
}

impl PgSettingsStore {
    /// Build over the adapter's async pool + captured runtime handle.
    #[must_use]
    pub fn new(pool: Pool, handle: Handle) -> Self {
        Self { pool, handle }
    }

    /// Load the persisted settings row for `org_id`, or `Ok(None)` when the org
    /// has never saved settings.
    ///
    /// Columns are read with `try_get`, so a decode failure becomes an `Err`
    /// rather than the panic `Row::get` raises.
    ///
    /// # Errors
    ///
    /// Returns an error on a pool or query failure, when a column fails to
    /// decode, or when `default_tools` is not a JSON array of strings.
    async fn get_async(&self, org_id: String) -> Result<Option<AgentSettings>> {
        let client = self.pool.get().await?;
        let row = client
            .query_opt(
                "SELECT org_id, model, system_prompt, default_tools, updated_at
                 FROM agent_settings WHERE org_id = $1",
                &[&org_id],
            )
            .await?;
        let row = match row {
            Some(row) => row,
            None => return Ok(None),
        };

        // `default_tools` is stored as JSON and decoded into the string list.
        let default_tools: serde_json::Value = row.try_get("default_tools")?;
        let default_tools: Vec<String> = serde_json::from_value(default_tools)?;
        Ok(Some(AgentSettings {
            org_id: row.try_get("org_id")?,
            model: row.try_get("model")?,
            system_prompt: row.try_get("system_prompt")?,
            // The `agent_settings` table predates the per-org persona
            // override; until a column is added, persisted rows read back
            // with no override (runner stays on its const prompt). The
            // in-memory store + admin API already carry `persona`.
            persona: None,
            default_tools,
            updated_at: row.try_get("updated_at")?,
        }))
    }

    /// Insert the settings row for `settings.org_id`, or overwrite the existing
    /// one.
    ///
    /// `settings.persona` is not written: the statement has no column for it.
    ///
    /// # Errors
    ///
    /// Returns an error on a pool or query failure, or when `default_tools`
    /// cannot be serialized to JSON.
    async fn put_async(&self, settings: AgentSettings) -> Result<()> {
        let client = self.pool.get().await?;
        let default_tools = serde_json::to_value(&settings.default_tools)?;
        client
            .execute(
                "INSERT INTO agent_settings
                    (org_id, model, system_prompt, default_tools, updated_at)
                 VALUES ($1,$2,$3,$4,$5)
                 ON CONFLICT (org_id) DO UPDATE SET
                    model         = EXCLUDED.model,
                    system_prompt = EXCLUDED.system_prompt,
                    default_tools = EXCLUDED.default_tools,
                    updated_at    = EXCLUDED.updated_at",
                &[
                    &settings.org_id,
                    &settings.model,
                    &settings.system_prompt,
                    &default_tools,
                    &settings.updated_at,
                ],
            )
            .await?;
        Ok(())
    }
}

/// Synchronous [`SettingsStore`] facade over the async queries, bridged through
/// `run_blocking`.
impl SettingsStore for PgSettingsStore {
    /// Settings for `org_id`, falling back to [`AgentSettings::defaults`].
    fn get(&self, org_id: &str) -> AgentSettings {
        let store = self.clone();
        let owned_org = org_id.to_owned();
        let loaded = run_blocking(&self.handle, async move { store.get_async(owned_org).await });
        match loaded {
            Ok(Some(saved)) => saved,
            // Both a missing row and a failed read yield the defaults, so the
            // console always has a populated form — same as the in-memory store.
            Ok(None) | Err(_) => AgentSettings::defaults(org_id),
        }
    }

    /// Persist `settings` as an upsert; a failed write is dropped because the
    /// trait signature has no error channel.
    fn put(&self, settings: AgentSettings) {
        let store = self.clone();
        let _write_outcome =
            run_blocking(&self.handle, async move { store.put_async(settings).await });
    }
}

// ---------------------------------------------------------------------------
// Indexing store
// ---------------------------------------------------------------------------

/// Postgres-backed [`IndexingStore`] over `indexing_runs`.
///
/// The synchronous trait methods clone the store so the clone can be moved into
/// the `'static` future they hand to `run_blocking`.
#[derive(Clone)]
pub struct PgIndexingStore {
    /// Async connection pool every query checks a client out of.
    pool: Pool,
    /// Runtime handle the synchronous trait methods spawn their async work onto.
    handle: Handle,
}

impl PgIndexingStore {
    /// Build over the adapter's async pool + captured runtime handle.
    #[must_use]
    pub fn new(pool: Pool, handle: Handle) -> Self {
        Self { pool, handle }
    }

    async fn record_run_async(&self, run: IndexingRun) -> Result<()> {
        let client = self.pool.get().await?;
        let status = status_to_str(run.status);
        let documents_seen = i64::try_from(run.documents_seen).unwrap_or(i64::MAX);
        let chunks_indexed = i64::try_from(run.chunks_indexed).unwrap_or(i64::MAX);
        let documents_skipped = i64::try_from(run.documents_skipped).unwrap_or(i64::MAX);
        client
            .execute(
                "INSERT INTO indexing_runs
                    (id, connector_name, status, started_at, finished_at,
                     documents_seen, chunks_indexed, documents_skipped, cursor, error)
                 VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
                 ON CONFLICT (id) DO UPDATE SET
                    connector_name    = EXCLUDED.connector_name,
                    status            = EXCLUDED.status,
                    started_at        = EXCLUDED.started_at,
                    finished_at       = EXCLUDED.finished_at,
                    documents_seen    = EXCLUDED.documents_seen,
                    chunks_indexed    = EXCLUDED.chunks_indexed,
                    documents_skipped = EXCLUDED.documents_skipped,
                    cursor            = EXCLUDED.cursor,
                    error             = EXCLUDED.error",
                &[
                    &run.id,
                    &run.connector_name,
                    &status,
                    &run.started_at,
                    &run.finished_at,
                    &documents_seen,
                    &chunks_indexed,
                    &documents_skipped,
                    &run.cursor,
                    &run.error,
                ],
            )
            .await?;
        Ok(())
    }

    async fn latest_cursor_async(&self, connector_name: String) -> Result<Option<Timestamp>> {
        let client = self.pool.get().await?;
        // Max cursor over SUCCEEDED runs only — a failed run never advances it.
        let row = client
            .query_one(
                "SELECT max(cursor) AS c
                 FROM indexing_runs
                 WHERE connector_name = $1 AND status = 'succeeded'",
                &[&connector_name],
            )
            .await?;
        Ok(row.get::<_, Option<DateTime<Utc>>>("c"))
    }

    async fn list_runs_async(&self, connector_name: String) -> Result<Vec<IndexingRun>> {
        let client = self.pool.get().await?;
        // Oldest-first to match the in-memory insertion-order contract.
        let rows = client
            .query(
                "SELECT id, connector_name, status, started_at, finished_at,
                        documents_seen, chunks_indexed, documents_skipped, cursor, error
                 FROM indexing_runs
                 WHERE connector_name = $1
                 ORDER BY started_at ASC, id ASC",
                &[&connector_name],
            )
            .await?;
        rows.iter().map(row_to_run).collect()
    }
}

fn status_to_str(status: IndexingRunStatus) -> &'static str {
    match status {
        IndexingRunStatus::Running => "running",
        IndexingRunStatus::Succeeded => "succeeded",
        IndexingRunStatus::Failed => "failed",
    }
}

/// Parse the text form of a run status back into an [`IndexingRunStatus`].
///
/// Matching is exact; any string other than `running`, `succeeded`, or `failed`
/// is an error.
fn status_from_str(s: &str) -> Result<IndexingRunStatus> {
    match s {
        "running" => Ok(IndexingRunStatus::Running),
        "succeeded" => Ok(IndexingRunStatus::Succeeded),
        "failed" => Ok(IndexingRunStatus::Failed),
        other => Err(anyhow!("unknown indexing run status '{other}'")),
    }
}

/// Decode one `indexing_runs` row into an [`IndexingRun`].
///
/// Columns are read with `try_get`, so a missing column, an unexpected NULL, or
/// a type mismatch surfaces as an `Err` rather than the panic `Row::get` raises.
///
/// # Errors
///
/// Returns an error when any column fails to decode or when the stored status
/// text is not recognised by `status_from_str`.
fn row_to_run(row: &tokio_postgres::Row) -> Result<IndexingRun> {
    let status_text: String = row.try_get("status")?;
    let status = status_from_str(&status_text)?;
    let documents_seen: i64 = row.try_get("documents_seen")?;
    let chunks_indexed: i64 = row.try_get("chunks_indexed")?;
    let documents_skipped: i64 = row.try_get("documents_skipped")?;
    Ok(IndexingRun {
        id: row.try_get("id")?,
        connector_name: row.try_get("connector_name")?,
        status,
        started_at: row.try_get("started_at")?,
        finished_at: row.try_get("finished_at")?,
        // A counter that does not fit `usize` (e.g. a negative value) reads
        // back as 0 rather than failing the whole row.
        documents_seen: usize::try_from(documents_seen).unwrap_or(0),
        chunks_indexed: usize::try_from(chunks_indexed).unwrap_or(0),
        documents_skipped: usize::try_from(documents_skipped).unwrap_or(0),
        cursor: row.try_get("cursor")?,
        error: row.try_get("error")?,
    })
}

/// Synchronous [`IndexingStore`] facade: each method clones the store, runs the
/// matching `*_async` query through `run_blocking`, and collapses any failure
/// into the empty value for its return type, because the trait signatures carry
/// no error channel.
impl IndexingStore for PgIndexingStore {
    /// Upsert `run` by id; a failed write is dropped.
    fn record_run(&self, run: &IndexingRun) {
        let store = self.clone();
        let owned_run = run.clone();
        let _write_outcome = run_blocking(&self.handle, async move {
            store.record_run_async(owned_run).await
        });
    }

    /// Latest cursor over succeeded runs of `connector_name`; `None` when there
    /// is none or when the query fails.
    fn latest_cursor(&self, connector_name: &str) -> Option<Timestamp> {
        let store = self.clone();
        let connector = connector_name.to_owned();
        let looked_up = run_blocking(&self.handle, async move {
            store.latest_cursor_async(connector).await
        });
        looked_up.unwrap_or(None)
    }

    /// Runs of `connector_name`, oldest first; an empty list when the query
    /// fails.
    fn list_runs(&self, connector_name: &str) -> Vec<IndexingRun> {
        let store = self.clone();
        let connector = connector_name.to_owned();
        match run_blocking(&self.handle, async move {
            store.list_runs_async(connector).await
        }) {
            Ok(runs) => runs,
            Err(_) => Vec::new(),
        }
    }
}