chunkshop/backends/base.rs
1//! Backend traits + ColSpec.
2//!
3//! Backends own everything that MUST be different per backend, including DDL
4//! sequencing. Sinks own chunkshop-specific data-model semantics (modes,
5//! metadata promotion, delete_orphans, source-tag write-once).
6//!
7//! Two traits:
8//! - `BackendDialect` — pure helpers, no I/O, no async. Returns String / Vec<String>.
9//! Trivially unit-testable without a tokio runtime.
10//! - `BackendConn` — I/O surface. AFIT (Rust ≥1.75 stable). No `async-trait` macro,
11//! no `dyn`. Generic dispatch via `<B: Backend>`.
12//!
13//! R1 caveat (now discharged in R2): `BackendConn` originally took a PG-concrete
14//! `&mut sqlx::Transaction<'_, sqlx::Postgres>`. R2 lifts this to a GAT
15//! (`type Db: sqlx::Database`) so each backend names its own sqlx Database.
16//! Sinks hold concrete backends (PgSink → PostgresBackend, MariadbSink →
17//! MariadbBackend), so `<PostgresBackend as BackendConn>::Db = sqlx::Postgres`
18//! resolves at the call site without sinks needing to be generic over `<B>`.
19
20use std::future::Future;
21
/// Description of a single canonical column of a chunks table.
///
/// Derives `PartialEq`/`Eq` alongside `Debug`/`Clone` so that column lists can
/// be compared directly (for example with `assert_eq!` in dialect unit tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColSpec {
    /// Column name. Compile-time constant — canonical chunkshop columns are
    /// always known at build time (`"id"`, `"doc_id"`, `"embedding"`, etc.).
    /// Promoted-metadata columns flow through `add_column_if_not_exists_sql`,
    /// not through `ColSpec`, so this never needs to be runtime-derived.
    pub name: &'static str,
    /// Backend-specific column type DDL. May be runtime-computed
    /// (e.g., `format!("vector({dim})")`), hence `String` rather than
    /// `&'static str`.
    pub type_ddl: String,
    /// Whether the column accepts NULL.
    pub nullable: bool,
    /// Optional default expression for the column; `None` when no default is
    /// declared.
    pub default: Option<&'static str>,
    /// Whether the column is (part of) the table's primary key.
    pub is_primary_key: bool,
}
36
37/// Pure dialect helpers. No I/O, no async.
38pub trait BackendDialect {
39 const NAME: &'static str;
40 const SUPPORTS_UPSERT: bool;
41
42 fn quote_ident(&self, name: &str) -> String;
43 fn fq_table(&self, db: &str, table: &str) -> String;
44
45 fn vector_type_ddl(&self, dim: usize) -> String;
46 fn json_type_ddl(&self) -> String;
47 fn tags_array_type_ddl(&self) -> String;
48 fn text_pk_type_ddl(&self) -> String;
49 fn timestamp_now_default_ddl(&self) -> String;
50
51 fn vector_literal(&self, arr: &[f32]) -> String;
52 fn json_literal(&self, obj: &serde_json::Value) -> String;
53
54 fn json_path_sql(&self, col_expr: &str, dotted_path: &str) -> String;
55 fn upsert_clause(&self, key_cols: &[&str], update_cols: &[&str]) -> String;
56
57 fn create_database_sql(&self, name: &str) -> String;
58 fn add_column_if_not_exists_sql(&self, fq: &str, col: &str, type_ddl: &str) -> String;
59 fn drop_table_sql(&self, fq: &str) -> String;
60
61 fn emit_chunks_table_ddl(
62 &self,
63 fq: &str,
64 cols: &[ColSpec],
65 hnsw: bool,
66 dim: usize,
67 engine: Option<&str>,
68 vector_metric: Option<&str>,
69 ) -> Vec<String>;
70}
71
72/// I/O surface. R2 lifts this to a GAT (`type Db: sqlx::Database`) so each
73/// backend names its own sqlx Database. PgSink/MariadbSink hold concrete
74/// backends, so `<PostgresBackend as BackendConn>::Db = sqlx::Postgres`
75/// resolves at the call site without sinks needing to be generic over `<B>`.
76pub trait BackendConn {
77 type Db: sqlx::Database;
78
79 /// Force-initialize the connection pool. Idempotent — second call is a no-op.
80 /// The DSN is sourced from the backend struct's configuration (set when the
81 /// backend is constructed), not from arguments to this method.
82 fn connect(&self) -> impl Future<Output = anyhow::Result<()>> + Send;
83
84 fn acquire_create_lock(
85 &self,
86 tx: &mut sqlx::Transaction<'_, Self::Db>,
87 key: &str,
88 ) -> impl Future<Output = anyhow::Result<()>> + Send;
89
90 fn table_exists(
91 &self,
92 tx: &mut sqlx::Transaction<'_, Self::Db>,
93 db: &str,
94 table: &str,
95 ) -> impl Future<Output = anyhow::Result<bool>> + Send;
96
97 fn embedding_dim(
98 &self,
99 tx: &mut sqlx::Transaction<'_, Self::Db>,
100 db: &str,
101 table: &str,
102 ) -> impl Future<Output = anyhow::Result<Option<usize>>> + Send;
103}
104
105/// Convenience super-trait: `<B: Backend>` for ergonomic generic bounds.
106pub trait Backend: BackendDialect + BackendConn {}
107impl<T: BackendDialect + BackendConn> Backend for T {}