1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
/// SQL DDL for the initial catalog schema: sources, models, embeddings, fine-tune jobs, evals.
pub const MIGRATION_001_CORE_TABLES: &str = r#"
CREATE TABLE sources (
source_id TEXT PRIMARY KEY,
name TEXT NOT NULL,
source_type TEXT NOT NULL,
uri TEXT NOT NULL,
schema_json TEXT,
options TEXT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
updated_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE TABLE embedding_sets (
set_id TEXT PRIMARY KEY,
source_id TEXT NOT NULL REFERENCES sources(source_id),
model_id TEXT NOT NULL,
text_column TEXT NOT NULL,
table_name TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
num_rows INTEGER,
dimensions INTEGER,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
updated_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE INDEX idx_embedding_sets_source ON embedding_sets(source_id);
CREATE INDEX idx_embedding_sets_model ON embedding_sets(model_id);
CREATE INDEX idx_embedding_sets_status ON embedding_sets(status);
CREATE TABLE models (
model_id TEXT PRIMARY KEY,
name TEXT NOT NULL,
model_type TEXT NOT NULL,
task TEXT NOT NULL,
backend TEXT,
version INTEGER NOT NULL DEFAULT 1,
source TEXT,
dimensions INTEGER,
status TEXT NOT NULL DEFAULT 'available',
metadata TEXT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
updated_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE INDEX idx_models_type ON models(model_type);
CREATE INDEX idx_models_task ON models(task);
CREATE TABLE fine_tune_jobs (
job_id TEXT PRIMARY KEY,
base_model_id TEXT NOT NULL REFERENCES models(model_id),
output_model_id TEXT,
training_source TEXT NOT NULL,
loss_type TEXT NOT NULL,
hyperparams TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
metrics TEXT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
updated_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE INDEX idx_fine_tune_jobs_status ON fine_tune_jobs(status);
CREATE TABLE eval_runs (
run_id TEXT PRIMARY KEY,
model_id TEXT NOT NULL REFERENCES models(model_id),
eval_type TEXT NOT NULL,
source_id TEXT,
metrics TEXT NOT NULL,
config TEXT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE INDEX idx_eval_runs_model ON eval_runs(model_id);
CREATE INDEX idx_eval_runs_type ON eval_runs(eval_type);
CREATE INDEX idx_eval_runs_created ON eval_runs(created_at);
CREATE TABLE evidence_channels (
channel_name TEXT PRIMARY KEY,
schema_json TEXT NOT NULL,
priority INTEGER NOT NULL,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
INSERT INTO evidence_channels (channel_name, schema_json, priority) VALUES
('vector', '{"similarity": "Float32"}', 1),
('inference', '{"inference_model": "Utf8", "inference_task": "Utf8", "inference_confidence": "Float32"}', 2);
"#;
/// Result tables: Parquet-backed embedding and inference outputs with sidecar ANN indexes.
pub const MIGRATION_002_RESULT_TABLES: &str = r#"
CREATE TABLE result_tables (
table_name TEXT PRIMARY KEY,
source_id TEXT NOT NULL,
model_id TEXT NOT NULL,
task TEXT NOT NULL,
parquet_path TEXT NOT NULL,
index_path TEXT,
dimensions INTEGER,
distance_metric TEXT DEFAULT 'cosine',
row_count INTEGER NOT NULL DEFAULT 0,
status TEXT NOT NULL DEFAULT 'building',
key_column TEXT,
text_columns TEXT,
checkpoint INTEGER,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
completed_at TEXT
);
CREATE INDEX idx_result_tables_source ON result_tables(source_id);
CREATE INDEX idx_result_tables_task ON result_tables(task);
CREATE INDEX idx_result_tables_status ON result_tables(status);
"#;
/// Phase 08: add golden_source, k, and status columns to eval_runs.
pub const MIGRATION_003_EVAL_COLUMNS: &str = r#"
ALTER TABLE eval_runs ADD COLUMN golden_source TEXT;
ALTER TABLE eval_runs ADD COLUMN k INTEGER;
ALTER TABLE eval_runs ADD COLUMN status TEXT NOT NULL DEFAULT 'completed';
"#;
/// Drop unused embedding_sets table. Defined in MIGRATION_001 but never
/// referenced by any Rust code — no repo, no types, no callers.
pub const MIGRATION_004_DROP_EMBEDDING_SETS: &str = r#"
DROP TABLE IF EXISTS embedding_sets;
"#;
/// Add a nullable `tenant_id` column to every catalog table, plus a B-tree
/// index per table. The column stores the canonical hyphenated lowercase
/// `Uuid::Display` form (SQLite has no native UUID type; `TEXT` is the
/// convention). Existing rows back-fill to NULL.
pub const MIGRATION_005_TENANT_SCOPE: &str = r#"
ALTER TABLE sources ADD COLUMN tenant_id TEXT;
ALTER TABLE models ADD COLUMN tenant_id TEXT;
ALTER TABLE fine_tune_jobs ADD COLUMN tenant_id TEXT;
ALTER TABLE eval_runs ADD COLUMN tenant_id TEXT;
ALTER TABLE result_tables ADD COLUMN tenant_id TEXT;
ALTER TABLE evidence_channels ADD COLUMN tenant_id TEXT;
CREATE INDEX idx_sources_tenant ON sources(tenant_id);
CREATE INDEX idx_models_tenant ON models(tenant_id);
CREATE INDEX idx_fine_tune_jobs_tenant ON fine_tune_jobs(tenant_id);
CREATE INDEX idx_eval_runs_tenant ON eval_runs(tenant_id);
CREATE INDEX idx_result_tables_tenant ON result_tables(tenant_id);
CREATE INDEX idx_evidence_channels_tenant ON evidence_channels(tenant_id);
"#;
/// Normalise the JSON-blob `evidence_channels.schema_json` column into a
/// child `evidence_channel_columns` table. After this migration each
/// declared column is a row keyed by `(channel_name, column_name)`,
/// making the append-only invariant a database constraint rather than
/// a parser check.
pub const MIGRATION_006_CHANNEL_COLUMNS: &str = r#"
CREATE TABLE evidence_channel_columns (
channel_name TEXT NOT NULL REFERENCES evidence_channels(channel_name),
column_name TEXT NOT NULL,
column_type TEXT NOT NULL,
ordinal INTEGER NOT NULL,
declared_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
PRIMARY KEY (channel_name, column_name)
);
CREATE UNIQUE INDEX idx_channel_cols_ordinal
ON evidence_channel_columns(channel_name, ordinal);
INSERT INTO evidence_channel_columns(channel_name, column_name, column_type, ordinal) VALUES
('vector', 'similarity', 'Float32', 0),
('inference', 'inference_model', 'Utf8', 0),
('inference', 'inference_task', 'Utf8', 1),
('inference', 'inference_confidence', 'Float32', 2);
ALTER TABLE evidence_channels DROP COLUMN schema_json;
"#;
/// Migration 007 — mutable companion tables registry.
///
/// Adds two catalog tables that record user-declared mutable tables:
/// * `mutable_tables` — one row per registered table, carrying the Arrow
/// schema JSON, primary-key column list, optional tenant scope, free-form
/// user metadata, and a backend identifier (`'sqlite'` | `'postgres'`).
/// * `mutable_table_indexes` — secondary indexes per registered table.
///
/// The `tenant_id` column on `mutable_tables` is defined by migration 005;
/// Phase 2 stores `NULL` for every row it writes. Phase 3 wires the
/// session-attribute layer that populates it.
pub const MIGRATION_007_MUTABLE_TABLES: &str = r#"
CREATE TABLE mutable_tables (
id TEXT PRIMARY KEY,
schema_json TEXT NOT NULL,
primary_key TEXT NOT NULL,
tenant_id TEXT,
user_metadata TEXT NOT NULL DEFAULT '{}',
backend_kind TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
updated_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE INDEX idx_mutable_tables_tenant ON mutable_tables(tenant_id);
CREATE TABLE mutable_table_indexes (
table_id TEXT NOT NULL REFERENCES mutable_tables(id) ON DELETE CASCADE,
index_name TEXT NOT NULL,
columns TEXT NOT NULL,
-- BIGINT (8-byte) so the column decodes as `i64` on both SQLite (stores
-- INTEGER as variable-width up to 8 bytes) and Postgres (where `INTEGER`
-- is INT4 / i32, incompatible with the engine's i64 read shape).
is_unique BIGINT NOT NULL DEFAULT 0,
PRIMARY KEY (table_id, index_name)
);
"#;
/// Migration 008 — `order_column` on `mutable_tables`.
///
/// `MutableTableDefinition` already carries an optional `order_column` field
/// validated at build time, but Phase 2's `mutable_tables` catalog row did
/// not persist it; every reload via `get_mutable_table` returned
/// `order_column: None`. Phase 4's trigger-stream replay path consumes
/// `order_column` via `MutableTableRegistry::scan_after`, so we round-trip
/// it now.
pub const MIGRATION_008_MUTABLE_ORDER_COLUMN: &str = r#"
ALTER TABLE mutable_tables ADD COLUMN order_column TEXT;
"#;
/// Migration 010 — rename `source_type = 'local'` to `'file'`.
///
/// The `SourceType` enum on the Rust side has its `Local` variant
/// renamed `File` so the engine's file-shaped source driver can target
/// any `StorageUrl` (local disk, S3, GCS, Azure). The serde rename is
/// not back-compatible — there is no `#[serde(alias = "local")]` — so
/// every existing row whose JSON-encoded `source_type` column reads
/// `"local"` must be rewritten to `"file"` before the next catalog
/// read.
///
/// The column stores the JSON-encoded enum tag rather than the bare
/// snake-case string — `'"local"'` is what `serde_json::to_string` emits
/// for `SourceType::Local`. The UPDATE matches that exact spelling.
pub const MIGRATION_010_RENAME_SOURCE_TYPE_LOCAL_TO_FILE: &str = r#"
UPDATE sources SET source_type = '"file"' WHERE source_type = '"local"';
"#;
/// Migration 009 — trigger-stream `topics` catalog table.
///
/// One row per registered topic. The Arrow schema is persisted as JSON
/// (matching the convention used by `mutable_repo`) — `BLOB` / `BYTEA`
/// would force dialect-aware DDL whereas `TEXT` decodes identically on
/// both backends. `backing_table` references the Phase-2 mutable table
/// that persists the event log; `ON DELETE RESTRICT` keeps the topic and
/// its backing table aligned. Tenant scope follows ADR-00 — nullable.
pub const MIGRATION_009_TOPICS: &str = r#"
CREATE TABLE topics (
topic_id TEXT PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
schema_json TEXT NOT NULL,
tenant_id TEXT,
broker_metadata TEXT NOT NULL DEFAULT '{}',
backing_table TEXT NOT NULL UNIQUE REFERENCES mutable_tables(id) ON DELETE RESTRICT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT))
);
CREATE INDEX idx_topics_tenant ON topics(tenant_id);
CREATE INDEX idx_topics_name ON topics(name);
"#;
/// Migration 013 — derived result tables (`kind` + `derived_from`).
///
/// A result table is either a *model output* (an embedding or inference table
/// produced by running a model over a source) or a *derivation of* another
/// result table. The similarity-graph edge table is the first derivation kind:
/// it is computed from an existing embedding table, carries no model, and has
/// no sidecar index.
///
/// `kind` is a discriminator orthogonal to `result_tables.task`: a `'model'`
/// row is resolved as an embedding/inference output, while a non-`'model'` row
/// is excluded from embedding-table resolution even though its `task` column
/// still names the source embedding's task. This keeps the `ModelTask` enum a
/// pristine catalogue of genuine model tasks — the kind, not a fake `ModelTask`
/// variant, is what marks a row non-resolvable as an embedding source.
///
/// `derived_from` references the source result table the derivation was
/// computed from (the embedding table for an edge table); it is `NULL` for
/// `'model'` rows. Existing rows back-fill to `kind = 'model'`, `derived_from
/// = NULL` via the column defaults.
pub const MIGRATION_013_RESULT_TABLE_KIND: &str = r#"
ALTER TABLE result_tables ADD COLUMN kind TEXT NOT NULL DEFAULT 'model';
ALTER TABLE result_tables ADD COLUMN derived_from TEXT REFERENCES result_tables(table_name);
CREATE INDEX idx_result_tables_kind ON result_tables(kind);
"#;
/// Migration 011 — per-query eval persistence (spec J9).
///
/// Companion to `eval_runs`: one row per (eval_run_id, query_id), carrying the
/// per-query metric vector (Recall@{1,3,5,10}, MRR, nDCG, distance) as JSON and
/// an opaque `cohorts` JSON object (`'{}'` when none supplied). The aggregate
/// `eval_runs.metrics` path is untouched; this table is purely additive so
/// downstream consumers can re-aggregate the stored per-query arrays by cohort
/// instead of re-running the eval.
///
/// The `_jammi_` name prefix marks the table substrate-owned (same reserved
/// convention as the J2 audit table): users may read it but the substrate owns
/// writes. `tenant_id` follows the catalog convention (migration 005) —
/// nullable `TEXT` holding the canonical hyphenated `Uuid::Display` form — and
/// reads are tenant-filtered exactly like `eval_runs`.
pub const MIGRATION_011_EVAL_PER_QUERY: &str = r#"
CREATE TABLE _jammi_eval_per_query (
eval_run_id TEXT NOT NULL,
query_id TEXT NOT NULL,
cohorts TEXT NOT NULL DEFAULT '{}',
metrics TEXT NOT NULL,
tenant_id TEXT,
PRIMARY KEY (eval_run_id, query_id)
);
CREATE INDEX idx_eval_per_query_run ON _jammi_eval_per_query(eval_run_id);
CREATE INDEX idx_eval_per_query_tenant ON _jammi_eval_per_query(tenant_id);
"#;
/// Migration 012 — scope topic-name uniqueness per tenant.
///
/// Migration 009 created `topics` with a global `name TEXT NOT NULL UNIQUE`.
/// That is wrong for the substrate's trigger-stream model: per-tenant topics
/// (`tenant: Some(_)`) are the norm, and two tenants must be able to hold the
/// same logical topic name (e.g. each tenant's own `jammi.audit.search.v1`).
/// Under the global unique, the first tenant to register a topic claims the
/// name process-wide and every other tenant's first registration fails with
/// `UNIQUE constraint failed: topics.name` — the J2 per-query audit log
/// crashes for the second tenant onward.
///
/// SQLite cannot drop or alter a column-level `UNIQUE` constraint in place, so
/// this migration rebuilds `topics` via the canonical
/// create-new / copy / drop / rename dance, replacing the global unique on
/// `name` with a composite `UNIQUE(name, tenant_id)`. The rebuilt table carries
/// identical columns, defaults, and the same FK / `ON DELETE RESTRICT` on
/// `backing_table`; only the uniqueness rule changes.
///
/// Per-tenant rows with the same name now coexist. SQLite treats NULLs as
/// distinct in UNIQUE constraints, so global (`tenant_id IS NULL`) topics with
/// the same name are *not* deduplicated by this constraint — but the only
/// globally-registered topics today are user-declared via the CLI/session/
/// Python surfaces, and the substrate-owned topics (audit, session lifecycle)
/// are all tenant-pinned, so this matches existing behaviour. The
/// `idx_topics_tenant` and `idx_topics_name` secondary indexes are recreated.
///
/// `PRAGMA foreign_keys` is OFF inside the migration transaction (sqlx opens
/// SQLite connections without it), so the temporary FK-less window during the
/// table swap does not trip referential checks; the `backing_table` FK is
/// restored on the rebuilt table.
pub const MIGRATION_012_TOPICS_TENANT_UNIQUE: &str = r#"
CREATE TABLE topics_new (
topic_id TEXT PRIMARY KEY,
name TEXT NOT NULL,
schema_json TEXT NOT NULL,
tenant_id TEXT,
broker_metadata TEXT NOT NULL DEFAULT '{}',
backing_table TEXT NOT NULL UNIQUE REFERENCES mutable_tables(id) ON DELETE RESTRICT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
UNIQUE(name, tenant_id)
);
INSERT INTO topics_new (topic_id, name, schema_json, tenant_id, broker_metadata, backing_table, created_at)
SELECT topic_id, name, schema_json, tenant_id, broker_metadata, backing_table, created_at FROM topics;
DROP TABLE topics;
ALTER TABLE topics_new RENAME TO topics;
CREATE INDEX idx_topics_tenant ON topics(tenant_id);
CREATE INDEX idx_topics_name ON topics(name);
"#;
/// Migration 014 — seed the `bm25` lexical-retrieval evidence channel.
///
/// The lexical (tantivy/BM25) sidecar contributes its rank and score on this
/// channel, the lexical peer of the `vector` channel's `similarity`. It shares
/// `inference`'s priority slot order only incidentally; what matters is that it
/// sorts after `vector` (priority 1) so a fused result's dense column precedes
/// its lexical columns. The channel carries the raw BM25 score plus the 0-based
/// lexical rank RRF fuses on — both caller-supplied, exactly as
/// `vector.similarity` is.
pub const MIGRATION_014_BM25_CHANNEL: &str = r#"
INSERT INTO evidence_channels (channel_name, priority) VALUES ('bm25', 3);
INSERT INTO evidence_channel_columns(channel_name, column_name, column_type, ordinal) VALUES
('bm25', 'bm25_score', 'Float32', 0),
('bm25', 'bm25_rank', 'Int64', 1);
"#;
/// Migration 015 — lease-based job-queue columns on `fine_tune_jobs`.
///
/// Turns the table into a durable work queue a worker can poll. A queued job
/// is claimed by setting `status = 'running'`, stamping the claiming worker in
/// `claimed_by`, and writing a `lease_expires_at` deadline; the worker renews
/// the lease by heartbeating, and an expired lease lets the row be re-queued
/// (or failed once `attempts` is exhausted).
///
/// * `kind` — the training-job kind. Discriminates which trainer drives the
/// row (`'fine_tune'` for the contrastive-adapter path; future kinds add
/// their own values). Existing rows back-fill to `'fine_tune'` via the
/// column default.
/// * `claimed_by` — id of the worker holding the lease; `NULL` while the job
/// is queued or otherwise unclaimed.
/// * `lease_expires_at` — the lease deadline. An engine-clock UTC timestamp
/// stored in the canonical `%Y-%m-%dT%H:%M:%S%.6fZ` form (the same lexical
/// shape the repo writes), so that `lease_expires_at < $now` is a correct
/// text comparison on both SQLite and Postgres with no dialect-specific
/// interval arithmetic. `NULL` when the job is not leased.
/// * `attempts` — how many times the job has been claimed; incremented on
/// each claim and bounding reclaim retries.
/// * `training_spec` — reserved for a self-contained job specification a
/// worker can execute without re-deriving it from session state. Nullable;
/// no writer populates it yet.
///
/// `idx_fine_tune_jobs_claim` on `(status, lease_expires_at)` serves both the
/// oldest-queued claim scan and the expired-lease reclaim scan.
pub const MIGRATION_015_FINE_TUNE_JOB_QUEUE: &str = r#"
ALTER TABLE fine_tune_jobs ADD COLUMN kind TEXT NOT NULL DEFAULT 'fine_tune';
ALTER TABLE fine_tune_jobs ADD COLUMN claimed_by TEXT;
ALTER TABLE fine_tune_jobs ADD COLUMN lease_expires_at TEXT;
ALTER TABLE fine_tune_jobs ADD COLUMN attempts INTEGER NOT NULL DEFAULT 0;
ALTER TABLE fine_tune_jobs ADD COLUMN training_spec TEXT;
CREATE INDEX idx_fine_tune_jobs_claim ON fine_tune_jobs(status, lease_expires_at);
"#;
/// Migration 016 — rename the job table to `training_jobs`.
///
/// The job machinery carries more than one training kind (the `kind`
/// discriminator added in 015), so the table's name is generalised from
/// `fine_tune_jobs` to `training_jobs`. A behaviour-preserving rename: the
/// column set, constraints, and row contents are unchanged.
///
/// `ALTER TABLE … RENAME TO …` is portable across SQLite and Postgres. The
/// three indexes are renamed by dropping and recreating them against the new
/// table name — `DROP INDEX` / `CREATE INDEX` are portable, whereas SQLite has
/// no `ALTER INDEX … RENAME`. After the table rename the indexes still exist
/// under their old names (both backends carry indexes across a table rename),
/// so each is dropped before being recreated with its new name.
pub const MIGRATION_016_RENAME_TRAINING_JOBS: &str = r#"
ALTER TABLE fine_tune_jobs RENAME TO training_jobs;
DROP INDEX idx_fine_tune_jobs_status;
DROP INDEX idx_fine_tune_jobs_tenant;
DROP INDEX idx_fine_tune_jobs_claim;
CREATE INDEX idx_training_jobs_status ON training_jobs(status);
CREATE INDEX idx_training_jobs_tenant ON training_jobs(tenant_id);
CREATE INDEX idx_training_jobs_claim ON training_jobs(status, lease_expires_at);
"#;
/// Migration 017 — the served `artifact_path` column on `models`.
///
/// `artifact_path` is the model's *commit pointer*: the path a reload resolves
/// the model's bytes from. For a fine-tuned or context-predictor model it is
/// written by exactly one writer — the worker whose lease-guarded finalize CAS
/// wins — and by no one else. The finalize CAS sets it with a plain
/// `UPDATE models SET artifact_path = …` in the same lease-guarded transaction
/// as the job-row compare-and-set (no dialect-specific JSON mutation), so the
/// served pointer is structurally single-writer: a loser's CAS matches no job
/// row and writes neither the job status nor the served path. The descriptive
/// `metadata` fields (`base_model_id`, `config_json`) stay in the JSON blob;
/// the served path is its own column.
pub const MIGRATION_017_MODEL_ARTIFACT_PATH_COLUMN: &str = r#"
ALTER TABLE models ADD COLUMN artifact_path TEXT;
"#;
/// Migration 018 — make `eval_runs.model_id` nullable.
///
/// An eval run is only sometimes model-scoped. Embedding and inference evals
/// score a registered model's output, so they carry the catalog PK of that
/// model and the FK to `models(model_id)` is theirs to satisfy. A calibration
/// eval is different in kind: it scores a held-out predictive *distribution*
/// supplied in the golden source — `(mean, sd, outcome)` or `(draws, outcome)`
/// — and never loads or names a model. Forcing it to put *something* in a
/// `NOT NULL REFERENCES models(model_id)` column made it write a synthetic id
/// (the calibration shape, e.g. `gaussian::1`) for which no `models` row
/// exists, so the insert failed the foreign key at runtime — calibration eval
/// could not record a single run.
///
/// The fix is to let `model_id` be `NULL`: a model-scoped run records its
/// model's PK and the FK validates it; a calibration run records `NULL` and is
/// simply not model-scoped. The FK is *kept* — when a value is present it must
/// reference a real model — only the `NOT NULL` is dropped.
///
/// SQLite cannot drop a column-level `NOT NULL` in place, so this rebuilds
/// `eval_runs` via the canonical create-new / copy / drop / rename dance (the
/// same shape migration 012 uses for `topics`), preserving every other column,
/// default, and the `model_id` FK. The secondary indexes
/// (`model`, `type`, `created`, `tenant`) are recreated. `PRAGMA foreign_keys`
/// is OFF inside the migration transaction, so the FK-less window during the
/// swap does not trip referential checks.
pub const MIGRATION_018_EVAL_RUNS_MODEL_ID_NULLABLE: &str = r#"
CREATE TABLE eval_runs_new (
run_id TEXT PRIMARY KEY,
model_id TEXT REFERENCES models(model_id),
eval_type TEXT NOT NULL,
source_id TEXT,
metrics TEXT NOT NULL,
config TEXT,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
golden_source TEXT,
k INTEGER,
status TEXT NOT NULL DEFAULT 'completed',
tenant_id TEXT
);
INSERT INTO eval_runs_new (run_id, model_id, eval_type, source_id, metrics, config, created_at, golden_source, k, status, tenant_id)
SELECT run_id, model_id, eval_type, source_id, metrics, config, created_at, golden_source, k, status, tenant_id FROM eval_runs;
DROP TABLE eval_runs;
ALTER TABLE eval_runs_new RENAME TO eval_runs;
CREATE INDEX idx_eval_runs_model ON eval_runs(model_id);
CREATE INDEX idx_eval_runs_type ON eval_runs(eval_type);
CREATE INDEX idx_eval_runs_created ON eval_runs(created_at);
CREATE INDEX idx_eval_runs_tenant ON eval_runs(tenant_id);
"#;
/// Migration 019 — normalize the `models.status` value set onto the canonical
/// [`ModelStatus`](super::status::ModelStatus) variants.
///
/// Migration 001 created `models` with `status TEXT NOT NULL DEFAULT
/// 'available'`, but `register_model` always writes an explicit `'registered'`
/// and the typed `ModelStatus` enum carries no `'available'` variant — so the
/// DDL default was a string the type system never names. A row could only carry
/// `'available'` if it were inserted by some path that omitted the column and
/// fell through to the default; no such path exists today, so this is defensive
/// rather than corrective. It rewrites any stray `'available'` to `'registered'`
/// (the canonical "registered, not loaded" state) so the on-disk value set is
/// total over the enum without introducing an `'available'` alias variant.
pub const MIGRATION_019_NORMALIZE_MODEL_STATUS: &str = r#"
UPDATE models SET status = 'registered' WHERE status = 'available';
"#;
/// Migration 020 — tenant-qualify the evidence-channel identity.
///
/// Migration 005 added a `tenant_id` column to `evidence_channels` but no code
/// ever wrote or read it, and the channel name stayed a *global* `TEXT PRIMARY
/// KEY` (migration 001). `evidence_channel_columns` (migration 006) keyed on
/// `(channel_name, column_name)` and FK-referenced `evidence_channels(channel_name)`
/// — also tenant-blind. So two tenants registering a channel of the same name
/// collided on one global PK slot: tenant B's `register("X")` hit tenant A's row
/// (a cross-tenant collision, the same D1 class fixed for `models` in #140), and
/// `list()` returned every tenant's channels regardless of the bound tenant — a
/// cross-tenant read leak. The gRPC handlers already wrapped these calls in a
/// tenant `scoped(...)`, so the scope was a lie the repo ignored.
///
/// This migration makes the channel name unique PER TENANT by reshaping the
/// identity of both tables to carry `tenant_id`:
/// * `evidence_channels` gains a `UNIQUE (tenant_id, channel_name)` constraint
/// in place of the global `channel_name` PK.
/// * `evidence_channel_columns` carries `tenant_id`, gains a
/// `UNIQUE (tenant_id, channel_name, column_name)` constraint in place of
/// its old `(channel_name, column_name)` PK, and its FK becomes a composite
/// `REFERENCES evidence_channels(tenant_id, channel_name)` so the parent
/// link holds *within* a tenant. The per-channel ordinal-uniqueness index is
/// rebuilt as `(tenant_id, channel_name, ordinal)`.
///
/// `tenant_id` deliberately stays OUT of any PRIMARY KEY and is expressed as a
/// UNIQUE constraint instead — exactly as migration 012 did for `topics`.
/// Postgres (the CI-authoritative backend, which runs these same migration
/// constants) makes every PRIMARY-KEY column implicitly `NOT NULL`, so a
/// composite `PRIMARY KEY (tenant_id, …)` would reject the global
/// (`tenant_id IS NULL`) seed channels on insert. A UNIQUE constraint admits
/// NULLs (treated as distinct) on both backends, and Postgres accepts it as the
/// composite FK target the child table references.
///
/// SQLite cannot alter a PK or a column-level FK in place, so each table is
/// rebuilt via the canonical create-new / copy / drop / rename dance (the same
/// shape migrations 012 and 018 use). Every pre-existing row's `tenant_id` is
/// already `NULL` (no writer ever set it), so the copy carries that forward —
/// the seed channels (`vector`, `inference`, `bm25`) and any rows present stay
/// in the global (`tenant_id IS NULL`) namespace, preserving existing
/// default-channel behaviour. The child rows are parked in a temporary table
/// while the parent is swapped, then copied back with an explicit
/// `tenant_id = NULL` (the old child table had no `tenant_id` column); the child
/// is dropped *before* the parent and recreated *after* it, so the rebuild is
/// FK-safe on both backends regardless of FK-enforcement state.
///
/// Note on NULL semantics: both backends treat NULLs as DISTINCT in a UNIQUE
/// constraint, so the `UNIQUE (tenant_id, channel_name)` constraint alone does
/// not reject a duplicate *global* (`tenant_id IS NULL`) channel. A PARTIAL
/// UNIQUE INDEX on `channel_name WHERE tenant_id IS NULL` closes that gap: it
/// makes the database enforce global-channel-name uniqueness atomically, so two
/// concurrent unbound registrations of the same name can no longer both commit.
/// Together the two constraints enforce per-namespace uniqueness with no
/// app-level race — the composite UNIQUE covers the non-NULL tenants, the
/// partial index covers the global namespace. `CREATE UNIQUE INDEX … WHERE …`
/// is accepted with identical syntax by SQLite (≥3.8.0) and Postgres, and runs
/// from this shared migration constant on both backends. The repo's `register`
/// keeps its explicit tenant-scoped existence check for a friendly
/// "already exists" error; the partial index (global) and the composite UNIQUE
/// (tenant-scoped) are the authoritative DB-level backstops, and a duplicate
/// surfaces through the same `BackendError::Constraint` → "already exists" path
/// in either namespace. Greenfield: no production rows to preserve beyond the
/// seeds.
pub const MIGRATION_020_CHANNEL_TENANT_SCOPE: &str = r#"
CREATE TABLE evidence_channel_columns_old (
channel_name TEXT NOT NULL,
column_name TEXT NOT NULL,
column_type TEXT NOT NULL,
ordinal INTEGER NOT NULL,
declared_at TEXT NOT NULL
);
INSERT INTO evidence_channel_columns_old (channel_name, column_name, column_type, ordinal, declared_at)
SELECT channel_name, column_name, column_type, ordinal, declared_at FROM evidence_channel_columns;
DROP TABLE evidence_channel_columns;
CREATE TABLE evidence_channels_new (
tenant_id TEXT,
channel_name TEXT NOT NULL,
priority INTEGER NOT NULL,
created_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
UNIQUE (tenant_id, channel_name)
);
INSERT INTO evidence_channels_new (tenant_id, channel_name, priority, created_at)
SELECT tenant_id, channel_name, priority, created_at FROM evidence_channels;
DROP TABLE evidence_channels;
ALTER TABLE evidence_channels_new RENAME TO evidence_channels;
CREATE INDEX idx_evidence_channels_tenant ON evidence_channels(tenant_id);
CREATE UNIQUE INDEX idx_evidence_channels_global_name
ON evidence_channels(channel_name) WHERE tenant_id IS NULL;
CREATE TABLE evidence_channel_columns (
tenant_id TEXT,
channel_name TEXT NOT NULL,
column_name TEXT NOT NULL,
column_type TEXT NOT NULL,
ordinal INTEGER NOT NULL,
declared_at TEXT NOT NULL DEFAULT (CAST(CURRENT_TIMESTAMP AS TEXT)),
UNIQUE (tenant_id, channel_name, column_name),
FOREIGN KEY (tenant_id, channel_name)
REFERENCES evidence_channels(tenant_id, channel_name)
);
CREATE UNIQUE INDEX idx_channel_cols_ordinal
ON evidence_channel_columns(tenant_id, channel_name, ordinal);
INSERT INTO evidence_channel_columns(tenant_id, channel_name, column_name, column_type, ordinal, declared_at)
SELECT NULL, channel_name, column_name, column_type, ordinal, declared_at FROM evidence_channel_columns_old;
DROP TABLE evidence_channel_columns_old;
"#;