1use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole, Snippet};
4use crate::sources::provenance::{LOCAL_SOURCE_ID, Source, SourceKind};
5use anyhow::{Context, Result, anyhow, bail};
6use frankensqlite::{
7 Connection as FrankenConnection, Row as FrankenRow, SqliteValue,
8 compat::{
9 ConnectionExt as FrankenConnectionExt, OpenFlags as FrankenOpenFlags,
10 OptionalExtension as FrankenOptionalExtension, ParamValue, RowExt as FrankenRowExt,
11 Transaction as FrankenTransaction, TransactionExt as FrankenTransactionExt,
12 open_with_flags as open_franken_with_flags, param_slice_to_values, params_from_iter,
13 },
14 migrate::MigrationRunner,
15};
16use serde::{Deserialize, Serialize};
17use smallvec::SmallVec;
18use std::borrow::Cow;
19use std::collections::{HashMap, HashSet};
20use std::fs;
21use std::io::{BufRead, BufReader, Write};
22use std::process::{Command, Stdio};
23use std::sync::{
24 Arc,
25 atomic::{AtomicBool, AtomicI8, AtomicI64, AtomicU64, AtomicUsize, Ordering},
26};
27
/// Builds a `&[ParamValue]` slice from a comma-separated list of expressions.
///
/// Every expression is converted with `ParamValue::from`; a trailing comma is
/// accepted. Invoked with no arguments, the macro yields an empty slice.
macro_rules! fparams {
    // No arguments: an empty, explicitly typed parameter slice.
    () => {
        &[] as &[ParamValue]
    };
    // One or more expressions, each converted through `ParamValue::from`.
    ($($val:expr),+ $(,)?) => {
        &[$(ParamValue::from($val)),+] as &[ParamValue]
    };
}
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
39use thiserror::Error;
40use tracing::info;
41
/// Timeout (30 seconds) that `LazyFrankenDb::get` passes to
/// `acquire_doctor_mutation_db_open_guard`.
const DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Upper bound in bytes (64 KiB) on how much of the doctor lock file is read
/// when looking for its `pid=` entry.
const DOCTOR_MUTATION_LOCK_MAX_METADATA_READ: u64 = 64 * 1024;
44
/// Errors produced while lazily opening the FrankenSQLite database.
#[derive(Debug, Error)]
pub enum LazyDbError {
    /// No file exists at the database path.
    #[error("Database not found at {0}")]
    NotFound(PathBuf),
    /// Opening the database at `path` failed; `source` holds the underlying
    /// error. `LazyFrankenDb` also uses this variant (with a
    /// `FrankenError::Internal` source) when the doctor mutation lock cannot
    /// be acquired or the open times out.
    #[error("Failed to open FrankenSQLite database at {path}: {source}")]
    FrankenOpenFailed {
        path: PathBuf,
        source: frankensqlite::FrankenError,
    },
}
63
/// Wrapper that marks a `FrankenConnection` as `Send`.
///
/// Field 0 is the connection. Fields 1 and 2 hold the checkpoint-pages and
/// busy-timeout (milliseconds) values given to `new_with_index_writer_state`;
/// `new` fills them with the `UNSET_INDEX_WRITER_*` sentinels.
pub struct SendFrankenConnection(FrankenConnection, i64, u64);

// SAFETY: NOTE(review) — soundness depends on `FrankenConnection` being safe
// to move to another thread. That invariant is not visible in this section of
// the file; confirm it against the frankensqlite implementation.
unsafe impl Send for SendFrankenConnection {}
79
80impl SendFrankenConnection {
81 pub(crate) fn new(conn: FrankenConnection) -> Self {
82 Self(
83 conn,
84 UNSET_INDEX_WRITER_CHECKPOINT_PAGES,
85 UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS,
86 )
87 }
88
89 pub(crate) fn new_with_index_writer_state(
90 conn: FrankenConnection,
91 checkpoint_pages: i64,
92 busy_timeout_ms: u64,
93 ) -> Self {
94 Self(conn, checkpoint_pages, busy_timeout_ms)
95 }
96
97 pub(crate) fn into_parts(self) -> (FrankenConnection, i64, u64) {
98 (self.0, self.1, self.2)
99 }
100}
101
102impl std::ops::Deref for SendFrankenConnection {
103 type Target = FrankenConnection;
104 fn deref(&self) -> &FrankenConnection {
105 &self.0
106 }
107}
108
/// Handle to a FrankenSQLite database that is opened on first use.
///
/// The connection slot starts out as `None` and is filled by the first
/// successful `get` / `get_with_timeout` call.
pub struct LazyFrankenDb {
    /// Location of the database file.
    path: PathBuf,
    /// Connection slot; `None` until the database has been opened.
    conn: parking_lot::Mutex<Option<SendFrankenConnection>>,
}

/// Mutex guard over the connection slot of a `LazyFrankenDb`.
///
/// Dereferences to the contained `FrankenConnection`.
pub struct LazyFrankenDbGuard<'a>(parking_lot::MutexGuard<'a, Option<SendFrankenConnection>>);
121
122impl std::fmt::Debug for LazyFrankenDbGuard<'_> {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.debug_tuple("LazyFrankenDbGuard")
125 .field(&self.0.is_some())
126 .finish()
127 }
128}
129
130impl std::ops::Deref for LazyFrankenDbGuard<'_> {
131 type Target = FrankenConnection;
132 fn deref(&self) -> &FrankenConnection {
133 self.0
134 .as_ref()
135 .expect("LazyFrankenDb connection must be initialized before access")
136 }
137}
138
139impl LazyFrankenDb {
140 pub fn new(path: PathBuf) -> Self {
142 Self {
143 path,
144 conn: parking_lot::Mutex::new(None),
145 }
146 }
147
148 pub fn from_overrides(data_dir: &Option<PathBuf>, db_override: Option<PathBuf>) -> Self {
152 let data_dir = data_dir.clone().unwrap_or_else(crate::default_data_dir);
153 let path = db_override.unwrap_or_else(|| data_dir.join("agent_search.db"));
154 Self::new(path)
155 }
156
157 pub fn get(&self, reason: &str) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
162 let mut guard = self.conn.lock();
163 if guard.is_none() {
164 if !self.path.exists() {
165 return Err(LazyDbError::NotFound(self.path.clone()));
166 }
167 let start = Instant::now();
168 let _doctor_guard = acquire_doctor_mutation_db_open_guard(
169 &self.path,
170 DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT,
171 )
172 .map_err(|err| LazyDbError::FrankenOpenFailed {
173 path: self.path.clone(),
174 source: frankensqlite::FrankenError::Internal(err.to_string()),
175 })?;
176 let conn =
177 FrankenConnection::open(self.path.to_string_lossy().into_owned()).map_err(|e| {
178 LazyDbError::FrankenOpenFailed {
179 path: self.path.clone(),
180 source: e,
181 }
182 })?;
183 let elapsed_ms = start.elapsed().as_millis();
184 info!(
185 path = %self.path.display(),
186 elapsed_ms = elapsed_ms,
187 reason = reason,
188 "lazily opened FrankenSQLite database"
189 );
190 *guard = Some(SendFrankenConnection::new(conn));
191 }
192 Ok(LazyFrankenDbGuard(guard))
193 }
194
195 pub fn get_with_timeout(
201 &self,
202 reason: &str,
203 timeout: Duration,
204 ) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
205 let mut guard = self.conn.lock();
206 if guard.is_none() {
207 if !self.path.exists() {
208 return Err(LazyDbError::NotFound(self.path.clone()));
209 }
210 let start = Instant::now();
211 let path_owned = self.path.to_string_lossy().into_owned();
212 let path_for_guard = self.path.clone();
213 let (tx, rx) = std::sync::mpsc::channel();
214 std::thread::spawn(move || {
215 let _doctor_guard =
216 match acquire_doctor_mutation_db_open_guard(&path_for_guard, timeout) {
217 Ok(guard) => guard,
218 Err(err) => {
219 let _ = tx
220 .send(Err(frankensqlite::FrankenError::Internal(err.to_string())));
221 return;
222 }
223 };
224 let _ =
225 tx.send(FrankenConnection::open(path_owned).map(SendFrankenConnection::new));
226 });
227 let conn = rx
228 .recv_timeout(timeout)
229 .map_err(|_| LazyDbError::FrankenOpenFailed {
230 path: self.path.clone(),
231 source: frankensqlite::FrankenError::Internal(format!(
232 "database open timed out after {}s (possible corruption or lock contention)",
233 timeout.as_secs()
234 )),
235 })?
236 .map_err(|e| LazyDbError::FrankenOpenFailed {
237 path: self.path.clone(),
238 source: e,
239 })?;
240 let elapsed_ms = start.elapsed().as_millis();
241 info!(
242 path = %self.path.display(),
243 elapsed_ms = elapsed_ms,
244 reason = reason,
245 "lazily opened FrankenSQLite database (with timeout)"
246 );
247 *guard = Some(conn);
248 }
249 Ok(LazyFrankenDbGuard(guard))
250 }
251
252 pub fn path(&self) -> &Path {
254 &self.path
255 }
256
257 pub fn is_open(&self) -> bool {
259 self.conn.lock().is_some()
260 }
261}
262
/// State advanced by every `next_franken_retry_jitter_ms` call.
static FRANKEN_RETRY_JITTER_STATE: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
/// Number of live `DoctorMutationDbOpenBypassGuard`s; non-zero means the
/// doctor-lock check on database open is bypassed.
static DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH: AtomicUsize = AtomicUsize::new(0);
/// Gate for the message-lookup trace counters below; they only advance while this is `true`.
static MESSAGE_LOOKUP_TRACE_ENABLED: AtomicBool = AtomicBool::new(false);
/// Incremented by `record_message_lookup_exact_idx_probe`.
static MESSAGE_LOOKUP_EXACT_IDX_PROBES: AtomicU64 = AtomicU64::new(0);
/// Increased by the query count given to `record_message_lookup_bounded_queries`.
static MESSAGE_LOOKUP_BOUNDED_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Incremented by `record_message_lookup_full_scan_query`.
static MESSAGE_LOOKUP_FULL_SCAN_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Increased by the row counts reported to the bounded-query and full-scan recorders.
static MESSAGE_LOOKUP_ROWS_MATERIALIZED: AtomicU64 = AtomicU64::new(0);
270
/// Point-in-time copy of the global message-lookup trace counters.
#[derive(Debug, Clone, Copy, Default, Serialize)]
pub(crate) struct MessageLookupTraceCounters {
    /// Value of `MESSAGE_LOOKUP_EXACT_IDX_PROBES`.
    pub exact_idx_probes: u64,
    /// Value of `MESSAGE_LOOKUP_BOUNDED_QUERIES`.
    pub bounded_lookup_queries: u64,
    /// Value of `MESSAGE_LOOKUP_FULL_SCAN_QUERIES`.
    pub full_scan_queries: u64,
    /// Value of `MESSAGE_LOOKUP_ROWS_MATERIALIZED`.
    pub rows_materialized: u64,
}
278
279impl MessageLookupTraceCounters {
280 pub(crate) fn saturating_sub(self, before: Self) -> Self {
281 Self {
282 exact_idx_probes: self
283 .exact_idx_probes
284 .saturating_sub(before.exact_idx_probes),
285 bounded_lookup_queries: self
286 .bounded_lookup_queries
287 .saturating_sub(before.bounded_lookup_queries),
288 full_scan_queries: self
289 .full_scan_queries
290 .saturating_sub(before.full_scan_queries),
291 rows_materialized: self
292 .rows_materialized
293 .saturating_sub(before.rows_materialized),
294 }
295 }
296
297 pub(crate) fn lookups_against_global(self) -> u64 {
298 self.exact_idx_probes.saturating_add(self.rows_materialized)
299 }
300}
301
/// Turns message-lookup tracing on or off and returns the previous setting.
pub(crate) fn set_message_lookup_trace_enabled(enabled: bool) -> bool {
    MESSAGE_LOOKUP_TRACE_ENABLED.swap(enabled, Ordering::Relaxed)
}
305
306pub(crate) fn message_lookup_trace_snapshot() -> MessageLookupTraceCounters {
307 MessageLookupTraceCounters {
308 exact_idx_probes: MESSAGE_LOOKUP_EXACT_IDX_PROBES.load(Ordering::Relaxed),
309 bounded_lookup_queries: MESSAGE_LOOKUP_BOUNDED_QUERIES.load(Ordering::Relaxed),
310 full_scan_queries: MESSAGE_LOOKUP_FULL_SCAN_QUERIES.load(Ordering::Relaxed),
311 rows_materialized: MESSAGE_LOOKUP_ROWS_MATERIALIZED.load(Ordering::Relaxed),
312 }
313}
314
315fn record_message_lookup_exact_idx_probe() {
316 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
317 MESSAGE_LOOKUP_EXACT_IDX_PROBES.fetch_add(1, Ordering::Relaxed);
318 }
319}
320
321fn record_message_lookup_bounded_queries(query_count: u64, rows: usize) {
322 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
323 MESSAGE_LOOKUP_BOUNDED_QUERIES.fetch_add(query_count, Ordering::Relaxed);
324 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
325 }
326}
327
328fn record_message_lookup_full_scan_query(rows: usize) {
329 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
330 MESSAGE_LOOKUP_FULL_SCAN_QUERIES.fetch_add(1, Ordering::Relaxed);
331 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
332 }
333}
334
/// Scope guard returned by `enter_doctor_mutation_db_open_bypass`.
///
/// While at least one guard is alive, `acquire_doctor_mutation_db_open_guard`
/// skips the doctor lock.
pub(crate) struct DoctorMutationDbOpenBypassGuard;

impl Drop for DoctorMutationDbOpenBypassGuard {
    /// Undoes the depth increment made when the guard was created.
    fn drop(&mut self) {
        DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_sub(1, Ordering::SeqCst);
    }
}
342
/// Increments the bypass depth and returns a guard whose drop decrements it again.
pub(crate) fn enter_doctor_mutation_db_open_bypass() -> DoctorMutationDbOpenBypassGuard {
    DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_add(1, Ordering::SeqCst);
    DoctorMutationDbOpenBypassGuard
}
347
348fn doctor_mutation_db_open_bypass_active() -> bool {
349 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.load(Ordering::SeqCst) > 0
350}
351
352fn next_franken_retry_jitter_ms(max_inclusive: u64) -> u64 {
353 let mut value = FRANKEN_RETRY_JITTER_STATE.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
354 value ^= value >> 30;
355 value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9);
356 value ^= value >> 27;
357 value = value.wrapping_mul(0x94d0_49bb_1331_11eb);
358 value ^= value >> 31;
359 value % max_inclusive.saturating_add(1)
360}
361
362pub(crate) fn sleep_with_franken_retry_backoff(
365 backoff: &mut Duration,
366 remaining: Duration,
367 max_backoff: Duration,
368) {
369 let capped = (*backoff).min(remaining);
370 let extra_budget = remaining.saturating_sub(capped).min(capped);
371 let extra_ms = extra_budget.as_millis().min(u128::from(u64::MAX)) as u64;
372 let sleep_for = if extra_ms == 0 {
373 capped
374 } else {
375 capped
376 .saturating_add(Duration::from_millis(next_franken_retry_jitter_ms(
377 extra_ms,
378 )))
379 .min(remaining)
380 };
381 std::thread::sleep(sleep_for);
382 *backoff = backoff.saturating_mul(2).min(max_backoff);
383}
384
385struct DoctorMutationDbOpenGuard(Option<fs::File>);
386
387impl Drop for DoctorMutationDbOpenGuard {
388 fn drop(&mut self) {
389 if let Some(file) = self.0.as_ref() {
390 let _ = fs2::FileExt::unlock(file);
391 }
392 }
393}
394
/// Maps a database path to the doctor repair lock path that guards it.
///
/// Only files named exactly `agent_search.db` are guarded. For those the
/// result is `<parent>/doctor/locks/doctor-repair.lock`. Any other file name,
/// a non-UTF-8 name, or a path without a parent yields `None`.
fn doctor_mutation_lock_path_for_db_open(db_path: &Path) -> Option<PathBuf> {
    let file_name = db_path.file_name()?.to_str()?;
    if file_name != "agent_search.db" {
        return None;
    }

    let mut lock_path = db_path.parent()?.to_path_buf();
    for component in ["doctor", "locks", "doctor-repair.lock"] {
        lock_path.push(component);
    }
    Some(lock_path)
}
408
/// Reports whether lock metadata names the current process as its owner.
///
/// `raw` is scanned line by line for `key=value` pairs. The result is `true`
/// if any line has the key `pid` (after trimming) and a value that parses as
/// a `u32` equal to this process's id.
fn doctor_lock_metadata_pid_is_current_process(raw: &str) -> bool {
    let current_pid = std::process::id();
    raw.lines()
        .filter_map(|line| line.split_once('='))
        .filter(|(key, _)| key.trim() == "pid")
        .any(|(_, value)| matches!(value.trim().parse::<u32>(), Ok(pid) if pid == current_pid))
}
421
422fn doctor_lock_file_pid_is_current_process(file: &fs::File) -> bool {
423 use std::io::Read as _;
424
425 let Ok(mut file) = file.try_clone() else {
426 return false;
427 };
428 let mut raw = String::new();
429 let _ = std::io::Read::take(&mut file, DOCTOR_MUTATION_LOCK_MAX_METADATA_READ)
430 .read_to_string(&mut raw);
431 doctor_lock_metadata_pid_is_current_process(&raw)
432}
433
434fn acquire_doctor_mutation_db_open_guard(
435 db_path: &Path,
436 timeout: Duration,
437) -> Result<DoctorMutationDbOpenGuard> {
438 let Some(lock_path) = doctor_mutation_lock_path_for_db_open(db_path) else {
439 return Ok(DoctorMutationDbOpenGuard(None));
440 };
441 if doctor_mutation_db_open_bypass_active() {
442 return Ok(DoctorMutationDbOpenGuard(None));
443 }
444
445 if let Some(parent) = lock_path.parent() {
446 fs::create_dir_all(parent).with_context(|| {
447 format!(
448 "creating doctor mutation lock directory {} before opening {}",
449 parent.display(),
450 db_path.display()
451 )
452 })?;
453 }
454
455 let deadline = Instant::now() + timeout;
456 let mut backoff = Duration::from_millis(4);
457 loop {
458 let file = fs::OpenOptions::new()
459 .create(true)
460 .truncate(false)
461 .read(true)
462 .write(true)
463 .open(&lock_path)
464 .with_context(|| {
465 format!(
466 "opening doctor mutation lock {} before opening {}",
467 lock_path.display(),
468 db_path.display()
469 )
470 })?;
471
472 if doctor_lock_file_pid_is_current_process(&file) {
473 return Ok(DoctorMutationDbOpenGuard(None));
474 }
475
476 match fs2::FileExt::try_lock_shared(&file) {
477 Ok(()) => return Ok(DoctorMutationDbOpenGuard(Some(file))),
478 Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
479 let now = Instant::now();
480 if now >= deadline {
481 return Err(anyhow!(
482 "doctor mutation lock {} is active while opening {}; refusing to open during repair after waiting {}ms",
483 lock_path.display(),
484 db_path.display(),
485 timeout.as_millis()
486 ));
487 }
488 let remaining = deadline.saturating_duration_since(now);
489 sleep_with_franken_retry_backoff(
490 &mut backoff,
491 remaining,
492 Duration::from_millis(128),
493 );
494 }
495 Err(err) => {
496 return Err(anyhow!(
497 "failed to acquire shared doctor mutation lock {} before opening {}: {}",
498 lock_path.display(),
499 db_path.display(),
500 err
501 ));
502 }
503 }
504 }
505}
506
507pub(crate) fn open_franken_storage_with_timeout(
508 path: &Path,
509 timeout: Duration,
510) -> Result<FrankenStorage> {
511 if !path.exists() {
512 return Err(anyhow!("Database not found at {}", path.display()));
513 }
514
515 let deadline = Instant::now() + timeout;
516 let mut backoff = Duration::from_millis(4);
517 loop {
518 match FrankenStorage::open(path) {
519 Ok(storage) => return Ok(storage),
520 Err(err) if retryable_franken_anyhow(&err) => {
521 let now = Instant::now();
522 if now >= deadline {
523 return Err(err);
524 }
525 let remaining = deadline.saturating_duration_since(now);
526 sleep_with_franken_retry_backoff(
527 &mut backoff,
528 remaining,
529 Duration::from_millis(128),
530 );
531 }
532 Err(err) => return Err(err),
533 }
534 }
535}
536
537pub(crate) fn open_current_schema_storage_with_timeout(
538 path: &Path,
539 timeout: Duration,
540) -> Result<Option<FrankenStorage>> {
541 if !path.exists() {
542 return Ok(None);
543 }
544
545 let mut storage = FrankenStorage::new(
546 open_franken_raw_connection_with_timeout(path, timeout)?,
547 path.to_path_buf(),
548 );
549 storage.apply_open_stage_busy_timeout();
550
551 let version = storage
552 .raw()
553 .query("SELECT value FROM meta WHERE key = 'schema_version';")
554 .ok()
555 .and_then(|rows| rows.first().cloned())
556 .and_then(|row| row.get_typed::<String>(0).ok())
557 .and_then(|raw| raw.parse::<i64>().ok());
558
559 if version != Some(CURRENT_SCHEMA_VERSION) {
560 if let Err(close_err) = storage.close_without_checkpoint_in_place() {
561 tracing::debug!(
562 error = %close_err,
563 db_path = %path.display(),
564 "open_current_schema_storage_with_timeout: close_without_checkpoint_in_place failed; falling back to best-effort close"
565 );
566 storage.close_best_effort_in_place();
567 }
568 return Ok(None);
569 }
570
571 transition_from_meta_version(&storage.conn)?;
572 storage.repair_missing_current_schema_objects()?;
573 storage.apply_config()?;
574 Ok(Some(storage))
575}
576
577pub(crate) fn open_franken_readonly_storage_with_timeout(
578 path: &Path,
579 timeout: Duration,
580) -> Result<FrankenStorage> {
581 if !path.exists() {
582 return Err(anyhow!("Database not found at {}", path.display()));
583 }
584
585 let deadline = Instant::now() + timeout;
586 let mut backoff = Duration::from_millis(4);
587 loop {
588 match FrankenStorage::open_readonly(path) {
589 Ok(storage) => return Ok(storage),
590 Err(err) if retryable_franken_anyhow(&err) => {
591 let now = Instant::now();
592 if now >= deadline {
593 return Err(err);
594 }
595 let remaining = deadline.saturating_duration_since(now);
596 sleep_with_franken_retry_backoff(
597 &mut backoff,
598 remaining,
599 Duration::from_millis(128),
600 );
601 }
602 Err(err) => return Err(err),
603 }
604 }
605}
606
607pub(crate) fn open_franken_raw_connection_with_timeout(
608 path: &Path,
609 timeout: Duration,
610) -> Result<FrankenConnection> {
611 if !path.exists() {
612 return Err(anyhow!("Database not found at {}", path.display()));
613 }
614
615 let path_str = path.to_string_lossy().to_string();
616 let deadline = Instant::now() + timeout;
617 let mut backoff = Duration::from_millis(4);
618 loop {
619 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
620 match FrankenConnection::open(&path_str)
621 .with_context(|| format!("opening raw frankensqlite db at {}", path.display()))
622 {
623 Ok(conn) => return Ok(conn),
624 Err(err) if retryable_franken_anyhow(&err) => {
625 let now = Instant::now();
626 if now >= deadline {
627 return Err(err);
628 }
629 let remaining = deadline.saturating_duration_since(now);
630 sleep_with_franken_retry_backoff(
631 &mut backoff,
632 remaining,
633 Duration::from_millis(128),
634 );
635 }
636 Err(err) => return Err(err),
637 }
638 }
639}
640
641pub(crate) fn open_franken_raw_readonly_connection_with_timeout(
642 path: &Path,
643 timeout: Duration,
644) -> Result<FrankenConnection> {
645 if !path.exists() {
646 return Err(anyhow!("Database not found at {}", path.display()));
647 }
648
649 let path_str = path.to_string_lossy().to_string();
650 let deadline = Instant::now() + timeout;
651 let mut backoff = Duration::from_millis(4);
652 loop {
653 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
654 match open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
655 .with_context(|| {
656 format!(
657 "opening raw frankensqlite db readonly at {}",
658 path.display()
659 )
660 }) {
661 Ok(conn) => return Ok(conn),
662 Err(err) if retryable_franken_anyhow(&err) => {
663 let now = Instant::now();
664 if now >= deadline {
665 return Err(err);
666 }
667 let remaining = deadline.saturating_duration_since(now);
668 sleep_with_franken_retry_backoff(
669 &mut backoff,
670 remaining,
671 Duration::from_millis(128),
672 );
673 }
674 Err(err) => return Err(err),
675 }
676 }
677}
678
679pub(crate) fn retryable_franken_error(err: &frankensqlite::FrankenError) -> bool {
680 matches!(
681 err,
682 frankensqlite::FrankenError::Busy
683 | frankensqlite::FrankenError::BusyRecovery
684 | frankensqlite::FrankenError::BusySnapshot { .. }
685 | frankensqlite::FrankenError::DatabaseLocked { .. }
686 | frankensqlite::FrankenError::LockFailed { .. }
687 | frankensqlite::FrankenError::WriteConflict { .. }
688 | frankensqlite::FrankenError::SerializationFailure { .. }
689 ) || retryable_storage_error_message(&err.to_string())
690}
691
/// Reports whether an error message looks like transient lock contention.
///
/// The check is a case-insensitive (ASCII) substring search for a fixed set of
/// markers.
pub(crate) fn retryable_storage_error_message(message: &str) -> bool {
    const RETRYABLE_MARKERS: [&str; 6] = [
        "busy",
        "locked",
        "locking",
        "contention",
        "temporarily unavailable",
        "would block",
    ];

    let lowered = message.to_ascii_lowercase();
    RETRYABLE_MARKERS
        .iter()
        .any(|&marker| lowered.contains(marker))
}
701
702pub(crate) fn retryable_franken_anyhow(err: &anyhow::Error) -> bool {
703 err.chain().any(|cause| {
704 cause
705 .downcast_ref::<frankensqlite::FrankenError>()
706 .is_some_and(retryable_franken_error)
707 || retryable_storage_error_message(&cause.to_string())
708 })
709}
710
711impl Drop for LazyFrankenDb {
712 fn drop(&mut self) {
713 let Some(mut conn) = self.conn.get_mut().take() else {
714 return;
715 };
716 conn.0.close_best_effort_in_place();
717 }
718}
719
/// Sizing options for `FrankenConnectionManager`.
#[derive(Debug, Clone)]
pub struct ConnectionManagerConfig {
    /// Number of reader connections to open (the manager uses at least one).
    pub reader_count: usize,
    /// Number of writer tokens to hand out (the manager uses at least one).
    pub max_writers: usize,
}

impl Default for ConnectionManagerConfig {
    /// Four readers, and as many writers as the available parallelism reports
    /// (four when that cannot be determined).
    fn default() -> Self {
        let max_writers = match std::thread::available_parallelism() {
            Ok(parallelism) => parallelism.get(),
            Err(_) => 4,
        };
        Self {
            reader_count: 4,
            max_writers,
        }
    }
}
747
/// Hands out reader connections from a fixed set and opens writer connections
/// on demand, limited by a pool of writer tokens.
pub struct FrankenConnectionManager {
    /// Path of the database every connection is opened against.
    db_path: PathBuf,
    /// Pre-opened reader connections, each behind its own mutex.
    readers: Vec<parking_lot::Mutex<SendFrankenConnection>>,
    /// Monotonic counter used to rotate through `readers`.
    reader_idx: std::sync::atomic::AtomicUsize,
    /// Bounded channel holding one `()` token per permitted writer:
    /// `.0` returns a token, `.1` takes one.
    writer_tokens: (
        crossbeam_channel::Sender<()>,
        crossbeam_channel::Receiver<()>,
    ),
    /// Effective configuration (counts clamped to at least one).
    config: ConnectionManagerConfig,
}

// SAFETY: NOTE(review) — these impls assert that the manager may be moved to
// and shared between threads. Readers are only reached through their mutexes,
// but whether `FrankenConnection` itself tolerates this is not visible in this
// section of the file; confirm it against the frankensqlite implementation.
unsafe impl Send for FrankenConnectionManager {}
unsafe impl Sync for FrankenConnectionManager {}
776
777impl FrankenConnectionManager {
778 pub fn new(db_path: impl Into<PathBuf>, config: ConnectionManagerConfig) -> Result<Self> {
783 let db_path = db_path.into();
784 let path_str = db_path.to_string_lossy().to_string();
785
786 let reader_count = config.reader_count.max(1);
787 let mut readers = Vec::with_capacity(reader_count);
788 for _ in 0..reader_count {
789 let conn = FrankenConnection::open(&path_str)
790 .with_context(|| format!("opening reader connection at {}", db_path.display()))?;
791 let _ = conn.execute("PRAGMA busy_timeout = 5000;"); let _ = conn.execute("PRAGMA cache_size = -16384;"); readers.push(parking_lot::Mutex::new(SendFrankenConnection::new(conn)));
795 }
796
797 let max_writers = config.max_writers.max(1);
798
799 let (tx, rx) = crossbeam_channel::bounded(max_writers);
803 for _ in 0..max_writers {
804 tx.send(())
805 .map_err(|_| anyhow!("writer token channel closed during initialization"))?;
806 }
807
808 Ok(Self {
809 db_path,
810 readers,
811 reader_idx: std::sync::atomic::AtomicUsize::new(0),
812 writer_tokens: (tx, rx),
813 config: ConnectionManagerConfig {
814 reader_count,
815 max_writers,
816 },
817 })
818 }
819
820 pub fn reader(&self) -> parking_lot::MutexGuard<'_, SendFrankenConnection> {
825 let idx = self
826 .reader_idx
827 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
828 self.readers[idx % self.readers.len()].lock()
829 }
830
831 pub fn writer(&self) -> Result<WriterGuard<'_>> {
837 self.writer_tokens
838 .1
839 .recv()
840 .map_err(|_| anyhow!("writer token channel closed"))?;
841 let path_str = self.db_path.to_string_lossy().to_string();
842 let conn = match FrankenConnection::open(&path_str) {
843 Ok(c) => c,
844 Err(e) => {
845 let _ = self.writer_tokens.0.send(());
846 return Err(anyhow::Error::from(e).context(format!(
847 "opening writer connection at {}",
848 self.db_path.display()
849 )));
850 }
851 };
852 let storage = FrankenStorage::new(conn, self.db_path.clone());
853 if let Err(e) = storage.apply_config() {
854 let _ = self.writer_tokens.0.send(());
855 return Err(e);
856 }
857 Ok(WriterGuard {
858 storage,
859 mgr: self,
860 committed: false,
861 })
862 }
863
864 pub fn concurrent_writer(&self) -> Result<WriterGuard<'_>> {
869 self.writer_tokens
870 .1
871 .recv()
872 .map_err(|_| anyhow!("writer token channel closed"))?;
873 let path_str = self.db_path.to_string_lossy().to_string();
874 let conn = match FrankenConnection::open(&path_str) {
875 Ok(c) => c,
876 Err(e) => {
877 let _ = self.writer_tokens.0.send(());
878 return Err(anyhow::Error::from(e).context(format!(
879 "opening concurrent writer at {}",
880 self.db_path.display()
881 )));
882 }
883 };
884 let storage = FrankenStorage::new(conn, self.db_path.clone());
885 if let Err(e) = storage.apply_config() {
886 let _ = self.writer_tokens.0.send(());
887 return Err(e);
888 }
889 let _ = storage.raw().execute("PRAGMA cache_size = -4096;");
891 Ok(WriterGuard {
892 storage,
893 mgr: self,
894 committed: false,
895 })
896 }
897
898 pub fn db_path(&self) -> &Path {
900 &self.db_path
901 }
902
903 pub fn reader_count(&self) -> usize {
905 self.readers.len()
906 }
907
908 pub fn max_writers(&self) -> usize {
910 self.config.max_writers
911 }
912}
913
914impl Drop for FrankenConnectionManager {
915 fn drop(&mut self) {
916 for reader in &mut self.readers {
917 reader.get_mut().0.close_best_effort_in_place();
918 }
919 }
920}
921
/// A writer connection checked out from a `FrankenConnectionManager`.
///
/// Dropping the guard rolls back unless `mark_committed` was called, closes
/// the connection, and returns the writer token to the manager.
pub struct WriterGuard<'a> {
    /// Storage wrapper around the writer connection.
    storage: FrankenStorage,
    /// Manager whose token pool this writer belongs to.
    mgr: &'a FrankenConnectionManager,
    /// Set by `mark_committed`; suppresses the rollback on drop.
    committed: bool,
}

impl<'a> WriterGuard<'a> {
    /// Borrows the underlying storage.
    pub fn storage(&self) -> &FrankenStorage {
        &self.storage
    }

    /// Records that the caller has committed, so drop will not issue `ROLLBACK;`.
    pub fn mark_committed(&mut self) {
        self.committed = true;
    }
}
946
947impl Drop for WriterGuard<'_> {
948 fn drop(&mut self) {
949 if !self.committed {
950 let _ = self.storage.raw().execute("ROLLBACK;");
952 }
953 self.storage.close_best_effort_in_place();
954 let _ = self.mgr.writer_tokens.0.send(());
956 }
957}
958
959fn serialize_json_to_msgpack(value: &serde_json::Value) -> Option<Vec<u8>> {
968 if value.is_null() || value.as_object().is_some_and(|o| o.is_empty()) {
969 return None;
970 }
971 rmp_serde::to_vec(value).ok()
972}
973
974fn deserialize_msgpack_to_json(bytes: &[u8]) -> serde_json::Value {
977 if bytes.is_empty() {
978 return serde_json::Value::Object(serde_json::Map::new());
979 }
980 rmp_serde::from_slice(bytes).unwrap_or_else(|e| {
981 tracing::debug!(
982 error = %e,
983 bytes_len = bytes.len(),
984 "Failed to deserialize metadata - returning empty object"
985 );
986 serde_json::Value::Object(serde_json::Map::new())
987 })
988}
989
990fn franken_read_metadata_compat(
992 row: &FrankenRow,
993 json_idx: usize,
994 bin_idx: usize,
995) -> serde_json::Value {
996 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
998 && !bytes.is_empty()
999 {
1000 return deserialize_msgpack_to_json(&bytes);
1001 }
1002
1003 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1005 return serde_json::from_str(&json_str)
1006 .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
1007 }
1008
1009 serde_json::Value::Object(serde_json::Map::new())
1010}
1011
1012fn franken_read_message_extra_compat(
1013 row: &FrankenRow,
1014 json_idx: usize,
1015 bin_idx: usize,
1016) -> serde_json::Value {
1017 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1018 && !bytes.is_empty()
1019 {
1020 return deserialize_msgpack_to_json(&bytes);
1021 }
1022
1023 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1024 return serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
1025 }
1026
1027 serde_json::Value::Null
1028}
1029
/// Errors reported by schema migration.
#[derive(Debug, Error)]
pub enum MigrationError {
    /// The database must be rebuilt; `reason` explains why and `backup_path`
    /// optionally carries a backup location.
    #[error("Rebuild required: {reason}")]
    RebuildRequired {
        reason: String,
        backup_path: Option<std::path::PathBuf>,
    },

    /// An error from the database layer.
    #[error("Database error: {0}")]
    Database(#[from] frankensqlite::FrankenError),

    /// An I/O error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Any other failure, carried as a plain message.
    #[error("{0}")]
    Other(String),
}
1056
/// Converts an `anyhow::Error` into `MigrationError::Other` using its
/// `to_string()` rendering.
impl From<anyhow::Error> for MigrationError {
    fn from(e: anyhow::Error) -> Self {
        // NOTE(review): the plain `Display` of an `anyhow::Error` presumably omits
        // the cause chain — confirm whether the alternate `{:#}` form is wanted.
        MigrationError::Other(e.to_string())
    }
}
1062
/// Backup count limit; presumably the number of backups retained — its use is
/// not visible in this section, verify against the backup code.
const MAX_BACKUPS: usize = 3;
/// Pragma statement setting a 30000 ms busy timeout; judging by the name it is
/// meant for the backup/VACUUM connection — verify against its call site.
const BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA: &str = "PRAGMA busy_timeout = 30000;";
1066
/// File names that `is_user_data_file` treats as user data.
const USER_DATA_FILES: &[&str] = &["bookmarks.db", "tui_state.json", "sources.toml", ".env"];

/// Returns `true` when the final component of `path` is one of `USER_DATA_FILES`.
///
/// Paths without a final component, or with a non-UTF-8 one, are never user
/// data files. The comparison is exact and case-sensitive.
pub fn is_user_data_file(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => USER_DATA_FILES.iter().any(|known| *known == name),
        None => false,
    }
}
1077
/// SQL that creates the `fts_messages` FTS5 virtual table if it does not exist.
///
/// The table is declared with `content=''` and the `porter` tokenizer;
/// `created_at` is stored unindexed.
pub const FTS5_REGISTER_SQL: &str = "\
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(\
    content, title, agent, workspace, source_path, \
    created_at UNINDEXED, \
    content='', tokenize='porter'\
    )";

/// `meta` key for the FTS rebuild generation — usage not visible in this section.
const FTS_FRANKEN_REBUILD_META_KEY: &str = "fts_frankensqlite_rebuild_generation";
/// `meta` key for the archive fingerprint tied to the FTS rebuild — usage not visible in this section.
const FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY: &str = "fts_frankensqlite_archive_fingerprint";
/// Current FTS rebuild generation number.
const FTS_FRANKEN_REBUILD_GENERATION: i64 = 1;
/// `meta` key for the daily-stats archive fingerprint — usage not visible in this section.
const DAILY_STATS_HEALTH_META_KEY: &str = "daily_stats_archive_fingerprint";
/// `meta` key for the daily-stats health generation — usage not visible in this section.
const DAILY_STATS_HEALTH_GENERATION_META_KEY: &str = "daily_stats_health_generation";
/// Current daily-stats health generation number.
const DAILY_STATS_HEALTH_GENERATION: i64 = 1;

/// FTS5 special command that issues `'delete-all'` against `fts_messages`.
pub const FTS5_DELETE_ALL_SQL: &str =
    "INSERT INTO fts_messages(fts_messages) VALUES('delete-all');";

/// Shadow tables that `validate_fts_messages_integrity_for_connection` looks
/// for in `sqlite_master`.
pub const FTS_MESSAGES_REQUIRED_SHADOW_TABLES: [&str; 5] = [
    "fts_messages_config",
    "fts_messages_content",
    "fts_messages_data",
    "fts_messages_docsize",
    "fts_messages_idx",
];

/// Zero-row query used to probe whether `fts_messages` can be read at all.
pub const FTS_MESSAGES_INTEGRITY_PROBE_SQL: &str = "SELECT * FROM fts_messages LIMIT 0";
1113
/// Recovery advice appended to every `FtsMessagesIntegrityError` message.
pub const FTS_MESSAGES_CORRUPTION_RECOVERY_HINT: &str = "Stop all cass index/watch processes, back up the current database, then run \
     'cass doctor check --json' for a read-only diagnosis before using a supported \
     repair/rebuild path.";

/// Describes a structurally broken `fts_messages` FTS5 index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FtsMessagesIntegrityError {
    /// Required shadow tables found to be missing (may be empty).
    missing_shadow_tables: Vec<&'static str>,
    /// SQL statement that failed, if any.
    failed_sql: Option<&'static str>,
    /// Text of the underlying error, if any.
    source_error: Option<String>,
}

impl FtsMessagesIntegrityError {
    /// Assembles an error from its three optional pieces of evidence.
    fn new(
        missing_shadow_tables: Vec<&'static str>,
        failed_sql: Option<&'static str>,
        source_error: Option<String>,
    ) -> Self {
        FtsMessagesIntegrityError {
            missing_shadow_tables,
            failed_sql,
            source_error,
        }
    }

    /// Shadow tables recorded as missing.
    pub fn missing_shadow_tables(&self) -> &[&'static str] {
        self.missing_shadow_tables.as_slice()
    }
}

/// Renders a fixed headline, then each available detail, then the recovery hint.
impl std::fmt::Display for FtsMessagesIntegrityError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(
            "CASS database FTS5 index is corrupt: fts_messages exists, but required FTS5 shadow tables are missing or unreadable",
        )?;

        // Comma-separated list of missing tables, omitted entirely when empty.
        let mut tables = self.missing_shadow_tables.iter();
        if let Some(first) = tables.next() {
            write!(f, "; missing shadow tables: {}", first)?;
            for table in tables {
                write!(f, ", {}", table)?;
            }
        }

        match self.failed_sql {
            Some(sql) => write!(f, "; failed SQL: {sql}")?,
            None => {}
        }
        match &self.source_error {
            Some(source_error) => write!(f, "; error: {source_error}")?,
            None => {}
        }

        write!(
            f,
            ". Suggested recovery: {FTS_MESSAGES_CORRUPTION_RECOVERY_HINT}"
        )
    }
}

impl std::error::Error for FtsMessagesIntegrityError {}
1170
1171pub fn fts_messages_integrity_error_from_message(
1172 source_error: impl Into<String>,
1173) -> Option<FtsMessagesIntegrityError> {
1174 let source_error = source_error.into();
1175 let lower = source_error.to_ascii_lowercase();
1176 if !lower.contains("fts_messages") {
1177 return None;
1178 }
1179
1180 let mentions_structural_fts_failure = lower.contains("shadow table")
1181 || lower.contains("vtable constructor failed")
1182 || lower.contains("sqlite_corrupt")
1183 || lower.contains("databasecorrupt")
1184 || lower.contains("database corrupt")
1185 || lower.contains("missing required");
1186 if !mentions_structural_fts_failure {
1187 return None;
1188 }
1189
1190 let missing_shadow_tables = FTS_MESSAGES_REQUIRED_SHADOW_TABLES
1191 .iter()
1192 .copied()
1193 .filter(|table| lower.contains(&table.to_ascii_lowercase()))
1194 .collect::<Vec<_>>();
1195
1196 Some(FtsMessagesIntegrityError::new(
1197 missing_shadow_tables,
1198 Some(FTS_MESSAGES_INTEGRITY_PROBE_SQL),
1199 Some(source_error),
1200 ))
1201}
1202
/// Decides from its `CREATE` statement whether an FTS table may lack shadow
/// table entries.
///
/// With all whitespace removed and ASCII case folded, the SQL must contain
/// `usingfts5(` and `content=''`, and must not mention `message_id`.
fn fts_schema_tolerates_missing_shadow_metadata(sql: &str) -> bool {
    let mut compact = String::with_capacity(sql.len());
    for ch in sql.chars() {
        if !ch.is_whitespace() {
            compact.push(ch.to_ascii_lowercase());
        }
    }

    let is_fts5 = compact.contains("usingfts5(");
    let is_contentless = compact.contains("content=''");
    let mentions_message_id = compact.contains("message_id");
    is_fts5 && is_contentless && !mentions_message_id
}
1213
/// Verifies that an existing `fts_messages` table still has its required FTS5
/// shadow tables.
///
/// Returns `Ok(())` when `fts_messages` is absent from `sqlite_master`, when the
/// probe query succeeds and every stored schema statement passes
/// `fts_schema_tolerates_missing_shadow_metadata`, or when none of the required
/// shadow tables is missing.
///
/// # Errors
///
/// Returns an error if the `sqlite_master` lookups fail, or an
/// [`FtsMessagesIntegrityError`] listing the missing shadow tables (together
/// with the probe SQL and its error text when the probe itself failed).
pub fn validate_fts_messages_integrity_for_connection(conn: &FrankenConnection) -> Result<()> {
    let fts_schema_sql: Vec<String> = conn
        .query_map_collect(
            "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'fts_messages'",
            fparams![],
            |row: &FrankenRow| row.get_typed::<String>(0),
        )
        .with_context(|| "checking for fts_messages in sqlite_master")?;
    // No `fts_messages` table registered: there is nothing to validate.
    if fts_schema_sql.is_empty() {
        return Ok(());
    }

    // Only the failure (if any) of the probe is kept; its rows are not inspected.
    let probe_error = conn.query(FTS_MESSAGES_INTEGRITY_PROBE_SQL).err();
    if probe_error.is_none()
        && fts_schema_sql
            .iter()
            .all(|sql| fts_schema_tolerates_missing_shadow_metadata(sql))
    {
        return Ok(());
    }

    let present_shadow_tables: HashSet<String> = conn
        .query_map_collect(
            "SELECT name FROM sqlite_master
             WHERE type = 'table'
               AND name IN (
                   'fts_messages_config',
                   'fts_messages_content',
                   'fts_messages_data',
                   'fts_messages_docsize',
                   'fts_messages_idx'
               )",
            fparams![],
            |row: &FrankenRow| row.get_typed::<String>(0),
        )
        .map(|rows| rows.into_iter().collect())
        // A failure to even list the shadow tables is reported as an integrity
        // error with an empty missing-table list and a condensed copy of the SQL.
        .map_err(|err| {
            FtsMessagesIntegrityError::new(
                Vec::new(),
                Some(
                    "SELECT name FROM sqlite_master WHERE name IN \
                     ('fts_messages_config','fts_messages_content','fts_messages_data','fts_messages_docsize','fts_messages_idx')",
                ),
                Some(err.to_string()),
            )
        })?;
    let missing_shadow_tables = FTS_MESSAGES_REQUIRED_SHADOW_TABLES
        .iter()
        .copied()
        .filter(|table| !present_shadow_tables.contains(*table))
        .collect::<Vec<_>>();

    // NOTE(review): a failed probe is not reported when every required shadow
    // table is present — confirm that this is the intended outcome.
    if missing_shadow_tables.is_empty() {
        return Ok(());
    }

    // The probe SQL and its error text are attached only when the probe failed.
    Err(FtsMessagesIntegrityError::new(
        missing_shadow_tables,
        probe_error
            .as_ref()
            .map(|_| FTS_MESSAGES_INTEGRITY_PROBE_SQL),
        probe_error.map(|err| err.to_string()),
    )
    .into())
}
1287
/// Test helper: opens the database at `db_path` and rebuilds its FTS index,
/// discarding the inserted-row count.
#[cfg(test)]
pub(crate) fn materialize_fresh_fts_schema_via_rusqlite(db_path: &Path) -> Result<()> {
    let open_failure = || {
        format!(
            "opening frankensqlite db at {} for FTS materialization",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(open_failure)?;
    storage.rebuild_fts_via_frankensqlite()?;
    Ok(())
}
1303
/// Test helper: rebuilds the FTS index of the database at `db_path`, records the
/// rebuild generation, and returns the number of rows inserted.
#[cfg(test)]
pub(crate) fn rebuild_fts_via_rusqlite(db_path: &Path) -> Result<usize> {
    let open_failure = || {
        format!(
            "opening frankensqlite db at {} for FTS rebuild",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(open_failure)?;

    let inserted_rows = storage.rebuild_fts_via_frankensqlite()?;
    // The generation marker is written only after the rebuild itself succeeded.
    storage.record_fts_franken_rebuild_generation()?;
    Ok(inserted_rows)
}
1316
1317pub(crate) fn ensure_fts_consistency_via_rusqlite(db_path: &Path) -> Result<FtsConsistencyRepair> {
1318 let storage = FrankenStorage::open(db_path).with_context(|| {
1322 format!(
1323 "opening frankensqlite db at {} for FTS consistency check",
1324 db_path.display()
1325 )
1326 })?;
1327 storage.ensure_search_fallback_fts_consistency()
1328}
1329
/// Creates a backup of the database at `db_path` and returns the backup's path.
///
/// Returns `Ok(None)` when nothing exists at `db_path`. The preferred route is a
/// `VACUUM INTO` snapshot written to a staging file and then renamed into place;
/// a raw copy of the bundle is used only when the vacuum fails with an error that
/// is not classified as retryable.
///
/// # Errors
///
/// Fails if the bundle root or a sidecar is rejected by
/// `copyable_bundle_file_exists` (symlink or non-file), if the vacuum hits a
/// retryable error, or if renaming, syncing, or copying fails.
pub fn create_backup(db_path: &Path) -> Result<Option<std::path::PathBuf>, MigrationError> {
    if !bundle_path_exists(db_path)? {
        return Ok(None);
    }

    // Rejects a symlinked or non-regular root before anything is written.
    if !copyable_bundle_file_exists(db_path)? {
        return Ok(None);
    }
    // Called for validation only: the returned sidecar list is discarded.
    let _ = copyable_bundle_sidecar_sources(db_path)?;

    let backup_path = unique_backup_path(db_path);
    let vacuum_stage_path = vacuum_stage_backup_path(&backup_path);

    match vacuum_into_backup_stage(db_path, &vacuum_stage_path) {
        Ok(()) => {
            // Publish the finished snapshot under its final name.
            fs::rename(&vacuum_stage_path, &backup_path)?;
        }
        Err(err) if backup_vacuum_error_requires_consistent_retry(&err) => {
            tracing::warn!(
                db_path = %db_path.display(),
                error = %err,
                "create_backup: VACUUM INTO hit transient contention; refusing raw WAL bundle copy"
            );
            return Err(MigrationError::Database(err));
        }
        Err(err) => {
            // NOTE(review): a partially written stage file is not removed on
            // either failure path here — confirm whether cleanup happens elsewhere.
            tracing::warn!(
                db_path = %db_path.display(),
                error = %err,
                "create_backup: VACUUM INTO failed; falling back to raw evidence copy"
            );
        }
    }

    // Reached with an existing backup file after a successful vacuum + rename:
    // make the file and its directory entry durable, then finish.
    if backup_path.exists() {
        sync_file_if_exists(&backup_path)?;
        if let Some(parent) = backup_path.parent() {
            sync_parent_directory(parent)?;
        }
        return Ok(Some(backup_path));
    }

    // Fallback: copy the database file and any sidecars as they are.
    copy_database_bundle(db_path, &backup_path)?;

    Ok(Some(backup_path))
}
1385
/// Runs `VACUUM INTO` from a read-only connection on `db_path`, writing the
/// snapshot to `stage_path`.
///
/// The connection is always closed before returning, whether or not the vacuum
/// succeeded; a close failure is logged and followed by a best-effort close, and
/// does not replace the vacuum's own result.
fn vacuum_into_backup_stage(
    db_path: &Path,
    stage_path: &Path,
) -> std::result::Result<(), frankensqlite::FrankenError> {
    let mut conn = open_franken_with_flags(
        &db_path.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )?;
    // Run the fallible steps inside a closure so that an early `?` exit still
    // falls through to the close logic below.
    let result = (|| {
        conn.execute(BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA)?;
        let path_str = stage_path.to_string_lossy();
        conn.execute_compat("VACUUM INTO ?", fparams![path_str.as_ref()])?;
        Ok(())
    })();
    if let Err(close_err) = conn.close_in_place() {
        tracing::warn!(
            error = %close_err,
            db_path = %db_path.display(),
            "create_backup: close_in_place failed after VACUUM INTO; falling back to best-effort close"
        );
        conn.close_best_effort_in_place();
    }
    result
}
1410
/// Returns `true` when a backup `VACUUM INTO` failure should be surfaced to the
/// caller instead of triggering the raw-copy fallback.
///
/// Currently this is exactly the set of errors `retryable_franken_error` accepts.
fn backup_vacuum_error_requires_consistent_retry(err: &frankensqlite::FrankenError) -> bool {
    retryable_franken_error(err)
}
1414
/// Records which files of a database bundle were moved by `move_database_bundle`.
///
/// All flags default to `false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DatabaseBundleMoveResult {
    /// The main database file was renamed.
    pub database: bool,
    /// The `-wal` sidecar was renamed.
    pub wal: bool,
    /// The `-shm` sidecar was renamed.
    pub shm: bool,
}
1421
1422impl DatabaseBundleMoveResult {
1423 pub fn moved_any(&self) -> bool {
1424 self.database || self.wal || self.shm
1425 }
1426}
1427
/// Builds the path of a database sidecar by appending `suffix` (for example
/// `-wal` or `-shm`) to the full text of `path`.
///
/// The suffix is appended at the `OsString` level so that paths which are not
/// valid UTF-8 are preserved exactly; a lossy string conversion would replace
/// such bytes and point the result at a different file.
fn database_sidecar_path(path: &Path, suffix: &str) -> PathBuf {
    let mut sidecar = path.as_os_str().to_os_string();
    sidecar.push(suffix);
    PathBuf::from(sidecar)
}
1431
1432pub(crate) fn move_database_bundle(
1439 source_root: &Path,
1440 destination_root: &Path,
1441) -> std::io::Result<DatabaseBundleMoveResult> {
1442 let mut moved = DatabaseBundleMoveResult::default();
1443 if let Some(parent) = destination_root.parent() {
1444 fs::create_dir_all(parent)?;
1445 sync_parent_directory(parent)?;
1446 }
1447
1448 if bundle_path_exists(source_root)? {
1449 fs::rename(source_root, destination_root)?;
1450 moved.database = true;
1451 }
1452
1453 let wal_source = database_sidecar_path(source_root, "-wal");
1454 if bundle_path_exists(&wal_source)? {
1455 fs::rename(&wal_source, database_sidecar_path(destination_root, "-wal"))?;
1456 moved.wal = true;
1457 }
1458
1459 let shm_source = database_sidecar_path(source_root, "-shm");
1460 if bundle_path_exists(&shm_source)? {
1461 fs::rename(&shm_source, database_sidecar_path(destination_root, "-shm"))?;
1462 moved.shm = true;
1463 }
1464
1465 if moved.moved_any() {
1466 if let Some(parent) = source_root.parent() {
1467 sync_parent_directory(parent)?;
1468 }
1469 if let Some(parent) = destination_root.parent() {
1470 sync_parent_directory(parent)?;
1471 }
1472 }
1473
1474 Ok(moved)
1475}
1476
/// Reports whether anything exists at `path`, without following symlinks.
///
/// A `NotFound` error becomes `Ok(false)`; every other metadata error is
/// returned to the caller.
fn bundle_path_exists(path: &Path) -> std::io::Result<bool> {
    if let Err(err) = fs::symlink_metadata(path) {
        return if err.kind() == std::io::ErrorKind::NotFound {
            Ok(false)
        } else {
            Err(err)
        };
    }
    Ok(true)
}
1484
1485fn copy_database_bundle(source_root: &Path, destination_root: &Path) -> Result<()> {
1486 if let Some(parent) = destination_root.parent() {
1487 fs::create_dir_all(parent).with_context(|| {
1488 format!(
1489 "creating destination directory for database bundle copy: {}",
1490 parent.display()
1491 )
1492 })?;
1493 sync_parent_directory(parent)
1494 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1495 }
1496
1497 if !copyable_bundle_file_exists(source_root)? {
1498 bail!(
1499 "database bundle root is missing before copy: {}",
1500 source_root.display()
1501 );
1502 }
1503
1504 let sidecars = copyable_bundle_sidecar_sources(source_root)?;
1505
1506 fs::copy(source_root, destination_root).with_context(|| {
1507 format!(
1508 "copying database bundle {} -> {}",
1509 source_root.display(),
1510 destination_root.display()
1511 )
1512 })?;
1513 sync_file_if_exists(destination_root).with_context(|| {
1514 format!(
1515 "syncing copied database bundle {}",
1516 destination_root.display()
1517 )
1518 })?;
1519
1520 for (source_sidecar, suffix) in sidecars {
1521 let destination_sidecar = database_sidecar_path(destination_root, suffix);
1522 fs::copy(&source_sidecar, &destination_sidecar).with_context(|| {
1523 format!(
1524 "copying database bundle sidecar {} -> {}",
1525 source_sidecar.display(),
1526 destination_sidecar.display()
1527 )
1528 })?;
1529 sync_file_if_exists(&destination_sidecar).with_context(|| {
1530 format!(
1531 "syncing copied database bundle sidecar {}",
1532 destination_sidecar.display()
1533 )
1534 })?;
1535 }
1536
1537 if let Some(parent) = destination_root.parent() {
1538 sync_parent_directory(parent)
1539 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1540 }
1541
1542 Ok(())
1543}
1544
1545fn copyable_bundle_sidecar_sources(source_root: &Path) -> Result<Vec<(PathBuf, &'static str)>> {
1546 let mut sidecars = Vec::new();
1547 for suffix in ["-wal", "-shm"] {
1548 let source_sidecar = database_sidecar_path(source_root, suffix);
1549 if copyable_bundle_file_exists(&source_sidecar)? {
1550 sidecars.push((source_sidecar, suffix));
1551 }
1552 }
1553 Ok(sidecars)
1554}
1555
1556fn copyable_bundle_file_exists(path: &Path) -> Result<bool> {
1557 match fs::symlink_metadata(path) {
1558 Ok(metadata) => {
1559 let file_type = metadata.file_type();
1560 if file_type.is_symlink() {
1561 bail!(
1562 "refusing to copy database bundle symlink: {}",
1563 path.display()
1564 );
1565 }
1566 if !file_type.is_file() {
1567 bail!(
1568 "refusing to copy non-file database bundle path: {}",
1569 path.display()
1570 );
1571 }
1572 Ok(true)
1573 }
1574 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1575 Err(err) => Err(err).with_context(|| {
1576 format!(
1577 "checking database bundle path before copy: {}",
1578 path.display()
1579 )
1580 }),
1581 }
1582}
1583
1584pub(crate) fn remove_database_files(path: &Path) -> std::io::Result<()> {
1586 let mut removed_any = false;
1587
1588 match fs::remove_file(path) {
1589 Ok(()) => removed_any = true,
1590 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1591 Err(err) => return Err(err),
1592 }
1593
1594 for suffix in ["-wal", "-shm"] {
1596 match fs::remove_file(database_sidecar_path(path, suffix)) {
1597 Ok(()) => removed_any = true,
1598 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1599 Err(err) => return Err(err),
1600 }
1601 }
1602
1603 if removed_any && let Some(parent) = path.parent() {
1604 sync_parent_directory(parent)?;
1605 }
1606
1607 Ok(())
1608}
1609
/// Non-Windows variant: opens the directory at `path` and calls `sync_all` on it.
#[cfg(not(windows))]
fn sync_parent_directory(path: &Path) -> std::io::Result<()> {
    let directory = fs::File::open(path)?;
    directory.sync_all()
}
1614
/// Windows variant: a no-op that ignores `_path` and always returns `Ok(())`.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
1619
/// Calls `sync_all` on the file at `path`; a missing file is not an error.
///
/// The file is opened directly instead of being checked with `Path::exists`
/// first. That removes the window in which the file could disappear between the
/// check and the open, and it stops metadata errors from being silently treated
/// as "file absent".
///
/// # Errors
///
/// Returns any open error other than `NotFound`, or the error from `sync_all`.
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    match fs::File::open(path) {
        Ok(file) => file.sync_all(),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(err),
    }
}
1626
1627pub fn cleanup_old_backups(db_path: &Path, keep_count: usize) -> Result<(), std::io::Error> {
1629 let parent = match db_path.parent() {
1630 Some(p) => p,
1631 None => return Ok(()),
1632 };
1633
1634 let db_name = db_path.file_name().and_then(|n| n.to_str()).unwrap_or("db");
1635
1636 let prefix = format!("{}.backup.", db_name);
1637
1638 let mut backups: Vec<(std::path::PathBuf, SystemTime)> = Vec::new();
1640
1641 if let Ok(entries) = fs::read_dir(parent) {
1642 for entry in entries.flatten() {
1643 let path = entry.path();
1644 if let Some(name) = path.file_name().and_then(|n| n.to_str())
1645 && is_backup_root_name(name, &prefix)
1646 && let Ok(meta) = fs::metadata(&path)
1647 && meta.is_file()
1648 && let Ok(mtime) = meta.modified()
1649 {
1650 backups.push((path, mtime));
1651 }
1652 }
1653 }
1654
1655 backups.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1657
1658 for (path, _) in backups.into_iter().skip(keep_count) {
1660 let _ = fs::remove_file(&path);
1661
1662 let _ = fs::remove_file(database_sidecar_path(&path, "-wal"));
1664 let _ = fs::remove_file(database_sidecar_path(&path, "-shm"));
1665 }
1666
1667 Ok(())
1668}
1669
/// A historical database bundle found next to the canonical database, with the
/// metadata gathered by `discover_historical_database_bundles`.
#[derive(Debug, Clone)]
pub(crate) struct HistoricalDatabaseBundle {
    /// Path of the bundle's main database file.
    root_path: PathBuf,
    /// Combined size of the database file and its `-wal` / `-shm` sidecars.
    total_bytes: u64,
    /// Modification time of the database file in Unix milliseconds (0 if unreadable).
    modified_at_ms: i64,
    /// Whether a read-only open succeeded and the core tables could be probed.
    supports_direct_readonly: bool,
    /// Schema / FTS / message-id details read from the bundle.
    probe: HistoricalBundleProbe,
}
1678
/// Lightweight facts read from a historical bundle to rank it against others.
#[derive(Debug, Clone, Copy, Default)]
struct HistoricalBundleProbe {
    /// Stored schema version, or `None` when it could not be read.
    schema_version: Option<i64>,
    /// Number of `sqlite_master` rows named `fts_messages`, or `None` when the
    /// count could not be read.
    fts_schema_rows: Option<i64>,
    /// Whether a `SELECT COUNT(*) FROM fts_messages` query succeeded.
    fts_queryable: bool,
    /// Largest `messages.id`, or 0 when there are no rows or the query failed.
    max_message_id: i64,
}
1686
/// Test-only summary of a database's health.
///
/// NOTE(review): the code that fills this struct is not visible here; the field
/// descriptions are inferred from the field names only — confirm against the
/// producer.
#[cfg(test)]
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SqliteDatabaseHealthProbe {
    /// Presumably the stored schema version, if one was readable.
    pub schema_version: Option<i64>,
    /// Presumably whether a quick integrity check passed.
    pub quick_check_ok: bool,
    /// Presumably the number of schema rows for the FTS table.
    pub fts_schema_rows: i64,
    /// Presumably whether the FTS table could be queried.
    pub fts_queryable: bool,
    /// Presumably the number of rows in `messages`.
    pub message_count: i64,
    /// Presumably the largest `messages.id`.
    pub max_message_id: i64,
}
1698
/// Outcome reported by an FTS consistency check.
///
/// NOTE(review): the producer (`ensure_search_fallback_fts_consistency`) is not
/// visible here; variant meanings are inferred from their names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FtsConsistencyRepair {
    /// Presumably: no repair was needed; `rows` is the current FTS row count.
    AlreadyHealthy {
        rows: usize,
    },
    /// Presumably: only the missing rows were added.
    IncrementalCatchUp {
        inserted_rows: usize,
        total_rows: usize,
    },
    /// Presumably: the index was rebuilt from scratch.
    Rebuilt {
        inserted_rows: usize,
    },
}
1712
/// Running totals for a historical salvage pass; all counters default to zero.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HistoricalSalvageOutcome {
    /// Number of bundles that were considered.
    pub bundles_considered: usize,
    /// Number of bundles that were imported.
    pub bundles_imported: usize,
    /// Number of conversations imported across all bundles.
    pub conversations_imported: usize,
    /// Number of messages imported across all bundles.
    pub messages_imported: usize,
}
1720
1721impl HistoricalSalvageOutcome {
1722 pub(crate) fn accumulate(&mut self, other: Self) {
1723 self.bundles_considered += other.bundles_considered;
1724 self.bundles_imported += other.bundles_imported;
1725 self.conversations_imported += other.conversations_imported;
1726 self.messages_imported += other.messages_imported;
1727 }
1728}
1729
/// A read connection to a historical bundle, plus how it was obtained.
#[derive(Debug)]
struct HistoricalReadConnection {
    /// Open connection used to read the historical data.
    conn: FrankenConnection,
    /// How the connection was obtained: `"direct-readonly"` or `"sqlite3-recover"`.
    method: &'static str,
    /// Path of the database the connection points at (the bundle itself, or the
    /// recovered copy).
    root_path: PathBuf,
    /// Temporary directory holding the recovered copy; `None` for direct opens.
    _tempdir: Option<tempfile::TempDir>,
}
1737
/// Schema executed on the fresh database that receives `sqlite3 .recover` output.
///
/// It declares only the six tables whose `INSERT` lines are replayed (`sources`,
/// `agents`, `workspaces`, `conversations`, `messages`, `snippets`). Apart from
/// the primary keys, the columns carry no constraints.
const HISTORICAL_RECOVERY_CORE_SCHEMA: &str = r"
CREATE TABLE sources (
    id TEXT PRIMARY KEY,
    kind TEXT,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE agents (
    id INTEGER PRIMARY KEY,
    slug TEXT,
    name TEXT,
    version TEXT,
    kind TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT,
    display_name TEXT
);
CREATE TABLE conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER,
    workspace_id INTEGER,
    source_id TEXT,
    external_id TEXT,
    title TEXT,
    source_path TEXT,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
CREATE TABLE messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER,
    idx INTEGER,
    role TEXT,
    author TEXT,
    created_at INTEGER,
    content TEXT,
    extra_json TEXT,
    extra_bin BLOB
);
CREATE TABLE snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);
";
/// Value written as `salvage_version` in each bundle-import ledger entry.
const HISTORICAL_SALVAGE_LEDGER_VERSION: u32 = 2;
/// Presumably the format version stored in `HistoricalBundleProgress::progress_version`
/// — confirm against the code that writes progress records.
const HISTORICAL_SALVAGE_PROGRESS_VERSION: u32 = 1;
/// Five minutes, in milliseconds. NOTE(review): the name suggests a tolerance on
/// conversation start times when merging by source path; its use is not visible here.
const SOURCE_PATH_MERGE_START_TOLERANCE_MS: i64 = 5 * 60 * 1000;
1815
/// Serializable progress record for an in-flight import of one historical bundle.
///
/// NOTE(review): the code that reads and writes this record is not visible here;
/// field descriptions are inferred from names and from the matching fields of
/// `HistoricalDatabaseBundle` — confirm against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct HistoricalBundleProgress {
    /// Format version of this record.
    progress_version: u32,
    /// Presumably the bundle's root path, as text.
    path: String,
    /// Presumably the bundle's total size in bytes.
    bytes: u64,
    /// Presumably the bundle's modification time in Unix milliseconds.
    modified_at_ms: i64,
    /// Presumably the read method used for the bundle.
    method: String,
    /// Presumably the last source row id that was fully imported.
    last_completed_source_row_id: i64,
    /// Conversations imported so far.
    conversations_imported: usize,
    /// Messages imported so far.
    messages_imported: usize,
    /// Presumably when this record was last written, in Unix milliseconds.
    updated_at_ms: i64,
}
1828
/// One conversation queued for import from a historical bundle.
///
/// NOTE(review): usage is outside this view; field descriptions are inferred
/// from names and types.
#[derive(Debug, Clone)]
struct HistoricalBatchEntry {
    /// Presumably the conversation's row id in the source (historical) database.
    source_row_id: i64,
    /// Agent id associated with the conversation.
    agent_id: i64,
    /// Workspace id associated with the conversation, if any.
    workspace_id: Option<i64>,
    /// The conversation payload itself.
    conversation: Conversation,
}
1836
/// Counters for one imported batch; both default to zero.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct HistoricalBatchImportTotals {
    /// Number of source rows inserted by the batch.
    inserted_source_rows: usize,
    /// Number of messages inserted by the batch.
    inserted_messages: usize,
}
1842
1843fn historical_bundle_root_paths(db_path: &Path) -> Vec<PathBuf> {
1844 let mut roots = Vec::new();
1845 let Some(parent) = db_path.parent() else {
1846 return roots;
1847 };
1848 let db_name = db_path
1849 .file_name()
1850 .and_then(|n| n.to_str())
1851 .unwrap_or("agent_search.db");
1852 let db_stem = db_path
1853 .file_stem()
1854 .and_then(|n| n.to_str())
1855 .unwrap_or("agent_search");
1856
1857 let mut push_root = |path: PathBuf| {
1858 if path == db_path {
1859 return;
1860 }
1861 if !roots.iter().any(|existing| existing == &path) {
1862 roots.push(path);
1863 }
1864 };
1865
1866 if let Ok(entries) = fs::read_dir(parent) {
1867 for entry in entries.flatten() {
1868 let path = entry.path();
1869 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1870 continue;
1871 };
1872 if has_db_sidecar_suffix(name) {
1873 continue;
1874 }
1875 if name.starts_with(&format!("{db_name}.backup."))
1876 || name.starts_with(&format!("{db_stem}.corrupt."))
1877 {
1878 push_root(path);
1879 }
1880 }
1881 }
1882
1883 let backups_dir = parent.join("backups");
1884 if let Ok(entries) = fs::read_dir(backups_dir) {
1885 for entry in entries.flatten() {
1886 let path = entry.path();
1887 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1888 continue;
1889 };
1890 if has_db_sidecar_suffix(name) {
1891 continue;
1892 }
1893 if name.starts_with(&format!("{db_name}.")) && name.ends_with(".bak") {
1894 push_root(path);
1895 }
1896 }
1897 }
1898
1899 push_named_database_children(&mut roots, db_path, &parent.join("repair-lab"), db_name);
1900 push_named_database_children(&mut roots, db_path, &parent.join("snapshots"), db_name);
1901
1902 roots
1903}
1904
/// Appends `<child>/<db_name>` to `roots` for every entry `<child>` of `dir`
/// where that path exists.
///
/// The canonical database path and paths already in `roots` are skipped. An
/// unreadable `dir` leaves `roots` untouched.
fn push_named_database_children(
    roots: &mut Vec<PathBuf>,
    canonical_db_path: &Path,
    dir: &Path,
    db_name: &str,
) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };
    for child in entries.flatten() {
        let candidate = child.path().join(db_name);
        let is_canonical = candidate == canonical_db_path;
        if is_canonical || !candidate.exists() || roots.contains(&candidate) {
            continue;
        }
        roots.push(candidate);
    }
}
1923
/// Returns the modification time of `path` in milliseconds since the Unix epoch.
///
/// Yields 0 when the metadata or timestamp cannot be read, or when the timestamp
/// lies before the epoch. A millisecond count too large for `i64` saturates at
/// `i64::MAX` instead of being silently truncated by an `as` cast.
fn file_mtime_ms(path: &Path) -> i64 {
    fs::metadata(path)
        .and_then(|meta| meta.modified())
        .ok()
        .and_then(|ts| ts.duration_since(UNIX_EPOCH).ok())
        .map(|elapsed| i64::try_from(elapsed.as_millis()).unwrap_or(i64::MAX))
        .unwrap_or(0)
}
1932
1933fn bundle_total_bytes(root_path: &Path) -> u64 {
1934 let mut total = fs::metadata(root_path).map(|meta| meta.len()).unwrap_or(0);
1935 for suffix in ["-wal", "-shm"] {
1936 let sidecar = database_sidecar_path(root_path, suffix);
1937 total = total.saturating_add(fs::metadata(sidecar).map(|meta| meta.len()).unwrap_or(0));
1938 }
1939 total
1940}
1941
/// Finds historical database bundles near `db_path`, probes each one, and
/// returns them ordered from most to least preferred.
///
/// Candidates that no longer exist or whose total size is 0 are dropped.
/// Ordering (every key descending): health rank, highest message id, location
/// priority, direct-read-only support, total bytes, modification time, path.
pub(crate) fn discover_historical_database_bundles(
    db_path: &Path,
) -> Vec<HistoricalDatabaseBundle> {
    let mut bundles: Vec<_> = historical_bundle_root_paths(db_path)
        .into_iter()
        .filter(|root| root.exists())
        .map(|root_path| {
            let modified_at_ms = file_mtime_ms(&root_path);
            let total_bytes = bundle_total_bytes(&root_path);
            let supports_direct_readonly = historical_bundle_supports_direct_readonly(&root_path);
            let probe = probe_historical_bundle(&root_path);
            HistoricalDatabaseBundle {
                modified_at_ms,
                total_bytes,
                supports_direct_readonly,
                root_path,
                probe,
            }
        })
        .filter(|bundle| bundle.total_bytes > 0)
        .collect();

    // Location-based preference; a higher value sorts earlier.
    // NOTE(review): the substring checks use `/` separators, so they would not
    // match backslash-separated paths — confirm whether that matters.
    fn bundle_priority(path: &Path) -> i32 {
        let path_str = path.to_string_lossy();
        if path_str.contains("/repair-lab/replay-") {
            return 5;
        }
        if path_str.contains("/repair-lab/") {
            return 4;
        }
        if path_str.contains("/snapshots/") {
            return 3;
        }
        // Quarantined or failed-seed bundles rank below plain backups.
        if path_str.contains(".corrupt.") || path_str.contains("failed-baseline-seed") {
            return 0;
        }
        1
    }

    // Health-based preference derived from the probe; a higher value sorts earlier.
    fn bundle_health_rank(bundle: &HistoricalDatabaseBundle) -> i32 {
        // "Clean" FTS: exactly one schema row that is queryable, or no FTS
        // schema row at all on a current-schema database.
        let fts_clean = match bundle.probe.fts_schema_rows {
            Some(1) => bundle.probe.fts_queryable,
            Some(0) => bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION),
            _ => false,
        };

        let clean_schema14_fts =
            bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION) && fts_clean;
        if clean_schema14_fts {
            return 5;
        }

        if fts_clean {
            return 4;
        }

        if bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION)
            && bundle.supports_direct_readonly
        {
            return 3;
        }

        if bundle.supports_direct_readonly {
            return 2;
        }

        1
    }

    // `right` is compared against `left` throughout, giving descending order.
    bundles.sort_by(|left, right| {
        bundle_health_rank(right)
            .cmp(&bundle_health_rank(left))
            .then_with(|| right.probe.max_message_id.cmp(&left.probe.max_message_id))
            .then_with(|| bundle_priority(&right.root_path).cmp(&bundle_priority(&left.root_path)))
            .then_with(|| {
                right
                    .supports_direct_readonly
                    .cmp(&left.supports_direct_readonly)
            })
            .then_with(|| right.total_bytes.cmp(&left.total_bytes))
            .then_with(|| right.modified_at_ms.cmp(&left.modified_at_ms))
            .then_with(|| right.root_path.cmp(&left.root_path))
    });
    bundles
}
2049
2050fn probe_historical_bundle(root_path: &Path) -> HistoricalBundleProbe {
2051 let Ok(conn) = open_historical_bundle_readonly(root_path) else {
2052 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or_default();
2053 };
2054
2055 let schema_version = read_meta_schema_version(&conn).ok().flatten();
2056 let fts_schema_rows: Option<i64> = conn
2057 .query_row_map(
2058 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
2059 fparams![],
2060 |row| row.get_typed(0),
2061 )
2062 .ok();
2063 let fts_queryable =
2064 historical_bundle_fts_queryable_via_frankensqlite(root_path, fts_schema_rows);
2065 let max_message_id: i64 = conn
2066 .query_row_map(
2067 "SELECT COALESCE(MAX(id), 0) FROM messages",
2068 fparams![],
2069 |row| row.get_typed(0),
2070 )
2071 .unwrap_or(0);
2072
2073 let probe = HistoricalBundleProbe {
2074 schema_version,
2075 fts_schema_rows,
2076 fts_queryable,
2077 max_message_id,
2078 };
2079
2080 if probe.schema_version.is_none()
2081 && probe.fts_schema_rows.is_none()
2082 && probe.max_message_id == 0
2083 {
2084 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or(probe);
2085 }
2086
2087 probe
2088}
2089
2090fn probe_historical_bundle_via_sqlite3_metadata(root_path: &Path) -> Option<HistoricalBundleProbe> {
2091 let bundle_uri = format!("file:{}?immutable=1", root_path.to_string_lossy());
2092 let output = Command::new("sqlite3")
2093 .arg("-batch")
2094 .arg("-noheader")
2095 .arg(&bundle_uri)
2096 .arg(
2097 "PRAGMA writable_schema=ON;
2098 SELECT COALESCE((SELECT value FROM meta WHERE key = 'schema_version'), '');
2099 SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages';
2100 SELECT COALESCE(MAX(id), 0) FROM messages;",
2101 )
2102 .output()
2103 .ok()?;
2104 if !output.status.success() {
2105 return None;
2106 }
2107
2108 let stdout = String::from_utf8(output.stdout).ok()?;
2109 let mut lines = stdout.lines();
2110 let schema_version = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
2111 let fts_schema_rows = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
2112 let max_message_id = lines
2113 .next()
2114 .and_then(|raw| raw.trim().parse::<i64>().ok())
2115 .unwrap_or(0);
2116
2117 Some(HistoricalBundleProbe {
2118 schema_version,
2119 fts_schema_rows,
2120 fts_queryable: false,
2121 max_message_id,
2122 })
2123}
2124
2125fn historical_bundle_fts_queryable_via_frankensqlite(
2126 root_path: &Path,
2127 fts_schema_rows: Option<i64>,
2128) -> bool {
2129 matches!(fts_schema_rows, Some(1))
2130 && FrankenStorage::open_readonly(root_path)
2131 .map(|storage| {
2132 storage
2133 .raw()
2134 .query("SELECT COUNT(*) FROM fts_messages")
2135 .is_ok()
2136 })
2137 .unwrap_or(false)
2138}
2139
2140fn historical_bundle_supports_direct_readonly(root_path: &Path) -> bool {
2141 open_historical_bundle_readonly(root_path)
2142 .and_then(|conn| historical_bundle_has_queryable_core_tables(&conn))
2143 .is_ok()
2144}
2145
2146fn historical_table_exists(conn: &FrankenConnection, table: &str) -> Result<bool> {
2147 let found: Option<i64> = conn
2148 .query_row_map(
2149 "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1 LIMIT 1",
2150 fparams![table],
2151 |row| row.get_typed(0),
2152 )
2153 .optional()
2154 .with_context(|| format!("checking for historical table {table}"))?;
2155 Ok(found.is_some())
2156}
2157
2158fn probe_historical_table_reads(conn: &FrankenConnection, table: &str) -> Result<()> {
2159 if !historical_table_exists(conn, table)? {
2160 return Err(anyhow!(
2161 "historical database missing required table {table}"
2162 ));
2163 }
2164
2165 let sql = format!("SELECT rowid FROM {table} LIMIT 1");
2166 let _: Option<i64> = conn
2167 .query_row_map(&sql, fparams![], |row| row.get_typed(0))
2168 .optional()
2169 .with_context(|| format!("probing rows from historical table {table}"))?;
2170 Ok(())
2171}
2172
2173fn historical_bundle_has_queryable_core_tables(conn: &FrankenConnection) -> Result<()> {
2174 probe_historical_table_reads(conn, "conversations")?;
2175 probe_historical_table_reads(conn, "messages")?;
2176 Ok(())
2177}
2178
2179fn open_historical_bundle_readonly(root_path: &Path) -> Result<FrankenConnection> {
2180 let path_str = root_path.to_string_lossy();
2181 let flags = FrankenOpenFlags::SQLITE_OPEN_READ_ONLY;
2182 let conn = open_franken_with_flags(&path_str, flags)
2183 .with_context(|| format!("opening historical database {}", root_path.display()))?;
2184 Ok(conn)
2185}
2186
/// Reports whether `line` is an `INSERT` into one of the tables that are
/// replayed during recovery.
///
/// Accepted shapes are `INSERT INTO` or `INSERT OR IGNORE INTO`, followed by the
/// table name wrapped in matching single or double quotes, at the very start of
/// the line. Matching is case-sensitive and done with prefix stripping, so no
/// strings are allocated per line (this runs once for every line of recovery
/// output).
fn is_recoverable_insert_line(line: &str) -> bool {
    const RECOVERABLE_TABLES: [&str; 6] = [
        "sources",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
    ];
    const INSERT_PREFIXES: [&str; 2] = ["INSERT INTO ", "INSERT OR IGNORE INTO "];

    // At most one prefix can match, so the probing order is irrelevant.
    let Some(after_keyword) = INSERT_PREFIXES
        .iter()
        .find_map(|prefix| line.strip_prefix(*prefix))
    else {
        return false;
    };

    ['\'', '"'].into_iter().any(|quote| {
        after_keyword.strip_prefix(quote).is_some_and(|unquoted| {
            RECOVERABLE_TABLES.iter().any(|table| {
                // The closing quote must follow immediately, so a longer name
                // such as `messages_fts` does not match `messages`.
                unquoted
                    .strip_prefix(*table)
                    .is_some_and(|rest| rest.starts_with(quote))
            })
        })
    })
}
2204
/// Rebuilds a readable copy of a damaged bundle with the external `sqlite3` tool.
///
/// A fresh database with the core schema is created in a temporary directory.
/// `sqlite3 <bundle> .recover` is then streamed line by line, and only the
/// `INSERT` lines accepted by `is_recoverable_insert_line` are piped into a
/// second `sqlite3` process that writes them to the new database inside one
/// transaction. The returned connection reads the recovered copy, and the
/// temporary directory is kept inside the returned value.
///
/// # Errors
///
/// Fails if the temporary database cannot be prepared, if either `sqlite3`
/// process cannot be started or exits unsuccessfully, if streaming fails, or if
/// the recovered database lacks readable core tables.
fn recover_historical_bundle_via_sqlite3(
    bundle: &HistoricalDatabaseBundle,
) -> Result<HistoricalReadConnection> {
    let tempdir = tempfile::TempDir::new().context("creating temporary salvage directory")?;
    let recovered_db = tempdir.path().join("historical-recovered.db");
    let temp_conn = FrankenConnection::open(recovered_db.to_string_lossy().as_ref())
        .with_context(|| format!("creating recovered database {}", recovered_db.display()))?;
    temp_conn
        .execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA)
        .with_context(|| format!("initializing recovered schema {}", recovered_db.display()))?;
    // Release this connection before the external importer opens the same file.
    drop(temp_conn);

    // NOTE(review): the path is interpolated into the URI without escaping;
    // paths containing `?`, `#`, or `%` may be misread — confirm.
    let bundle_uri = format!("file:{}?immutable=1", bundle.root_path.to_string_lossy());
    let mut recover = Command::new("sqlite3")
        .arg(&bundle_uri)
        .arg(".recover")
        .stdout(Stdio::piped())
        .spawn()
        .with_context(|| {
            format!(
                "launching sqlite3 .recover for historical bundle {}",
                bundle.root_path.display()
            )
        })?;
    let recover_stdout = recover
        .stdout
        .take()
        .context("capturing sqlite3 .recover stdout")?;

    let mut importer = Command::new("sqlite3")
        .arg(&recovered_db)
        .stdin(Stdio::piped())
        .spawn()
        .with_context(|| {
            format!(
                "launching sqlite3 importer for recovered bundle {}",
                recovered_db.display()
            )
        })?;

    // NOTE(review): an early `?` return inside this block leaves both child
    // processes un-waited — confirm whether that is acceptable.
    {
        let importer_stdin = importer
            .stdin
            .as_mut()
            .context("opening sqlite3 importer stdin")?;
        importer_stdin
            .write_all(b"BEGIN;\n")
            .context("starting recovery import transaction")?;

        // Forward only the INSERT statements for the core tables; every other
        // line of the recovery script is dropped.
        let reader = BufReader::new(recover_stdout);
        for line in reader.lines() {
            let line = line.context("reading sqlite3 .recover output")?;
            if is_recoverable_insert_line(&line) {
                importer_stdin
                    .write_all(line.as_bytes())
                    .context("writing recovered INSERT")?;
                importer_stdin
                    .write_all(b"\n")
                    .context("writing recovered INSERT newline")?;
            }
        }

        importer_stdin
            .write_all(b"COMMIT;\n")
            .context("committing recovery import transaction")?;
    }

    let recover_status = recover
        .wait()
        .context("waiting for sqlite3 .recover process")?;
    if !recover_status.success() {
        anyhow::bail!(
            "sqlite3 .recover exited with status {} for {}",
            recover_status,
            bundle.root_path.display()
        );
    }

    let importer_status = importer
        .wait()
        .context("waiting for sqlite3 recovery importer")?;
    if !importer_status.success() {
        anyhow::bail!(
            "sqlite3 recovery importer exited with status {} for {}",
            importer_status,
            recovered_db.display()
        );
    }

    // Reopen the recovered copy read-only and make sure it is actually usable.
    let conn = open_historical_bundle_readonly(&recovered_db)?;
    historical_bundle_has_queryable_core_tables(&conn)?;
    Ok(HistoricalReadConnection {
        conn,
        method: "sqlite3-recover",
        root_path: recovered_db,
        _tempdir: Some(tempdir),
    })
}
2303
2304fn open_historical_bundle_for_salvage(
2305 bundle: &HistoricalDatabaseBundle,
2306) -> Result<HistoricalReadConnection> {
2307 match open_historical_bundle_readonly(&bundle.root_path) {
2308 Ok(conn) => {
2309 if historical_bundle_has_queryable_core_tables(&conn).is_ok() {
2310 return Ok(HistoricalReadConnection {
2311 conn,
2312 method: "direct-readonly",
2313 root_path: bundle.root_path.clone(),
2314 _tempdir: None,
2315 });
2316 }
2317 }
2318 Err(err) => {
2319 tracing::warn!(
2320 path = %bundle.root_path.display(),
2321 error = %err,
2322 "historical bundle direct open failed; falling back to sqlite3 .recover"
2323 );
2324 }
2325 }
2326
2327 recover_historical_bundle_via_sqlite3(bundle)
2328}
2329
2330fn historical_bundle_counts(conn: &FrankenConnection) -> Result<(usize, usize)> {
2331 let conversations: i64 =
2332 conn.query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
2333 row.get_typed(0)
2334 })?;
2335 let messages: i64 = conn.query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
2336 row.get_typed(0)
2337 })?;
2338 Ok((
2339 usize::try_from(conversations.max(0)).unwrap_or(usize::MAX),
2340 usize::try_from(messages.max(0)).unwrap_or(usize::MAX),
2341 ))
2342}
2343
2344fn clear_seeded_runtime_meta(conn: &FrankenConnection) -> Result<()> {
2345 conn.execute(
2346 "DELETE FROM meta
2347 WHERE key LIKE 'historical_bundle_salvaged:%'
2348 OR key IN ('last_scan_ts', 'last_indexed_at', 'last_embedded_message_id')",
2349 )?;
2350 Ok(())
2351}
2352
2353fn record_historical_bundle_import(
2354 conn: &FrankenConnection,
2355 bundle: &HistoricalDatabaseBundle,
2356 method: &str,
2357 conversations_imported: usize,
2358 messages_imported: usize,
2359) -> Result<()> {
2360 let key = FrankenStorage::historical_bundle_meta_key(bundle);
2361 let value = serde_json::json!({
2362 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
2363 "path": bundle.root_path.display().to_string(),
2364 "bytes": bundle.total_bytes,
2365 "modified_at_ms": bundle.modified_at_ms,
2366 "method": method,
2367 "conversations_imported": conversations_imported,
2368 "messages_imported": messages_imported,
2369 "recorded_at_ms": FrankenStorage::now_millis(),
2370 });
2371 let value_str = serde_json::to_string(&value)?;
2372 conn.execute_compat(
2373 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
2374 fparams![key, value_str],
2375 )?;
2376 Ok(())
2377}
2378
2379fn finalize_seeded_canonical_bundle_via_rusqlite(
2380 canonical_db_path: &Path,
2381 bundle: &HistoricalDatabaseBundle,
2382 conversations_imported: usize,
2383 messages_imported: usize,
2384) -> Result<()> {
2385 let _fts_repair =
2386 ensure_fts_consistency_via_rusqlite(canonical_db_path).with_context(|| {
2387 format!(
2388 "repairing staged canonical FTS consistency before finalization: {}",
2389 canonical_db_path.display()
2390 )
2391 })?;
2392
2393 let path_str = canonical_db_path.to_string_lossy();
2394 let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
2395 format!(
2396 "opening seeded canonical database for post-seed finalization: {}",
2397 canonical_db_path.display()
2398 )
2399 })?;
2400 conn.execute("PRAGMA busy_timeout = 30000;")
2401 .with_context(|| {
2402 format!(
2403 "configuring busy timeout for seeded canonical database {}",
2404 canonical_db_path.display()
2405 )
2406 })?;
2407 let schema_version = read_meta_schema_version(&conn)?;
2408
2409 if let Some(version) = schema_version
2410 && version < CURRENT_SCHEMA_VERSION
2411 && version != 13
2412 {
2413 anyhow::bail!(
2414 "seeded canonical bundle schema_version {version} is too old for baseline import and cannot be finalized automatically"
2415 );
2416 }
2417
2418 clear_seeded_runtime_meta(&conn)?;
2419
2420 conn.execute_compat(
2421 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1)",
2422 fparams![CURRENT_SCHEMA_VERSION.to_string()],
2423 )?;
2424
2425 conn.execute_compat(
2426 "INSERT OR IGNORE INTO _schema_migrations(version, name) VALUES(?1, 'fts_contentless')",
2427 fparams![CURRENT_SCHEMA_VERSION],
2428 )?;
2429 record_historical_bundle_import(
2430 &conn,
2431 bundle,
2432 "baseline-bulk-sql-copy",
2433 conversations_imported,
2434 messages_imported,
2435 )?;
2436 Ok(())
2437}
2438
2439fn read_meta_schema_version(conn: &FrankenConnection) -> Result<Option<i64>> {
2440 let version: Option<String> = conn
2441 .query_row_map(
2442 "SELECT value FROM meta WHERE key = 'schema_version'",
2443 fparams![],
2444 |row| row.get_typed(0),
2445 )
2446 .optional()?;
2447 Ok(version.and_then(|raw| raw.parse::<i64>().ok()))
2448}
2449
/// Test helper: counts `sqlite_master` rows named `fts_messages`.
///
/// # Errors
/// Returns the query failure wrapped with a descriptive context message.
#[cfg(test)]
fn franken_fts_schema_rows(conn: &FrankenConnection) -> Result<i64> {
    let lookup = conn.query_row_map(
        "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
        fparams![],
        |row| row.get_typed(0),
    );
    lookup.context("counting sqlite_master rows for fts_messages via frankensqlite")
}
2459
/// Test helper: reports whether a `COUNT(*)` over `fts_messages` can be executed.
#[cfg(test)]
fn franken_fts_limit_probe(conn: &FrankenConnection) -> bool {
    let probe = conn.query("SELECT COUNT(*) FROM fts_messages");
    matches!(probe, Ok(_))
}
2464
/// Test helper: opens `db_path` through frankensqlite and gathers health facts.
///
/// Collects the stored schema version, the `PRAGMA quick_check(1)` verdict, the
/// number of `fts_messages` schema rows, and whether the FTS table is
/// queryable. Message statistics are only read when the quick check reports
/// `ok`; otherwise both are reported as `0`.
///
/// # Errors
/// Fails if the database cannot be opened or any of the executed queries fail.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn probe_database_health_via_frankensqlite(
    db_path: &Path,
) -> Result<SqliteDatabaseHealthProbe> {
    let db_location = db_path.to_string_lossy();
    let conn = FrankenConnection::open(db_location.as_ref()).with_context(|| {
        format!(
            "opening frankensqlite db at {} for database health probe",
            db_path.display()
        )
    })?;
    conn.execute_batch("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for database health probe at {}",
                db_path.display()
            )
        })?;

    let schema_version = read_meta_schema_version(&conn)?;
    let quick_check_status: String = conn
        .query_row_map("PRAGMA quick_check(1)", fparams![], |row| row.get_typed(0))
        .with_context(|| format!("running PRAGMA quick_check(1) for {}", db_path.display()))?;
    let quick_check_ok = quick_check_status.trim().eq_ignore_ascii_case("ok");
    let fts_schema_rows = franken_fts_schema_rows(&conn)?;
    // The probe query only runs when exactly one schema row exists.
    let fts_queryable = fts_schema_rows == 1 && franken_fts_limit_probe(&conn);

    // Message tables are not touched when the quick check failed.
    let (message_count, max_message_id) = if quick_check_ok {
        let total: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .context("counting messages during frankensqlite database health probe")?;
        let newest: i64 = conn
            .query_row_map(
                "SELECT COALESCE(MAX(id), 0) FROM messages",
                fparams![],
                |row| row.get_typed(0),
            )
            .context("reading max message id during frankensqlite database health probe")?;
        (total, newest)
    } else {
        (0, 0)
    };

    Ok(SqliteDatabaseHealthProbe {
        schema_version,
        quick_check_ok,
        fts_schema_rows,
        fts_queryable,
        message_count,
        max_message_id,
    })
}
2526
/// A historical bundle copied into a temporary directory, ready to be
/// finalized and promoted to the canonical database path.
struct StagedHistoricalSeed {
    /// Temporary directory that contains the staged copy; it is also where
    /// `promote_staged_historical_seed` places the pre-seed canonical backup.
    tempdir: tempfile::TempDir,
    /// Path of the staged database file inside `tempdir`.
    db_path: PathBuf,
}
2531
2532fn stage_historical_bundle_for_seed(
2533 canonical_db_path: &Path,
2534 source_root_path: &Path,
2535) -> Result<StagedHistoricalSeed> {
2536 let canonical_parent = canonical_db_path.parent().unwrap_or_else(|| Path::new("."));
2537 fs::create_dir_all(canonical_parent).with_context(|| {
2538 format!(
2539 "creating canonical database directory before bulk historical seed import: {}",
2540 canonical_parent.display()
2541 )
2542 })?;
2543 let tempdir = tempfile::TempDir::new_in(canonical_parent)
2544 .context("creating temporary baseline seed directory")?;
2545 let staged_seed_db = tempdir.path().join("baseline-seed-output.db");
2546 copy_database_bundle(source_root_path, &staged_seed_db)?;
2547
2548 Ok(StagedHistoricalSeed {
2549 tempdir,
2550 db_path: staged_seed_db,
2551 })
2552}
2553
2554fn promote_staged_historical_seed(
2555 canonical_db_path: &Path,
2556 staged_seed: &StagedHistoricalSeed,
2557) -> Result<()> {
2558 let canonical_backup = staged_seed
2559 .tempdir
2560 .path()
2561 .join("pre-seed-canonical-backup.db");
2562 let had_canonical = canonical_db_path.exists()
2563 || database_sidecar_path(canonical_db_path, "-wal").exists()
2564 || database_sidecar_path(canonical_db_path, "-shm").exists();
2565
2566 if had_canonical {
2567 move_database_bundle(canonical_db_path, &canonical_backup).with_context(|| {
2568 format!(
2569 "backing up canonical database before promoting staged historical seed import: {}",
2570 canonical_db_path.display()
2571 )
2572 })?;
2573 }
2574
2575 if let Err(err) =
2576 move_database_bundle(&staged_seed.db_path, canonical_db_path).with_context(|| {
2577 format!(
2578 "promoting staged historical seed database bundle {} into canonical path {}",
2579 staged_seed.db_path.display(),
2580 canonical_db_path.display()
2581 )
2582 })
2583 {
2584 if had_canonical {
2585 let _ = move_database_bundle(&canonical_backup, canonical_db_path);
2586 }
2587 return Err(err);
2588 }
2589
2590 Ok(())
2591}
2592
2593pub(crate) fn seed_canonical_from_best_historical_bundle(
2594 canonical_db_path: &Path,
2595) -> Result<Option<HistoricalSalvageOutcome>> {
2596 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
2597 let mut last_seed_error: Option<anyhow::Error> = None;
2598 for bundle in ordered_bundles {
2599 if let Some(version) = bundle.probe.schema_version
2600 && version < 13
2601 {
2602 let err = anyhow!(
2603 "historical bundle {} schema_version {version} is too old for baseline import",
2604 bundle.root_path.display()
2605 );
2606 tracing::warn!(
2607 path = %bundle.root_path.display(),
2608 schema_version = version,
2609 "historical bundle is too old for baseline seed import"
2610 );
2611 last_seed_error = Some(err);
2612 continue;
2613 }
2614
2615 let source = open_historical_bundle_for_salvage(&bundle).with_context(|| {
2616 format!(
2617 "opening historical seed bundle {} for baseline import",
2618 bundle.root_path.display()
2619 )
2620 })?;
2621 let (conversations_imported, messages_imported) = historical_bundle_counts(&source.conn)?;
2622
2623 let staged_seed = match stage_historical_bundle_for_seed(
2624 canonical_db_path,
2625 &source.root_path,
2626 ) {
2627 Ok(staged_seed) => staged_seed,
2628 Err(err) => {
2629 tracing::warn!(
2630 path = %bundle.root_path.display(),
2631 error = %err,
2632 "bulk baseline seed staging from historical bundle failed; trying next candidate"
2633 );
2634 last_seed_error = Some(err);
2635 continue;
2636 }
2637 };
2638
2639 if let Err(err) = finalize_seeded_canonical_bundle_via_rusqlite(
2640 &staged_seed.db_path,
2641 &bundle,
2642 conversations_imported,
2643 messages_imported,
2644 ) {
2645 tracing::warn!(
2646 path = %bundle.root_path.display(),
2647 error = %err,
2648 "finalizing staged historical seed import failed; trying next candidate"
2649 );
2650 last_seed_error = Some(err);
2651 continue;
2652 }
2653
2654 if let Err(err) = promote_staged_historical_seed(canonical_db_path, &staged_seed) {
2655 tracing::warn!(
2656 path = %bundle.root_path.display(),
2657 error = %err,
2658 "promoting staged historical seed import failed; trying next candidate"
2659 );
2660 last_seed_error = Some(err);
2661 continue;
2662 }
2663
2664 tracing::info!(
2665 path = %bundle.root_path.display(),
2666 conversations_imported,
2667 messages_imported,
2668 "seeded empty canonical database from largest healthy historical bundle"
2669 );
2670
2671 return Ok(Some(HistoricalSalvageOutcome {
2672 bundles_considered: 0,
2673 bundles_imported: 1,
2674 conversations_imported,
2675 messages_imported,
2676 }));
2677 }
2678 if let Some(err) = last_seed_error {
2679 return Err(err);
2680 }
2681 Ok(None)
2682}
2683
2684fn parse_json_column(value: Option<String>) -> serde_json::Value {
2685 value
2686 .and_then(|raw| serde_json::from_str(&raw).ok())
2687 .unwrap_or(serde_json::Value::Null)
2688}
2689
/// Key of the single-entry JSON object that `wrap_historical_raw_json` builds
/// around unparsed JSON text and that `historical_raw_json` looks for.
const HISTORICAL_RAW_JSON_SENTINEL_KEY: &str = "__cass_historical_raw_json__";
2691
2692fn wrap_historical_raw_json(raw: String) -> serde_json::Value {
2693 serde_json::json!({ HISTORICAL_RAW_JSON_SENTINEL_KEY: raw })
2694}
2695
2696fn historical_raw_json(value: &serde_json::Value) -> Option<&str> {
2697 match value {
2698 serde_json::Value::Object(map) if map.len() == 1 => map
2699 .get(HISTORICAL_RAW_JSON_SENTINEL_KEY)
2700 .and_then(serde_json::Value::as_str),
2701 _ => None,
2702 }
2703}
2704
2705fn parse_historical_json_column(value: Option<String>) -> serde_json::Value {
2706 match value {
2707 Some(raw) if raw.trim().is_empty() => serde_json::Value::Null,
2708 Some(raw) => wrap_historical_raw_json(raw),
2709 None => serde_json::Value::Null,
2710 }
2711}
2712
/// Reports whether the `CASS_DEBUG_HISTORICAL_SALVAGE` environment variable is set.
///
/// Only presence is checked; the variable's value is ignored.
fn historical_salvage_debug_enabled() -> bool {
    let flag = std::env::var_os("CASS_DEBUG_HISTORICAL_SALVAGE");
    matches!(flag, Some(_))
}
2716
/// Size limits for one historical import batch, as produced by
/// `historical_import_batch_limits`.
#[derive(Debug, Clone, Copy)]
struct HistoricalImportBatchLimits {
    /// Conversation limit (default 32, or 128 on machines with >= 32 CPUs).
    conversations: usize,
    /// Message limit (default 4_096, or 16_384 on machines with >= 32 CPUs).
    messages: usize,
    /// Payload size limit (default 3_000_000, or 12_000_000 on machines with
    /// >= 32 CPUs). NOTE(review): presumably compared against
    /// `message_payload_size_hint`, which measures bytes — confirm.
    payload_chars: usize,
}
2723
2724fn env_positive_usize(key: &str) -> Option<usize> {
2725 dotenvy::var(key)
2726 .ok()
2727 .and_then(|value| value.parse::<usize>().ok())
2728 .filter(|value| *value > 0)
2729}
2730
2731fn historical_import_batch_limits() -> HistoricalImportBatchLimits {
2732 let cpu_count = std::thread::available_parallelism()
2733 .map(std::num::NonZeroUsize::get)
2734 .unwrap_or(1);
2735
2736 let default_limits = if cpu_count >= 32 {
2737 HistoricalImportBatchLimits {
2738 conversations: 128,
2739 messages: 16_384,
2740 payload_chars: 12_000_000,
2741 }
2742 } else {
2743 HistoricalImportBatchLimits {
2744 conversations: 32,
2745 messages: 4_096,
2746 payload_chars: 3_000_000,
2747 }
2748 };
2749
2750 HistoricalImportBatchLimits {
2751 conversations: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS")
2752 .unwrap_or(default_limits.conversations),
2753 messages: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_MESSAGES")
2754 .unwrap_or(default_limits.messages),
2755 payload_chars: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CHARS")
2756 .unwrap_or(default_limits.payload_chars),
2757 }
2758}
2759
2760fn json_value_size_hint(value: &serde_json::Value) -> usize {
2761 if let Some(raw) = historical_raw_json(value) {
2762 return raw.len();
2763 }
2764 match value {
2765 serde_json::Value::Null => 0,
2766 other => serde_json::to_string(other)
2767 .map(|raw| raw.len())
2768 .unwrap_or(0),
2769 }
2770}
2771
2772fn message_payload_size_hint(message: &Message) -> usize {
2773 message
2774 .content
2775 .len()
2776 .saturating_add(json_value_size_hint(&message.extra_json))
2777}
2778
/// Returns `true` when `name` starts with `prefix` and is not a `-wal` or
/// `-shm` sidecar name.
fn is_backup_root_name(name: &str, prefix: &str) -> bool {
    if !name.starts_with(prefix) {
        return false;
    }
    let is_sidecar = name.ends_with("-wal") || name.ends_with("-shm");
    !is_sidecar
}
2782
/// Returns `true` when `name` ends with one of the database sidecar suffixes:
/// `-wal`, `-shm`, `-lock-shared`, `-lock-reserved`, or `-lock-pending`.
fn has_db_sidecar_suffix(name: &str) -> bool {
    const SIDECAR_SUFFIXES: [&str; 5] = [
        "-wal",
        "-shm",
        "-lock-shared",
        "-lock-reserved",
        "-lock-pending",
    ];
    for suffix in SIDECAR_SUFFIXES {
        if name.ends_with(suffix) {
            return true;
        }
    }
    false
}
2799
/// Schema version this build treats as current.
pub const CURRENT_SCHEMA_VERSION: i64 = 20;
/// Lowest stored schema version that `check_schema_compatibility` reports as
/// `NeedsMigration`; lower positive versions are reported as `NeedsRebuild`.
const MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION: i64 = 13;
2803
/// Outcome of `check_schema_compatibility` for an on-disk database.
#[derive(Debug, Clone)]
pub enum SchemaCheck {
    /// The stored schema version equals the supported version.
    Compatible,
    /// The database has no tables at all, or its version lies in the supported
    /// in-place upgrade range.
    NeedsMigration,
    /// The database cannot be upgraded in place; the string explains why.
    NeedsRebuild(String),
}
2814
2815fn schema_check_error_requires_rebuild(err: &frankensqlite::FrankenError) -> bool {
2816 matches!(
2820 err,
2821 frankensqlite::FrankenError::DatabaseCorrupt { .. }
2822 | frankensqlite::FrankenError::WalCorrupt { .. }
2823 | frankensqlite::FrankenError::NotADatabase { .. }
2824 | frankensqlite::FrankenError::ShortRead { .. }
2825 )
2826}
2827
/// Builds a sibling path of `path` named
/// `<file name>.backup.<pid>.<nanos since epoch>.<nonce>`.
///
/// The file name falls back to `db` when `path` has none or it is not valid
/// UTF-8; the timestamp falls back to `0` when the clock is before the Unix
/// epoch. The nonce is a process-wide counter incremented on every call.
fn unique_backup_path(path: &Path) -> PathBuf {
    static NEXT_NONCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let nanos_since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    let nonce = NEXT_NONCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let base_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db",
    };
    let pid = std::process::id();

    path.with_file_name(format!(
        "{base_name}.backup.{pid}.{nanos_since_epoch}.{nonce}"
    ))
}
2845
/// Builds a hidden sibling path of `backup_path` named
/// `.<file name>.vacuum-in-progress`.
///
/// The file name falls back to `db.backup` when `backup_path` has none or it
/// is not valid UTF-8.
fn vacuum_stage_backup_path(backup_path: &Path) -> PathBuf {
    let visible_name = match backup_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db.backup",
    };
    let staged_name = format!(".{visible_name}.vacuum-in-progress");
    backup_path.with_file_name(staged_name)
}
2853
/// Opens the database at `path` read-only and classifies its schema version.
///
/// Classification:
/// * no `meta` table and no tables at all -> `NeedsMigration`;
/// * no `meta` table but other tables exist -> `NeedsRebuild`;
/// * version equal to `SCHEMA_VERSION` -> `Compatible`;
/// * version in `MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION` ->
///   `NeedsMigration`;
/// * positive version below the minimum -> `NeedsRebuild` (too old);
/// * any other version -> `NeedsRebuild` (reported as newer than supported);
/// * version row missing, unreadable, or unparseable -> `NeedsRebuild`.
///
/// The connection is always closed before returning, whatever the outcome.
///
/// # Errors
/// Returns the open error, or an error from the `sqlite_master` queries.
fn check_schema_compatibility(
    path: &Path,
) -> std::result::Result<SchemaCheck, frankensqlite::FrankenError> {
    let mut conn = open_franken_with_flags(
        &path.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )?;

    // Run the checks inside a closure so that `?` exits the checks only and the
    // connection is still closed explicitly below.
    let result = (|| {
        // Does the `meta` table exist at all?
        let meta_exists: i32 = conn.query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='meta'",
            fparams![],
            |row| row.get_typed(0),
        )?;

        if meta_exists == 0 {
            // Distinguish an empty database from one with tables but no metadata.
            let table_count: i32 = conn.query_row_map(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table'",
                fparams![],
                |row| row.get_typed(0),
            )?;

            if table_count == 0 {
                // Empty database: nothing to rebuild, the schema just needs creating.
                return Ok(SchemaCheck::NeedsMigration);
            }

            return Ok(SchemaCheck::NeedsRebuild(
                "Database missing schema version metadata".to_string(),
            ));
        }

        // Query errors and unparseable values both collapse to `None` here, so
        // they end up in the "not found or invalid" rebuild branch below.
        let version: Option<i64> = conn
            .query_row_map(
                "SELECT value FROM meta WHERE key = 'schema_version'",
                fparams![],
                |row| Ok(row.get_typed::<String>(0)?.parse().ok()),
            )
            .ok()
            .flatten();

        match version {
            Some(v) if v == SCHEMA_VERSION => Ok(SchemaCheck::Compatible),
            Some(v) if (MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION).contains(&v) => {
                Ok(SchemaCheck::NeedsMigration)
            }
            Some(v) if v > 0 && v < MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION => {
                Ok(SchemaCheck::NeedsRebuild(format!(
                    "Schema version {} is too old for in-place migration; supported upgrade path starts at version {}",
                    v, MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION
                )))
            }
            // Reached for versions above SCHEMA_VERSION and also for zero or
            // negative values, which the earlier guards do not match.
            Some(v) => {
                Ok(SchemaCheck::NeedsRebuild(format!(
                    "Schema version {} is newer than supported version {}",
                    v, SCHEMA_VERSION
                )))
            }
            None => Ok(SchemaCheck::NeedsRebuild(
                "Schema version not found or invalid".to_string(),
            )),
        }
    })();

    // Close explicitly; if the orderly close fails, log it and fall back to the
    // best-effort variant.
    if let Err(close_err) = conn.close_in_place() {
        tracing::warn!(
            error = %close_err,
            db_path = %path.display(),
            "check_schema_compatibility: close_in_place failed; falling back to best-effort close"
        );
        conn.close_best_effort_in_place();
    }

    result
}
2938
/// Module-private alias of `CURRENT_SCHEMA_VERSION`.
const SCHEMA_VERSION: i64 = CURRENT_SCHEMA_VERSION;
2940
/// Test-only SQL for the initial schema: enables foreign keys and creates the
/// `meta`, `agents`, `workspaces`, `conversations`, `messages`, `snippets`,
/// `tags`, and `conversation_tags` tables plus two indexes.
#[cfg(test)]
const MIGRATION_V1: &str = r"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    version TEXT,
    kind TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    display_name TEXT
);

CREATE TABLE IF NOT EXISTS conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    UNIQUE(agent_id, external_id)
);

CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    idx INTEGER NOT NULL,
    role TEXT NOT NULL,
    author TEXT,
    created_at INTEGER,
    content TEXT NOT NULL,
    extra_json TEXT,
    UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    PRIMARY KEY (conversation_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_conversations_agent_started
    ON conversations(agent_id, started_at DESC);

CREATE INDEX IF NOT EXISTS idx_messages_conv_idx
    ON messages(conversation_id, idx);

";
3020
/// Test-only SQL that creates the `fts_messages` FTS5 table (porter tokenizer)
/// if missing and fills it from `messages` joined to `conversations`,
/// `agents`, and (optionally) `workspaces`.
#[cfg(test)]
const MIGRATION_V2: &str = r"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(
    content,
    title,
    agent,
    workspace,
    source_path,
    created_at UNINDEXED,
    message_id UNINDEXED,
    tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
    m.content,
    c.title,
    a.slug,
    w.path,
    c.source_path,
    m.created_at,
    m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
3047
/// Test-only SQL that drops `fts_messages`, recreates it with the same column
/// layout as `MIGRATION_V2`, and refills it from the relational tables.
#[cfg(test)]
#[allow(dead_code)]
const MIGRATION_V3: &str = r"
DROP TABLE IF EXISTS fts_messages;
CREATE VIRTUAL TABLE fts_messages USING fts5(
    content,
    title,
    agent,
    workspace,
    source_path,
    created_at UNINDEXED,
    message_id UNINDEXED,
    tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
    m.content,
    c.title,
    a.slug,
    w.path,
    c.source_path,
    m.created_at,
    m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
3076
/// Test-only SQL that creates the `sources` table and inserts the default
/// `'local'` source row if it is not already present.
#[cfg(test)]
const MIGRATION_V4: &str = r"
-- Sources table for tracking where conversations come from
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY, -- source_id (e.g., 'local', 'work-laptop')
    kind TEXT NOT NULL, -- 'local', 'ssh', etc.
    host_label TEXT, -- display label
    machine_id TEXT, -- optional stable machine id
    platform TEXT, -- 'macos', 'linux', 'windows'
    config_json TEXT, -- JSON blob for extra config (SSH params, path rewrites)
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

-- Bootstrap: Insert the default 'local' source
INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
3095
/// Test-only SQL that rebuilds `conversations` with the `source_id` and
/// `origin_host` columns and a `(source_id, agent_id, external_id)` unique
/// constraint, copying existing rows with `source_id = 'local'`.
#[cfg(test)]
const MIGRATION_V5: &str = r"
-- Add provenance columns to conversations table
-- SQLite cannot alter unique constraints, so we need to recreate the table

-- Create new table with provenance columns and updated unique constraint
CREATE TABLE conversations_new (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    UNIQUE(source_id, agent_id, external_id)
);

-- Copy data from old table (all existing conversations get source_id='local')
INSERT INTO conversations_new (id, agent_id, workspace_id, source_id, external_id, title,
    source_path, started_at, ended_at, approx_tokens, metadata_json, origin_host)
SELECT id, agent_id, workspace_id, 'local', external_id, title,
    source_path, started_at, ended_at, approx_tokens, metadata_json, NULL
FROM conversations;

-- Drop old table and rename new
DROP TABLE conversations;
ALTER TABLE conversations_new RENAME TO conversations;

-- Recreate indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
";
3133
/// Test-only SQL that adds an index on `conversations(source_path)`.
#[cfg(test)]
const MIGRATION_V6: &str = r"
-- Optimize lookup by source_path (used by TUI detail view)
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
";
3139
/// Test-only SQL that adds the `metadata_bin` BLOB column to `conversations`
/// and the `extra_bin` BLOB column to `messages`.
#[cfg(test)]
const MIGRATION_V7: &str = r"
-- Add binary columns for MessagePack serialization (Opt 3.1)
-- Binary format is 50-70% smaller than JSON and faster to parse
ALTER TABLE conversations ADD COLUMN metadata_bin BLOB;
ALTER TABLE messages ADD COLUMN extra_bin BLOB;
";
3147
/// Test-only SQL that creates the `daily_stats` rollup table, keyed by
/// `(day_id, agent_slug, source_id)`, and two supporting indexes.
#[cfg(test)]
const MIGRATION_V8: &str = r"
-- Opt 3.2: Daily stats materialized table for O(1) time-range histograms
-- Provides fast aggregated queries for stats/dashboard without full table scans

CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL, -- Days since 2020-01-01 (Unix epoch + offset)
    agent_slug TEXT NOT NULL, -- 'all' for totals, or specific agent slug
    source_id TEXT NOT NULL DEFAULT 'all', -- 'all' for totals, or specific source
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
3167
/// Test-only SQL that creates the `embedding_jobs` table and a partial unique
/// index allowing one pending/running job per `(db_path, model_id)`.
#[cfg(test)]
const MIGRATION_V9: &str = r"
-- Background embedding jobs tracking table
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

-- Only one pending or running job per (db_path, model_id) at a time.
-- Multiple completed/failed/cancelled jobs are allowed for history.
CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
3190
/// Test-only SQL for token analytics: creates `token_usage`,
/// `token_daily_stats`, and `model_pricing` (with seed pricing rows), and adds
/// token summary columns to `conversations`.
#[cfg(test)]
const MIGRATION_V10: &str = r"
-- Token analytics: per-message token usage ledger
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',

    -- Timing
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,

    -- Model identification
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,

    -- Token counts (nullable — not all agents provide all fields)
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,

    -- Cost estimation
    estimated_cost_usd REAL,

    -- Message context
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,

    -- Data quality
    data_source TEXT NOT NULL DEFAULT 'api',

    UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

-- Token analytics: pre-aggregated daily rollups
CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',

    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,

    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,

    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,

    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,

    session_count INTEGER NOT NULL DEFAULT 0,

    last_updated INTEGER NOT NULL,

    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

-- Model pricing lookup table
CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

-- Seed with current pricing (as of 2026-02)
INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Extend conversations table with token summary columns
ALTER TABLE conversations ADD COLUMN total_input_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_output_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_read_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_creation_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN grand_total_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN estimated_cost_usd REAL;
ALTER TABLE conversations ADD COLUMN primary_model TEXT;
ALTER TABLE conversations ADD COLUMN api_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN tool_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN user_message_count INTEGER;
ALTER TABLE conversations ADD COLUMN assistant_message_count INTEGER;
";
3314
/// SQL that drops the existing `fts_messages` table; per the embedded notes,
/// the contentless replacement is recreated later, outside this migration.
const MIGRATION_V14: &str = r"
-- Switch FTS5 from internal-content to contentless mode (CASS #163).
-- Drop the old V13 internal-content fts_messages first so that
-- sqlite_schema does not contain two conflicting CREATE VIRTUAL TABLE
-- entries, which makes the database completely unreadable.
-- The current contentless table is recreated lazily after open() only when the
-- frankensqlite FTS consistency check finds it missing or malformed.
DROP TABLE IF EXISTS fts_messages;
";
3324
/// SQL that creates the `conversation_tail_state` table (no backfill). The
/// same table definition also appears in `MIGRATION_V18`.
const MIGRATION_V15_TAIL_STATE_TABLE: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
";
3335
/// SQL that drops the `idx_messages_conv_idx` index, which duplicates the
/// automatic index behind `UNIQUE(conversation_id, idx)`.
const MIGRATION_V16: &str = r"
-- UNIQUE(conversation_id, idx) already creates sqlite_autoindex_messages_1,
-- which covers the same lookup/order key as idx_messages_conv_idx. Keeping both
-- doubles message insert index maintenance on the hot indexing path.
DROP INDEX IF EXISTS idx_messages_conv_idx;
";
3342
/// SQL that drops the `idx_messages_created` index.
const MIGRATION_V17: &str = r"
-- Drop the global messages(created_at) secondary index from the ingest hot
-- path. Search/time filters are served by the derived search layer and
-- conversation/analytics indexes, while this index is maintained on every
-- message insert.
DROP INDEX IF EXISTS idx_messages_created;
";
3350
/// SQL for schema migration 18 (`conversation_tail_state_hot_table`).
///
/// Creates `conversation_tail_state` when absent, then copies the tail columns
/// of every `conversations` row that has at least one of them set, using
/// `INSERT OR REPLACE` so existing tail rows are overwritten.
const MIGRATION_V18: &str = r"
-- Move append-tail state out of the wide, indexed conversations row. The hot
-- append path updates this cache for every appended conversation; keeping it in
-- a tiny rowid table avoids rewriting the large conversation record.
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
 -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_tail_state (
 conversation_id, ended_at, last_message_idx, last_message_created_at
)
SELECT id, ended_at, last_message_idx, last_message_created_at
FROM conversations
WHERE ended_at IS NOT NULL
 OR last_message_idx IS NOT NULL
 OR last_message_created_at IS NOT NULL;
";
3373
/// SQL for schema migration 19 (`conversation_external_lookup`).
///
/// Creates `conversation_external_lookup` when absent and backfills one row per
/// conversation with a non-NULL `external_id`. The key is the concatenation
/// `len(source_id):source_id:agent_id:len(external_id):external_id`.
const MIGRATION_V19: &str = r"
-- Materialize external conversation provenance into one compact lookup key.
-- This keeps the hot append/new-conversation probe on a single primary-key
-- lookup instead of a composite conversations-table predicate.
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
 CAST(agent_id AS TEXT) || ':' ||
 CAST(length(external_id) AS TEXT) || ':' || external_id,
 id
FROM conversations
WHERE external_id IS NOT NULL;
";
3392
/// SQL for schema migration 20 (`conversation_external_tail_lookup`).
///
/// Creates `conversation_external_tail_lookup` when absent and backfills it for
/// every conversation with a non-NULL `external_id`. The key has the same shape
/// as the one built in migration 19; the three tail columns are read from
/// `conversation_tail_state` through scalar subqueries, so they are NULL when
/// no tail row exists for the conversation.
const MIGRATION_V20: &str = r"
-- Fuse external conversation lookup with append-tail state. Append-heavy
-- workloads can resolve both the conversation id and tail plan from one
-- primary-key probe.
CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
 lookup_key,
 conversation_id,
 ended_at,
 last_message_idx,
 last_message_created_at
)
SELECT
 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
 CAST(c.agent_id AS TEXT) || ':' ||
 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
 c.id,
 (SELECT ts.ended_at
 FROM conversation_tail_state ts
 WHERE ts.conversation_id = c.id),
 (SELECT ts.last_message_idx
 FROM conversation_tail_state ts
 WHERE ts.conversation_id = c.id),
 (SELECT ts.last_message_created_at
 FROM conversation_tail_state ts
 WHERE ts.conversation_id = c.id)
FROM conversations c
WHERE c.external_id IS NOT NULL;
";
3429
#[derive(Debug, Clone)]
/// Plain owned-value record describing one embedding job.
///
/// NOTE(review): the per-field meanings below are inferred from the field
/// names only; confirm against the query that populates this struct.
pub struct EmbeddingJobRow {
    /// Row identifier of the job.
    pub id: i64,
    /// Database path the job refers to, stored as text.
    pub db_path: String,
    /// Identifier of the embedding model.
    pub model_id: String,
    /// Job status, kept as free-form text.
    pub status: String,
    /// Number of documents the job covers in total.
    pub total_docs: i64,
    /// Number of documents processed so far.
    pub completed_docs: i64,
    /// Error text, when one was recorded.
    pub error_message: Option<String>,
    /// Creation timestamp, stored as text (format not visible here).
    pub created_at: String,
    /// Start timestamp as text, if present.
    pub started_at: Option<String>,
    /// Completion timestamp as text, if present.
    pub completed_at: Option<String>,
}
3444
#[derive(Debug, Clone)]
/// Conversation-level fields carried through a lexical rebuild.
///
/// NOTE(review): field meanings are inferred from names and types; confirm
/// against the code that constructs and consumes this row.
pub struct LexicalRebuildConversationRow {
    /// Conversation row id; optional.
    pub id: Option<i64>,
    /// Slug of the agent the conversation belongs to.
    pub agent_slug: String,
    /// Workspace path, when one is associated.
    pub workspace: Option<PathBuf>,
    /// External identifier of the conversation, if any.
    pub external_id: Option<String>,
    /// Conversation title, if any.
    pub title: Option<String>,
    /// Path of the source the conversation was read from.
    pub source_path: PathBuf,
    /// Start timestamp as an integer (unit not visible here).
    pub started_at: Option<i64>,
    /// End timestamp as an integer (unit not visible here).
    pub ended_at: Option<i64>,
    /// Identifier of the provenance source.
    pub source_id: String,
    /// Host the conversation originated from, if recorded.
    pub origin_host: Option<String>,
}
3464
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Size summary of one conversation: how many messages it has and how many
/// bytes they account for.
///
/// `lexical_rebuild_conversation_footprint_from_count` builds this with an
/// estimated (not measured) `message_bytes`.
pub struct LexicalRebuildConversationFootprintRow {
    /// Id of the conversation the footprint describes.
    pub conversation_id: i64,
    /// Number of messages in the conversation.
    pub message_count: usize,
    /// Byte size attributed to those messages.
    pub message_bytes: usize,
}
3473
/// Per-message byte estimate (4 KiB) used when a footprint is derived from a
/// message count instead of measured content.
pub(crate) const LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE: usize = 4 * 1024;
/// Largest number of conversations that may lack tail metadata while coverage
/// is still reported as sufficient.
const LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT: usize = 64;
3476
3477fn lexical_rebuild_tail_metadata_coverage_is_sufficient(
3478 total_conversations: usize,
3479 covered_conversations: usize,
3480) -> bool {
3481 total_conversations == 0
3482 || total_conversations.saturating_sub(covered_conversations.min(total_conversations))
3483 <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT
3484}
3485
/// Converts the highest stored message index into a message count.
///
/// Returns `last_message_idx + 1` as a `usize`. Returns `None` when the index
/// is absent or negative, when the increment overflows `u64`, or when the
/// result does not fit in `usize`.
fn lexical_rebuild_message_count_from_tail_idx(last_message_idx: Option<i64>) -> Option<usize> {
    last_message_idx
        .and_then(|idx| u64::try_from(idx).ok())
        .and_then(|idx| idx.checked_add(1))
        .and_then(|high_water| usize::try_from(high_water).ok())
}
3491
3492fn lexical_rebuild_conversation_footprint_from_count(
3493 conversation_id: i64,
3494 message_count: usize,
3495) -> LexicalRebuildConversationFootprintRow {
3496 LexicalRebuildConversationFootprintRow {
3497 conversation_id,
3498 message_count,
3499 message_bytes: message_count
3500 .saturating_mul(LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE),
3501 }
3502}
3503
#[derive(Debug, Clone)]
/// One message together with the id of the conversation it belongs to.
///
/// NOTE(review): field meanings are inferred from names and types; confirm
/// against the query that fills this row.
pub struct LexicalRebuildMessageRow {
    /// Id of the owning conversation.
    pub conversation_id: i64,
    /// Row id of the message.
    pub id: i64,
    /// Position of the message inside its conversation.
    pub idx: i64,
    /// Message role, kept as text.
    pub role: String,
    /// Author label, if any.
    pub author: Option<String>,
    /// Creation timestamp as an integer (unit not visible here).
    pub created_at: Option<i64>,
    /// Message body.
    pub content: String,
}
3515
#[derive(Debug, Clone, PartialEq, Eq)]
/// Message fields kept once messages are grouped per conversation; unlike
/// `LexicalRebuildMessageRow` it carries no conversation id or message id, and
/// the role is reduced to a single boolean.
pub struct LexicalRebuildGroupedMessageRow {
    /// Position of the message inside its conversation.
    pub idx: i64,
    /// Whether the message role is the tool role.
    /// NOTE(review): the role-to-flag mapping lives at the construction site.
    pub is_tool_role: bool,
    /// Creation timestamp as an integer (unit not visible here).
    pub created_at: Option<i64>,
    /// Message body.
    pub content: String,
}
3526
/// Grouped message rows stored in a `SmallVec` with inline room for 32
/// entries; longer groups spill to the heap.
pub type LexicalRebuildGroupedMessageRows = SmallVec<[LexicalRebuildGroupedMessageRow; 32]>;

/// Alias that keeps the `SqliteStorage` name available for [`FrankenStorage`].
pub type SqliteStorage = FrankenStorage;
3531
/// Storage handle that owns one frankensqlite connection plus the per-handle
/// and shared caches used by the write paths.
pub struct FrankenStorage {
    /// The connection every method on this handle operates on.
    conn: FrankenConnection,
    /// Path the database was opened from; also used to open additional writer
    /// connections for the ephemeral-writer cache.
    db_path: PathBuf,
    /// Starts `false`; flipped by `mark_ephemeral_writer_preflight_verified`.
    ephemeral_writer_preflight_verified: AtomicBool,
    /// Last recorded WAL autocheckpoint page count, or
    /// `UNSET_INDEX_WRITER_CHECKPOINT_PAGES` when nothing has been recorded.
    index_writer_checkpoint_pages: AtomicI64,
    /// Last recorded busy timeout in milliseconds, or
    /// `UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS` when nothing has been recorded.
    index_writer_busy_timeout_ms: AtomicU64,
    /// Single-slot cache holding a reusable writer connection (see
    /// `CachedEphemeralWriter` for the slot states).
    cached_ephemeral_writer: parking_lot::Mutex<CachedEphemeralWriter>,
    /// Agent keys already ensured, mapped to their ids. Behind an `Arc` so
    /// writers handed out by `acquire_cached_ephemeral_writer` share it.
    ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
    /// Workspace keys already ensured, mapped to their ids; shared likewise.
    ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
    /// Conversation sources already ensured; shared likewise.
    ensured_conversation_sources: Arc<parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>>,
    /// Daily-stats keys already ensured; shared likewise.
    ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
    /// Tri-state memo (`FTS_MESSAGES_PRESENT_*`) of whether `fts_messages`
    /// exists in the schema.
    fts_messages_present_cache: AtomicI8,
}
3546
/// Page count passed to `PRAGMA wal_autocheckpoint` by `apply_config`.
const DEFAULT_WAL_AUTOCHECKPOINT_PAGES: i64 = 4096;
/// Sentinel meaning "no checkpoint page count recorded yet".
const UNSET_INDEX_WRITER_CHECKPOINT_PAGES: i64 = i64::MIN;
/// Sentinel meaning "no busy timeout recorded yet".
const UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS: u64 = 0;
/// `fts_messages` presence has not been probed (or the memo was invalidated).
const FTS_MESSAGES_PRESENT_UNKNOWN: i8 = 0;
/// The last probe concluded `fts_messages` is absent.
const FTS_MESSAGES_PRESENT_ABSENT: i8 = 1;
/// The last probe concluded `fts_messages` is present.
const FTS_MESSAGES_PRESENT_PRESENT: i8 = 2;
3556
/// State of the single-slot writer-connection cache on [`FrankenStorage`].
enum CachedEphemeralWriter {
    /// No connection has been parked and none is checked out.
    Uninitialized,
    /// A connection is parked and ready for reuse.
    Cached(Box<SendFrankenConnection>),
    /// The slot's connection is currently checked out by a caller.
    InUse,
}
3562
3563#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3564struct EnsuredAgentKey {
3565 slug: String,
3566 name: String,
3567 version: Option<String>,
3568 kind: String,
3569}
3570
3571impl EnsuredAgentKey {
3572 fn from_agent(agent: &Agent) -> Self {
3573 Self {
3574 slug: agent.slug.clone(),
3575 name: agent.name.clone(),
3576 version: agent.version.clone(),
3577 kind: agent_kind_str(agent.kind.clone()),
3578 }
3579 }
3580}
3581
/// Cache key identifying a workspace by path and optional display name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredWorkspaceKey {
    /// Workspace path as text.
    path: String,
    /// Display name, when one was supplied.
    display_name: Option<String>,
}

impl EnsuredWorkspaceKey {
    /// Builds a key from an owned path and a borrowed display name; the
    /// display name is copied into an owned `String`.
    fn new(path: String, display_name: Option<&str>) -> Self {
        let display_name = display_name.map(String::from);
        Self { path, display_name }
    }
}
3596
3597#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3598struct EnsuredConversationSourceKey {
3599 id: String,
3600 kind: SourceKind,
3601 host_label: Option<String>,
3602}
3603
3604impl EnsuredConversationSourceKey {
3605 fn from_source(source: &Source) -> Self {
3606 Self {
3607 id: source.id.clone(),
3608 kind: source.kind,
3609 host_label: source.host_label.clone(),
3610 }
3611 }
3612}
3613
/// Cache key identifying one daily-stats row by day, agent slug and source id.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredDailyStatsKey {
    /// Day identifier.
    day_id: i64,
    /// Agent slug component of the key.
    agent_slug: String,
    /// Source id component of the key.
    source_id: String,
}

impl EnsuredDailyStatsKey {
    /// Builds a key, copying both string components into owned `String`s.
    fn new(day_id: i64, agent_slug: &str, source_id: &str) -> Self {
        let agent_slug = String::from(agent_slug);
        let source_id = String::from(source_id);
        Self {
            day_id,
            agent_slug,
            source_id,
        }
    }
}
3630
/// PRAGMA spellings tried, in this order, by `disable_autocommit_retain`; the
/// first one that executes successfully wins.
const AUTOCOMMIT_RETAIN_OFF_PRAGMAS: [&str; 2] = [
    "PRAGMA fsqlite.autocommit_retain = OFF;",
    "PRAGMA autocommit_retain = OFF;",
];
3635
3636fn disable_autocommit_retain<E>(
3637 mut execute: impl FnMut(&'static str) -> std::result::Result<(), E>,
3638) -> Result<&'static str>
3639where
3640 E: std::fmt::Display,
3641{
3642 let mut failures = Vec::new();
3643 for pragma in AUTOCOMMIT_RETAIN_OFF_PRAGMAS {
3644 match execute(pragma) {
3645 Ok(()) => return Ok(pragma),
3646 Err(err) => {
3647 let error = err.to_string();
3648 tracing::debug!(
3649 %pragma,
3650 error = %error,
3651 "autocommit_retain PRAGMA variant not supported"
3652 );
3653 failures.push(format!("{pragma}: {error}"));
3654 }
3655 }
3656 }
3657
3658 Err(anyhow!(
3659 "failed to disable autocommit_retain on frankensqlite connection; \
3660 refusing to keep a long-lived MVCC connection that may accumulate \
3661 unbounded write snapshots. Upgrade frankensqlite to a version that \
3662 supports one of these PRAGMAs or use a short-lived connection path. \
3663 attempts: {}",
3664 failures.join("; ")
3665 ))
3666}
3667
3668impl FrankenStorage {
3669 fn new(conn: FrankenConnection, db_path: PathBuf) -> Self {
3670 Self::new_with_shared_caches(
3671 conn,
3672 db_path,
3673 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3674 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3675 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3676 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3677 )
3678 }
3679
3680 fn new_with_shared_caches(
3681 conn: FrankenConnection,
3682 db_path: PathBuf,
3683 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3684 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3685 ensured_conversation_sources: Arc<
3686 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3687 >,
3688 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3689 ) -> Self {
3690 Self {
3691 conn,
3692 db_path,
3693 ephemeral_writer_preflight_verified: AtomicBool::new(false),
3694 index_writer_checkpoint_pages: AtomicI64::new(UNSET_INDEX_WRITER_CHECKPOINT_PAGES),
3695 index_writer_busy_timeout_ms: AtomicU64::new(UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS),
3696 cached_ephemeral_writer: parking_lot::Mutex::new(CachedEphemeralWriter::Uninitialized),
3697 ensured_agents,
3698 ensured_workspaces,
3699 ensured_conversation_sources,
3700 ensured_daily_stats_keys,
3701 fts_messages_present_cache: AtomicI8::new(FTS_MESSAGES_PRESENT_UNKNOWN),
3702 }
3703 }
3704
3705 fn apply_open_stage_busy_timeout(&self) {
3706 if let Err(err) = self.conn.execute("PRAGMA busy_timeout = 5000;") {
3707 tracing::debug!(
3708 error = %err,
3709 "failed to apply open-stage busy_timeout before migrations"
3710 );
3711 }
3712 }
3713
3714 pub fn open(path: &Path) -> Result<Self> {
3720 if let Some(parent) = path.parent() {
3721 fs::create_dir_all(parent)
3722 .with_context(|| format!("creating db directory {}", parent.display()))?;
3723 }
3724
3725 let path_str = path.to_string_lossy().to_string();
3726 let _doctor_guard =
3727 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3728 let conn = FrankenConnection::open(&path_str)
3729 .with_context(|| format!("opening frankensqlite db at {}", path.display()))?;
3730 let storage = Self::new(conn, path.to_path_buf());
3731 storage.apply_open_stage_busy_timeout();
3732 storage.run_migrations()?;
3733 storage.repair_missing_current_schema_objects()?;
3734 storage.apply_config()?;
3735 Ok(storage)
3736 }
3737
3738 pub fn open_writer(path: &Path) -> Result<Self> {
3744 Self::open_writer_with_shared_caches(
3745 path,
3746 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3747 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3748 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3749 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3750 )
3751 }
3752
3753 fn open_writer_with_shared_caches(
3754 path: &Path,
3755 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3756 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3757 ensured_conversation_sources: Arc<
3758 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3759 >,
3760 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3761 ) -> Result<Self> {
3762 let path_str = path.to_string_lossy().to_string();
3763 let _doctor_guard =
3764 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3765 let conn = FrankenConnection::open(&path_str)
3766 .with_context(|| format!("opening frankensqlite writer at {}", path.display()))?;
3767 let storage = Self::new_with_shared_caches(
3768 conn,
3769 path.to_path_buf(),
3770 ensured_agents,
3771 ensured_workspaces,
3772 ensured_conversation_sources,
3773 ensured_daily_stats_keys,
3774 );
3775 storage.apply_config()?;
3776 Ok(storage)
3777 }
3778
3779 pub(crate) fn acquire_cached_ephemeral_writer(&self) -> Result<(Self, bool)> {
3780 let mut cached = self.cached_ephemeral_writer.lock();
3781 match std::mem::replace(&mut *cached, CachedEphemeralWriter::InUse) {
3782 CachedEphemeralWriter::Cached(conn) => {
3783 let (conn, checkpoint_pages, busy_timeout_ms) = (*conn).into_parts();
3784 let writer = Self::new_with_shared_caches(
3785 conn,
3786 self.db_path.clone(),
3787 Arc::clone(&self.ensured_agents),
3788 Arc::clone(&self.ensured_workspaces),
3789 Arc::clone(&self.ensured_conversation_sources),
3790 Arc::clone(&self.ensured_daily_stats_keys),
3791 );
3792 writer
3793 .index_writer_checkpoint_pages
3794 .store(checkpoint_pages, Ordering::Relaxed);
3795 writer
3796 .index_writer_busy_timeout_ms
3797 .store(busy_timeout_ms, Ordering::Relaxed);
3798 Ok((writer, true))
3799 }
3800 CachedEphemeralWriter::Uninitialized => {
3801 drop(cached);
3802 match Self::open_writer_with_shared_caches(
3803 &self.db_path,
3804 Arc::clone(&self.ensured_agents),
3805 Arc::clone(&self.ensured_workspaces),
3806 Arc::clone(&self.ensured_conversation_sources),
3807 Arc::clone(&self.ensured_daily_stats_keys),
3808 ) {
3809 Ok(writer) => Ok((writer, true)),
3810 Err(err) => {
3811 let mut cached = self.cached_ephemeral_writer.lock();
3812 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3813 *cached = CachedEphemeralWriter::Uninitialized;
3814 }
3815 Err(err)
3816 }
3817 }
3818 }
3819 CachedEphemeralWriter::InUse => {
3820 *cached = CachedEphemeralWriter::InUse;
3821 drop(cached);
3822 Ok((
3823 Self::open_writer_with_shared_caches(
3824 &self.db_path,
3825 Arc::clone(&self.ensured_agents),
3826 Arc::clone(&self.ensured_workspaces),
3827 Arc::clone(&self.ensured_conversation_sources),
3828 Arc::clone(&self.ensured_daily_stats_keys),
3829 )?,
3830 false,
3831 ))
3832 }
3833 }
3834 }
3835
3836 pub(crate) fn release_cached_ephemeral_writer(&self, writer: Self) {
3837 let checkpoint_pages = writer.index_writer_checkpoint_pages.load(Ordering::Relaxed);
3838 let busy_timeout_ms = writer.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
3839 let conn = writer.into_raw();
3840 let mut cached = self.cached_ephemeral_writer.lock();
3841 debug_assert!(
3842 matches!(&*cached, CachedEphemeralWriter::InUse),
3843 "cached ephemeral writer state should be in-use when releasing"
3844 );
3845 *cached = CachedEphemeralWriter::Cached(Box::new(
3846 SendFrankenConnection::new_with_index_writer_state(
3847 conn,
3848 checkpoint_pages,
3849 busy_timeout_ms,
3850 ),
3851 ));
3852 }
3853
3854 pub(crate) fn discard_cached_ephemeral_writer(&self, mut writer: Self) {
3855 writer.close_best_effort_in_place();
3856 let mut cached = self.cached_ephemeral_writer.lock();
3857 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3858 *cached = CachedEphemeralWriter::Uninitialized;
3859 }
3860 }
3861
3862 fn cached_agent_id(&self, key: &EnsuredAgentKey) -> Option<i64> {
3863 self.ensured_agents.lock().get(key).copied()
3864 }
3865
3866 fn mark_agent_ensured(&self, key: EnsuredAgentKey, id: i64) {
3867 self.ensured_agents.lock().insert(key, id);
3868 }
3869
3870 fn cached_workspace_id(&self, key: &EnsuredWorkspaceKey) -> Option<i64> {
3871 self.ensured_workspaces.lock().get(key).copied()
3872 }
3873
3874 fn mark_workspace_ensured(&self, key: EnsuredWorkspaceKey, id: i64) {
3875 self.ensured_workspaces.lock().insert(key, id);
3876 }
3877
3878 fn conversation_source_already_ensured(&self, key: &EnsuredConversationSourceKey) -> bool {
3879 self.ensured_conversation_sources.lock().contains(key)
3880 }
3881
3882 fn mark_conversation_source_ensured(&self, key: EnsuredConversationSourceKey) {
3883 self.ensured_conversation_sources.lock().insert(key);
3884 }
3885
3886 fn daily_stats_key_already_ensured(&self, key: &EnsuredDailyStatsKey) -> bool {
3887 self.ensured_daily_stats_keys.lock().contains(key)
3888 }
3889
3890 fn daily_stats_keys_already_ensured(&self, keys: &[EnsuredDailyStatsKey; 4]) -> bool {
3891 let ensured = self.ensured_daily_stats_keys.lock();
3892 keys.iter().all(|key| ensured.contains(key))
3893 }
3894
3895 fn mark_daily_stats_key_ensured(&self, key: EnsuredDailyStatsKey) {
3896 self.ensured_daily_stats_keys.lock().insert(key);
3897 }
3898
3899 fn fts_messages_present_cached(&self, tx: &FrankenTransaction<'_>) -> bool {
3900 match self.fts_messages_present_cache.load(Ordering::Acquire) {
3901 FTS_MESSAGES_PRESENT_PRESENT => return true,
3902 FTS_MESSAGES_PRESENT_ABSENT => return false,
3903 _ => {}
3904 }
3905
3906 let present = tx
3907 .query_row_map(
3908 "SELECT COUNT(*) FROM sqlite_master
3909 WHERE name = 'fts_messages'
3910 AND rootpage > 0",
3911 fparams![],
3912 |row| row.get_typed::<i64>(0),
3913 )
3914 .map(|count| count > 0)
3915 .unwrap_or_else(|err| {
3916 tracing::debug!(
3917 error = %err,
3918 "failed to probe fts_messages presence; skipping db-resident FTS maintenance"
3919 );
3920 false
3921 });
3922 self.set_fts_messages_present_cache(present);
3923 present
3924 }
3925
3926 fn set_fts_messages_present_cache(&self, present: bool) {
3927 self.fts_messages_present_cache.store(
3928 if present {
3929 FTS_MESSAGES_PRESENT_PRESENT
3930 } else {
3931 FTS_MESSAGES_PRESENT_ABSENT
3932 },
3933 Ordering::Release,
3934 );
3935 }
3936
3937 fn invalidate_fts_messages_present_cache(&self) {
3938 self.fts_messages_present_cache
3939 .store(FTS_MESSAGES_PRESENT_UNKNOWN, Ordering::Release);
3940 }
3941
3942 fn invalidate_conversation_source_cache(&self, source_id: &str) {
3943 self.ensured_conversation_sources
3944 .lock()
3945 .retain(|key| key.id != source_id);
3946 }
3947
3948 fn close_cached_ephemeral_writer_best_effort_in_place(&mut self) {
3949 let cached = self.cached_ephemeral_writer.get_mut();
3950 if let CachedEphemeralWriter::Cached(conn) =
3951 std::mem::replace(cached, CachedEphemeralWriter::Uninitialized)
3952 {
3953 let mut conn = conn;
3954 conn.0.close_best_effort_in_place();
3955 }
3956 }
3957
3958 fn close_cached_ephemeral_writer_without_checkpoint_in_place(&mut self) -> Result<()> {
3959 let cached = self.cached_ephemeral_writer.get_mut();
3960 match std::mem::replace(cached, CachedEphemeralWriter::Uninitialized) {
3961 CachedEphemeralWriter::Cached(mut conn) => conn
3962 .0
3963 .close_without_checkpoint_in_place()
3964 .with_context(|| "closing cached frankensqlite writer without final checkpoint"),
3965 CachedEphemeralWriter::Uninitialized | CachedEphemeralWriter::InUse => Ok(()),
3966 }
3967 }
3968
3969 pub fn open_readonly(path: &Path) -> Result<Self> {
3971 Self::open_readonly_with_doctor_lock_timeout(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)
3972 }
3973
3974 pub fn open_readonly_with_doctor_lock_timeout(path: &Path, timeout: Duration) -> Result<Self> {
3979 let path_str = path.to_string_lossy().to_string();
3980 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
3981 let conn = open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
3982 .with_context(|| format!("opening frankensqlite db readonly at {}", path.display()))?;
3983 let storage = Self::new(conn, path.to_path_buf());
3984 storage.apply_readonly_config()?;
3985 Ok(storage)
3986 }
3987
3988 pub fn close(self) -> Result<()> {
3989 let mut this = self;
3990 this.close_cached_ephemeral_writer_best_effort_in_place();
3991 this.conn
3992 .close()
3993 .with_context(|| "closing frankensqlite connection")
3994 }
3995
3996 pub fn close_without_checkpoint(self) -> Result<()> {
3997 let mut this = self;
3998 this.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3999 this.conn
4000 .close_without_checkpoint()
4001 .with_context(|| "closing frankensqlite connection without final checkpoint")
4002 }
4003
4004 pub fn close_best_effort_in_place(&mut self) {
4005 self.close_cached_ephemeral_writer_best_effort_in_place();
4006 self.conn.close_best_effort_in_place();
4007 }
4008
4009 pub fn close_without_checkpoint_in_place(&mut self) -> Result<()> {
4010 self.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
4011 self.conn
4012 .close_without_checkpoint_in_place()
4013 .with_context(|| "closing frankensqlite connection without final checkpoint")
4014 }
4015
4016 pub fn raw(&self) -> &FrankenConnection {
4018 &self.conn
4019 }
4020
4021 pub fn into_raw(self) -> FrankenConnection {
4024 let mut this = self;
4025 this.close_cached_ephemeral_writer_best_effort_in_place();
4026 this.conn
4027 }
4028
4029 pub fn apply_config(&self) -> Result<()> {
4036 self.conn
4040 .execute("PRAGMA journal_mode = WAL;")
4041 .with_context(|| "setting journal_mode")?;
4042 self.conn
4043 .execute("PRAGMA synchronous = NORMAL;")
4044 .with_context(|| "setting synchronous")?;
4045
4046 self.conn
4048 .execute("PRAGMA cache_size = -65536;")
4049 .with_context(|| "setting cache_size")?;
4050
4051 self.conn
4053 .execute("PRAGMA foreign_keys = ON;")
4054 .with_context(|| "setting foreign_keys")?;
4055
4056 self.conn
4058 .execute("PRAGMA busy_timeout = 5000;")
4059 .with_context(|| "setting busy_timeout")?;
4060
4061 let checkpoint_pragma =
4069 format!("PRAGMA wal_autocheckpoint = {DEFAULT_WAL_AUTOCHECKPOINT_PAGES};");
4070 let _ = self.conn.execute(&checkpoint_pragma);
4071 self.index_writer_checkpoint_pages
4072 .store(DEFAULT_WAL_AUTOCHECKPOINT_PAGES, Ordering::Relaxed);
4073 let _ = self.conn.execute("PRAGMA fsqlite.concurrent_mode = ON;");
4076 let _ = self.conn.execute("PRAGMA concurrent_mode = ON;");
4077 let autocommit_pragma =
4088 disable_autocommit_retain(|pragma| self.conn.execute(pragma).map(|_| ()))?;
4089 tracing::debug!(
4090 pragma = autocommit_pragma,
4091 "disabled frankensqlite autocommit_retain for storage connection"
4092 );
4093
4094 Ok(())
4095 }
4096
4097 fn apply_readonly_config(&self) -> Result<()> {
4098 self.conn
4099 .execute("PRAGMA query_only = 1;")
4100 .with_context(|| "setting query_only")?;
4101 self.conn
4102 .execute("PRAGMA busy_timeout = 5000;")
4103 .with_context(|| "setting busy_timeout")?;
4104 self.conn
4105 .execute("PRAGMA cache_size = -65536;")
4106 .with_context(|| "setting cache_size")?;
4107 self.conn
4108 .execute("PRAGMA foreign_keys = ON;")
4109 .with_context(|| "setting foreign_keys")?;
4110 Ok(())
4111 }
4112
4113 pub fn run_migrations(&self) -> Result<()> {
4131 transition_from_meta_version(&self.conn)?;
4132
4133 let base_result = build_cass_migrations_before_tail_cache()
4134 .run(&self.conn)
4135 .with_context(|| "running base schema migrations")?;
4136
4137 let mut applied = base_result.applied;
4138 if apply_conversation_tail_state_cache_migration(&self.conn)
4139 .with_context(|| "running conversation tail-state cache migration")?
4140 {
4141 applied.push(15);
4142 }
4143
4144 let post_result = build_cass_migrations_after_tail_cache()
4145 .run(&self.conn)
4146 .with_context(|| "running post-tail-cache schema migrations")?;
4147 applied.extend(post_result.applied);
4148
4149 let current = self.schema_version()?;
4150 if !applied.is_empty() {
4151 info!(
4152 applied = ?applied,
4153 current,
4154 was_fresh = base_result.was_fresh,
4155 "frankensqlite schema migrations applied"
4156 );
4157 }
4158
4159 self.sync_meta_schema_version(current)?;
4161
4162 Ok(())
4163 }
4164
4165 fn repair_missing_current_schema_objects(&self) -> Result<()> {
4170 let mut missing_tables = Vec::new();
4171 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
4172 if let Err(err) = self.conn.query(probe_sql) {
4173 if error_indicates_missing_table(&err) {
4174 missing_tables.push(table_name);
4175 continue;
4176 }
4177 return Err(err).with_context(|| {
4178 format!("probing required schema table {table_name} for completeness")
4179 });
4180 }
4181 }
4182
4183 if !missing_tables.is_empty() {
4184 info!(
4185 missing_tables = ?missing_tables,
4186 "repairing missing current-schema tables on an already-versioned cass database"
4187 );
4188
4189 for batch in current_schema_repair_batches_for_missing_tables(&missing_tables)? {
4190 self.conn
4191 .execute_batch(batch.sql)
4192 .with_context(|| format!("repairing current-schema batch {}", batch.name))?;
4193 }
4194
4195 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
4196 if !missing_tables.contains(&table_name) {
4197 continue;
4198 }
4199 self.conn
4200 .query(probe_sql)
4201 .with_context(|| format!("verifying repaired schema table {table_name}"))?;
4202 }
4203 }
4204 self.repair_missing_conversation_token_columns()?;
4205 Ok(())
4206 }
4207
4208 fn repair_missing_conversation_token_columns(&self) -> Result<()> {
4209 let columns = franken_table_column_names(&self.conn, "conversations")
4210 .with_context(|| "inspecting conversations columns for token-summary repair")?;
4211 let mut missing_columns = Vec::new();
4212 for &(column_name, column_type) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
4213 if columns.contains(column_name) {
4214 continue;
4215 }
4216 let sql = format!("ALTER TABLE conversations ADD COLUMN {column_name} {column_type};");
4217 self.conn.execute(&sql).with_context(|| {
4218 format!("adding missing conversations.{column_name} token-summary column")
4219 })?;
4220 missing_columns.push(column_name);
4221 }
4222 if !missing_columns.is_empty() {
4223 tracing::warn!(
4224 target: "cass::schema_repair",
4225 db_path = %self.db_path.display(),
4226 missing_columns = ?missing_columns,
4227 "cass#222: repaired missing conversations token-summary columns"
4228 );
4229 }
4230 Ok(())
4231 }
4232
4233 pub(crate) fn cleanup_orphan_fk_rows(&self) -> Result<OrphanFkCleanupReport> {
4252 let mut report = OrphanFkCleanupReport::default();
4253 let orphan_message_ids = match collect_orphan_message_ids(&self.conn) {
4254 Ok(ids) => ids,
4255 Err(err) if error_indicates_missing_table(&err) => {
4256 tracing::debug!(
4257 target: "cass::fk_repair",
4258 child_table = "messages",
4259 error = %err,
4260 "skipping orphan-message probe (table or column unavailable)"
4261 );
4262 Vec::new()
4263 }
4264 Err(err) => return Err(err),
4265 };
4266 if !orphan_message_ids.is_empty() {
4267 report.record("messages", orphan_message_ids.len() as i64);
4268 }
4269
4270 if !orphan_message_ids.is_empty() {
4271 delete_orphan_message_ids_bisecting_oom(&self.conn, &orphan_message_ids)
4272 .context("deleting orphan message rows and dependent children")?;
4273 }
4274
4275 for entry in ORPHAN_DIRECT_CHILD_TABLES {
4276 loop {
4277 let ids = match collect_direct_orphan_id_page(&self.conn, entry) {
4278 Ok(ids) => ids,
4279 Err(err)
4280 if error_indicates_missing_table(&err)
4281 || error_indicates_missing_column(&err) =>
4282 {
4283 tracing::debug!(
4287 target: "cass::fk_repair",
4288 child_table = entry.child_table,
4289 error = %err,
4290 "skipping orphan probe (table or column unavailable)"
4291 );
4292 break;
4293 }
4294 Err(err) => {
4295 return Err(err).with_context(|| {
4296 format!("probing orphan rows in {}", entry.child_table)
4297 });
4298 }
4299 };
4300 if ids.is_empty() {
4301 break;
4302 }
4303
4304 let deleted = delete_direct_orphan_ids_bisecting_oom(&self.conn, entry, &ids)
4305 .with_context(|| format!("deleting orphan rows from {}", entry.child_table))?;
4306 if deleted == 0 {
4307 break;
4308 }
4309 report.record(
4310 entry.child_table,
4311 i64::try_from(deleted).unwrap_or(i64::MAX),
4312 );
4313 }
4314 }
4315
4316 if report.total == 0 {
4317 return Ok(report);
4318 }
4319
4320 tracing::warn!(
4325 target: "cass::fk_repair",
4326 db_path = %self.db_path.display(),
4327 total_orphans = report.total,
4328 per_table = ?report.per_table,
4329 "cass#202: removed orphan rows left behind by interrupted index transactions"
4330 );
4331
4332 Ok(report)
4333 }
4334
4335 pub fn schema_version(&self) -> Result<i64> {
4337 let rows = self
4338 .conn
4339 .query("SELECT MAX(version) FROM _schema_migrations;")
4340 .with_context(|| "reading schema version from _schema_migrations")?;
4341
4342 if let Some(row) = rows.first()
4343 && let Ok(v) = row.get_typed::<Option<i64>>(0)
4344 {
4345 return Ok(v.unwrap_or(0));
4346 }
4347 Ok(0)
4348 }
4349
4350 fn sync_meta_schema_version(&self, version: i64) -> Result<()> {
4352 if self.conn.query("SELECT key FROM meta LIMIT 1;").is_err() {
4355 return Ok(());
4356 }
4357
4358 if let Ok(rows) = self
4360 .conn
4361 .query("SELECT value FROM meta WHERE key = 'schema_version';")
4362 && let Some(row) = rows.first()
4363 && let Ok(val) = row.get_typed::<String>(0)
4364 && val == version.to_string()
4365 {
4366 return Ok(()); }
4368
4369 self.conn
4370 .execute_compat(
4371 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1);",
4372 &[ParamValue::from(version.to_string())],
4373 )
4374 .with_context(|| "syncing meta schema_version")?;
4375
4376 Ok(())
4377 }
4378
4379 pub fn database_path(&self) -> Result<PathBuf> {
4381 Ok(self.db_path.clone())
4382 }
4383
4384 pub(crate) fn ephemeral_writer_preflight_verified(&self) -> bool {
4385 self.ephemeral_writer_preflight_verified
4386 .load(Ordering::Relaxed)
4387 }
4388
4389 pub(crate) fn mark_ephemeral_writer_preflight_verified(&self) {
4390 self.ephemeral_writer_preflight_verified
4391 .store(true, Ordering::Relaxed);
4392 }
4393
4394 pub(crate) fn index_writer_checkpoint_pages(&self) -> Option<i64> {
4395 let pages = self.index_writer_checkpoint_pages.load(Ordering::Relaxed);
4396 (pages != UNSET_INDEX_WRITER_CHECKPOINT_PAGES).then_some(pages)
4397 }
4398
4399 pub(crate) fn mark_index_writer_checkpoint_pages(&self, pages: i64) {
4400 self.index_writer_checkpoint_pages
4401 .store(pages, Ordering::Relaxed);
4402 }
4403
4404 pub(crate) fn index_writer_busy_timeout_ms(&self) -> Option<u64> {
4405 let timeout_ms = self.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
4406 (timeout_ms != UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS).then_some(timeout_ms)
4407 }
4408
4409 pub(crate) fn mark_index_writer_busy_timeout_ms(&self, timeout_ms: u64) {
4410 self.index_writer_busy_timeout_ms
4411 .store(timeout_ms, Ordering::Relaxed);
4412 }
4413
4414 pub fn open_or_rebuild(path: &Path) -> std::result::Result<Self, MigrationError> {
4416 if let Some(parent) = path.parent() {
4417 fs::create_dir_all(parent)?;
4418 }
4419
4420 if path.exists() {
4421 let check_result = check_schema_compatibility(path);
4422 match check_result {
4423 Ok(SchemaCheck::Compatible) | Ok(SchemaCheck::NeedsMigration) => {
4424 }
4426 Ok(SchemaCheck::NeedsRebuild(reason)) => {
4427 let backup_path = create_backup(path)?;
4428 cleanup_old_backups(path, MAX_BACKUPS)?;
4429 remove_database_files(path)?;
4430 return Err(MigrationError::RebuildRequired {
4431 reason,
4432 backup_path,
4433 });
4434 }
4435 Err(err) if schema_check_error_requires_rebuild(&err) => {
4436 let backup_path = create_backup(path)?;
4437 cleanup_old_backups(path, MAX_BACKUPS)?;
4438 remove_database_files(path)?;
4439 return Err(MigrationError::RebuildRequired {
4440 reason: format!("Database appears corrupted: {err}"),
4441 backup_path,
4442 });
4443 }
4444 Err(err) => return Err(MigrationError::Database(err)),
4445 }
4446 }
4447
4448 let storage = Self::open(path).map_err(|e| MigrationError::Other(e.to_string()))?;
4449 Ok(storage)
4450 }
4451}
4452
4453fn build_cass_migrations_before_tail_cache() -> MigrationRunner {
4469 MigrationRunner::new()
4470 .add(13, "full_schema_v13", MIGRATION_FRESH_SCHEMA)
4471 .add(14, "fts_contentless", MIGRATION_V14)
4472}
4473
4474fn build_cass_migrations_after_tail_cache() -> MigrationRunner {
4475 MigrationRunner::new()
4476 .add(16, "drop_redundant_message_conv_idx", MIGRATION_V16)
4477 .add(17, "drop_message_created_idx", MIGRATION_V17)
4478 .add(18, "conversation_tail_state_hot_table", MIGRATION_V18)
4479 .add(19, "conversation_external_lookup", MIGRATION_V19)
4480 .add(20, "conversation_external_tail_lookup", MIGRATION_V20)
4481}
4482
4483fn schema_migration_is_applied(conn: &FrankenConnection, version: i64) -> Result<bool> {
4484 let rows = conn
4485 .query_with_params(
4486 "SELECT 1 FROM _schema_migrations WHERE version = ?1 LIMIT 1;",
4487 &[SqliteValue::from(version)],
4488 )
4489 .with_context(|| format!("checking schema migration version {version}"))?;
4490 Ok(!rows.is_empty())
4491}
4492
4493fn apply_conversation_tail_state_cache_migration(conn: &FrankenConnection) -> Result<bool> {
4494 conn.execute("BEGIN IMMEDIATE;")
4495 .with_context(|| "starting v15 conversation tail-state migration transaction")?;
4496
4497 let result = (|| -> Result<bool> {
4498 if schema_migration_is_applied(conn, 15)? {
4499 conn.execute("COMMIT;")
4500 .with_context(|| "committing already-applied v15 migration transaction")?;
4501 return Ok(false);
4502 }
4503
4504 let started = Instant::now();
4505 let conversation_columns = franken_table_column_names(conn, "conversations")
4506 .with_context(|| "inspecting conversations columns before v15 migration")?;
4507 if !conversation_columns.contains("last_message_idx") {
4508 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_idx INTEGER;")
4509 .with_context(|| "adding v15 conversations.last_message_idx column")?;
4510 }
4511 if !conversation_columns.contains("last_message_created_at") {
4512 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_created_at INTEGER;")
4513 .with_context(|| "adding v15 conversations.last_message_created_at column")?;
4514 }
4515 conn.execute_batch(MIGRATION_V15_TAIL_STATE_TABLE)
4516 .with_context(|| "applying v15 conversation tail-state table schema")?;
4517 conn.execute_compat(
4518 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
4519 fparams![15_i64, "conversation_tail_state_cache"],
4520 )
4521 .with_context(|| "recording v15 conversation tail-state migration")?;
4522 conn.execute("COMMIT;")
4523 .with_context(|| "committing v15 conversation tail-state migration")?;
4524 info!(
4525 elapsed_ms = started.elapsed().as_millis(),
4526 "applied v15 conversation tail-state cache migration"
4527 );
4528 Ok(true)
4529 })();
4530
4531 if result.is_err() {
4532 let _ = conn.execute("ROLLBACK;");
4533 }
4534
4535 result
4536}
4537
4538fn franken_table_column_names(
4539 conn: &FrankenConnection,
4540 table_name: &str,
4541) -> Result<HashSet<String>> {
4542 if !table_name
4543 .chars()
4544 .all(|c| c.is_ascii_alphanumeric() || c == '_')
4545 {
4546 return Err(anyhow!(
4547 "unsafe table name for PRAGMA table_info: {table_name}"
4548 ));
4549 }
4550
4551 conn.query_map_collect(
4552 &format!("PRAGMA table_info({table_name})"),
4553 fparams![],
4554 |row: &FrankenRow| row.get_typed::<String>(1),
4555 )
4556 .with_context(|| format!("reading PRAGMA table_info({table_name})"))
4557 .map(|columns| columns.into_iter().collect())
4558}
4559
/// SQL batch registered as migration 13 (`full_schema_v13`) by
/// `build_cass_migrations_before_tail_cache`.
///
/// It creates the consolidated base schema in one pass: core tables, sources,
/// conversations/messages, stats, embedding jobs, token analytics, message
/// metrics and usage rollups, followed by all secondary indexes. Every
/// statement is `CREATE … IF NOT EXISTS` or `INSERT OR IGNORE`, so objects
/// and seed rows that already exist are left untouched.
///
/// The text is runtime SQL: it must not contain anything but SQL statements
/// and `--` comments.
const MIGRATION_FRESH_SCHEMA: &str = r"
-- Core tables (V1)
CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    version TEXT,
    kind TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    display_name TEXT
);

-- Sources (V4)
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);

-- Conversations: V1 base + V5 provenance + V7 metadata_bin + V10 token summary
CREATE TABLE IF NOT EXISTS conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    -- V15 columns are included in the fresh schema so fresh DB creation does
    -- not need ALTER TABLE on conversations. That ALTER path can duplicate
    -- provenance autoindex state in frankensqlite when the named unique
    -- provenance index already exists.
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

-- Named unique index avoids autoindex issues if table is ever recreated
CREATE UNIQUE INDEX IF NOT EXISTS idx_conversations_provenance
    ON conversations(source_id, agent_id, external_id);

-- Messages: V1 base + V7 extra_bin
CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    idx INTEGER NOT NULL,
    role TEXT NOT NULL,
    author TEXT,
    created_at INTEGER,
    content TEXT NOT NULL,
    extra_json TEXT,
    extra_bin BLOB,
    UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    PRIMARY KEY (conversation_id, tag_id)
);

-- Daily stats (V8)
CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

-- Embedding jobs (V9)
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');

-- Token usage ledger (V10)
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,
    estimated_cost_usd REAL,
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    data_source TEXT NOT NULL DEFAULT 'api',
    UNIQUE(message_id)
);

-- Token daily stats (V10)
CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',
    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,
    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,
    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,
    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
    session_count INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

-- Model pricing (V10)
CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Message metrics: V11 base + V12 model dimensions
CREATE TABLE IF NOT EXISTS message_metrics (
    message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
    created_at_ms INTEGER NOT NULL,
    hour_id INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    content_tokens_est INTEGER NOT NULL,
    api_input_tokens INTEGER,
    api_output_tokens INTEGER,
    api_cache_read_tokens INTEGER,
    api_cache_creation_tokens INTEGER,
    api_thinking_tokens INTEGER,
    api_service_tier TEXT,
    api_data_source TEXT NOT NULL DEFAULT 'estimated',
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    has_plan INTEGER NOT NULL DEFAULT 0,
    model_name TEXT,
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    provider TEXT NOT NULL DEFAULT 'unknown'
);

-- Hourly rollups: V11 base + V13 plan columns
CREATE TABLE IF NOT EXISTS usage_hourly (
    hour_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

-- Daily rollups: V11 base + V13 plan columns
CREATE TABLE IF NOT EXISTS usage_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

-- Model daily rollups (V12)
CREATE TABLE IF NOT EXISTS usage_models_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

-- All indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
4929
/// One named unit of schema-repair SQL, as listed in
/// `CURRENT_SCHEMA_REPAIR_BATCHES`.
#[derive(Copy, Clone)]
struct SchemaRepairBatch {
    /// Short identifier for the batch (e.g. `"sources"`).
    name: &'static str,
    /// Names of the tables associated with this batch.
    // NOTE(review): how `tables` is consulted (e.g. to decide whether the
    // batch must run) is not visible in this part of the file — confirm.
    tables: &'static [&'static str],
    /// SQL text of the batch.
    sql: &'static str,
}
4936
/// Repair SQL for the `sources` batch in `CURRENT_SCHEMA_REPAIR_BATCHES`.
///
/// Creates the `sources` table if it is missing and seeds the `'local'`
/// source row with `INSERT OR IGNORE`, so an existing row is left untouched.
/// The table definition and seed row duplicate the ones in
/// `MIGRATION_FRESH_SCHEMA`; keep the two in sync.
const CURRENT_SCHEMA_REPAIR_SOURCES_SQL: &str = r"
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
4952
/// Repair SQL for the `daily_stats` batch in `CURRENT_SCHEMA_REPAIR_BATCHES`.
///
/// Creates the `daily_stats` table and its two lookup indexes if they are
/// missing. The definitions duplicate the ones in `MIGRATION_FRESH_SCHEMA`;
/// keep the two in sync.
const CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
4968
/// Repair SQL for the `conversation_external_lookup` batch in
/// `CURRENT_SCHEMA_REPAIR_BATCHES`.
///
/// Creates `conversation_external_lookup` if it is missing and then
/// (re)populates it with one row per conversation that has a non-NULL
/// `external_id`. The lookup key is the length-prefixed concatenation
/// `len(source_id):source_id:agent_id:len(external_id):external_id`;
/// `INSERT OR REPLACE` overwrites any row already stored under the same key.
// NOTE(review): any Rust code that builds lookup keys must produce exactly
// this format (SQLite `length()` on TEXT counts characters) — confirm
// against the key builder elsewhere in this file.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
    CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
    CAST(agent_id AS TEXT) || ':' ||
    CAST(length(external_id) AS TEXT) || ':' || external_id,
    id
FROM conversations
WHERE external_id IS NOT NULL;
";
4984
/// Repair SQL that restores the external tail-lookup table.
///
/// Creates `conversation_tail_state` and `conversation_external_tail_lookup`
/// if they are missing, then (re)populates the lookup with one row per
/// conversation that has a non-NULL `external_id`. The key format is
/// `len(source_id):source_id:agent_id:len(external_id):external_id`. Tail
/// columns come from a `LEFT JOIN` on `conversation_tail_state`, so
/// conversations without a tail-state row get NULL tail values.
// NOTE(review): presumably referenced by the
// "conversation_external_tail_lookup" entry of CURRENT_SCHEMA_REPAIR_BATCHES —
// confirm against that table.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
    lookup_key,
    conversation_id,
    ended_at,
    last_message_idx,
    last_message_created_at
)
SELECT
    CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
    CAST(c.agent_id AS TEXT) || ':' ||
    CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
    c.id,
    ts.ended_at,
    ts.last_message_idx,
    ts.last_message_created_at
FROM conversations c
LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
WHERE c.external_id IS NOT NULL;
";
5020
/// Repair SQL that restores the `embedding_jobs` table.
///
/// Creates the table if it is missing, plus the partial unique index
/// `idx_embedding_jobs_active`, which allows at most one `pending`/`running`
/// job per `(db_path, model_id)`. The definitions duplicate the ones in
/// `MIGRATION_FRESH_SCHEMA`; keep the two in sync.
// NOTE(review): presumably referenced from CURRENT_SCHEMA_REPAIR_BATCHES —
// the corresponding entry is not visible in this part of the file.
const CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
5039
/// Repair SQL that restores the token-analytics tables.
///
/// Creates `token_usage`, `token_daily_stats` and `model_pricing` (plus the
/// `token_usage` / `token_daily_stats` indexes) if they are missing, and
/// seeds the default pricing rows with `INSERT OR IGNORE`, so existing rows
/// with the same `(model_pattern, effective_date)` are kept. The definitions
/// and seed rows duplicate the ones in `MIGRATION_FRESH_SCHEMA`; keep the
/// two in sync.
// NOTE(review): presumably referenced from CURRENT_SCHEMA_REPAIR_BATCHES —
// the corresponding entry is not visible in this part of the file.
const CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,
    estimated_cost_usd REAL,
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    data_source TEXT NOT NULL DEFAULT 'api',
    UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',
    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,
    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,
    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,
    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
    session_count INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
";
5125
/// Repair SQL that restores the message-metrics and usage-rollup tables.
///
/// Creates `message_metrics`, `usage_hourly`, `usage_daily` and
/// `usage_models_daily` if they are missing, followed by their secondary
/// indexes. The definitions duplicate the ones in `MIGRATION_FRESH_SCHEMA`;
/// keep the two in sync.
// NOTE(review): presumably referenced from CURRENT_SCHEMA_REPAIR_BATCHES —
// the corresponding entry is not visible in this part of the file.
const CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS message_metrics (
    message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
    created_at_ms INTEGER NOT NULL,
    hour_id INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    content_tokens_est INTEGER NOT NULL,
    api_input_tokens INTEGER,
    api_output_tokens INTEGER,
    api_cache_read_tokens INTEGER,
    api_cache_creation_tokens INTEGER,
    api_thinking_tokens INTEGER,
    api_service_tier TEXT,
    api_data_source TEXT NOT NULL DEFAULT 'estimated',
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    has_plan INTEGER NOT NULL DEFAULT 0,
    model_name TEXT,
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    provider TEXT NOT NULL DEFAULT 'unknown'
);

CREATE TABLE IF NOT EXISTS usage_hourly (
    hour_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_models_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
5251
/// Registry of the repair batches available for the current schema.
///
/// Every entry carries a batch name, the tables that batch is responsible
/// for, and the SQL constant associated with it.
/// `current_schema_repair_batches_for_missing_tables` walks this slice in
/// declaration order and selects each batch that owns at least one missing
/// table.
const CURRENT_SCHEMA_REPAIR_BATCHES: &[SchemaRepairBatch] = &[
    SchemaRepairBatch {
        name: "sources",
        tables: &["sources"],
        sql: CURRENT_SCHEMA_REPAIR_SOURCES_SQL,
    },
    SchemaRepairBatch {
        name: "daily_stats",
        tables: &["daily_stats"],
        sql: CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL,
    },
    SchemaRepairBatch {
        name: "conversation_external_lookup",
        tables: &["conversation_external_lookup"],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL,
    },
    // This batch owns two tables; either one being missing selects it.
    SchemaRepairBatch {
        name: "conversation_external_tail_lookup",
        tables: &[
            "conversation_tail_state",
            "conversation_external_tail_lookup",
        ],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL,
    },
    SchemaRepairBatch {
        name: "embedding_jobs",
        tables: &["embedding_jobs"],
        sql: CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL,
    },
    SchemaRepairBatch {
        name: "token_analytics",
        tables: &["token_usage", "token_daily_stats", "model_pricing"],
        sql: CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL,
    },
    // The batch name ("message_rollups") differs from its SQL constant's name
    // (MESSAGE_METRICS); it covers the metrics table plus the usage rollups.
    SchemaRepairBatch {
        name: "message_rollups",
        tables: &[
            "message_metrics",
            "usage_hourly",
            "usage_daily",
            "usage_models_daily",
        ],
        sql: CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL,
    },
];
5297
5298fn current_schema_repair_batches_for_missing_tables(
5299 missing_tables: &[&'static str],
5300) -> Result<Vec<&'static SchemaRepairBatch>> {
5301 let missing_set: HashSet<&'static str> = missing_tables.iter().copied().collect();
5302 let mut selected_batches = Vec::new();
5303 let mut covered_tables = HashSet::new();
5304
5305 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
5306 if !batch
5307 .tables
5308 .iter()
5309 .any(|table_name| missing_set.contains(table_name))
5310 {
5311 continue;
5312 }
5313 selected_batches.push(batch);
5314 covered_tables.extend(batch.tables.iter().copied());
5315 }
5316
5317 for &table_name in missing_tables {
5318 if !covered_tables.contains(table_name) {
5319 return Err(anyhow!(
5320 "no current-schema repair batch registered for missing table {table_name}"
5321 ));
5322 }
5323 }
5324
5325 Ok(selected_batches)
5326}
5327
/// `(version, name)` pairs for the schema migrations, in ascending version
/// order.
///
/// `transition_from_meta_version` inserts these rows into
/// `_schema_migrations` and stops at the first version above its target, so
/// the entries must stay sorted by version.
const MIGRATION_NAMES: [(i64, &str); 20] = [
    (1, "core_tables"),
    (2, "fts_messages"),
    (3, "fts_messages_rebuild"),
    (4, "sources"),
    (5, "provenance_columns"),
    (6, "source_path_index"),
    (7, "msgpack_columns"),
    (8, "daily_stats"),
    (9, "embedding_jobs"),
    (10, "token_analytics"),
    (11, "message_metrics"),
    (12, "model_dimensions"),
    (13, "plan_token_rollups"),
    (14, "fts_contentless"),
    (15, "conversation_tail_state_cache"),
    (16, "drop_redundant_message_conv_idx"),
    (17, "drop_message_created_idx"),
    (18, "conversation_tail_state_hot_table"),
    (19, "conversation_external_lookup"),
    (20, "conversation_external_tail_lookup"),
];
5351
5352fn transition_from_meta_version(conn: &FrankenConnection) -> Result<()> {
5371 if conn
5375 .query("SELECT version FROM \"_schema_migrations\";")
5376 .is_ok()
5377 {
5378 return Ok(());
5379 }
5380
5381 if conn.query("SELECT key FROM meta;").is_err() {
5383 return Ok(());
5385 }
5386
5387 let rows = conn
5389 .query("SELECT value FROM meta WHERE key = 'schema_version';")
5390 .with_context(|| "reading schema_version from meta")?;
5391
5392 let current_version: i64 = rows
5393 .first()
5394 .and_then(|row| row.get_typed::<String>(0).ok())
5395 .and_then(|s| s.parse().ok())
5396 .unwrap_or(0);
5397
5398 if current_version == 0 {
5399 if conn.query("SELECT id FROM conversations LIMIT 1;").is_err() {
5401 return Ok(());
5403 }
5404
5405 info!("meta.schema_version=0 but tables exist; skipping transition (corrupted state)");
5408 return Ok(());
5409 }
5410
5411 info!(
5413 current_version,
5414 "transitioning schema tracking from meta table to _schema_migrations"
5415 );
5416
5417 conn.execute(
5418 "CREATE TABLE IF NOT EXISTS _schema_migrations (\
5419 version INTEGER PRIMARY KEY, \
5420 name TEXT NOT NULL, \
5421 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\
5422 );",
5423 )
5424 .with_context(|| "creating _schema_migrations table for transition")?;
5425
5426 let backfill_through_version = if (10..13).contains(¤t_version) {
5427 13
5428 } else {
5429 current_version
5430 };
5431
5432 for &(version, name) in &MIGRATION_NAMES {
5433 if version > backfill_through_version {
5434 break;
5435 }
5436 conn.execute_compat(
5437 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
5438 &[ParamValue::from(version), ParamValue::from(name)],
5439 )
5440 .with_context(|| format!("backfilling _schema_migrations version {version}"))?;
5441 }
5442
5443 info!(
5444 current_version,
5445 backfill_through_version,
5446 "schema version transition complete: backfilled legacy meta schema versions"
5447 );
5448
5449 Ok(())
5450}
5451
/// `(table name, probe query)` pairs for tables the current schema requires.
///
/// Each probe selects one or two columns from its table with `LIMIT 1`.
/// NOTE(review): presumably a probe that fails marks the table as missing and
/// feeds `current_schema_repair_batches_for_missing_tables`; the consumer is
/// not visible in this part of the file, so confirm against the caller.
const REQUIRED_CURRENT_SCHEMA_TABLE_PROBES: &[(&str, &str)] = &[
    ("sources", "SELECT id FROM sources LIMIT 1;"),
    ("daily_stats", "SELECT day_id FROM daily_stats LIMIT 1;"),
    (
        "conversation_external_lookup",
        "SELECT lookup_key FROM conversation_external_lookup LIMIT 1;",
    ),
    (
        "conversation_tail_state",
        "SELECT conversation_id FROM conversation_tail_state LIMIT 1;",
    ),
    (
        "conversation_external_tail_lookup",
        "SELECT lookup_key, last_message_idx FROM conversation_external_tail_lookup LIMIT 1;",
    ),
    ("embedding_jobs", "SELECT id FROM embedding_jobs LIMIT 1;"),
    ("token_usage", "SELECT id FROM token_usage LIMIT 1;"),
    (
        "token_daily_stats",
        "SELECT day_id FROM token_daily_stats LIMIT 1;",
    ),
    (
        "model_pricing",
        "SELECT model_pattern FROM model_pricing LIMIT 1;",
    ),
    (
        "message_metrics",
        "SELECT message_id FROM message_metrics LIMIT 1;",
    ),
    ("usage_hourly", "SELECT hour_id FROM usage_hourly LIMIT 1;"),
    ("usage_daily", "SELECT day_id FROM usage_daily LIMIT 1;"),
    (
        "usage_models_daily",
        "SELECT day_id FROM usage_models_daily LIMIT 1;",
    ),
];
5488
/// `(column name, SQL type)` pairs for the token/usage summary columns.
///
/// NOTE(review): the constant name suggests these columns belong to the
/// `conversations` table and are added when absent; the code that consumes
/// this list is not visible here, so confirm before relying on that.
const REQUIRED_CONVERSATION_TOKEN_COLUMNS: &[(&str, &str)] = &[
    ("total_input_tokens", "INTEGER"),
    ("total_output_tokens", "INTEGER"),
    ("total_cache_read_tokens", "INTEGER"),
    ("total_cache_creation_tokens", "INTEGER"),
    ("grand_total_tokens", "INTEGER"),
    ("estimated_cost_usd", "REAL"),
    ("primary_model", "TEXT"),
    ("api_call_count", "INTEGER"),
    ("tool_call_count", "INTEGER"),
    ("user_message_count", "INTEGER"),
    ("assistant_message_count", "INTEGER"),
];
5502
/// Reports whether the rendered error text mentions a missing table.
///
/// The check is a case-insensitive (ASCII) search for `no such table` in the
/// error's `Display` output.
fn error_indicates_missing_table(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.contains("no such table")
}
5508
/// Reports whether the rendered error text mentions a missing column.
///
/// The check is a case-insensitive (ASCII) search for `no such column` in the
/// error's `Display` output.
fn error_indicates_missing_column(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.contains("no such column")
}
5514
/// Maximum number of ids handled per orphan-cleanup step: the number of
/// placeholders in one `IN (...)` delete, the slice size used when splitting
/// id lists, and the `LIMIT` used when paging direct orphan ids.
const ORPHAN_FK_ID_CHUNK_SIZE: usize = 256;
5516
/// Collects the ids of every message whose `conversation_id` has no matching
/// row in `conversations`.
///
/// The function reads the smallest and largest `conversation_id` present in
/// `messages`, lists the existing conversation ids inside that range in
/// ascending order, and then gathers the messages that fall into each gap
/// between consecutive existing conversation ids.
///
/// Returns an empty vector when the minimum lookup yields no row.
///
/// # Errors
///
/// Fails when any of the range, parent-listing, or gap queries fails.
fn collect_orphan_message_ids(conn: &FrankenConnection) -> Result<Vec<i64>> {
    // Lower bound of the conversation ids referenced by messages; `None`
    // when the query produced no row.
    let min_conversation_id = conn
        .query_map_collect(
            "SELECT conversation_id
             FROM messages
             ORDER BY conversation_id ASC
             LIMIT 1",
            fparams![],
            |row| row.get_typed(0),
        )
        .context("finding minimum message conversation id for orphan FK cleanup")?
        .into_iter()
        .next();
    let Some(min_conversation_id) = min_conversation_id else {
        return Ok(Vec::new());
    };
    // Upper bound of the conversation ids referenced by messages.
    let max_conversation_id: i64 = conn
        .query_row_map(
            "SELECT conversation_id
             FROM messages
             ORDER BY conversation_id DESC
             LIMIT 1",
            fparams![],
            |row| row.get_typed(0),
        )
        .context("finding maximum message conversation id for orphan FK cleanup")?;

    // Existing parents inside [min, max], ascending, so gaps can be walked
    // left to right.
    let parent_conversation_ids: Vec<i64> = conn
        .query_map_collect(
            "SELECT id
             FROM conversations
             WHERE id BETWEEN ?1 AND ?2
             ORDER BY id",
            fparams![min_conversation_id, max_conversation_id],
            |row| row.get_typed(0),
        )
        .context("listing parent conversation ids for orphan FK cleanup")?;

    let mut message_ids = Vec::new();
    // First conversation id not yet known to have a parent.
    let mut gap_start = min_conversation_id;
    for parent_id in parent_conversation_ids {
        // Defensive: ignore ids already passed (e.g. duplicates).
        if parent_id < gap_start {
            continue;
        }
        // Defensive: the query already bounds ids by `max_conversation_id`.
        if parent_id > max_conversation_id {
            break;
        }
        // Ids in [gap_start, parent_id - 1] have no parent conversation.
        if gap_start < parent_id {
            collect_message_ids_for_conversation_gap(
                conn,
                gap_start,
                parent_id.saturating_sub(1),
                &mut message_ids,
            )?;
        }
        // Nothing can follow i64::MAX, and `parent_id + 1` would overflow.
        if parent_id == i64::MAX {
            return Ok(message_ids);
        }
        gap_start = parent_id + 1;
    }
    // Trailing gap after the last existing parent, up to the largest
    // conversation id referenced by messages.
    if gap_start <= max_conversation_id {
        collect_message_ids_for_conversation_gap(
            conn,
            gap_start,
            max_conversation_id,
            &mut message_ids,
        )?;
    }

    Ok(message_ids)
}
5588
5589fn collect_message_ids_for_conversation_gap(
5590 conn: &FrankenConnection,
5591 gap_start: i64,
5592 gap_end: i64,
5593 message_ids: &mut Vec<i64>,
5594) -> Result<()> {
5595 let (sql, params) = if gap_start == gap_end {
5596 (
5597 "SELECT id FROM messages WHERE conversation_id = ?1",
5598 vec![SqliteValue::from(gap_start)],
5599 )
5600 } else {
5601 (
5602 "SELECT id FROM messages WHERE conversation_id BETWEEN ?1 AND ?2",
5603 vec![SqliteValue::from(gap_start), SqliteValue::from(gap_end)],
5604 )
5605 };
5606 let rows = conn.query_with_params(sql, ¶ms).with_context(|| {
5607 format!("listing orphan message ids for conversation-id gap {gap_start}..={gap_end}")
5608 })?;
5609 message_ids.reserve(rows.len());
5610 for row in rows {
5611 message_ids.push(row.get_typed(0)?);
5612 }
5613 Ok(())
5614}
5615
5616fn delete_rows_by_i64_chunks(
5617 tx: &FrankenTransaction<'_>,
5618 delete_many_sql_prefix: &'static str,
5619 ids: &[i64],
5620) -> Result<usize> {
5621 if ids.is_empty() {
5622 return Ok(0);
5623 }
5624
5625 let full_chunk_sql = delete_rows_by_i64_sql(delete_many_sql_prefix, ORPHAN_FK_ID_CHUNK_SIZE);
5626 let tail_len = ids.len() % ORPHAN_FK_ID_CHUNK_SIZE;
5627 let tail_sql =
5628 (tail_len != 0).then(|| delete_rows_by_i64_sql(delete_many_sql_prefix, tail_len));
5629
5630 let mut deleted = 0;
5631 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5632 let sql = if chunk.len() == ORPHAN_FK_ID_CHUNK_SIZE {
5633 &full_chunk_sql
5634 } else {
5635 tail_sql.as_ref().unwrap_or(&full_chunk_sql)
5636 };
5637 let params = chunk
5638 .iter()
5639 .map(|id| SqliteValue::from(*id))
5640 .collect::<Vec<_>>();
5641 deleted += tx.execute_with_params(sql, ¶ms)?;
5642 }
5643 Ok(deleted)
5644}
5645
5646fn delete_rows_by_i64_sql(delete_many_sql_prefix: &'static str, count: usize) -> String {
5647 let placeholders = sql_placeholders(count);
5648 format!("{delete_many_sql_prefix} ({placeholders})")
5649}
5650
/// Returns `count` anonymous SQL placeholders separated by `", "`
/// (for example `?, ?, ?`); an empty string when `count` is zero.
fn sql_placeholders(count: usize) -> String {
    std::iter::repeat("?")
        .take(count)
        .collect::<Vec<_>>()
        .join(", ")
}
5654
5655fn delete_orphan_message_ids_bisecting_oom(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5656 let mut deleted = 0usize;
5657 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5658 deleted = deleted.saturating_add(delete_orphan_message_id_chunk(conn, chunk)?);
5659 }
5660 Ok(deleted)
5661}
5662
5663fn delete_orphan_message_id_chunk(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5664 if ids.is_empty() {
5665 return Ok(0);
5666 }
5667
5668 match delete_orphan_message_id_chunk_once(conn, ids) {
5669 Ok(deleted) => Ok(deleted),
5670 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5671 let split_at = ids.len() / 2;
5672 tracing::warn!(
5673 target: "cass::fk_repair",
5674 rows = ids.len(),
5675 left = split_at,
5676 right = ids.len().saturating_sub(split_at),
5677 error = %err,
5678 "orphan-message cleanup ran out of memory; retrying as smaller batches"
5679 );
5680 let left = delete_orphan_message_id_chunk(conn, &ids[..split_at])?;
5681 let right = delete_orphan_message_id_chunk(conn, &ids[split_at..])?;
5682 Ok(left.saturating_add(right))
5683 }
5684 Err(err) => Err(err),
5685 }
5686}
5687
5688fn delete_orphan_message_id_chunk_once(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5689 let mut tx = conn.transaction()?;
5690 let mut deleted = 0usize;
5691 for entry in ORPHAN_MESSAGE_DEPENDENT_TABLES {
5692 match delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids) {
5693 Ok(count) => {
5694 deleted = deleted.saturating_add(count);
5695 }
5696 Err(err) if error_indicates_missing_table(&err) => {
5697 tracing::debug!(
5698 target: "cass::fk_repair",
5699 child_table = entry.child_table,
5700 error = %err,
5701 "skipping orphan-message dependent cleanup (table unavailable)"
5702 );
5703 }
5704 Err(err) => {
5705 return Err(err).with_context(|| {
5706 format!(
5707 "deleting rows from {} that depend on orphan messages",
5708 entry.child_table
5709 )
5710 });
5711 }
5712 }
5713 }
5714 deleted = deleted.saturating_add(
5715 delete_rows_by_i64_chunks(&tx, "DELETE FROM messages WHERE id IN", ids)
5716 .context("deleting orphan rows from messages")?,
5717 );
5718 tx.commit()?;
5719 Ok(deleted)
5720}
5721
5722fn collect_direct_orphan_id_page(
5723 conn: &FrankenConnection,
5724 entry: &'static OrphanFkTable,
5725) -> Result<Vec<i64>> {
5726 Ok(conn.query_map_collect(
5727 entry.orphan_id_page_sql,
5728 fparams![i64::try_from(ORPHAN_FK_ID_CHUNK_SIZE).unwrap_or(i64::MAX)],
5729 |row| row.get_typed(0),
5730 )?)
5731}
5732
5733fn delete_direct_orphan_ids_bisecting_oom(
5734 conn: &FrankenConnection,
5735 entry: &'static OrphanFkTable,
5736 ids: &[i64],
5737) -> Result<usize> {
5738 let mut deleted = 0usize;
5739 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5740 deleted = deleted.saturating_add(delete_direct_orphan_id_chunk(conn, entry, chunk)?);
5741 }
5742 Ok(deleted)
5743}
5744
5745fn delete_direct_orphan_id_chunk(
5746 conn: &FrankenConnection,
5747 entry: &'static OrphanFkTable,
5748 ids: &[i64],
5749) -> Result<usize> {
5750 if ids.is_empty() {
5751 return Ok(0);
5752 }
5753
5754 match delete_direct_orphan_id_chunk_once(conn, entry, ids) {
5755 Ok(deleted) => Ok(deleted),
5756 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5757 let split_at = ids.len() / 2;
5758 tracing::warn!(
5759 target: "cass::fk_repair",
5760 child_table = entry.child_table,
5761 rows = ids.len(),
5762 left = split_at,
5763 right = ids.len().saturating_sub(split_at),
5764 error = %err,
5765 "direct orphan cleanup ran out of memory; retrying as smaller batches"
5766 );
5767 let left = delete_direct_orphan_id_chunk(conn, entry, &ids[..split_at])?;
5768 let right = delete_direct_orphan_id_chunk(conn, entry, &ids[split_at..])?;
5769 Ok(left.saturating_add(right))
5770 }
5771 Err(err) => Err(err),
5772 }
5773}
5774
5775fn delete_direct_orphan_id_chunk_once(
5776 conn: &FrankenConnection,
5777 entry: &'static OrphanFkTable,
5778 ids: &[i64],
5779) -> Result<usize> {
5780 let mut tx = conn.transaction()?;
5781 let deleted = delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids)?;
5782 tx.commit()?;
5783 Ok(deleted)
5784}
5785
/// Describes one child table whose rows can reference a missing parent row,
/// together with the SQL needed to find and delete those rows.
struct OrphanFkTable {
    /// Name of the child table; used in log fields.
    child_table: &'static str,
    /// Query returning one page of orphaned foreign-key values. It takes a
    /// single parameter (`?1`), the page size.
    orphan_id_page_sql: &'static str,
    /// `DELETE ... WHERE <column> IN` prefix; a parenthesised placeholder
    /// list is appended by `delete_rows_by_i64_sql`.
    delete_many_sql_prefix: &'static str,
}
5796
/// Child tables checked for rows whose parent is missing.
///
/// The first three entries look for `message_id` values with no matching
/// `messages.id`; the last looks for `conversation_id` values in
/// `conversation_tags` with no matching `conversations.id`. Every page query
/// is ordered by the foreign-key column and limited by `?1`.
const ORPHAN_DIRECT_CHILD_TABLES: &[OrphanFkTable] = &[
    OrphanFkTable {
        child_table: "message_metrics",
        orphan_id_page_sql: "SELECT message_id FROM message_metrics \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM messages \
                                 WHERE messages.id = message_metrics.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "token_usage",
        orphan_id_page_sql: "SELECT message_id FROM token_usage \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM messages \
                                 WHERE messages.id = token_usage.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "snippets",
        orphan_id_page_sql: "SELECT message_id FROM snippets \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM messages \
                                 WHERE messages.id = snippets.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "conversation_tags",
        orphan_id_page_sql: "SELECT conversation_id FROM conversation_tags \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM conversations \
                                 WHERE conversations.id = conversation_tags.conversation_id\
                             ) \
                             ORDER BY conversation_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM conversation_tags WHERE conversation_id IN",
    },
];
5843
/// A table whose rows must be deleted before the orphan messages they
/// reference are deleted.
struct OrphanMessageDependentTable {
    /// Name of the dependent table; used in log fields and error context.
    child_table: &'static str,
    /// `DELETE ... WHERE message_id IN` prefix; a parenthesised placeholder
    /// list is appended by `delete_rows_by_i64_sql`.
    delete_many_sql_prefix: &'static str,
}
5848
/// Tables cleaned by `delete_orphan_message_id_chunk_once`, in this order,
/// before the orphan rows in `messages` themselves are deleted.
const ORPHAN_MESSAGE_DEPENDENT_TABLES: &[OrphanMessageDependentTable] = &[
    OrphanMessageDependentTable {
        child_table: "message_metrics",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "token_usage",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "snippets",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
];
5863
/// Running tally of rows handled by orphan foreign-key cleanup.
#[derive(Debug, Default, Clone)]
pub(crate) struct OrphanFkCleanupReport {
    /// Saturating sum of every count passed to `record`.
    pub total: i64,
    /// One `(table, count)` entry per table, in the order each table was
    /// first recorded.
    pub per_table: Vec<(&'static str, i64)>,
}

impl OrphanFkCleanupReport {
    /// Adds `count` to the entry for `child_table` (creating the entry when
    /// the table has not been seen yet) and to the overall total. Both
    /// additions saturate instead of overflowing.
    fn record(&mut self, child_table: &'static str, count: i64) {
        self.total = self.total.saturating_add(count);

        let slot = self
            .per_table
            .iter_mut()
            .find(|(table, _)| *table == child_table);
        match slot {
            Some((_, running)) => *running = running.saturating_add(count),
            None => self.per_table.push((child_table, count)),
        }
    }
}
5894
/// Result of inserting a conversation and its messages.
///
/// NOTE(review): the producer of this struct is not visible in this part of
/// the file; the field notes below are inferred from the field names and
/// should be confirmed against it.
pub struct InsertOutcome {
    /// Database id of the conversation the insert applied to.
    pub conversation_id: i64,
    /// Presumably `true` when a new conversation row was created rather than
    /// an existing one reused.
    pub conversation_inserted: bool,
    /// Presumably the message `idx` values that were actually inserted.
    pub inserted_indices: Vec<i64>,
}
5900
/// Test-only timing breakdown of the message-insert stage.
///
/// The five durations are printed by
/// `InsertConversationTreePerfProfile::log_summary` as the `msg_*_ms` fields.
/// NOTE(review): the code that fills in the counters and durations is not
/// visible here; their exact meaning should be confirmed there.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct MessageInsertSubstageProfile {
    single_row_calls: usize,
    batch_calls: usize,
    batch_rows: usize,
    /// Reported as `msg_payload_ms`.
    payload_duration: Duration,
    /// Reported as `msg_sql_ms`.
    sql_build_duration: Duration,
    /// Reported as `msg_param_ms`.
    param_build_duration: Duration,
    /// Reported as `msg_execute_ms`.
    execute_duration: Duration,
    /// Reported as `msg_rowid_ms`.
    rowid_duration: Duration,
}
5913
/// Test-only accumulator of per-stage timings for conversation-tree inserts.
///
/// `log_summary` prints every field. All `*_duration` stage fields except
/// `total_duration` are summed there to derive a residual (time not
/// attributed to any tracked stage).
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct InsertConversationTreePerfProfile {
    /// Number of profiled calls; the divisor for the `avg_*` figures.
    invocations: usize,
    messages: usize,
    inserted_messages: usize,
    /// Overall time; the stage durations below are subtracted from it to
    /// obtain the residual.
    total_duration: Duration,
    source_duration: Duration,
    tx_open_duration: Duration,
    existing_lookup_duration: Duration,
    existing_idx_lookup_duration: Duration,
    existing_replay_lookup_duration: Duration,
    dedupe_filter_duration: Duration,
    conversation_row_duration: Duration,
    message_insert_duration: Duration,
    /// Finer-grained breakdown of the message-insert stage; not added to the
    /// accounted total separately.
    message_insert_breakdown: MessageInsertSubstageProfile,
    snippet_insert_duration: Duration,
    fts_entry_duration: Duration,
    fts_flush_duration: Duration,
    analytics_duration: Duration,
    commit_duration: Duration,
}
5936
#[cfg(test)]
impl InsertConversationTreePerfProfile {
    /// Converts a duration to fractional milliseconds.
    fn millis(duration: Duration) -> f64 {
        duration.as_secs_f64() * 1000.0
    }

    /// Prints a single `CASS_INSERT_TREE_STAGE_PROFILE` line to stderr with
    /// the counters, every stage duration in milliseconds, the residual, and
    /// per-call averages for selected stages.
    ///
    /// `label` is emitted as the `label=` field of that line.
    fn log_summary(&self, label: &str) {
        // Clamp to 1 so the averages never divide by zero.
        let calls = self.invocations.max(1) as f64;
        // Sum of all individually tracked stages.
        let accounted_duration = self.source_duration
            + self.tx_open_duration
            + self.existing_lookup_duration
            + self.existing_idx_lookup_duration
            + self.existing_replay_lookup_duration
            + self.dedupe_filter_duration
            + self.conversation_row_duration
            + self.message_insert_duration
            + self.snippet_insert_duration
            + self.fts_entry_duration
            + self.fts_flush_duration
            + self.analytics_duration
            + self.commit_duration;
        // Time not attributed to any stage; saturates at zero if the stages
        // add up to more than the total.
        let residual_duration = self.total_duration.saturating_sub(accounted_duration);
        // The arguments below must stay in the same order as the
        // placeholders in the format string.
        eprintln!(
            concat!(
                "CASS_INSERT_TREE_STAGE_PROFILE ",
                "label={} calls={} messages={} inserted_messages={} ",
                "total_ms={:.3} source_ms={:.3} tx_open_ms={:.3} existing_lookup_ms={:.3} ",
                "existing_idx_lookup_ms={:.3} existing_replay_lookup_ms={:.3} dedupe_filter_ms={:.3} ",
                "conversation_row_ms={:.3} message_insert_ms={:.3} snippet_insert_ms={:.3} ",
                "fts_entry_ms={:.3} fts_flush_ms={:.3} analytics_ms={:.3} commit_ms={:.3} ",
                "msg_payload_ms={:.3} msg_sql_ms={:.3} msg_param_ms={:.3} msg_execute_ms={:.3} msg_rowid_ms={:.3} ",
                "residual_ms={:.3} avg_total_ms={:.3} avg_message_insert_ms={:.3} ",
                "avg_msg_execute_ms={:.3} avg_msg_payload_ms={:.3} avg_snippet_insert_ms={:.3} avg_fts_entry_ms={:.3} avg_commit_ms={:.3}"
            ),
            label,
            self.invocations,
            self.messages,
            self.inserted_messages,
            Self::millis(self.total_duration),
            Self::millis(self.source_duration),
            Self::millis(self.tx_open_duration),
            Self::millis(self.existing_lookup_duration),
            Self::millis(self.existing_idx_lookup_duration),
            Self::millis(self.existing_replay_lookup_duration),
            Self::millis(self.dedupe_filter_duration),
            Self::millis(self.conversation_row_duration),
            Self::millis(self.message_insert_duration),
            Self::millis(self.snippet_insert_duration),
            Self::millis(self.fts_entry_duration),
            Self::millis(self.fts_flush_duration),
            Self::millis(self.analytics_duration),
            Self::millis(self.commit_duration),
            Self::millis(self.message_insert_breakdown.payload_duration),
            Self::millis(self.message_insert_breakdown.sql_build_duration),
            Self::millis(self.message_insert_breakdown.param_build_duration),
            Self::millis(self.message_insert_breakdown.execute_duration),
            Self::millis(self.message_insert_breakdown.rowid_duration),
            Self::millis(residual_duration),
            Self::millis(self.total_duration) / calls,
            Self::millis(self.message_insert_duration) / calls,
            Self::millis(self.message_insert_breakdown.execute_duration) / calls,
            Self::millis(self.message_insert_breakdown.payload_duration) / calls,
            Self::millis(self.snippet_insert_duration) / calls,
            Self::millis(self.fts_entry_duration) / calls,
            Self::millis(self.commit_duration) / calls,
        );
    }
}
6005
/// Identity used to look up an existing conversation before inserting.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PendingConversationKey {
    /// Conversation identified by an external id within a source and agent.
    /// `franken_find_existing_conversation_with_tail_by_key` turns these
    /// three fields into a string via `conversation_external_lookup_key`.
    External {
        source_id: String,
        agent_id: i64,
        external_id: String,
    },
    /// Conversation identified by its source path (and optional start time)
    /// within a source and agent.
    SourcePath {
        source_id: String,
        agent_id: i64,
        source_path: String,
        started_at: Option<i64>,
    },
}
6020
/// Builds the string key used to look up a conversation by external id.
///
/// The layout is
/// `<source_id chars>:<source_id>:<agent_id>:<external_id chars>:<external_id>`,
/// where the two lengths are counted in Unicode scalar values (not bytes).
fn conversation_external_lookup_key(source_id: &str, agent_id: i64, external_id: &str) -> String {
    let source_len = source_id.chars().count();
    let external_len = external_id.chars().count();
    format!(
        "{}:{source_id}:{agent_id}:{}:{external_id}",
        source_len, external_len
    )
}
6028
6029fn conversation_external_lookup_key_for_conv(agent_id: i64, conv: &Conversation) -> Option<String> {
6030 conv.external_id
6031 .as_deref()
6032 .map(|external_id| conversation_external_lookup_key(&conv.source_id, agent_id, external_id))
6033}
6034
/// Position-aware identity of a message, built by
/// `message_merge_fingerprint`: two messages compare equal only when index,
/// timestamp, role, author and content hash all match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageMergeFingerprint {
    /// The message's `idx`.
    idx: i64,
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
6043
/// Position-independent identity of a message: the same fields as
/// `MessageMergeFingerprint` without `idx`, so the same message stored at a
/// different index still compares equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageReplayFingerprint {
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
6051
/// Measurements describing how strongly two conversations appear to be the
/// same one.
///
/// NOTE(review): the code that computes and consumes this struct is not
/// visible in this part of the file. The field notes below are inferred from
/// the names and the fingerprint types above; confirm against the producer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ConversationMergeEvidence {
    /// Presumably the number of shared `MessageMergeFingerprint`s.
    exact_overlap: usize,
    /// Presumably the number of shared `MessageReplayFingerprint`s.
    replay_overlap: usize,
    /// Presumably the size of the smaller of the two replay-fingerprint sets.
    smaller_replay_set: usize,
    /// Presumably whether the two start times are within some tolerance.
    started_close: bool,
    /// Presumably the distance between the two start times, in milliseconds.
    start_distance_ms: i64,
}
6060
/// Result of `collect_new_messages_for_existing_conversation`: the incoming
/// messages that still need to be stored, plus collision statistics.
struct ExistingConversationNewMessages<'a> {
    /// Incoming messages that are neither at an already-used index nor a
    /// replay of already-known content, in input order.
    messages: Vec<&'a Message>,
    /// Sum of `content.len()` (a byte length) over `messages`.
    new_chars: i64,
    /// Number of incoming messages whose index was already used by a message
    /// with a different fingerprint.
    idx_collision_count: usize,
    /// Index of the first such collision, if any.
    first_collision_idx: Option<i64>,
}
6067
/// Tail information about a stored conversation: the index and timestamp of
/// its last message, plus the conversation's `ended_at` value.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailState {
    last_message_idx: i64,
    last_message_created_at: i64,
    ended_at: Option<i64>,
}
6074
/// An existing conversation's database id together with its tail state, when
/// one could be determined.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationWithTail {
    /// Database id of the conversation.
    id: i64,
    /// `None` when no tail state was available for the conversation.
    tail_state: Option<ExistingConversationTailState>,
}
6080
6081fn conversation_effective_started_at(conv: &Conversation) -> Option<i64> {
6082 conv.started_at
6083 .or_else(|| conv.messages.iter().filter_map(|msg| msg.created_at).min())
6084}
6085
6086fn conversation_tail_state(conv: &Conversation) -> (Option<i64>, Option<i64>) {
6087 (
6088 conv.messages.iter().map(|msg| msg.idx).max(),
6089 conv.messages.iter().filter_map(|msg| msg.created_at).max(),
6090 )
6091}
6092
6093fn borrowed_messages_tail_state(messages: &[&Message]) -> (Option<i64>, Option<i64>) {
6094 (
6095 messages.iter().map(|msg| msg.idx).max(),
6096 messages.iter().filter_map(|msg| msg.created_at).max(),
6097 )
6098}
6099
6100fn role_from_str(role: &str) -> MessageRole {
6101 match role {
6102 "user" => MessageRole::User,
6103 "agent" | "assistant" => MessageRole::Agent,
6104 "tool" => MessageRole::Tool,
6105 "system" => MessageRole::System,
6106 other => MessageRole::Other(other.to_string()),
6107 }
6108}
6109
6110fn message_merge_fingerprint(msg: &Message) -> MessageMergeFingerprint {
6111 MessageMergeFingerprint {
6112 idx: msg.idx,
6113 created_at: msg.created_at,
6114 role: msg.role.clone(),
6115 author: msg.author.clone(),
6116 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
6117 }
6118}
6119
6120fn message_replay_fingerprint(msg: &Message) -> MessageReplayFingerprint {
6121 MessageReplayFingerprint {
6122 created_at: msg.created_at,
6123 role: msg.role.clone(),
6124 author: msg.author.clone(),
6125 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
6126 }
6127}
6128
6129fn conversation_message_fingerprints(conv: &Conversation) -> HashSet<MessageMergeFingerprint> {
6130 conv.messages
6131 .iter()
6132 .map(message_merge_fingerprint)
6133 .collect()
6134}
6135
6136fn conversation_message_replay_fingerprints(
6137 conv: &Conversation,
6138) -> HashSet<MessageReplayFingerprint> {
6139 conv.messages
6140 .iter()
6141 .map(message_replay_fingerprint)
6142 .collect()
6143}
6144
6145fn replay_fingerprint_from_merge(
6146 fingerprint: &MessageMergeFingerprint,
6147) -> MessageReplayFingerprint {
6148 MessageReplayFingerprint {
6149 created_at: fingerprint.created_at,
6150 role: fingerprint.role.clone(),
6151 author: fingerprint.author.clone(),
6152 content_hash: fingerprint.content_hash,
6153 }
6154}
6155
6156fn replay_fingerprints_from_merge_set(
6157 fingerprints: &HashSet<MessageMergeFingerprint>,
6158) -> HashSet<MessageReplayFingerprint> {
6159 fingerprints
6160 .iter()
6161 .map(replay_fingerprint_from_merge)
6162 .collect()
6163}
6164
6165fn collect_new_messages_for_existing_conversation<'a>(
6166 conversation_id: i64,
6167 conv: &'a Conversation,
6168 existing_messages: &mut HashMap<i64, MessageMergeFingerprint>,
6169 existing_replay_fingerprints: &mut HashSet<MessageReplayFingerprint>,
6170 replay_skip_log: &'static str,
6171) -> ExistingConversationNewMessages<'a> {
6172 let mut idx_collision_count = 0usize;
6173 let mut first_collision_idx: Option<i64> = None;
6174 let mut new_chars: i64 = 0;
6175 let mut messages = Vec::new();
6176
6177 for msg in &conv.messages {
6178 let incoming_fingerprint = message_merge_fingerprint(msg);
6179 if let Some(existing_fingerprint) = existing_messages.get(&msg.idx) {
6180 if existing_fingerprint != &incoming_fingerprint {
6181 idx_collision_count = idx_collision_count.saturating_add(1);
6182 first_collision_idx.get_or_insert(msg.idx);
6183 }
6184 continue;
6185 }
6186
6187 let incoming_replay = replay_fingerprint_from_merge(&incoming_fingerprint);
6188 if existing_replay_fingerprints.contains(&incoming_replay) {
6189 tracing::debug!(
6190 conversation_id,
6191 idx = msg.idx,
6192 source_path = %conv.source_path.display(),
6193 "{replay_skip_log}"
6194 );
6195 continue;
6196 }
6197
6198 existing_messages.insert(msg.idx, incoming_fingerprint);
6199 existing_replay_fingerprints.insert(incoming_replay);
6200 new_chars += msg.content.len() as i64;
6201 messages.push(msg);
6202 }
6203
6204 ExistingConversationNewMessages {
6205 messages,
6206 new_chars,
6207 idx_collision_count,
6208 first_collision_idx,
6209 }
6210}
6211
/// Resolves the tail state (last message index, last message timestamp,
/// `ended_at`) of an existing conversation, trying three sources in order:
///
/// 1. the row for `conversation_id` in `conversation_tail_state`;
/// 2. the `last_message_idx` / `last_message_created_at` / `ended_at` columns
///    on the `conversations` row, which are then written to
///    `conversation_tail_state` via `franken_insert_conversation_tail_state`;
/// 3. `MAX(idx)` / `MAX(created_at)` over the conversation's messages, which
///    are then passed to `franken_update_conversation_tail_state`.
///
/// A source is used only when it yields both a last index and a last
/// timestamp. Returns `Ok(None)` when none of them does.
///
/// # Errors
///
/// Fails when any of the queries or the helper writes fail. The second and
/// third lookups are not `.optional()`, so their error behaviour when no row
/// is returned depends on `query_row_map`.
fn franken_existing_conversation_append_tail_state(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
) -> Result<Option<ExistingConversationTailState>> {
    // Source 1: dedicated tail-state table; the row may be absent.
    let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
        .query_row_map(
            "SELECT last_message_idx, last_message_created_at, ended_at
             FROM conversation_tail_state
             WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
        )
        .optional()?;
    if let Some(cached) = cached {
        let (_, _, cached_ended_at) = cached;
        if let Some(tail_state) =
            existing_conversation_tail_state_from_cached(cached.0, cached.1, cached_ended_at)
        {
            return Ok(Some(tail_state));
        }
    }

    // Source 2: the same three columns stored on the conversation row.
    let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
        "SELECT last_message_idx, last_message_created_at, ended_at
         FROM conversations
         WHERE id = ?1",
        fparams![conversation_id],
        |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
    )?;
    // From here on `cached_ended_at` is the value from the conversation row.
    let (_, _, cached_ended_at) = legacy_cached;
    if let Some(tail_state) = existing_conversation_tail_state_from_cached(
        legacy_cached.0,
        legacy_cached.1,
        cached_ended_at,
    ) {
        // Copy the values into the tail-state table before returning them.
        franken_insert_conversation_tail_state(
            tx,
            conversation_id,
            cached_ended_at,
            Some(tail_state.last_message_idx),
            Some(tail_state.last_message_created_at),
        )?;
        return Ok(Some(tail_state));
    }

    // Source 3: recompute from the stored messages.
    let (max_idx, max_created_at): (Option<i64>, Option<i64>) = tx.query_row_map(
        "SELECT MAX(idx), MAX(created_at)
         FROM messages
         WHERE conversation_id = ?1",
        fparams![conversation_id],
        |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
    )?;
    if let Some((last_message_idx, last_message_created_at)) = max_idx.zip(max_created_at) {
        // NOTE(review): `None` is passed for the ended-at argument here while
        // the returned state carries `cached_ended_at`; confirm this matches
        // how `franken_update_conversation_tail_state` treats `None`.
        franken_update_conversation_tail_state(
            tx,
            conversation_id,
            None,
            Some(last_message_idx),
            Some(last_message_created_at),
        )?;
        return Ok(Some(ExistingConversationTailState {
            last_message_idx,
            last_message_created_at,
            ended_at: cached_ended_at,
        }));
    }
    Ok(None)
}
6280
6281fn existing_conversation_tail_state_from_cached(
6282 last_message_idx: Option<i64>,
6283 last_message_created_at: Option<i64>,
6284 ended_at: Option<i64>,
6285) -> Option<ExistingConversationTailState> {
6286 let (last_message_idx, last_message_created_at) =
6287 last_message_idx.zip(last_message_created_at)?;
6288 Some(ExistingConversationTailState {
6289 last_message_idx,
6290 last_message_created_at,
6291 ended_at,
6292 })
6293}
6294
6295fn franken_find_existing_conversation_with_tail_by_key(
6296 tx: &FrankenTransaction<'_>,
6297 key: &PendingConversationKey,
6298 conv: Option<&Conversation>,
6299) -> Result<Option<ExistingConversationWithTail>> {
6300 if let PendingConversationKey::External {
6301 source_id,
6302 agent_id,
6303 external_id,
6304 } = key
6305 {
6306 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
6307 if let Some(existing) = franken_find_external_conversation_tail_lookup(tx, &lookup_key)? {
6308 return Ok(Some(existing));
6309 }
6310 return Ok(None);
6311 }
6312
6313 let Some(id) = franken_find_existing_conversation_by_key(tx, key, conv)? else {
6314 return Ok(None);
6315 };
6316 let tail_state = franken_existing_conversation_append_tail_state(tx, id)?;
6317 Ok(Some(ExistingConversationWithTail { id, tail_state }))
6318}
6319
6320fn franken_insert_conversation_tail_state(
6321 tx: &FrankenTransaction<'_>,
6322 conversation_id: i64,
6323 ended_at: Option<i64>,
6324 last_message_idx: Option<i64>,
6325 last_message_created_at: Option<i64>,
6326) -> Result<()> {
6327 if ended_at.is_none() && last_message_idx.is_none() && last_message_created_at.is_none() {
6328 return Ok(());
6329 }
6330 tx.execute_compat(
6331 "INSERT OR REPLACE INTO conversation_tail_state (
6332 conversation_id, ended_at, last_message_idx, last_message_created_at
6333 ) VALUES (?1, ?2, ?3, ?4)",
6334 fparams![
6335 conversation_id,
6336 ended_at,
6337 last_message_idx,
6338 last_message_created_at
6339 ],
6340 )?;
6341 Ok(())
6342}
6343
6344fn franken_update_conversation_tail_columns(
6345 tx: &FrankenTransaction<'_>,
6346 conversation_id: i64,
6347 ended_at_candidate: Option<i64>,
6348 last_message_idx_candidate: Option<i64>,
6349 last_message_created_at_candidate: Option<i64>,
6350) -> Result<()> {
6351 if ended_at_candidate.is_none()
6352 && last_message_idx_candidate.is_none()
6353 && last_message_created_at_candidate.is_none()
6354 {
6355 return Ok(());
6356 }
6357
6358 tx.execute_compat(
6359 "UPDATE conversations
6360 SET ended_at = CASE
6361 WHEN ?1 IS NULL THEN ended_at
6362 WHEN ended_at IS NULL OR ended_at < ?1 THEN ?1
6363 ELSE ended_at
6364 END,
6365 last_message_idx = CASE
6366 WHEN ?2 IS NULL THEN last_message_idx
6367 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6368 ELSE last_message_idx
6369 END,
6370 last_message_created_at = CASE
6371 WHEN ?3 IS NULL THEN last_message_created_at
6372 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6373 ELSE last_message_created_at
6374 END
6375 WHERE id = ?4",
6376 fparams![
6377 ended_at_candidate,
6378 last_message_idx_candidate,
6379 last_message_created_at_candidate,
6380 conversation_id
6381 ],
6382 )?;
6383 Ok(())
6384}
6385
6386fn franken_tail_state_insert_ended_at(
6387 tx: &FrankenTransaction<'_>,
6388 conversation_id: i64,
6389 candidate: Option<i64>,
6390) -> Result<Option<i64>> {
6391 let canonical: Option<i64> = tx
6392 .query_row_map(
6393 "SELECT ended_at FROM conversations WHERE id = ?1",
6394 fparams![conversation_id],
6395 |row| row.get_typed(0),
6396 )
6397 .optional()?
6398 .flatten();
6399 Ok(canonical.max(candidate))
6400}
6401
6402fn franken_update_conversation_tail_state(
6403 tx: &FrankenTransaction<'_>,
6404 conversation_id: i64,
6405 ended_at_candidate: Option<i64>,
6406 last_message_idx_candidate: Option<i64>,
6407 last_message_created_at_candidate: Option<i64>,
6408) -> Result<()> {
6409 if ended_at_candidate.is_none()
6410 && last_message_idx_candidate.is_none()
6411 && last_message_created_at_candidate.is_none()
6412 {
6413 return Ok(());
6414 }
6415
6416 let changed = tx.execute_compat(
6417 "UPDATE conversation_tail_state
6418 SET ended_at = CASE
6419 WHEN ?1 IS NULL THEN ended_at
6420 ELSE MAX(IFNULL(ended_at, 0), ?1)
6421 END,
6422 last_message_idx = CASE
6423 WHEN ?2 IS NULL THEN last_message_idx
6424 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6425 ELSE last_message_idx
6426 END,
6427 last_message_created_at = CASE
6428 WHEN ?3 IS NULL THEN last_message_created_at
6429 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6430 ELSE last_message_created_at
6431 END
6432 WHERE conversation_id = ?4",
6433 fparams![
6434 ended_at_candidate,
6435 last_message_idx_candidate,
6436 last_message_created_at_candidate,
6437 conversation_id
6438 ],
6439 )?;
6440 if changed == 0 {
6441 let insert_ended_at =
6442 franken_tail_state_insert_ended_at(tx, conversation_id, ended_at_candidate)?;
6443 franken_insert_conversation_tail_state(
6444 tx,
6445 conversation_id,
6446 insert_ended_at,
6447 last_message_idx_candidate,
6448 last_message_created_at_candidate,
6449 )?;
6450 }
6451 franken_update_conversation_tail_columns(
6452 tx,
6453 conversation_id,
6454 ended_at_candidate,
6455 last_message_idx_candidate,
6456 last_message_created_at_candidate,
6457 )?;
6458 Ok(())
6459}
6460
6461fn franken_set_conversation_tail_state_after_append(
6462 tx: &FrankenTransaction<'_>,
6463 conversation_id: i64,
6464 ended_at: i64,
6465 last_message_idx: i64,
6466 last_message_created_at: i64,
6467) -> Result<()> {
6468 let changed = tx.execute_compat(
6469 "UPDATE conversation_tail_state
6470 SET ended_at = ?1,
6471 last_message_idx = ?2,
6472 last_message_created_at = ?3
6473 WHERE conversation_id = ?4",
6474 fparams![
6475 ended_at,
6476 last_message_idx,
6477 last_message_created_at,
6478 conversation_id
6479 ],
6480 )?;
6481 if changed == 0 {
6482 let insert_ended_at =
6483 franken_tail_state_insert_ended_at(tx, conversation_id, Some(ended_at))?;
6484 franken_insert_conversation_tail_state(
6485 tx,
6486 conversation_id,
6487 insert_ended_at,
6488 Some(last_message_idx),
6489 Some(last_message_created_at),
6490 )?;
6491 }
6492 franken_update_conversation_tail_columns(
6493 tx,
6494 conversation_id,
6495 Some(ended_at),
6496 Some(last_message_idx),
6497 Some(last_message_created_at),
6498 )?;
6499 Ok(())
6500}
6501
6502fn collect_append_only_tail_messages<'a>(
6503 conv: &'a Conversation,
6504 existing_max_idx: i64,
6505 existing_max_created_at: i64,
6506) -> Option<ExistingConversationNewMessages<'a>> {
6507 if conv.messages.is_empty() {
6508 return Some(ExistingConversationNewMessages {
6509 messages: Vec::new(),
6510 new_chars: 0,
6511 idx_collision_count: 0,
6512 first_collision_idx: None,
6513 });
6514 }
6515
6516 let mut split_idx = None;
6517 let mut prev_idx = None;
6518 for (pos, msg) in conv.messages.iter().enumerate() {
6519 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6520 return None;
6521 }
6522 prev_idx = Some(msg.idx);
6523 if split_idx.is_none() && msg.idx > existing_max_idx {
6524 split_idx = Some(pos);
6525 }
6526 }
6527 let split_idx = split_idx?;
6528
6529 let mut seen_tail_idx = HashSet::new();
6530 let mut seen_tail_replay = HashSet::new();
6531 let mut new_chars = 0i64;
6532 let mut messages = Vec::new();
6533 for msg in &conv.messages[split_idx..] {
6534 let created_at = msg.created_at?;
6535 if created_at <= existing_max_created_at {
6536 return None;
6537 }
6538
6539 if !seen_tail_idx.insert(msg.idx) {
6540 return None;
6541 }
6542
6543 let replay_fingerprint = message_replay_fingerprint(msg);
6544 if !seen_tail_replay.insert(replay_fingerprint) {
6545 return None;
6546 }
6547
6548 new_chars += msg.content.len() as i64;
6549 messages.push(msg);
6550 }
6551
6552 Some(ExistingConversationNewMessages {
6553 messages,
6554 new_chars,
6555 idx_collision_count: 0,
6556 first_collision_idx: None,
6557 })
6558}
6559
/// Absolute distance between two optional timestamps.
///
/// Returns `i64::MAX` when either side is missing or when the distance does
/// not fit in an `i64`.
fn start_distance_ms(left: Option<i64>, right: Option<i64>) -> i64 {
    let (Some(left), Some(right)) = (left, right) else {
        return i64::MAX;
    };
    // `abs_diff` cannot overflow; only the narrowing back to `i64` can fail.
    i64::try_from(left.abs_diff(right)).unwrap_or(i64::MAX)
}
6569
6570fn conversation_merge_evidence(
6571 incoming_exact: &HashSet<MessageMergeFingerprint>,
6572 incoming_replay: &HashSet<MessageReplayFingerprint>,
6573 existing_exact: &HashSet<MessageMergeFingerprint>,
6574 existing_replay: &HashSet<MessageReplayFingerprint>,
6575 incoming_started_at: Option<i64>,
6576 existing_started_at: Option<i64>,
6577) -> Option<ConversationMergeEvidence> {
6578 let exact_overlap = incoming_exact.intersection(existing_exact).count();
6579 let replay_overlap = incoming_replay.intersection(existing_replay).count();
6580 if exact_overlap == 0 && replay_overlap == 0 {
6581 return None;
6582 }
6583
6584 let smaller_replay_set = incoming_replay.len().min(existing_replay.len());
6585 let started_close = timestamps_within_tolerance(
6586 incoming_started_at,
6587 existing_started_at,
6588 SOURCE_PATH_MERGE_START_TOLERANCE_MS,
6589 );
6590 let full_replay_subset_match = smaller_replay_set >= 2 && replay_overlap == smaller_replay_set;
6591
6592 let merge_allowed = if started_close {
6593 exact_overlap >= 1 || replay_overlap >= 2
6594 } else {
6595 exact_overlap >= 2 || full_replay_subset_match
6596 };
6597
6598 merge_allowed.then_some(ConversationMergeEvidence {
6599 exact_overlap,
6600 replay_overlap,
6601 smaller_replay_set,
6602 started_close,
6603 start_distance_ms: start_distance_ms(incoming_started_at, existing_started_at),
6604 })
6605}
6606
/// Reports whether two optional timestamps differ by at most `tolerance_ms`.
///
/// A missing timestamp on either side is never "within tolerance". The
/// difference is computed in `i128`, so extreme inputs cannot overflow.
fn timestamps_within_tolerance(left: Option<i64>, right: Option<i64>, tolerance_ms: i64) -> bool {
    left.zip(right).is_some_and(|(left, right)| {
        let distance = (i128::from(left) - i128::from(right)).abs();
        distance <= i128::from(tolerance_ms)
    })
}
6615
6616fn conversation_merge_key(agent_id: i64, conv: &Conversation) -> PendingConversationKey {
6617 if let Some(external_id) = conv.external_id.clone() {
6618 PendingConversationKey::External {
6619 source_id: conv.source_id.clone(),
6620 agent_id,
6621 external_id,
6622 }
6623 } else {
6624 PendingConversationKey::SourcePath {
6625 source_id: conv.source_id.clone(),
6626 agent_id,
6627 source_path: path_to_string(&conv.source_path),
6628 started_at: conversation_effective_started_at(conv),
6629 }
6630 }
6631}
6632
/// Flat, owned record holding one message's text together with the
/// identifiers that accompany it.
///
/// NOTE(review): the name suggests these rows are handed to an embedding
/// pipeline; confirm against the callers that construct and consume it.
pub struct MessageForEmbedding {
    /// Identifier of the message (presumably the `messages.id` value; confirm).
    pub message_id: i64,
    /// Message timestamp, if one is known (units not visible here; the
    /// surrounding storage code works in milliseconds; confirm).
    pub created_at: Option<i64>,
    /// Identifier of the agent that owns the message's conversation.
    pub agent_id: i64,
    /// Identifier of the workspace, when the conversation has one.
    pub workspace_id: Option<i64>,
    /// 32-bit hash standing in for the source id (hash function not visible
    /// here; confirm at the construction site).
    pub source_id_hash: u32,
    /// Message role as text.
    pub role: String,
    /// Message body.
    pub content: String,
}
6643
6644impl FrankenStorage {
6649 pub fn ensure_agent(&self, agent: &Agent) -> Result<i64> {
6651 let cache_key = EnsuredAgentKey::from_agent(agent);
6652 if let Some(id) = self.cached_agent_id(&cache_key) {
6653 return Ok(id);
6654 }
6655
6656 let now = Self::now_millis();
6657 self.conn.execute_compat(
6658 "INSERT INTO agents(slug, name, version, kind, created_at, updated_at)
6659 VALUES(?1, ?2, ?3, ?4, ?5, ?6)
6660 ON CONFLICT(slug) DO UPDATE SET
6661 name = excluded.name,
6662 version = excluded.version,
6663 kind = excluded.kind,
6664 updated_at = excluded.updated_at
6665 WHERE NOT (
6666 agents.name IS excluded.name
6667 AND agents.version IS excluded.version
6668 AND agents.kind IS excluded.kind
6669 )",
6670 fparams![
6671 agent.slug.as_str(),
6672 agent.name.as_str(),
6673 agent.version.as_deref(),
6674 cache_key.kind.as_str(),
6675 now,
6676 now
6677 ],
6678 )?;
6679
6680 let id = self
6681 .conn
6682 .query_row_map(
6683 "SELECT id FROM agents WHERE slug = ?1 LIMIT 1",
6684 fparams![agent.slug.as_str()],
6685 |row| row.get_typed(0),
6686 )
6687 .with_context(|| format!("fetching agent id for {}", agent.slug))?;
6688 self.mark_agent_ensured(cache_key, id);
6689 Ok(id)
6690 }
6691
6692 pub fn ensure_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
6694 let path_str = path.to_string_lossy().to_string();
6695 let cache_key = EnsuredWorkspaceKey::new(path_str.clone(), display_name);
6696 if let Some(id) = self.cached_workspace_id(&cache_key) {
6697 return Ok(id);
6698 }
6699
6700 if let Some(display_name) = display_name {
6701 self.conn.execute_compat(
6702 "INSERT INTO workspaces(path, display_name)
6703 VALUES(?1, ?2)
6704 ON CONFLICT(path) DO UPDATE SET
6705 display_name = excluded.display_name
6706 WHERE NOT (workspaces.display_name IS excluded.display_name)",
6707 fparams![path_str.as_str(), display_name],
6708 )?;
6709 } else {
6710 self.conn.execute_compat(
6711 "INSERT OR IGNORE INTO workspaces(path, display_name) VALUES(?1, NULL)",
6712 fparams![path_str.as_str()],
6713 )?;
6714 }
6715
6716 let id = self
6717 .conn
6718 .query_row_map(
6719 "SELECT id FROM workspaces WHERE path = ?1 LIMIT 1",
6720 fparams![path_str.as_str()],
6721 |row| row.get_typed(0),
6722 )
6723 .with_context(|| format!("fetching workspace id for {path_str}"))?;
6724 self.mark_workspace_ensured(cache_key, id);
6725 Ok(id)
6726 }
6727
6728 pub fn now_millis() -> i64 {
6730 SystemTime::now()
6731 .duration_since(UNIX_EPOCH)
6732 .map(|d| i64::try_from(d.as_millis()).unwrap_or(i64::MAX))
6733 .unwrap_or(0)
6734 }
6735
6736 pub fn day_id_from_millis(timestamp_ms: i64) -> i64 {
6738 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6739 let secs = timestamp_ms.div_euclid(1000);
6740 (secs - EPOCH_2020_SECS).div_euclid(86400)
6741 }
6742
6743 pub fn hour_id_from_millis(timestamp_ms: i64) -> i64 {
6745 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6746 let secs = timestamp_ms.div_euclid(1000);
6747 (secs - EPOCH_2020_SECS).div_euclid(3600)
6748 }
6749
6750 pub fn millis_from_day_id(day_id: i64) -> i64 {
6752 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6753 (EPOCH_2020_SECS + day_id * 86400) * 1000
6754 }
6755
6756 pub fn millis_from_hour_id(hour_id: i64) -> i64 {
6758 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6759 (EPOCH_2020_SECS + hour_id * 3600) * 1000
6760 }
6761
6762 pub fn get_last_scan_ts(&self) -> Result<Option<i64>> {
6764 let result: Result<String, _> = self.conn.query_row_map(
6765 "SELECT value FROM meta WHERE key = 'last_scan_ts'",
6766 fparams![],
6767 |row| row.get_typed(0),
6768 );
6769 match result.optional() {
6770 Ok(Some(s)) => Ok(s.parse().ok()),
6771 Ok(None) => Ok(None),
6772 Err(e) => Err(e.into()),
6773 }
6774 }
6775
6776 pub fn set_last_scan_ts(&self, ts: i64) -> Result<()> {
6778 self.conn.execute_compat(
6779 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_scan_ts', ?1)",
6780 fparams![ts.to_string()],
6781 )?;
6782 Ok(())
6783 }
6784
6785 pub fn get_last_indexed_at(&self) -> Result<Option<i64>> {
6787 let result: Result<String, _> = self.conn.query_row_map(
6788 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
6789 fparams![],
6790 |row| row.get_typed(0),
6791 );
6792 match result.optional() {
6793 Ok(Some(s)) => Ok(s.parse().ok()),
6794 Ok(None) => Ok(None),
6795 Err(e) => Err(e.into()),
6796 }
6797 }
6798
6799 pub fn set_last_indexed_at(&self, ts: i64) -> Result<()> {
6801 self.conn.execute_compat(
6802 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_indexed_at', ?1)",
6803 fparams![ts.to_string()],
6804 )?;
6805 Ok(())
6806 }
6807
6808 pub fn list_agents(&self) -> Result<Vec<Agent>> {
6810 self.conn
6811 .query_map_collect(
6812 "SELECT id, slug, name, version, kind FROM agents ORDER BY slug",
6813 fparams![],
6814 |row| {
6815 let kind: String = row.get_typed(4)?;
6816 Ok(Agent {
6817 id: Some(row.get_typed(0)?),
6818 slug: row.get_typed(1)?,
6819 name: row.get_typed(2)?,
6820 version: row.get_typed(3)?,
6821 kind: match kind.as_str() {
6822 "cli" => AgentKind::Cli,
6823 "vscode" => AgentKind::VsCode,
6824 _ => AgentKind::Hybrid,
6825 },
6826 })
6827 },
6828 )
6829 .with_context(|| "listing agents")
6830 }
6831
6832 pub fn total_conversation_count(&self) -> Result<usize> {
6834 let count: i64 =
6835 self.conn
6836 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6837 row.get_typed(0)
6838 })?;
6839 Ok(count.max(0) as usize)
6840 }
6841
6842 pub fn total_message_count(&self) -> Result<usize> {
6844 let count: i64 =
6845 self.conn
6846 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
6847 row.get_typed(0)
6848 })?;
6849 Ok(count.max(0) as usize)
6850 }
6851
6852 pub fn purge_agent_archive_data(&self, agent_slug: &str) -> Result<AgentArchivePurgeResult> {
6857 let normalized = agent_slug.trim().to_ascii_lowercase();
6858 if normalized.is_empty() {
6859 return Err(anyhow!("agent slug cannot be empty"));
6860 }
6861
6862 let Some(agent_id) = self
6863 .conn
6864 .query_row_map(
6865 "SELECT id FROM agents WHERE slug = ?1",
6866 fparams![normalized.as_str()],
6867 |row| row.get_typed::<i64>(0),
6868 )
6869 .optional()?
6870 else {
6871 return Ok(AgentArchivePurgeResult::default());
6872 };
6873
6874 let conversations_deleted: i64 = self.conn.query_row_map(
6875 "SELECT COUNT(*) FROM conversations WHERE agent_id = ?1",
6876 fparams![agent_id],
6877 |row| row.get_typed(0),
6878 )?;
6879 if conversations_deleted == 0 {
6880 return Ok(AgentArchivePurgeResult::default());
6881 }
6882
6883 let messages_deleted: i64 = self.conn.query_row_map(
6884 "SELECT COUNT(*)
6885 FROM messages
6886 WHERE conversation_id IN (
6887 SELECT id FROM conversations WHERE agent_id = ?1
6888 )",
6889 fparams![agent_id],
6890 |row| row.get_typed(0),
6891 )?;
6892
6893 let mut tx = self.conn.transaction()?;
6894 tx.execute_compat(
6895 "DELETE FROM conversation_external_lookup
6896 WHERE conversation_id IN (
6897 SELECT id FROM conversations WHERE agent_id = ?1
6898 )",
6899 fparams![agent_id],
6900 )?;
6901 tx.execute_compat(
6902 "DELETE FROM conversation_external_tail_lookup
6903 WHERE conversation_id IN (
6904 SELECT id FROM conversations WHERE agent_id = ?1
6905 )",
6906 fparams![agent_id],
6907 )?;
6908 tx.execute_compat(
6909 "DELETE FROM conversations WHERE agent_id = ?1",
6910 fparams![agent_id],
6911 )?;
6912 tx.execute_compat(
6913 "DELETE FROM agents
6914 WHERE id = ?1
6915 AND NOT EXISTS (
6916 SELECT 1 FROM conversations WHERE agent_id = ?1
6917 )",
6918 fparams![agent_id],
6919 )?;
6920 tx.commit()?;
6921
6922 Ok(AgentArchivePurgeResult {
6923 conversations_deleted: conversations_deleted.max(0) as usize,
6924 messages_deleted: messages_deleted.max(0) as usize,
6925 })
6926 }
6927
6928 pub fn list_workspaces(&self) -> Result<Vec<crate::model::types::Workspace>> {
6930 self.conn
6931 .query_map_collect(
6932 "SELECT id, path, display_name FROM workspaces ORDER BY path",
6933 fparams![],
6934 |row| {
6935 let path_str: String = row.get_typed(1)?;
6936 Ok(crate::model::types::Workspace {
6937 id: Some(row.get_typed(0)?),
6938 path: Path::new(&path_str).to_path_buf(),
6939 display_name: row.get_typed(2)?,
6940 })
6941 },
6942 )
6943 .with_context(|| "listing workspaces")
6944 }
6945
6946 pub fn list_conversations(&self, limit: i64, offset: i64) -> Result<Vec<Conversation>> {
6948 self.conn
6955 .query_map_collect(
6956 r"SELECT c.id,
6957 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
6958 (SELECT w.path FROM workspaces w WHERE w.id = c.workspace_id),
6959 c.external_id, c.title, c.source_path,
6960 c.started_at,
6961 COALESCE(
6962 (SELECT ts.ended_at
6963 FROM conversation_tail_state ts
6964 WHERE ts.conversation_id = c.id),
6965 c.ended_at
6966 ),
6967 c.approx_tokens, c.metadata_json,
6968 c.source_id, c.origin_host, c.metadata_bin
6969 FROM conversations c
6970 ORDER BY CASE WHEN c.started_at IS NULL THEN 1 ELSE 0 END, c.started_at DESC, c.id DESC
6971 LIMIT ?1 OFFSET ?2",
6972 fparams![limit, offset],
6973 |row| {
6974 let workspace_path: Option<String> = row.get_typed(2)?;
6975 let source_path: String = row.get_typed(5)?;
6976 let raw_source_id: Option<String> = row.get_typed(10)?;
6977 let raw_origin_host: Option<String> = row.get_typed(11)?;
6978 let (source_id, _, origin_host) = normalized_storage_source_parts(
6979 raw_source_id.as_deref(),
6980 None,
6981 raw_origin_host.as_deref(),
6982 );
6983 Ok(Conversation {
6984 id: Some(row.get_typed(0)?),
6985 agent_slug: row.get_typed(1)?,
6986 workspace: workspace_path.map(|p| Path::new(&p).to_path_buf()),
6987 external_id: row.get_typed(3)?,
6988 title: row.get_typed(4)?,
6989 source_path: Path::new(&source_path).to_path_buf(),
6990 started_at: row.get_typed(6)?,
6991 ended_at: row.get_typed(7)?,
6992 approx_tokens: row.get_typed(8)?,
6993 metadata_json: franken_read_metadata_compat(row, 9, 12),
6994 messages: Vec::new(),
6995 source_id,
6996 origin_host,
6997 })
6998 },
6999 )
7000 .with_context(|| "listing conversations")
7001 }
7002
7003 pub fn build_lexical_rebuild_lookups(
7007 &self,
7008 ) -> Result<(HashMap<i64, String>, HashMap<i64, PathBuf>)> {
7009 let agents: HashMap<i64, String> = self
7010 .conn
7011 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
7012 Ok((row.get_typed::<i64>(0)?, row.get_typed::<String>(1)?))
7013 })
7014 .with_context(|| "loading agent lookup for lexical rebuild")?
7015 .into_iter()
7016 .collect();
7017 let workspaces: HashMap<i64, PathBuf> = self
7018 .conn
7019 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
7020 let path_str: String = row.get_typed(1)?;
7021 Ok((row.get_typed::<i64>(0)?, PathBuf::from(path_str)))
7022 })
7023 .with_context(|| "loading workspace lookup for lexical rebuild")?
7024 .into_iter()
7025 .collect();
7026 Ok((agents, workspaces))
7027 }
7028
7029 pub fn list_conversation_footprints_for_lexical_rebuild(
7042 &self,
7043 ) -> Result<Vec<LexicalRebuildConversationFootprintRow>> {
7044 let tail_state_rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
7045 "SELECT conversation_id, last_message_idx
7046 FROM conversation_tail_state
7047 ORDER BY conversation_id ASC",
7048 fparams![],
7049 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
7050 ) {
7051 Ok(rows) => rows,
7052 Err(err) if error_indicates_missing_table(&err) => Vec::new(),
7053 Err(err) => {
7054 return Err(err).with_context(|| "listing lexical rebuild tail-state estimates");
7055 }
7056 };
7057 let tail_state_by_conversation: HashMap<i64, Option<i64>> =
7058 tail_state_rows.into_iter().collect();
7059
7060 let rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
7061 "SELECT id, last_message_idx
7062 FROM conversations
7063 ORDER BY id ASC",
7064 fparams![],
7065 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
7066 ) {
7067 Ok(rows) => rows,
7068 Err(err) if error_indicates_missing_column(&err) => self
7069 .conn
7070 .query_map_collect(
7071 "SELECT id
7072 FROM conversations
7073 ORDER BY id ASC",
7074 fparams![],
7075 |row| Ok((row.get_typed::<i64>(0)?, None)),
7076 )
7077 .with_context(|| {
7078 "listing lexical rebuild conversation ids after missing tail column fallback"
7079 })?,
7080 Err(err) => {
7081 return Err(err)
7082 .with_context(|| "listing lexical rebuild conversation footprint estimates");
7083 }
7084 };
7085
7086 let mut footprints = Vec::with_capacity(rows.len());
7087 let mut missing_tail_positions = HashMap::new();
7088 for (conversation_id, conversation_last_message_idx) in rows {
7089 let last_message_idx = tail_state_by_conversation
7090 .get(&conversation_id)
7091 .copied()
7092 .flatten()
7093 .or(conversation_last_message_idx);
7094 let Some(message_count) = lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7095 else {
7096 missing_tail_positions.insert(conversation_id, footprints.len());
7097 footprints.push(LexicalRebuildConversationFootprintRow {
7098 conversation_id,
7099 message_count: 0,
7100 message_bytes: 0,
7101 });
7102 continue;
7103 };
7104 footprints.push(lexical_rebuild_conversation_footprint_from_count(
7105 conversation_id,
7106 message_count,
7107 ));
7108 }
7109
7110 let every_footprint_was_missing_tail = missing_tail_positions.len() == footprints.len();
7111 if !missing_tail_positions.is_empty() {
7112 self.fill_missing_lexical_rebuild_footprint_tails(
7113 &mut footprints,
7114 &missing_tail_positions,
7115 )?;
7116 }
7117 if !every_footprint_was_missing_tail {
7118 self.raise_lexical_rebuild_footprints_to_exact_message_counts(&mut footprints)?;
7119 }
7120
7121 Ok(footprints)
7122 }
7123
7124 pub fn lexical_rebuild_has_tail_footprint_metadata(&self) -> Result<bool> {
7125 let total_conversations: i64 = self
7126 .conn
7127 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
7128 row.get_typed(0)
7129 })
7130 .with_context(|| "counting conversations for lexical rebuild tail metadata coverage")?;
7131 let total_conversations = usize::try_from(total_conversations.max(0)).unwrap_or(usize::MAX);
7132 if total_conversations == 0 {
7133 return Ok(true);
7134 }
7135
7136 let conversation_columns = franken_table_column_names(&self.conn, "conversations")?;
7137 let conversations_have_tail_column = conversation_columns.contains("last_message_idx");
7138 let tail_state_has_tail_column =
7139 match franken_table_column_names(&self.conn, "conversation_tail_state") {
7140 Ok(columns) => columns.contains("last_message_idx"),
7141 Err(err) if error_indicates_missing_table(&err) => false,
7142 Err(err) => {
7143 return Err(err)
7144 .with_context(|| "reading lexical rebuild tail-state metadata columns");
7145 }
7146 };
7147 if !conversations_have_tail_column && !tail_state_has_tail_column {
7148 return Ok(false);
7149 }
7150
7151 let covered_sql = match (conversations_have_tail_column, tail_state_has_tail_column) {
7152 (true, true) => {
7153 "SELECT COUNT(*)
7154 FROM conversations c
7155 LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
7156 WHERE c.last_message_idx IS NOT NULL
7157 OR ts.last_message_idx IS NOT NULL"
7158 }
7159 (true, false) => {
7160 "SELECT COUNT(*)
7161 FROM conversations
7162 WHERE last_message_idx IS NOT NULL"
7163 }
7164 (false, true) => {
7165 "SELECT COUNT(*)
7166 FROM conversations c
7167 WHERE EXISTS (
7168 SELECT 1
7169 FROM conversation_tail_state ts
7170 WHERE ts.conversation_id = c.id
7171 AND ts.last_message_idx IS NOT NULL
7172 )"
7173 }
7174 (false, false) => unreachable!("checked before covered_sql selection"),
7175 };
7176 let covered_conversations: i64 = self
7177 .conn
7178 .query_row_map(covered_sql, fparams![], |row| row.get_typed(0))
7179 .with_context(
7180 || "counting conversations covered by lexical rebuild tail footprint metadata",
7181 )?;
7182 let covered_conversations =
7183 usize::try_from(covered_conversations.max(0)).unwrap_or(usize::MAX);
7184
7185 Ok(lexical_rebuild_tail_metadata_coverage_is_sufficient(
7186 total_conversations,
7187 covered_conversations,
7188 ))
7189 }
7190
7191 fn raise_lexical_rebuild_footprints_to_exact_message_counts(
7192 &self,
7193 footprints: &mut [LexicalRebuildConversationFootprintRow],
7194 ) -> Result<()> {
7195 if footprints.is_empty() {
7196 return Ok(());
7197 }
7198
7199 let positions_by_conversation: HashMap<i64, usize> = footprints
7200 .iter()
7201 .enumerate()
7202 .map(|(position, footprint)| (footprint.conversation_id, position))
7203 .collect();
7204 self.conn
7205 .query_with_params_for_each(
7206 "SELECT conversation_id, COUNT(*) AS message_count
7207 FROM messages
7208 GROUP BY conversation_id
7209 ORDER BY conversation_id ASC",
7210 &[] as &[SqliteValue],
7211 |row| {
7212 let conversation_id: i64 = row.get_typed(0)?;
7213 let exact_count: i64 = row.get_typed(1)?;
7214 let Some(position) = positions_by_conversation.get(&conversation_id) else {
7215 return Ok(());
7216 };
7217 let exact_count = usize::try_from(exact_count.max(0)).unwrap_or(usize::MAX);
7218 let footprint = &mut footprints[*position];
7219 if exact_count > footprint.message_count {
7220 footprint.message_count = exact_count;
7221 footprint.message_bytes =
7222 footprint.message_bytes.max(exact_count.saturating_mul(
7223 LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
7224 ));
7225 }
7226 Ok(())
7227 },
7228 )
7229 .with_context(|| "raising lexical rebuild footprints to exact message counts")?;
7230 Ok(())
7231 }
7232
7233 fn fill_missing_lexical_rebuild_footprint_tails(
7234 &self,
7235 footprints: &mut [LexicalRebuildConversationFootprintRow],
7236 missing_tail_positions: &HashMap<i64, usize>,
7237 ) -> Result<()> {
7238 if missing_tail_positions.len() <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT {
7239 for (conversation_id, position) in missing_tail_positions {
7240 let last_message_idx: Option<i64> = self
7241 .conn
7242 .query_row_map(
7243 "SELECT MAX(idx) FROM messages WHERE conversation_id = ?1",
7244 fparams![*conversation_id],
7245 |row| row.get_typed(0),
7246 )
7247 .with_context(|| {
7248 format!(
7249 "looking up missing lexical rebuild tail estimate for conversation {conversation_id}"
7250 )
7251 })?;
7252 if let Some(message_count) =
7253 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7254 {
7255 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7256 *conversation_id,
7257 message_count,
7258 );
7259 }
7260 }
7261 return Ok(());
7262 }
7263
7264 self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7265 footprints,
7266 missing_tail_positions,
7267 "SELECT conversation_id, MAX(idx) AS last_message_idx
7268 FROM messages INDEXED BY idx_messages_conv_idx
7269 GROUP BY conversation_id
7270 ORDER BY conversation_id ASC",
7271 )
7272 .or_else(|err| {
7273 if err
7274 .to_string()
7275 .contains("no such index: idx_messages_conv_idx")
7276 {
7277 return self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7278 footprints,
7279 missing_tail_positions,
7280 "SELECT conversation_id, MAX(idx) AS last_message_idx
7281 FROM messages
7282 GROUP BY conversation_id
7283 ORDER BY conversation_id ASC",
7284 );
7285 }
7286 Err(err)
7287 })
7288 .with_context(|| "grouping missing lexical rebuild tail estimates from messages")?;
7289
7290 Ok(())
7291 }
7292
7293 fn fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7294 &self,
7295 footprints: &mut [LexicalRebuildConversationFootprintRow],
7296 missing_tail_positions: &HashMap<i64, usize>,
7297 sql: &str,
7298 ) -> Result<()> {
7299 self.conn
7300 .query_with_params_for_each(sql, &[] as &[SqliteValue], |row| {
7301 let conversation_id: i64 = row.get_typed(0)?;
7302 let last_message_idx: Option<i64> = row.get_typed(1)?;
7303 let Some(position) = missing_tail_positions.get(&conversation_id) else {
7304 return Ok(());
7305 };
7306 if let Some(message_count) =
7307 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7308 {
7309 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7310 conversation_id,
7311 message_count,
7312 );
7313 }
7314 Ok(())
7315 })
7316 .with_context(|| "grouping lexical rebuild missing tail estimates")
7317 }
7318
7319 pub fn list_conversation_ids_for_lexical_rebuild(&self) -> Result<Vec<i64>> {
7321 self.conn
7322 .query_map_collect(
7323 "SELECT id FROM conversations ORDER BY id ASC",
7324 fparams![],
7325 |row| row.get_typed(0),
7326 )
7327 .with_context(|| "listing conversation ids for lexical rebuild")
7328 }
7329 pub fn list_conversations_for_lexical_rebuild_by_offset(
7334 &self,
7335 limit: i64,
7336 offset: i64,
7337 agent_slugs: &HashMap<i64, String>,
7338 workspace_paths: &HashMap<i64, PathBuf>,
7339 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7340 self.conn
7343 .query_map_collect(
7344 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7345 started_at,
7346 COALESCE(
7347 (SELECT ts.ended_at
7348 FROM conversation_tail_state ts
7349 WHERE ts.conversation_id = conversations.id),
7350 ended_at
7351 ),
7352 source_id, origin_host
7353 FROM conversations
7354 ORDER BY id ASC
7355 LIMIT ?1 OFFSET ?2",
7356 fparams![limit, offset],
7357 |row| {
7358 let agent_id: Option<i64> = row.get_typed(1)?;
7359 let workspace_id: Option<i64> = row.get_typed(2)?;
7360 let source_path: String = row.get_typed(5)?;
7361 let raw_source_id: Option<String> = row.get_typed(8)?;
7362 let raw_origin_host: Option<String> = row.get_typed(9)?;
7363 let (source_id, _, origin_host) = normalized_storage_source_parts(
7364 raw_source_id.as_deref(),
7365 None,
7366 raw_origin_host.as_deref(),
7367 );
7368 Ok(LexicalRebuildConversationRow {
7369 id: Some(row.get_typed(0)?),
7370 agent_slug: agent_id
7371 .and_then(|aid| agent_slugs.get(&aid).cloned())
7372 .unwrap_or_else(|| "unknown".to_string()),
7373 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7374 external_id: row.get_typed(3)?,
7375 title: row.get_typed(4)?,
7376 source_path: Path::new(&source_path).to_path_buf(),
7377 started_at: row.get_typed(6)?,
7378 ended_at: row.get_typed(7)?,
7379 source_id,
7380 origin_host,
7381 })
7382 },
7383 )
7384 .with_context(|| "listing conversations for lexical rebuild")
7385 }
7386
7387 pub fn list_conversations_for_lexical_rebuild_after_id(
7392 &self,
7393 limit: i64,
7394 after_conversation_id: i64,
7395 agent_slugs: &HashMap<i64, String>,
7396 workspace_paths: &HashMap<i64, PathBuf>,
7397 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7398 self.conn
7399 .query_map_collect(
7400 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7401 started_at,
7402 COALESCE(
7403 (SELECT ts.ended_at
7404 FROM conversation_tail_state ts
7405 WHERE ts.conversation_id = conversations.id),
7406 ended_at
7407 ),
7408 source_id, origin_host
7409 FROM conversations
7410 WHERE id > ?2
7411 ORDER BY id ASC
7412 LIMIT ?1",
7413 fparams![limit, after_conversation_id],
7414 |row| {
7415 let agent_id: Option<i64> = row.get_typed(1)?;
7416 let workspace_id: Option<i64> = row.get_typed(2)?;
7417 let source_path: String = row.get_typed(5)?;
7418 let raw_source_id: Option<String> = row.get_typed(8)?;
7419 let raw_origin_host: Option<String> = row.get_typed(9)?;
7420 let (source_id, _, origin_host) = normalized_storage_source_parts(
7421 raw_source_id.as_deref(),
7422 None,
7423 raw_origin_host.as_deref(),
7424 );
7425 Ok(LexicalRebuildConversationRow {
7426 id: Some(row.get_typed(0)?),
7427 agent_slug: agent_id
7428 .and_then(|aid| agent_slugs.get(&aid).cloned())
7429 .unwrap_or_else(|| "unknown".to_string()),
7430 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7431 external_id: row.get_typed(3)?,
7432 title: row.get_typed(4)?,
7433 source_path: Path::new(&source_path).to_path_buf(),
7434 started_at: row.get_typed(6)?,
7435 ended_at: row.get_typed(7)?,
7436 source_id,
7437 origin_host,
7438 })
7439 },
7440 )
7441 .with_context(|| {
7442 format!(
7443 "listing conversations for lexical rebuild after id {after_conversation_id}"
7444 )
7445 })
7446 }
7447
7448 pub fn list_conversations_for_lexical_rebuild_after_id_through_id(
7454 &self,
7455 limit: i64,
7456 after_conversation_id: i64,
7457 through_conversation_id: i64,
7458 agent_slugs: &HashMap<i64, String>,
7459 workspace_paths: &HashMap<i64, PathBuf>,
7460 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7461 if through_conversation_id <= after_conversation_id {
7462 return Ok(Vec::new());
7463 }
7464 self.conn
7465 .query_map_collect(
7466 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7467 started_at,
7468 COALESCE(
7469 (SELECT ts.ended_at
7470 FROM conversation_tail_state ts
7471 WHERE ts.conversation_id = conversations.id),
7472 ended_at
7473 ),
7474 source_id, origin_host
7475 FROM conversations
7476 WHERE id > ?2 AND id <= ?3
7477 ORDER BY id ASC
7478 LIMIT ?1",
7479 fparams![limit, after_conversation_id, through_conversation_id],
7480 |row| {
7481 let agent_id: Option<i64> = row.get_typed(1)?;
7482 let workspace_id: Option<i64> = row.get_typed(2)?;
7483 let source_path: String = row.get_typed(5)?;
7484 let raw_source_id: Option<String> = row.get_typed(8)?;
7485 let raw_origin_host: Option<String> = row.get_typed(9)?;
7486 let (source_id, _, origin_host) = normalized_storage_source_parts(
7487 raw_source_id.as_deref(),
7488 None,
7489 raw_origin_host.as_deref(),
7490 );
7491 Ok(LexicalRebuildConversationRow {
7492 id: Some(row.get_typed(0)?),
7493 agent_slug: agent_id
7494 .and_then(|aid| agent_slugs.get(&aid).cloned())
7495 .unwrap_or_else(|| "unknown".to_string()),
7496 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7497 external_id: row.get_typed(3)?,
7498 title: row.get_typed(4)?,
7499 source_path: Path::new(&source_path).to_path_buf(),
7500 started_at: row.get_typed(6)?,
7501 ended_at: row.get_typed(7)?,
7502 source_id,
7503 origin_host,
7504 })
7505 },
7506 )
7507 .with_context(|| {
7508 format!(
7509 "listing conversations for lexical rebuild after id {after_conversation_id} through id {through_conversation_id}"
7510 )
7511 })
7512 }
7513
7514 pub fn fetch_messages(&self, conversation_id: i64) -> Result<Vec<Message>> {
7516 let hinted_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7517 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7518 WHERE conversation_id = ?1 ORDER BY idx";
7519 let fallback_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7520 FROM messages \
7521 WHERE conversation_id = ?1 ORDER BY idx";
7522
7523 self.conn
7524 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7525 let role: String = row.get_typed(2)?;
7526 Ok(Message {
7527 id: Some(row.get_typed(0)?),
7528 idx: row.get_typed(1)?,
7529 role: match role.as_str() {
7530 "user" => MessageRole::User,
7531 "agent" | "assistant" => MessageRole::Agent,
7532 "tool" => MessageRole::Tool,
7533 "system" => MessageRole::System,
7534 other => MessageRole::Other(other.to_string()),
7535 },
7536 author: row.get_typed(3)?,
7537 created_at: row.get_typed(4)?,
7538 content: row.get_typed(5)?,
7539 extra_json: franken_read_message_extra_compat(row, 6, 7),
7540 snippets: Vec::new(),
7541 })
7542 })
7543 .or_else(|err| {
7544 if err
7545 .to_string()
7546 .contains("no such index: sqlite_autoindex_messages_1")
7547 {
7548 return self.conn.query_map_collect(
7549 fallback_sql,
7550 fparams![conversation_id],
7551 |row| {
7552 let role: String = row.get_typed(2)?;
7553 Ok(Message {
7554 id: Some(row.get_typed(0)?),
7555 idx: row.get_typed(1)?,
7556 role: match role.as_str() {
7557 "user" => MessageRole::User,
7558 "agent" | "assistant" => MessageRole::Agent,
7559 "tool" => MessageRole::Tool,
7560 "system" => MessageRole::System,
7561 other => MessageRole::Other(other.to_string()),
7562 },
7563 author: row.get_typed(3)?,
7564 created_at: row.get_typed(4)?,
7565 content: row.get_typed(5)?,
7566 extra_json: franken_read_message_extra_compat(row, 6, 7),
7567 snippets: Vec::new(),
7568 })
7569 },
7570 );
7571 }
7572 Err(err)
7573 })
7574 .with_context(|| format!("fetching messages for conversation {conversation_id}"))
7575 }
7576
7577 pub fn fetch_messages_for_lexical_rebuild(&self, conversation_id: i64) -> Result<Vec<Message>> {
7583 let hinted_sql = "SELECT id, idx, role, author, created_at, content \
7584 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7585 WHERE conversation_id = ?1 ORDER BY idx";
7586 let fallback_sql = "SELECT id, idx, role, author, created_at, content \
7587 FROM messages \
7588 WHERE conversation_id = ?1 ORDER BY idx";
7589
7590 self.conn
7591 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7592 let role: String = row.get_typed(2)?;
7593 Ok(Message {
7594 id: Some(row.get_typed(0)?),
7595 idx: row.get_typed(1)?,
7596 role: match role.as_str() {
7597 "user" => MessageRole::User,
7598 "agent" | "assistant" => MessageRole::Agent,
7599 "tool" => MessageRole::Tool,
7600 "system" => MessageRole::System,
7601 other => MessageRole::Other(other.to_string()),
7602 },
7603 author: row.get_typed(3)?,
7604 created_at: row.get_typed(4)?,
7605 content: row.get_typed(5)?,
7606 extra_json: serde_json::Value::Null,
7607 snippets: Vec::new(),
7608 })
7609 })
7610 .or_else(|err| {
7611 if err
7612 .to_string()
7613 .contains("no such index: sqlite_autoindex_messages_1")
7614 {
7615 return self.conn.query_map_collect(
7616 fallback_sql,
7617 fparams![conversation_id],
7618 |row| {
7619 let role: String = row.get_typed(2)?;
7620 Ok(Message {
7621 id: Some(row.get_typed(0)?),
7622 idx: row.get_typed(1)?,
7623 role: match role.as_str() {
7624 "user" => MessageRole::User,
7625 "agent" | "assistant" => MessageRole::Agent,
7626 "tool" => MessageRole::Tool,
7627 "system" => MessageRole::System,
7628 other => MessageRole::Other(other.to_string()),
7629 },
7630 author: row.get_typed(3)?,
7631 created_at: row.get_typed(4)?,
7632 content: row.get_typed(5)?,
7633 extra_json: serde_json::Value::Null,
7634 snippets: Vec::new(),
7635 })
7636 },
7637 );
7638 }
7639 Err(err)
7640 })
7641 .with_context(|| {
7642 format!("fetching messages for lexical rebuild of conversation {conversation_id}")
7643 })
7644 }
7645
7646 pub fn fetch_messages_for_lexical_rebuild_batch(
7651 &self,
7652 conversation_ids: &[i64],
7653 max_messages: Option<usize>,
7654 max_content_bytes: Option<usize>,
7655 ) -> Result<HashMap<i64, Vec<Message>>> {
7656 if conversation_ids.is_empty() {
7657 return Ok(HashMap::new());
7658 }
7659
7660 let mut grouped: HashMap<i64, Vec<Message>> =
7661 HashMap::with_capacity(conversation_ids.len());
7662 let mut fetched_conversation_ids = HashSet::with_capacity(conversation_ids.len());
7663 let mut total_messages = 0usize;
7664 let mut total_content_bytes = 0usize;
7665
7666 for conversation_id in conversation_ids {
7671 if !fetched_conversation_ids.insert(*conversation_id) {
7672 continue;
7673 }
7674
7675 let messages = self
7676 .fetch_messages_for_lexical_rebuild(*conversation_id)
7677 .with_context(|| {
7678 format!("fetching lexical rebuild messages for conversation {conversation_id}")
7679 })?;
7680 total_messages = total_messages.saturating_add(messages.len());
7681 if let Some(limit) = max_messages
7682 && total_messages > limit
7683 {
7684 return Err(anyhow!(
7685 "lexical rebuild batch fetch exceeded message guardrail: messages={total_messages} limit={limit} conversations={}",
7686 conversation_ids.len()
7687 ));
7688 }
7689
7690 let message_bytes = messages
7691 .iter()
7692 .map(|message| message.content.len())
7693 .sum::<usize>();
7694 total_content_bytes = total_content_bytes.saturating_add(message_bytes);
7695 if let Some(limit) = max_content_bytes
7696 && total_content_bytes > limit
7697 {
7698 return Err(anyhow!(
7699 "lexical rebuild batch fetch exceeded content-byte guardrail: bytes={total_content_bytes} limit={limit} conversations={}",
7700 conversation_ids.len()
7701 ));
7702 }
7703
7704 if !messages.is_empty() {
7705 grouped.insert(*conversation_id, messages);
7706 }
7707 }
7708
7709 Ok(grouped)
7710 }
7711
7712 pub fn stream_messages_for_lexical_rebuild_between_conversation_ids<F>(
7715 &self,
7716 start_conversation_id: i64,
7717 end_conversation_id: i64,
7718 mut f: F,
7719 ) -> Result<()>
7720 where
7721 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7722 {
7723 if end_conversation_id < start_conversation_id {
7724 return Ok(());
7725 }
7726
7727 let conversation_ids: Vec<i64> = self
7728 .conn
7729 .query_map_collect(
7730 "SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
7731 fparams![start_conversation_id, end_conversation_id],
7732 |row| row.get_typed(0),
7733 )
7734 .with_context(|| "listing conversation ids for streamed lexical rebuild")?;
7735
7736 for conversation_id in conversation_ids {
7737 let messages = self
7738 .fetch_messages_for_lexical_rebuild(conversation_id)
7739 .with_context(|| {
7740 format!("streaming lexical rebuild messages for conversation {conversation_id}")
7741 })?;
7742
7743 for message in messages {
7744 let message_id = message.id.ok_or_else(|| {
7745 anyhow!(
7746 "lexical rebuild message missing id for conversation {conversation_id} idx {}",
7747 message.idx
7748 )
7749 })?;
7750 f(LexicalRebuildMessageRow {
7751 conversation_id,
7752 id: message_id,
7753 idx: message.idx,
7754 role: role_str(&message.role),
7755 author: message.author,
7756 created_at: message.created_at,
7757 content: message.content,
7758 })?;
7759 }
7760 }
7761
7762 Ok(())
7763 }
7764
7765 pub fn stream_grouped_messages_for_lexical_rebuild_between_conversation_ids<F>(
7769 &self,
7770 start_conversation_id: i64,
7771 end_conversation_id: i64,
7772 mut f: F,
7773 ) -> Result<()>
7774 where
7775 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7776 {
7777 if end_conversation_id < start_conversation_id {
7778 return Ok(());
7779 }
7780
7781 let mut current_conversation_id: Option<i64> = None;
7782 let mut current_messages: LexicalRebuildGroupedMessageRows = SmallVec::new();
7783 let mut current_last_message_id = 0i64;
7784 let mut flush_current = |current_conversation_id: &mut Option<i64>,
7785 current_messages: &mut LexicalRebuildGroupedMessageRows,
7786 current_last_message_id: &mut i64|
7787 -> Result<()> {
7788 let Some(conversation_id) = current_conversation_id.take() else {
7789 return Ok(());
7790 };
7791 let messages = std::mem::take(current_messages);
7792 let last_message_id = std::mem::take(current_last_message_id);
7793 f(conversation_id, messages, last_message_id)
7794 };
7795
7796 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7797 start_conversation_id,
7798 end_conversation_id,
7799 |row| {
7800 if current_conversation_id != Some(row.conversation_id) {
7801 flush_current(
7802 &mut current_conversation_id,
7803 &mut current_messages,
7804 &mut current_last_message_id,
7805 )?;
7806 current_conversation_id = Some(row.conversation_id);
7807 }
7808 current_last_message_id = row.id;
7809 current_messages.push(LexicalRebuildGroupedMessageRow {
7810 idx: row.idx,
7811 is_tool_role: row.role == "tool",
7812 created_at: row.created_at,
7813 content: row.content,
7814 });
7815 Ok(())
7816 },
7817 )
7818 .with_context(|| "streaming grouped lexical rebuild messages")?;
7819
7820 flush_current(
7821 &mut current_conversation_id,
7822 &mut current_messages,
7823 &mut current_last_message_id,
7824 )
7825 .with_context(|| "flushing grouped lexical rebuild messages")
7826 }
7827
7828 pub fn stream_grouped_messages_for_lexical_rebuild_from_conversation_id<F>(
7831 &self,
7832 start_conversation_id: i64,
7833 f: F,
7834 ) -> Result<()>
7835 where
7836 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7837 {
7838 self.stream_grouped_messages_for_lexical_rebuild_between_conversation_ids(
7839 start_conversation_id,
7840 i64::MAX,
7841 f,
7842 )
7843 }
7844
7845 pub fn stream_messages_for_lexical_rebuild_from_conversation_id<F>(
7848 &self,
7849 start_conversation_id: i64,
7850 f: F,
7851 ) -> Result<()>
7852 where
7853 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7854 {
7855 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7856 start_conversation_id,
7857 i64::MAX,
7858 f,
7859 )
7860 }
7861
7862 pub fn get_source(&self, id: &str) -> Result<Option<Source>> {
7864 let result = self.conn.query_row_map(
7865 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources WHERE id = ?1",
7866 fparams![id],
7867 |row| {
7868 let kind_str: String = row.get_typed(1)?;
7869 let config_json_str: Option<String> = row.get_typed(5)?;
7870 Ok(Source {
7871 id: row.get_typed(0)?,
7872 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7873 host_label: row.get_typed(2)?,
7874 machine_id: row.get_typed(3)?,
7875 platform: row.get_typed(4)?,
7876 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7877 created_at: row.get_typed(6)?,
7878 updated_at: row.get_typed(7)?,
7879 })
7880 },
7881 );
7882 Ok(result.optional()?)
7883 }
7884
7885 pub fn list_sources(&self) -> Result<Vec<Source>> {
7887 self.conn
7888 .query_map_collect(
7889 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources ORDER BY id",
7890 fparams![],
7891 |row| {
7892 let kind_str: String = row.get_typed(1)?;
7893 let config_json_str: Option<String> = row.get_typed(5)?;
7894 Ok(Source {
7895 id: row.get_typed(0)?,
7896 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7897 host_label: row.get_typed(2)?,
7898 machine_id: row.get_typed(3)?,
7899 platform: row.get_typed(4)?,
7900 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7901 created_at: row.get_typed(6)?,
7902 updated_at: row.get_typed(7)?,
7903 })
7904 },
7905 )
7906 .with_context(|| "listing sources")
7907 }
7908
7909 pub fn get_source_ids(&self) -> Result<Vec<String>> {
7911 self.conn
7912 .query_map_collect(
7913 "SELECT id FROM sources WHERE id != 'local' ORDER BY id",
7914 fparams![],
7915 |row| row.get_typed(0),
7916 )
7917 .with_context(|| "listing source ids")
7918 }
7919
7920 pub fn upsert_source(&self, source: &Source) -> Result<()> {
7922 self.invalidate_conversation_source_cache(source.id.as_str());
7923 let now = Self::now_millis();
7924 let kind_str = source.kind.to_string();
7925 let config_json_str = source
7926 .config_json
7927 .as_ref()
7928 .map(serde_json::to_string)
7929 .transpose()?;
7930
7931 self.conn.execute_compat(
7935 "INSERT INTO sources(id, kind, host_label, machine_id, platform, config_json, created_at, updated_at)
7936 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
7937 ON CONFLICT(id) DO UPDATE SET
7938 kind = excluded.kind,
7939 host_label = excluded.host_label,
7940 machine_id = excluded.machine_id,
7941 platform = excluded.platform,
7942 config_json = excluded.config_json,
7943 updated_at = excluded.updated_at
7944 WHERE NOT (
7945 sources.kind IS excluded.kind
7946 AND sources.host_label IS excluded.host_label
7947 AND sources.machine_id IS excluded.machine_id
7948 AND sources.platform IS excluded.platform
7949 AND sources.config_json IS excluded.config_json
7950 )",
7951 fparams![
7952 source.id.as_str(),
7953 kind_str.as_str(),
7954 source.host_label.as_deref(),
7955 source.machine_id.as_deref(),
7956 source.platform.as_deref(),
7957 config_json_str.as_deref(),
7958 source.created_at.unwrap_or(now),
7959 now
7960 ],
7961 )?;
7962 Ok(())
7963 }
7964
7965 fn historical_bundle_key_hash(
7966 version: u32,
7967 bundle: &HistoricalDatabaseBundle,
7968 include_bundle_stats: bool,
7969 ) -> String {
7970 let signature = if include_bundle_stats {
7971 format!(
7972 "{}:{}:{}:{}",
7973 version,
7974 bundle.root_path.display(),
7975 bundle.total_bytes,
7976 bundle.modified_at_ms
7977 )
7978 } else {
7979 format!("{}:{}", version, bundle.root_path.display())
7980 };
7981 blake3::hash(signature.as_bytes()).to_hex().to_string()
7982 }
7983
7984 fn historical_bundle_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7985 format!(
7986 "historical_bundle_salvaged:{}",
7987 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_LEDGER_VERSION, bundle, false)
7988 )
7989 }
7990
7991 fn historical_bundle_legacy_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7992 let signature = format!(
7993 "{}:{}:{}:{}",
7994 HISTORICAL_SALVAGE_LEDGER_VERSION,
7995 bundle.root_path.display(),
7996 bundle.total_bytes,
7997 bundle.modified_at_ms
7998 );
7999 format!(
8000 "historical_bundle_salvaged:{}",
8001 blake3::hash(signature.as_bytes()).to_hex()
8002 )
8003 }
8004
8005 fn historical_bundle_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
8006 format!(
8007 "historical_bundle_progress:{}",
8008 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_PROGRESS_VERSION, bundle, false)
8009 )
8010 }
8011
8012 fn historical_bundle_legacy_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
8013 let signature = format!(
8014 "{}:{}:{}:{}",
8015 HISTORICAL_SALVAGE_PROGRESS_VERSION,
8016 bundle.root_path.display(),
8017 bundle.total_bytes,
8018 bundle.modified_at_ms
8019 );
8020 format!(
8021 "historical_bundle_progress:{}",
8022 blake3::hash(signature.as_bytes()).to_hex()
8023 )
8024 }
8025
8026 fn historical_bundle_already_imported(
8027 &self,
8028 bundle: &HistoricalDatabaseBundle,
8029 ) -> Result<bool> {
8030 for key in [
8031 Self::historical_bundle_meta_key(bundle),
8032 Self::historical_bundle_legacy_meta_key(bundle),
8033 ] {
8034 let existing: Option<String> = self
8035 .conn
8036 .query_row_map(
8037 "SELECT value FROM meta WHERE key = ?1",
8038 fparams![key.as_str()],
8039 |row| row.get_typed(0),
8040 )
8041 .optional()?;
8042 if existing.is_some() {
8043 return Ok(true);
8044 }
8045 }
8046 Ok(false)
8047 }
8048
8049 pub(crate) fn has_pending_historical_bundles(&self, canonical_db_path: &Path) -> Result<bool> {
8050 for bundle in discover_historical_database_bundles(canonical_db_path) {
8051 if !self.historical_bundle_already_imported(&bundle)? {
8052 return Ok(true);
8053 }
8054 }
8055 Ok(false)
8056 }
8057
8058 fn load_historical_bundle_progress(
8059 &self,
8060 bundle: &HistoricalDatabaseBundle,
8061 ) -> Result<Option<HistoricalBundleProgress>> {
8062 for key in [
8063 Self::historical_bundle_progress_key(bundle),
8064 Self::historical_bundle_legacy_progress_key(bundle),
8065 ] {
8066 let raw: Option<String> = self
8067 .conn
8068 .query_row_map(
8069 "SELECT value FROM meta WHERE key = ?1",
8070 fparams![key.as_str()],
8071 |row| row.get_typed(0),
8072 )
8073 .optional()?;
8074 let Some(raw) = raw else {
8075 continue;
8076 };
8077 let parsed: HistoricalBundleProgress =
8078 serde_json::from_str(&raw).with_context(|| {
8079 format!(
8080 "parsing historical salvage progress checkpoint for {}",
8081 bundle.root_path.display()
8082 )
8083 })?;
8084 if parsed.progress_version == HISTORICAL_SALVAGE_PROGRESS_VERSION {
8085 return Ok(Some(parsed));
8086 }
8087 }
8088 Ok(None)
8089 }
8090
8091 fn record_historical_bundle_progress(
8092 &self,
8093 bundle: &HistoricalDatabaseBundle,
8094 method: &str,
8095 last_completed_source_row_id: i64,
8096 conversations_imported: usize,
8097 messages_imported: usize,
8098 ) -> Result<()> {
8099 let key = Self::historical_bundle_progress_key(bundle);
8100 let value = HistoricalBundleProgress {
8101 progress_version: HISTORICAL_SALVAGE_PROGRESS_VERSION,
8102 path: bundle.root_path.display().to_string(),
8103 bytes: bundle.total_bytes,
8104 modified_at_ms: bundle.modified_at_ms,
8105 method: method.to_string(),
8106 last_completed_source_row_id,
8107 conversations_imported,
8108 messages_imported,
8109 updated_at_ms: Self::now_millis(),
8110 };
8111 let value_str = serde_json::to_string(&value)?;
8112 self.conn.execute_compat(
8113 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
8114 fparams![key.as_str(), value_str.as_str()],
8115 )?;
8116 Ok(())
8117 }
8118
8119 fn clear_historical_bundle_progress(&self, bundle: &HistoricalDatabaseBundle) -> Result<()> {
8120 for key in [
8121 Self::historical_bundle_progress_key(bundle),
8122 Self::historical_bundle_legacy_progress_key(bundle),
8123 ] {
8124 self.conn
8125 .execute_compat("DELETE FROM meta WHERE key = ?1", fparams![key.as_str()])?;
8126 }
8127 Ok(())
8128 }
8129
8130 fn record_historical_bundle_import(
8131 &self,
8132 bundle: &HistoricalDatabaseBundle,
8133 method: &str,
8134 conversations_imported: usize,
8135 messages_imported: usize,
8136 ) -> Result<()> {
8137 let key = Self::historical_bundle_meta_key(bundle);
8138 let value = serde_json::json!({
8139 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
8140 "path": bundle.root_path.display().to_string(),
8141 "bytes": bundle.total_bytes,
8142 "modified_at_ms": bundle.modified_at_ms,
8143 "method": method,
8144 "conversations_imported": conversations_imported,
8145 "messages_imported": messages_imported,
8146 "recorded_at_ms": Self::now_millis(),
8147 });
8148 let value_str = serde_json::to_string(&value)?;
8149 self.conn.execute_compat(
8150 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
8151 fparams![key.as_str(), value_str.as_str()],
8152 )?;
8153 Ok(())
8154 }
8155
8156 fn historical_import_error_is_split_retryable(err: &anyhow::Error) -> bool {
8157 const RETRYABLE_PATTERNS: &[&str] = &[
8158 "out of memory",
8159 "string or blob too big",
8160 "too many sql variables",
8161 ];
8162 err.chain().any(|cause| {
8163 let rendered = cause.to_string().to_ascii_lowercase();
8164 RETRYABLE_PATTERNS
8165 .iter()
8166 .any(|pattern| rendered.contains(pattern))
8167 })
8168 }
8169
8170 fn split_historical_batch_entry_messages(
8171 entry: &HistoricalBatchEntry,
8172 ) -> Option<(HistoricalBatchEntry, HistoricalBatchEntry)> {
8173 if entry.conversation.messages.len() < 2 {
8174 return None;
8175 }
8176 let split_at = entry.conversation.messages.len() / 2;
8177 if split_at == 0 || split_at >= entry.conversation.messages.len() {
8178 return None;
8179 }
8180
8181 let mut left = entry.clone();
8182 left.conversation.messages = entry.conversation.messages[..split_at].to_vec();
8183
8184 let mut right = entry.clone();
8185 right.conversation.messages = entry.conversation.messages[split_at..].to_vec();
8186
8187 Some((left, right))
8188 }
8189
8190 fn import_historical_batch_with_retry<F>(
8191 entries: &[HistoricalBatchEntry],
8192 insert_batch: &mut F,
8193 ) -> Result<HistoricalBatchImportTotals>
8194 where
8195 F: FnMut(&[HistoricalBatchEntry]) -> Result<HistoricalBatchImportTotals>,
8196 {
8197 match insert_batch(entries) {
8198 Ok(totals) => Ok(totals),
8199 Err(err) if Self::historical_import_error_is_split_retryable(&err) => {
8200 if entries.len() > 1 {
8201 let mid = entries.len() / 2;
8202 tracing::warn!(
8203 batch_entries = entries.len(),
8204 split_left = mid,
8205 split_right = entries.len() - mid,
8206 error = %err,
8207 "historical salvage batch failed; retrying in smaller sub-batches"
8208 );
8209 let left =
8210 Self::import_historical_batch_with_retry(&entries[..mid], insert_batch)?;
8211 let right =
8212 Self::import_historical_batch_with_retry(&entries[mid..], insert_batch)?;
8213 return Ok(HistoricalBatchImportTotals {
8214 inserted_source_rows: left.inserted_source_rows
8215 + right.inserted_source_rows,
8216 inserted_messages: left.inserted_messages + right.inserted_messages,
8217 });
8218 }
8219
8220 if let Some(entry) = entries.first()
8221 && let Some((left, right)) = Self::split_historical_batch_entry_messages(entry)
8222 {
8223 tracing::warn!(
8224 source_row_id = entry.source_row_id,
8225 message_count = entry.conversation.messages.len(),
8226 error = %err,
8227 "historical salvage conversation failed; retrying in smaller message slices"
8228 );
8229 let left_totals = Self::import_historical_batch_with_retry(
8230 std::slice::from_ref(&left),
8231 insert_batch,
8232 )?;
8233 let right_totals = Self::import_historical_batch_with_retry(
8234 std::slice::from_ref(&right),
8235 insert_batch,
8236 )?;
8237 return Ok(HistoricalBatchImportTotals {
8238 inserted_source_rows: usize::from(
8239 left_totals.inserted_source_rows > 0
8240 || right_totals.inserted_source_rows > 0,
8241 ),
8242 inserted_messages: left_totals
8243 .inserted_messages
8244 .saturating_add(right_totals.inserted_messages),
8245 });
8246 }
8247
8248 Err(err)
8249 }
8250 Err(err) => Err(err),
8251 }
8252 }
8253
8254 fn import_historical_sources(&self, source_conn: &FrankenConnection) -> Result<()> {
8255 let sources: Vec<Source> = match source_conn.query_map_collect(
8256 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at
8257 FROM sources",
8258 fparams![],
8259 |row| {
8260 let raw_source_id: String = row.get_typed(0)?;
8261 let kind_str: String = row.get_typed(1)?;
8262 let raw_host_label: Option<String> = row.get_typed(2)?;
8263 let config_json_raw: Option<String> = row.get_typed(5)?;
8264 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
8265 Some(raw_source_id.as_str()),
8266 Some(kind_str.as_str()),
8267 raw_host_label.as_deref(),
8268 );
8269 Ok(Source {
8270 id: source_id,
8271 kind: source_kind,
8272 host_label,
8273 machine_id: row.get_typed(3)?,
8274 platform: row.get_typed(4)?,
8275 config_json: config_json_raw.and_then(|raw| serde_json::from_str(&raw).ok()),
8276 created_at: row.get_typed(6)?,
8277 updated_at: row.get_typed(7)?,
8278 })
8279 },
8280 ) {
8281 Ok(rows) => rows,
8282 Err(err) => {
8283 tracing::warn!(error = %err, "historical sources table unavailable; skipping source import");
8284 return Ok(());
8285 }
8286 };
8287
8288 for source in sources {
8289 self.upsert_source(&source)?;
8290 }
8291 Ok(())
8292 }
8293
8294 fn import_historical_conversations(
8295 &self,
8296 bundle: &HistoricalDatabaseBundle,
8297 salvage_method: &str,
8298 source_conn: &FrankenConnection,
8299 ) -> Result<(usize, usize)> {
8300 let batch_limits = historical_import_batch_limits();
8301 let cache_enabled = IndexingCache::is_enabled();
8302 let mut indexing_cache = IndexingCache::new();
8303 let mut known_sources: HashSet<String> = self
8304 .list_sources()?
8305 .into_iter()
8306 .map(|source| source.id)
8307 .collect();
8308 let resume_progress = self.load_historical_bundle_progress(bundle)?;
8309 let resume_after_row_id = resume_progress
8310 .as_ref()
8311 .map(|progress| progress.last_completed_source_row_id)
8312 .filter(|row_id| *row_id > 0);
8313
8314 tracing::info!(
8315 target: "cass::historical_salvage",
8316 batch_conversations = batch_limits.conversations,
8317 batch_messages = batch_limits.messages,
8318 batch_payload_chars = batch_limits.payload_chars,
8319 cache_enabled,
8320 resume_after_row_id,
8321 "configured historical salvage batch limits"
8322 );
8323
8324 if let Some(progress) = &resume_progress {
8325 tracing::info!(
8326 target: "cass::historical_salvage",
8327 path = %bundle.root_path.display(),
8328 resume_after_row_id = progress.last_completed_source_row_id,
8329 prior_conversations_imported = progress.conversations_imported,
8330 prior_messages_imported = progress.messages_imported,
8331 "resuming historical salvage bundle from durable checkpoint"
8332 );
8333 }
8334
8335 let conv_sql = if resume_after_row_id.is_some() {
8341 "SELECT
8342 c.id,
8343 COALESCE(a.slug, 'unknown'),
8344 w.path,
8345 c.external_id,
8346 c.title,
8347 c.source_path,
8348 c.started_at,
8349 c.ended_at,
8350 c.approx_tokens,
8351 c.metadata_json,
8352 c.source_id,
8353 c.origin_host
8354 FROM conversations c
8355 LEFT JOIN agents a ON c.agent_id = a.id
8356 LEFT JOIN workspaces w ON c.workspace_id = w.id
8357 WHERE c.id > ?1
8358 ORDER BY c.id"
8359 } else {
8360 "SELECT
8361 c.id,
8362 COALESCE(a.slug, 'unknown'),
8363 w.path,
8364 c.external_id,
8365 c.title,
8366 c.source_path,
8367 c.started_at,
8368 c.ended_at,
8369 c.approx_tokens,
8370 c.metadata_json,
8371 c.source_id,
8372 c.origin_host
8373 FROM conversations c
8374 LEFT JOIN agents a ON c.agent_id = a.id
8375 LEFT JOIN workspaces w ON c.workspace_id = w.id
8376 ORDER BY c.id"
8377 };
8378 let conv_params: &[ParamValue] =
8379 if let Some(last_completed_source_row_id) = resume_after_row_id {
8380 &[ParamValue::from(last_completed_source_row_id)]
8381 } else {
8382 &[]
8383 };
8384
8385 #[allow(clippy::type_complexity)]
8386 let conv_rows: Vec<(
8387 i64,
8388 String,
8389 Option<String>,
8390 Option<String>,
8391 Option<String>,
8392 String,
8393 Option<i64>,
8394 Option<i64>,
8395 Option<i64>,
8396 Option<String>,
8397 Option<String>,
8398 Option<String>,
8399 )> = source_conn
8400 .query_map_collect(conv_sql, conv_params, |row| {
8401 Ok((
8402 row.get_typed::<i64>(0)?,
8403 row.get_typed::<String>(1)?,
8404 row.get_typed::<Option<String>>(2)?,
8405 row.get_typed::<Option<String>>(3)?,
8406 row.get_typed::<Option<String>>(4)?,
8407 row.get_typed::<String>(5)?,
8408 row.get_typed::<Option<i64>>(6)?,
8409 row.get_typed::<Option<i64>>(7)?,
8410 row.get_typed::<Option<i64>>(8)?,
8411 row.get_typed::<Option<String>>(9)?,
8412 row.get_typed::<Option<String>>(10)?,
8413 row.get_typed::<Option<String>>(11)?,
8414 ))
8415 })
8416 .context("querying historical conversations")?;
8417
8418 let msg_sql = "SELECT idx, role, author, created_at, content, extra_json
8419 FROM messages
8420 WHERE conversation_id = ?1
8421 ORDER BY idx";
8422
8423 let mut imported_conversations = resume_progress
8424 .as_ref()
8425 .map(|progress| progress.conversations_imported)
8426 .unwrap_or(0);
8427 let mut imported_messages = resume_progress
8428 .as_ref()
8429 .map(|progress| progress.messages_imported)
8430 .unwrap_or(0);
8431 let mut pending_batch: Vec<HistoricalBatchEntry> = Vec::new();
8432 let mut pending_batch_messages = 0usize;
8433 let mut pending_batch_chars = 0usize;
8434 let mut pending_batch_first_row_id: Option<i64> = None;
8435 let mut pending_batch_last_row_id: Option<i64> = None;
8436
8437 let flush_batch = |storage: &FrankenStorage,
8438 batch: &mut Vec<HistoricalBatchEntry>,
8439 pending_messages: &mut usize,
8440 pending_chars: &mut usize,
8441 first_row_id: &mut Option<i64>,
8442 last_row_id: &mut Option<i64>,
8443 imported_conversations: &mut usize,
8444 imported_messages: &mut usize|
8445 -> Result<()> {
8446 if batch.is_empty() {
8447 return Ok(());
8448 }
8449
8450 let batch_first_row_id = *first_row_id;
8451 let batch_last_row_id = *last_row_id;
8452 if historical_salvage_debug_enabled() {
8453 eprintln!(
8454 "[historical-salvage] flushing batch rows {:?}..{:?} conversations={} messages={} payload_chars={}",
8455 batch_first_row_id,
8456 batch_last_row_id,
8457 batch.len(),
8458 *pending_messages,
8459 *pending_chars
8460 );
8461 }
8462 tracing::info!(
8463 target: "cass::historical_salvage",
8464 batch_conversations = batch.len(),
8465 batch_messages = *pending_messages,
8466 batch_payload_chars = *pending_chars,
8467 first_source_row_id = batch_first_row_id,
8468 last_source_row_id = batch_last_row_id,
8469 "flushing historical salvage batch"
8470 );
8471
8472 let mut insert_batch =
8473 |entries: &[HistoricalBatchEntry]| -> Result<HistoricalBatchImportTotals> {
8474 let borrowed_batch: Vec<(i64, Option<i64>, &Conversation)> = entries
8475 .iter()
8476 .map(|entry| (entry.agent_id, entry.workspace_id, &entry.conversation))
8477 .collect();
8478 let outcomes = storage
8479 .insert_conversations_batched(&borrowed_batch)
8480 .with_context(|| {
8481 let first_source_row_id =
8482 entries.first().map(|entry| entry.source_row_id);
8483 let last_source_row_id =
8484 entries.last().map(|entry| entry.source_row_id);
8485 format!(
8486 "inserting historical salvage batch source rows {:?}..{:?}",
8487 first_source_row_id, last_source_row_id
8488 )
8489 })?;
8490 let mut totals = HistoricalBatchImportTotals::default();
8491 for outcome in outcomes {
8492 if !outcome.inserted_indices.is_empty() {
8493 totals.inserted_source_rows += 1;
8494 totals.inserted_messages += outcome.inserted_indices.len();
8495 }
8496 }
8497 Ok(totals)
8498 };
8499 let totals =
8500 Self::import_historical_batch_with_retry(batch.as_slice(), &mut insert_batch)?;
8501 *imported_conversations =
8502 (*imported_conversations).saturating_add(totals.inserted_source_rows);
8503 *imported_messages = (*imported_messages).saturating_add(totals.inserted_messages);
8504 if let Some(last_completed_row_id) = batch_last_row_id {
8505 storage.record_historical_bundle_progress(
8506 bundle,
8507 salvage_method,
8508 last_completed_row_id,
8509 *imported_conversations,
8510 *imported_messages,
8511 )?;
8512 }
8513 tracing::info!(
8514 target: "cass::historical_salvage",
8515 batch_conversations = batch.len(),
8516 batch_messages = *pending_messages,
8517 imported_conversations = *imported_conversations,
8518 imported_messages = *imported_messages,
8519 first_source_row_id = batch_first_row_id,
8520 last_source_row_id = batch_last_row_id,
8521 "historical salvage batch committed"
8522 );
8523 if historical_salvage_debug_enabled() {
8524 eprintln!(
8525 "[historical-salvage] committed batch rows {:?}..{:?} imported_conversations={} imported_messages={}",
8526 batch_first_row_id,
8527 batch_last_row_id,
8528 *imported_conversations,
8529 *imported_messages
8530 );
8531 }
8532 batch.clear();
8533 *pending_messages = 0;
8534 *pending_chars = 0;
8535 *first_row_id = None;
8536 *last_row_id = None;
8537 Ok(())
8538 };
8539
8540 for (
8541 conversation_row_id,
8542 agent_slug,
8543 workspace_path,
8544 external_id,
8545 title,
8546 source_path,
8547 started_at,
8548 ended_at,
8549 approx_tokens,
8550 metadata_json_raw,
8551 raw_source_id,
8552 raw_origin_host,
8553 ) in conv_rows
8554 {
8555 let source_id = crate::search::tantivy::normalized_index_source_id(
8556 raw_source_id.as_deref(),
8557 None,
8558 raw_origin_host.as_deref(),
8559 );
8560 let origin_host =
8561 crate::search::tantivy::normalized_index_origin_host(raw_origin_host.as_deref());
8562
8563 let messages: Vec<Message> = source_conn
8564 .query_map_collect(msg_sql, fparams![conversation_row_id], |msg_row| {
8565 let role: String = msg_row.get_typed(1)?;
8566 Ok(Message {
8567 id: None,
8568 idx: msg_row.get_typed(0)?,
8569 role: match role.as_str() {
8570 "user" => MessageRole::User,
8571 "agent" | "assistant" => MessageRole::Agent,
8572 "tool" => MessageRole::Tool,
8573 "system" => MessageRole::System,
8574 other => MessageRole::Other(other.to_string()),
8575 },
8576 author: msg_row.get_typed(2)?,
8577 created_at: msg_row.get_typed(3)?,
8578 content: msg_row.get_typed(4)?,
8579 extra_json: parse_historical_json_column(msg_row.get_typed(5)?),
8580 snippets: Vec::new(),
8581 })
8582 })
8583 .context("collecting historical message rows")?;
8584
8585 if messages.is_empty() {
8586 continue;
8587 }
8588
8589 let conversation_message_count = messages.len();
8590 let conversation_chars = messages
8591 .iter()
8592 .map(message_payload_size_hint)
8593 .sum::<usize>();
8594
8595 let conversation = Conversation {
8596 id: None,
8597 agent_slug: agent_slug.clone(),
8598 workspace: workspace_path.map(PathBuf::from),
8599 external_id,
8600 title,
8601 source_path: PathBuf::from(source_path),
8602 started_at,
8603 ended_at,
8604 approx_tokens,
8605 metadata_json: parse_json_column(metadata_json_raw),
8606 messages,
8607 source_id,
8608 origin_host,
8609 };
8610
8611 if !known_sources.contains(&conversation.source_id) {
8612 let placeholder = if conversation.source_id == LOCAL_SOURCE_ID {
8613 Source::local()
8614 } else {
8615 Source {
8616 id: conversation.source_id.clone(),
8617 kind: SourceKind::Ssh,
8618 host_label: conversation.origin_host.clone(),
8619 machine_id: None,
8620 platform: None,
8621 config_json: None,
8622 created_at: None,
8623 updated_at: None,
8624 }
8625 };
8626 self.upsert_source(&placeholder)?;
8627 known_sources.insert(conversation.source_id.clone());
8628 }
8629
8630 let agent = Agent {
8631 id: None,
8632 slug: agent_slug.clone(),
8633 name: agent_slug,
8634 version: None,
8635 kind: AgentKind::Cli,
8636 };
8637 let agent_id = if cache_enabled {
8638 indexing_cache.get_or_insert_agent(self, &agent)?
8639 } else {
8640 self.ensure_agent(&agent)?
8641 };
8642 let workspace_id = if let Some(workspace) = &conversation.workspace {
8643 if cache_enabled {
8644 Some(indexing_cache.get_or_insert_workspace(self, workspace, None)?)
8645 } else {
8646 Some(self.ensure_workspace(workspace, None)?)
8647 }
8648 } else {
8649 None
8650 };
8651
8652 let exceeds_pending_limits = !pending_batch.is_empty()
8653 && (pending_batch.len() >= batch_limits.conversations
8654 || pending_batch_messages.saturating_add(conversation_message_count)
8655 > batch_limits.messages
8656 || pending_batch_chars.saturating_add(conversation_chars)
8657 > batch_limits.payload_chars);
8658 if exceeds_pending_limits {
8659 flush_batch(
8660 self,
8661 &mut pending_batch,
8662 &mut pending_batch_messages,
8663 &mut pending_batch_chars,
8664 &mut pending_batch_first_row_id,
8665 &mut pending_batch_last_row_id,
8666 &mut imported_conversations,
8667 &mut imported_messages,
8668 )?;
8669 }
8670
8671 if pending_batch_first_row_id.is_none() {
8672 pending_batch_first_row_id = Some(conversation_row_id);
8673 }
8674 pending_batch_last_row_id = Some(conversation_row_id);
8675 pending_batch_messages =
8676 pending_batch_messages.saturating_add(conversation_message_count);
8677 pending_batch_chars = pending_batch_chars.saturating_add(conversation_chars);
8678 pending_batch.push(HistoricalBatchEntry {
8679 source_row_id: conversation_row_id,
8680 agent_id,
8681 workspace_id,
8682 conversation,
8683 });
8684
8685 if pending_batch.len() >= batch_limits.conversations
8686 || pending_batch_messages >= batch_limits.messages
8687 || pending_batch_chars >= batch_limits.payload_chars
8688 {
8689 flush_batch(
8690 self,
8691 &mut pending_batch,
8692 &mut pending_batch_messages,
8693 &mut pending_batch_chars,
8694 &mut pending_batch_first_row_id,
8695 &mut pending_batch_last_row_id,
8696 &mut imported_conversations,
8697 &mut imported_messages,
8698 )?;
8699 }
8700 }
8701
8702 flush_batch(
8703 self,
8704 &mut pending_batch,
8705 &mut pending_batch_messages,
8706 &mut pending_batch_chars,
8707 &mut pending_batch_first_row_id,
8708 &mut pending_batch_last_row_id,
8709 &mut imported_conversations,
8710 &mut imported_messages,
8711 )?;
8712
8713 if cache_enabled {
8714 let (hits, misses, hit_rate) = indexing_cache.stats();
8715 tracing::info!(
8716 target: "cass::historical_salvage",
8717 hits,
8718 misses,
8719 hit_rate = format!("{:.1}%", hit_rate * 100.0),
8720 agents = indexing_cache.agent_count(),
8721 workspaces = indexing_cache.workspace_count(),
8722 sources = known_sources.len(),
8723 "historical salvage cache stats"
8724 );
8725 }
8726
8727 Ok((imported_conversations, imported_messages))
8728 }
8729
8730 pub fn salvage_historical_databases(
8731 &self,
8732 canonical_db_path: &Path,
8733 ) -> Result<HistoricalSalvageOutcome> {
8734 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
8735 let mut outcome = HistoricalSalvageOutcome {
8736 bundles_considered: ordered_bundles.len(),
8737 ..HistoricalSalvageOutcome::default()
8738 };
8739
8740 for bundle in ordered_bundles {
8741 if self.historical_bundle_already_imported(&bundle)? {
8742 self.clear_historical_bundle_progress(&bundle)?;
8743 continue;
8744 }
8745
8746 let source = match open_historical_bundle_for_salvage(&bundle).with_context(|| {
8747 format!(
8748 "opening historical bundle {} for salvage",
8749 bundle.root_path.display()
8750 )
8751 }) {
8752 Ok(source) => source,
8753 Err(err) => {
8754 tracing::warn!(
8755 path = %bundle.root_path.display(),
8756 error = %err,
8757 "skipping unreadable historical cass database bundle during salvage"
8758 );
8759 self.clear_historical_bundle_progress(&bundle)?;
8760 continue;
8761 }
8762 };
8763
8764 if let Some(progress) = self.load_historical_bundle_progress(&bundle)? {
8772 let backup_max_conversation_id: i64 = source
8773 .conn
8774 .query_row_map(
8775 "SELECT COALESCE(MAX(id), 0) FROM conversations",
8776 fparams![],
8777 |row| row.get_typed(0),
8778 )
8779 .unwrap_or(0);
8780 if backup_max_conversation_id > 0
8781 && progress.last_completed_source_row_id >= backup_max_conversation_id
8782 {
8783 self.record_historical_bundle_import(
8784 &bundle,
8785 source.method,
8786 progress.conversations_imported,
8787 progress.messages_imported,
8788 )?;
8789 self.clear_historical_bundle_progress(&bundle)?;
8790 tracing::info!(
8791 path = %bundle.root_path.display(),
8792 last_completed_source_row_id = progress.last_completed_source_row_id,
8793 backup_max_conversation_id,
8794 conversations_imported = progress.conversations_imported,
8795 messages_imported = progress.messages_imported,
8796 "historical bundle already fully imported per checkpoint; marking salvaged and skipping O(n) re-scan"
8797 );
8798 continue;
8799 }
8800 }
8801
8802 self.import_historical_sources(&source.conn)?;
8803 let (imported_conversations, imported_messages) =
8804 self.import_historical_conversations(&bundle, source.method, &source.conn)?;
8805 self.record_historical_bundle_import(
8806 &bundle,
8807 source.method,
8808 imported_conversations,
8809 imported_messages,
8810 )?;
8811 self.clear_historical_bundle_progress(&bundle)?;
8812
8813 outcome.bundles_imported += 1;
8814 outcome.conversations_imported += imported_conversations;
8815 outcome.messages_imported += imported_messages;
8816
8817 tracing::info!(
8818 path = %bundle.root_path.display(),
8819 bytes = bundle.total_bytes,
8820 method = source.method,
8821 imported_conversations,
8822 imported_messages,
8823 "salvaged historical cass database bundle"
8824 );
8825 }
8826
8827 Ok(outcome)
8828 }
8829
8830 pub fn delete_source(&self, id: &str, _cascade: bool) -> Result<bool> {
8832 if id == LOCAL_SOURCE_ID {
8833 anyhow::bail!("cannot delete the local source");
8834 }
8835 let count = self
8836 .conn
8837 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams![id])?;
8838 if count > 0 {
8839 self.invalidate_conversation_source_cache(id);
8840 }
8841 Ok(count > 0)
8842 }
8843
8844 pub fn insert_conversation_tree(
8846 &self,
8847 agent_id: i64,
8848 workspace_id: Option<i64>,
8849 conv: &Conversation,
8850 ) -> Result<InsertOutcome> {
8851 let normalized_conv = normalized_conversation_for_storage(conv);
8852 let conv = normalized_conv.as_ref();
8853 self.ensure_source_for_conversation(conv)?;
8854 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
8855 let defer_analytics_updates = defer_analytics_updates_enabled();
8856 let conversation_key = conversation_merge_key(agent_id, conv);
8857 let mut tx = self.conn.transaction()?;
8858 let existing = franken_find_existing_conversation_with_tail_by_key(
8859 &tx,
8860 &conversation_key,
8861 Some(conv),
8862 )?;
8863 if let Some(existing) = existing {
8864 let outcome = self.franken_append_messages_with_tail_in_tx(
8865 &tx,
8866 agent_id,
8867 existing.id,
8868 conv,
8869 existing.tail_state,
8870 defer_lexical_updates,
8871 defer_analytics_updates,
8872 )?;
8873 tx.commit()?;
8874 return Ok(outcome);
8875 }
8876
8877 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
8878 &tx,
8879 agent_id,
8880 workspace_id,
8881 conv,
8882 &conversation_key,
8883 )? {
8884 ConversationInsertStatus::Inserted(conv_id) => conv_id,
8885 ConversationInsertStatus::Existing(existing_id) => {
8886 let ExistingMessageLookup {
8887 by_idx: mut existing_messages,
8888 replay: mut existing_replay_fingerprints,
8889 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
8890 let ExistingConversationNewMessages {
8891 messages: new_messages,
8892 new_chars,
8893 idx_collision_count,
8894 first_collision_idx,
8895 } = collect_new_messages_for_existing_conversation(
8896 existing_id,
8897 conv,
8898 &mut existing_messages,
8899 &mut existing_replay_fingerprints,
8900 "skipping replay-equivalent recovered message with shifted idx",
8901 );
8902 let (inserted_last_idx, inserted_last_created_at) =
8903 borrowed_messages_tail_state(&new_messages);
8904 let mut inserted_indices = Vec::new();
8905 let mut fts_entries = Vec::new();
8906 let mut fts_pending_chars = 0usize;
8907 let mut _fts_inserted_total = 0usize;
8908 let inserted_message_ids =
8909 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
8910 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8911 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8912 if !defer_lexical_updates {
8913 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8914 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
8915 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8916 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8917 {
8918 flush_pending_fts_entries(
8919 self,
8920 &tx,
8921 &mut fts_entries,
8922 &mut fts_pending_chars,
8923 &mut _fts_inserted_total,
8924 )?;
8925 }
8926 }
8927 inserted_indices.push(msg.idx);
8928 }
8929
8930 if idx_collision_count > 0 {
8931 tracing::warn!(
8932 conversation_id = existing_id,
8933 collision_count = idx_collision_count,
8934 first_idx = first_collision_idx,
8935 source_path = %conv.source_path.display(),
8936 "message idx collisions encountered while merging recovered conversation; retaining canonical message variants"
8937 );
8938 }
8939
8940 if !defer_lexical_updates {
8941 flush_pending_fts_entries(
8942 self,
8943 &tx,
8944 &mut fts_entries,
8945 &mut fts_pending_chars,
8946 &mut _fts_inserted_total,
8947 )?;
8948 }
8949
8950 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
8951 franken_update_conversation_tail_state(
8952 &tx,
8953 existing_id,
8954 conv_last_ts,
8955 inserted_last_idx,
8956 inserted_last_created_at,
8957 )?;
8958 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
8959 {
8960 franken_update_external_conversation_tail_lookup_key(
8961 &tx,
8962 &lookup_key,
8963 conv_last_ts,
8964 inserted_last_idx,
8965 inserted_last_created_at,
8966 )?;
8967 }
8968
8969 if !defer_analytics_updates && !inserted_indices.is_empty() {
8970 franken_update_daily_stats_in_tx(
8971 self,
8972 &tx,
8973 &conv.agent_slug,
8974 &conv.source_id,
8975 conversation_effective_started_at(conv),
8976 StatsDelta {
8977 session_count_delta: 0,
8978 message_count_delta: inserted_indices.len() as i64,
8979 total_chars_delta: new_chars,
8980 },
8981 )?;
8982 }
8983
8984 tx.commit()?;
8985 return Ok(InsertOutcome {
8986 conversation_id: existing_id,
8987 conversation_inserted: false,
8988 inserted_indices,
8989 });
8990 }
8991 };
8992 let mut fts_entries = Vec::new();
8993 let mut fts_pending_chars = 0usize;
8994 let mut _fts_inserted_total = 0usize;
8995 let mut total_chars: i64 = 0;
8996 let mut inserted_indices = Vec::new();
8997 let mut pending_messages = HashMap::new();
8998 let mut pending_replay_fingerprints = HashSet::new();
8999 let mut idx_collision_count = 0usize;
9000 let mut first_collision_idx: Option<i64> = None;
9001 let mut new_messages = Vec::new();
9002 for msg in &conv.messages {
9003 let incoming_fingerprint = message_merge_fingerprint(msg);
9004 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
9005 if existing_fingerprint != &incoming_fingerprint {
9006 idx_collision_count = idx_collision_count.saturating_add(1);
9007 first_collision_idx.get_or_insert(msg.idx);
9008 }
9009 continue;
9010 }
9011 let incoming_replay = message_replay_fingerprint(msg);
9012 if pending_replay_fingerprints.contains(&incoming_replay) {
9013 tracing::debug!(
9014 conversation_id = conv_id,
9015 idx = msg.idx,
9016 source_path = %conv.source_path.display(),
9017 "skipping replay-equivalent duplicate message within new conversation insert"
9018 );
9019 continue;
9020 }
9021 pending_messages.insert(msg.idx, incoming_fingerprint);
9022 pending_replay_fingerprints.insert(incoming_replay);
9023 new_messages.push(msg);
9024 }
9025 let inserted_message_ids = franken_batch_insert_new_messages(&tx, conv_id, &new_messages)?;
9026 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9027 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9028 if !defer_lexical_updates {
9029 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9030 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9031 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9032 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9033 {
9034 flush_pending_fts_entries(
9035 self,
9036 &tx,
9037 &mut fts_entries,
9038 &mut fts_pending_chars,
9039 &mut _fts_inserted_total,
9040 )?;
9041 }
9042 }
9043 total_chars += msg.content.len() as i64;
9044 inserted_indices.push(msg.idx);
9045 }
9046 if idx_collision_count > 0 {
9047 tracing::warn!(
9048 conversation_id = conv_id,
9049 collision_count = idx_collision_count,
9050 first_idx = first_collision_idx,
9051 source_path = %conv.source_path.display(),
9052 "message idx collisions encountered while inserting a new conversation; retaining the first canonical variant per idx"
9053 );
9054 }
9055 if !defer_lexical_updates {
9056 flush_pending_fts_entries(
9057 self,
9058 &tx,
9059 &mut fts_entries,
9060 &mut fts_pending_chars,
9061 &mut _fts_inserted_total,
9062 )?;
9063 }
9064
9065 if !defer_analytics_updates {
9066 franken_update_daily_stats_in_tx(
9067 self,
9068 &tx,
9069 &conv.agent_slug,
9070 &conv.source_id,
9071 conversation_effective_started_at(conv),
9072 StatsDelta {
9073 session_count_delta: 1,
9074 message_count_delta: inserted_indices.len() as i64,
9075 total_chars_delta: total_chars,
9076 },
9077 )?;
9078 }
9079
9080 tx.commit()?;
9081 Ok(InsertOutcome {
9082 conversation_id: conv_id,
9083 conversation_inserted: true,
9084 inserted_indices,
9085 })
9086 }
9087
/// Test-only variant of the new-conversation insert path that records how long each phase
/// takes into `profile`.
///
/// The helper only supports conversations that are not stored yet: it returns an error if
/// the merge-key lookup finds a row, or if the row insert reports an existing conversation.
/// Incoming messages are de-duplicated within `conv` (first variant per `idx` wins,
/// replay-equivalent repeats are dropped) before being inserted.
///
/// On success the per-phase durations, `invocations`, `messages` (all messages in the
/// normalized conversation) and `inserted_messages` counters of `profile` are increased.
/// Durations of phases completed before an early error return stay recorded.
#[cfg(test)]
fn insert_conversation_tree_with_profile(
    &self,
    agent_id: i64,
    workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let started = Instant::now();
    let normalized = normalized_conversation_for_storage(conv);
    let conv = normalized.as_ref();

    let source_timer = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += source_timer.elapsed();

    let skip_lexical = defer_storage_lexical_updates_enabled();
    let skip_analytics = defer_analytics_updates_enabled();
    let merge_key = conversation_merge_key(agent_id, conv);

    let tx_timer = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += tx_timer.elapsed();

    let lookup_timer = Instant::now();
    let prior = franken_find_existing_conversation_by_key(&tx, &merge_key, Some(conv))?;
    profile.existing_lookup_duration += lookup_timer.elapsed();
    if let Some(existing_id) = prior {
        bail!("profile helper expects new conversation path, found existing id {existing_id}");
    }

    let row_timer = Instant::now();
    let insert_status = franken_insert_conversation_or_get_existing_after_miss(
        &tx,
        agent_id,
        workspace_id,
        conv,
        &merge_key,
    )?;
    let conv_id = match insert_status {
        ConversationInsertStatus::Inserted(new_id) => new_id,
        ConversationInsertStatus::Existing(existing_id) => {
            bail!(
                "profile helper expected inserted conversation row, reused existing id {existing_id}"
            );
        }
    };
    profile.conversation_row_duration += row_timer.elapsed();

    // De-duplicate the incoming messages against each other: the first variant seen for
    // an idx is kept, and a message whose replay fingerprint was already accepted is
    // dropped.
    let mut seen_by_idx = HashMap::new();
    let mut seen_replays = HashSet::new();
    let mut collisions = 0usize;
    let mut first_collision: Option<i64> = None;
    let mut unique_messages = Vec::new();
    for msg in &conv.messages {
        let fingerprint = message_merge_fingerprint(msg);
        if let Some(kept) = seen_by_idx.get(&msg.idx) {
            if kept != &fingerprint {
                collisions = collisions.saturating_add(1);
                first_collision.get_or_insert(msg.idx);
            }
            continue;
        }

        // `insert` reports `false` when the fingerprint was already present.
        if !seen_replays.insert(message_replay_fingerprint(msg)) {
            tracing::debug!(
                conversation_id = conv_id,
                idx = msg.idx,
                source_path = %conv.source_path.display(),
                "skipping replay-equivalent duplicate message within profiled new conversation insert"
            );
            continue;
        }

        seen_by_idx.insert(msg.idx, fingerprint);
        unique_messages.push(msg);
    }

    let insert_timer = Instant::now();
    let new_message_ids = franken_batch_insert_new_messages_with_profile(
        &tx,
        conv_id,
        &unique_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += insert_timer.elapsed();

    let mut stored_indices = Vec::new();
    let mut stored_chars: i64 = 0;
    let mut fts_queue = Vec::new();
    let mut fts_queue_chars = 0usize;
    let mut fts_flushed_total = 0usize;
    for (message_id, message) in new_message_ids.into_iter().zip(unique_messages) {
        let snippet_timer = Instant::now();
        franken_insert_snippets(&tx, message_id, &message.snippets)?;
        profile.snippet_insert_duration += snippet_timer.elapsed();

        if !skip_lexical {
            let entry_timer = Instant::now();
            fts_queue.push(FtsEntry::from_message(message_id, message, conv));
            fts_queue_chars = fts_queue_chars.saturating_add(message.content.len());
            profile.fts_entry_duration += entry_timer.elapsed();

            let batch_full = fts_queue.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_queue_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if batch_full {
                let flush_timer = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_queue,
                    &mut fts_queue_chars,
                    &mut fts_flushed_total,
                )?;
                profile.fts_flush_duration += flush_timer.elapsed();
            }
        }

        stored_chars += message.content.len() as i64;
        stored_indices.push(message.idx);
    }

    if collisions > 0 {
        tracing::warn!(
            conversation_id = conv_id,
            collision_count = collisions,
            first_idx = first_collision,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling a new conversation insert; retaining the first canonical variant per idx"
        );
    }

    // Push out whatever is still queued below the batch thresholds.
    if !skip_lexical {
        let flush_timer = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_queue,
            &mut fts_queue_chars,
            &mut fts_flushed_total,
        )?;
        profile.fts_flush_duration += flush_timer.elapsed();
    }

    if !skip_analytics {
        let analytics_timer = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 1,
                message_count_delta: stored_indices.len() as i64,
                total_chars_delta: stored_chars,
            },
        )?;
        profile.analytics_duration += analytics_timer.elapsed();
    }

    let commit_timer = Instant::now();
    tx.commit()?;
    profile.commit_duration += commit_timer.elapsed();
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += stored_indices.len();
    profile.total_duration += started.elapsed();

    Ok(InsertOutcome {
        conversation_id: conv_id,
        conversation_inserted: true,
        inserted_indices: stored_indices,
    })
}
9267
/// Test-only variant of the append-to-existing-conversation path that records how long
/// each phase takes into `profile`.
///
/// The conversation must already be stored; otherwise an error is returned. The set of
/// messages to insert is chosen in one of two ways:
///
/// * if the stored tail state is present and `collect_append_only_tail_messages` yields a
///   plan for it, that plan is used as-is;
/// * otherwise the stored messages are looked up and
///   `collect_new_messages_for_existing_conversation` selects the new ones.
///
/// After inserting, the conversation's tail state and its external tail record are
/// updated, and daily stats are bumped when analytics are not deferred and at least one
/// message was inserted. `_workspace_id` is accepted but not consulted.
///
/// On success the per-phase durations, `invocations`, `messages` and `inserted_messages`
/// counters of `profile` are increased.
#[cfg(test)]
fn append_existing_conversation_with_profile(
    &self,
    agent_id: i64,
    _workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let started = Instant::now();
    let normalized = normalized_conversation_for_storage(conv);
    let conv = normalized.as_ref();

    let source_timer = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += source_timer.elapsed();

    let skip_lexical = defer_storage_lexical_updates_enabled();
    let skip_analytics = defer_analytics_updates_enabled();
    let conversation_key = conversation_merge_key(agent_id, conv);

    let tx_timer = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += tx_timer.elapsed();

    let lookup_timer = Instant::now();
    let found = franken_find_existing_conversation_with_tail_by_key(
        &tx,
        &conversation_key,
        Some(conv),
    )?;
    profile.existing_lookup_duration += lookup_timer.elapsed();
    let Some(found) = found else {
        bail!("append profile helper expects existing conversation for {conversation_key:?}");
    };
    let existing_id = found.id;

    // Try to derive the messages to append from the stored tail state alone.
    let tail_timer = Instant::now();
    let tail_state = found.tail_state;
    let tail_ended_at = tail_state.and_then(|state| state.ended_at);
    let tail_plan = tail_state.as_ref().and_then(|state| {
        collect_append_only_tail_messages(
            conv,
            state.last_message_idx,
            state.last_message_created_at,
        )
    });
    let used_append_tail_plan = tail_plan.is_some();
    profile.existing_idx_lookup_duration += tail_timer.elapsed();

    let dedupe_timer = Instant::now();
    let ExistingConversationNewMessages {
        messages: fresh_messages,
        new_chars: fresh_chars,
        idx_collision_count: collisions,
        first_collision_idx: first_collision,
    } = match tail_plan {
        Some(plan) => plan,
        // No tail plan available: compare against the stored messages instead.
        None => {
            let mut lookup = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
            collect_new_messages_for_existing_conversation(
                existing_id,
                conv,
                &mut lookup.by_idx,
                &mut lookup.replay,
                "skipping replay-equivalent profiled append message with shifted idx",
            )
        }
    };
    profile.dedupe_filter_duration += dedupe_timer.elapsed();

    let mut stored_indices = Vec::new();
    let mut fts_queue = Vec::new();
    let mut fts_queue_chars = 0usize;
    let mut fts_flushed_total = 0usize;
    let (tail_idx, tail_created_at) = borrowed_messages_tail_state(&fresh_messages);

    let insert_timer = Instant::now();
    let new_message_ids = franken_append_insert_new_messages_with_profile(
        &tx,
        existing_id,
        &fresh_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += insert_timer.elapsed();

    for (message_id, message) in new_message_ids.into_iter().zip(fresh_messages) {
        let snippet_timer = Instant::now();
        franken_insert_snippets(&tx, message_id, &message.snippets)?;
        profile.snippet_insert_duration += snippet_timer.elapsed();

        if !skip_lexical {
            let entry_timer = Instant::now();
            fts_queue.push(FtsEntry::from_message(message_id, message, conv));
            fts_queue_chars = fts_queue_chars.saturating_add(message.content.len());
            profile.fts_entry_duration += entry_timer.elapsed();

            let batch_full = fts_queue.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_queue_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if batch_full {
                let flush_timer = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_queue,
                    &mut fts_queue_chars,
                    &mut fts_flushed_total,
                )?;
                profile.fts_flush_duration += flush_timer.elapsed();
            }
        }

        stored_indices.push(message.idx);
    }

    if collisions > 0 {
        tracing::warn!(
            conversation_id = existing_id,
            collision_count = collisions,
            first_idx = first_collision,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling append merge; retaining canonical message variants"
        );
    }

    // Push out whatever is still queued below the batch thresholds.
    if !skip_lexical {
        let flush_timer = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_queue,
            &mut fts_queue_chars,
            &mut fts_flushed_total,
        )?;
        profile.fts_flush_duration += flush_timer.elapsed();
    }

    let row_timer = Instant::now();
    let mut exact_append_tail_set = false;
    if !used_append_tail_plan {
        let latest_ts = conv
            .messages
            .iter()
            .filter_map(|message| message.created_at)
            .max();
        franken_update_conversation_tail_state(
            &tx,
            existing_id,
            latest_ts,
            tail_idx,
            tail_created_at,
        )?;
    } else if let (Some(last_idx), Some(last_created_at)) = (tail_idx, tail_created_at) {
        // With a tail plan, the row is only touched when something was inserted. The
        // direct "set" is used when the stored ended_at is absent or not later than the
        // last inserted message; otherwise the general update is applied.
        if tail_ended_at.is_none_or(|ended_at| ended_at <= last_created_at) {
            franken_set_conversation_tail_state_after_append(
                &tx,
                existing_id,
                last_created_at,
                last_idx,
                last_created_at,
            )?;
            exact_append_tail_set = true;
        } else {
            franken_update_conversation_tail_state(
                &tx,
                existing_id,
                Some(last_created_at),
                tail_idx,
                tail_created_at,
            )?;
        }
    }
    franken_update_external_conversation_tail_after_append(
        &tx,
        agent_id,
        conv,
        used_append_tail_plan,
        exact_append_tail_set,
        tail_idx,
        tail_created_at,
    )?;
    profile.conversation_row_duration += row_timer.elapsed();

    // No new session here: only count messages, and only when some were added.
    if !skip_analytics && !stored_indices.is_empty() {
        let analytics_timer = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 0,
                message_count_delta: stored_indices.len() as i64,
                total_chars_delta: fresh_chars,
            },
        )?;
        profile.analytics_duration += analytics_timer.elapsed();
    }

    let commit_timer = Instant::now();
    tx.commit()?;
    profile.commit_duration += commit_timer.elapsed();
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += stored_indices.len();
    profile.total_duration += started.elapsed();

    Ok(InsertOutcome {
        conversation_id: existing_id,
        conversation_inserted: false,
        inserted_indices: stored_indices,
    })
}
9483
/// Appends the not-yet-stored messages of `conv` to the existing conversation
/// `conversation_id`, using the caller-supplied transaction.
///
/// When `append_tail_state` is given and `collect_append_only_tail_messages`
/// produces a plan, that plan decides which messages are inserted. Otherwise
/// the stored messages are looked up and
/// `collect_new_messages_for_existing_conversation` selects the new ones.
///
/// * `defer_lexical_updates` - when `true`, no FTS entries are queued or flushed.
/// * `defer_analytics_updates` - when `true`, daily stats are left untouched.
///
/// Returns an `InsertOutcome` with `conversation_inserted: false` and the `idx`
/// of every inserted message. This function never commits `tx`; that is left
/// to the caller.
#[allow(clippy::too_many_arguments)]
fn franken_append_messages_with_tail_in_tx(
    &self,
    tx: &FrankenTransaction<'_>,
    agent_id: i64,
    conversation_id: i64,
    conv: &Conversation,
    append_tail_state: Option<ExistingConversationTailState>,
    defer_lexical_updates: bool,
    defer_analytics_updates: bool,
) -> Result<InsertOutcome> {
    // Remember the previously stored end timestamp (if any); it decides below
    // which tail-state update is issued after an append-only insert.
    let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
    // Try to derive the new messages purely from the known tail state.
    let append_plan = append_tail_state.as_ref().and_then(|state| {
        collect_append_only_tail_messages(
            conv,
            state.last_message_idx,
            state.last_message_created_at,
        )
    });
    let used_append_tail_plan = append_plan.is_some();
    let ExistingConversationNewMessages {
        messages: new_messages,
        new_chars,
        idx_collision_count,
        first_collision_idx,
    } = if let Some(append_plan) = append_plan {
        append_plan
    } else {
        // No usable append plan: compare against what is already stored.
        let ExistingMessageLookup {
            by_idx: mut existing_messages,
            replay: mut existing_replay_fingerprints,
        } = franken_existing_message_lookup(tx, conversation_id, &conv.messages)?;
        collect_new_messages_for_existing_conversation(
            conversation_id,
            conv,
            &mut existing_messages,
            &mut existing_replay_fingerprints,
            "skipping replay-equivalent recovered message with shifted idx",
        )
    };

    let mut inserted_indices = Vec::new();
    let mut fts_entries = Vec::new();
    let mut fts_pending_chars = 0usize;
    let mut _fts_inserted_total = 0usize;
    // Capture the tail of the new messages before `new_messages` is consumed
    // by the loop below.
    let (inserted_last_idx, inserted_last_created_at) =
        borrowed_messages_tail_state(&new_messages);
    let inserted_message_ids =
        franken_append_insert_new_messages(tx, conversation_id, &new_messages)?;
    // Pair each returned message id with the message it was created for.
    for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
        franken_insert_snippets(tx, msg_id, &msg.snippets)?;
        if !defer_lexical_updates {
            fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
            fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
            // Flush once either the document or the character budget is hit.
            if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
            {
                flush_pending_fts_entries(
                    self,
                    tx,
                    &mut fts_entries,
                    &mut fts_pending_chars,
                    &mut _fts_inserted_total,
                )?;
            }
        }
        inserted_indices.push(msg.idx);
    }

    if idx_collision_count > 0 {
        tracing::warn!(
            conversation_id,
            collision_count = idx_collision_count,
            first_idx = first_collision_idx,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while appending to an existing conversation; retaining canonical message variants"
        );
    }

    // Flush whatever FTS entries are still pending after the loop.
    if !defer_lexical_updates {
        flush_pending_fts_entries(
            self,
            tx,
            &mut fts_entries,
            &mut fts_pending_chars,
            &mut _fts_inserted_total,
        )?;
    }

    let mut exact_append_tail_set = false;
    if used_append_tail_plan {
        // With an append plan the tail state is only touched when both the
        // last inserted idx and its timestamp are known.
        if let (Some(last_message_idx), Some(last_message_created_at)) =
            (inserted_last_idx, inserted_last_created_at)
        {
            // Use the direct setter when no end timestamp was stored or the
            // stored one is not later than the last inserted message.
            if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
                franken_set_conversation_tail_state_after_append(
                    tx,
                    conversation_id,
                    last_message_created_at,
                    last_message_idx,
                    last_message_created_at,
                )?;
                exact_append_tail_set = true;
            } else {
                franken_update_conversation_tail_state(
                    tx,
                    conversation_id,
                    Some(last_message_created_at),
                    inserted_last_idx,
                    inserted_last_created_at,
                )?;
            }
        }
    } else {
        // Without a plan, pass the latest `created_at` across all messages of
        // `conv` to the tail-state update.
        let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
        franken_update_conversation_tail_state(
            tx,
            conversation_id,
            conv_last_ts,
            inserted_last_idx,
            inserted_last_created_at,
        )?;
    }
    franken_update_external_conversation_tail_after_append(
        tx,
        agent_id,
        conv,
        used_append_tail_plan,
        exact_append_tail_set,
        inserted_last_idx,
        inserted_last_created_at,
    )?;

    // Daily stats only change when something was actually inserted; the
    // session count delta is 0 because the conversation already existed.
    if !defer_analytics_updates && !inserted_indices.is_empty() {
        let message_count = inserted_indices.len() as i64;
        franken_update_daily_stats_in_tx(
            self,
            tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 0,
                message_count_delta: message_count,
                total_chars_delta: new_chars,
            },
        )?;
    }

    Ok(InsertOutcome {
        conversation_id,
        conversation_inserted: false,
        inserted_indices,
    })
}
9640
9641 pub fn rebuild_fts(&self) -> Result<()> {
9643 self.rebuild_fts_via_frankensqlite().map(|_| ())
9644 }
9645
9646 pub(crate) fn ensure_search_fallback_fts_consistency(&self) -> Result<FtsConsistencyRepair> {
9651 self.ensure_fts_consistency_via_frankensqlite()
9652 }
9653
9654 pub(crate) fn validate_fts_messages_integrity(&self) -> Result<()> {
9655 validate_fts_messages_integrity_for_connection(&self.conn)
9656 }
9657
9658 pub(crate) fn fallback_fts_is_known_healthy_for_archive_fingerprint(
9659 &self,
9660 archive_fingerprint: &str,
9661 ) -> Result<bool> {
9662 Ok(
9663 self.read_fts_franken_rebuild_generation()? == Some(FTS_FRANKEN_REBUILD_GENERATION)
9664 && self
9665 .read_fts_franken_rebuild_archive_fingerprint()?
9666 .as_deref()
9667 == Some(archive_fingerprint),
9668 )
9669 }
9670
9671 pub(crate) fn record_search_fallback_fts_archive_fingerprint(
9672 &self,
9673 archive_fingerprint: &str,
9674 ) -> Result<()> {
9675 self.conn
9676 .execute_compat(
9677 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9678 fparams![
9679 FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY,
9680 archive_fingerprint.to_string()
9681 ],
9682 )
9683 .with_context(|| "recording frankensqlite FTS archive fingerprint")?;
9684 Ok(())
9685 }
9686
9687 pub(crate) fn daily_stats_is_known_healthy_for_archive_fingerprint(
9688 &self,
9689 archive_fingerprint: &str,
9690 ) -> Result<bool> {
9691 Ok(
9692 self.read_daily_stats_health_generation()? == Some(DAILY_STATS_HEALTH_GENERATION)
9693 && self.read_daily_stats_archive_fingerprint()?.as_deref()
9694 == Some(archive_fingerprint),
9695 )
9696 }
9697
9698 pub(crate) fn record_daily_stats_archive_fingerprint(
9699 &self,
9700 archive_fingerprint: &str,
9701 ) -> Result<()> {
9702 self.conn
9703 .execute_compat(
9704 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9705 fparams![
9706 DAILY_STATS_HEALTH_GENERATION_META_KEY,
9707 DAILY_STATS_HEALTH_GENERATION.to_string()
9708 ],
9709 )
9710 .with_context(|| "recording daily_stats health generation")?;
9711 self.conn
9712 .execute_compat(
9713 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9714 fparams![DAILY_STATS_HEALTH_META_KEY, archive_fingerprint.to_string()],
9715 )
9716 .with_context(|| "recording daily_stats archive fingerprint")?;
9717 Ok(())
9718 }
9719
9720 fn read_fts_franken_rebuild_generation(&self) -> Result<Option<i64>> {
9721 let value: Option<String> = self
9722 .conn
9723 .query_row_map(
9724 "SELECT value FROM meta WHERE key = ?1",
9725 fparams![FTS_FRANKEN_REBUILD_META_KEY],
9726 |row| row.get_typed(0),
9727 )
9728 .optional()?;
9729 Ok(value.and_then(|v| v.parse::<i64>().ok()))
9730 }
9731
9732 fn read_fts_franken_rebuild_archive_fingerprint(&self) -> Result<Option<String>> {
9733 Ok(self
9734 .conn
9735 .query_row_map(
9736 "SELECT value FROM meta WHERE key = ?1",
9737 fparams![FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY],
9738 |row| row.get_typed(0),
9739 )
9740 .optional()?)
9741 }
9742
9743 fn read_daily_stats_health_generation(&self) -> Result<Option<i64>> {
9744 let value: Option<String> = self
9745 .conn
9746 .query_row_map(
9747 "SELECT value FROM meta WHERE key = ?1",
9748 fparams![DAILY_STATS_HEALTH_GENERATION_META_KEY],
9749 |row| row.get_typed(0),
9750 )
9751 .optional()?;
9752 Ok(value.and_then(|value| value.parse::<i64>().ok()))
9753 }
9754
9755 fn read_daily_stats_archive_fingerprint(&self) -> Result<Option<String>> {
9756 Ok(self
9757 .conn
9758 .query_row_map(
9759 "SELECT value FROM meta WHERE key = ?1",
9760 fparams![DAILY_STATS_HEALTH_META_KEY],
9761 |row| row.get_typed(0),
9762 )
9763 .optional()?)
9764 }
9765
9766 fn record_fts_franken_rebuild_generation(&self) -> Result<()> {
9767 self.conn
9768 .execute_compat(
9769 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9770 fparams![
9771 FTS_FRANKEN_REBUILD_META_KEY,
9772 FTS_FRANKEN_REBUILD_GENERATION.to_string()
9773 ],
9774 )
9775 .with_context(|| "recording frankensqlite FTS rebuild generation")?;
9776 Ok(())
9777 }
9778
9779 fn ensure_fts_consistency_via_frankensqlite(&self) -> Result<FtsConsistencyRepair> {
9780 if self.read_fts_franken_rebuild_generation()? != Some(FTS_FRANKEN_REBUILD_GENERATION) {
9781 let fts_already_healthy = (|| -> Result<bool> {
9786 let fts_exists: i64 = self.conn.query_row_map(
9787 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9788 fparams![],
9789 |row| row.get_typed(0),
9790 )?;
9791 if fts_exists != 1 {
9792 return Ok(false);
9793 }
9794 let total: i64 = self.conn.query_row_map(
9795 "SELECT COUNT(*) FROM messages",
9796 fparams![],
9797 |row| row.get_typed(0),
9798 )?;
9799 if total == 0 {
9800 return Ok(false);
9801 }
9802 let indexed: i64 = self.conn.query_row_map(
9803 "SELECT COUNT(*) FROM fts_messages",
9804 fparams![],
9805 |row| row.get_typed(0),
9806 )?;
9807 Ok(indexed > 0 && indexed * 100 >= total * 90)
9809 })()
9810 .unwrap_or(false);
9811
9812 if fts_already_healthy {
9813 tracing::info!(
9814 target: "cass::fts_rebuild",
9815 "FTS already populated and consistent; setting generation marker without rebuild"
9816 );
9817 self.record_fts_franken_rebuild_generation()?;
9818 self.set_fts_messages_present_cache(true);
9819 } else {
9820 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9821 self.record_fts_franken_rebuild_generation()?;
9822 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9823 }
9824 }
9825
9826 let inspection = (|| -> Result<(i64, bool)> {
9827 let fts_schema_rows = self.conn.query_row_map(
9828 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9829 fparams![],
9830 |row| row.get_typed::<i64>(0),
9831 )?;
9832 let fts_queryable = fts_schema_rows == 1
9833 && self.conn.query("SELECT COUNT(*) FROM fts_messages").is_ok();
9834 Ok((fts_schema_rows, fts_queryable))
9835 })();
9836
9837 let (fts_schema_rows, fts_queryable) = match inspection {
9838 Ok(result) => result,
9839 Err(err) => {
9840 tracing::warn!(
9841 error = %err,
9842 "frankensqlite FTS consistency probe failed; rebuilding authoritative FTS"
9843 );
9844 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9845 self.record_fts_franken_rebuild_generation()?;
9846 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9847 }
9848 };
9849
9850 if fts_schema_rows != 1 || !fts_queryable {
9851 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9852 self.record_fts_franken_rebuild_generation()?;
9853 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9854 }
9855
9856 let total_messages =
9857 self.conn
9858 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
9859 row.get_typed::<i64>(0)
9860 })?;
9861 let indexed_messages =
9862 self.conn
9863 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9864 row.get_typed::<i64>(0)
9865 })?;
9866
9867 if indexed_messages == total_messages {
9868 self.set_fts_messages_present_cache(true);
9869 return Ok(FtsConsistencyRepair::AlreadyHealthy {
9870 rows: usize::try_from(total_messages.max(0)).unwrap_or(usize::MAX),
9871 });
9872 }
9873
9874 if indexed_messages > total_messages {
9875 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9876 self.record_fts_franken_rebuild_generation()?;
9877 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9878 }
9879
9880 let inserted_rows = self
9881 .stream_fts_rows_via_frankensqlite(true)
9882 .with_context(|| "incrementally repairing missing FTS rows via frankensqlite")?;
9883 let repaired_rows =
9884 self.conn
9885 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9886 row.get_typed::<i64>(0)
9887 })?;
9888 if repaired_rows == total_messages {
9889 self.set_fts_messages_present_cache(true);
9890 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9891 inserted_rows,
9892 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9893 });
9894 }
9895
9896 if inserted_rows == 0 {
9904 tracing::debug!(
9905 target: "cass::fts_rebuild",
9906 indexed_messages = repaired_rows,
9907 total_messages,
9908 un_indexable_gap = total_messages.saturating_sub(repaired_rows),
9909 "FTS catch-up inserted 0 rows; remaining gap is un-indexable (likely orphaned messages with dangling conversation_id)"
9910 );
9911 self.set_fts_messages_present_cache(true);
9912 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9913 inserted_rows: 0,
9914 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9915 });
9916 }
9917
9918 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9921 self.record_fts_franken_rebuild_generation()?;
9922 Ok(FtsConsistencyRepair::Rebuilt { inserted_rows })
9923 }
9924
9925 pub(crate) fn rebuild_fts_via_frankensqlite(&self) -> Result<usize> {
9926 self.invalidate_fts_messages_present_cache();
9927 self.conn
9928 .execute("DROP TABLE IF EXISTS fts_messages;")
9929 .with_context(|| "dropping derived fts_messages before frankensqlite rebuild")?;
9930 self.conn
9931 .execute_compat(FTS5_REGISTER_SQL, fparams![])
9932 .with_context(|| "creating derived fts_messages via frankensqlite rebuild")?;
9933 self.set_fts_messages_present_cache(true);
9934
9935 self.stream_fts_rows_via_frankensqlite(false)
9936 }
9937
9938 fn stream_fts_rows_via_frankensqlite(&self, missing_only: bool) -> Result<usize> {
9939 let batch_size = fts_rebuild_batch_size().max(1);
9940 let batch_limit = i64::try_from(batch_size).unwrap_or(i64::MAX);
9941 let mut total_inserted: usize = 0;
9942 let mut total_skipped_orphans: usize = 0;
9943 let mut total_skipped_existing: usize = 0;
9944 let mut last_rowid: i64 = 0;
9945 let conversation_by_id = self.load_fts_conversation_projection_map()?;
9946 let agent_slug_by_id = self.load_fts_agent_slug_map()?;
9947 let workspace_path_by_id = self.load_fts_workspace_path_map()?;
9948 let existing_fts_rowids = if missing_only {
9949 Some(self.load_fts_message_rowid_set()?)
9950 } else {
9951 None
9952 };
9953 let mut entries = Vec::new();
9954 let mut pending_chars = 0usize;
9955
9956 loop {
9957 let rows = self.fetch_fts_rebuild_message_rows(last_rowid, batch_limit)?;
9958 let fetched_count = rows.len();
9959 if fetched_count == 0 {
9960 break;
9961 }
9962
9963 let inserted_before_batch = total_inserted;
9964 let skipped_before_batch = total_skipped_orphans;
9965 let existing_before_batch = total_skipped_existing;
9966
9967 for row in rows {
9968 last_rowid = row.rowid;
9969 if existing_fts_rowids
9970 .as_ref()
9971 .is_some_and(|rowids| rowids.contains(&row.message_id))
9972 {
9973 total_skipped_existing = total_skipped_existing.saturating_add(1);
9974 continue;
9975 }
9976 let Some(conversation) = conversation_by_id.get(&row.conversation_id) else {
9977 total_skipped_orphans = total_skipped_orphans.saturating_add(1);
9978 continue;
9979 };
9980 let agent = conversation
9981 .agent_id
9982 .and_then(|agent_id| agent_slug_by_id.get(&agent_id))
9983 .filter(|slug| !slug.is_empty())
9984 .cloned()
9985 .unwrap_or_else(|| "unknown".to_string());
9986 let workspace = conversation
9987 .workspace_id
9988 .and_then(|workspace_id| workspace_path_by_id.get(&workspace_id))
9989 .cloned()
9990 .unwrap_or_default();
9991 pending_chars = pending_chars.saturating_add(row.content.len());
9992 entries.push(FtsEntry {
9993 content: row.content,
9994 title: conversation.title.clone(),
9995 agent,
9996 workspace,
9997 source_path: conversation.source_path.clone(),
9998 created_at: row.created_at,
9999 message_id: row.message_id,
10000 });
10001 if entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10002 || pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10003 {
10004 total_inserted = total_inserted.saturating_add(
10005 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
10006 );
10007 entries.clear();
10008 pending_chars = 0;
10009 }
10010 }
10011
10012 if !entries.is_empty() {
10013 total_inserted = total_inserted.saturating_add(
10014 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
10015 );
10016 entries.clear();
10017 pending_chars = 0;
10018 }
10019
10020 tracing::debug!(
10021 target: "cass::fts_rebuild",
10022 batch_rows = fetched_count,
10023 batch_inserted = total_inserted.saturating_sub(inserted_before_batch),
10024 batch_skipped_orphans = total_skipped_orphans.saturating_sub(skipped_before_batch),
10025 batch_skipped_existing = total_skipped_existing.saturating_sub(existing_before_batch),
10026 total_inserted,
10027 total_skipped_orphans,
10028 total_skipped_existing,
10029 last_rowid,
10030 missing_only,
10031 "FTS streaming maintenance batch complete"
10032 );
10033
10034 if fetched_count < batch_size {
10035 break;
10036 }
10037 }
10038
10039 Ok(total_inserted)
10040 }
10041
10042 fn fetch_fts_rebuild_message_rows(
10043 &self,
10044 last_rowid: i64,
10045 batch_limit: i64,
10046 ) -> Result<Vec<FtsRebuildMessageRow>> {
10047 self.conn
10048 .query_map_collect(
10049 "SELECT m.rowid, m.id, m.conversation_id, m.content, m.created_at
10050 FROM messages m
10051 WHERE m.rowid > ?1
10052 ORDER BY m.rowid
10053 LIMIT ?2",
10054 fparams![last_rowid, batch_limit],
10055 |row| {
10056 Ok(FtsRebuildMessageRow {
10057 rowid: row.get_typed(0)?,
10058 message_id: row.get_typed(1)?,
10059 conversation_id: row.get_typed(2)?,
10060 content: row.get_typed::<Option<String>>(3)?.unwrap_or_default(),
10061 created_at: row.get_typed(4)?,
10062 })
10063 },
10064 )
10065 .with_context(|| format!("fetching FTS maintenance messages after rowid {last_rowid}"))
10066 }
10067
10068 fn load_fts_message_rowid_set(&self) -> Result<HashSet<i64>> {
10069 let rows: Vec<i64> = self
10070 .conn
10071 .query_map_collect("SELECT rowid FROM fts_messages", fparams![], |row| {
10072 row.get_typed(0)
10073 })
10074 .with_context(|| "loading existing FTS message rowids")?;
10075 Ok(rows.into_iter().collect())
10076 }
10077
10078 fn load_fts_conversation_projection_map(
10079 &self,
10080 ) -> Result<HashMap<i64, FtsConversationProjection>> {
10081 let rows: Vec<(i64, FtsConversationProjection)> = self
10082 .conn
10083 .query_map_collect(
10084 "SELECT id, title, agent_id, workspace_id, source_path
10085 FROM conversations",
10086 fparams![],
10087 |row| {
10088 Ok((
10089 row.get_typed(0)?,
10090 FtsConversationProjection {
10091 title: row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
10092 agent_id: row.get_typed(2)?,
10093 workspace_id: row.get_typed(3)?,
10094 source_path: row.get_typed::<Option<String>>(4)?.unwrap_or_default(),
10095 },
10096 ))
10097 },
10098 )
10099 .with_context(|| "loading FTS conversation projection map")?;
10100 Ok(rows.into_iter().collect())
10101 }
10102
10103 fn load_fts_agent_slug_map(&self) -> Result<HashMap<i64, String>> {
10104 let rows: Vec<(i64, String)> = self
10105 .conn
10106 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
10107 Ok((
10108 row.get_typed(0)?,
10109 row.get_typed::<Option<String>>(1)?
10110 .unwrap_or_else(|| "unknown".to_string()),
10111 ))
10112 })
10113 .with_context(|| "loading FTS agent slug map")?;
10114 Ok(rows.into_iter().collect())
10115 }
10116
10117 fn load_fts_workspace_path_map(&self) -> Result<HashMap<i64, String>> {
10118 let rows: Vec<(i64, String)> = self
10119 .conn
10120 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
10121 Ok((
10122 row.get_typed(0)?,
10123 row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
10124 ))
10125 })
10126 .with_context(|| "loading FTS workspace path map")?;
10127 Ok(rows.into_iter().collect())
10128 }
10129
10130 pub fn fetch_messages_for_embedding(&self) -> Result<Vec<MessageForEmbedding>> {
10132 self.conn
10137 .query_map_collect(
10138 "SELECT m.id, m.created_at, COALESCE(c.agent_id, 0), c.workspace_id, c.source_id, m.role, m.content
10139 FROM messages m
10140 JOIN conversations c ON m.conversation_id = c.id
10141 ORDER BY m.id",
10142 fparams![],
10143 |row| {
10144 let source_id: String = row.get_typed::<Option<String>>(4)?
10145 .unwrap_or_else(|| "local".to_string());
10146 Ok(MessageForEmbedding {
10147 message_id: row.get_typed(0)?,
10148 created_at: row.get_typed(1)?,
10149 agent_id: row.get_typed(2)?,
10150 workspace_id: row.get_typed(3)?,
10151 source_id_hash: crc32fast::hash(source_id.as_bytes()),
10152 role: row.get_typed(5)?,
10153 content: row.get_typed(6)?,
10154 })
10155 },
10156 )
10157 .with_context(|| "fetching messages for embedding")
10158 }
10159
10160 pub fn get_last_embedded_message_id(&self) -> Result<Option<i64>> {
10162 let result: Result<String, _> = self.conn.query_row_map(
10163 "SELECT value FROM meta WHERE key = 'last_embedded_message_id'",
10164 fparams![],
10165 |row| row.get_typed(0),
10166 );
10167 match result.optional() {
10168 Ok(Some(s)) => Ok(s.parse().ok()),
10169 Ok(None) => Ok(None),
10170 Err(e) => Err(e.into()),
10171 }
10172 }
10173
10174 pub fn set_last_embedded_message_id(&self, id: i64) -> Result<()> {
10176 self.conn.execute_compat(
10177 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_embedded_message_id', ?1)",
10178 fparams![id.to_string()],
10179 )?;
10180 Ok(())
10181 }
10182
10183 pub fn get_embedding_jobs(&self, db_path: &str) -> Result<Vec<EmbeddingJobRow>> {
10185 self.conn
10186 .query_map_collect(
10187 "SELECT id, db_path, model_id, status, total_docs, completed_docs, error_message, created_at, started_at, completed_at
10188 FROM embedding_jobs WHERE db_path = ?1 ORDER BY id DESC",
10189 fparams![db_path],
10190 |row| {
10191 Ok(EmbeddingJobRow {
10192 id: row.get_typed(0)?,
10193 db_path: row.get_typed(1)?,
10194 model_id: row.get_typed(2)?,
10195 status: row.get_typed(3)?,
10196 total_docs: row.get_typed(4)?,
10197 completed_docs: row.get_typed(5)?,
10198 error_message: row.get_typed(6)?,
10199 created_at: row.get_typed(7)?,
10200 started_at: row.get_typed(8)?,
10201 completed_at: row.get_typed(9)?,
10202 })
10203 },
10204 )
10205 .with_context(|| format!("fetching embedding jobs for {db_path}"))
10206 }
10207
10208 pub fn upsert_embedding_job(
10210 &self,
10211 db_path: &str,
10212 model_id: &str,
10213 total_docs: i64,
10214 ) -> Result<i64> {
10215 let updated = self.conn.execute_compat(
10216 "UPDATE embedding_jobs
10217 SET total_docs = ?3
10218 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10219 fparams![db_path, model_id, total_docs],
10220 )?;
10221 if updated == 0 {
10222 let insert_result = self.conn.execute_compat(
10223 "INSERT INTO embedding_jobs(db_path, model_id, total_docs) VALUES(?1,?2,?3)",
10224 fparams![db_path, model_id, total_docs],
10225 );
10226 if let Err(err) = insert_result {
10227 if !matches!(err, frankensqlite::FrankenError::UniqueViolation { .. }) {
10228 return Err(err.into());
10229 }
10230 self.conn.execute_compat(
10231 "UPDATE embedding_jobs
10232 SET total_docs = ?3
10233 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10234 fparams![db_path, model_id, total_docs],
10235 )?;
10236 }
10237 }
10238 self.conn
10239 .query_row_map(
10240 "SELECT id FROM embedding_jobs
10241 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')
10242 ORDER BY id DESC
10243 LIMIT 1",
10244 fparams![db_path, model_id],
10245 |row| row.get_typed(0),
10246 )
10247 .with_context(|| "resolving embedding job id after upsert")
10248 }
10249
10250 pub fn start_embedding_job(&self, job_id: i64) -> Result<()> {
10252 self.conn.execute_compat(
10253 "UPDATE embedding_jobs SET status = 'running', started_at = datetime('now') WHERE id = ?1",
10254 fparams![job_id],
10255 )?;
10256 Ok(())
10257 }
10258
10259 pub fn complete_embedding_job(&self, job_id: i64) -> Result<()> {
10261 self.conn.execute_compat(
10262 "UPDATE embedding_jobs SET status = 'completed', completed_at = datetime('now') WHERE id = ?1",
10263 fparams![job_id],
10264 )?;
10265 Ok(())
10266 }
10267
10268 pub fn fail_embedding_job(&self, job_id: i64, error: &str) -> Result<()> {
10270 self.conn.execute_compat(
10271 "UPDATE embedding_jobs SET status = 'failed', error_message = ?2, completed_at = datetime('now') WHERE id = ?1",
10272 fparams![job_id, error],
10273 )?;
10274 Ok(())
10275 }
10276
10277 pub fn cancel_embedding_jobs(&self, db_path: &str, model_id: Option<&str>) -> Result<usize> {
10279 if let Some(mid) = model_id {
10280 Ok(self.conn.execute_compat(
10281 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10282 fparams![db_path, mid],
10283 )?)
10284 } else {
10285 Ok(self.conn.execute_compat(
10286 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND status IN ('pending', 'running')",
10287 fparams![db_path],
10288 )?)
10289 }
10290 }
10291
10292 pub fn update_job_progress(&self, job_id: i64, completed_docs: i64) -> Result<()> {
10294 self.conn.execute_compat(
10295 "UPDATE embedding_jobs SET completed_docs = ?2 WHERE id = ?1",
10296 fparams![job_id, completed_docs],
10297 )?;
10298 Ok(())
10299 }
10300
10301 pub fn count_sessions_in_range(
10310 &self,
10311 start_ts_ms: Option<i64>,
10312 end_ts_ms: Option<i64>,
10313 agent_slug: Option<&str>,
10314 source_id: Option<&str>,
10315 ) -> Result<(i64, bool)> {
10316 let agent = agent_slug.unwrap_or("all");
10317 let source = source_id.unwrap_or("all");
10318
10319 let stats_count: i64 = self
10321 .conn
10322 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10323 row.get_typed(0)
10324 })
10325 .unwrap_or(0);
10326
10327 if stats_count == 0 {
10328 return self.count_sessions_direct(start_ts_ms, end_ts_ms, agent_slug, source_id);
10329 }
10330
10331 let start_day = start_ts_ms.map(Self::day_id_from_millis);
10333 let end_day = end_ts_ms.map(Self::day_id_from_millis);
10334
10335 let count: i64 = match (start_day, end_day) {
10336 (Some(start), Some(end)) => self.conn.query_row_map(
10337 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10338 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4",
10339 fparams![start, end, agent, source],
10340 |row| row.get_typed(0),
10341 )?,
10342 (Some(start), None) => self.conn.query_row_map(
10343 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10344 WHERE day_id >= ?1 AND agent_slug = ?2 AND source_id = ?3",
10345 fparams![start, agent, source],
10346 |row| row.get_typed(0),
10347 )?,
10348 (None, Some(end)) => self.conn.query_row_map(
10349 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10350 WHERE day_id <= ?1 AND agent_slug = ?2 AND source_id = ?3",
10351 fparams![end, agent, source],
10352 |row| row.get_typed(0),
10353 )?,
10354 (None, None) => self.conn.query_row_map(
10355 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10356 WHERE agent_slug = ?1 AND source_id = ?2",
10357 fparams![agent, source],
10358 |row| row.get_typed(0),
10359 )?,
10360 };
10361
10362 Ok((count, true))
10363 }
10364
10365 fn count_sessions_direct(
10367 &self,
10368 start_ts_ms: Option<i64>,
10369 end_ts_ms: Option<i64>,
10370 agent_slug: Option<&str>,
10371 source_id: Option<&str>,
10372 ) -> Result<(i64, bool)> {
10373 let mut sql = "SELECT COUNT(*) FROM conversations c WHERE 1=1".to_string();
10380 let mut param_values: Vec<ParamValue> = Vec::new();
10381 let mut idx = 1;
10382
10383 if let Some(start) = start_ts_ms {
10384 sql.push_str(&format!(" AND c.started_at >= ?{idx}"));
10385 param_values.push(ParamValue::from(start));
10386 idx += 1;
10387 }
10388 if let Some(end) = end_ts_ms {
10389 sql.push_str(&format!(" AND c.started_at <= ?{idx}"));
10390 param_values.push(ParamValue::from(end));
10391 idx += 1;
10392 }
10393 if let Some(agent) = agent_slug
10394 && agent != "all"
10395 {
10396 sql.push_str(&format!(
10397 " AND EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug = ?{idx})"
10398 ));
10399 param_values.push(ParamValue::from(agent));
10400 idx += 1;
10401 }
10402 if let Some(source) = source_id
10403 && source != "all"
10404 {
10405 sql.push_str(&format!(" AND c.source_id = ?{idx}"));
10406 param_values.push(ParamValue::from(source));
10407 let _ = idx; }
10409
10410 let count: i64 = self
10411 .conn
10412 .query_row_map(&sql, ¶m_values, |row| row.get_typed(0))?;
10413 Ok((count, false))
10414 }
10415
10416 pub fn get_daily_histogram(
10418 &self,
10419 start_ts_ms: i64,
10420 end_ts_ms: i64,
10421 agent_slug: Option<&str>,
10422 source_id: Option<&str>,
10423 ) -> Result<Vec<DailyCount>> {
10424 let start_day = Self::day_id_from_millis(start_ts_ms);
10425 let end_day = Self::day_id_from_millis(end_ts_ms);
10426 let agent = agent_slug.unwrap_or("all");
10427 let source = source_id.unwrap_or("all");
10428
10429 let rows = self.conn.query_map_collect(
10430 "SELECT day_id, session_count, message_count, total_chars
10431 FROM daily_stats
10432 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4
10433 ORDER BY day_id",
10434 fparams![start_day, end_day, agent, source],
10435 |row| {
10436 Ok(DailyCount {
10437 day_id: row.get_typed(0)?,
10438 sessions: row.get_typed(1)?,
10439 messages: row.get_typed(2)?,
10440 chars: row.get_typed(3)?,
10441 })
10442 },
10443 )?;
10444
10445 Ok(rows)
10446 }
10447
10448 pub fn daily_stats_health(&self) -> Result<DailyStatsHealth> {
10450 let row_count: i64 =
10451 self.conn
10452 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10453 row.get_typed(0)
10454 })?;
10455
10456 let oldest_update: Option<i64> = self.conn.query_row_map(
10457 "SELECT MIN(last_updated) FROM daily_stats",
10458 fparams![],
10459 |row| row.get_typed(0),
10460 )?;
10461
10462 let conversation_count: i64 =
10463 self.conn
10464 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
10465 row.get_typed(0)
10466 })?;
10467
10468 let materialized_total: i64 = self.conn.query_row_map(
10469 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10470 WHERE agent_slug = 'all' AND source_id = 'all'",
10471 fparams![],
10472 |row| row.get_typed(0),
10473 )?;
10474
10475 Ok(DailyStatsHealth {
10476 populated: row_count > 0,
10477 row_count,
10478 oldest_update_ms: oldest_update,
10479 conversation_count,
10480 materialized_total,
10481 drift: (conversation_count - materialized_total).abs(),
10482 })
10483 }
10484
10485 pub fn insert_conversations_batched(
10489 &self,
10490 conversations: &[(i64, Option<i64>, &Conversation)],
10491 ) -> Result<Vec<InsertOutcome>> {
10492 if conversations.is_empty() {
10493 return Ok(Vec::new());
10494 }
10495
10496 self.ensure_sources_for_batch(conversations)?;
10497
10498 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
10499 let defer_analytics_updates = defer_analytics_updates_enabled();
10500
10501 let pricing_table = PricingTable::franken_load(&self.conn).unwrap_or_else(|e| {
10502 tracing::warn!(target: "cass::analytics::pricing", error = %e, "failed to load pricing table");
10503 PricingTable { entries: Vec::new() }
10504 });
10505 let mut pricing_diag = PricingDiagnostics::default();
10506
10507 let mut tx = self.conn.transaction()?;
10508
10509 ensure_agents_in_tx(&tx, conversations)?;
10516 ensure_workspaces_in_tx(&tx, conversations)?;
10517 ensure_sources_in_tx(&tx, conversations)?;
10518
10519 let mut outcomes = Vec::with_capacity(conversations.len());
10520 let mut fts_entries = Vec::new();
10521 let mut fts_pending_chars = 0usize;
10522 let mut fts_inserted_total = 0usize;
10523 let mut fts_count_total = 0usize;
10524 let mut stats = StatsAggregator::new();
10525 let mut token_stats = TokenStatsAggregator::new();
10526 let mut token_entries: Vec<TokenUsageEntry> = Vec::new();
10527 let mut metrics_entries: Vec<MessageMetricsEntry> = Vec::new();
10528 let mut rollup_agg = AnalyticsRollupAggregator::new();
10529 let mut conv_ids_to_summarize: Vec<i64> = Vec::new();
10530 let mut pending_conversation_ids: HashMap<PendingConversationKey, i64> = HashMap::new();
10531 let mut pending_message_fingerprints: HashMap<i64, HashMap<i64, MessageMergeFingerprint>> =
10532 HashMap::new();
10533 let mut pending_message_replay_fingerprints: HashMap<
10534 i64,
10535 HashSet<MessageReplayFingerprint>,
10536 > = HashMap::new();
10537
10538 for &(agent_id, workspace_id, raw_conv) in conversations {
10539 let normalized_conv = normalized_conversation_for_storage(raw_conv);
10540 let conv = normalized_conv.as_ref();
10541 let mut total_chars: i64 = 0;
10542 let mut inserted_indices = Vec::with_capacity(conv.messages.len());
10543 let mut inserted_messages: Vec<(i64, &Message)> =
10544 Vec::with_capacity(conv.messages.len());
10545 let mut session_count_delta = 1_i64;
10546 let conversation_key = conversation_merge_key(agent_id, conv);
10547
10548 let existing_conv_id = if let Some(existing_id) =
10549 pending_conversation_ids.get(&conversation_key)
10550 {
10551 Some(*existing_id)
10552 } else {
10553 let existing_id =
10554 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
10555 if let Some(existing_id) = existing_id {
10556 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10557 }
10558 existing_id
10559 };
10560
10561 let conv_id = if let Some(existing_id) = existing_conv_id {
10562 session_count_delta = 0;
10563 let ExistingMessageLookup {
10564 by_idx: mut existing_messages,
10565 replay: mut existing_replay_fingerprints,
10566 } = franken_existing_message_lookup_with_pending(
10567 &tx,
10568 existing_id,
10569 &conv.messages,
10570 &mut pending_message_fingerprints,
10571 &mut pending_message_replay_fingerprints,
10572 )?;
10573 let ExistingConversationNewMessages {
10574 messages: new_messages,
10575 new_chars,
10576 idx_collision_count,
10577 first_collision_idx,
10578 } = collect_new_messages_for_existing_conversation(
10579 existing_id,
10580 conv,
10581 &mut existing_messages,
10582 &mut existing_replay_fingerprints,
10583 "skipping replay-equivalent recovered message with shifted idx during batched merge",
10584 );
10585 let (inserted_last_idx, inserted_last_created_at) =
10586 borrowed_messages_tail_state(&new_messages);
10587 let inserted_message_ids =
10588 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10589 total_chars += new_chars;
10590 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10591 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10592 if !defer_lexical_updates {
10593 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10594 fts_count_total += 1;
10595 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
10596 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10597 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10598 {
10599 flush_pending_fts_entries(
10600 self,
10601 &tx,
10602 &mut fts_entries,
10603 &mut fts_pending_chars,
10604 &mut fts_inserted_total,
10605 )?;
10606 }
10607 }
10608 inserted_indices.push(msg.idx);
10609 inserted_messages.push((msg_id, msg));
10610 }
10611
10612 if idx_collision_count > 0 {
10613 tracing::warn!(
10614 conversation_id = existing_id,
10615 collision_count = idx_collision_count,
10616 first_idx = first_collision_idx,
10617 source_path = %conv.source_path.display(),
10618 "message idx collisions encountered during batched conversation merge; retaining canonical message variants"
10619 );
10620 }
10621
10622 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10623 franken_update_conversation_tail_state(
10624 &tx,
10625 existing_id,
10626 conv_last_ts,
10627 inserted_last_idx,
10628 inserted_last_created_at,
10629 )?;
10630 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
10631 {
10632 franken_update_external_conversation_tail_lookup_key(
10633 &tx,
10634 &lookup_key,
10635 conv_last_ts,
10636 inserted_last_idx,
10637 inserted_last_created_at,
10638 )?;
10639 }
10640
10641 pending_message_fingerprints.insert(existing_id, existing_messages);
10642 pending_message_replay_fingerprints
10643 .insert(existing_id, existing_replay_fingerprints);
10644
10645 existing_id
10646 } else {
10647 match franken_insert_conversation_or_get_existing(
10648 &tx,
10649 agent_id,
10650 workspace_id,
10651 conv,
10652 )? {
10653 ConversationInsertStatus::Inserted(new_conv_id) => {
10654 pending_conversation_ids.insert(conversation_key.clone(), new_conv_id);
10655 let pending_messages =
10656 pending_message_fingerprints.entry(new_conv_id).or_default();
10657 let pending_replay_fingerprints = pending_message_replay_fingerprints
10658 .entry(new_conv_id)
10659 .or_default();
10660 let mut new_messages = Vec::new();
10661 for msg in &conv.messages {
10662 let incoming_replay = message_replay_fingerprint(msg);
10663 if pending_messages.contains_key(&msg.idx)
10664 || pending_replay_fingerprints.contains(&incoming_replay)
10665 {
10666 continue;
10667 }
10668 pending_messages.insert(msg.idx, message_merge_fingerprint(msg));
10669 pending_replay_fingerprints.insert(incoming_replay);
10670 new_messages.push(msg);
10671 }
10672 let inserted_message_ids =
10673 franken_batch_insert_new_messages(&tx, new_conv_id, &new_messages)?;
10674 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10675 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10676 if !defer_lexical_updates {
10677 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10678 fts_count_total += 1;
10679 fts_pending_chars =
10680 fts_pending_chars.saturating_add(msg.content.len());
10681 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10682 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10683 {
10684 flush_pending_fts_entries(
10685 self,
10686 &tx,
10687 &mut fts_entries,
10688 &mut fts_pending_chars,
10689 &mut fts_inserted_total,
10690 )?;
10691 }
10692 }
10693 total_chars += msg.content.len() as i64;
10694 inserted_indices.push(msg.idx);
10695 inserted_messages.push((msg_id, msg));
10696 }
10697 new_conv_id
10698 }
10699 ConversationInsertStatus::Existing(existing_id) => {
10700 session_count_delta = 0;
10701 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10702 let ExistingMessageLookup {
10703 by_idx: mut existing_messages,
10704 replay: mut existing_replay_fingerprints,
10705 } = franken_existing_message_lookup_with_pending(
10706 &tx,
10707 existing_id,
10708 &conv.messages,
10709 &mut pending_message_fingerprints,
10710 &mut pending_message_replay_fingerprints,
10711 )?;
10712 let ExistingConversationNewMessages {
10713 messages: new_messages,
10714 new_chars,
10715 idx_collision_count,
10716 first_collision_idx,
10717 } = collect_new_messages_for_existing_conversation(
10718 existing_id,
10719 conv,
10720 &mut existing_messages,
10721 &mut existing_replay_fingerprints,
10722 "skipping replay-equivalent recovered message with shifted idx after duplicate conversation recovery",
10723 );
10724 let (inserted_last_idx, inserted_last_created_at) =
10725 borrowed_messages_tail_state(&new_messages);
10726 let inserted_message_ids =
10727 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10728 total_chars += new_chars;
10729 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10730 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10731 if !defer_lexical_updates {
10732 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10733 fts_count_total += 1;
10734 fts_pending_chars =
10735 fts_pending_chars.saturating_add(msg.content.len());
10736 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10737 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10738 {
10739 flush_pending_fts_entries(
10740 self,
10741 &tx,
10742 &mut fts_entries,
10743 &mut fts_pending_chars,
10744 &mut fts_inserted_total,
10745 )?;
10746 }
10747 }
10748 inserted_indices.push(msg.idx);
10749 inserted_messages.push((msg_id, msg));
10750 }
10751
10752 if idx_collision_count > 0 {
10753 tracing::warn!(
10754 conversation_id = existing_id,
10755 collision_count = idx_collision_count,
10756 first_idx = first_collision_idx,
10757 source_path = %conv.source_path.display(),
10758 "message idx collisions encountered after duplicate conversation recovery; retaining canonical message variants"
10759 );
10760 }
10761
10762 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10763 franken_update_conversation_tail_state(
10764 &tx,
10765 existing_id,
10766 conv_last_ts,
10767 inserted_last_idx,
10768 inserted_last_created_at,
10769 )?;
10770 if let Some(lookup_key) =
10771 conversation_external_lookup_key_for_conv(agent_id, conv)
10772 {
10773 franken_update_external_conversation_tail_lookup_key(
10774 &tx,
10775 &lookup_key,
10776 conv_last_ts,
10777 inserted_last_idx,
10778 inserted_last_created_at,
10779 )?;
10780 }
10781
10782 pending_message_fingerprints.insert(existing_id, existing_messages);
10783 pending_message_replay_fingerprints
10784 .insert(existing_id, existing_replay_fingerprints);
10785
10786 existing_id
10787 }
10788 }
10789 };
10790
10791 if !defer_analytics_updates {
10792 let delta = StatsDelta {
10793 session_count_delta,
10794 message_count_delta: inserted_messages.len() as i64,
10795 total_chars_delta: total_chars,
10796 };
10797
10798 let effective_started_at = conversation_effective_started_at(conv);
10799 let day_id = effective_started_at
10800 .map(FrankenStorage::day_id_from_millis)
10801 .unwrap_or(0);
10802 stats.record_delta(
10803 &conv.agent_slug,
10804 &conv.source_id,
10805 day_id,
10806 delta.session_count_delta,
10807 delta.message_count_delta,
10808 delta.total_chars_delta,
10809 );
10810
10811 let conv_day_id = day_id;
10812 let mut session_model_family = String::from("unknown");
10813 let mut has_any_tokens = false;
10814
10815 for &(message_id, msg) in &inserted_messages {
10816 let role_s = role_str(&msg.role);
10817 let usage = if historical_raw_json(&msg.extra_json).is_some() {
10818 crate::connectors::extract_tokens_for_agent(
10819 &conv.agent_slug,
10820 &serde_json::Value::Null,
10821 &msg.content,
10822 &role_s,
10823 )
10824 } else {
10825 crate::connectors::extract_tokens_for_agent(
10826 &conv.agent_slug,
10827 &msg.extra_json,
10828 &msg.content,
10829 &role_s,
10830 )
10831 };
10832
10833 let msg_ts = msg
10834 .created_at
10835 .or(conversation_effective_started_at(conv))
10836 .unwrap_or(0);
10837 let msg_day_id = if msg_ts > 0 {
10838 FrankenStorage::day_id_from_millis(msg_ts)
10839 } else {
10840 conv_day_id
10841 };
10842
10843 let model_info = usage
10844 .model_name
10845 .as_deref()
10846 .map(crate::connectors::normalize_model);
10847
10848 let model_family = model_info
10849 .as_ref()
10850 .map(|i| i.family.clone())
10851 .unwrap_or_else(|| "unknown".into());
10852 let model_tier = model_info
10853 .as_ref()
10854 .map(|i| i.tier.clone())
10855 .unwrap_or_else(|| "unknown".into());
10856 let provider = usage
10857 .provider
10858 .clone()
10859 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
10860 .unwrap_or_else(|| "unknown".into());
10861
10862 if model_family != "unknown" {
10863 session_model_family = model_family.clone();
10864 }
10865
10866 let estimated_cost = pricing_table.compute_cost(
10867 usage.model_name.as_deref(),
10868 msg_day_id,
10869 usage.input_tokens,
10870 usage.output_tokens,
10871 usage.cache_read_tokens,
10872 usage.cache_creation_tokens,
10873 );
10874 if estimated_cost.is_some() {
10875 pricing_diag.record_priced();
10876 } else if usage.has_token_data() {
10877 pricing_diag.record_unpriced(usage.model_name.as_deref());
10878 }
10879
10880 token_stats.record(
10881 &conv.agent_slug,
10882 &conv.source_id,
10883 msg_day_id,
10884 &model_family,
10885 &role_s,
10886 &usage,
10887 msg.content.len() as i64,
10888 estimated_cost.unwrap_or(0.0),
10889 );
10890
10891 if usage.has_token_data() {
10892 has_any_tokens = true;
10893 }
10894
10895 let content_chars = msg.content.len() as i64;
10896 let content_tokens_est = content_chars / 4;
10897 let msg_hour_id = FrankenStorage::hour_id_from_millis(msg_ts);
10898 let has_plan = has_plan_for_role(&role_s, &msg.content);
10899
10900 token_entries.push(TokenUsageEntry {
10901 message_id,
10902 conversation_id: conv_id,
10903 agent_id,
10904 workspace_id,
10905 source_id: conv.source_id.clone(),
10906 timestamp_ms: msg_ts,
10907 day_id: msg_day_id,
10908 model_name: usage.model_name.clone(),
10909 model_family: Some(model_family.clone()),
10910 model_tier: Some(model_tier.clone()),
10911 service_tier: usage.service_tier.clone(),
10912 provider: Some(provider.clone()),
10913 input_tokens: usage.input_tokens,
10914 output_tokens: usage.output_tokens,
10915 cache_read_tokens: usage.cache_read_tokens,
10916 cache_creation_tokens: usage.cache_creation_tokens,
10917 thinking_tokens: usage.thinking_tokens,
10918 total_tokens: usage.total_tokens(),
10919 estimated_cost_usd: estimated_cost,
10920 role: role_s.to_string(),
10921 content_chars,
10922 has_tool_calls: usage.has_tool_calls,
10923 tool_call_count: usage.tool_call_count,
10924 data_source: usage.data_source.as_str().to_string(),
10925 });
10926
10927 let mm = MessageMetricsEntry {
10928 message_id,
10929 created_at_ms: msg_ts,
10930 hour_id: msg_hour_id,
10931 day_id: msg_day_id,
10932 agent_slug: conv.agent_slug.clone(),
10933 workspace_id: workspace_id.unwrap_or(0),
10934 source_id: conv.source_id.clone(),
10935 role: role_s.to_string(),
10936 content_chars,
10937 content_tokens_est,
10938 model_name: usage.model_name.clone(),
10939 model_family: model_family.clone(),
10940 model_tier: model_tier.clone(),
10941 provider,
10942 api_input_tokens: usage.input_tokens,
10943 api_output_tokens: usage.output_tokens,
10944 api_cache_read_tokens: usage.cache_read_tokens,
10945 api_cache_creation_tokens: usage.cache_creation_tokens,
10946 api_thinking_tokens: usage.thinking_tokens,
10947 api_service_tier: usage.service_tier.clone(),
10948 api_data_source: usage.data_source.as_str().to_string(),
10949 tool_call_count: usage.tool_call_count as i64,
10950 has_tool_calls: usage.has_tool_calls,
10951 has_plan,
10952 };
10953 rollup_agg.record(&mm);
10954 metrics_entries.push(mm);
10955 }
10956
10957 if session_count_delta > 0 {
10958 token_stats.record_session(
10959 &conv.agent_slug,
10960 &conv.source_id,
10961 conv_day_id,
10962 &session_model_family,
10963 );
10964 }
10965
10966 if has_any_tokens {
10967 conv_ids_to_summarize.push(conv_id);
10968 }
10969 }
10970
10971 outcomes.push(InsertOutcome {
10972 conversation_id: conv_id,
10973 conversation_inserted: session_count_delta > 0,
10974 inserted_indices,
10975 });
10976 }
10977
10978 if !defer_lexical_updates {
10980 flush_pending_fts_entries(
10981 self,
10982 &tx,
10983 &mut fts_entries,
10984 &mut fts_pending_chars,
10985 &mut fts_inserted_total,
10986 )?;
10987 }
10988 if !defer_lexical_updates && fts_count_total > 0 {
10989 tracing::debug!(
10990 target: "cass::perf::fts5",
10991 total = fts_count_total,
10992 inserted = fts_inserted_total,
10993 conversations = conversations.len(),
10994 "franken_batch_fts_insert_complete"
10995 );
10996 }
10997
10998 if !defer_analytics_updates && !stats.is_empty() {
11000 let entries = stats.expand();
11001 let affected = franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
11002 tracing::debug!(
11003 target: "cass::perf::daily_stats",
11004 raw = stats.raw_entry_count(),
11005 expanded = entries.len(),
11006 affected = affected,
11007 "franken_batched_stats_update_complete"
11008 );
11009 }
11010
11011 if !defer_analytics_updates && !token_entries.is_empty() {
11013 let token_count = token_entries.len();
11014 let inserted = franken_insert_token_usage_batched_in_tx(&tx, &token_entries)?;
11015 tracing::debug!(
11016 target: "cass::perf::token_usage",
11017 total = token_count,
11018 inserted = inserted,
11019 "franken_batch_token_usage_insert_complete"
11020 );
11021 }
11022
11023 if !defer_analytics_updates && !token_stats.is_empty() {
11025 let entries = token_stats.expand();
11026 let affected = franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
11027 tracing::debug!(
11028 target: "cass::perf::token_daily_stats",
11029 raw = token_stats.raw_entry_count(),
11030 expanded = entries.len(),
11031 affected = affected,
11032 "franken_batched_token_stats_update_complete"
11033 );
11034 }
11035
11036 if !defer_analytics_updates && !metrics_entries.is_empty() {
11038 let mm_count = metrics_entries.len();
11039 let inserted = franken_insert_message_metrics_batched_in_tx(&tx, &metrics_entries)?;
11040 tracing::debug!(
11041 target: "cass::perf::message_metrics",
11042 total = mm_count,
11043 inserted = inserted,
11044 "franken_batch_message_metrics_insert_complete"
11045 );
11046 }
11047
11048 if !defer_analytics_updates && !rollup_agg.is_empty() {
11050 let (hourly, daily, models_daily) =
11051 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
11052 tracing::debug!(
11053 target: "cass::perf::usage_rollups",
11054 hourly_buckets = rollup_agg.hourly_entry_count(),
11055 daily_buckets = rollup_agg.daily_entry_count(),
11056 models_daily_buckets = rollup_agg.models_daily_entry_count(),
11057 hourly_affected = hourly,
11058 daily_affected = daily,
11059 models_daily_affected = models_daily,
11060 "franken_batched_usage_rollups_complete"
11061 );
11062 }
11063
11064 if !defer_analytics_updates {
11066 for conv_id in &conv_ids_to_summarize {
11067 franken_update_conversation_token_summaries_in_tx(&tx, *conv_id)?;
11068 }
11069 }
11070
11071 tx.commit()?;
11072
11073 pricing_diag.log_summary();
11074
11075 Ok(outcomes)
11076 }
11077}
11078
11079fn normalized_storage_source_parts(
11080 source_id: Option<&str>,
11081 origin_kind: Option<&str>,
11082 origin_host: Option<&str>,
11083) -> (String, SourceKind, Option<String>) {
11084 let host_label = crate::search::tantivy::normalized_index_origin_host(origin_host);
11085 let source_id = crate::search::tantivy::normalized_index_source_id(
11086 source_id,
11087 origin_kind,
11088 host_label.as_deref(),
11089 );
11090
11091 if source_id == LOCAL_SOURCE_ID {
11092 (source_id, SourceKind::Local, None)
11093 } else {
11094 (source_id, SourceKind::Ssh, host_label)
11095 }
11096}
11097
11098fn normalized_source_for_conversation(conv: &Conversation) -> Source {
11099 let (id, kind, host_label) = normalized_storage_source_parts(
11100 Some(conv.source_id.as_str()),
11101 None,
11102 conv.origin_host.as_deref(),
11103 );
11104 Source {
11105 id,
11106 kind,
11107 host_label,
11108 machine_id: None,
11109 platform: None,
11110 config_json: None,
11111 created_at: None,
11112 updated_at: None,
11113 }
11114}
11115
11116fn is_bootstrap_local_source(source: &Source) -> bool {
11117 source.id == LOCAL_SOURCE_ID
11118 && matches!(source.kind, SourceKind::Local)
11119 && source.host_label.is_none()
11120 && source.machine_id.is_none()
11121 && source.platform.is_none()
11122 && source.config_json.is_none()
11123 && source.created_at.is_none()
11124 && source.updated_at.is_none()
11125}
11126
11127fn normalized_conversation_for_storage<'a>(conv: &'a Conversation) -> Cow<'a, Conversation> {
11128 let normalized_source = normalized_source_for_conversation(conv);
11129 if normalized_source.id == conv.source_id && normalized_source.host_label == conv.origin_host {
11130 Cow::Borrowed(conv)
11131 } else {
11132 let mut normalized = conv.clone();
11133 normalized.source_id = normalized_source.id;
11134 normalized.origin_host = normalized_source.host_label;
11135 Cow::Owned(normalized)
11136 }
11137}
11138
11139impl FrankenStorage {
11140 fn ensure_source_for_conversation(&self, conv: &Conversation) -> Result<()> {
11141 let source = normalized_source_for_conversation(conv);
11142 if is_bootstrap_local_source(&source) {
11143 return Ok(());
11146 }
11147 let cache_key = EnsuredConversationSourceKey::from_source(&source);
11148 if self.conversation_source_already_ensured(&cache_key) {
11149 return Ok(());
11150 }
11151 self.upsert_source(&source)?;
11152 self.mark_conversation_source_ensured(cache_key);
11153 Ok(())
11154 }
11155
11156 fn ensure_sources_for_batch(
11157 &self,
11158 conversations: &[(i64, Option<i64>, &Conversation)],
11159 ) -> Result<()> {
11160 let mut seen = HashSet::with_capacity(conversations.len());
11161 for &(_, _, conv) in conversations {
11162 let source = normalized_source_for_conversation(conv);
11163 if seen.insert(source.id.clone()) {
11164 if is_bootstrap_local_source(&source) {
11165 continue;
11166 }
11167 self.upsert_source(&source)?;
11168 self.mark_conversation_source_ensured(EnsuredConversationSourceKey::from_source(
11169 &source,
11170 ));
11171 }
11172 }
11173 Ok(())
11174 }
11175}
11176
11177fn franken_last_rowid(tx: &FrankenTransaction<'_>) -> Result<i64> {
11183 tx.last_insert_rowid()
11184 .ok()
11185 .filter(|&id| id > 0)
11186 .with_context(|| "last_insert_rowid() returned NULL or 0 after INSERT")
11187}
11188
11189fn ensure_agents_in_tx(
11195 tx: &FrankenTransaction<'_>,
11196 conversations: &[(i64, Option<i64>, &Conversation)],
11197) -> Result<()> {
11198 let mut seen = HashSet::new();
11199 let now = FrankenStorage::now_millis();
11200 for &(agent_id, _, conv) in conversations {
11201 if !seen.insert(agent_id) {
11202 continue;
11203 }
11204 let exists: i64 = tx.query_row_map(
11205 "SELECT COUNT(*) FROM agents WHERE id = ?1",
11206 fparams![agent_id],
11207 |row| row.get_typed(0),
11208 )?;
11209 if exists == 0 {
11210 tracing::debug!(
11211 target: "cass::fk_guard",
11212 agent_id,
11213 slug = %conv.agent_slug,
11214 "inserting agent row inside transaction to satisfy FK constraint"
11215 );
11216 tx.execute_compat(
11220 "INSERT OR IGNORE INTO agents(id, slug, name, kind, created_at, updated_at)
11221 VALUES(?1, ?2, ?3, 'cli', ?4, ?5)",
11222 fparams![
11223 agent_id,
11224 conv.agent_slug.as_str(),
11225 conv.agent_slug.as_str(),
11226 now,
11227 now
11228 ],
11229 )?;
11230 }
11231 }
11232 Ok(())
11233}
11234
11235fn ensure_workspaces_in_tx(
11238 tx: &FrankenTransaction<'_>,
11239 conversations: &[(i64, Option<i64>, &Conversation)],
11240) -> Result<()> {
11241 let mut seen = HashSet::new();
11242 for &(_, workspace_id, conv) in conversations {
11243 let ws_id = match workspace_id {
11244 Some(id) => id,
11245 None => continue,
11246 };
11247 if !seen.insert(ws_id) {
11248 continue;
11249 }
11250 let exists: i64 = tx.query_row_map(
11251 "SELECT COUNT(*) FROM workspaces WHERE id = ?1",
11252 fparams![ws_id],
11253 |row| row.get_typed(0),
11254 )?;
11255 if exists == 0 {
11256 let path_str = conv
11257 .workspace
11258 .as_ref()
11259 .map(|p| p.to_string_lossy().to_string())
11260 .unwrap_or_default();
11261 tracing::debug!(
11262 target: "cass::fk_guard",
11263 workspace_id = ws_id,
11264 path = %path_str,
11265 "inserting workspace row inside transaction to satisfy FK constraint"
11266 );
11267 tx.execute_compat(
11268 "INSERT OR IGNORE INTO workspaces(id, path) VALUES(?1, ?2)",
11269 fparams![ws_id, path_str.as_str()],
11270 )?;
11271 }
11272 }
11273 Ok(())
11274}
11275
11276fn ensure_sources_in_tx(
11280 tx: &FrankenTransaction<'_>,
11281 conversations: &[(i64, Option<i64>, &Conversation)],
11282) -> Result<()> {
11283 let mut seen = HashSet::new();
11284 for &(_, _, conv) in conversations {
11285 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
11286 Some(conv.source_id.as_str()),
11287 None,
11288 conv.origin_host.as_deref(),
11289 );
11290 if !seen.insert(source_id.clone()) {
11291 continue;
11292 }
11293 let exists: i64 = tx.query_row_map(
11294 "SELECT COUNT(*) FROM sources WHERE id = ?1",
11295 fparams![source_id.as_str()],
11296 |row| row.get_typed(0),
11297 )?;
11298 if exists == 0 {
11299 let kind_str = source_kind.to_string();
11300 let now = FrankenStorage::now_millis();
11301 tracing::debug!(
11302 target: "cass::fk_guard",
11303 source_id = %source_id,
11304 kind = kind_str.as_str(),
11305 "inserting source row inside transaction to satisfy FK constraint"
11306 );
11307 tx.execute_compat(
11308 "INSERT OR IGNORE INTO sources(id, kind, host_label, created_at, updated_at)
11309 VALUES(?1, ?2, ?3, ?4, ?5)",
11310 fparams![
11311 source_id.as_str(),
11312 kind_str.as_str(),
11313 host_label.as_deref(),
11314 now,
11315 now
11316 ],
11317 )?;
11318 }
11319 }
11320 Ok(())
11321}
11322
11323fn env_flag_enabled(name: &str) -> bool {
11324 dotenvy::var(name)
11325 .map(|v| !(v == "0" || v.eq_ignore_ascii_case("false")))
11326 .unwrap_or(false)
11327}
11328
11329fn defer_storage_lexical_updates_enabled() -> bool {
11330 env_flag_enabled("CASS_DEFER_LEXICAL_UPDATES")
11331}
11332
11333fn defer_analytics_updates_enabled() -> bool {
11334 env_flag_enabled("CASS_DEFER_ANALYTICS_UPDATES")
11335}
11336
/// Outcome of `franken_insert_conversation_or_get_existing`: either a new
/// conversation row was created or an already-stored one was found.
enum ConversationInsertStatus {
    /// A new conversation was inserted; carries its id.
    Inserted(i64),
    /// The conversation already existed; carries the id of the existing row.
    Existing(i64),
}
11341
11342fn franken_find_external_conversation_tail_lookup(
11343 tx: &FrankenTransaction<'_>,
11344 lookup_key: &str,
11345) -> Result<Option<ExistingConversationWithTail>> {
11346 let params = [SqliteValue::from(lookup_key)];
11347 let row = tx
11348 .query_row_with_params(
11349 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
11350 FROM conversation_external_tail_lookup
11351 WHERE lookup_key = ?1",
11352 ¶ms,
11353 )
11354 .optional()?;
11355 let Some(row) = row else {
11356 return Ok(None);
11357 };
11358 let id = row.get_typed(0)?;
11359 let ended_at = row.get_typed(1)?;
11360 let last_message_idx = row.get_typed(2)?;
11361 let last_message_created_at = row.get_typed(3)?;
11362 Ok(Some(ExistingConversationWithTail {
11363 id,
11364 tail_state: existing_conversation_tail_state_from_cached(
11365 last_message_idx,
11366 last_message_created_at,
11367 ended_at,
11368 ),
11369 }))
11370}
11371
11372fn franken_find_external_conversation_lookup(
11373 tx: &FrankenTransaction<'_>,
11374 lookup_key: &str,
11375) -> Result<Option<i64>> {
11376 Ok(franken_find_external_conversation_tail_lookup(tx, lookup_key)?.map(|existing| existing.id))
11377}
11378
11379fn franken_insert_external_conversation_tail_lookup_key(
11380 tx: &FrankenTransaction<'_>,
11381 lookup_key: &str,
11382 conversation_id: i64,
11383 ended_at: Option<i64>,
11384 last_message_idx: Option<i64>,
11385 last_message_created_at: Option<i64>,
11386) -> Result<()> {
11387 let params = [
11388 SqliteValue::from(lookup_key),
11389 SqliteValue::from(conversation_id),
11390 SqliteValue::from(ended_at),
11391 SqliteValue::from(last_message_idx),
11392 SqliteValue::from(last_message_created_at),
11393 ];
11394 tx.execute_with_params(
11395 "INSERT OR REPLACE INTO conversation_external_tail_lookup(
11396 lookup_key, conversation_id, ended_at, last_message_idx, last_message_created_at
11397 ) VALUES(?1, ?2, ?3, ?4, ?5)",
11398 ¶ms,
11399 )?;
11400 Ok(())
11401}
11402
11403fn franken_insert_external_conversation_tail_lookup(
11404 tx: &FrankenTransaction<'_>,
11405 source_id: &str,
11406 agent_id: i64,
11407 external_id: &str,
11408 existing: ExistingConversationWithTail,
11409) -> Result<()> {
11410 let lookup_key = conversation_external_lookup_key(source_id, agent_id, external_id);
11411 let ended_at = existing.tail_state.and_then(|state| state.ended_at);
11412 let last_message_idx = existing.tail_state.map(|state| state.last_message_idx);
11413 let last_message_created_at = existing
11414 .tail_state
11415 .map(|state| state.last_message_created_at);
11416 franken_insert_external_conversation_tail_lookup_key(
11417 tx,
11418 &lookup_key,
11419 existing.id,
11420 ended_at,
11421 last_message_idx,
11422 last_message_created_at,
11423 )
11424}
11425
11426fn franken_update_external_conversation_tail_lookup_key(
11427 tx: &FrankenTransaction<'_>,
11428 lookup_key: &str,
11429 ended_at_candidate: Option<i64>,
11430 last_message_idx_candidate: Option<i64>,
11431 last_message_created_at_candidate: Option<i64>,
11432) -> Result<()> {
11433 if ended_at_candidate.is_none()
11434 && last_message_idx_candidate.is_none()
11435 && last_message_created_at_candidate.is_none()
11436 {
11437 return Ok(());
11438 }
11439 tx.execute_compat(
11440 "UPDATE conversation_external_tail_lookup
11441 SET ended_at = CASE
11442 WHEN ?1 IS NULL THEN ended_at
11443 ELSE MAX(IFNULL(ended_at, 0), ?1)
11444 END,
11445 last_message_idx = CASE
11446 WHEN ?2 IS NULL THEN last_message_idx
11447 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
11448 ELSE last_message_idx
11449 END,
11450 last_message_created_at = CASE
11451 WHEN ?3 IS NULL THEN last_message_created_at
11452 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
11453 ELSE last_message_created_at
11454 END
11455 WHERE lookup_key = ?4",
11456 fparams![
11457 ended_at_candidate,
11458 last_message_idx_candidate,
11459 last_message_created_at_candidate,
11460 lookup_key
11461 ],
11462 )?;
11463 Ok(())
11464}
11465
11466fn franken_set_external_conversation_tail_lookup_after_append(
11467 tx: &FrankenTransaction<'_>,
11468 lookup_key: &str,
11469 ended_at: i64,
11470 last_message_idx: i64,
11471 last_message_created_at: i64,
11472) -> Result<()> {
11473 tx.execute_compat(
11474 "UPDATE conversation_external_tail_lookup
11475 SET ended_at = ?1,
11476 last_message_idx = ?2,
11477 last_message_created_at = ?3
11478 WHERE lookup_key = ?4",
11479 fparams![
11480 ended_at,
11481 last_message_idx,
11482 last_message_created_at,
11483 lookup_key
11484 ],
11485 )?;
11486 Ok(())
11487}
11488
11489fn franken_update_external_conversation_tail_after_append(
11490 tx: &FrankenTransaction<'_>,
11491 agent_id: i64,
11492 conv: &Conversation,
11493 used_append_tail_plan: bool,
11494 exact_append_set: bool,
11495 inserted_last_idx: Option<i64>,
11496 inserted_last_created_at: Option<i64>,
11497) -> Result<()> {
11498 let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv) else {
11499 return Ok(());
11500 };
11501
11502 if exact_append_set
11503 && let (Some(last_message_idx), Some(last_message_created_at)) =
11504 (inserted_last_idx, inserted_last_created_at)
11505 {
11506 return franken_set_external_conversation_tail_lookup_after_append(
11507 tx,
11508 &lookup_key,
11509 last_message_created_at,
11510 last_message_idx,
11511 last_message_created_at,
11512 );
11513 }
11514
11515 let ended_at_candidate = if used_append_tail_plan {
11516 inserted_last_created_at
11517 } else {
11518 conv.messages.iter().filter_map(|m| m.created_at).max()
11519 };
11520 franken_update_external_conversation_tail_lookup_key(
11521 tx,
11522 &lookup_key,
11523 ended_at_candidate,
11524 inserted_last_idx,
11525 inserted_last_created_at,
11526 )
11527}
11528
11529fn franken_find_existing_conversation_by_key(
11530 tx: &FrankenTransaction<'_>,
11531 key: &PendingConversationKey,
11532 conv: Option<&Conversation>,
11533) -> Result<Option<i64>> {
11534 franken_find_existing_conversation_by_key_impl(tx, key, conv, false)
11535}
11536
11537fn franken_find_existing_conversation_by_key_after_conflict(
11538 tx: &FrankenTransaction<'_>,
11539 key: &PendingConversationKey,
11540 conv: Option<&Conversation>,
11541) -> Result<Option<i64>> {
11542 franken_find_existing_conversation_by_key_impl(tx, key, conv, true)
11543}
11544
/// Shared implementation behind the `franken_find_existing_conversation_by_key*`
/// wrappers: resolves a pending conversation key to the id of an already stored
/// conversation, if there is one.
///
/// * `External` keys are resolved through the external lookup helper first. Only
///   when `allow_legacy_external_scan` is set does a miss fall back to querying
///   `conversations` directly; a hit there is written back through the
///   tail-lookup insert helper before being returned.
/// * `SourcePath` keys first try an exact match on source, agent, path and
///   effective start time (`started_at`, or the earliest message timestamp when
///   that is NULL). If that misses and `conv` is provided with at least one
///   message fingerprint, every conversation sharing source, agent and path is
///   scored via `conversation_merge_evidence` and the best-ranked one wins.
///
/// Returns `Ok(None)` when nothing matches.
fn franken_find_existing_conversation_by_key_impl(
    tx: &FrankenTransaction<'_>,
    key: &PendingConversationKey,
    conv: Option<&Conversation>,
    allow_legacy_external_scan: bool,
) -> Result<Option<i64>> {
    match key {
        PendingConversationKey::External {
            source_id,
            agent_id,
            external_id,
        } => {
            // Fast path: dedicated lookup keyed by (source, agent, external id).
            let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
            if let Some(existing_id) = franken_find_external_conversation_lookup(tx, &lookup_key)? {
                return Ok(Some(existing_id));
            }
            if !allow_legacy_external_scan {
                return Ok(None);
            }

            // Legacy path: the row may exist in `conversations` without a lookup entry.
            let existing_id = tx
                .query_row_map(
                    "SELECT id
                     FROM conversations
                     WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
                    fparams![source_id.as_str(), *agent_id, external_id.as_str()],
                    |row| row.get_typed(0),
                )
                .optional()?;
            if let Some(existing_id) = existing_id {
                // Record the hit (with whatever tail state is known) so later
                // lookups can be answered by the fast path.
                let tail_state = franken_existing_conversation_append_tail_state(tx, existing_id)?;
                franken_insert_external_conversation_tail_lookup_key(
                    tx,
                    &lookup_key,
                    existing_id,
                    tail_state.and_then(|state| state.ended_at),
                    tail_state.map(|state| state.last_message_idx),
                    tail_state.map(|state| state.last_message_created_at),
                )?;
                Ok(Some(existing_id))
            } else {
                Ok(None)
            }
        }
        PendingConversationKey::SourcePath {
            source_id,
            agent_id,
            source_path,
            started_at,
        } => {
            // Exact match: same source/agent/path and the same effective start
            // time, where two NULL start times also count as equal. The lowest
            // id wins when several rows qualify.
            let exact_match = tx
                .query_row_map(
                    "SELECT c.id
                     FROM conversations c
                     WHERE c.source_id = ?1
                       AND c.agent_id = ?2
                       AND c.source_path = ?3
                       AND ((
                            COALESCE(
                                c.started_at,
                                (SELECT MIN(created_at)
                                 FROM messages
                                 WHERE conversation_id = c.id
                                   AND created_at IS NOT NULL)
                            ) IS NULL
                            AND ?4 IS NULL
                       ) OR COALESCE(
                            c.started_at,
                            (SELECT MIN(created_at)
                             FROM messages
                             WHERE conversation_id = c.id
                               AND created_at IS NOT NULL)
                       ) = ?4)
                     ORDER BY c.id
                     LIMIT 1",
                    fparams![
                        source_id.as_str(),
                        *agent_id,
                        source_path.as_str(),
                        *started_at
                    ],
                    |row| row.get_typed(0),
                )
                .optional()?;
            if exact_match.is_some() {
                return Ok(exact_match);
            }

            // The fuzzy fallback needs the incoming messages to compare against.
            let Some(conv) = conv else {
                return Ok(None);
            };
            let incoming_fingerprints = conversation_message_fingerprints(conv);
            if incoming_fingerprints.is_empty() {
                return Ok(None);
            }
            let incoming_replay_fingerprints = conversation_message_replay_fingerprints(conv);

            // Every conversation on the same source/agent/path is a candidate,
            // paired with its effective start time.
            let candidates: Vec<(i64, Option<i64>)> = tx.query_map_collect(
                "SELECT
                     c.id,
                     COALESCE(
                         c.started_at,
                         (SELECT MIN(created_at)
                          FROM messages
                          WHERE conversation_id = c.id
                            AND created_at IS NOT NULL)
                     ) AS effective_started_at
                 FROM conversations c
                 WHERE c.source_id = ?1
                   AND c.agent_id = ?2
                   AND c.source_path = ?3
                 ORDER BY c.id",
                fparams![source_id.as_str(), *agent_id, source_path.as_str()],
                |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
            )?;

            let mut best_candidate: Option<(i64, ConversationMergeEvidence)> = None;
            for (candidate_id, candidate_started_at) in candidates {
                let existing_fingerprints =
                    franken_existing_message_fingerprints(tx, candidate_id)?;
                let existing_replay_fingerprints =
                    replay_fingerprints_from_merge_set(&existing_fingerprints);
                // Candidates for which no evidence is produced are skipped.
                let Some(evidence) = conversation_merge_evidence(
                    &incoming_fingerprints,
                    &incoming_replay_fingerprints,
                    &existing_fingerprints,
                    &existing_replay_fingerprints,
                    *started_at,
                    candidate_started_at,
                ) else {
                    continue;
                };

                // Lexicographic ranking; `Reverse` makes a smaller start
                // distance rank higher.
                let candidate_key = (
                    evidence.exact_overlap,
                    evidence.replay_overlap,
                    evidence.started_close,
                    evidence.smaller_replay_set,
                    std::cmp::Reverse(evidence.start_distance_ms),
                );
                // Strict `>` means ties keep the earlier (lower-id) candidate.
                let should_replace = best_candidate
                    .as_ref()
                    .map(|(_, best_evidence)| {
                        candidate_key
                            > (
                                best_evidence.exact_overlap,
                                best_evidence.replay_overlap,
                                best_evidence.started_close,
                                best_evidence.smaller_replay_set,
                                std::cmp::Reverse(best_evidence.start_distance_ms),
                            )
                    })
                    .unwrap_or(true);

                if should_replace {
                    best_candidate = Some((candidate_id, evidence));
                }
            }

            Ok(best_candidate.map(|(candidate_id, _)| candidate_id))
        }
    }
}
11708
11709fn franken_insert_conversation_or_get_existing(
11710 tx: &FrankenTransaction<'_>,
11711 agent_id: i64,
11712 workspace_id: Option<i64>,
11713 conv: &Conversation,
11714) -> Result<ConversationInsertStatus> {
11715 let conversation_key = conversation_merge_key(agent_id, conv);
11716 if let Some(existing_id) =
11717 franken_find_existing_conversation_by_key(tx, &conversation_key, Some(conv))?
11718 {
11719 return Ok(ConversationInsertStatus::Existing(existing_id));
11720 }
11721
11722 franken_insert_conversation_or_get_existing_after_miss(
11723 tx,
11724 agent_id,
11725 workspace_id,
11726 conv,
11727 &conversation_key,
11728 )
11729}
11730
11731fn franken_insert_conversation_or_get_existing_after_miss(
11732 tx: &FrankenTransaction<'_>,
11733 agent_id: i64,
11734 workspace_id: Option<i64>,
11735 conv: &Conversation,
11736 conversation_key: &PendingConversationKey,
11737) -> Result<ConversationInsertStatus> {
11738 match franken_insert_conversation(tx, agent_id, workspace_id, conv) {
11739 Ok(Some(conv_id)) => Ok(ConversationInsertStatus::Inserted(conv_id)),
11740 Ok(None) => {
11741 let existing_id =
11744 franken_find_existing_conversation_by_key_after_conflict(
11745 tx,
11746 conversation_key,
11747 Some(conv),
11748 )?
11749 .with_context(|| {
11750 format!(
11751 "conversation INSERT produced a duplicate conflict but existing row was not found for source_id={} agent_id={} external_id={:?} source_path={}",
11752 conv.source_id,
11753 agent_id,
11754 conv.external_id,
11755 conv.source_path.display()
11756 )
11757 })?;
11758 tracing::warn!(
11759 source_id = %conv.source_id,
11760 agent_id,
11761 external_id = ?conv.external_id,
11762 existing_id,
11763 source_path = %conv.source_path.display(),
11764 "conversation INSERT: duplicate gracefully recovered, reusing existing row"
11765 );
11766 Ok(ConversationInsertStatus::Existing(existing_id))
11767 }
11768 Err(error) => {
11769 tracing::error!(
11770 source_id = %conv.source_id,
11771 agent_id,
11772 external_id = ?conv.external_id,
11773 error = %error,
11774 source_path = %conv.source_path.display(),
11775 "franken_insert_conversation failed"
11776 );
11777 Err(error)
11778 }
11779 }
11780}
11781
11782fn franken_insert_conversation(
11788 tx: &FrankenTransaction<'_>,
11789 agent_id: i64,
11790 workspace_id: Option<i64>,
11791 conv: &Conversation,
11792) -> Result<Option<i64>> {
11793 let (metadata_json_str, metadata_bin) = franken_metadata_insert_payload(&conv.metadata_json)?;
11794 let (last_message_idx, last_message_created_at) = conversation_tail_state(conv);
11795 let metadata_bin_bytes = metadata_bin.as_deref();
11796
11797 match tx.execute_compat(
11798 "INSERT INTO conversations(
11799 agent_id, workspace_id, source_id, external_id, title, source_path,
11800 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin,
11801 last_message_idx, last_message_created_at
11802 ) VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14)",
11803 fparams![
11804 agent_id,
11805 workspace_id,
11806 conv.source_id.as_str(),
11807 conv.external_id.as_deref(),
11808 conv.title.as_deref(),
11809 path_to_string(&conv.source_path),
11810 conv.started_at,
11811 conv.ended_at,
11812 conv.approx_tokens,
11813 metadata_json_str.as_deref(),
11814 conv.origin_host.as_deref(),
11815 metadata_bin_bytes,
11816 last_message_idx,
11817 last_message_created_at
11818 ],
11819 ) {
11820 Ok(_) => {
11821 let conv_id = franken_last_rowid(tx)?;
11822 franken_insert_conversation_tail_state(
11823 tx,
11824 conv_id,
11825 conv.ended_at,
11826 last_message_idx,
11827 last_message_created_at,
11828 )?;
11829 if let Some(external_id) = conv.external_id.as_deref() {
11830 franken_insert_external_conversation_tail_lookup(
11831 tx,
11832 conv.source_id.as_str(),
11833 agent_id,
11834 external_id,
11835 ExistingConversationWithTail {
11836 id: conv_id,
11837 tail_state: existing_conversation_tail_state_from_cached(
11838 last_message_idx,
11839 last_message_created_at,
11840 conv.ended_at,
11841 ),
11842 },
11843 )?;
11844 }
11845 Ok(Some(conv_id))
11846 }
11847 Err(frankensqlite::FrankenError::UniqueViolation { .. }) => {
11848 tracing::debug!(
11849 source_id = %conv.source_id,
11850 agent_id,
11851 external_id = ?conv.external_id,
11852 source_path = %conv.source_path.display(),
11853 "conversation INSERT: duplicate provenance conflict"
11854 );
11855 Ok(None)
11856 }
11857 Err(error) => Err(error.into()),
11858 }
11859}
11860
11861type MetadataInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11862
11863fn franken_metadata_insert_payload(value: &serde_json::Value) -> Result<MetadataInsertPayload<'_>> {
11864 if let Some(raw) = historical_raw_json(value) {
11865 Ok((Some(Cow::Borrowed(raw)), None))
11866 } else if value.is_null() {
11867 Ok((Some(Cow::Borrowed("null")), None))
11868 } else if value.as_object().is_some_and(|object| object.is_empty()) {
11869 Ok((None, None))
11870 } else if let Some(metadata_bin) = serialize_json_to_msgpack(value) {
11871 Ok((None, Some(metadata_bin)))
11872 } else {
11873 Ok((Some(Cow::Owned(serde_json::to_string(value)?)), None))
11874 }
11875}
11876
11877fn franken_insert_new_message(
11878 tx: &FrankenTransaction<'_>,
11879 conversation_id: i64,
11880 msg: &Message,
11881) -> Result<i64> {
11882 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11883 let extra_bin_bytes = extra_bin.as_deref();
11884
11885 tx.execute_compat(
11886 "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
11887 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
11888 fparams![
11889 conversation_id,
11890 msg.idx,
11891 role_as_str(&msg.role),
11892 msg.author.as_deref(),
11893 msg.created_at,
11894 msg.content.as_str(),
11895 extra_json_str.as_deref(),
11896 extra_bin_bytes
11897 ],
11898 )?;
11899 franken_last_rowid(tx)
11900}
11901
11902type MessageInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11903
11904fn franken_message_insert_payload(msg: &Message) -> Result<MessageInsertPayload<'_>> {
11905 if let Some(raw) = historical_raw_json(&msg.extra_json) {
11906 Ok((Some(Cow::Borrowed(raw)), None))
11907 } else if msg.extra_json.is_null() {
11908 Ok((None, None))
11909 } else {
11910 let extra_bin = serialize_json_to_msgpack(&msg.extra_json);
11911 if extra_bin.is_some() {
11912 Ok((None, extra_bin))
11913 } else {
11914 Ok((
11915 Some(Cow::Owned(serde_json::to_string(&msg.extra_json)?)),
11916 None,
11917 ))
11918 }
11919 }
11920}
11921
/// Maximum number of message rows per multi-row INSERT on the regular path.
const MESSAGE_INSERT_BATCH_SIZE: usize = 100;

/// Maximum number of message rows per multi-row INSERT on the append path.
const APPEND_MESSAGE_INSERT_BATCH_SIZE: usize = 50;

/// Returns the cached multi-row `INSERT INTO messages` statement for
/// `row_count` rows, with eight numbered placeholders per row.
///
/// Statements for every size up to the larger of the two batch-size constants
/// are generated once on first use. Index 0 holds an empty string.
///
/// # Panics
/// Panics when `row_count` exceeds the largest cached batch size.
fn message_insert_batch_sql(row_count: usize) -> &'static str {
    static SQL_BY_ROW_COUNT: std::sync::OnceLock<Vec<String>> = std::sync::OnceLock::new();

    let table = SQL_BY_ROW_COUNT.get_or_init(|| {
        let largest = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
        // Slot 0 is a placeholder so that the vector index equals the row count.
        std::iter::once(String::new())
            .chain((1..=largest).map(|rows| {
                let placeholders = (0..rows)
                    .map(|row| {
                        let slots = (1..=8)
                            .map(|column| format!("?{}", row * 8 + column))
                            .collect::<Vec<_>>()
                            .join(",");
                        format!("({slots})")
                    })
                    .collect::<Vec<_>>()
                    .join(",");
                format!(
                    "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin) VALUES {placeholders}"
                )
            }))
            .collect()
    });

    table
        .get(row_count)
        .map(String::as_str)
        .expect("message insert batch size must be covered by the cached SQL table")
}
11972
11973fn franken_batch_insert_new_messages(
11974 tx: &FrankenTransaction<'_>,
11975 conversation_id: i64,
11976 messages: &[&Message],
11977) -> Result<Vec<i64>> {
11978 franken_batch_insert_new_messages_with_batch_size(
11979 tx,
11980 conversation_id,
11981 messages,
11982 MESSAGE_INSERT_BATCH_SIZE,
11983 )
11984}
11985
11986fn franken_append_insert_new_messages(
11987 tx: &FrankenTransaction<'_>,
11988 conversation_id: i64,
11989 messages: &[&Message],
11990) -> Result<Vec<i64>> {
11991 franken_batch_insert_new_messages_with_batch_size(
11992 tx,
11993 conversation_id,
11994 messages,
11995 APPEND_MESSAGE_INSERT_BATCH_SIZE,
11996 )
11997}
11998
11999fn franken_batch_insert_new_messages_with_batch_size(
12000 tx: &FrankenTransaction<'_>,
12001 conversation_id: i64,
12002 messages: &[&Message],
12003 batch_size: usize,
12004) -> Result<Vec<i64>> {
12005 let batch_size = batch_size.max(1);
12006 let mut inserted_ids = Vec::with_capacity(messages.len());
12007 for chunk in messages.chunks(batch_size) {
12008 if chunk.len() == 1 {
12009 inserted_ids.push(franken_insert_new_message(tx, conversation_id, chunk[0])?);
12010 continue;
12011 }
12012 let sql = message_insert_batch_sql(chunk.len());
12013
12014 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
12015 for msg in chunk {
12016 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
12017 param_values.push(SqliteValue::from(conversation_id));
12018 param_values.push(SqliteValue::from(msg.idx));
12019 param_values.push(SqliteValue::from(role_as_str(&msg.role)));
12020 param_values.push(SqliteValue::from(msg.author.as_deref()));
12021 param_values.push(SqliteValue::from(msg.created_at));
12022 param_values.push(SqliteValue::from(msg.content.as_str()));
12023 param_values.push(SqliteValue::from(extra_json_str.as_deref()));
12024 param_values.push(SqliteValue::from(extra_bin.as_deref()));
12025 }
12026
12027 tx.execute_with_params(sql, ¶m_values)?;
12028
12029 let last_id = franken_last_rowid(tx)?;
12030 let first_id = last_id
12031 .checked_sub((chunk.len() - 1) as i64)
12032 .with_context(|| {
12033 format!(
12034 "inferring rowid range for {}-row message batch ending at {last_id}",
12035 chunk.len()
12036 )
12037 })?;
12038 inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
12039 }
12040
12041 Ok(inserted_ids)
12042}
12043
/// Test-only single-row message insert that accumulates per-stage timings and
/// counters in `profile`.
///
/// Counts one single-row call and one row, then times payload construction,
/// statement execution and the rowid read-back separately.
#[cfg(test)]
fn franken_insert_new_message_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    msg: &Message,
    profile: &mut MessageInsertSubstageProfile,
) -> Result<i64> {
    profile.single_row_calls += 1;
    profile.batch_rows += 1;

    let payload_timer = Instant::now();
    let (json_payload, binary_payload) = franken_message_insert_payload(msg)?;
    profile.payload_duration += payload_timer.elapsed();
    let binary_payload_bytes = binary_payload.as_deref();

    let execute_timer = Instant::now();
    tx.execute_compat(
        "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
        fparams![
            conversation_id,
            msg.idx,
            role_as_str(&msg.role),
            msg.author.as_deref(),
            msg.created_at,
            msg.content.as_str(),
            json_payload.as_deref(),
            binary_payload_bytes
        ],
    )?;
    profile.execute_duration += execute_timer.elapsed();

    let rowid_timer = Instant::now();
    let inserted_rowid = franken_last_rowid(tx)?;
    profile.rowid_duration += rowid_timer.elapsed();

    Ok(inserted_rowid)
}
12081
/// Test-only profiled batch insert using `MESSAGE_INSERT_BATCH_SIZE` rows per
/// statement.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
12097
/// Test-only profiled batch insert using `APPEND_MESSAGE_INSERT_BATCH_SIZE`
/// rows per statement.
#[cfg(test)]
fn franken_append_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = APPEND_MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
12113
/// Test-only, instrumented twin of the batched message insert: same chunking
/// and rowid inference, with per-stage timings and counters accumulated in
/// `profile`.
///
/// `batch_size` is clamped to `1..=max(MESSAGE_INSERT_BATCH_SIZE,
/// APPEND_MESSAGE_INSERT_BATCH_SIZE)` so it always stays within the cached SQL
/// table. A chunk of exactly one message goes through the profiled single-row
/// path, which does its own accounting.
///
/// NOTE(review): ids of multi-row chunks are inferred by counting down from
/// the last rowid, which relies on consecutive rowid assignment — confirm.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile_batch_size(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
    batch_size: usize,
) -> Result<Vec<i64>> {
    let largest_cached = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
    let batch_size = batch_size.max(1).min(largest_cached);
    let mut inserted_ids = Vec::with_capacity(messages.len());

    for chunk in messages.chunks(batch_size) {
        if let [only] = chunk {
            inserted_ids.push(franken_insert_new_message_with_profile(
                tx,
                conversation_id,
                *only,
                profile,
            )?);
            continue;
        }

        profile.batch_calls += 1;
        profile.batch_rows += chunk.len();

        let sql_build_start = Instant::now();
        let sql = message_insert_batch_sql(chunk.len());
        profile.sql_build_duration += sql_build_start.elapsed();

        // Eight bound values per row, in the column order of the cached statement.
        let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
        for msg in chunk {
            let payload_start = Instant::now();
            let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
            profile.payload_duration += payload_start.elapsed();

            let param_build_start = Instant::now();
            param_values.push(SqliteValue::from(conversation_id));
            param_values.push(SqliteValue::from(msg.idx));
            param_values.push(SqliteValue::from(role_as_str(&msg.role)));
            param_values.push(SqliteValue::from(msg.author.as_deref()));
            param_values.push(SqliteValue::from(msg.created_at));
            param_values.push(SqliteValue::from(msg.content.as_str()));
            param_values.push(SqliteValue::from(extra_json_str.as_deref()));
            param_values.push(SqliteValue::from(extra_bin.as_deref()));
            profile.param_build_duration += param_build_start.elapsed();
        }

        let execute_start = Instant::now();
        tx.execute_with_params(sql, &param_values)?;
        profile.execute_duration += execute_start.elapsed();

        // The rowid stage covers both the read-back and the id inference.
        let rowid_start = Instant::now();
        let last_id = franken_last_rowid(tx)?;
        let first_id = last_id
            .checked_sub((chunk.len() - 1) as i64)
            .with_context(|| {
                format!(
                    "inferring rowid range for {}-row message batch ending at {last_id}",
                    chunk.len()
                )
            })?;
        inserted_ids.extend(first_id..=last_id);
        profile.rowid_duration += rowid_start.elapsed();
    }

    Ok(inserted_ids)
}
12180
12181fn franken_insert_snippets(
12183 tx: &FrankenTransaction<'_>,
12184 message_id: i64,
12185 snippets: &[Snippet],
12186) -> Result<()> {
12187 for snip in snippets {
12188 let file_path_str = snip.file_path.as_ref().map(path_to_string);
12189 tx.execute_compat(
12190 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text)
12191 VALUES(?1,?2,?3,?4,?5,?6)",
12192 fparams![
12193 message_id,
12194 file_path_str.as_deref(),
12195 snip.start_line,
12196 snip.end_line,
12197 snip.language.as_deref(),
12198 snip.snippet_text.as_deref()
12199 ],
12200 )?;
12201 }
12202 Ok(())
12203}
12204
12205fn franken_existing_message_fingerprints(
12206 tx: &FrankenTransaction<'_>,
12207 conversation_id: i64,
12208) -> Result<HashSet<MessageMergeFingerprint>> {
12209 let rows = tx.query_params(
12210 "SELECT idx, role, author, created_at, content
12211 FROM messages
12212 WHERE conversation_id = ?1",
12213 fparams![conversation_id],
12214 )?;
12215 let mut fingerprints = HashSet::with_capacity(rows.len());
12216 for row in rows {
12217 let role: String = row.get_typed(1)?;
12218 let content: String = row.get_typed(4)?;
12219 fingerprints.insert(MessageMergeFingerprint {
12220 idx: row.get_typed(0)?,
12221 created_at: row.get_typed(3)?,
12222 role: role_from_str(&role),
12223 author: row.get_typed(2)?,
12224 content_hash: *blake3::hash(content.as_bytes()).as_bytes(),
12225 });
12226 }
12227 Ok(fingerprints)
12228}
12229
/// Fingerprints of messages already stored for one conversation, as produced
/// by the `franken_existing_message_lookup*` helpers.
struct ExistingMessageLookup {
    /// Merge fingerprints of stored messages, keyed by message `idx`.
    by_idx: HashMap<i64, MessageMergeFingerprint>,
    /// Index-independent fingerprints (`created_at`, role, author, content
    /// hash) of stored messages.
    replay: HashSet<MessageReplayFingerprint>,
}
12234
/// Loads the fingerprints of stored messages of `conversation_id` that are
/// relevant for deduplicating `incoming_messages`.
///
/// Strategy, in order:
/// 1. Empty input returns an empty lookup without touching the database.
/// 2. Each incoming `idx` is probed individually. If every incoming message has
///    a stored row at its `idx` with an identical merge fingerprint, only those
///    rows are returned.
/// 3. Otherwise a bulk read follows. The whole conversation is read when any
///    incoming message lacks `created_at` (or no timestamp bounds exist);
///    otherwise two bounded reads are made, one over the incoming `idx` range
///    and one over the incoming `created_at` range.
///
/// In the bulk result, `by_idx` only holds rows within the incoming `idx`
/// range. For bounded reads, `replay` only holds rows whose `created_at` lies
/// within the incoming timestamp range.
fn franken_existing_message_lookup(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    incoming_messages: &[Message],
) -> Result<ExistingMessageLookup> {
    if incoming_messages.is_empty() {
        return Ok(ExistingMessageLookup {
            by_idx: HashMap::new(),
            replay: HashSet::new(),
        });
    }

    // Inclusive idx range covered by the incoming messages.
    let min_idx = incoming_messages
        .iter()
        .map(|msg| msg.idx)
        .min()
        .unwrap_or(0);
    let max_idx = incoming_messages
        .iter()
        .map(|msg| msg.idx)
        .max()
        .unwrap_or(min_idx);
    // A message without a timestamp cannot be bounded by created_at, so the
    // fallback has to read the whole conversation.
    let requires_full_scan = incoming_messages.iter().any(|msg| msg.created_at.is_none());
    // Inclusive (min, max) of the incoming timestamps; `None` when there are none.
    let created_bounds = incoming_messages
        .iter()
        .filter_map(|msg| msg.created_at)
        .fold(None, |bounds: Option<(i64, i64)>, created_at| {
            Some(match bounds {
                Some((min_created_at, max_created_at)) => (
                    min_created_at.min(created_at),
                    max_created_at.max(created_at),
                ),
                None => (created_at, created_at),
            })
        });

    // Stage 1: point probes by (conversation_id, idx). The loop stops at the
    // first missing or differing row.
    let mut indexed_by_idx = HashMap::with_capacity(incoming_messages.len());
    let mut indexed_replay = HashSet::with_capacity(incoming_messages.len());
    let mut exact_idx_match = true;
    for msg in incoming_messages {
        // Presumably a metrics counter; its definition is outside this chunk.
        record_message_lookup_exact_idx_probe();
        let Some((role, author, created_at, content)) = tx
            .query_row_map(
                "SELECT role, author, created_at, content
                 FROM messages INDEXED BY sqlite_autoindex_messages_1
                 WHERE conversation_id = ?1 AND idx = ?2
                 LIMIT 1",
                fparams![conversation_id, msg.idx],
                |row| {
                    Ok((
                        row.get_typed::<String>(0)?,
                        row.get_typed::<Option<String>>(1)?,
                        row.get_typed::<Option<i64>>(2)?,
                        row.get_typed::<String>(3)?,
                    ))
                },
            )
            .optional()?
        else {
            exact_idx_match = false;
            break;
        };
        let role = role_from_str(&role);
        let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
        let fingerprint = MessageMergeFingerprint {
            idx: msg.idx,
            created_at,
            role: role.clone(),
            author: author.clone(),
            content_hash,
        };
        // A stored row at this idx that differs from the incoming message
        // ends the fast path.
        if fingerprint != message_merge_fingerprint(msg) {
            exact_idx_match = false;
            break;
        }
        indexed_by_idx.insert(msg.idx, fingerprint);
        indexed_replay.insert(MessageReplayFingerprint {
            created_at,
            role,
            author,
            content_hash,
        });
    }

    if exact_idx_match {
        return Ok(ExistingMessageLookup {
            by_idx: indexed_by_idx,
            replay: indexed_replay,
        });
    }

    // Stage 2: bulk read. `replay_full_scan` records whether the rows cover the
    // whole conversation, which decides the replay filtering below.
    let (rows, replay_full_scan) = if requires_full_scan {
        let rows = tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )?;
        record_message_lookup_full_scan_query(rows.len());
        (rows, true)
    } else if let Some((min_created_at, max_created_at)) = created_bounds {
        // Two bounded reads. Rows matched by both appear twice, which is
        // harmless because they end up in a map and a set.
        let mut rows = tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1
               AND idx >= ?2
               AND idx <= ?3",
            fparams![conversation_id, min_idx, max_idx],
        )?;
        rows.extend(tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1
               AND created_at IS NOT NULL
               AND created_at >= ?2
               AND created_at <= ?3",
            fparams![conversation_id, min_created_at, max_created_at],
        )?);
        record_message_lookup_bounded_queries(2, rows.len());
        (rows, false)
    } else {
        // No timestamp bounds available: read the whole conversation.
        let rows = tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )?;
        record_message_lookup_full_scan_query(rows.len());
        (rows, true)
    };

    let mut by_idx = HashMap::with_capacity(rows.len());
    let mut replay = HashSet::with_capacity(rows.len());
    for row in rows {
        let idx: i64 = row.get_typed(0)?;
        let role: String = row.get_typed(1)?;
        let author: Option<String> = row.get_typed(2)?;
        let created_at: Option<i64> = row.get_typed(3)?;
        let content: String = row.get_typed(4)?;
        let role = role_from_str(&role);
        let content_hash = *blake3::hash(content.as_bytes()).as_bytes();

        // Only rows inside the incoming idx range are exposed by idx.
        if idx >= min_idx && idx <= max_idx {
            by_idx.insert(
                idx,
                MessageMergeFingerprint {
                    idx,
                    created_at,
                    role: role.clone(),
                    author: author.clone(),
                    content_hash,
                },
            );
        }

        // For bounded reads, rows fetched only because of their idx are kept
        // out of the replay set unless their timestamp is in range.
        let replay_matches = if replay_full_scan {
            true
        } else if let Some((min_created_at, max_created_at)) = created_bounds {
            created_at.is_some_and(|ts| ts >= min_created_at && ts <= max_created_at)
        } else {
            true
        };
        if replay_matches {
            replay.insert(MessageReplayFingerprint {
                created_at,
                role,
                author,
                content_hash,
            });
        }
    }

    Ok(ExistingMessageLookup { by_idx, replay })
}
12409
12410fn franken_existing_message_lookup_with_pending(
12411 tx: &FrankenTransaction<'_>,
12412 conversation_id: i64,
12413 incoming_messages: &[Message],
12414 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12415 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12416) -> Result<ExistingMessageLookup> {
12417 if let (Some(by_idx), Some(replay)) = (
12418 pending_message_fingerprints.get(&conversation_id),
12419 pending_message_replay_fingerprints.get(&conversation_id),
12420 ) {
12421 if incoming_messages.iter().all(|msg| {
12422 by_idx.contains_key(&msg.idx) || replay.contains(&message_replay_fingerprint(msg))
12423 }) {
12424 return Ok(ExistingMessageLookup {
12425 by_idx: by_idx.clone(),
12426 replay: replay.clone(),
12427 });
12428 }
12429
12430 let fresh = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12431 let mut merged_by_idx = by_idx.clone();
12432 let mut merged_replay = replay.clone();
12433 merged_by_idx.extend(fresh.by_idx);
12434 merged_replay.extend(fresh.replay);
12435 pending_message_fingerprints.insert(conversation_id, merged_by_idx.clone());
12436 pending_message_replay_fingerprints.insert(conversation_id, merged_replay.clone());
12437 return Ok(ExistingMessageLookup {
12438 by_idx: merged_by_idx,
12439 replay: merged_replay,
12440 });
12441 }
12442
12443 let lookup = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12444 pending_message_fingerprints.insert(conversation_id, lookup.by_idx.clone());
12445 pending_message_replay_fingerprints.insert(conversation_id, lookup.replay.clone());
12446 Ok(lookup)
12447}
12448
12449fn franken_batch_insert_fts(tx: &FrankenTransaction<'_>, entries: &[FtsEntry]) -> Result<usize> {
12451 if entries.is_empty() {
12452 return Ok(0);
12453 }
12454
12455 let mut inserted = 0;
12456
12457 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12458 let placeholders: String = chunk
12459 .iter()
12460 .enumerate()
12461 .map(|(i, _)| {
12462 let base = i * 7 + 1; format!(
12464 "(?{},?{},?{},?{},?{},?{},?{})",
12465 base,
12466 base + 1,
12467 base + 2,
12468 base + 3,
12469 base + 4,
12470 base + 5,
12471 base + 6
12472 )
12473 })
12474 .collect::<Vec<_>>()
12475 .join(",");
12476
12477 let sql = format!(
12478 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12479 );
12480
12481 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12482 for entry in chunk {
12483 param_values.push(SqliteValue::from(entry.message_id));
12484 param_values.push(SqliteValue::from(entry.content.as_str()));
12485 param_values.push(SqliteValue::from(entry.title.as_str()));
12486 param_values.push(SqliteValue::from(entry.agent.as_str()));
12487 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12488 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12489 param_values.push(SqliteValue::from(entry.created_at));
12490 }
12491
12492 match tx.execute_with_params(&sql, ¶m_values) {
12493 Ok(_) => {
12494 inserted += chunk.len();
12495 }
12496 Err(err) => {
12497 tracing::warn!(
12498 error = %err,
12499 chunk_docs = chunk.len(),
12500 "frankensqlite FTS batch insert failed; skipping db-resident FTS maintenance because Tantivy is authoritative"
12501 );
12502 return Ok(inserted);
12503 }
12504 }
12505 }
12506
12507 Ok(inserted)
12508}
12509
12510fn franken_batch_insert_fts_on_connection(
12511 conn: &FrankenConnection,
12512 entries: &[FtsEntry],
12513) -> Result<usize> {
12514 if entries.is_empty() {
12515 return Ok(0);
12516 }
12517
12518 let mut inserted = 0;
12519
12520 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12521 let placeholders: String = chunk
12522 .iter()
12523 .enumerate()
12524 .map(|(i, _)| {
12525 let base = i * 7 + 1;
12526 format!(
12527 "(?{},?{},?{},?{},?{},?{},?{})",
12528 base,
12529 base + 1,
12530 base + 2,
12531 base + 3,
12532 base + 4,
12533 base + 5,
12534 base + 6
12535 )
12536 })
12537 .collect::<Vec<_>>()
12538 .join(",");
12539
12540 let sql = format!(
12541 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12542 );
12543
12544 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12545 for entry in chunk {
12546 param_values.push(SqliteValue::from(entry.message_id));
12547 param_values.push(SqliteValue::from(entry.content.as_str()));
12548 param_values.push(SqliteValue::from(entry.title.as_str()));
12549 param_values.push(SqliteValue::from(entry.agent.as_str()));
12550 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12551 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12552 param_values.push(SqliteValue::from(entry.created_at));
12553 }
12554
12555 conn.execute_with_params(&sql, ¶m_values)
12556 .with_context(|| {
12557 format!(
12558 "inserting {} rows into fts_messages during streaming FTS maintenance",
12559 chunk.len()
12560 )
12561 })?;
12562 inserted += chunk.len();
12563 }
12564
12565 Ok(inserted)
12566}
12567
12568fn franken_update_daily_stats_in_tx(
12570 storage: &FrankenStorage,
12571 tx: &FrankenTransaction<'_>,
12572 agent_slug: &str,
12573 source_id: &str,
12574 started_at: Option<i64>,
12575 delta: StatsDelta,
12576) -> Result<()> {
12577 let day_id = started_at
12578 .map(FrankenStorage::day_id_from_millis)
12579 .unwrap_or(0);
12580 let now = FrankenStorage::now_millis();
12581
12582 let targets = [
12583 DailyStatsTarget {
12584 day_id,
12585 agent_slug,
12586 source_id,
12587 },
12588 DailyStatsTarget {
12589 day_id,
12590 agent_slug: "all",
12591 source_id,
12592 },
12593 DailyStatsTarget {
12594 day_id,
12595 agent_slug,
12596 source_id: "all",
12597 },
12598 DailyStatsTarget {
12599 day_id,
12600 agent_slug: "all",
12601 source_id: "all",
12602 },
12603 ];
12604
12605 if agent_slug != "all"
12606 && source_id != "all"
12607 && franken_update_ensured_daily_stats_targets_in_tx(storage, tx, &targets, now, delta)?
12608 {
12609 return Ok(());
12610 }
12611
12612 for target in targets {
12613 franken_apply_daily_stats_delta_in_tx(storage, tx, target, now, delta)?;
12614 }
12615
12616 Ok(())
12617}
12618
/// Address of a single `daily_stats` row: a day bucket plus agent and source
/// slugs. Either slug may be the literal `"all"` for roll-up rows.
#[derive(Clone, Copy)]
struct DailyStatsTarget<'a> {
    /// Value bound to the `day_id` column.
    day_id: i64,
    /// Value bound to the `agent_slug` column.
    agent_slug: &'a str,
    /// Value bound to the `source_id` column.
    source_id: &'a str,
}
12625
12626fn franken_update_ensured_daily_stats_targets_in_tx(
12627 storage: &FrankenStorage,
12628 tx: &FrankenTransaction<'_>,
12629 targets: &[DailyStatsTarget<'_>; 4],
12630 now: i64,
12631 delta: StatsDelta,
12632) -> Result<bool> {
12633 let cache_keys = targets.map(|target| {
12634 EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id)
12635 });
12636 if !storage.daily_stats_keys_already_ensured(&cache_keys) {
12637 return Ok(false);
12638 }
12639
12640 let primary = targets[0];
12641 let rows_changed = tx.execute_compat(
12642 "UPDATE daily_stats
12643 SET session_count = session_count + ?4,
12644 message_count = message_count + ?5,
12645 total_chars = total_chars + ?6,
12646 last_updated = ?7
12647 WHERE day_id = ?1
12648 AND ((agent_slug = ?2 AND source_id = ?3)
12649 OR (agent_slug = 'all' AND source_id = ?3)
12650 OR (agent_slug = ?2 AND source_id = 'all')
12651 OR (agent_slug = 'all' AND source_id = 'all'))",
12652 fparams![
12653 primary.day_id,
12654 primary.agent_slug,
12655 primary.source_id,
12656 delta.session_count_delta,
12657 delta.message_count_delta,
12658 delta.total_chars_delta,
12659 now
12660 ],
12661 )?;
12662 if rows_changed == targets.len() {
12663 return Ok(true);
12664 }
12665
12666 for (target, cache_key) in targets.iter().copied().zip(cache_keys) {
12667 let exists = tx
12668 .query_row_map(
12669 "SELECT 1 FROM daily_stats
12670 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3
12671 LIMIT 1",
12672 fparams![target.day_id, target.agent_slug, target.source_id],
12673 |row| row.get_typed::<i64>(0),
12674 )
12675 .optional()?
12676 .is_some();
12677 if exists {
12678 continue;
12679 }
12680
12681 tx.execute_compat(
12682 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12683 VALUES(?1,?2,?3,?4,?5,?6,?7)",
12684 fparams![
12685 target.day_id,
12686 target.agent_slug,
12687 target.source_id,
12688 delta.session_count_delta,
12689 delta.message_count_delta,
12690 delta.total_chars_delta,
12691 now
12692 ],
12693 )?;
12694 storage.mark_daily_stats_key_ensured(cache_key);
12695 }
12696
12697 Ok(true)
12698}
12699
12700fn franken_apply_daily_stats_delta_in_tx(
12701 storage: &FrankenStorage,
12702 tx: &FrankenTransaction<'_>,
12703 target: DailyStatsTarget<'_>,
12704 now: i64,
12705 delta: StatsDelta,
12706) -> Result<()> {
12707 let cache_key = EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id);
12708 if storage.daily_stats_key_already_ensured(&cache_key) {
12709 let rows_changed = tx.execute_compat(
12710 "UPDATE daily_stats
12711 SET session_count = session_count + ?4,
12712 message_count = message_count + ?5,
12713 total_chars = total_chars + ?6,
12714 last_updated = ?7
12715 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3",
12716 fparams![
12717 target.day_id,
12718 target.agent_slug,
12719 target.source_id,
12720 delta.session_count_delta,
12721 delta.message_count_delta,
12722 delta.total_chars_delta,
12723 now
12724 ],
12725 )?;
12726 if rows_changed > 0 {
12727 return Ok(());
12728 }
12729 }
12730
12731 tx.execute_compat(
12732 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12733 VALUES(?1,?2,?3,?4,?5,?6,?7)
12734 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12735 session_count = session_count + excluded.session_count,
12736 message_count = message_count + excluded.message_count,
12737 total_chars = total_chars + excluded.total_chars,
12738 last_updated = excluded.last_updated",
12739 fparams![
12740 target.day_id,
12741 target.agent_slug,
12742 target.source_id,
12743 delta.session_count_delta,
12744 delta.message_count_delta,
12745 delta.total_chars_delta,
12746 now
12747 ],
12748 )?;
12749 storage.mark_daily_stats_key_ensured(cache_key);
12750 Ok(())
12751}
12752
12753fn franken_update_daily_stats_batched_in_tx(
12759 tx: &FrankenTransaction<'_>,
12760 entries: &[(i64, String, String, StatsDelta)],
12761) -> Result<usize> {
12762 if entries.is_empty() {
12763 return Ok(0);
12764 }
12765
12766 let now = FrankenStorage::now_millis();
12767 let mut total_affected = 0;
12768
12769 for (day_id, agent, source, delta) in entries {
12774 total_affected += tx.execute_compat(
12775 "INSERT INTO daily_stats (day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12776 VALUES(?1,?2,?3,?4,?5,?6,?7)
12777 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12778 session_count = session_count + excluded.session_count,
12779 message_count = message_count + excluded.message_count,
12780 total_chars = total_chars + excluded.total_chars,
12781 last_updated = excluded.last_updated",
12782 fparams![
12783 *day_id,
12784 agent.as_str(),
12785 source.as_str(),
12786 delta.session_count_delta,
12787 delta.message_count_delta,
12788 delta.total_chars_delta,
12789 now
12790 ],
12791 )?;
12792 }
12793
12794 Ok(total_affected)
12795}
12796
12797fn franken_insert_token_usage_batched_in_tx(
12803 tx: &FrankenTransaction<'_>,
12804 entries: &[TokenUsageEntry],
12805) -> Result<usize> {
12806 if entries.is_empty() {
12807 return Ok(0);
12808 }
12809
12810 let mut total_inserted = 0;
12811
12812 for e in entries {
12813 let params_vec: Vec<ParamValue> = vec![
12814 ParamValue::from(e.message_id),
12815 ParamValue::from(e.conversation_id),
12816 ParamValue::from(e.agent_id),
12817 ParamValue::from(e.workspace_id),
12818 ParamValue::from(e.source_id.clone()),
12819 ParamValue::from(e.timestamp_ms),
12820 ParamValue::from(e.day_id),
12821 ParamValue::from(e.model_name.clone()),
12822 ParamValue::from(e.model_family.clone()),
12823 ParamValue::from(e.model_tier.clone()),
12824 ParamValue::from(e.service_tier.clone()),
12825 ParamValue::from(e.provider.clone()),
12826 ParamValue::from(e.input_tokens),
12827 ParamValue::from(e.output_tokens),
12828 ParamValue::from(e.cache_read_tokens),
12829 ParamValue::from(e.cache_creation_tokens),
12830 ParamValue::from(e.thinking_tokens),
12831 ParamValue::from(e.total_tokens),
12832 ParamValue::from(e.estimated_cost_usd),
12833 ParamValue::from(e.role.clone()),
12834 ParamValue::from(e.content_chars),
12835 ParamValue::from(e.has_tool_calls as i64),
12836 ParamValue::from(e.tool_call_count as i64),
12837 ParamValue::from(e.data_source.clone()),
12838 ];
12839
12840 let values = param_slice_to_values(¶ms_vec);
12841 total_inserted += tx.execute_with_params(
12842 "INSERT OR IGNORE INTO token_usage (
12843 message_id, conversation_id, agent_id, workspace_id, source_id,
12844 timestamp_ms, day_id,
12845 model_name, model_family, model_tier, service_tier, provider,
12846 input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
12847 thinking_tokens, total_tokens, estimated_cost_usd,
12848 role, content_chars, has_tool_calls, tool_call_count, data_source
12849 )
12850 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12851 &values,
12852 )?;
12853 }
12854
12855 Ok(total_inserted)
12856}
12857
12858fn franken_update_token_daily_stats_batched_in_tx(
12860 tx: &FrankenTransaction<'_>,
12861 entries: &[(i64, String, String, String, TokenStatsDelta)],
12862) -> Result<usize> {
12863 if entries.is_empty() {
12864 return Ok(0);
12865 }
12866
12867 let now = FrankenStorage::now_millis();
12868 let mut total_affected = 0;
12869
12870 for (day_id, agent, source, model, delta) in entries {
12871 total_affected += tx.execute_compat(
12872 "INSERT INTO token_daily_stats (
12873 day_id, agent_slug, source_id, model_family,
12874 api_call_count, user_message_count, assistant_message_count, tool_message_count,
12875 total_input_tokens, total_output_tokens, total_cache_read_tokens,
12876 total_cache_creation_tokens, total_thinking_tokens, grand_total_tokens,
12877 total_content_chars, total_tool_calls, estimated_cost_usd, session_count,
12878 last_updated
12879 )
12880 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19)
12881 ON CONFLICT(day_id, agent_slug, source_id, model_family) DO UPDATE SET
12882 api_call_count = api_call_count + excluded.api_call_count,
12883 user_message_count = user_message_count + excluded.user_message_count,
12884 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12885 tool_message_count = tool_message_count + excluded.tool_message_count,
12886 total_input_tokens = total_input_tokens + excluded.total_input_tokens,
12887 total_output_tokens = total_output_tokens + excluded.total_output_tokens,
12888 total_cache_read_tokens = total_cache_read_tokens + excluded.total_cache_read_tokens,
12889 total_cache_creation_tokens = total_cache_creation_tokens + excluded.total_cache_creation_tokens,
12890 total_thinking_tokens = total_thinking_tokens + excluded.total_thinking_tokens,
12891 grand_total_tokens = grand_total_tokens + excluded.grand_total_tokens,
12892 total_content_chars = total_content_chars + excluded.total_content_chars,
12893 total_tool_calls = total_tool_calls + excluded.total_tool_calls,
12894 estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd,
12895 session_count = session_count + excluded.session_count,
12896 last_updated = excluded.last_updated",
12897 fparams![
12898 *day_id,
12899 agent.as_str(),
12900 source.as_str(),
12901 model.as_str(),
12902 delta.api_call_count,
12903 delta.user_message_count,
12904 delta.assistant_message_count,
12905 delta.tool_message_count,
12906 delta.total_input_tokens,
12907 delta.total_output_tokens,
12908 delta.total_cache_read_tokens,
12909 delta.total_cache_creation_tokens,
12910 delta.total_thinking_tokens,
12911 delta.grand_total_tokens,
12912 delta.total_content_chars,
12913 delta.total_tool_calls,
12914 delta.estimated_cost_usd,
12915 delta.session_count,
12916 now
12917 ],
12918 )?;
12919 }
12920
12921 Ok(total_affected)
12922}
12923
12924fn franken_insert_message_metrics_batched_in_tx(
12930 tx: &FrankenTransaction<'_>,
12931 entries: &[MessageMetricsEntry],
12932) -> Result<usize> {
12933 if entries.is_empty() {
12934 return Ok(0);
12935 }
12936
12937 let mut total_inserted = 0;
12938
12939 for e in entries {
12940 let params_vec: Vec<ParamValue> = vec![
12941 ParamValue::from(e.message_id),
12942 ParamValue::from(e.created_at_ms),
12943 ParamValue::from(e.hour_id),
12944 ParamValue::from(e.day_id),
12945 ParamValue::from(e.agent_slug.clone()),
12946 ParamValue::from(e.workspace_id),
12947 ParamValue::from(e.source_id.clone()),
12948 ParamValue::from(e.role.clone()),
12949 ParamValue::from(e.content_chars),
12950 ParamValue::from(e.content_tokens_est),
12951 ParamValue::from(e.model_name.clone()),
12952 ParamValue::from(e.model_family.clone()),
12953 ParamValue::from(e.model_tier.clone()),
12954 ParamValue::from(e.provider.clone()),
12955 ParamValue::from(e.api_input_tokens),
12956 ParamValue::from(e.api_output_tokens),
12957 ParamValue::from(e.api_cache_read_tokens),
12958 ParamValue::from(e.api_cache_creation_tokens),
12959 ParamValue::from(e.api_thinking_tokens),
12960 ParamValue::from(e.api_service_tier.clone()),
12961 ParamValue::from(e.api_data_source.clone()),
12962 ParamValue::from(e.tool_call_count),
12963 ParamValue::from(e.has_tool_calls as i64),
12964 ParamValue::from(e.has_plan as i64),
12965 ];
12966
12967 let values = param_slice_to_values(¶ms_vec);
12968 total_inserted += tx.execute_with_params(
12969 "INSERT OR IGNORE INTO message_metrics (
12970 message_id, created_at_ms, hour_id, day_id,
12971 agent_slug, workspace_id, source_id, role,
12972 content_chars, content_tokens_est,
12973 model_name, model_family, model_tier, provider,
12974 api_input_tokens, api_output_tokens, api_cache_read_tokens,
12975 api_cache_creation_tokens, api_thinking_tokens,
12976 api_service_tier, api_data_source,
12977 tool_call_count, has_tool_calls, has_plan
12978 )
12979 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12980 &values,
12981 )?;
12982 }
12983
12984 Ok(total_inserted)
12985}
12986
12987fn franken_flush_rollup_table(
12989 tx: &FrankenTransaction<'_>,
12990 table: &str,
12991 bucket_col: &str,
12992 deltas: &HashMap<(i64, String, i64, String), UsageRollupDelta>,
12993 now: i64,
12994) -> Result<usize> {
12995 if deltas.is_empty() {
12996 return Ok(0);
12997 }
12998
12999 let mut total_affected = 0;
13000
13001 for ((bucket_id, agent, workspace_id, source), d) in deltas {
13002 let sql = format!(
13003 "INSERT INTO {table} (
13004 {bucket_col}, agent_slug, workspace_id, source_id,
13005 message_count, user_message_count, assistant_message_count,
13006 tool_call_count, plan_message_count, plan_content_tokens_est_total,
13007 plan_api_tokens_total, api_coverage_message_count,
13008 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
13009 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
13010 api_cache_read_tokens_total, api_cache_creation_tokens_total,
13011 api_thinking_tokens_total, last_updated
13012 )
13013 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
13014 ON CONFLICT({bucket_col}, agent_slug, workspace_id, source_id) DO UPDATE SET
13015 message_count = message_count + excluded.message_count,
13016 user_message_count = user_message_count + excluded.user_message_count,
13017 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13018 tool_call_count = tool_call_count + excluded.tool_call_count,
13019 plan_message_count = plan_message_count + excluded.plan_message_count,
13020 plan_content_tokens_est_total = plan_content_tokens_est_total + excluded.plan_content_tokens_est_total,
13021 plan_api_tokens_total = plan_api_tokens_total + excluded.plan_api_tokens_total,
13022 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
13023 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
13024 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
13025 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
13026 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
13027 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
13028 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
13029 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
13030 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
13031 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
13032 last_updated = excluded.last_updated"
13033 );
13034
13035 total_affected += tx.execute_compat(
13036 &sql,
13037 fparams![
13038 *bucket_id,
13039 agent.as_str(),
13040 *workspace_id,
13041 source.as_str(),
13042 d.message_count,
13043 d.user_message_count,
13044 d.assistant_message_count,
13045 d.tool_call_count,
13046 d.plan_message_count,
13047 d.plan_content_tokens_est_total,
13048 d.plan_api_tokens_total,
13049 d.api_coverage_message_count,
13050 d.content_tokens_est_total,
13051 d.content_tokens_est_user,
13052 d.content_tokens_est_assistant,
13053 d.api_tokens_total,
13054 d.api_input_tokens_total,
13055 d.api_output_tokens_total,
13056 d.api_cache_read_tokens_total,
13057 d.api_cache_creation_tokens_total,
13058 d.api_thinking_tokens_total,
13059 now
13060 ],
13061 )?;
13062 }
13063
13064 Ok(total_affected)
13065}
13066
13067fn franken_flush_model_daily_rollup_table(
13069 tx: &FrankenTransaction<'_>,
13070 deltas: &HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
13071 now: i64,
13072) -> Result<usize> {
13073 if deltas.is_empty() {
13074 return Ok(0);
13075 }
13076
13077 let mut total_affected = 0;
13078
13079 for ((day_id, agent, workspace_id, source, model_family, model_tier), d) in deltas {
13080 total_affected += tx.execute_compat(
13081 "INSERT INTO usage_models_daily (
13082 day_id, agent_slug, workspace_id, source_id, model_family, model_tier,
13083 message_count, user_message_count, assistant_message_count,
13084 tool_call_count, plan_message_count, api_coverage_message_count,
13085 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
13086 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
13087 api_cache_read_tokens_total, api_cache_creation_tokens_total,
13088 api_thinking_tokens_total, last_updated
13089 )
13090 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
13091 ON CONFLICT(day_id, agent_slug, workspace_id, source_id, model_family, model_tier) DO UPDATE SET
13092 message_count = message_count + excluded.message_count,
13093 user_message_count = user_message_count + excluded.user_message_count,
13094 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13095 tool_call_count = tool_call_count + excluded.tool_call_count,
13096 plan_message_count = plan_message_count + excluded.plan_message_count,
13097 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
13098 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
13099 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
13100 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
13101 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
13102 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
13103 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
13104 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
13105 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
13106 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
13107 last_updated = excluded.last_updated",
13108 fparams![
13109 *day_id,
13110 agent.as_str(),
13111 *workspace_id,
13112 source.as_str(),
13113 model_family.as_str(),
13114 model_tier.as_str(),
13115 d.message_count,
13116 d.user_message_count,
13117 d.assistant_message_count,
13118 d.tool_call_count,
13119 d.plan_message_count,
13120 d.api_coverage_message_count,
13121 d.content_tokens_est_total,
13122 d.content_tokens_est_user,
13123 d.content_tokens_est_assistant,
13124 d.api_tokens_total,
13125 d.api_input_tokens_total,
13126 d.api_output_tokens_total,
13127 d.api_cache_read_tokens_total,
13128 d.api_cache_creation_tokens_total,
13129 d.api_thinking_tokens_total,
13130 now
13131 ],
13132 )?;
13133 }
13134
13135 Ok(total_affected)
13136}
13137
13138fn franken_flush_analytics_rollups_in_tx(
13140 tx: &FrankenTransaction<'_>,
13141 agg: &AnalyticsRollupAggregator,
13142) -> Result<(usize, usize, usize)> {
13143 let now = FrankenStorage::now_millis();
13144
13145 let hourly_affected =
13146 franken_flush_rollup_table(tx, "usage_hourly", "hour_id", &agg.hourly, now)?;
13147 let daily_affected = franken_flush_rollup_table(tx, "usage_daily", "day_id", &agg.daily, now)?;
13148 let models_daily_affected = franken_flush_model_daily_rollup_table(tx, &agg.models_daily, now)?;
13149
13150 Ok((hourly_affected, daily_affected, models_daily_affected))
13151}
13152
13153fn franken_update_conversation_token_summaries_in_tx(
13155 tx: &FrankenTransaction<'_>,
13156 conversation_id: i64,
13157) -> Result<()> {
13158 tx.execute_compat(
13159 "UPDATE conversations SET
13160 total_input_tokens = (SELECT SUM(input_tokens) FROM token_usage WHERE conversation_id = ?1),
13161 total_output_tokens = (SELECT SUM(output_tokens) FROM token_usage WHERE conversation_id = ?1),
13162 total_cache_read_tokens = (SELECT SUM(cache_read_tokens) FROM token_usage WHERE conversation_id = ?1),
13163 total_cache_creation_tokens = (SELECT SUM(cache_creation_tokens) FROM token_usage WHERE conversation_id = ?1),
13164 grand_total_tokens = (SELECT SUM(total_tokens) FROM token_usage WHERE conversation_id = ?1),
13165 estimated_cost_usd = (SELECT SUM(estimated_cost_usd) FROM token_usage WHERE conversation_id = ?1),
13166 primary_model = (SELECT model_name FROM token_usage WHERE conversation_id = ?1
13167 AND model_name IS NOT NULL
13168 GROUP BY model_name ORDER BY COUNT(*) DESC LIMIT 1),
13169 api_call_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13170 AND data_source = 'api'),
13171 tool_call_count = (SELECT SUM(tool_call_count) FROM token_usage WHERE conversation_id = ?1),
13172 user_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13173 AND role = 'user'),
13174 assistant_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13175 AND role IN ('assistant', 'agent'))
13176 WHERE id = ?1",
13177 fparams![conversation_id],
13178 )?;
13179 Ok(())
13180}
13181
13182impl FrankenStorage {
13183 pub fn rebuild_token_daily_stats(&self) -> Result<usize> {
13185 const CONVERSATION_BATCH_SIZE: usize = 1_000;
13186 const TOKEN_USAGE_BATCH_SIZE: usize = 10_000;
13187
13188 let total_usage_rows: i64 =
13189 self.conn
13190 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
13191 row.get_typed(0)
13192 })?;
13193 tracing::info!(
13194 target: "cass::analytics",
13195 total_usage_rows,
13196 "token_daily_stats_rebuild_start"
13197 );
13198
13199 let mut tx = self.conn.transaction()?;
13200 tx.execute("DELETE FROM token_daily_stats")?;
13201
13202 let mut last_conversation_id = 0_i64;
13203 let mut rows_created = 0_usize;
13204
13205 loop {
13206 let conversation_rows = tx.query_map_collect(
13207 "SELECT c.id, c.started_at, c.source_id,
13208 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown')
13209 FROM conversations c
13210 WHERE c.id > ?1
13211 ORDER BY c.id
13212 LIMIT ?2",
13213 fparams![last_conversation_id, CONVERSATION_BATCH_SIZE as i64],
13214 |row| {
13215 Ok((
13216 row.get_typed::<i64>(0)?,
13217 row.get_typed::<Option<i64>>(1)?,
13218 row.get_typed::<String>(2)?,
13219 row.get_typed::<String>(3)?,
13220 ))
13221 },
13222 )?;
13223 if conversation_rows.is_empty() {
13224 break;
13225 }
13226
13227 let mut aggregate = TokenStatsAggregator::new();
13228
13229 for (conversation_id, started_at, source_id, agent_slug) in conversation_rows {
13230 last_conversation_id = conversation_id;
13231 let conversation_day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13232 let mut last_token_usage_id = 0_i64;
13233 let mut session_model_family = String::from("unknown");
13234
13235 loop {
13236 let usage_rows = tx.query_map_collect(
13237 "SELECT id, day_id, role,
13238 COALESCE(model_family, 'unknown'),
13239 input_tokens, output_tokens, cache_read_tokens,
13240 cache_creation_tokens, thinking_tokens,
13241 has_tool_calls, tool_call_count,
13242 content_chars, estimated_cost_usd
13243 FROM token_usage
13244 WHERE conversation_id = ?1
13245 AND id > ?2
13246 ORDER BY id
13247 LIMIT ?3",
13248 fparams![
13249 conversation_id,
13250 last_token_usage_id,
13251 TOKEN_USAGE_BATCH_SIZE as i64
13252 ],
13253 |row| {
13254 Ok((
13255 row.get_typed::<i64>(0)?,
13256 row.get_typed::<i64>(1)?,
13257 row.get_typed::<String>(2)?,
13258 row.get_typed::<String>(3)?,
13259 row.get_typed::<Option<i64>>(4)?,
13260 row.get_typed::<Option<i64>>(5)?,
13261 row.get_typed::<Option<i64>>(6)?,
13262 row.get_typed::<Option<i64>>(7)?,
13263 row.get_typed::<Option<i64>>(8)?,
13264 row.get_typed::<i64>(9)?,
13265 row.get_typed::<i64>(10)?,
13266 row.get_typed::<i64>(11)?,
13267 row.get_typed::<Option<f64>>(12)?,
13268 ))
13269 },
13270 )?;
13271 if usage_rows.is_empty() {
13272 break;
13273 }
13274
13275 for (
13276 token_usage_id,
13277 day_id,
13278 role,
13279 model_family,
13280 input_tokens,
13281 output_tokens,
13282 cache_read_tokens,
13283 cache_creation_tokens,
13284 thinking_tokens,
13285 has_tool_calls,
13286 tool_call_count,
13287 content_chars,
13288 estimated_cost_usd,
13289 ) in usage_rows
13290 {
13291 last_token_usage_id = token_usage_id;
13292 if model_family != "unknown" {
13293 session_model_family = model_family.clone();
13294 }
13295 let usage = crate::connectors::ExtractedTokenUsage {
13296 model_name: None,
13297 provider: None,
13298 input_tokens,
13299 output_tokens,
13300 cache_read_tokens,
13301 cache_creation_tokens,
13302 thinking_tokens,
13303 service_tier: None,
13304 has_tool_calls: has_tool_calls != 0,
13305 tool_call_count: u32::try_from(tool_call_count.max(0)).unwrap_or(0),
13306 data_source: franken_agent_detection::TokenDataSource::Api,
13307 };
13308 aggregate.record(
13309 &agent_slug,
13310 &source_id,
13311 day_id,
13312 &model_family,
13313 &role,
13314 &usage,
13315 content_chars,
13316 estimated_cost_usd.unwrap_or(0.0),
13317 );
13318 }
13319 }
13320
13321 aggregate.record_session(
13322 &agent_slug,
13323 &source_id,
13324 conversation_day_id,
13325 &session_model_family,
13326 );
13327 }
13328
13329 let entries = aggregate.expand();
13330 rows_created = rows_created.saturating_add(entries.len());
13331 franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
13332 }
13333
13334 tx.commit()?;
13335
13336 tracing::info!(
13337 target: "cass::analytics",
13338 rows_created,
13339 "token_daily_stats_rebuild_complete"
13340 );
13341
13342 Ok(rows_created)
13343 }
13344
13345 pub fn rebuild_analytics(&self) -> Result<AnalyticsRebuildResult> {
13348 let start = Instant::now();
13349
13350 let total_messages: i64 =
13351 self.conn
13352 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13353 row.get_typed(0)
13354 })?;
13355 tracing::info!(
13356 target: "cass::analytics",
13357 total_messages,
13358 "analytics_rebuild_start"
13359 );
13360
13361 let mut tx = self.conn.transaction()?;
13362
13363 tx.execute("DELETE FROM message_metrics")?;
13364 tx.execute("DELETE FROM usage_hourly")?;
13365 tx.execute("DELETE FROM usage_daily")?;
13366 tx.execute("DELETE FROM usage_models_daily")?;
13367
13368 const CHUNK_SIZE: i64 = 10_000;
13369 let mut offset: i64 = 0;
13370 let mut total_inserted: usize = 0;
13371 let mut usage_hourly_rows: usize = 0;
13372 let mut usage_daily_rows: usize = 0;
13373 let mut usage_models_daily_rows: usize = 0;
13374
13375 loop {
13376 #[allow(clippy::type_complexity)]
13377 let rows: Vec<(
13378 i64,
13379 String,
13380 String,
13381 Option<serde_json::Value>,
13382 Option<i64>,
13383 Option<i64>,
13384 String,
13385 Option<i64>,
13386 String,
13387 )> = tx.query_map_collect(
13388 "SELECT m.id, m.idx, m.role, m.content, m.extra_json, m.extra_bin,
13394 m.created_at,
13395 c.id AS conv_id, c.started_at AS conv_started_at,
13396 c.source_id, c.workspace_id,
13397 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown') AS agent_slug
13398 FROM messages m
13399 JOIN conversations c ON m.conversation_id = c.id
13400 ORDER BY m.id
13401 LIMIT ?1 OFFSET ?2",
13402 fparams![CHUNK_SIZE, offset],
13403 |row| {
13404 let msg_id: i64 = row.get_typed(0)?;
13405 let role: String = row.get_typed(2)?;
13406 let content: String = row.get_typed(3)?;
13407 let extra_json = row
13408 .get_typed::<Option<String>>(4)?
13409 .and_then(|s| serde_json::from_str(&s).ok())
13410 .or_else(|| {
13411 row.get_typed::<Option<Vec<u8>>>(5)
13412 .ok()
13413 .flatten()
13414 .and_then(|b| rmp_serde::from_slice(&b).ok())
13415 });
13416 let msg_ts: Option<i64> = row.get_typed(6)?;
13417 let conv_started_at: Option<i64> = row.get_typed(8)?;
13418 let source_id: String = row.get_typed(9)?;
13419 let workspace_id: Option<i64> = row.get_typed(10)?;
13420 let agent_slug: String = row.get_typed(11)?;
13421 let effective_ts = msg_ts.or(conv_started_at).unwrap_or(0);
13422
13423 Ok((
13424 msg_id,
13425 role,
13426 content,
13427 extra_json,
13428 Some(effective_ts),
13429 workspace_id,
13430 source_id,
13431 conv_started_at,
13432 agent_slug,
13433 ))
13434 },
13435 )?;
13436
13437 if rows.is_empty() {
13438 break;
13439 }
13440
13441 let chunk_len = rows.len();
13442 let mut entries = Vec::with_capacity(chunk_len);
13443 let mut rollup_agg = AnalyticsRollupAggregator::new();
13444
13445 for (
13446 msg_id,
13447 role,
13448 content,
13449 extra_json,
13450 effective_ts,
13451 workspace_id,
13452 source_id,
13453 _conv_started_at,
13454 agent_slug,
13455 ) in &rows
13456 {
13457 let ts = effective_ts.unwrap_or(0);
13458 let day_id = Self::day_id_from_millis(ts);
13459 let hour_id = Self::hour_id_from_millis(ts);
13460 let content_chars = content.len() as i64;
13461 let content_tokens_est = content_chars / 4;
13462 let extra = extra_json
13463 .as_ref()
13464 .cloned()
13465 .unwrap_or(serde_json::Value::Null);
13466 let usage =
13467 crate::connectors::extract_tokens_for_agent(agent_slug, &extra, content, role);
13468 let model_info = usage
13469 .model_name
13470 .as_deref()
13471 .map(crate::connectors::normalize_model);
13472 let model_family = model_info
13473 .as_ref()
13474 .map(|i| i.family.clone())
13475 .unwrap_or_else(|| "unknown".into());
13476 let model_tier = model_info
13477 .as_ref()
13478 .map(|i| i.tier.clone())
13479 .unwrap_or_else(|| "unknown".into());
13480 let provider = usage
13481 .provider
13482 .clone()
13483 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
13484 .unwrap_or_else(|| "unknown".into());
13485
13486 let entry = MessageMetricsEntry {
13487 message_id: *msg_id,
13488 created_at_ms: ts,
13489 hour_id,
13490 day_id,
13491 agent_slug: agent_slug.clone(),
13492 workspace_id: workspace_id.unwrap_or(0),
13493 source_id: source_id.clone(),
13494 role: role.clone(),
13495 content_chars,
13496 content_tokens_est,
13497 model_name: usage.model_name.clone(),
13498 model_family,
13499 model_tier,
13500 provider,
13501 api_input_tokens: usage.input_tokens,
13502 api_output_tokens: usage.output_tokens,
13503 api_cache_read_tokens: usage.cache_read_tokens,
13504 api_cache_creation_tokens: usage.cache_creation_tokens,
13505 api_thinking_tokens: usage.thinking_tokens,
13506 api_service_tier: usage.service_tier,
13507 api_data_source: usage.data_source.as_str().to_string(),
13508 tool_call_count: usage.tool_call_count as i64,
13509 has_tool_calls: usage.has_tool_calls,
13510 has_plan: has_plan_for_role(role, content),
13511 };
13512 rollup_agg.record(&entry);
13513 entries.push(entry);
13514 }
13515
13516 total_inserted += franken_insert_message_metrics_batched_in_tx(&tx, &entries)?;
13517 let (hourly, daily, models_daily) =
13518 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
13519 usage_hourly_rows += hourly;
13520 usage_daily_rows += daily;
13521 usage_models_daily_rows += models_daily;
13522 offset += chunk_len as i64;
13523
13524 tracing::debug!(
13525 target: "cass::analytics",
13526 offset,
13527 chunk = chunk_len,
13528 inserted = entries.len(),
13529 total = total_inserted,
13530 "analytics_rebuild_chunk"
13531 );
13532
13533 if (chunk_len as i64) < CHUNK_SIZE {
13534 break;
13535 }
13536 }
13537
13538 tx.commit()?;
13539
13540 let elapsed = start.elapsed();
13541 let elapsed_ms = elapsed.as_millis() as u64;
13542 let msgs_per_sec = if elapsed_ms > 0 {
13543 (total_inserted as f64) / (elapsed_ms as f64 / 1000.0)
13544 } else {
13545 0.0
13546 };
13547
13548 tracing::info!(
13549 target: "cass::analytics",
13550 message_metrics_rows = total_inserted,
13551 usage_hourly_rows,
13552 usage_daily_rows,
13553 usage_models_daily_rows,
13554 elapsed_ms,
13555 messages_per_sec = format!("{:.0}", msgs_per_sec),
13556 "analytics_rebuild_complete"
13557 );
13558
13559 Ok(AnalyticsRebuildResult {
13560 message_metrics_rows: total_inserted,
13561 usage_hourly_rows,
13562 usage_daily_rows,
13563 usage_models_daily_rows,
13564 elapsed_ms,
13565 messages_per_sec: msgs_per_sec,
13566 })
13567 }
13568
13569 pub fn rebuild_daily_stats(&self) -> Result<DailyStatsRebuildResult> {
13571 const DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE: usize = 1_000;
13572 const DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE: usize = 10_000;
13573
13574 let mut conversation_batch_size = rebuild_batch_size_env(
13575 "CASS_DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE",
13576 DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE,
13577 );
13578 let mut message_batch_size = rebuild_batch_size_env(
13579 "CASS_DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE",
13580 DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE,
13581 );
13582
13583 let total_messages: i64 =
13584 self.conn
13585 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13586 row.get_typed(0)
13587 })?;
13588 let message_metrics_rows: i64 =
13589 self.conn
13590 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
13591 row.get_typed(0)
13592 })?;
13593 let use_message_metrics = total_messages > 0 && total_messages == message_metrics_rows;
13594
13595 tracing::info!(
13596 target: "cass::perf::daily_stats",
13597 total_messages,
13598 message_metrics_rows,
13599 use_message_metrics,
13600 "daily_stats rebuild selected message source"
13601 );
13602
13603 let mut tx = self.conn.transaction()?;
13604 tx.execute("DELETE FROM daily_stats")?;
13605
13606 let mut last_conversation_id = 0_i64;
13607 let mut conversation_batch_count = 0_usize;
13608 let mut conversations_processed = 0_usize;
13609 let mut messages_processed = 0_usize;
13610 let mut message_batch_count = 0_usize;
13611 let mut raw_entries_flushed = 0_usize;
13612 let mut expanded_entries_flushed = 0_usize;
13613 let message_scan_sql = if use_message_metrics {
13614 "SELECT m.idx, mm.content_chars
13615 FROM messages m
13616 JOIN message_metrics mm ON mm.message_id = m.id
13617 WHERE m.conversation_id = ?1
13618 AND m.idx > ?2
13619 ORDER BY m.conversation_id, m.idx
13620 LIMIT ?3"
13621 } else {
13622 "SELECT m.idx, COALESCE(LENGTH(CAST(m.content AS BLOB)), 0)
13623 FROM messages m
13624 WHERE m.conversation_id = ?1
13625 AND m.idx > ?2
13626 ORDER BY m.conversation_id, m.idx
13627 LIMIT ?3"
13628 };
13629
13630 loop {
13631 let conversation_rows = match self.conn.query_with_params(
13637 "SELECT c.id, c.started_at,
13638 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
13639 c.source_id
13640 FROM conversations c
13641 WHERE c.id > ?1
13642 ORDER BY c.id
13643 LIMIT ?2",
13644 ¶ms_from_iter([
13645 ParamValue::from(last_conversation_id),
13646 ParamValue::from(conversation_batch_size as i64),
13647 ]),
13648 ) {
13649 Ok(rows) => rows,
13650 Err(err) if is_out_of_memory_error(&err) && conversation_batch_size > 1 => {
13651 let previous_batch_size = conversation_batch_size;
13652 conversation_batch_size = (conversation_batch_size / 2).max(1);
13653 tracing::warn!(
13654 previous_batch_size,
13655 conversation_batch_size,
13656 last_conversation_id,
13657 "daily_stats conversation scan ran out of memory; retrying with smaller batch"
13658 );
13659 continue;
13660 }
13661 Err(err) => return Err(err.into()),
13662 };
13663 if conversation_rows.is_empty() {
13664 break;
13665 }
13666
13667 let mut aggregate = StatsAggregator::new();
13668 let mut conversation_batch_meta: Vec<(i64, i64, String, String)> =
13669 Vec::with_capacity(conversation_rows.len());
13670 for row in &conversation_rows {
13671 let conversation_id: i64 = row.get_typed(0)?;
13672 let started_at: Option<i64> = row.get_typed(1)?;
13673 let agent_slug: String = row.get_typed(2)?;
13674 let source_id: String = row.get_typed(3)?;
13675 last_conversation_id = conversation_id;
13676 let day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13677 aggregate.record_delta(&agent_slug, &source_id, day_id, 1, 0, 0);
13678 conversation_batch_meta.push((conversation_id, day_id, agent_slug, source_id));
13679 conversations_processed += 1;
13680 }
13681
13682 conversation_batch_count += 1;
13683 raw_entries_flushed += aggregate.raw_entry_count();
13684 let entries = aggregate.expand();
13685 expanded_entries_flushed += entries.len();
13686 if !entries.is_empty() {
13687 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13688 }
13689 if conversation_batch_count.is_multiple_of(25) {
13690 tracing::info!(
13691 target: "cass::perf::daily_stats",
13692 conversations_processed,
13693 batches = conversation_batch_count,
13694 batch_size = conversation_batch_size,
13695 last_conversation_id,
13696 "daily_stats rebuild conversation scan progress"
13697 );
13698 }
13699 if conversation_batch_meta.is_empty() {
13700 continue;
13701 }
13702
13703 for (conversation_id, day_id, agent_slug, source_id) in conversation_batch_meta {
13704 let mut cursor_message_idx = -1_i64;
13705 loop {
13706 let message_rows = match self.conn.query_with_params(
13707 message_scan_sql,
13708 ¶ms_from_iter([
13709 ParamValue::from(conversation_id),
13710 ParamValue::from(cursor_message_idx),
13711 ParamValue::from(message_batch_size as i64),
13712 ]),
13713 ) {
13714 Ok(rows) => rows,
13715 Err(err) if is_out_of_memory_error(&err) && message_batch_size > 1 => {
13716 let previous_batch_size = message_batch_size;
13717 message_batch_size = (message_batch_size / 2).max(1);
13718 tracing::warn!(
13719 previous_batch_size,
13720 message_batch_size,
13721 conversation_id,
13722 cursor_message_idx,
13723 "daily_stats message scan ran out of memory; retrying with smaller batch"
13724 );
13725 continue;
13726 }
13727 Err(err) => return Err(err.into()),
13728 };
13729 if message_rows.is_empty() {
13730 break;
13731 }
13732
13733 let mut aggregate = StatsAggregator::new();
13734 for row in &message_rows {
13735 let message_idx: i64 = row.get_typed(0)?;
13736 let content_len: i64 = row.get_typed(1)?;
13737 cursor_message_idx = message_idx;
13738 aggregate.record_delta(&agent_slug, &source_id, day_id, 0, 1, content_len);
13739 messages_processed += 1;
13740 }
13741
13742 message_batch_count += 1;
13743 raw_entries_flushed += aggregate.raw_entry_count();
13744 let entries = aggregate.expand();
13745 expanded_entries_flushed += entries.len();
13746 if !entries.is_empty() {
13747 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13748 }
13749 if message_batch_count.is_multiple_of(50) {
13750 tracing::info!(
13751 target: "cass::perf::daily_stats",
13752 messages_processed,
13753 batches = message_batch_count,
13754 batch_size = message_batch_size,
13755 source = if use_message_metrics {
13756 "message_metrics"
13757 } else {
13758 "messages"
13759 },
13760 conversation_id,
13761 cursor_message_idx,
13762 "daily_stats rebuild message scan progress"
13763 );
13764 }
13765 }
13766 }
13767 }
13768
13769 let rows_created: i64 =
13770 tx.query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
13771 row.get_typed(0)
13772 })?;
13773 let total_sessions: i64 = tx.query_row_map(
13774 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
13775 fparams![],
13776 |row| row.get_typed(0),
13777 )?;
13778
13779 tx.commit()?;
13780
13781 tracing::info!(
13782 target: "cass::perf::daily_stats",
13783 rows_created,
13784 total_sessions,
13785 conversations_processed,
13786 conversation_batches = conversation_batch_count,
13787 conversation_batch_size,
13788 message_batches = message_batch_count,
13789 message_batch_size,
13790 messages_processed,
13791 use_message_metrics,
13792 raw_entries_flushed,
13793 expanded_entries_flushed,
13794 "Daily stats rebuilt from conversations"
13795 );
13796
13797 Ok(DailyStatsRebuildResult {
13798 rows_created,
13799 total_sessions,
13800 })
13801 }
13802}
13803
/// In-memory memo of agent and workspace row ids resolved during indexing.
///
/// Lookups that miss are forwarded to an [`IndexingCacheStorage`] and the returned id
/// is remembered; hit and miss counters are kept for reporting through `stats`.
#[derive(Debug, Default)]
pub struct IndexingCache {
    /// Agent row ids keyed by agent slug.
    agent_ids: HashMap<String, i64>,
    /// Workspace row ids keyed by workspace path.
    workspace_ids: HashMap<PathBuf, i64>,
    /// Number of lookups answered from the maps.
    hits: u64,
    /// Number of lookups that had to consult storage.
    misses: u64,
}
13837
13838pub trait IndexingCacheStorage {
13839 fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64>;
13840 fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64>;
13841}
13842
13843impl IndexingCacheStorage for FrankenStorage {
13844 fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64> {
13845 self.ensure_agent(agent)
13846 }
13847
13848 fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
13849 self.ensure_workspace(path, display_name)
13850 }
13851}
13852
13853impl IndexingCache {
13856 pub fn new() -> Self {
13858 Self {
13859 agent_ids: HashMap::new(),
13860 workspace_ids: HashMap::new(),
13861 hits: 0,
13862 misses: 0,
13863 }
13864 }
13865
13866 pub fn is_enabled() -> bool {
13869 dotenvy::var("CASS_SQLITE_CACHE")
13870 .map(|v| v != "0" && v.to_lowercase() != "false")
13871 .unwrap_or(true)
13872 }
13873
13874 pub fn get_or_insert_agent<S>(&mut self, storage: &S, agent: &Agent) -> Result<i64>
13879 where
13880 S: IndexingCacheStorage + ?Sized,
13881 {
13882 if let Some(&cached) = self.agent_ids.get(&agent.slug) {
13883 self.hits += 1;
13884 return Ok(cached);
13885 }
13886
13887 self.misses += 1;
13888 let id = storage.ensure_indexing_agent(agent)?;
13889 self.agent_ids.insert(agent.slug.clone(), id);
13890 Ok(id)
13891 }
13892
13893 pub fn get_or_insert_workspace(
13898 &mut self,
13899 storage: &(impl IndexingCacheStorage + ?Sized),
13900 path: &Path,
13901 display_name: Option<&str>,
13902 ) -> Result<i64> {
13903 if let Some(&cached) = self.workspace_ids.get(path) {
13904 self.hits += 1;
13905 return Ok(cached);
13906 }
13907
13908 self.misses += 1;
13909 let id = storage.ensure_indexing_workspace(path, display_name)?;
13910 self.workspace_ids.insert(path.to_path_buf(), id);
13911 Ok(id)
13912 }
13913
13914 pub fn stats(&self) -> (u64, u64, f64) {
13916 let total = self.hits + self.misses;
13917 let hit_rate = if total > 0 {
13918 self.hits as f64 / total as f64
13919 } else {
13920 0.0
13921 };
13922 (self.hits, self.misses, hit_rate)
13923 }
13924
13925 pub fn clear(&mut self) {
13927 self.agent_ids.clear();
13928 self.workspace_ids.clear();
13929 self.hits = 0;
13930 self.misses = 0;
13931 }
13932
13933 pub fn agent_count(&self) -> usize {
13935 self.agent_ids.len()
13936 }
13937
13938 pub fn workspace_count(&self) -> usize {
13940 self.workspace_ids.len()
13941 }
13942}
13943
/// Additive change to one daily-stats bucket.
#[derive(Clone, Copy, Debug, Default)]
pub struct StatsDelta {
    /// Sessions (conversations) to add.
    pub session_count_delta: i64,
    /// Messages to add.
    pub message_count_delta: i64,
    /// Content length to add.
    pub total_chars_delta: i64,
}
13958
13959#[derive(Debug, Default)]
13975pub struct StatsAggregator {
13976 deltas: HashMap<(i64, String, String), StatsDelta>,
13979}
13980
13981impl StatsAggregator {
13982 pub fn new() -> Self {
13984 Self {
13985 deltas: HashMap::new(),
13986 }
13987 }
13988
13989 pub fn record(
14000 &mut self,
14001 agent_slug: &str,
14002 source_id: &str,
14003 day_id: i64,
14004 message_count: i64,
14005 total_chars: i64,
14006 ) {
14007 self.record_delta(agent_slug, source_id, day_id, 1, message_count, total_chars);
14008 }
14009
14010 pub fn record_delta(
14013 &mut self,
14014 agent_slug: &str,
14015 source_id: &str,
14016 day_id: i64,
14017 session_count_delta: i64,
14018 message_count_delta: i64,
14019 total_chars_delta: i64,
14020 ) {
14021 if session_count_delta == 0 && message_count_delta == 0 && total_chars_delta == 0 {
14022 return;
14023 }
14024 let key = (day_id, agent_slug.to_owned(), source_id.to_owned());
14025 let delta = self.deltas.entry(key).or_default();
14026 delta.session_count_delta += session_count_delta;
14027 delta.message_count_delta += message_count_delta;
14028 delta.total_chars_delta += total_chars_delta;
14029 }
14030
14031 pub fn expand(&self) -> Vec<(i64, String, String, StatsDelta)> {
14039 let mut expanded: HashMap<(i64, String, String), StatsDelta> = HashMap::new();
14040
14041 for ((day_id, agent, source), delta) in &self.deltas {
14042 let permutations = [
14043 (agent.as_str(), source.as_str()),
14044 ("all", source.as_str()),
14045 (agent.as_str(), "all"),
14046 ("all", "all"),
14047 ];
14048
14049 for idx in 0..permutations.len() {
14051 let (a, s) = permutations[idx];
14052 if permutations[..idx].contains(&(a, s)) {
14053 continue;
14054 }
14055 let key = (*day_id, a.to_owned(), s.to_owned());
14056 let entry = expanded.entry(key).or_default();
14057 entry.session_count_delta += delta.session_count_delta;
14058 entry.message_count_delta += delta.message_count_delta;
14059 entry.total_chars_delta += delta.total_chars_delta;
14060 }
14061 }
14062
14063 let mut out: Vec<(i64, String, String, StatsDelta)> = expanded
14064 .into_iter()
14065 .map(|((d, a, s), delta)| (d, a, s, delta))
14066 .collect();
14067 out.sort_by(|(d1, a1, s1, _), (d2, a2, s2, _)| {
14068 d1.cmp(d2).then_with(|| a1.cmp(a2)).then_with(|| s1.cmp(s2))
14069 });
14070 out
14071 }
14072
14073 pub fn is_empty(&self) -> bool {
14075 self.deltas.is_empty()
14076 }
14077
14078 pub fn raw_entry_count(&self) -> usize {
14080 self.deltas.len()
14081 }
14082}
14083
/// Additive token-usage totals for one `(day, agent, source, model family)` bucket.
#[derive(Clone, Debug, Default)]
pub struct TokenStatsDelta {
    /// Number of recorded messages (one per `TokenStatsAggregator::record` call).
    pub api_call_count: i64,
    /// Recorded messages whose role is `user`.
    pub user_message_count: i64,
    /// Recorded messages whose role is `assistant` or `agent`.
    pub assistant_message_count: i64,
    /// Recorded messages whose role is `tool`.
    pub tool_message_count: i64,
    /// Sum of reported input tokens (missing values count as 0).
    pub total_input_tokens: i64,
    /// Sum of reported output tokens (missing values count as 0).
    pub total_output_tokens: i64,
    /// Sum of reported cache-read tokens (missing values count as 0).
    pub total_cache_read_tokens: i64,
    /// Sum of reported cache-creation tokens (missing values count as 0).
    pub total_cache_creation_tokens: i64,
    /// Sum of reported thinking tokens (missing values count as 0).
    pub total_thinking_tokens: i64,
    /// Sum of each message's overall token total (missing values count as 0).
    pub grand_total_tokens: i64,
    /// Sum of message content lengths.
    pub total_content_chars: i64,
    /// Sum of tool calls.
    pub total_tool_calls: i64,
    /// Sum of estimated costs in USD.
    pub estimated_cost_usd: f64,
    /// Number of sessions attributed to the bucket.
    pub session_count: i64,
}
14109
14110#[derive(Debug, Default)]
14116pub struct TokenStatsAggregator {
14117 deltas: HashMap<(i64, String, String, String), TokenStatsDelta>,
14119}
14120
14121impl TokenStatsAggregator {
14122 pub fn new() -> Self {
14123 Self {
14124 deltas: HashMap::new(),
14125 }
14126 }
14127
14128 #[allow(clippy::too_many_arguments)]
14130 pub fn record(
14131 &mut self,
14132 agent_slug: &str,
14133 source_id: &str,
14134 day_id: i64,
14135 model_family: &str,
14136 role: &str,
14137 usage: &crate::connectors::ExtractedTokenUsage,
14138 content_chars: i64,
14139 estimated_cost_usd: f64,
14140 ) {
14141 let key = (
14142 day_id,
14143 agent_slug.to_owned(),
14144 source_id.to_owned(),
14145 model_family.to_owned(),
14146 );
14147 let delta = self.deltas.entry(key).or_default();
14148
14149 delta.api_call_count += 1;
14150 match role {
14151 "user" => delta.user_message_count += 1,
14152 "assistant" | "agent" => delta.assistant_message_count += 1,
14153 "tool" => delta.tool_message_count += 1,
14154 _ => {}
14155 }
14156
14157 delta.total_input_tokens += usage.input_tokens.unwrap_or(0);
14158 delta.total_output_tokens += usage.output_tokens.unwrap_or(0);
14159 delta.total_cache_read_tokens += usage.cache_read_tokens.unwrap_or(0);
14160 delta.total_cache_creation_tokens += usage.cache_creation_tokens.unwrap_or(0);
14161 delta.total_thinking_tokens += usage.thinking_tokens.unwrap_or(0);
14162 delta.grand_total_tokens += usage.total_tokens().unwrap_or(0);
14163 delta.total_content_chars += content_chars;
14164 delta.total_tool_calls += usage.tool_call_count as i64;
14165 delta.estimated_cost_usd += estimated_cost_usd;
14166 }
14167
14168 pub fn record_session(
14170 &mut self,
14171 agent_slug: &str,
14172 source_id: &str,
14173 day_id: i64,
14174 model_family: &str,
14175 ) {
14176 let key = (
14177 day_id,
14178 agent_slug.to_owned(),
14179 source_id.to_owned(),
14180 model_family.to_owned(),
14181 );
14182 self.deltas.entry(key).or_default().session_count += 1;
14183 }
14184
14185 pub fn expand(&self) -> Vec<(i64, String, String, String, TokenStatsDelta)> {
14192 let mut expanded: HashMap<(i64, String, String, String), TokenStatsDelta> = HashMap::new();
14193
14194 for ((day_id, agent, source, model), delta) in &self.deltas {
14195 let permutations = [
14196 (agent.as_str(), source.as_str(), model.as_str()),
14197 ("all", source.as_str(), model.as_str()),
14198 (agent.as_str(), "all", model.as_str()),
14199 (agent.as_str(), source.as_str(), "all"),
14200 ("all", "all", "all"),
14201 ];
14202
14203 for idx in 0..permutations.len() {
14204 let (a, s, m) = permutations[idx];
14205 if permutations[..idx].contains(&(a, s, m)) {
14207 continue;
14208 }
14209 let key = (*day_id, a.to_owned(), s.to_owned(), m.to_owned());
14210 let entry = expanded.entry(key).or_default();
14211 entry.api_call_count += delta.api_call_count;
14212 entry.user_message_count += delta.user_message_count;
14213 entry.assistant_message_count += delta.assistant_message_count;
14214 entry.tool_message_count += delta.tool_message_count;
14215 entry.total_input_tokens += delta.total_input_tokens;
14216 entry.total_output_tokens += delta.total_output_tokens;
14217 entry.total_cache_read_tokens += delta.total_cache_read_tokens;
14218 entry.total_cache_creation_tokens += delta.total_cache_creation_tokens;
14219 entry.total_thinking_tokens += delta.total_thinking_tokens;
14220 entry.grand_total_tokens += delta.grand_total_tokens;
14221 entry.total_content_chars += delta.total_content_chars;
14222 entry.total_tool_calls += delta.total_tool_calls;
14223 entry.estimated_cost_usd += delta.estimated_cost_usd;
14224 entry.session_count += delta.session_count;
14225 }
14226 }
14227
14228 let mut out: Vec<(i64, String, String, String, TokenStatsDelta)> = expanded
14229 .into_iter()
14230 .map(|((d, a, s, m), delta)| (d, a, s, m, delta))
14231 .collect();
14232 out.sort_by(|(d1, a1, s1, m1, _), (d2, a2, s2, m2, _)| {
14233 d1.cmp(d2)
14234 .then_with(|| a1.cmp(a2))
14235 .then_with(|| s1.cmp(s2))
14236 .then_with(|| m1.cmp(m2))
14237 });
14238 out
14239 }
14240
14241 pub fn is_empty(&self) -> bool {
14242 self.deltas.is_empty()
14243 }
14244
14245 pub fn raw_entry_count(&self) -> usize {
14246 self.deltas.len()
14247 }
14248}
14249
/// Additive usage totals for one analytics roll-up bucket.
///
/// "API" figures only include messages whose data source is `"api"`; "content" figures
/// are estimates derived from message content.
#[derive(Clone, Debug, Default)]
pub struct UsageRollupDelta {
    /// All messages in the bucket.
    pub message_count: i64,
    /// Messages with role `user`.
    pub user_message_count: i64,
    /// Messages with role `assistant` or `agent`.
    pub assistant_message_count: i64,
    /// Sum of per-message tool call counts.
    pub tool_call_count: i64,
    /// Messages flagged as containing a plan.
    pub plan_message_count: i64,
    /// Estimated content tokens of plan messages.
    pub plan_content_tokens_est_total: i64,
    /// API token total of plan messages that have API data.
    pub plan_api_tokens_total: i64,
    /// Messages that have API data.
    pub api_coverage_message_count: i64,
    /// Estimated content tokens of all messages.
    pub content_tokens_est_total: i64,
    /// Estimated content tokens of user messages.
    pub content_tokens_est_user: i64,
    /// Estimated content tokens of assistant messages.
    pub content_tokens_est_assistant: i64,
    /// Sum of input, output, cache-read, cache-creation and thinking API tokens.
    pub api_tokens_total: i64,
    /// API input tokens.
    pub api_input_tokens_total: i64,
    /// API output tokens.
    pub api_output_tokens_total: i64,
    /// API cache-read tokens.
    pub api_cache_read_tokens_total: i64,
    /// API cache-creation tokens.
    pub api_cache_creation_tokens_total: i64,
    /// API thinking tokens.
    pub api_thinking_tokens_total: i64,
}
14277
/// Per-message analytics facts, as produced by the analytics rebuild and consumed by
/// [`AnalyticsRollupAggregator`].
#[derive(Debug, Clone)]
pub struct MessageMetricsEntry {
    /// Id of the message the metrics describe.
    pub message_id: i64,
    /// Effective message timestamp in milliseconds; the rebuild stores 0 when unknown.
    pub created_at_ms: i64,
    /// Hour bucket derived from `created_at_ms`.
    pub hour_id: i64,
    /// Day bucket derived from `created_at_ms`.
    pub day_id: i64,
    /// Slug of the agent that owns the conversation.
    pub agent_slug: String,
    /// Workspace id; the rebuild stores 0 when the conversation has none.
    pub workspace_id: i64,
    /// Source the conversation came from.
    pub source_id: String,
    /// Message role (for example `user`, `assistant`, `agent`, `tool`).
    pub role: String,
    /// Length of the message content (the rebuild uses the byte length).
    pub content_chars: i64,
    /// Rough token estimate (the rebuild uses `content_chars / 4`).
    pub content_tokens_est: i64,
    /// Model name as reported, when known.
    pub model_name: Option<String>,
    /// Normalized model family; `"unknown"` when no model name was available.
    pub model_family: String,
    /// Normalized model tier; `"unknown"` when no model name was available.
    pub model_tier: String,
    /// Provider; `"unknown"` when neither the usage data nor the model name gives one.
    pub provider: String,
    /// API-reported input tokens.
    pub api_input_tokens: Option<i64>,
    /// API-reported output tokens.
    pub api_output_tokens: Option<i64>,
    /// API-reported cache-read tokens.
    pub api_cache_read_tokens: Option<i64>,
    /// API-reported cache-creation tokens.
    pub api_cache_creation_tokens: Option<i64>,
    /// API-reported thinking tokens.
    pub api_thinking_tokens: Option<i64>,
    /// API-reported service tier.
    pub api_service_tier: Option<String>,
    /// Where the token figures came from; `"api"` marks API-reported data.
    pub api_data_source: String,
    /// Number of tool calls in the message.
    pub tool_call_count: i64,
    /// Whether the message contains tool calls.
    pub has_tool_calls: bool,
    /// Whether the message was classified as containing a plan.
    pub has_plan: bool,
}
14306
14307#[derive(Debug, Default)]
14312pub struct AnalyticsRollupAggregator {
14313 hourly: HashMap<(i64, String, i64, String), UsageRollupDelta>,
14314 daily: HashMap<(i64, String, i64, String), UsageRollupDelta>,
14315 models_daily: HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
14316}
14317
14318impl AnalyticsRollupAggregator {
14319 pub fn new() -> Self {
14320 Self::default()
14321 }
14322
14323 pub fn record(&mut self, entry: &MessageMetricsEntry) {
14325 let content_est = entry.content_tokens_est;
14326 let api_total = entry.api_input_tokens.unwrap_or(0)
14327 + entry.api_output_tokens.unwrap_or(0)
14328 + entry.api_cache_read_tokens.unwrap_or(0)
14329 + entry.api_cache_creation_tokens.unwrap_or(0)
14330 + entry.api_thinking_tokens.unwrap_or(0);
14331 let is_api = entry.api_data_source == "api";
14332 let is_user = entry.role == "user";
14333 let is_assistant = entry.role == "assistant" || entry.role == "agent";
14334
14335 for (map, bucket_id) in [
14337 (&mut self.hourly, entry.hour_id),
14338 (&mut self.daily, entry.day_id),
14339 ] {
14340 let key = (
14341 bucket_id,
14342 entry.agent_slug.clone(),
14343 entry.workspace_id,
14344 entry.source_id.clone(),
14345 );
14346 let d = map.entry(key).or_default();
14347 d.message_count += 1;
14348 if is_user {
14349 d.user_message_count += 1;
14350 d.content_tokens_est_user += content_est;
14351 }
14352 if is_assistant {
14353 d.assistant_message_count += 1;
14354 d.content_tokens_est_assistant += content_est;
14355 }
14356 d.tool_call_count += entry.tool_call_count;
14357 if entry.has_plan {
14358 d.plan_message_count += 1;
14359 d.plan_content_tokens_est_total += content_est;
14360 if is_api {
14361 d.plan_api_tokens_total += api_total;
14362 }
14363 }
14364 if is_api {
14365 d.api_coverage_message_count += 1;
14366 d.api_tokens_total += api_total;
14367 d.api_input_tokens_total += entry.api_input_tokens.unwrap_or(0);
14368 d.api_output_tokens_total += entry.api_output_tokens.unwrap_or(0);
14369 d.api_cache_read_tokens_total += entry.api_cache_read_tokens.unwrap_or(0);
14370 d.api_cache_creation_tokens_total += entry.api_cache_creation_tokens.unwrap_or(0);
14371 d.api_thinking_tokens_total += entry.api_thinking_tokens.unwrap_or(0);
14372 }
14373 d.content_tokens_est_total += content_est;
14374 }
14375
14376 let model_key = (
14377 entry.day_id,
14378 entry.agent_slug.clone(),
14379 entry.workspace_id,
14380 entry.source_id.clone(),
14381 entry.model_family.clone(),
14382 entry.model_tier.clone(),
14383 );
14384 let d = self.models_daily.entry(model_key).or_default();
14385 d.message_count += 1;
14386 if is_user {
14387 d.user_message_count += 1;
14388 d.content_tokens_est_user += content_est;
14389 }
14390 if is_assistant {
14391 d.assistant_message_count += 1;
14392 d.content_tokens_est_assistant += content_est;
14393 }
14394 d.tool_call_count += entry.tool_call_count;
14395 if entry.has_plan {
14396 d.plan_message_count += 1;
14397 d.plan_content_tokens_est_total += content_est;
14398 if is_api {
14399 d.plan_api_tokens_total += api_total;
14400 }
14401 }
14402 if is_api {
14403 d.api_coverage_message_count += 1;
14404 d.api_tokens_total += api_total;
14405 d.api_input_tokens_total += entry.api_input_tokens.unwrap_or(0);
14406 d.api_output_tokens_total += entry.api_output_tokens.unwrap_or(0);
14407 d.api_cache_read_tokens_total += entry.api_cache_read_tokens.unwrap_or(0);
14408 d.api_cache_creation_tokens_total += entry.api_cache_creation_tokens.unwrap_or(0);
14409 d.api_thinking_tokens_total += entry.api_thinking_tokens.unwrap_or(0);
14410 }
14411 d.content_tokens_est_total += content_est;
14412 }
14413
14414 pub fn is_empty(&self) -> bool {
14415 self.hourly.is_empty() && self.daily.is_empty() && self.models_daily.is_empty()
14416 }
14417
14418 pub fn hourly_entry_count(&self) -> usize {
14419 self.hourly.len()
14420 }
14421
14422 pub fn daily_entry_count(&self) -> usize {
14423 self.daily.len()
14424 }
14425
14426 pub fn models_daily_entry_count(&self) -> usize {
14427 self.models_daily.len()
14428 }
14429}
14430
14431fn has_plan_for_role(role: &str, content: &str) -> bool {
14435 let role = role.trim();
14436 (role.eq_ignore_ascii_case("assistant") || role.eq_ignore_ascii_case("agent"))
14437 && has_plan_heuristic(content)
14438}
14439
14440fn has_plan_heuristic(content: &str) -> bool {
14447 if content.len() < 24 {
14448 return false;
14449 }
14450
14451 let lower = content.to_lowercase();
14452
14453 let looks_like_tool_blob = lower.contains("```")
14455 || lower.contains("\"tool\"")
14456 || lower.contains("stdout:")
14457 || lower.contains("stderr:")
14458 || lower.contains("exit code:");
14459
14460 let mut lines: Vec<&str> = Vec::with_capacity(60);
14461 let mut in_fenced_code = false;
14462 for raw in lower.lines() {
14463 let line = raw.trim();
14464 if line.starts_with("```") {
14465 in_fenced_code = !in_fenced_code;
14466 continue;
14467 }
14468 if in_fenced_code || line.is_empty() {
14469 continue;
14470 }
14471 lines.push(line);
14472 if lines.len() >= 60 {
14473 break;
14474 }
14475 }
14476
14477 let header_pos = lines.iter().position(|line| {
14478 line.starts_with("## plan")
14479 || line.starts_with("# plan")
14480 || line.starts_with("plan:")
14481 || line.starts_with("implementation plan")
14482 || line.starts_with("next steps:")
14483 || line.starts_with("action plan:")
14484 });
14485 let preview_top = lines.iter().take(8).copied().collect::<Vec<_>>().join("\n");
14486 let header_near_top = header_pos.is_some_and(|idx| idx <= 6) || preview_top.contains("plan:");
14487
14488 if !header_near_top {
14489 return false;
14490 }
14491 if looks_like_tool_blob && header_pos.is_none() {
14492 return false;
14493 }
14494
14495 let numbered_steps = lines
14496 .iter()
14497 .filter(|line| is_numbered_step_line(line))
14498 .count();
14499 let bullet_steps = lines
14500 .iter()
14501 .filter(|line| {
14502 line.starts_with("- ")
14503 || line.starts_with("* ")
14504 || line.starts_with("+ ")
14505 || line.starts_with("- [ ] ")
14506 || line.starts_with("- [x] ")
14507 })
14508 .count();
14509
14510 numbered_steps >= 2 || (numbered_steps >= 1 && bullet_steps >= 1) || bullet_steps >= 3
14511}
14512
/// Reports whether `line` starts (after leading whitespace) with a numbered list
/// marker: one to three ASCII digits followed by `". "` or `") "`.
fn is_numbered_step_line(line: &str) -> bool {
    let candidate = line.trim_start();
    // ASCII digits are single bytes, so the byte count is a valid slice boundary.
    let marker_len = candidate
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    if !(1..=3).contains(&marker_len) {
        return false;
    }
    let tail = &candidate[marker_len..];
    [". ", ") "].iter().any(|sep| tail.starts_with(*sep))
}
14522
/// Token-usage record for a single message.
///
/// NOTE(review): the producers and consumers of this type are outside the reviewed
/// section; field notes below follow the field names and types only.
#[derive(Debug, Clone)]
pub struct TokenUsageEntry {
    /// Id of the message.
    pub message_id: i64,
    /// Id of the conversation the message belongs to.
    pub conversation_id: i64,
    /// Id of the agent.
    pub agent_id: i64,
    /// Id of the workspace, when there is one.
    pub workspace_id: Option<i64>,
    /// Id of the source.
    pub source_id: String,
    /// Message timestamp in milliseconds.
    pub timestamp_ms: i64,
    /// Day bucket of the message.
    pub day_id: i64,
    /// Model name, when known.
    pub model_name: Option<String>,
    /// Model family, when known.
    pub model_family: Option<String>,
    /// Model tier, when known.
    pub model_tier: Option<String>,
    /// Service tier, when known.
    pub service_tier: Option<String>,
    /// Provider, when known.
    pub provider: Option<String>,
    /// Input tokens, when reported.
    pub input_tokens: Option<i64>,
    /// Output tokens, when reported.
    pub output_tokens: Option<i64>,
    /// Cache-read tokens, when reported.
    pub cache_read_tokens: Option<i64>,
    /// Cache-creation tokens, when reported.
    pub cache_creation_tokens: Option<i64>,
    /// Thinking tokens, when reported.
    pub thinking_tokens: Option<i64>,
    /// Overall token total, when available.
    pub total_tokens: Option<i64>,
    /// Estimated cost in USD, when it could be computed.
    pub estimated_cost_usd: Option<f64>,
    /// Message role.
    pub role: String,
    /// Length of the message content.
    pub content_chars: i64,
    /// Whether the message contains tool calls.
    pub has_tool_calls: bool,
    /// Number of tool calls in the message.
    pub tool_call_count: u32,
    /// Origin of the token figures.
    pub data_source: String,
}
14551
/// One row of the `model_pricing` table, as loaded by [`PricingTable`].
///
/// Costs are expressed per million tokens.
#[derive(Debug, Clone)]
pub struct PricingEntry {
    /// SQL `LIKE`-style pattern matched against model names.
    pub model_pattern: String,
    /// Provider the price applies to.
    pub provider: String,
    /// Price of non-cached input tokens, per million.
    pub input_cost_per_mtok: f64,
    /// Price of output tokens, per million.
    pub output_cost_per_mtok: f64,
    /// Price of cache-read tokens, per million, when defined.
    pub cache_read_cost_per_mtok: Option<f64>,
    /// Price of cache-creation tokens, per million, when defined.
    pub cache_creation_cost_per_mtok: Option<f64>,
    /// Day id from which the price is effective (parsed from `effective_date`).
    pub effective_day_id: i64,
}
14568
14569#[derive(Debug, Clone, Default)]
14571pub struct PricingDiagnostics {
14572 pub priced_count: u64,
14573 pub unpriced_count: u64,
14574 pub unknown_models: HashMap<String, u64>,
14576}
14577
14578impl PricingDiagnostics {
14579 fn record_priced(&mut self) {
14580 self.priced_count += 1;
14581 }
14582
14583 fn record_unpriced(&mut self, model_name: Option<&str>) {
14584 self.unpriced_count += 1;
14585 let key = model_name.unwrap_or("(none)").to_string();
14586 *self.unknown_models.entry(key).or_insert(0) += 1;
14587 }
14588
14589 pub fn log_summary(&self) {
14591 let total = self.priced_count + self.unpriced_count;
14592 if total == 0 {
14593 return;
14594 }
14595 let pct = (self.priced_count as f64 / total as f64) * 100.0;
14596 tracing::info!(
14597 target: "cass::analytics::pricing",
14598 priced = self.priced_count,
14599 unpriced = self.unpriced_count,
14600 total = total,
14601 coverage_pct = format!("{pct:.1}%"),
14602 "pricing coverage"
14603 );
14604 if !self.unknown_models.is_empty() {
14605 let mut sorted: Vec<_> = self.unknown_models.iter().collect();
14606 sorted.sort_by(|a, b| b.1.cmp(a.1));
14607 for (model, count) in sorted.iter().take(5) {
14608 tracing::debug!(
14609 target: "cass::analytics::pricing",
14610 model = model.as_str(),
14611 count = count,
14612 "unknown model (no pricing)"
14613 );
14614 }
14615 }
14616 }
14617}
14618
14619#[derive(Debug, Clone)]
14621pub struct PricingTable {
14622 entries: Vec<PricingEntry>,
14623}
14624
14625impl PricingTable {
14626 pub fn load(conn: &FrankenConnection) -> Result<Self> {
14628 Self::franken_load(conn)
14629 }
14630
14631 pub fn franken_load(conn: &FrankenConnection) -> Result<Self> {
14633 let rows = conn.query(
14634 "SELECT model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
14635 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
14636 FROM model_pricing
14637 ORDER BY effective_date DESC",
14638 )?;
14639 let mut entries = Vec::with_capacity(rows.len());
14640 for row in &rows {
14641 let effective_date: String = row.get_typed(6)?;
14642 let effective_day_id = date_str_to_day_id(&effective_date)?;
14643 entries.push(PricingEntry {
14644 model_pattern: row.get_typed(0)?,
14645 provider: row.get_typed(1)?,
14646 input_cost_per_mtok: row.get_typed(2)?,
14647 output_cost_per_mtok: row.get_typed(3)?,
14648 cache_read_cost_per_mtok: row.get_typed(4)?,
14649 cache_creation_cost_per_mtok: row.get_typed(5)?,
14650 effective_day_id,
14651 });
14652 }
14653 Ok(Self { entries })
14654 }
14655
14656 pub fn lookup(&self, model_name: &str, message_day_id: i64) -> Option<&PricingEntry> {
14664 let mut best: Option<&PricingEntry> = None;
14665
14666 for entry in &self.entries {
14667 if entry.effective_day_id > message_day_id {
14668 continue;
14669 }
14670 if !sql_like_match(model_name, &entry.model_pattern) {
14671 continue;
14672 }
14673
14674 match best {
14675 None => best = Some(entry),
14676 Some(current) => {
14677 if entry.effective_day_id > current.effective_day_id
14678 || (entry.effective_day_id == current.effective_day_id
14679 && entry.model_pattern.len() > current.model_pattern.len())
14680 {
14681 best = Some(entry);
14682 }
14683 }
14684 }
14685 }
14686
14687 best
14688 }
14689
14690 pub fn compute_cost(
14694 &self,
14695 model_name: Option<&str>,
14696 message_day_id: i64,
14697 input_tokens: Option<i64>,
14698 output_tokens: Option<i64>,
14699 cache_read_tokens: Option<i64>,
14700 cache_creation_tokens: Option<i64>,
14701 ) -> Option<f64> {
14702 let model = model_name?;
14703 let pricing = self.lookup(model, message_day_id)?;
14704
14705 if input_tokens.is_none() && output_tokens.is_none() {
14706 return None;
14707 }
14708
14709 let mut cost = 0.0;
14710 let cache_read = cache_read_tokens.unwrap_or(0);
14711 let cache_creation = cache_creation_tokens.unwrap_or(0);
14712 let non_cache_input = input_tokens
14715 .unwrap_or(0)
14716 .saturating_sub(cache_read)
14717 .saturating_sub(cache_creation)
14718 .max(0);
14719 cost += non_cache_input as f64 * pricing.input_cost_per_mtok / 1_000_000.0;
14720 cost += output_tokens.unwrap_or(0) as f64 * pricing.output_cost_per_mtok / 1_000_000.0;
14721
14722 if let Some(cache_price) = pricing.cache_read_cost_per_mtok {
14723 cost += cache_read as f64 * cache_price / 1_000_000.0;
14724 }
14725 if let Some(cache_price) = pricing.cache_creation_cost_per_mtok {
14726 cost += cache_creation as f64 * cache_price / 1_000_000.0;
14727 }
14728
14729 Some(cost)
14730 }
14731
14732 pub fn is_empty(&self) -> bool {
14734 self.entries.is_empty()
14735 }
14736}
14737
14738fn date_str_to_day_id(s: &str) -> Result<i64> {
14741 use chrono::NaiveDate;
14742 const EPOCH_2020: NaiveDate = match NaiveDate::from_ymd_opt(2020, 1, 1) {
14743 Some(d) => d,
14744 None => unreachable!(),
14745 };
14746 NaiveDate::parse_from_str(s, "%Y-%m-%d")
14747 .map(|d| (d - EPOCH_2020).num_days())
14748 .with_context(|| format!("invalid effective_date '{s}'"))
14749}
14750
/// Matches `value` against a SQL `LIKE` pattern, ignoring ASCII case.
///
/// `%` matches any run of characters (including none) and `_` matches exactly
/// one character. There is no escape character. Only ASCII letters are
/// case-folded; other characters must match exactly.
fn sql_like_match(value: &str, pattern: &str) -> bool {
    sql_like_match_bytes(
        value.to_ascii_lowercase().as_bytes(),
        pattern.to_ascii_lowercase().as_bytes(),
    )
}

/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`.
///
/// Continuation bytes (`0x80..=0xBF`) are not valid lead bytes; they fall into
/// the two-byte bucket, so callers are expected to pass a real lead byte.
fn utf8_char_len(b: u8) -> usize {
    if b < 0x80 {
        1
    } else if b < 0xE0 {
        2
    } else if b < 0xF0 {
        3
    } else {
        4
    }
}

/// Case-sensitive `LIKE` matcher over UTF-8 bytes.
///
/// Both slices are expected to be valid UTF-8 (they originate from `&str`).
/// Literal pattern bytes are compared byte-wise, `_` consumes one whole UTF-8
/// character, and `%` consumes zero or more whole characters.
///
/// The matcher is iterative: on a mismatch it only rewinds to the most recent
/// `%` and lets that wildcard swallow one more character. This bounds the work
/// to `O(val.len() * pat.len())`, whereas naive recursion over every `%`
/// degrades exponentially on patterns such as `%a%a%a%b`.
fn sql_like_match_bytes(val: &[u8], pat: &[u8]) -> bool {
    let mut v = 0; // cursor into `val`
    let mut p = 0; // cursor into `pat`
    // Resume point for the most recent `%`: (pattern index just after it,
    // value index up to which the wildcard has consumed so far).
    let mut resume: Option<(usize, usize)> = None;

    while v < val.len() {
        let advanced = match pat.get(p).copied() {
            Some(b'%') => {
                p += 1;
                resume = Some((p, v));
                true
            }
            Some(b'_') => {
                let end = v + utf8_char_len(val[v]);
                // A truncated trailing sequence cannot satisfy `_`.
                if end <= val.len() {
                    v = end;
                    p += 1;
                    true
                } else {
                    false
                }
            }
            Some(literal) if literal == val[v] => {
                v += 1;
                p += 1;
                true
            }
            _ => false,
        };
        if advanced {
            continue;
        }

        // Mismatch: widen the latest `%` by one character and retry from there.
        let Some((after_percent, consumed_to)) = resume else {
            return false;
        };
        let retry_from = consumed_to + utf8_char_len(val[consumed_to]);
        if retry_from > val.len() {
            return false;
        }
        resume = Some((after_percent, retry_from));
        p = after_percent;
        v = retry_from;
    }

    // Value exhausted: whatever pattern remains may only be `%` wildcards.
    pat[p..].iter().all(|&b| b == b'%')
}
14808
14809fn rebuild_batch_size_env(var: &str, default: usize) -> usize {
14810 dotenvy::var(var)
14811 .ok()
14812 .and_then(|raw| raw.parse::<usize>().ok())
14813 .filter(|value| *value > 0)
14814 .unwrap_or(default)
14815}
14816
14817fn is_out_of_memory_error<E: OutOfMemoryProbe + ?Sized>(err: &E) -> bool {
14827 err.is_out_of_memory()
14828}
14829
/// Error types that can tell whether they describe an out-of-memory failure.
trait OutOfMemoryProbe {
    /// Returns `true` when this error represents an out-of-memory condition.
    fn is_out_of_memory(&self) -> bool;
}
14833
14834impl OutOfMemoryProbe for anyhow::Error {
14835 fn is_out_of_memory(&self) -> bool {
14836 self.chain().any(|cause| {
14837 if cause
14838 .downcast_ref::<frankensqlite::FrankenError>()
14839 .is_some_and(|err| matches!(err, frankensqlite::FrankenError::OutOfMemory))
14840 {
14841 return true;
14842 }
14843 is_exact_out_of_memory_message(&cause.to_string())
14844 })
14845 }
14846}
14847
/// Only the dedicated `OutOfMemory` variant counts; no message inspection is
/// done for the typed error.
impl OutOfMemoryProbe for frankensqlite::FrankenError {
    fn is_out_of_memory(&self) -> bool {
        matches!(self, frankensqlite::FrankenError::OutOfMemory)
    }
}
14853
/// Returns `true` when `message`, after trimming surrounding whitespace, is
/// exactly "out of memory" or "not enough memory" (ASCII case-insensitive).
///
/// Messages that merely contain one of those phrases do not match.
fn is_exact_out_of_memory_message(message: &str) -> bool {
    let trimmed = message.trim();
    ["out of memory", "not enough memory"]
        .iter()
        .any(|phrase| trimmed.eq_ignore_ascii_case(phrase))
}
14860
/// Per-day counters.
///
/// NOTE(review): field meanings below are inferred from the names; the code
/// that fills this struct is not visible in this section — confirm.
#[derive(Debug, Clone)]
pub struct DailyCount {
    /// Day identifier — presumably days since 2020-01-01, as produced by
    /// `date_str_to_day_id`; TODO confirm.
    pub day_id: i64,
    /// Number of sessions counted for the day.
    pub sessions: i64,
    /// Number of messages counted for the day.
    pub messages: i64,
    /// Character total counted for the day.
    pub chars: i64,
}
14873
/// Row counts and timing reported for an analytics rebuild.
///
/// NOTE(review): the producer is not visible in this section; field notes are
/// inferred from the names — confirm.
#[derive(Debug, Clone)]
pub struct AnalyticsRebuildResult {
    /// Rows counted for `message_metrics`.
    pub message_metrics_rows: usize,
    /// Rows counted for the hourly usage rollup.
    pub usage_hourly_rows: usize,
    /// Rows counted for the daily usage rollup.
    pub usage_daily_rows: usize,
    /// Rows counted for the per-model daily usage rollup.
    pub usage_models_daily_rows: usize,
    /// Elapsed time in milliseconds.
    pub elapsed_ms: u64,
    /// Throughput in messages per second.
    pub messages_per_sec: f64,
}
14884
/// Summary reported for a daily-stats rebuild.
///
/// NOTE(review): producer not visible here; field notes inferred from names.
#[derive(Debug, Clone)]
pub struct DailyStatsRebuildResult {
    /// Number of rows created.
    pub rows_created: i64,
    /// Total number of sessions covered.
    pub total_sessions: i64,
}
14891
/// Deletion counts reported for an agent archive purge.
///
/// `Default` yields zero for both counters.
/// NOTE(review): producer not visible here; field notes inferred from names.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AgentArchivePurgeResult {
    /// Number of conversations deleted.
    pub conversations_deleted: usize,
    /// Number of messages deleted.
    pub messages_deleted: usize,
}
14898
/// Health snapshot for the materialized daily-stats data.
///
/// NOTE(review): producer not visible here; every field note below is
/// inferred from the name and should be confirmed against the code that
/// builds this struct.
#[derive(Debug, Clone)]
pub struct DailyStatsHealth {
    /// Whether the daily-stats data is populated.
    pub populated: bool,
    /// Number of daily-stats rows.
    pub row_count: i64,
    /// Oldest update timestamp in milliseconds, if any.
    pub oldest_update_ms: Option<i64>,
    /// Number of conversations.
    pub conversation_count: i64,
    /// Total taken from the materialized data.
    pub materialized_total: i64,
    /// Drift value — presumably the gap between `conversation_count` and
    /// `materialized_total`; TODO confirm sign and definition.
    pub drift: i64,
}
14909
/// Batch size for FTS5 work.
/// NOTE(review): the consumer of this constant is not visible in this section.
const FTS5_BATCH_SIZE: usize = 100;
14918
/// One message row as read during an FTS rebuild.
///
/// NOTE(review): the query that fills this struct is not visible here; field
/// notes are inferred from the names.
#[derive(Debug, Clone)]
struct FtsRebuildMessageRow {
    /// Row id of the message row.
    rowid: i64,
    /// Id of the message.
    message_id: i64,
    /// Id of the conversation the message belongs to.
    conversation_id: i64,
    /// Message text.
    content: String,
    /// Message creation timestamp, when known.
    created_at: Option<i64>,
}
14927
/// Conversation-level columns kept alongside messages for FTS purposes.
///
/// NOTE(review): the consumer is not visible here; field notes are inferred
/// from the names.
#[derive(Debug, Clone)]
struct FtsConversationProjection {
    /// Conversation title.
    title: String,
    /// Id of the owning agent, if any.
    agent_id: Option<i64>,
    /// Id of the workspace, if any.
    workspace_id: Option<i64>,
    /// Source path of the conversation, as a string.
    source_path: String,
}
14935
/// One full-text-search row: a message plus the conversation fields that are
/// stored next to it. Built by [`FtsEntry::from_message`].
#[derive(Debug, Clone)]
pub struct FtsEntry {
    /// Message text.
    pub content: String,
    /// Conversation title; empty when the conversation has none.
    pub title: String,
    /// Agent slug of the conversation.
    pub agent: String,
    /// Workspace path rendered lossily as a string; empty when absent.
    pub workspace: String,
    /// Conversation source path rendered lossily as a string.
    pub source_path: String,
    /// Message timestamp, falling back to the conversation start time.
    pub created_at: Option<i64>,
    /// Id of the message this row indexes.
    pub message_id: i64,
}
14947
14948impl FtsEntry {
14949 pub fn from_message(message_id: i64, msg: &Message, conv: &Conversation) -> Self {
14951 FtsEntry {
14952 content: msg.content.clone(),
14953 title: conv.title.clone().unwrap_or_default(),
14954 agent: conv.agent_slug.clone(),
14955 workspace: conv
14956 .workspace
14957 .as_ref()
14958 .map(|p| p.to_string_lossy().into_owned())
14959 .unwrap_or_default(),
14960 source_path: path_to_string(&conv.source_path),
14961 created_at: msg.created_at.or(conv.started_at),
14962 message_id,
14963 }
14964 }
14965}
14966
/// Upper bound on documents per pending FTS entry batch.
/// NOTE(review): enforcement site is not visible in this section.
const FTS_ENTRY_BATCH_MAX_DOCS: usize = 512;
/// Upper bound on accumulated content size (1 MiB) per pending FTS entry batch.
/// NOTE(review): enforcement site is not visible in this section.
const FTS_ENTRY_BATCH_MAX_CHARS: usize = 1024 * 1024;

/// Fallback returned by `fts_rebuild_batch_size` when
/// `CASS_FTS_REBUILD_BATCH_SIZE` is unset or invalid.
const FTS_REBUILD_BATCH_SIZE_DEFAULT: usize = 5_000;
14975
14976fn fts_rebuild_batch_size() -> usize {
14979 dotenvy::var("CASS_FTS_REBUILD_BATCH_SIZE")
14980 .ok()
14981 .and_then(|v| v.parse::<usize>().ok())
14982 .filter(|&n| n > 0)
14983 .unwrap_or(FTS_REBUILD_BATCH_SIZE_DEFAULT)
14984}
14985
14986fn flush_pending_fts_entries(
14987 storage: &FrankenStorage,
14988 tx: &FrankenTransaction<'_>,
14989 entries: &mut Vec<FtsEntry>,
14990 pending_chars: &mut usize,
14991 inserted_total: &mut usize,
14992) -> Result<()> {
14993 if entries.is_empty() {
14994 return Ok(());
14995 }
14996
14997 if storage.fts_messages_present_cached(tx) {
14998 *inserted_total += franken_batch_insert_fts(tx, entries)?;
14999 }
15000 entries.clear();
15001 *pending_chars = 0;
15002 Ok(())
15003}
15004
/// Renders a path as an owned `String`, replacing any non-UTF-8 sequences
/// with U+FFFD (lossy conversion).
fn path_to_string<P: AsRef<Path>>(p: P) -> String {
    let path: &Path = p.as_ref();
    String::from(path.to_string_lossy())
}
15008
15009fn role_str(role: &MessageRole) -> String {
15010 role_as_str(role).to_owned()
15011}
15012
/// Returns the textual name of a message role.
///
/// Known roles map to fixed lowercase names; `MessageRole::Other` yields the
/// string it carries, unchanged.
fn role_as_str(role: &MessageRole) -> &str {
    match role {
        MessageRole::User => "user",
        MessageRole::Agent => "agent",
        MessageRole::Tool => "tool",
        MessageRole::System => "system",
        MessageRole::Other(v) => v.as_str(),
    }
}
15022
15023fn agent_kind_str(kind: AgentKind) -> String {
15024 match kind {
15025 AgentKind::Cli => "cli".into(),
15026 AgentKind::VsCode => "vscode".into(),
15027 AgentKind::Hybrid => "hybrid".into(),
15028 }
15029}
15030
15031#[cfg(test)]
15036mod tests {
15037 use super::*;
15038 use serial_test::serial;
15039 use tempfile::TempDir;
15040
/// Test helper that remembers an environment variable's earlier value so it
/// can be put back when the guard is dropped.
struct EnvGuard {
    /// Name of the guarded environment variable.
    key: &'static str,
    /// Value read before the override; `None` when it could not be read
    /// (for example because the variable was unset).
    previous: Option<String>,
}
15045
15046 impl Drop for EnvGuard {
15047 fn drop(&mut self) {
15048 if let Some(value) = &self.previous {
15049 unsafe {
15051 std::env::set_var(self.key, value);
15052 }
15053 } else {
15054 unsafe {
15056 std::env::remove_var(self.key);
15057 }
15058 }
15059 }
15060 }
15061
/// Overrides environment variable `key` with `value` and returns a guard that
/// restores the earlier state when dropped.
fn set_env_var(key: &'static str, value: impl AsRef<str>) -> EnvGuard {
    // Capture the old value first so the guard can restore it later.
    let previous = dotenvy::var(key).ok();
    // SAFETY: NOTE(review) — mutating the process environment is only sound
    // while no other thread touches it; presumably callers are serialized
    // tests — confirm.
    unsafe {
        std::env::set_var(key, value.as_ref());
    }
    EnvGuard { key, previous }
}
15070
/// A database named `agent_search.db` maps to the doctor repair lock under
/// `doctor/locks/` in the same directory; other database names map to no lock.
#[test]
fn doctor_mutation_open_guard_only_targets_canonical_archive_db() {
    let dir = TempDir::new().unwrap();
    let canonical = dir.path().join("agent_search.db");
    let scratch = dir.path().join("scratch.db");

    assert_eq!(
        doctor_mutation_lock_path_for_db_open(&canonical),
        Some(dir.path().join("doctor/locks/doctor-repair.lock"))
    );
    assert_eq!(doctor_mutation_lock_path_for_db_open(&scratch), None);
}
15083
/// Lock metadata is attributed to this process only when its `pid=` line
/// holds exactly the current process id.
#[test]
fn doctor_lock_metadata_pid_detection_is_exact() {
    let current = std::process::id();

    assert!(doctor_lock_metadata_pid_is_current_process(&format!(
        "schema_version=1\npid={current}\nmode=safe_auto_run\n"
    )));
    // A non-numeric pid must not match.
    assert!(!doctor_lock_metadata_pid_is_current_process(
        "schema_version=1\npid=not-a-pid\n"
    ));
    // A neighbouring pid must not match either.
    assert!(!doctor_lock_metadata_pid_is_current_process(&format!(
        "pid={}\n",
        current.saturating_add(1)
    )));
}
15099
/// While the doctor repair lock is held and its metadata names another pid,
/// opening the canonical database read-only must fail with an error that
/// mentions the active doctor mutation lock.
#[test]
fn doctor_storage_open_refuses_active_doctor_mutation_lock_from_other_process() {
    use std::io::Write as _;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    // Create the database first, then close it so only the lock is in play.
    {
        let storage = FrankenStorage::open(&db_path).unwrap();
        storage.close().unwrap();
    }

    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut lock_file = fs::OpenOptions::new()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&lock_path)
        .unwrap();
    fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
    // Replace the metadata with a pid that is not this process.
    lock_file.set_len(0).unwrap();
    lock_file.write_all(b"schema_version=1\npid=1\n").unwrap();
    lock_file.sync_all().unwrap();

    let err =
        open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
            .expect_err("active doctor mutation lock must block canonical DB opens");
    let message = err.to_string();
    assert!(
        message.contains("doctor mutation lock") && message.contains("active"),
        "error should identify the active doctor mutation lock: {message}"
    );

    fs2::FileExt::unlock(&lock_file).unwrap();
}
15135
/// When the held doctor repair lock names the current process id, a
/// read-only open of the canonical database must still succeed.
#[test]
fn doctor_storage_open_allows_current_doctor_process_probe() {
    use std::io::Write as _;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    {
        let storage = FrankenStorage::open(&db_path).unwrap();
        storage.close().unwrap();
    }

    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut lock_file = fs::OpenOptions::new()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&lock_path)
        .unwrap();
    fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
    // Record this process as the lock owner.
    lock_file.set_len(0).unwrap();
    write!(lock_file, "schema_version=1\npid={}\n", std::process::id()).unwrap();
    lock_file.sync_all().unwrap();

    let conn =
        open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
            .expect(
                "doctor process must be able to run post-repair read probes under its own lock",
            );
    drop(conn);

    fs2::FileExt::unlock(&lock_file).unwrap();
}
15169
/// When the `fsqlite.`-namespaced pragma fails, `disable_autocommit_retain`
/// must fall through to the plain pragma, return it, and have attempted the
/// pragmas in the order listed by `AUTOCOMMIT_RETAIN_OFF_PRAGMAS`.
#[test]
fn autocommit_retain_disable_tries_compat_then_canonical_pragma() {
    let mut attempts = Vec::new();

    let selected = disable_autocommit_retain(|pragma| {
        attempts.push(pragma);
        if pragma == "PRAGMA fsqlite.autocommit_retain = OFF;" {
            Err("compat namespace unavailable")
        } else {
            Ok(())
        }
    })
    .expect("canonical pragma should disable autocommit retain");

    assert_eq!(selected, "PRAGMA autocommit_retain = OFF;");
    assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
}
15187
/// When every pragma fails, `disable_autocommit_retain` must return an error
/// that refuses the long-lived connection and lists each attempted pragma.
#[test]
fn autocommit_retain_disable_fails_closed_when_no_pragma_works() {
    let mut attempts = Vec::new();

    let err = disable_autocommit_retain(|pragma| {
        attempts.push(pragma);
        Err("unsupported pragma")
    })
    .expect_err("unsupported autocommit retain controls should fail closed");

    assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
    let message = err.to_string();
    assert!(
        message.contains("refusing to keep a long-lived MVCC connection"),
        "error should force callers away from unbounded snapshot retention: {message}"
    );
    assert!(
        message.contains("PRAGMA fsqlite.autocommit_retain = OFF;")
            && message.contains("PRAGMA autocommit_retain = OFF;"),
        "error should preserve attempted PRAGMAs for diagnostics: {message}"
    );
}
15210
/// Opens `db_path` with `rusqlite` for test fixtures, panicking if the open
/// fails.
fn rusqlite_test_fixture_conn(db_path: &Path) -> rusqlite::Connection {
    rusqlite::Connection::open(db_path).expect("open rusqlite test fixture connection")
}
15222
/// Test fixture: creates a database at `db_path` using
/// `HISTORICAL_RECOVERY_CORE_SCHEMA` and inserts `conversations` with plain
/// SQL, bypassing the storage layer.
///
/// A single agent row (id 1, slug `codex`) owns every conversation.
/// Conversation ids are 1-based positions in `conversations`; message ids
/// are assigned sequentially across all conversations, starting at 1.
/// Panics on any failure.
fn seed_historical_db_direct(
    db_path: &Path,
    conversations: &[crate::model::types::Conversation],
) {
    if let Some(parent) = db_path.parent() {
        fs::create_dir_all(parent).unwrap();
    }

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA).unwrap();
    conn.execute_compat(
        "INSERT INTO agents(id, slug, name, version, kind, created_at, updated_at)
         VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        fparams![1_i64, "codex", "Codex", "0.2.3", "cli", 0_i64, 0_i64],
    )
    .unwrap();

    let mut next_message_id = 1_i64;
    for (conv_index, conv) in conversations.iter().enumerate() {
        let conversation_id = i64::try_from(conv_index + 1).unwrap();
        // Each conversation with a workspace gets its own workspace row whose
        // id equals the conversation id.
        let workspace_id = conv.workspace.as_ref().map(|workspace| {
            let workspace_id = conversation_id;
            let workspace_path = workspace.to_string_lossy().into_owned();
            conn.execute_compat(
                "INSERT INTO workspaces(id, path, display_name) VALUES(?1, ?2, ?3)",
                fparams![
                    workspace_id,
                    workspace_path.as_str(),
                    workspace_path.as_str()
                ],
            )
            .unwrap();
            workspace_id
        });
        let source_path = conv.source_path.to_string_lossy().into_owned();
        let metadata_json = conv.metadata_json.to_string();
        conn.execute_compat(
            "INSERT INTO conversations (
                 id, agent_id, workspace_id, source_id, external_id, title, source_path,
                 started_at, ended_at, approx_tokens, metadata_json, origin_host
             ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
            fparams![
                conversation_id,
                1_i64,
                workspace_id,
                conv.source_id.as_str(),
                conv.external_id.as_deref(),
                conv.title.as_deref(),
                source_path.as_str(),
                conv.started_at,
                conv.ended_at,
                conv.approx_tokens,
                metadata_json.as_str(),
                conv.origin_host.as_deref()
            ],
        )
        .unwrap();

        for msg in &conv.messages {
            let extra_json = msg.extra_json.to_string();
            let role = role_str(&msg.role);
            conn.execute_compat(
                "INSERT INTO messages(
                     id, conversation_id, idx, role, author, created_at, content, extra_json
                 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
                fparams![
                    next_message_id,
                    conversation_id,
                    msg.idx,
                    role.as_str(),
                    msg.author.as_deref(),
                    msg.created_at,
                    msg.content.as_str(),
                    extra_json.as_str()
                ],
            )
            .unwrap();
            next_message_id += 1;
        }
    }
}
15304
/// `bookmarks.db` counts as user data, with or without a directory prefix.
#[test]
fn is_user_data_file_detects_bookmarks() {
    assert!(is_user_data_file(Path::new("/data/bookmarks.db")));
    assert!(is_user_data_file(Path::new("bookmarks.db")));
}
15314
/// `tui_state.json` counts as user data.
#[test]
fn is_user_data_file_detects_tui_state() {
    assert!(is_user_data_file(Path::new("/data/tui_state.json")));
}
15319
/// `sources.toml` counts as user data.
#[test]
fn is_user_data_file_detects_sources_toml() {
    assert!(is_user_data_file(Path::new("/config/sources.toml")));
}
15324
/// `.env` counts as user data.
#[test]
fn is_user_data_file_detects_env() {
    assert!(is_user_data_file(Path::new(".env")));
}
15329
/// Index databases and unrelated files are not user data.
#[test]
fn is_user_data_file_rejects_other_files() {
    assert!(!is_user_data_file(Path::new("index.db")));
    assert!(!is_user_data_file(Path::new("conversations.db")));
    assert!(!is_user_data_file(Path::new("random.txt")));
}
15336
/// Backing up a database that does not exist succeeds with `None`.
#[test]
fn create_backup_returns_none_for_nonexistent() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("nonexistent.db");
    let result = create_backup(&db_path).unwrap();
    assert!(result.is_none());
}
15348
/// A backup of an existing file is written to disk and its file name
/// contains `backup`.
#[test]
fn create_backup_creates_named_file() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    std::fs::write(&db_path, b"test data").unwrap();

    let backup_path = create_backup(&db_path).unwrap();
    assert!(backup_path.is_some());
    let backup = backup_path.unwrap();
    assert!(backup.exists());
    assert!(
        backup
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .contains("backup")
    );
}
15368
/// Two back-to-back backups of the same database get distinct paths and both
/// files remain on disk.
#[test]
fn create_backup_paths_are_unique() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    std::fs::write(&db_path, b"test data").unwrap();

    let first = create_backup(&db_path).unwrap().unwrap();
    let second = create_backup(&db_path).unwrap().unwrap();

    assert_ne!(first, second);
    assert!(first.exists());
    assert!(second.exists());
}
15382
/// The per-conversation message query (`WHERE conversation_id = ?1 ORDER BY
/// idx`) must be planned as an index seek with no sorter: its `EXPLAIN`
/// output has to contain `SeekGE` and must not contain `SorterOpen`.
#[test]
fn lexical_rebuild_messages_query_uses_conversation_idx_access_path() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    // Seed one agent and one two-message conversation through the storage API.
    let agent = Agent {
        id: None,
        slug: "claude_code".into(),
        name: "Claude Code".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("conv-1".into()),
        title: Some("Lexical rebuild".into()),
        source_path: PathBuf::from("/tmp/conv-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: Some("user".into()),
                created_at: Some(1_700_000_000_010),
                content: "first".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            },
            Message {
                id: None,
                idx: 1,
                role: MessageRole::Agent,
                author: Some("assistant".into()),
                created_at: Some(1_700_000_000_020),
                content: "second".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            },
        ],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    let conversation_id = storage
        .conn
        .query_row_map(
            "SELECT id FROM conversations WHERE external_id = ?1",
            fparams!["conv-1"],
            |row| row.get_typed::<i64>(0),
        )
        .unwrap();

    // Column 1 of each EXPLAIN row is read as the opcode name.
    let opcodes: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN \
             SELECT id, idx, role, author, created_at, content \
             FROM messages \
             WHERE conversation_id = ?1 ORDER BY idx",
            fparams![conversation_id],
            |row| row.get_typed(1),
        )
        .unwrap();

    assert!(
        opcodes.iter().any(|opcode| opcode == "SeekGE"),
        "expected lexical rebuild message fetch to seek into the conversation_id/idx access path, got {opcodes:?}"
    );
    assert!(
        !opcodes.iter().any(|opcode| opcode == "SorterOpen"),
        "expected lexical rebuild message fetch to avoid sorter temp b-trees, got {opcodes:?}"
    );
}
15469
/// Busy, locked, cannot-open, and I/O errors must not be classified as
/// requiring a rebuild.
#[test]
fn schema_check_rebuild_classification_ignores_transient_errors() {
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::Busy
    ));
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        }
    ));
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::CannotOpen {
            path: PathBuf::from("/tmp/test.db"),
        }
    ));
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::Io(std::io::Error::other("disk hiccup"))
    ));
}
15489
/// Corrupt database, corrupt WAL, not-a-database, and short-read errors must
/// be classified as requiring a rebuild.
#[test]
fn schema_check_rebuild_classification_keeps_corruption_errors() {
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string(),
        }
    ));
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::WalCorrupt {
            detail: "bad wal".to_string(),
        }
    ));
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db"),
        }
    ));
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::ShortRead {
            expected: 4096,
            actual: 64,
        }
    ));
}
15514
/// Contention-style VACUUM failures (busy, locked, write conflicts, ...) must
/// be classified as needing a consistent retry, while not-a-database and
/// corruption errors must not be.
#[test]
fn create_backup_refuses_raw_copy_after_retryable_vacuum_errors() {
    let retryable_errors = [
        frankensqlite::FrankenError::Busy,
        frankensqlite::FrankenError::BusyRecovery,
        frankensqlite::FrankenError::BusySnapshot {
            conflicting_pages: "1,2".to_string(),
        },
        frankensqlite::FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::LockFailed {
            detail: "fcntl lock still held".to_string(),
        },
        frankensqlite::FrankenError::WriteConflict { page: 7, holder: 9 },
        frankensqlite::FrankenError::SerializationFailure { page: 11 },
        frankensqlite::FrankenError::Internal("database is locked".to_string()),
    ];

    for err in retryable_errors {
        assert!(
            backup_vacuum_error_requires_consistent_retry(&err),
            "retryable VACUUM failure must not fall back to raw bundle copy: {err}"
        );
    }

    assert!(!backup_vacuum_error_requires_consistent_retry(
        &frankensqlite::FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db")
        }
    ));
    assert!(!backup_vacuum_error_requires_consistent_retry(
        &frankensqlite::FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string()
        }
    ));
}
15552
/// The VACUUM staging path for a backup is a dot-file ending in
/// `.vacuum-in-progress`, and that name is not recognised as a backup root.
#[test]
fn create_backup_uses_hidden_vacuum_stage_path() {
    let backup_path = PathBuf::from("/tmp/test.db.backup.123.456.0");
    let stage_path = vacuum_stage_backup_path(&backup_path);
    let stage_name = stage_path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default();

    assert!(stage_name.starts_with('.'));
    assert!(stage_name.ends_with(".vacuum-in-progress"));
    assert!(
        !is_backup_root_name(stage_name, "test.db.backup."),
        "incomplete VACUUM output must not be discoverable as a backup root"
    );
}
15569
/// For a source file holding arbitrary bytes (not a real database), the
/// backup holds exactly the same bytes.
#[test]
fn create_backup_preserves_content() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let original_content = b"test database content 12345";
    std::fs::write(&db_path, original_content).unwrap();

    let backup_path = create_backup(&db_path).unwrap().unwrap();
    let backup_content = std::fs::read(&backup_path).unwrap();
    assert_eq!(backup_content, original_content);
}
15581
/// `-wal` and `-shm` sidecars next to the source are copied next to the
/// backup with their contents intact.
#[test]
fn create_backup_copies_sidecars_when_present() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    std::fs::write(&db_path, b"db").unwrap();
    std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
    std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();

    let backup_path = create_backup(&db_path).unwrap().unwrap();

    assert_eq!(
        std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
        b"wal"
    );
    assert_eq!(
        std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
        b"shm"
    );
}
15601
/// When the database path is a symlink to a non-database file, the backup
/// fails with a "bundle symlink" error, leaves the link target untouched, and
/// publishes no `test.db.backup.*` file.
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_root_during_raw_fallback() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let outside_db = dir.path().join("outside.db");
    let db_path = dir.path().join("test.db");
    std::fs::write(&outside_db, b"not sqlite").unwrap();
    symlink(&outside_db, &db_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    assert!(
        err.to_string().contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    assert_eq!(std::fs::read(&outside_db).unwrap(), b"not sqlite");
    let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
        .unwrap()
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .filter(|name| name.starts_with("test.db.backup."))
        .collect();
    assert!(
        backup_roots.is_empty(),
        "symlinked backup source must not publish backup roots: {backup_roots:?}"
    );
}
15631
/// When the `-wal` sidecar is a symlink, the backup fails with a "bundle
/// symlink" error, leaves the link target untouched, and leaves no
/// `test.db.backup.*` file behind.
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_sidecar_without_partial_backup() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let outside_wal = dir.path().join("outside.wal");
    let wal_path = database_sidecar_path(&db_path, "-wal");
    std::fs::write(&db_path, b"not sqlite").unwrap();
    std::fs::write(&outside_wal, b"outside wal").unwrap();
    symlink(&outside_wal, &wal_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    assert!(
        err.to_string().contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
    let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
        .unwrap()
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .filter(|name| name.starts_with("test.db.backup."))
        .collect();
    assert!(
        backup_roots.is_empty(),
        "sidecar preflight failure must not leave a partial backup root: {backup_roots:?}"
    );
}
15663
/// With five backup files present and a retention of three, cleanup leaves
/// exactly three files whose names contain `backup`.
#[test]
fn cleanup_old_backups_keeps_recent() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    for i in 0..5 {
        let backup_name = format!("test.db.backup.{}", 1000 + i);
        std::fs::write(dir.path().join(&backup_name), format!("backup {i}")).unwrap();
    }

    cleanup_old_backups(&db_path, 3).unwrap();

    let backups: Vec<_> = std::fs::read_dir(dir.path())
        .unwrap()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_name().to_str().unwrap_or("").contains("backup"))
        .collect();

    assert_eq!(backups.len(), 3);
}
15690
/// Sidecar files must not be counted as backups: with three backups (each
/// with `-wal` and `-shm`) and a retention of two, two roots remain together
/// with exactly two WAL and two SHM sidecars.
#[test]
fn cleanup_old_backups_ignores_wal_and_shm_sidecars() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    for i in 0..3 {
        let backup_name = format!("test.db.backup.{}", 1000 + i);
        let backup_path = dir.path().join(&backup_name);
        std::fs::write(&backup_path, format!("backup {i}")).unwrap();
        std::fs::write(format!("{}-wal", backup_path.display()), b"wal").unwrap();
        std::fs::write(format!("{}-shm", backup_path.display()), b"shm").unwrap();
        // Space the files out in time — presumably so modification times
        // differ between backups; confirm against `cleanup_old_backups`.
        std::thread::sleep(std::time::Duration::from_millis(20));
    }

    cleanup_old_backups(&db_path, 2).unwrap();

    // Bucket what is left by kind.
    let mut roots = Vec::new();
    let mut wals = Vec::new();
    let mut shms = Vec::new();
    for entry in std::fs::read_dir(dir.path())
        .unwrap()
        .filter_map(|e| e.ok())
    {
        let name = entry.file_name().to_string_lossy().into_owned();
        if name.ends_with("-wal") {
            wals.push(name);
        } else if name.ends_with("-shm") {
            shms.push(name);
        } else if name.contains("backup") {
            roots.push(name);
        }
    }

    assert_eq!(roots.len(), 2, "should keep two backup roots");
    assert_eq!(
        wals.len(),
        2,
        "should keep WAL sidecars only for retained backups"
    );
    assert_eq!(
        shms.len(),
        2,
        "should keep SHM sidecars only for retained backups"
    );
}
15736
/// Moving a bundle relocates the database plus its `-wal` and `-shm`
/// sidecars, reports all three as moved, and leaves nothing at the source.
#[test]
fn move_database_bundle_moves_database_and_sidecars() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("test.db.corrupt");

    std::fs::write(&db_path, b"db").unwrap();
    std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
    std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    assert_eq!(
        moved,
        DatabaseBundleMoveResult {
            database: true,
            wal: true,
            shm: true
        }
    );
    assert!(moved.moved_any());

    assert!(!db_path.exists());
    assert!(!database_sidecar_path(&db_path, "-wal").exists());
    assert!(!database_sidecar_path(&db_path, "-shm").exists());

    assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
    assert_eq!(
        std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
        b"wal"
    );
    assert_eq!(
        std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
        b"shm"
    );
}
15772
/// Sidecars that exist without a main database file are still moved; the
/// result reports `database: false` with both sidecars moved.
#[test]
fn move_database_bundle_preserves_orphan_sidecars_without_main_db() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("test.db.corrupt");

    std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
    std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    assert_eq!(
        moved,
        DatabaseBundleMoveResult {
            database: false,
            wal: true,
            shm: true
        }
    );
    assert!(moved.moved_any());
    assert!(!db_path.exists());
    assert!(!database_sidecar_path(&db_path, "-wal").exists());
    assert!(!database_sidecar_path(&db_path, "-shm").exists());
    assert_eq!(
        std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
        b"wal"
    );
    assert_eq!(
        std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
        b"shm"
    );
}
15804
15805 #[test]
15806 #[cfg(unix)]
15807 fn move_database_bundle_moves_dangling_symlink_database_root() {
15808 use std::os::unix::fs::symlink;
15809
15810 let dir = TempDir::new().unwrap();
15811 let db_path = dir.path().join("test.db");
15812 let backup_path = dir.path().join("test.db.corrupt");
15813 let missing_target = dir.path().join("missing-target.db");
15814
15815 symlink(&missing_target, &db_path).unwrap();
15816
15817 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15818
15819 assert_eq!(
15820 moved,
15821 DatabaseBundleMoveResult {
15822 database: true,
15823 wal: false,
15824 shm: false
15825 }
15826 );
15827 assert!(std::fs::symlink_metadata(&db_path).is_err());
15828 assert!(
15829 std::fs::symlink_metadata(&backup_path)
15830 .unwrap()
15831 .file_type()
15832 .is_symlink()
15833 );
15834 assert!(!missing_target.exists());
15835 }
15836
15837 #[test]
15838 #[cfg(unix)]
15839 fn move_database_bundle_moves_dangling_symlink_sidecars_without_main_db() {
15840 use std::os::unix::fs::symlink;
15841
15842 let dir = TempDir::new().unwrap();
15843 let db_path = dir.path().join("test.db");
15844 let backup_path = dir.path().join("test.db.corrupt");
15845 let missing_wal_target = dir.path().join("missing-wal");
15846 let missing_shm_target = dir.path().join("missing-shm");
15847 let wal_path = database_sidecar_path(&db_path, "-wal");
15848 let shm_path = database_sidecar_path(&db_path, "-shm");
15849
15850 symlink(&missing_wal_target, &wal_path).unwrap();
15851 symlink(&missing_shm_target, &shm_path).unwrap();
15852
15853 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15854
15855 assert_eq!(
15856 moved,
15857 DatabaseBundleMoveResult {
15858 database: false,
15859 wal: true,
15860 shm: true
15861 }
15862 );
15863 assert!(std::fs::symlink_metadata(&wal_path).is_err());
15864 assert!(std::fs::symlink_metadata(&shm_path).is_err());
15865 assert!(
15866 std::fs::symlink_metadata(database_sidecar_path(&backup_path, "-wal"))
15867 .unwrap()
15868 .file_type()
15869 .is_symlink()
15870 );
15871 assert!(
15872 std::fs::symlink_metadata(database_sidecar_path(&backup_path, "-shm"))
15873 .unwrap()
15874 .file_type()
15875 .is_symlink()
15876 );
15877 assert!(!missing_wal_target.exists());
15878 assert!(!missing_shm_target.exists());
15879 }
15880
15881 #[test]
15882 fn copy_database_bundle_copies_database_and_sidecars() {
15883 let dir = TempDir::new().unwrap();
15884 let db_path = dir.path().join("test.db");
15885 let copied_path = dir.path().join("copy.db");
15886
15887 std::fs::write(&db_path, b"db").unwrap();
15888 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15889 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15890
15891 copy_database_bundle(&db_path, &copied_path).unwrap();
15892
15893 assert_eq!(std::fs::read(&copied_path).unwrap(), b"db");
15894 assert_eq!(
15895 std::fs::read(database_sidecar_path(&copied_path, "-wal")).unwrap(),
15896 b"wal"
15897 );
15898 assert_eq!(
15899 std::fs::read(database_sidecar_path(&copied_path, "-shm")).unwrap(),
15900 b"shm"
15901 );
15902 assert_eq!(std::fs::read(&db_path).unwrap(), b"db");
15903 }
15904
15905 #[test]
15906 fn copy_database_bundle_creates_destination_parent() {
15907 let dir = TempDir::new().unwrap();
15908 let db_path = dir.path().join("test.db");
15909 let copied_path = dir.path().join("nested/copies/copy.db");
15910
15911 std::fs::write(&db_path, b"db").unwrap();
15912 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15913
15914 copy_database_bundle(&db_path, &copied_path).unwrap();
15915
15916 assert!(copied_path.parent().unwrap().is_dir());
15917 assert_eq!(std::fs::read(&copied_path).unwrap(), b"db");
15918 assert_eq!(
15919 std::fs::read(database_sidecar_path(&copied_path, "-wal")).unwrap(),
15920 b"wal"
15921 );
15922 }
15923
15924 #[test]
15925 #[cfg(unix)]
15926 fn copy_database_bundle_rejects_symlink_source_root() {
15927 use std::os::unix::fs::symlink;
15928
15929 let dir = TempDir::new().unwrap();
15930 let outside_db = dir.path().join("outside.db");
15931 let db_path = dir.path().join("test.db");
15932 let copied_path = dir.path().join("copy.db");
15933
15934 std::fs::write(&outside_db, b"outside").unwrap();
15935 symlink(&outside_db, &db_path).unwrap();
15936
15937 let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();
15938
15939 assert!(
15940 err.to_string().contains("bundle symlink"),
15941 "unexpected error: {err:#}"
15942 );
15943 assert!(!copied_path.exists());
15944 assert_eq!(std::fs::read(&outside_db).unwrap(), b"outside");
15945 }
15946
15947 #[test]
15948 #[cfg(unix)]
15949 fn copy_database_bundle_rejects_symlink_sidecar() {
15950 use std::os::unix::fs::symlink;
15951
15952 let dir = TempDir::new().unwrap();
15953 let db_path = dir.path().join("test.db");
15954 let copied_path = dir.path().join("copy.db");
15955 let outside_wal = dir.path().join("outside.wal");
15956 let wal_path = database_sidecar_path(&db_path, "-wal");
15957
15958 std::fs::write(&db_path, b"db").unwrap();
15959 std::fs::write(&outside_wal, b"outside wal").unwrap();
15960 symlink(&outside_wal, &wal_path).unwrap();
15961
15962 let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();
15963
15964 assert!(
15965 err.to_string().contains("bundle symlink"),
15966 "unexpected error: {err:#}"
15967 );
15968 assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
15969 assert!(!copied_path.exists());
15970 assert!(!database_sidecar_path(&copied_path, "-wal").exists());
15971 }
15972
15973 #[test]
15974 fn move_database_bundle_creates_destination_parent_and_moves_sidecars() {
15975 let dir = TempDir::new().unwrap();
15976 let db_path = dir.path().join("test.db");
15977 let backup_path = dir.path().join("nested/backups/test.db.corrupt");
15978
15979 std::fs::write(&db_path, b"db").unwrap();
15980 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15981 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15982
15983 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15984 assert_eq!(
15985 moved,
15986 DatabaseBundleMoveResult {
15987 database: true,
15988 wal: true,
15989 shm: true
15990 }
15991 );
15992 assert!(backup_path.parent().unwrap().is_dir());
15993 assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
15994 assert_eq!(
15995 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15996 b"wal"
15997 );
15998 assert_eq!(
15999 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
16000 b"shm"
16001 );
16002 }
16003
16004 #[test]
16005 fn remove_database_files_removes_orphan_sidecars_without_main_db() {
16006 let dir = TempDir::new().unwrap();
16007 let db_path = dir.path().join("test.db");
16008
16009 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
16010 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
16011
16012 remove_database_files(&db_path).unwrap();
16013
16014 assert!(!db_path.exists());
16015 assert!(!database_sidecar_path(&db_path, "-wal").exists());
16016 assert!(!database_sidecar_path(&db_path, "-shm").exists());
16017 }
16018
16019 #[test]
16020 fn cleanup_old_backups_ignores_backup_named_directories() {
16021 let dir = TempDir::new().unwrap();
16022 let db_path = dir.path().join("test.db");
16023
16024 for i in 0..3 {
16025 let backup_name = format!("test.db.backup.{}", 1000 + i);
16026 std::fs::write(dir.path().join(&backup_name), format!("backup {i}")).unwrap();
16027 }
16028 std::fs::create_dir(dir.path().join("test.db.backup.directory")).unwrap();
16029
16030 cleanup_old_backups(&db_path, 2).unwrap();
16031
16032 let mut backup_files = Vec::new();
16033 let mut backup_dirs = Vec::new();
16034 for entry in std::fs::read_dir(dir.path())
16035 .unwrap()
16036 .filter_map(|e| e.ok())
16037 {
16038 let name = entry.file_name().to_string_lossy().into_owned();
16039 if !name.starts_with("test.db.backup.") {
16040 continue;
16041 }
16042 if entry.path().is_dir() {
16043 backup_dirs.push(name);
16044 } else {
16045 backup_files.push(name);
16046 }
16047 }
16048
16049 assert_eq!(
16050 backup_files.len(),
16051 2,
16052 "only real backup files count toward retention"
16053 );
16054 assert_eq!(
16055 backup_dirs.len(),
16056 1,
16057 "backup-named directories should be ignored"
16058 );
16059 }
16060
16061 #[test]
16066 fn open_creates_new_database() {
16067 let dir = TempDir::new().unwrap();
16068 let db_path = dir.path().join("new.db");
16069 assert!(!db_path.exists());
16070
16071 let storage = SqliteStorage::open(&db_path).unwrap();
16072 assert!(db_path.exists());
16073 storage.close().unwrap();
16074 }
16075
16076 #[test]
16077 fn open_readonly_fails_for_nonexistent() {
16078 let dir = TempDir::new().unwrap();
16079 let db_path = dir.path().join("nonexistent.db");
16080 let result = SqliteStorage::open_readonly(&db_path);
16081 assert!(result.is_err());
16082 }
16083
16084 #[test]
16085 fn open_readonly_succeeds_for_existing() {
16086 let dir = TempDir::new().unwrap();
16087 let db_path = dir.path().join("existing.db");
16088
16089 let _storage = SqliteStorage::open(&db_path).unwrap();
16091 drop(_storage);
16092
16093 let storage = SqliteStorage::open_readonly(&db_path).unwrap();
16095 assert!(storage.schema_version().is_ok());
16096 }
16097
16098 #[test]
16099 fn reopen_existing_current_schema_is_idempotent() {
16100 let dir = TempDir::new().unwrap();
16101 let db_path = dir.path().join("existing.db");
16102
16103 {
16105 let storage = SqliteStorage::open(&db_path).unwrap();
16106 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
16107 }
16108
16109 let reopened = SqliteStorage::open(&db_path).unwrap();
16111 assert_eq!(
16112 reopened.schema_version().unwrap(),
16113 CURRENT_SCHEMA_VERSION,
16114 "reopening current schema DB should be idempotent"
16115 );
16116 }
16117
16118 #[test]
16119 fn open_or_rebuild_current_schema_does_not_trigger_rebuild() {
16120 let dir = TempDir::new().unwrap();
16121 let db_path = dir.path().join("existing.db");
16122
16123 {
16125 let storage = SqliteStorage::open(&db_path).unwrap();
16126 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
16127 }
16128
16129 let reopened = SqliteStorage::open_or_rebuild(&db_path)
16131 .expect("current schema DB should open without rebuild");
16132 assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
16133 }
16134
16135 #[test]
16136 fn open_or_rebuild_does_not_treat_non_database_paths_as_corruption() {
16137 let dir = TempDir::new().unwrap();
16138 let db_path = dir.path().join("db_dir");
16139 std::fs::create_dir(&db_path).unwrap();
16140
16141 let result = SqliteStorage::open_or_rebuild(&db_path);
16142
16143 match result {
16144 Err(MigrationError::Database(_)) | Err(MigrationError::Io(_)) => {}
16145 Err(MigrationError::RebuildRequired { reason, .. }) => {
16146 panic!("should not rebuild non-database path: {reason}")
16147 }
16148 Err(MigrationError::Other(msg)) => {
16149 panic!("should preserve underlying open error, got Other: {msg}")
16150 }
16151 Ok(_) => panic!("directory path must not open as a database"),
16152 }
16153
16154 assert!(
16155 db_path.is_dir(),
16156 "non-database directory must be left in place"
16157 );
16158 }
16159
16160 #[test]
16165 fn schema_version_returns_current() {
16166 let dir = TempDir::new().unwrap();
16167 let db_path = dir.path().join("test.db");
16168 let storage = SqliteStorage::open(&db_path).unwrap();
16169 let version = storage.schema_version().unwrap();
16170 assert!(version >= 5, "Schema version should be at least 5");
16171 }
16172
16173 #[test]
16178 fn migration_v13_creates_analytics_tables() {
16179 let dir = TempDir::new().unwrap();
16180 let db_path = dir.path().join("test.db");
16181 let storage = SqliteStorage::open(&db_path).unwrap();
16182
16183 let version = storage.schema_version().unwrap();
16185 assert_eq!(
16186 version, CURRENT_SCHEMA_VERSION,
16187 "Schema version must match CURRENT_SCHEMA_VERSION after migration"
16188 );
16189
16190 let conn = storage.raw();
16191
16192 fn col_names(conn: &FrankenConnection, table: &str) -> Vec<String> {
16194 conn.query_map_collect(
16195 &format!("PRAGMA table_info({})", table),
16196 fparams![],
16197 |row: &FrankenRow| row.get_typed(1),
16198 )
16199 .unwrap()
16200 }
16201
16202 fn idx_names(conn: &FrankenConnection, table: &str) -> Vec<String> {
16204 conn.query_map_collect(
16205 &format!("PRAGMA index_list({})", table),
16206 fparams![],
16207 |row: &FrankenRow| row.get_typed(1),
16208 )
16209 .unwrap()
16210 }
16211
16212 let mm_cols = col_names(conn, "message_metrics");
16214 for expected in &[
16215 "message_id",
16216 "hour_id",
16217 "day_id",
16218 "content_tokens_est",
16219 "model_name",
16220 "model_family",
16221 "model_tier",
16222 "provider",
16223 "api_input_tokens",
16224 "has_plan",
16225 "agent_slug",
16226 "role",
16227 "api_data_source",
16228 ] {
16229 assert!(
16230 mm_cols.contains(&expected.to_string()),
16231 "message_metrics missing column: {expected}"
16232 );
16233 }
16234
16235 let uh_cols = col_names(conn, "usage_hourly");
16237 for expected in &[
16238 "hour_id",
16239 "plan_message_count",
16240 "plan_content_tokens_est_total",
16241 "plan_api_tokens_total",
16242 "api_coverage_message_count",
16243 "content_tokens_est_user",
16244 "api_thinking_tokens_total",
16245 ] {
16246 assert!(
16247 uh_cols.contains(&expected.to_string()),
16248 "usage_hourly missing column: {expected}"
16249 );
16250 }
16251
16252 let ud_cols = col_names(conn, "usage_daily");
16254 for expected in &[
16255 "day_id",
16256 "plan_content_tokens_est_total",
16257 "plan_api_tokens_total",
16258 "api_thinking_tokens_total",
16259 "content_tokens_est_assistant",
16260 "message_count",
16261 ] {
16262 assert!(
16263 ud_cols.contains(&expected.to_string()),
16264 "usage_daily missing column: {expected}"
16265 );
16266 }
16267
16268 let umd_cols = col_names(conn, "usage_models_daily");
16270 for expected in &[
16271 "day_id",
16272 "model_family",
16273 "model_tier",
16274 "message_count",
16275 "api_tokens_total",
16276 "api_coverage_message_count",
16277 ] {
16278 assert!(
16279 umd_cols.contains(&expected.to_string()),
16280 "usage_models_daily missing column: {expected}"
16281 );
16282 }
16283
16284 let mm_idxs = idx_names(conn, "message_metrics");
16286 assert!(
16287 mm_idxs.iter().any(|n| n.contains("idx_mm_hour")),
16288 "message_metrics must have hour index"
16289 );
16290 assert!(
16291 mm_idxs.iter().any(|n| n.contains("idx_mm_agent_day")),
16292 "message_metrics must have agent+day index"
16293 );
16294 assert!(
16295 mm_idxs
16296 .iter()
16297 .any(|n| n.contains("idx_mm_model_family_day")),
16298 "message_metrics must have model_family+day index"
16299 );
16300
16301 let uh_idxs = idx_names(conn, "usage_hourly");
16303 assert!(
16304 uh_idxs.iter().any(|n| n.contains("idx_uh_agent")),
16305 "usage_hourly must have agent index"
16306 );
16307
16308 let ud_idxs = idx_names(conn, "usage_daily");
16310 assert!(
16311 ud_idxs.iter().any(|n| n.contains("idx_ud_agent")),
16312 "usage_daily must have agent index"
16313 );
16314
16315 let umd_idxs = idx_names(conn, "usage_models_daily");
16317 assert!(
16318 umd_idxs.iter().any(|n| n.contains("idx_umd_model_day")),
16319 "usage_models_daily must have model+day index"
16320 );
16321
16322 let conversation_cols = col_names(conn, "conversations");
16323 assert!(
16324 conversation_cols.contains(&"last_message_idx".to_string())
16325 && conversation_cols.contains(&"last_message_created_at".to_string()),
16326 "fresh schema must include V15 tail columns without ALTER TABLE on conversations"
16327 );
16328 let fts_schema_rows: i64 = conn
16329 .query_row_map(
16330 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
16331 fparams![],
16332 |row: &FrankenRow| row.get_typed(0),
16333 )
16334 .unwrap();
16335 assert_eq!(
16336 fts_schema_rows, 0,
16337 "fresh schema should not create and immediately drop derived fts_messages"
16338 );
16339 let integrity: Vec<String> = conn
16340 .query_map_collect("PRAGMA integrity_check;", fparams![], |row: &FrankenRow| {
16341 row.get_typed(0)
16342 })
16343 .unwrap();
16344 assert_eq!(
16345 integrity,
16346 vec!["ok".to_string()],
16347 "fresh schema must pass SQLite integrity_check"
16348 );
16349 }
16350
16351 #[test]
16352 fn hour_id_round_trip() {
16353 let ts_ms = 1_770_508_800_000_i64;
16355 let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
16356 let day_id = SqliteStorage::day_id_from_millis(ts_ms);
16357
16358 assert_eq!(hour_id / 24, day_id, "hour_id/24 should equal day_id");
16360
16361 let back = SqliteStorage::millis_from_hour_id(hour_id);
16363 assert!(
16364 back <= ts_ms && ts_ms - back < 3_600_000,
16365 "Round-trip should land within the same hour"
16366 );
16367 }
16368
16369 #[test]
16370 fn day_and_hour_ids_floor_negative_millis() {
16371 let ts_ms = -1_i64;
16374 let expected_secs = -1_i64;
16375 let epoch_2020_secs = 1_577_836_800_i64;
16376
16377 assert_eq!(
16378 SqliteStorage::day_id_from_millis(ts_ms),
16379 (expected_secs - epoch_2020_secs).div_euclid(86_400)
16380 );
16381 assert_eq!(
16382 SqliteStorage::hour_id_from_millis(ts_ms),
16383 (expected_secs - epoch_2020_secs).div_euclid(3_600)
16384 );
16385 }
16386
16387 #[test]
16388 fn migration_v13_from_v10() {
16389 let dir = TempDir::new().unwrap();
16390 let db_path = dir.path().join("test.db");
16391
16392 {
16394 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
16395 conn.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
16396 conn.execute_batch(
16397 "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);",
16398 )
16399 .unwrap();
16400 conn.execute("INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', '10')")
16401 .unwrap();
16402 conn.execute_batch(MIGRATION_V1).unwrap();
16407 conn.execute_batch(MIGRATION_V2).unwrap();
16408 conn.execute_batch(MIGRATION_V4).unwrap();
16409 conn.execute_batch(MIGRATION_V5).unwrap();
16410 conn.execute_batch(MIGRATION_V6).unwrap();
16411 conn.execute_batch(MIGRATION_V7).unwrap();
16412 conn.execute_batch(MIGRATION_V8).unwrap();
16413 conn.execute_batch(MIGRATION_V9).unwrap();
16414 conn.execute_batch(MIGRATION_V10).unwrap();
16415 conn.execute("UPDATE meta SET value = '10' WHERE key = 'schema_version'")
16416 .unwrap();
16417 }
16418 materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();
16419
16420 let storage = SqliteStorage::open(&db_path).unwrap();
16422 let version = storage.schema_version().unwrap();
16423 assert_eq!(
16424 version, CURRENT_SCHEMA_VERSION,
16425 "Should have migrated from v10 to the current schema"
16426 );
16427
16428 let count: i64 = storage
16430 .raw()
16431 .query_row_map(
16432 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('message_metrics', 'usage_hourly', 'usage_daily', 'usage_models_daily')",
16433 &[],
16434 |row: &FrankenRow| row.get_typed::<i64>(0),
16435 )
16436 .unwrap();
16437 assert_eq!(count, 4, "All 4 analytics tables should exist");
16438 }
16439
16440 #[test]
16445 fn analytics_ingest_populates_metrics_and_rollups() {
16446 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
16447 use std::path::PathBuf;
16448
16449 let dir = TempDir::new().unwrap();
16450 let db_path = dir.path().join("test.db");
16451 let storage = SqliteStorage::open(&db_path).unwrap();
16452
16453 let agent = Agent {
16455 id: None,
16456 slug: "claude_code".into(),
16457 name: "Claude Code".into(),
16458 version: Some("1.0".into()),
16459 kind: AgentKind::Cli,
16460 };
16461 let agent_id = storage.ensure_agent(&agent).unwrap();
16462
16463 let ts_ms = 1_770_551_400_000_i64;
16466 let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
16467 let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);
16468
16469 let usage_json = serde_json::json!({
16471 "message": {
16472 "model": "claude-opus-4-6",
16473 "usage": {
16474 "input_tokens": 100,
16475 "output_tokens": 50,
16476 "cache_read_input_tokens": 200,
16477 "cache_creation_input_tokens": 30,
16478 "service_tier": "standard"
16479 }
16480 }
16481 });
16482
16483 let conv = Conversation {
16484 id: None,
16485 agent_slug: "claude_code".into(),
16486 workspace: None,
16487 external_id: Some("test-conv-1".into()),
16488 title: Some("Test conversation".into()),
16489 source_path: PathBuf::from("/tmp/test.jsonl"),
16490 started_at: Some(ts_ms),
16491 ended_at: Some(ts_ms + 60_000),
16492 approx_tokens: None,
16493 metadata_json: serde_json::Value::Null,
16494 messages: vec![
16495 Message {
16496 id: None,
16497 idx: 0,
16498 role: MessageRole::User,
16499 author: None,
16500 created_at: Some(ts_ms),
16501 content: "Hello, can you help me with a plan?".into(),
16502 extra_json: serde_json::Value::Null,
16503 snippets: vec![],
16504 },
16505 Message {
16506 id: None,
16507 idx: 1,
16508 role: MessageRole::Agent,
16509 author: None,
16510 created_at: Some(ts_ms + 30_000),
16511 content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
16512 extra_json: usage_json,
16513 snippets: vec![],
16514 },
16515 Message {
16516 id: None,
16517 idx: 2,
16518 role: MessageRole::User,
16519 author: None,
16520 created_at: Some(ts_ms + 60_000),
16521 content: "Great, let's proceed!".into(),
16522 extra_json: serde_json::Value::Null,
16523 snippets: vec![],
16524 },
16525 ],
16526 source_id: "local".into(),
16527 origin_host: None,
16528 };
16529
16530 let outcomes = storage
16531 .insert_conversations_batched(&[(agent_id, None, &conv)])
16532 .unwrap();
16533 assert_eq!(outcomes.len(), 1);
16534 assert_eq!(outcomes[0].inserted_indices.len(), 3);
16535
16536 let conn = storage.raw();
16537
16538 let mm_count: i64 = conn
16540 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16541 row.get_typed::<i64>(0)
16542 })
16543 .unwrap();
16544 assert_eq!(mm_count, 3, "Should have 3 message_metrics rows");
16545
16546 #[allow(clippy::type_complexity)]
16548 let rows: Vec<(i64, i64, String, i64, i64, String, String, String, String)> = conn
16549 .query_map_collect(
16550 "SELECT hour_id, day_id, role, content_tokens_est, has_plan, api_data_source, model_family, model_tier, provider FROM message_metrics ORDER BY message_id",
16551 fparams![],
16552 |row: &FrankenRow| {
16553 Ok((
16554 row.get_typed(0)?,
16555 row.get_typed(1)?,
16556 row.get_typed(2)?,
16557 row.get_typed(3)?,
16558 row.get_typed(4)?,
16559 row.get_typed(5)?,
16560 row.get_typed(6)?,
16561 row.get_typed(7)?,
16562 row.get_typed(8)?,
16563 ))
16564 },
16565 )
16566 .unwrap();
16567
16568 assert_eq!(rows.len(), 3);
16569 assert_eq!(rows[0].0, expected_hour);
16571 assert_eq!(rows[0].1, expected_day);
16572 assert_eq!(rows[0].2, "user");
16574 assert_eq!(
16576 rows[1].4, 1,
16577 "Assistant message with plan should have has_plan=1"
16578 );
16579 assert_eq!(
16581 rows[1].5, "api",
16582 "Claude Code assistant message should have api data source"
16583 );
16584 assert_eq!(rows[0].5, "estimated");
16586 assert_eq!(rows[2].5, "estimated");
16587 assert_eq!(rows[1].6, "claude");
16588 assert_eq!(rows[1].7, "opus");
16589 assert_eq!(rows[1].8, "anthropic");
16590 assert_eq!(rows[0].6, "unknown");
16591 let user_chars = "Hello, can you help me with a plan?".len() as i64;
16593 assert_eq!(rows[0].3, user_chars / 4);
16594
16595 let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api, uh_api_cov): (
16597 i64,
16598 i64,
16599 i64,
16600 i64,
16601 i64,
16602 i64,
16603 i64,
16604 ) = conn
16605 .query_row_map(
16606 "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
16607 plan_content_tokens_est_total, plan_api_tokens_total, api_coverage_message_count
16608 FROM usage_hourly WHERE hour_id = ?",
16609 fparams![expected_hour],
16610 |row: &FrankenRow| {
16611 Ok((
16612 row.get_typed(0)?,
16613 row.get_typed(1)?,
16614 row.get_typed(2)?,
16615 row.get_typed(3)?,
16616 row.get_typed(4)?,
16617 row.get_typed(5)?,
16618 row.get_typed(6)?,
16619 ))
16620 },
16621 )
16622 .unwrap();
16623 assert_eq!(uh_msg, 3, "Hourly rollup should have 3 messages");
16624 assert_eq!(uh_user, 2, "Hourly rollup should have 2 user messages");
16625 assert_eq!(uh_asst, 1, "Hourly rollup should have 1 assistant message");
16626 assert_eq!(uh_plan, 1, "Hourly rollup should have 1 plan message");
16627 assert!(
16628 uh_plan_content > 0,
16629 "Hourly rollup should include plan content tokens"
16630 );
16631 assert!(
16632 uh_plan_api > 0,
16633 "Hourly rollup should include plan API tokens"
16634 );
16635 assert_eq!(
16636 uh_api_cov, 1,
16637 "Hourly rollup should have 1 API-covered message"
16638 );
16639
16640 let (ud_msg, ud_api_cov): (i64, i64) = conn
16642 .query_row_map(
16643 "SELECT message_count, api_coverage_message_count FROM usage_daily WHERE day_id = ?",
16644 fparams![expected_day],
16645 |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
16646 )
16647 .unwrap();
16648 assert_eq!(ud_msg, 3, "Daily rollup should match hourly");
16649 assert_eq!(
16650 ud_api_cov, 1,
16651 "Daily api_coverage should be 1 (only assistant msg has real API data)"
16652 );
16653
16654 let api_only_input: i64 = conn
16656 .query_row_map(
16657 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE day_id = ? AND api_data_source = 'api'",
16658 fparams![expected_day],
16659 |row: &FrankenRow| row.get_typed::<i64>(0),
16660 )
16661 .unwrap();
16662 assert_eq!(
16663 api_only_input, 100,
16664 "Only API-sourced input tokens should be 100"
16665 );
16666
16667 let mm_total_content_est: i64 = conn
16669 .query_row_map(
16670 "SELECT SUM(content_tokens_est) FROM message_metrics WHERE day_id = ?",
16671 fparams![expected_day],
16672 |row| row.get_typed::<i64>(0),
16673 )
16674 .unwrap();
16675 let mm_plan_content_est: i64 = conn
16676 .query_row_map(
16677 "SELECT COALESCE(SUM(content_tokens_est), 0) FROM message_metrics WHERE day_id = ? AND has_plan = 1",
16678 fparams![expected_day],
16679 |row: &FrankenRow| row.get_typed::<i64>(0),
16680 )
16681 .unwrap();
16682 let mm_plan_api_total: i64 = conn
16683 .query_row_map(
16684 "SELECT COALESCE(SUM(COALESCE(api_input_tokens, 0) + COALESCE(api_output_tokens, 0) + COALESCE(api_cache_read_tokens, 0) + COALESCE(api_cache_creation_tokens, 0) + COALESCE(api_thinking_tokens, 0)), 0)
16685 FROM message_metrics WHERE day_id = ? AND has_plan = 1 AND api_data_source = 'api'",
16686 fparams![expected_day],
16687 |row: &FrankenRow| row.get_typed::<i64>(0),
16688 )
16689 .unwrap();
16690 let ud_content_est: i64 = conn
16691 .query_row_map(
16692 "SELECT content_tokens_est_total FROM usage_daily WHERE day_id = ?",
16693 fparams![expected_day],
16694 |row| row.get_typed::<i64>(0),
16695 )
16696 .unwrap();
16697 let (ud_plan_content_est, ud_plan_api_total): (i64, i64) = conn
16698 .query_row_map(
16699 "SELECT plan_content_tokens_est_total, plan_api_tokens_total FROM usage_daily WHERE day_id = ?",
16700 fparams![expected_day],
16701 |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
16702 )
16703 .unwrap();
16704 assert_eq!(
16705 mm_total_content_est, ud_content_est,
16706 "Daily rollup content_tokens_est_total must equal SUM of message_metrics"
16707 );
16708 assert_eq!(
16709 mm_plan_content_est, ud_plan_content_est,
16710 "Daily rollup plan_content_tokens_est_total must equal planned message_metrics content sum"
16711 );
16712 assert_eq!(
16713 mm_plan_api_total, ud_plan_api_total,
16714 "Daily rollup plan_api_tokens_total must equal planned message_metrics API token sum"
16715 );
16716
16717 let (claude_msg, claude_user, claude_asst, claude_api_total, claude_api_cov): (
16719 i64,
16720 i64,
16721 i64,
16722 i64,
16723 i64,
16724 ) = conn
16725 .query_row_map(
16726 "SELECT message_count, user_message_count, assistant_message_count, api_tokens_total, api_coverage_message_count
16727 FROM usage_models_daily
16728 WHERE day_id = ? AND model_family = 'claude' AND model_tier = 'opus'",
16729 fparams![expected_day],
16730 |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?, row.get_typed(3)?, row.get_typed(4)?)),
16731 )
16732 .unwrap();
16733 assert_eq!(claude_msg, 1);
16734 assert_eq!(claude_user, 0);
16735 assert_eq!(claude_asst, 1);
16736 assert_eq!(claude_api_total, 380);
16737 assert_eq!(claude_api_cov, 1);
16738
16739 let unknown_msg: i64 = conn
16740 .query_row_map(
16741 "SELECT message_count FROM usage_models_daily
16742 WHERE day_id = ? AND model_family = 'unknown' AND model_tier = 'unknown'",
16743 fparams![expected_day],
16744 |row| row.get_typed(0),
16745 )
16746 .unwrap();
16747 assert_eq!(
16748 unknown_msg, 2,
16749 "user messages should land in unknown model bucket"
16750 );
16751 }
16752
16753 #[test]
16754 fn has_plan_heuristic_detects_plans() {
16755 assert!(has_plan_heuristic(
16756 "## Plan\n\n1. First step\n2. Second step"
16757 ));
16758 assert!(has_plan_heuristic(
16759 "# Plan\nHere is what we will do:\n1. Step one\n2. Step two"
16760 ));
16761 assert!(has_plan_heuristic(
16762 "Plan:\n- Gather baseline\n- Implement changes\n- Validate with tests"
16763 ));
16764 assert!(has_plan_heuristic(
16765 "Next steps:\n1. Update schema\n2. Rebuild rollups"
16766 ));
16767 assert!(!has_plan_heuristic("Hello world"));
16768 assert!(!has_plan_heuristic("Short"));
16769 assert!(!has_plan_heuristic(
16770 "This is a regular message without plans"
16771 ));
16772 assert!(!has_plan_heuristic(
16773 "```json\n{\"tool\":\"shell\",\"stdout\":\"1. install\\n2. run\"}\n```"
16774 ));
16775 }
16776
16777 #[test]
16778 fn has_plan_for_role_only_counts_assistant_messages() {
16779 let plan_text = "## Plan\n1. First\n2. Second";
16780 assert!(has_plan_for_role("assistant", plan_text));
16781 assert!(has_plan_for_role("agent", plan_text));
16782 assert!(has_plan_for_role("Assistant", plan_text));
16783 assert!(!has_plan_for_role("user", plan_text));
16784 assert!(!has_plan_for_role("tool", plan_text));
16785 }
16786
/// API token totals in the rollups must only include entries whose
/// `api_data_source` is `"api"`. An `"estimated"` entry is still counted as a
/// plan message and contributes its estimated content tokens, but none of
/// its `api_*` token figures.
#[test]
fn api_rollups_require_api_data_source() {
    // Both fixtures are plan-bearing assistant messages for the same
    // agent/workspace/source in hour 1 / day 1. They differ only in id,
    // size, reported token counts and where those counts came from.
    let plan_entry = |message_id,
                      content_chars,
                      content_tokens_est,
                      input_tokens,
                      output_tokens,
                      data_source: &'static str| MessageMetricsEntry {
        message_id,
        created_at_ms: 0,
        hour_id: 1,
        day_id: 1,
        agent_slug: "codex".into(),
        workspace_id: 0,
        source_id: "local".into(),
        role: "assistant".into(),
        content_chars,
        content_tokens_est,
        model_name: None,
        model_family: "unknown".into(),
        model_tier: "unknown".into(),
        provider: "unknown".into(),
        api_input_tokens: Some(input_tokens),
        api_output_tokens: Some(output_tokens),
        api_cache_read_tokens: Some(0),
        api_cache_creation_tokens: Some(0),
        api_thinking_tokens: Some(0),
        api_service_tier: None,
        api_data_source: data_source.into(),
        tool_call_count: 0,
        has_tool_calls: false,
        has_plan: true,
    };

    let mut agg = AnalyticsRollupAggregator::new();
    agg.record(&plan_entry(1, 120, 30, 100, 50, "estimated"));
    agg.record(&plan_entry(2, 80, 20, 40, 10, "api"));

    // Hour id and day id are both 1, so one key addresses both time rollups.
    let bucket = (1_i64, String::from("codex"), 0_i64, String::from("local"));
    let model_bucket = (
        bucket.0,
        bucket.1.clone(),
        bucket.2,
        bucket.3.clone(),
        String::from("unknown"),
        String::from("unknown"),
    );

    let hourly = agg
        .hourly
        .get(&bucket)
        .expect("hourly rollup key must exist");
    let daily = agg.daily.get(&bucket).expect("daily rollup key must exist");
    let models_daily = agg
        .models_daily
        .get(&model_bucket)
        .expect("model rollup key must exist");

    // Hourly: both messages are plan messages (30 + 20 estimated tokens),
    // but only the "api" entry's 40 + 10 tokens reach the API totals.
    assert_eq!(hourly.plan_message_count, 2);
    assert_eq!(hourly.plan_content_tokens_est_total, 50);
    assert_eq!(hourly.plan_api_tokens_total, 50);
    assert_eq!(hourly.api_tokens_total, 50);
    assert_eq!(hourly.api_input_tokens_total, 40);
    assert_eq!(hourly.api_output_tokens_total, 10);
    assert_eq!(hourly.api_coverage_message_count, 1);

    // Daily and per-model rollups must apply the same rule.
    assert_eq!(daily.plan_api_tokens_total, 50);
    assert_eq!(daily.api_tokens_total, 50);
    assert_eq!(models_daily.plan_api_tokens_total, 50);
    assert_eq!(models_daily.api_tokens_total, 50);
}
16878
/// Runs the plan heuristic over two small hand-picked corpora and enforces
/// quality floors: at least 80% of the plan-like messages must be flagged,
/// and at most 20% of the non-plan messages may be.
#[test]
fn has_plan_heuristic_curated_corpus_thresholds() {
    // Messages that should be recognised as containing a plan.
    let positives = [
        "## Plan\n1. Inspect current schema\n2. Add migration\n3. Verify rebuild",
        "Plan:\n1) Reproduce\n2) Patch\n3) Add tests",
        "Implementation plan:\n- Parse inputs\n- Update rollups\n- Run checks",
        "Next steps:\n1. Reserve file\n2. Implement\n3. Report status",
        "# Plan\n1. Gather requirements\n2. Ship changes",
        "Action plan:\n- Identify root cause\n- Fix it\n- Validate",
    ];

    // Messages that mention plans or contain numbered lists without being plans.
    let negatives = [
        "The plan is to move fast and fix things later.",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. ls\\n2. cat\"}\n```",
        "stdout:\n1. Build started\n2. Build finished\nexit code: 0",
        "I can help with that request. Let me know if you want details.",
        "Here is a list:\n- apples\n- oranges",
        "Status update: completed tasks and blockers below.",
    ];

    // Share of a corpus that the heuristic flags as a plan.
    let flagged_fraction = |corpus: &[&str]| -> f64 {
        let flagged = corpus
            .iter()
            .filter(|msg| has_plan_heuristic(msg))
            .count();
        flagged as f64 / corpus.len() as f64
    };

    let recall = flagged_fraction(&positives[..]);
    let false_positive_rate = flagged_fraction(&negatives[..]);

    assert!(
        recall >= 0.80,
        "plan heuristic recall too low: got {recall:.2}"
    );
    assert!(
        false_positive_rate <= 0.20,
        "plan heuristic false-positive rate too high: got {false_positive_rate:.2}"
    );
}
16922
/// Inserts a three-message conversation, records the analytics tables it
/// produced, wipes those tables by hand, and then checks that
/// `rebuild_analytics` restores the same row counts, the same API input-token
/// total, and the expected per-hour / per-day message breakdown.
#[test]
fn rebuild_analytics_repopulates_from_messages() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // All three messages sit within one minute of this timestamp; the
    // hour/day buckets queried below are derived from it.
    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // Usage payload attached to the agent reply.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 200,
                "cache_creation_input_tokens": 30,
                "service_tier": "standard"
            }
        }
    });

    // Builds one message `delay_ms` after `ts_ms`.
    let message = |idx, role, delay_ms: i64, content: &'static str, extra_json| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(ts_ms + delay_ms),
        content: content.into(),
        extra_json,
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-rebuild-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            message(
                0,
                MessageRole::User,
                0,
                "Hello, can you help me with a plan?",
                serde_json::Value::Null,
            ),
            message(
                1,
                MessageRole::Agent,
                30_000,
                "## Plan\n\n1. First step\n2. Second step\n3. Third step",
                usage_json,
            ),
            message(
                2,
                MessageRole::User,
                60_000,
                "Great, let's proceed!",
                serde_json::Value::Null,
            ),
        ],
        source_id: "local".into(),
        origin_host: None,
    };

    storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();

    // Runs a single-value COUNT-style query against the raw connection.
    let count_rows = |sql: &str| -> i64 {
        storage
            .raw()
            .query_row_map(sql, &[], |row| row.get_typed(0))
            .unwrap()
    };
    // Sum of API-reported input tokens currently stored in message_metrics.
    let api_input_total = || -> i64 {
        storage
            .raw()
            .query_row_map(
                "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
                &[],
                |row: &FrankenRow| row.get_typed(0),
            )
            .unwrap()
    };

    // Snapshot of what the insert path produced.
    let orig_mm = count_rows("SELECT COUNT(*) FROM message_metrics");
    let orig_hourly = count_rows("SELECT COUNT(*) FROM usage_hourly");
    let orig_daily = count_rows("SELECT COUNT(*) FROM usage_daily");
    let orig_models_daily = count_rows("SELECT COUNT(*) FROM usage_models_daily");
    let orig_api_input = api_input_total();

    assert_eq!(orig_mm, 3);
    assert!(orig_hourly > 0);
    assert!(orig_daily > 0);
    assert!(orig_models_daily > 0);

    // Wipe every analytics table so the rebuild has to start from nothing.
    let conn = storage.raw();
    for wipe in [
        "DELETE FROM message_metrics",
        "DELETE FROM usage_hourly",
        "DELETE FROM usage_daily",
        "DELETE FROM usage_models_daily",
    ] {
        conn.execute(wipe).unwrap();
    }
    let zero = count_rows("SELECT COUNT(*) FROM message_metrics");
    assert_eq!(zero, 0);

    let result = storage.rebuild_analytics().unwrap();

    // The rebuild report itself must describe the restored rows.
    assert_eq!(result.message_metrics_rows, 3);
    assert!(result.usage_hourly_rows > 0);
    assert!(result.usage_daily_rows > 0);
    assert!(result.usage_models_daily_rows > 0);
    assert!(
        result.elapsed_ms < 10_000,
        "Rebuild should be fast for 3 msgs"
    );

    // The tables must match the pre-wipe snapshot.
    let rebuilt_mm = count_rows("SELECT COUNT(*) FROM message_metrics");
    assert_eq!(
        rebuilt_mm, orig_mm,
        "Rebuilt message_metrics count should match"
    );

    let rebuilt_hourly = count_rows("SELECT COUNT(*) FROM usage_hourly");
    assert_eq!(
        rebuilt_hourly, orig_hourly,
        "Rebuilt hourly rows should match"
    );

    let rebuilt_daily = count_rows("SELECT COUNT(*) FROM usage_daily");
    assert_eq!(rebuilt_daily, orig_daily, "Rebuilt daily rows should match");

    let rebuilt_models_daily = count_rows("SELECT COUNT(*) FROM usage_models_daily");
    assert_eq!(
        rebuilt_models_daily, orig_models_daily,
        "Rebuilt model rollup rows should match"
    );

    let rebuilt_api_input = api_input_total();
    assert_eq!(
        rebuilt_api_input, orig_api_input,
        "Rebuilt API input tokens should match original"
    );

    // Hourly bucket: 3 messages (2 user, 1 assistant), one of them a plan
    // with non-zero estimated and API token totals.
    let hourly_row: (i64, i64, i64, i64, i64, i64) = storage
        .raw()
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
                    plan_content_tokens_est_total, plan_api_tokens_total
             FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                ))
            },
        )
        .unwrap();
    let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api) = hourly_row;
    assert_eq!(uh_msg, 3);
    assert_eq!(uh_user, 2);
    assert_eq!(uh_asst, 1);
    assert_eq!(uh_plan, 1);
    assert!(uh_plan_content > 0);
    assert!(uh_plan_api > 0);

    // Daily bucket must carry the same message count.
    let ud_msg: i64 = storage
        .raw()
        .query_row_map(
            "SELECT message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(ud_msg, 3);
}
17166
/// Inserts one conversation whose two messages are each longer than half of
/// `FTS_ENTRY_BATCH_MAX_CHARS` and verifies that every message ends up both
/// in `messages` and in `fts_messages`.
#[test]
fn insert_conversations_batched_flushes_large_fts_batches() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    storage
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency before insert");

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Each message body is just over half the batch limit, so the two of
    // them together exceed it (presumably forcing an intermediate flush).
    let content = "y".repeat((FTS_ENTRY_BATCH_MAX_CHARS / 2) + 1);
    let mut messages = Vec::with_capacity(2);
    for i in 0_i64..2 {
        messages.push(Message {
            id: None,
            idx: i,
            role: MessageRole::Agent,
            author: None,
            created_at: Some(1_700_000_000_000 + i),
            content: format!("{i}-{content}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("fts-large-batch".into()),
        title: Some("FTS Large Batch".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };
    let expected_messages = conv.messages.len();

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), expected_messages);

    // Row counter shared by the base table and the FTS table checks.
    let count_rows = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let message_count = count_rows("SELECT COUNT(*) FROM messages");
    let fts_count = count_rows("SELECT COUNT(*) FROM fts_messages");

    assert_eq!(message_count, expected_messages as i64);
    assert_eq!(fts_count, expected_messages as i64);
}
17243
17244 fn make_profiled_storage_remote_conversation(
17245 external_id: i64,
17246 msg_count: usize,
17247 ) -> Conversation {
17248 Conversation {
17249 id: None,
17250 agent_slug: "codex".into(),
17251 workspace: Some(PathBuf::from("/ws/profiled-storage-remote")),
17252 external_id: Some(format!("profiled-storage-remote-{external_id}")),
17253 title: Some(format!(
17254 "Profiled storage remote conversation {external_id}"
17255 )),
17256 source_path: PathBuf::from(format!("/log/profiled-storage-remote-{external_id}.jsonl")),
17257 started_at: Some(10_000 + external_id * 100),
17258 ended_at: Some(10_000 + external_id * 100 + msg_count as i64),
17259 approx_tokens: Some(msg_count as i64 * 32),
17260 metadata_json: serde_json::json!({ "bench": true }),
17261 messages: (0..msg_count)
17262 .map(|idx| Message {
17263 id: None,
17264 idx: idx as i64,
17265 role: if idx % 2 == 0 {
17266 MessageRole::User
17267 } else {
17268 MessageRole::Agent
17269 },
17270 author: Some("tester".into()),
17271 created_at: Some(20_000 + external_id * 100 + idx as i64),
17272 content: format!(
17273 "profiled storage remote content ext={external_id} idx={idx} {}",
17274 "x".repeat(64)
17275 ),
17276 extra_json: serde_json::json!({ "idx": idx }),
17277 snippets: Vec::new(),
17278 })
17279 .collect(),
17280 source_id: "profiled-storage-remote-source".into(),
17281 origin_host: Some("builder-profile".into()),
17282 }
17283 }
17284
17285 fn make_profiled_append_remote_merge_conversation(
17286 external_id: i64,
17287 msg_count: usize,
17288 ) -> Conversation {
17289 let base_ts = 100_000 + external_id * 1_000;
17290 Conversation {
17291 id: None,
17292 agent_slug: "codex".into(),
17293 workspace: Some(PathBuf::from("/ws/profiled-append-remote")),
17294 external_id: Some(format!("profiled-append-remote-{external_id}")),
17295 title: Some(format!("Profiled append remote conversation {external_id}")),
17296 source_path: PathBuf::from(format!("/log/profiled-append-remote-{external_id}.jsonl")),
17297 started_at: Some(base_ts),
17298 ended_at: Some(base_ts + msg_count as i64),
17299 approx_tokens: Some(msg_count as i64 * 50),
17300 metadata_json: serde_json::json!({ "bench": true }),
17301 messages: (0..msg_count)
17302 .map(|idx| Message {
17303 id: None,
17304 idx: idx as i64,
17305 role: if idx % 2 == 0 {
17306 MessageRole::User
17307 } else {
17308 MessageRole::Agent
17309 },
17310 author: Some(format!("model-{}", external_id % 5)),
17311 created_at: Some(base_ts + idx as i64),
17312 content: format!(
17313 "Profiled append remote conversation {} message {}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
17314 external_id, idx
17315 ),
17316 extra_json: serde_json::json!({ "bench": true }),
17317 snippets: Vec::new(),
17318 })
17319 .collect(),
17320 source_id: "profiled-append-remote-source".into(),
17321 origin_host: Some("builder-profile".into()),
17322 }
17323 }
17324
/// For a brand-new conversation with one snippet per message, every snippet
/// row must join back to a message of that conversation, so the join count
/// equals the message count.
#[test]
fn insert_conversation_tree_batched_new_message_ids_match_snippet_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("batched-message-ids.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // Attach exactly one snippet to each of the five messages.
    let mut conv = make_profiled_storage_remote_conversation(42, 5);
    conv.messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, msg)| {
            let line = idx as i64;
            msg.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/file_{idx}.rs"))),
                start_line: Some(line + 1),
                end_line: Some(line + 2),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn snippet_{idx}() {{}}")),
            });
        });

    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();

    // Runs a COUNT query parameterised by the inserted conversation's id.
    let count_for_conversation = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![outcome.conversation_id], |row| {
                row.get_typed(0)
            })
            .unwrap()
    };

    let message_count =
        count_for_conversation("SELECT COUNT(*) FROM messages WHERE conversation_id = ?1");
    let joined_snippet_count = count_for_conversation(
        "SELECT COUNT(*)
         FROM snippets s
         JOIN messages m ON s.message_id = m.id
         WHERE m.conversation_id = ?1",
    );

    let expected = conv.messages.len() as i64;
    assert_eq!(message_count, expected);
    assert_eq!(joined_snippet_count, expected);
}
17380
/// Inserts a two-message conversation, then re-inserts the same external id
/// with five messages. Only indices 2..=4 may be appended, and each snippet
/// must be attached to the message it was submitted with: the first two
/// messages keep their original snippets, the appended ones get the new ones.
#[test]
fn insert_conversation_tree_batched_appended_message_ids_match_snippet_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("batched-append-message-ids.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // First pass: two messages, snippets named "append_initial_*".
    let mut initial = make_profiled_storage_remote_conversation(77, 2);
    initial
        .messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, msg)| {
            let line = idx as i64;
            msg.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/append_initial_{idx}.rs"))),
                start_line: Some(line + 1),
                end_line: Some(line + 2),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn append_initial_{idx}() {{}}")),
            });
        });
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();
    assert_eq!(first.inserted_indices, vec![0, 1]);

    // Second pass: same external id, five messages, snippets named
    // "append_full_*" so their origin is visible in the final join.
    let mut appended = make_profiled_storage_remote_conversation(77, 5);
    appended
        .messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, msg)| {
            let line = idx as i64;
            msg.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/append_full_{idx}.rs"))),
                start_line: Some(line + 10),
                end_line: Some(line + 11),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn append_full_{idx}() {{}}")),
            });
        });
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3, 4]);

    let conversation_id = first.conversation_id;
    let message_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let joined_snippets: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT m.idx, s.file_path
             FROM snippets s
             JOIN messages m ON s.message_id = m.id
             WHERE m.conversation_id = ?1
             ORDER BY m.idx, s.id",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();

    let expected_snippets = vec![
        (0, String::from("src/append_initial_0.rs")),
        (1, String::from("src/append_initial_1.rs")),
        (2, String::from("src/append_full_2.rs")),
        (3, String::from("src/append_full_3.rs")),
        (4, String::from("src/append_full_4.rs")),
    ];
    assert_eq!(message_count, 5);
    assert_eq!(joined_snippets, expected_snippets);
}
17465
/// Deletes a conversation's row from `conversation_external_tail_lookup` by
/// hand and then re-inserts the same external id with two extra messages.
/// The insert must still append to the existing conversation (no duplicate
/// row) and must write the lookup row back, matching `conversation_tail_state`.
#[test]
fn insert_conversation_tree_rehydrates_external_lookup_after_manual_clear() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("external-lookup-rehydrate.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    let initial = make_profiled_storage_remote_conversation(88, 2);
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();

    let external_id = initial.external_id.as_deref().unwrap();
    let lookup_key = conversation_external_lookup_key(&initial.source_id, agent_id, external_id);
    let key = lookup_key.as_str();

    // The first insert must have registered the lookup row.
    let lookup_id: i64 = storage
        .conn
        .query_row_map(
            "SELECT conversation_id
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![key],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(lookup_id, first.conversation_id);

    // Remove the lookup row behind the storage layer's back.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_external_tail_lookup WHERE lookup_key = ?1",
            fparams![key],
        )
        .unwrap();

    let appended = make_profiled_storage_remote_conversation(88, 4);
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3]);

    let conversation_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*)
             FROM conversations
             WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
            fparams![initial.source_id.as_str(), agent_id, external_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let restored_lookup: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(
            "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![key],
            |row| {
                let conversation_id = row.get_typed(0)?;
                let ended_at = row.get_typed(1)?;
                let last_idx = row.get_typed(2)?;
                let last_created_at = row.get_typed(3)?;
                Ok((conversation_id, ended_at, last_idx, last_created_at))
            },
        )
        .unwrap();
    let tail_state: (Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(
            "SELECT ended_at, last_message_idx, last_message_created_at
             FROM conversation_tail_state
             WHERE conversation_id = ?1",
            fparams![first.conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
        )
        .unwrap();

    // Still exactly one conversation for this (source, agent, external id).
    assert_eq!(conversation_count, 1);

    // The restored lookup row mirrors the tail-state row.
    let (tail_ended_at, tail_last_idx, tail_last_created_at) = tail_state;
    assert_eq!(
        restored_lookup,
        (
            first.conversation_id,
            tail_ended_at,
            tail_last_idx,
            tail_last_created_at
        )
    );

    // The tail state points at the last appended message (index 3).
    let last_created_at = appended.messages[3].created_at;
    assert_eq!(tail_state, (last_created_at, Some(3), last_created_at));
}
17574
/// After `daily_stats` has been emptied by hand, inserting another
/// conversation must create the stats rows again: four rows in total, with
/// the `'all'` / `'all'` aggregate reflecting only the newly inserted
/// conversation (one session, two messages).
#[test]
fn insert_conversation_tree_recreates_daily_stats_after_manual_clear() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // Seed one conversation, then throw away the stats it produced.
    let seeded = make_profiled_storage_remote_conversation(0, 3);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &seeded)
        .unwrap();
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    // This second conversation is the only one the stats should now reflect.
    let follow_up = make_profiled_storage_remote_conversation(1, 2);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &follow_up)
        .unwrap();

    let row_count: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let totals: (i64, i64) = storage
        .conn
        .query_row_map(
            "SELECT session_count, message_count
             FROM daily_stats
             WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (session_count, message_count) = totals;

    assert_eq!(row_count, 4);
    assert_eq!(session_count, 1);
    assert_eq!(message_count, 2);
}
17630
/// Profiles repeated inserts of new remote conversations for three
/// (messages-per-conversation, iteration-count) shapes. It checks that the
/// profile's counters match the work submitted and that the per-stage
/// durations never add up to more than the recorded total.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_steady_state_remote_reuse() {
    // Held for the whole test; the variable is restored when the guard drops.
    let _lexical_defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    for (msg_count, iterations) in [(5usize, 80usize), (20, 50), (50, 24)] {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join(format!("profile-{msg_count}.db"));
        let storage = SqliteStorage::open(&db_path).unwrap();

        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace = PathBuf::from("/ws/profiled-storage-remote");
        let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

        // Unprofiled warm-up insert (external id 0) before measuring.
        let warm_up = make_profiled_storage_remote_conversation(0, msg_count);
        storage
            .insert_conversation_tree(agent_id, Some(workspace_id), &warm_up)
            .unwrap();

        // Profiled inserts use external ids 1..=iterations, all new.
        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in 1..=iterations {
            let conv = make_profiled_storage_remote_conversation(external_id as i64, msg_count);
            storage
                .insert_conversation_tree_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &conv,
                    &mut profile,
                )
                .unwrap();
        }

        let expected_messages = iterations * msg_count;
        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, expected_messages);
        assert_eq!(profile.inserted_messages, expected_messages);

        // Sum of every individually timed stage.
        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted stage durations cannot exceed total duration"
        );

        profile.log_summary(&format!("remote_reuse_{msg_count}_msgs"));
    }
}
17693
/// Profiles appends to existing remote conversations for three
/// (messages-per-conversation, iteration-count) shapes. Every conversation
/// is first stored with `msg_count` messages and then re-submitted with
/// twice as many, so only the second half may be counted as inserted. The
/// per-stage durations must never add up to more than the recorded total.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_append_remote_source_merge() {
    // Held for the whole test; the variable is restored when the guard drops.
    let _lexical_defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    for (msg_count, iterations) in [(5usize, 80usize), (20, 50), (50, 24)] {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join(format!("append-profile-{msg_count}.db"));
        let storage = SqliteStorage::open(&db_path).unwrap();

        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace = PathBuf::from("/ws/profiled-append-remote");
        let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

        // Unprofiled seeding: store the short version of every conversation.
        for external_id in 0..iterations {
            let seed =
                make_profiled_append_remote_merge_conversation(external_id as i64, msg_count);
            storage
                .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
                .unwrap();
        }

        // Profiled pass: resubmit each conversation at double length.
        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in 0..iterations {
            let grown = make_profiled_append_remote_merge_conversation(
                external_id as i64,
                msg_count * 2,
            );
            storage
                .append_existing_conversation_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &grown,
                    &mut profile,
                )
                .unwrap();
        }

        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, iterations * msg_count * 2);
        assert_eq!(profile.inserted_messages, iterations * msg_count);

        // Sum of every individually timed stage, including the append-only
        // lookup and dedupe stages.
        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.existing_idx_lookup_duration
            + profile.existing_replay_lookup_duration
            + profile.dedupe_filter_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted append stage durations cannot exceed total duration"
        );

        profile.log_summary(&format!("append_remote_merge_{msg_count}_msgs"));
    }
}
17767
17768 #[test]
17769 fn rebuild_daily_stats_recomputes_materialized_totals_without_monolithic_group_by() {
17770 let dir = TempDir::new().unwrap();
17771 let db_path = dir.path().join("test.db");
17772 let storage = SqliteStorage::open(&db_path).unwrap();
17773 let started_at = 1_700_000_000_000_i64;
17774 let day_id = FrankenStorage::day_id_from_millis(started_at);
17775 let hour_id = FrankenStorage::hour_id_from_millis(started_at);
17776
17777 storage
17778 .conn
17779 .execute_compat(
17780 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17781 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17782 fparams![1_i64, "codex", "Codex", "cli"],
17783 )
17784 .unwrap();
17785 storage
17786 .conn
17787 .execute_compat(
17788 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17789 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17790 fparams![2_i64, "claude", "Claude", "cli"],
17791 )
17792 .unwrap();
17793
17794 storage
17795 .conn
17796 .execute_compat(
17797 "INSERT INTO conversations (
17798 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17799 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17800 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
17801 fparams![
17802 1_i64,
17803 1_i64,
17804 LOCAL_SOURCE_ID,
17805 "daily-a",
17806 "Daily A",
17807 "/tmp/daily-a.jsonl",
17808 started_at,
17809 started_at + 200,
17810 "{}"
17811 ],
17812 )
17813 .unwrap();
17814 storage
17815 .conn
17816 .execute_compat(
17817 "INSERT INTO conversations (
17818 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17819 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17820 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
17821 fparams![
17822 2_i64,
17823 2_i64,
17824 LOCAL_SOURCE_ID,
17825 "daily-b",
17826 "Daily B",
17827 "/tmp/daily-b.jsonl",
17828 started_at,
17829 started_at + 300,
17830 "{}"
17831 ],
17832 )
17833 .unwrap();
17834
17835 storage
17836 .conn
17837 .execute_compat(
17838 "INSERT INTO messages (
17839 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17840 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17841 fparams![1_i64, 1_i64, 0_i64, "user", started_at, "hello"],
17842 )
17843 .unwrap();
17844 storage
17845 .conn
17846 .execute_compat(
17847 "INSERT INTO messages (
17848 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17849 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17850 fparams![2_i64, 1_i64, 1_i64, "assistant", started_at + 100, "response"],
17851 )
17852 .unwrap();
17853 storage
17854 .conn
17855 .execute_compat(
17856 "INSERT INTO messages (
17857 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17858 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17859 fparams![3_i64, 2_i64, 0_i64, "user", started_at + 50, "abc"],
17860 )
17861 .unwrap();
17862
17863 for (message_id, agent_slug, role, content_len) in [
17864 (1_i64, "codex", "user", 5_i64),
17865 (2_i64, "codex", "assistant", 8_i64),
17866 (3_i64, "claude", "user", 3_i64),
17867 ] {
17868 storage
17869 .conn
17870 .execute_compat(
17871 "INSERT INTO message_metrics (
17872 message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
17873 role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
17874 api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
17875 api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
17876 model_name, model_family, model_tier, provider
17877 ) VALUES (
17878 ?1, ?2, ?3, ?4, ?5, ?6, ?7,
17879 ?8, ?9, ?10, ?11, ?12,
17880 ?13, ?14, ?15,
17881 ?16, ?17, ?18, ?19, ?20,
17882 ?21, ?22, ?23, ?24
17883 )",
17884 fparams![
17885 message_id,
17886 started_at,
17887 hour_id,
17888 day_id,
17889 agent_slug,
17890 0_i64,
17891 LOCAL_SOURCE_ID,
17892 role,
17893 content_len,
17894 content_len / 4,
17895 0_i64,
17896 0_i64,
17897 0_i64,
17898 0_i64,
17899 0_i64,
17900 "",
17901 "estimated",
17902 0_i64,
17903 0_i64,
17904 0_i64,
17905 "",
17906 "unknown",
17907 "unknown",
17908 "unknown"
17909 ],
17910 )
17911 .unwrap();
17912 }
17913
17914 storage.conn.execute("DELETE FROM daily_stats").unwrap();
17915
17916 let rebuilt = storage.rebuild_daily_stats().unwrap();
17917 assert_eq!(rebuilt.total_sessions, 2);
17918
17919 let health = storage.daily_stats_health().unwrap();
17920 assert_eq!(health.conversation_count, 2);
17921 assert_eq!(health.materialized_total, 2);
17922 assert_eq!(health.drift, 0);
17923
17924 let total_messages: i64 = storage
17925 .conn
17926 .query_row_map(
17927 "SELECT message_count FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
17928 fparams![],
17929 |row| row.get_typed(0),
17930 )
17931 .unwrap();
17932 assert_eq!(total_messages, 3);
17933 }
17934
17935 #[test]
17936 fn rebuild_daily_stats_preserves_byte_counts_with_message_metrics() {
17937 let dir = TempDir::new().unwrap();
17938 let db_path = dir.path().join("test.db");
17939 let storage = SqliteStorage::open(&db_path).unwrap();
17940
17941 let content = "ASCII🙂é漢字";
17942 let expected_bytes = content.len() as i64;
17943 let started_at = 1_704_067_200_000_i64;
17944 let day_id = FrankenStorage::day_id_from_millis(started_at);
17945 let hour_id = FrankenStorage::hour_id_from_millis(started_at);
17946
17947 storage
17948 .conn
17949 .execute_compat(
17950 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17951 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17952 fparams![1_i64, "tester", "Tester", "cli"],
17953 )
17954 .unwrap();
17955 storage
17956 .conn
17957 .execute_compat(
17958 "INSERT INTO conversations (
17959 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17960 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17961 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)",
17962 fparams![
17963 1_i64,
17964 1_i64,
17965 LOCAL_SOURCE_ID,
17966 "unicode-metrics",
17967 "Unicode Metrics",
17968 "/tmp/unicode-metrics.jsonl",
17969 started_at,
17970 "{}"
17971 ],
17972 )
17973 .unwrap();
17974 storage
17975 .conn
17976 .execute_compat(
17977 "INSERT INTO messages (
17978 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17979 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17980 fparams![1_i64, 1_i64, 0_i64, "user", started_at, content],
17981 )
17982 .unwrap();
17983 storage
17984 .conn
17985 .execute_compat(
17986 "INSERT INTO message_metrics (
17987 message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
17988 role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
17989 api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
17990 api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
17991 model_name, model_family, model_tier, provider
17992 ) VALUES (
17993 ?1, ?2, ?3, ?4, ?5, ?6, ?7,
17994 ?8, ?9, ?10, ?11, ?12,
17995 ?13, ?14, ?15,
17996 ?16, ?17, ?18, ?19, ?20,
17997 ?21, ?22, ?23, ?24
17998 )",
17999 fparams![
18000 1_i64,
18001 started_at,
18002 hour_id,
18003 day_id,
18004 "tester",
18005 0_i64,
18006 LOCAL_SOURCE_ID,
18007 "user",
18008 expected_bytes,
18009 expected_bytes / 4,
18010 0_i64,
18011 0_i64,
18012 0_i64,
18013 0_i64,
18014 0_i64,
18015 "",
18016 "estimated",
18017 0_i64,
18018 0_i64,
18019 0_i64,
18020 "",
18021 "unknown",
18022 "unknown",
18023 "unknown"
18024 ],
18025 )
18026 .unwrap();
18027
18028 let mut tx = storage.conn.transaction().unwrap();
18029 franken_update_daily_stats_in_tx(
18030 &storage,
18031 &tx,
18032 "tester",
18033 LOCAL_SOURCE_ID,
18034 Some(started_at),
18035 StatsDelta {
18036 session_count_delta: 1,
18037 message_count_delta: 1,
18038 total_chars_delta: expected_bytes,
18039 },
18040 )
18041 .unwrap();
18042 tx.commit().unwrap();
18043
18044 let inline_total: i64 = storage
18045 .conn
18046 .query_row_map(
18047 "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
18048 fparams![],
18049 |row| row.get_typed(0),
18050 )
18051 .unwrap();
18052 assert_eq!(inline_total, expected_bytes);
18053
18054 storage.conn.execute("DELETE FROM daily_stats").unwrap();
18055
18056 let rebuilt = storage.rebuild_daily_stats().unwrap();
18057 assert_eq!(rebuilt.total_sessions, 1);
18058
18059 let rebuilt_total: i64 = storage
18060 .conn
18061 .query_row_map(
18062 "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
18063 fparams![],
18064 |row| row.get_typed(0),
18065 )
18066 .unwrap();
18067 assert_eq!(rebuilt_total, expected_bytes);
18068 }
18069
18070 #[test]
18071 fn rebuild_daily_stats_raw_fallback_preserves_byte_counts() {
18072 let dir = TempDir::new().unwrap();
18073 let db_path = dir.path().join("test.db");
18074 let storage = SqliteStorage::open(&db_path).unwrap();
18075
18076 let content = "fallback🙂é漢字";
18077 let expected_bytes = content.len() as i64;
18078 let started_at = 1_704_067_200_000_i64;
18079 storage
18080 .conn
18081 .execute_compat(
18082 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
18083 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
18084 fparams![1_i64, "tester", "Tester", "cli"],
18085 )
18086 .unwrap();
18087 storage
18088 .conn
18089 .execute_compat(
18090 "INSERT INTO conversations (
18091 id, agent_id, workspace_id, source_id, external_id, title, source_path,
18092 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
18093 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)",
18094 fparams![
18095 1_i64,
18096 1_i64,
18097 LOCAL_SOURCE_ID,
18098 "unicode-fallback",
18099 "Unicode Fallback",
18100 "/tmp/unicode-fallback.jsonl",
18101 started_at,
18102 "{}"
18103 ],
18104 )
18105 .unwrap();
18106 storage
18107 .conn
18108 .execute_compat(
18109 "INSERT INTO messages (
18110 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
18111 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
18112 fparams![1_i64, 1_i64, 0_i64, "assistant", started_at, content],
18113 )
18114 .unwrap();
18115
18116 let mut tx = storage.conn.transaction().unwrap();
18117 franken_update_daily_stats_in_tx(
18118 &storage,
18119 &tx,
18120 "tester",
18121 LOCAL_SOURCE_ID,
18122 Some(started_at),
18123 StatsDelta {
18124 session_count_delta: 1,
18125 message_count_delta: 1,
18126 total_chars_delta: expected_bytes,
18127 },
18128 )
18129 .unwrap();
18130 tx.commit().unwrap();
18131
18132 storage.conn.execute("DELETE FROM daily_stats").unwrap();
18133
18134 let rebuilt = storage.rebuild_daily_stats().unwrap();
18135 assert_eq!(rebuilt.total_sessions, 1);
18136
18137 let rebuilt_total: i64 = storage
18138 .conn
18139 .query_row_map(
18140 "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
18141 fparams![],
18142 |row| row.get_typed(0),
18143 )
18144 .unwrap();
18145 assert_eq!(rebuilt_total, expected_bytes);
18146 }
18147
18148 #[test]
18149 fn insert_conversations_batched_appends_duplicate_external_id() {
18150 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18151 use std::path::PathBuf;
18152
18153 let dir = TempDir::new().unwrap();
18154 let db_path = dir.path().join("test.db");
18155 let storage = SqliteStorage::open(&db_path).unwrap();
18156
18157 let agent = Agent {
18158 id: None,
18159 slug: "codex".into(),
18160 name: "Codex".into(),
18161 version: Some("0.2.3".into()),
18162 kind: AgentKind::Cli,
18163 };
18164 let agent_id = storage.ensure_agent(&agent).unwrap();
18165
18166 let base_conv = |messages: Vec<Message>| Conversation {
18167 id: None,
18168 agent_slug: "codex".into(),
18169 workspace: Some(PathBuf::from("/tmp/workspace")),
18170 external_id: Some("shared-session".into()),
18171 title: Some("Shared Session".into()),
18172 source_path: PathBuf::from("/tmp/rollout.jsonl"),
18173 started_at: Some(1_700_000_000_000),
18174 ended_at: Some(1_700_000_000_999),
18175 approx_tokens: None,
18176 metadata_json: serde_json::Value::Null,
18177 messages,
18178 source_id: "local".into(),
18179 origin_host: None,
18180 };
18181
18182 let conv_a = base_conv(vec![
18183 Message {
18184 id: None,
18185 idx: 0,
18186 role: MessageRole::User,
18187 author: None,
18188 created_at: Some(1_700_000_000_000),
18189 content: "first".into(),
18190 extra_json: serde_json::Value::Null,
18191 snippets: Vec::new(),
18192 },
18193 Message {
18194 id: None,
18195 idx: 1,
18196 role: MessageRole::Agent,
18197 author: None,
18198 created_at: Some(1_700_000_000_100),
18199 content: "second".into(),
18200 extra_json: serde_json::Value::Null,
18201 snippets: Vec::new(),
18202 },
18203 ]);
18204 let conv_b = base_conv(vec![
18205 Message {
18206 id: None,
18207 idx: 0,
18208 role: MessageRole::User,
18209 author: None,
18210 created_at: Some(1_700_000_000_000),
18211 content: "first".into(),
18212 extra_json: serde_json::Value::Null,
18213 snippets: Vec::new(),
18214 },
18215 Message {
18216 id: None,
18217 idx: 1,
18218 role: MessageRole::Agent,
18219 author: None,
18220 created_at: Some(1_700_000_000_100),
18221 content: "second".into(),
18222 extra_json: serde_json::Value::Null,
18223 snippets: Vec::new(),
18224 },
18225 Message {
18226 id: None,
18227 idx: 2,
18228 role: MessageRole::User,
18229 author: None,
18230 created_at: Some(1_700_000_000_200),
18231 content: "third".into(),
18232 extra_json: serde_json::Value::Null,
18233 snippets: Vec::new(),
18234 },
18235 Message {
18236 id: None,
18237 idx: 3,
18238 role: MessageRole::Agent,
18239 author: None,
18240 created_at: Some(1_700_000_000_300),
18241 content: "fourth".into(),
18242 extra_json: serde_json::Value::Null,
18243 snippets: Vec::new(),
18244 },
18245 ]);
18246
18247 let outcomes = storage
18248 .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
18249 .unwrap();
18250 assert_eq!(outcomes.len(), 2);
18251 assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
18252 assert_eq!(outcomes[1].inserted_indices, vec![2, 3]);
18253 assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);
18254
18255 let conversation_count: i64 = storage
18256 .conn
18257 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18258 row.get_typed(0)
18259 })
18260 .unwrap();
18261 let conversation_count_not_indexed: i64 = storage
18262 .conn
18263 .query_row_map(
18264 "SELECT COUNT(*) FROM conversations NOT INDEXED",
18265 fparams![],
18266 |row| row.get_typed(0),
18267 )
18268 .unwrap();
18269 let conversation_count_source_index: i64 = storage
18270 .conn
18271 .query_row_map(
18272 "SELECT COUNT(*) FROM conversations INDEXED BY idx_conversations_source_id",
18273 fparams![],
18274 |row| row.get_typed(0),
18275 )
18276 .unwrap();
18277 let message_count: i64 = storage
18278 .conn
18279 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
18280 row.get_typed(0)
18281 })
18282 .unwrap();
18283 let reopened_storage = SqliteStorage::open(&db_path).unwrap();
18284 let reopened_conversation_count: i64 = reopened_storage
18285 .conn
18286 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18287 row.get_typed(0)
18288 })
18289 .unwrap();
18290 let reopened_conversation_count_not_indexed: i64 = reopened_storage
18291 .conn
18292 .query_row_map(
18293 "SELECT COUNT(*) FROM conversations NOT INDEXED",
18294 fparams![],
18295 |row| row.get_typed(0),
18296 )
18297 .unwrap();
18298 let reopened_conversation_ids: Vec<i64> = reopened_storage
18299 .conn
18300 .query_map_collect(
18301 "SELECT id FROM conversations ORDER BY id",
18302 fparams![],
18303 |row| row.get_typed(0),
18304 )
18305 .unwrap();
18306 let reopened_conversation_ids_not_indexed: Vec<i64> = reopened_storage
18307 .conn
18308 .query_map_collect(
18309 "SELECT id FROM conversations NOT INDEXED ORDER BY id",
18310 fparams![],
18311 |row| row.get_typed(0),
18312 )
18313 .unwrap();
18314 let reopened_conversation_ids_source_index: Vec<i64> = reopened_storage
18315 .conn
18316 .query_map_collect(
18317 "SELECT id FROM conversations INDEXED BY idx_conversations_source_id ORDER BY id",
18318 fparams![],
18319 |row| row.get_typed(0),
18320 )
18321 .unwrap();
18322
18323 assert_eq!(reopened_conversation_ids, vec![outcomes[0].conversation_id]);
18324 assert_eq!(
18325 reopened_conversation_ids_not_indexed,
18326 vec![outcomes[0].conversation_id]
18327 );
18328 assert_eq!(
18329 reopened_conversation_ids_source_index,
18330 vec![outcomes[0].conversation_id]
18331 );
18332 assert_eq!(reopened_conversation_count, 1);
18333 assert_eq!(reopened_conversation_count_not_indexed, 1);
18334 assert_eq!(conversation_count_not_indexed, 1);
18335 assert_eq!(conversation_count_source_index, 1);
18336 assert_eq!(conversation_count, 1);
18337 assert_eq!(message_count, 4);
18338 }
18339
18340 #[test]
18341 fn franken_insert_conversation_or_get_existing_recovers_unique_conflict() {
18342 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18343 use std::path::PathBuf;
18344
18345 let dir = TempDir::new().unwrap();
18346 let db_path = dir.path().join("test.db");
18347 let storage = SqliteStorage::open(&db_path).unwrap();
18348
18349 let agent = Agent {
18350 id: None,
18351 slug: "codex".into(),
18352 name: "Codex".into(),
18353 version: Some("0.2.3".into()),
18354 kind: AgentKind::Cli,
18355 };
18356 let agent_id = storage.ensure_agent(&agent).unwrap();
18357
18358 let conv = Conversation {
18359 id: None,
18360 agent_slug: "codex".into(),
18361 workspace: Some(PathBuf::from("/tmp/workspace")),
18362 external_id: Some("recover-duplicate".into()),
18363 title: Some("Recover Duplicate".into()),
18364 source_path: PathBuf::from("/tmp/rollout.jsonl"),
18365 started_at: Some(1_700_000_000_000),
18366 ended_at: Some(1_700_000_000_100),
18367 approx_tokens: None,
18368 metadata_json: serde_json::Value::Null,
18369 messages: vec![Message {
18370 id: None,
18371 idx: 0,
18372 role: MessageRole::User,
18373 author: None,
18374 created_at: Some(1_700_000_000_000),
18375 content: "hello".into(),
18376 extra_json: serde_json::Value::Null,
18377 snippets: Vec::new(),
18378 }],
18379 source_id: "local".into(),
18380 origin_host: None,
18381 };
18382
18383 let tx = storage.conn.transaction().unwrap();
18384 let inserted_id = franken_insert_conversation(&tx, agent_id, None, &conv)
18385 .unwrap()
18386 .expect("first insert should succeed");
18387
18388 let conversation_key = conversation_merge_key(agent_id, &conv);
18389 let resolved = franken_insert_conversation_or_get_existing_after_miss(
18390 &tx,
18391 agent_id,
18392 None,
18393 &conv,
18394 &conversation_key,
18395 )
18396 .unwrap();
18397
18398 match resolved {
18399 ConversationInsertStatus::Existing(existing_id) => {
18400 assert_eq!(existing_id, inserted_id);
18401 }
18402 ConversationInsertStatus::Inserted(new_id) => {
18403 panic!("expected existing conversation id, got freshly inserted {new_id}");
18404 }
18405 }
18406
18407 let conversation_count: i64 = tx
18408 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18409 row.get_typed(0)
18410 })
18411 .unwrap();
18412 assert_eq!(conversation_count, 1);
18413 }
18414
18415 #[test]
18416 fn insert_conversations_batched_merges_duplicate_external_id_with_gaps() {
18417 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18418 use std::path::PathBuf;
18419
18420 let dir = TempDir::new().unwrap();
18421 let db_path = dir.path().join("test.db");
18422 let storage = SqliteStorage::open(&db_path).unwrap();
18423
18424 let agent = Agent {
18425 id: None,
18426 slug: "codex".into(),
18427 name: "Codex".into(),
18428 version: Some("0.2.3".into()),
18429 kind: AgentKind::Cli,
18430 };
18431 let agent_id = storage.ensure_agent(&agent).unwrap();
18432
18433 let base_conv = |messages: Vec<Message>| Conversation {
18434 id: None,
18435 agent_slug: "codex".into(),
18436 workspace: Some(PathBuf::from("/tmp/workspace")),
18437 external_id: Some("shared-session-gap".into()),
18438 title: Some("Shared Session Gap".into()),
18439 source_path: PathBuf::from("/tmp/rollout.jsonl"),
18440 started_at: Some(1_700_000_000_000),
18441 ended_at: Some(1_700_000_000_999),
18442 approx_tokens: None,
18443 metadata_json: serde_json::Value::Null,
18444 messages,
18445 source_id: "local".into(),
18446 origin_host: None,
18447 };
18448
18449 let conv_a = base_conv(vec![
18450 Message {
18451 id: None,
18452 idx: 2,
18453 role: MessageRole::User,
18454 author: None,
18455 created_at: Some(1_700_000_000_200),
18456 content: "third".into(),
18457 extra_json: serde_json::Value::Null,
18458 snippets: Vec::new(),
18459 },
18460 Message {
18461 id: None,
18462 idx: 3,
18463 role: MessageRole::Agent,
18464 author: None,
18465 created_at: Some(1_700_000_000_300),
18466 content: "fourth".into(),
18467 extra_json: serde_json::Value::Null,
18468 snippets: Vec::new(),
18469 },
18470 ]);
18471 let conv_b = base_conv(vec![
18472 Message {
18473 id: None,
18474 idx: 0,
18475 role: MessageRole::User,
18476 author: None,
18477 created_at: Some(1_700_000_000_000),
18478 content: "first".into(),
18479 extra_json: serde_json::Value::Null,
18480 snippets: Vec::new(),
18481 },
18482 Message {
18483 id: None,
18484 idx: 1,
18485 role: MessageRole::Agent,
18486 author: None,
18487 created_at: Some(1_700_000_000_100),
18488 content: "second".into(),
18489 extra_json: serde_json::Value::Null,
18490 snippets: Vec::new(),
18491 },
18492 Message {
18493 id: None,
18494 idx: 3,
18495 role: MessageRole::Agent,
18496 author: None,
18497 created_at: Some(1_700_000_000_300),
18498 content: "fourth".into(),
18499 extra_json: serde_json::Value::Null,
18500 snippets: Vec::new(),
18501 },
18502 ]);
18503
18504 let outcomes = storage
18505 .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
18506 .unwrap();
18507 assert_eq!(outcomes.len(), 2);
18508 assert_eq!(outcomes[0].inserted_indices, vec![2, 3]);
18509 assert_eq!(outcomes[1].inserted_indices, vec![0, 1]);
18510 assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);
18511
18512 let stored_indices: Vec<i64> = storage
18513 .conn
18514 .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
18515 row.get_typed(0)
18516 })
18517 .unwrap();
18518 assert_eq!(stored_indices, vec![0, 1, 2, 3]);
18519 }
18520
18521 #[test]
18522 fn insert_conversations_batched_refreshes_partial_pending_message_lookup() {
18523 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18524 use std::path::PathBuf;
18525
18526 let dir = TempDir::new().unwrap();
18527 let db_path = dir.path().join("test.db");
18528 let storage = SqliteStorage::open(&db_path).unwrap();
18529
18530 let agent = Agent {
18531 id: None,
18532 slug: "codex".into(),
18533 name: "Codex".into(),
18534 version: Some("0.2.3".into()),
18535 kind: AgentKind::Cli,
18536 };
18537 let agent_id = storage.ensure_agent(&agent).unwrap();
18538
18539 let make_message = |idx: i64, content: &str| Message {
18540 id: None,
18541 idx,
18542 role: if idx == 0 {
18543 MessageRole::User
18544 } else {
18545 MessageRole::Agent
18546 },
18547 author: None,
18548 created_at: Some(1_700_000_000_000 + idx),
18549 content: content.into(),
18550 extra_json: serde_json::Value::Null,
18551 snippets: Vec::new(),
18552 };
18553
18554 let base_conv = |messages: Vec<Message>| Conversation {
18555 id: None,
18556 agent_slug: "codex".into(),
18557 workspace: Some(PathBuf::from("/tmp/workspace")),
18558 external_id: Some("partial-cache-session".into()),
18559 title: Some("Partial cache session".into()),
18560 source_path: PathBuf::from("/tmp/partial-cache.jsonl"),
18561 started_at: Some(1_700_000_000_000),
18562 ended_at: Some(1_700_000_000_100),
18563 approx_tokens: None,
18564 metadata_json: serde_json::Value::Null,
18565 messages,
18566 source_id: "local".into(),
18567 origin_host: None,
18568 };
18569
18570 let canonical = base_conv(vec![
18571 make_message(0, "canonical zero"),
18572 make_message(20, "canonical twenty"),
18573 ]);
18574 storage
18575 .insert_conversation_tree(agent_id, None, &canonical)
18576 .unwrap();
18577
18578 let exact_prefix = base_conv(vec![make_message(0, "canonical zero")]);
18579 let conflicting_tail = base_conv(vec![make_message(20, "conflicting twenty")]);
18580
18581 let outcomes = storage
18582 .insert_conversations_batched(&[
18583 (agent_id, None, &exact_prefix),
18584 (agent_id, None, &conflicting_tail),
18585 ])
18586 .unwrap();
18587
18588 assert_eq!(outcomes.len(), 2);
18589 assert!(outcomes[0].inserted_indices.is_empty());
18590 assert!(
18591 outcomes[1].inserted_indices.is_empty(),
18592 "the second batch item must refresh the partial pending lookup and retain the canonical idx=20 row"
18593 );
18594
18595 let stored_messages: Vec<(i64, String)> = storage
18596 .conn
18597 .query_map_collect(
18598 "SELECT idx, content FROM messages ORDER BY idx",
18599 fparams![],
18600 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18601 )
18602 .unwrap();
18603 assert_eq!(
18604 stored_messages,
18605 vec![
18606 (0, "canonical zero".to_string()),
18607 (20, "canonical twenty".to_string()),
18608 ]
18609 );
18610 }
18611
18612 #[test]
18613 fn insert_conversations_batched_reprocessing_conversation_is_idempotent() {
18614 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18615 use std::path::PathBuf;
18616
18617 const MESSAGE_COUNT: i64 = 64;
18618
18619 let dir = TempDir::new().unwrap();
18620 let db_path = dir.path().join("test.db");
18621 let storage = SqliteStorage::open(&db_path).unwrap();
18622
18623 let agent = Agent {
18624 id: None,
18625 slug: "codex".into(),
18626 name: "Codex".into(),
18627 version: Some("0.2.3".into()),
18628 kind: AgentKind::Cli,
18629 };
18630 let agent_id = storage.ensure_agent(&agent).unwrap();
18631
18632 let messages: Vec<Message> = (0..MESSAGE_COUNT)
18633 .map(|idx| Message {
18634 id: None,
18635 idx,
18636 role: if idx % 2 == 0 {
18637 MessageRole::User
18638 } else {
18639 MessageRole::Agent
18640 },
18641 author: None,
18642 created_at: Some(1_700_000_000_000 + idx),
18643 content: format!("message {idx}"),
18644 extra_json: serde_json::Value::Null,
18645 snippets: Vec::new(),
18646 })
18647 .collect();
18648
18649 let conversation = Conversation {
18650 id: None,
18651 agent_slug: "codex".into(),
18652 workspace: Some(PathBuf::from("/tmp/workspace")),
18653 external_id: Some("large-reprocess-session".into()),
18654 title: Some("Large Reprocess Session".into()),
18655 source_path: PathBuf::from("/tmp/large-reprocess-session.jsonl"),
18656 started_at: Some(1_700_000_000_000),
18657 ended_at: Some(1_700_000_000_000 + MESSAGE_COUNT - 1),
18658 approx_tokens: None,
18659 metadata_json: serde_json::Value::Null,
18660 messages,
18661 source_id: "local".into(),
18662 origin_host: None,
18663 };
18664
18665 let first = storage
18666 .insert_conversations_batched(&[(agent_id, None, &conversation)])
18667 .unwrap();
18668 let second = storage
18669 .insert_conversations_batched(&[(agent_id, None, &conversation)])
18670 .unwrap();
18671
18672 assert_eq!(first.len(), 1);
18673 assert_eq!(second.len(), 1);
18674 assert_eq!(first[0].inserted_indices.len(), MESSAGE_COUNT as usize);
18675 assert!(
18676 second[0].inserted_indices.is_empty(),
18677 "full reprocessing of a large conversation must not attempt duplicate idx inserts"
18678 );
18679 assert_eq!(first[0].conversation_id, second[0].conversation_id);
18680
18681 let conversation_count: i64 = storage
18682 .conn
18683 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18684 row.get_typed(0)
18685 })
18686 .unwrap();
18687 let message_count: i64 = storage
18688 .conn
18689 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
18690 row.get_typed(0)
18691 })
18692 .unwrap();
18693
18694 assert_eq!(conversation_count, 1);
18695 assert_eq!(message_count, MESSAGE_COUNT);
18696 }
18697
18698 #[test]
18699 fn parallel_insert_conversation_tree_keeps_unique_external_ids_distinct() {
18700 use crate::connectors::{NormalizedConversation, NormalizedMessage};
18701 use crate::indexer::persist::map_to_internal;
18702 use crate::model::types::{Agent, AgentKind};
18703 use frankensqlite::compat::{ConnectionExt, RowExt};
18704 use rand::RngExt;
18705 use rayon::prelude::*;
18706
18707 fn retryable_franken_error(err: &anyhow::Error) -> bool {
18708 err.downcast_ref::<frankensqlite::FrankenError>()
18709 .or_else(|| {
18710 err.root_cause()
18711 .downcast_ref::<frankensqlite::FrankenError>()
18712 })
18713 .is_some_and(|inner| {
18714 matches!(
18715 inner,
18716 frankensqlite::FrankenError::Busy
18717 | frankensqlite::FrankenError::BusyRecovery
18718 | frankensqlite::FrankenError::BusySnapshot { .. }
18719 | frankensqlite::FrankenError::WriteConflict { .. }
18720 | frankensqlite::FrankenError::SerializationFailure { .. }
18721 )
18722 })
18723 }
18724
18725 fn with_retry<F, T>(mut f: F) -> anyhow::Result<T>
18726 where
18727 F: FnMut() -> anyhow::Result<T>,
18728 {
18729 let mut rng = rand::rng();
18730 let mut backoff_ms = 4_u64;
18731 for attempt in 0..=24 {
18732 match f() {
18733 Ok(value) => return Ok(value),
18734 Err(err) if attempt < 24 && retryable_franken_error(&err) => {
18735 let sleep_ms = backoff_ms + rng.random_range(0..=backoff_ms);
18736 std::thread::sleep(Duration::from_millis(sleep_ms));
18737 backoff_ms = (backoff_ms * 2).min(512);
18738 }
18739 Err(err) => return Err(err),
18740 }
18741 }
18742 unreachable!("retry loop must return on success or final failure")
18743 }
18744
18745 let dir = TempDir::new().unwrap();
18746 let db_path = dir.path().join("parallel_insert_conversation_tree.db");
18747 let seed = FrankenStorage::open(&db_path).unwrap();
18748 drop(seed);
18749
18750 let conversations: Vec<NormalizedConversation> = (0..10)
18751 .map(|i| NormalizedConversation {
18752 agent_slug: format!("agent-{}", i % 3),
18753 external_id: Some(format!("conv-{i}")),
18754 title: Some(format!("Conversation {i}")),
18755 workspace: Some(PathBuf::from(format!("/ws/{i}"))),
18756 source_path: PathBuf::from(format!("/log/{i}.jsonl")),
18757 started_at: Some(1_000 + i * 100),
18758 ended_at: Some(1_000 + i * 100 + 50),
18759 metadata: serde_json::json!({}),
18760 messages: (0..3)
18761 .map(|j| NormalizedMessage {
18762 idx: j,
18763 role: if j % 2 == 0 { "user" } else { "assistant" }.to_string(),
18764 author: Some("tester".into()),
18765 created_at: Some(1_000 + i * 100 + j * 10),
18766 content: format!("parallel-distinct-test conv={i} msg={j}"),
18767 extra: serde_json::json!({}),
18768 snippets: vec![],
18769 invocations: Vec::new(),
18770 })
18771 .collect(),
18772 })
18773 .collect();
18774
18775 let mut outcomes: Vec<(String, i64, Vec<i64>)> = conversations
18776 .par_chunks(3)
18777 .map(|chunk| {
18778 let storage = FrankenStorage::open_writer(&db_path).unwrap();
18779 let mut agent_cache: HashMap<String, i64> = HashMap::new();
18780 let mut workspace_cache: HashMap<PathBuf, i64> = HashMap::new();
18781 let mut chunk_outcomes = Vec::with_capacity(chunk.len());
18782
18783 for conv in chunk {
18784 let agent_slug = conv.agent_slug.clone();
18785 let workspace = conv.workspace.clone();
18786 let external_id = conv.external_id.clone().expect("external id");
18787 let internal = map_to_internal(conv);
18788 let outcome = with_retry(|| {
18789 let agent_id = if let Some(id) = agent_cache.get(&agent_slug) {
18790 *id
18791 } else {
18792 let agent = Agent {
18793 id: None,
18794 slug: agent_slug.clone(),
18795 name: agent_slug.clone(),
18796 version: None,
18797 kind: AgentKind::Cli,
18798 };
18799 let id = storage.ensure_agent(&agent)?;
18800 agent_cache.insert(agent_slug.clone(), id);
18801 id
18802 };
18803 let workspace_id = if let Some(path) = &workspace {
18804 if let Some(id) = workspace_cache.get(path) {
18805 Some(*id)
18806 } else {
18807 let id = storage.ensure_workspace(path, None)?;
18808 workspace_cache.insert(path.clone(), id);
18809 Some(id)
18810 }
18811 } else {
18812 None
18813 };
18814 storage.insert_conversation_tree(agent_id, workspace_id, &internal)
18815 })
18816 .unwrap();
18817 chunk_outcomes.push((
18818 external_id,
18819 outcome.conversation_id,
18820 outcome.inserted_indices,
18821 ));
18822 }
18823
18824 storage.close().unwrap();
18825 chunk_outcomes
18826 })
18827 .flatten()
18828 .collect();
18829 outcomes.sort_by(|left, right| left.0.cmp(&right.0));
18830
18831 assert!(
18832 outcomes
18833 .iter()
18834 .all(|(_, _, inserted_indices)| inserted_indices == &vec![0, 1, 2]),
18835 "unique external ids must not be routed through the existing-conversation merge path: {outcomes:?}"
18836 );
18837
18838 let distinct_ids: HashSet<i64> = outcomes
18839 .iter()
18840 .map(|(_, conversation_id, _)| *conversation_id)
18841 .collect();
18842 assert_eq!(
18843 distinct_ids.len(),
18844 conversations.len(),
18845 "unique external ids must produce distinct conversation ids: {outcomes:?}"
18846 );
18847
18848 let reader = FrankenStorage::open(&db_path).unwrap();
18849 let stored_rows: Vec<(i64, String)> = reader
18850 .raw()
18851 .query_map_collect(
18852 "SELECT id, external_id FROM conversations ORDER BY id",
18853 &[],
18854 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18855 )
18856 .unwrap();
18857 let stored_count: i64 = reader
18858 .raw()
18859 .query_row_map("SELECT COUNT(*) FROM conversations", &[], |row| {
18860 row.get_typed(0)
18861 })
18862 .unwrap();
18863
18864 assert_eq!(
18865 stored_count as usize,
18866 conversations.len(),
18867 "parallel distinct inserts must persist one row per external id; rows={stored_rows:?}; outcomes={outcomes:?}"
18868 );
18869 assert_eq!(
18870 stored_rows.len(),
18871 conversations.len(),
18872 "parallel distinct inserts must remain visible after reopening; rows={stored_rows:?}; outcomes={outcomes:?}"
18873 );
18874 }
18875
/// Two inserts sharing one external id must land in a single conversation,
/// with the second insert contributing only the indices that were still
/// missing (the first batch starts at idx 2, leaving a gap at 0 and 1).
#[test]
fn insert_conversation_tree_merges_duplicate_external_id_with_gaps() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Builds one normalized message; everything not passed in is left empty.
    let normalized = |idx, role: &str, created_at, content: &str| NormalizedMessage {
        idx,
        role: role.into(),
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra: serde_json::Value::Null,
        snippets: Vec::new(),
        invocations: Vec::new(),
    };

    // Every session shares the same external id and source path; only the
    // message list varies between the two inserts.
    let session = |messages: Vec<NormalizedMessage>| {
        map_to_internal(&NormalizedConversation {
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("tree-gap-session".into()),
            title: Some("Tree Gap Session".into()),
            source_path: PathBuf::from("/tmp/tree.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            metadata: serde_json::Value::Null,
            messages,
        })
    };

    let tail_only = session(vec![
        normalized(2, "user", 1_700_000_000_200, "third"),
        normalized(3, "assistant", 1_700_000_000_300, "fourth"),
    ]);
    let head_with_overlap = session(vec![
        normalized(0, "user", 1_700_000_000_000, "first"),
        normalized(1, "assistant", 1_700_000_000_100, "second"),
        normalized(3, "assistant", 1_700_000_000_300, "fourth"),
    ]);

    let first = storage
        .insert_conversation_tree(agent_id, None, &tail_only)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &head_with_overlap)
        .unwrap();

    assert_eq!(first.inserted_indices, vec![2, 3]);
    // idx 3 was already stored by the first insert, so only 0 and 1 are new.
    assert_eq!(second.inserted_indices, vec![0, 1]);
    assert_eq!(first.conversation_id, second.conversation_id);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 2, 3]);
}
18982
/// A brand-new conversation whose payload repeats a message index must keep
/// only the first message seen for that index.
#[test]
fn insert_conversation_tree_skips_duplicate_message_indices_for_new_conversation() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Builds one message; everything not passed in is left empty.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // idx 0 appears twice on purpose; the second occurrence is the duplicate.
    let messages = vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first canonical"),
        message(
            0,
            MessageRole::User,
            1_700_000_000_001,
            "duplicate idx should be skipped",
        ),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ];

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("duplicate-new-session".into()),
        title: Some("Duplicate New Session".into()),
        source_path: PathBuf::from("/tmp/duplicate-new-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    assert_eq!(outcome.inserted_indices, vec![0, 1]);

    let persisted: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected = vec![
        (0, String::from("first canonical")),
        (1, String::from("second")),
    ];
    assert_eq!(persisted, expected);
}
19070
/// Without an external id, two inserts for the same source path (and the same
/// start time) must merge: the overlapping idx 1 is not inserted twice.
#[test]
fn insert_conversation_tree_merges_duplicate_source_path_without_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Builds one message; everything not passed in is left empty.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Both inserts use identical conversation metadata; only messages differ.
    let session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Source Path Merge".into()),
        source_path: PathBuf::from("/tmp/shared-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let opening = session(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ]);
    let first = storage
        .insert_conversation_tree(agent_id, None, &opening)
        .unwrap();

    let continuation = session(vec![
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        message(2, MessageRole::User, 1_700_000_000_200, "third"),
    ]);
    let second = storage
        .insert_conversation_tree(agent_id, None, &continuation)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(first.inserted_indices, vec![0, 1]);
    assert_eq!(second.inserted_indices, vec![2]);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 2]);
}
19175
/// Two inserts for the same source path whose `started_at` values differ by
/// four seconds, but which share an identical message at idx 1, must still
/// merge into one conversation.
#[test]
fn insert_conversation_tree_merges_source_path_duplicates_with_start_drift() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Builds one message; everything not passed in is left empty.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Only the start timestamp and the message list vary between inserts.
    let session = |started_at: i64, messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Drift Merge".into()),
        source_path: PathBuf::from("/tmp/drift-session.jsonl"),
        started_at: Some(started_at),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let original = session(
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    // Same session re-observed with a start time 4_000 ms later.
    let drifted = session(
        1_700_000_004_000,
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_004_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &drifted)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![2]);
}
19277
/// Two sessions on the same source path that start 900 seconds apart and only
/// share the text of their opening message must remain separate conversations.
#[test]
fn insert_conversation_tree_keeps_single_message_overlap_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Builds one message; everything not passed in is left empty.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Session metadata is fixed apart from the start time (end = start + 500).
    let session = |started_at: i64, messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Partial overlap".into()),
        source_path: PathBuf::from("/tmp/reused-session.jsonl"),
        started_at: Some(started_at),
        ended_at: Some(started_at + 500),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let earlier = session(
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "shared opener"),
            message(
                1,
                MessageRole::Agent,
                1_700_000_000_100,
                "first session unique",
            ),
        ],
    );
    storage
        .insert_conversation_tree(agent_id, None, &earlier)
        .unwrap();

    // The later session repeats the opener text, stamped with its own start time.
    let later = session(
        1_700_000_900_000,
        vec![message(
            0,
            MessageRole::User,
            1_700_000_900_000,
            "shared opener",
        )],
    );
    storage
        .insert_conversation_tree(agent_id, None, &later)
        .unwrap();

    let total_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(total_conversations, 2);
}
19368
/// Two unrelated single-message sessions that merely reuse the same source
/// path (different start times, different content) must not be merged.
#[test]
fn insert_conversation_tree_keeps_distinct_source_path_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // One-message session whose start time and message timestamp coincide.
    let single_message_session = |timestamp: i64, content: &str| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Same Path Different Session".into()),
        source_path: PathBuf::from("/tmp/reused-session.jsonl"),
        started_at: Some(timestamp),
        ended_at: Some(timestamp + 500),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(timestamp),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: "local".into(),
        origin_host: None,
    };

    for (timestamp, content) in [
        (1_700_000_000_000, "first session"),
        (1_700_000_900_000, "second session"),
    ] {
        storage
            .insert_conversation_tree(agent_id, None, &single_message_session(timestamp, content))
            .unwrap();
    }

    let total_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(total_conversations, 2);
}
19435
/// A replay of the same source path whose messages carry shifted indices
/// (10, 11, 12 instead of 0, 1, ...) but identical timestamps and content must
/// merge into the existing conversation, inserting only the new message.
#[test]
fn insert_conversation_tree_merges_replay_equivalent_messages_with_shifted_idx() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Builds one message; everything not passed in is left empty.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Session metadata is fixed apart from the start time (end = start + 500).
    let session = |started_at: i64, messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Shifted replay".into()),
        source_path: PathBuf::from("/tmp/replay-session.jsonl"),
        started_at: Some(started_at),
        ended_at: Some(started_at + 500),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let original = session(
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    // Same two messages re-emitted under idx 10/11, plus one new message at 12.
    let replay = session(
        1_700_000_900_000,
        vec![
            message(10, MessageRole::User, 1_700_000_000_000, "first"),
            message(11, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(12, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &replay)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![12]);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect(
            "SELECT idx FROM messages WHERE conversation_id = ?1 ORDER BY idx",
            fparams![first.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 12]);
}
19557
/// Salvaging two historical databases must import every message once, merge
/// the conversation that both of them contain, and do nothing on a second run.
#[test]
fn salvage_historical_databases_imports_backups_once_and_merges_overlap() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Builds one message; everything not passed in is left empty.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    /// Conversation template shared by every fixture in this test.
    fn base_conv(source_path: &str, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Recovered".into()),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    // The two "shared" fixtures overlap on idx 1 and live in different files.
    let shared_head = base_conv(
        "/tmp/shared-history.jsonl",
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let shared_tail = base_conv(
        "/tmp/shared-history.jsonl",
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    let standalone = Conversation {
        started_at: Some(1_700_000_001_000),
        ended_at: Some(1_700_000_001_100),
        ..base_conv(
            "/tmp/unique-history.jsonl",
            vec![message(0, MessageRole::User, 1_700_000_001_000, "unique")],
        )
    };

    let backup_path = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&backup_path, std::slice::from_ref(&shared_head));
    let corrupt_path = tmp.path().join("agent_search.corrupt.20260324_212907");
    seed_historical_db_direct(&corrupt_path, &[shared_tail, standalone]);

    let first = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(first.bundles_considered, 2);
    assert_eq!(first.bundles_imported, 2);
    // 3 distinct shared messages + 1 standalone message.
    assert_eq!(first.messages_imported, 4);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 2);

    let shared_id = conversations
        .iter()
        .find(|conv| conv.source_path == std::path::Path::new("/tmp/shared-history.jsonl"))
        .and_then(|conv| conv.id)
        .unwrap();
    let shared_indices: Vec<i64> = storage
        .fetch_messages(shared_id)
        .unwrap()
        .into_iter()
        .map(|msg| msg.idx)
        .collect();
    assert_eq!(shared_indices, vec![0, 1, 2]);

    // A repeat salvage over the same bundles must import nothing.
    let second = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(second.bundles_imported, 0);
    assert_eq!(second.messages_imported, 0);
}
19687
/// A historical conversation whose `source_id` is blank but whose
/// `origin_host` is set must be salvaged with the host name as its source id.
#[test]
fn salvage_historical_databases_normalizes_host_only_remote_provenance() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let opener = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "host-only remote".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let host_only_remote = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Recovered Host Only Remote".into()),
        source_path: PathBuf::from("/tmp/host-only-history.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![opener],
        source_id: " ".into(),
        origin_host: Some("builder-5".into()),
    };

    let historical_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&historical_db, std::slice::from_ref(&host_only_remote));

    // Rewrite the seeded backup by hand so the stored row really carries the
    // blank source id; the connection is closed again before salvage runs.
    {
        let raw = FrankenConnection::open(historical_db.to_string_lossy().into_owned()).unwrap();
        raw.execute_compat(
            "INSERT INTO sources(id, kind, host_label, created_at, updated_at) VALUES(?1, ?2, ?3, ?4, ?5)",
            fparams![" ", "ssh", "builder-5", 0_i64, 0_i64],
        )
        .unwrap();
        raw.execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE source_path = ?3",
            fparams![" ", "builder-5", "/tmp/host-only-history.jsonl"],
        )
        .unwrap();
        raw.execute_compat("DELETE FROM sources WHERE id = ?1", fparams!["builder-5"])
            .unwrap();
    }

    let first = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(first.bundles_imported, 1);
    assert_eq!(first.messages_imported, 1);

    let source_ids = storage.get_source_ids().unwrap();
    assert_eq!(source_ids, vec![String::from("builder-5")]);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 1);
    let salvaged = &conversations[0];
    assert_eq!(salvaged.source_id, "builder-5");
    assert_eq!(salvaged.origin_host.as_deref(), Some("builder-5"));
}
19758
/// When the importer rejects any batch carrying more than one message, the
/// retry wrapper must keep splitting a single four-message conversation until
/// every message has been imported in a one-message slice.
#[test]
fn historical_salvage_retry_splits_single_conversation_until_it_fits() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Per-attempt record of how many messages each entry in the batch carried.
    let mut attempts: Vec<Vec<usize>> = Vec::new();

    let four_messages: Vec<Message> = (0..4)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message-{idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let entry = HistoricalBatchEntry {
        source_row_id: 77,
        agent_id: 1,
        workspace_id: None,
        conversation: Conversation {
            id: None,
            agent_slug: "gemini".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("conv-77".into()),
            title: Some("Large recovered conversation".into()),
            source_path: PathBuf::from("/tmp/history.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: four_messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        },
    };

    let totals = SqliteStorage::import_historical_batch_with_retry(
        std::slice::from_ref(&entry),
        &mut |batch| {
            let sizes: Vec<usize> = batch
                .iter()
                .map(|entry| entry.conversation.messages.len())
                .collect();
            let total_messages: usize = sizes.iter().sum();
            attempts.push(sizes);
            // Simulated importer: only batches of at most one message succeed.
            if total_messages <= 1 {
                Ok(HistoricalBatchImportTotals {
                    inserted_source_rows: batch.len(),
                    inserted_messages: total_messages,
                })
            } else {
                Err(anyhow!("out of memory"))
            }
        },
    )
    .unwrap();

    let expected_totals = HistoricalBatchImportTotals {
        inserted_source_rows: 1,
        inserted_messages: 4,
    };
    assert_eq!(totals, expected_totals);
    // The very first attempt is the whole conversation in one piece.
    assert_eq!(attempts.first(), Some(&vec![4]));
    let single_message_attempts = attempts
        .iter()
        .filter(|sizes| matches!(sizes.as_slice(), [1]))
        .count();
    assert!(
        single_message_attempts >= 4,
        "expected recursive fallback to reach one-message slices"
    );
}
19835
/// A salvage run that finds a recorded progress checkpoint for a bundle must
/// finish the bundle, report totals that include the checkpointed counters,
/// clear the checkpoint, and import nothing on a subsequent run.
#[test]
fn salvage_historical_databases_resumes_from_progress_checkpoint() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// One-message conversation whose ids and timestamps derive from `idx_seed`.
    fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
        let opener = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx_seed),
            content: format!("message-{idx_seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{idx_seed}")),
            title: Some(format!("Recovered {idx_seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000 + idx_seed),
            ended_at: Some(1_700_000_000_100 + idx_seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opener],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let already_imported = make_conv("/tmp/one.jsonl", 1);
    seed_historical_db_direct(
        &backup_db,
        &[
            already_imported.clone(),
            make_conv("/tmp/two.jsonl", 2),
            make_conv("/tmp/three.jsonl", 3),
        ],
    );

    // Simulate an interrupted earlier run: the first conversation is already
    // in the canonical database and progress is recorded at its backup row id.
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    storage
        .insert_conversation_tree(agent_id, None, &already_imported)
        .unwrap();

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|bundle| bundle.root_path == backup_db)
        .unwrap();
    // The backup connection is a temporary, so it is closed before salvage runs.
    let checkpoint_row_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
        .unwrap()
        .query_row_map(
            "SELECT id FROM conversations WHERE source_path = ?1",
            fparams!["/tmp/one.jsonl"],
            |row| row.get_typed(0),
        )
        .unwrap();
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", checkpoint_row_id, 50, 99)
        .unwrap();

    let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(outcome.bundles_imported, 1);
    // NOTE(review): presumably the checkpointed 50/99 plus the two remaining
    // one-message conversations — confirm against the salvage implementation.
    assert_eq!(outcome.conversations_imported, 52);
    assert_eq!(outcome.messages_imported, 101);
    assert_eq!(storage.list_conversations(10, 0).unwrap().len(), 3);

    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let leftover_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        leftover_progress.is_none(),
        "completed salvage should clear bundle progress"
    );

    let second = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(second.bundles_imported, 0);
    assert_eq!(second.messages_imported, 0);
}
19935
/// Salvage must short-circuit a bundle whose recorded progress checkpoint
/// already reaches the backup's highest conversation id: nothing is imported,
/// the bundle is reported as already imported, and the checkpoint row is gone.
#[test]
fn salvage_historical_databases_skips_bundle_when_checkpoint_covers_backup() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation whose identifiers and timestamps are offset by `seed`.
    let make_conv = |source_path: &str, seed: i64| {
        let opening_ts = 1_700_000_000_000 + seed;
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(opening_ts),
            content: format!("message-{seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{seed}")),
            title: Some(format!("Recovered {seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(opening_ts),
            ended_at: Some(1_700_000_000_100 + seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    };

    let scratch = TempDir::new().unwrap();
    let canonical_db = scratch.path().join("agent_search.db");
    let backup_db = scratch
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    // Populate the backup database with three conversations.
    let backup_contents = [
        make_conv("/tmp/one.jsonl", 1),
        make_conv("/tmp/two.jsonl", 2),
        make_conv("/tmp/three.jsonl", 3),
    ];
    seed_historical_db_direct(&backup_db, &backup_contents);

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Read the backup's highest conversation id through a short-lived connection.
    let backup_max_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
        .unwrap()
        .query_row_map(
            "SELECT COALESCE(MAX(id), 0) FROM conversations",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert!(backup_max_id > 0, "seeded backup should have conversations");

    // Record a checkpoint positioned at the backup's last conversation id.
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", backup_max_id, 3, 3)
        .unwrap();

    let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(
        outcome.bundles_imported, 0,
        "fully-checkpointed bundle must not be re-scanned"
    );
    assert_eq!(outcome.conversations_imported, 0);
    assert_eq!(outcome.messages_imported, 0);

    let canonical_conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(
        canonical_conversations.len(),
        0,
        "skip path must not import anything"
    );

    let already_imported = storage.historical_bundle_already_imported(&bundle).unwrap();
    assert!(
        already_imported,
        "skipped bundle must be ledgered as salvaged so future runs short-circuit"
    );

    // The progress checkpoint row must no longer exist in `meta`.
    let checkpoint_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let checkpoint_row: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![checkpoint_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        checkpoint_row.is_none(),
        "skip path must clear the bundle progress checkpoint"
    );
}
20039
/// The user-facing listing and the lexical-rebuild listing order the same rows
/// differently: the former returns a, c, b for start times 3000, 2000, 1000,
/// while the rebuild listing returns them in insertion order a, b, c and can
/// be paged by "after id" / "through id" cursors.
#[test]
fn list_conversations_for_lexical_rebuild_uses_stable_id_order() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation keyed by its source path.
    fn make_conv(source_path: &str, started_at: i64) -> Conversation {
        let opening = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(started_at),
            content: format!("message for {source_path}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(source_path.to_string()),
            title: Some(source_path.to_string()),
            source_path: PathBuf::from(source_path),
            started_at: Some(started_at),
            ended_at: Some(started_at + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opening],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    // Expected-value helper: turns string paths into owned `PathBuf`s.
    fn paths(raw: &[&str]) -> Vec<PathBuf> {
        raw.iter().map(|p| PathBuf::from(*p)).collect()
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Rows are inserted in a, b, c order while their start times are 3000, 1000, 2000.
    let inserted = [
        make_conv("/tmp/a.jsonl", 3_000),
        make_conv("/tmp/b.jsonl", 1_000),
        make_conv("/tmp/c.jsonl", 2_000),
    ];
    for conv in &inserted {
        storage
            .insert_conversation_tree(agent_id, None, conv)
            .unwrap();
    }

    let user_listing = storage.list_conversations(10, 0).unwrap();
    let user_order: Vec<PathBuf> = user_listing
        .into_iter()
        .map(|entry| entry.source_path)
        .collect();
    assert_eq!(
        user_order,
        paths(&["/tmp/a.jsonl", "/tmp/c.jsonl", "/tmp/b.jsonl"])
    );

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // A single large page returns every row in insertion order.
    let rebuild_listing = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    let rebuild_order: Vec<PathBuf> = rebuild_listing
        .into_iter()
        .map(|entry| entry.source_path)
        .collect();
    assert_eq!(
        rebuild_order,
        paths(&["/tmp/a.jsonl", "/tmp/b.jsonl", "/tmp/c.jsonl"])
    );

    // Page size 2 starting after id 0.
    let first_page = storage
        .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    let first_page_paths: Vec<PathBuf> = first_page
        .iter()
        .map(|entry| entry.source_path.clone())
        .collect();
    assert_eq!(first_page_paths, paths(&["/tmp/a.jsonl", "/tmp/b.jsonl"]));

    // Resume after the last id of the first page.
    let resume_after = first_page
        .last()
        .and_then(|entry| entry.id)
        .expect("first page should include an id");
    let second_page = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            resume_after,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let second_page_paths: Vec<PathBuf> = second_page
        .iter()
        .map(|entry| entry.source_path.clone())
        .collect();
    assert_eq!(second_page_paths, paths(&["/tmp/c.jsonl"]));

    // Bounded variant: after id 0, through the last id of the first page.
    let upper_bound = first_page
        .last()
        .and_then(|entry| entry.id)
        .expect("first page should include an id");
    let bounded_page = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            10,
            0,
            upper_bound,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let bounded_paths: Vec<PathBuf> = bounded_page
        .iter()
        .map(|entry| entry.source_path.clone())
        .collect();
    assert_eq!(bounded_paths, paths(&["/tmp/a.jsonl", "/tmp/b.jsonl"]));
}
20177
/// Paging the lexical-rebuild listing by "after id" must step over deleted
/// ids: with ids 2 and 4 removed from six inserted rows, pages of size 2
/// yield [1, 3], then [5, 6], then nothing.
#[test]
fn keyset_traversal_handles_sparse_holey_conversation_ids() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation named after `label`.
    fn make_conv(label: &str, ts: i64) -> Conversation {
        let opening = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opening],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    for offset in 0..6 {
        let label = format!("conv-{offset}");
        let conv = make_conv(&label, 1000 + offset);
        storage
            .insert_conversation_tree(agent_id, None, &conv)
            .unwrap();
    }

    // Punch holes in the id sequence; foreign keys are switched off around the raw deletes.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (2, 4)",
        "DELETE FROM messages WHERE conversation_id IN (2, 4)",
    ] {
        storage
            .conn
            .execute_compat(delete_sql, fparams![])
            .unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // First page: starts after id 0.
    let page1 = storage
        .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(page1.len(), 2);
    let page1_ids: Vec<i64> = page1.iter().map(|entry| entry.id.unwrap()).collect();
    assert_eq!(page1_ids, vec![1, 3]);

    // Second page: resumes after the last id seen on page one.
    let cursor_after_page1 = *page1_ids.last().unwrap();
    let page2 = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            cursor_after_page1,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    assert_eq!(page2.len(), 2);
    let page2_ids: Vec<i64> = page2.iter().map(|entry| entry.id.unwrap()).collect();
    assert_eq!(page2_ids, vec![5, 6]);

    // Third page: nothing is left after the last id of page two.
    let cursor_after_page2 = *page2_ids.last().unwrap();
    let page3 = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            cursor_after_page2,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    assert!(page3.is_empty());

    // Concatenating the pages gives every surviving id exactly once, in order.
    let mut all_ids: Vec<i64> = page1_ids.clone();
    all_ids.extend_from_slice(&page2_ids);
    assert_eq!(all_ids, vec![1, 3, 5, 6]);
}
20278
/// The bounded "after id, through id" listing must respect both bounds when
/// ids are sparse: ten rows are inserted, ids 3, 5, 7 and 8 are deleted, and
/// three windows are checked against the surviving ids.
#[test]
fn keyset_traversal_through_id_with_sparse_ranges() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation named after `label`.
    fn make_conv(label: &str, ts: i64) -> Conversation {
        let opening = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opening],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    for offset in 0..10 {
        let label = format!("conv-{offset}");
        let conv = make_conv(&label, 1000 + offset);
        storage
            .insert_conversation_tree(agent_id, None, &conv)
            .unwrap();
    }

    // Remove ids 3, 5, 7, 8 with foreign keys switched off around the raw deletes.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (3, 5, 7, 8)",
        "DELETE FROM messages WHERE conversation_id IN (3, 5, 7, 8)",
    ] {
        storage
            .conn
            .execute_compat(delete_sql, fparams![])
            .unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Window (0, 5]: the upper bound itself was deleted.
    let through_5 = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            100,
            0,
            5,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let through_5_ids: Vec<i64> = through_5.iter().map(|entry| entry.id.unwrap()).collect();
    assert_eq!(through_5_ids, vec![1, 2, 4]);

    // Window (4, 10]: the lower bound is excluded, the upper bound is included.
    let after_4_through_10 = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            100,
            4,
            10,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let window_ids: Vec<i64> = after_4_through_10
        .iter()
        .map(|entry| entry.id.unwrap())
        .collect();
    assert_eq!(window_ids, vec![6, 9, 10]);

    // Window (10, 20]: lies entirely past the last inserted row.
    let after_10 = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            100,
            10,
            20,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    assert!(after_10.is_empty());
}
20385
/// Footprint rows are expected in insertion order, with `message_bytes` equal
/// to `message_count` times the planner's per-message estimate. Covered cases:
/// two ASCII messages, no messages at all, one non-ASCII message whose
/// `conversation_tail_state` row was deleted, and one message at idx 10
/// (expected to count as 11).
#[test]
fn list_conversation_footprints_for_lexical_rebuild_estimates_bytes_and_keeps_empty_conversations()
{
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Message with no author, id, extra JSON, or snippets.
    fn msg(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Inserts a conversation carrying `messages` and returns its row id.
    let insert = |external_id: &str, base_ts: i64, messages: Vec<Message>| {
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some(external_id.to_string()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap()
            .conversation_id
    };

    let ascii_id = insert(
        "footprint-ascii",
        1_700_000_000_000,
        vec![
            msg(0, MessageRole::User, 1_700_000_000_001, "abc"),
            msg(1, MessageRole::Agent, 1_700_000_000_002, "defg"),
        ],
    );
    let empty_id = insert("footprint-empty", 1_700_000_001_000, Vec::new());
    let utf8_id = insert(
        "footprint-utf8",
        1_700_000_002_000,
        vec![msg(0, MessageRole::Tool, 1_700_000_002_001, "hé🙂")],
    );
    let sparse_id = insert(
        "footprint-sparse",
        1_700_000_003_000,
        vec![msg(10, MessageRole::User, 1_700_000_003_010, "sparse")],
    );

    // Drop the tail-state row for the non-ASCII conversation only.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![utf8_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![
        LexicalRebuildConversationFootprintRow {
            conversation_id: ascii_id,
            message_count: 2,
            message_bytes: 2 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: empty_id,
            message_count: 0,
            message_bytes: 0,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: utf8_id,
            message_count: 1,
            message_bytes: LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: sparse_id,
            message_count: 11,
            message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
    ];
    assert_eq!(footprints, expected);
}
20521
/// With the conversation row's tail columns set to NULL and its
/// `conversation_tail_state` row deleted, the footprint listing must still
/// report the conversation (a single message at idx 10) as 11 messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_falls_back_for_missing_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // The only message sits at idx 10, leaving idx 0..=9 unused.
    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail".to_string()),
        title: Some("footprint-missing-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Wipe the tail columns on the conversation row, then its tail-state row.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = NULL, last_message_created_at = NULL
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "missing tail-cache metadata should fall back to messages MAX(idx) instead of treating legacy conversations as empty"
    );
}
20601
/// When both cached tail locations claim the last message idx is 0 although
/// three messages exist, the footprint listing must still report 3 messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_raises_stale_low_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Three consecutive user messages at idx 0, 1, 2.
    let mut messages = Vec::with_capacity(3);
    for idx in 0..3 {
        messages.push(Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_010 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-stale-tail".to_string()),
        title: Some("footprint-stale-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-stale-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Rewind the cached tail to the first message in both places it is stored.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = 0, last_message_created_at = 1700000000010
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversation_tail_state
             SET last_message_idx = 0, last_message_created_at = 1700000000010
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 3,
        message_bytes: 3 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "stale-low tail caches must not under-plan lexical shards and trip doc>plan invariants"
    );
}
20685
/// The footprint listing must keep working after the
/// `conversation_tail_state` table is dropped and the conversation row's tail
/// columns are NULL; a single message at idx 10 is expected to count as 11.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_missing_tail_state_table() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // The only message sits at idx 10, leaving idx 0..=9 unused.
    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail without hot table".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail-table".to_string()),
        title: Some("footprint-missing-tail-table".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail-table.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Null the tail columns first, then remove the tail-state table entirely.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = NULL, last_message_created_at = NULL
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    storage
        .conn
        .execute_compat("DROP TABLE conversation_tail_state", fparams![])
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "read-only lexical self-heal must tolerate pre-tail-cache databases and use messages MAX(idx)"
    );
}
20762
/// Opens the checked-in `tests/fixtures/search_demo_data/agent_search.db`
/// read-only and requires the footprint listing to be non-empty with a
/// positive message count for every conversation.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_legacy_search_demo_fixture() {
    // Resolve the fixture relative to the crate root.
    let mut fixture_db = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for segment in ["tests", "fixtures", "search_demo_data", "agent_search.db"] {
        fixture_db.push(segment);
    }
    let storage = FrankenStorage::open_readonly(&fixture_db).unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let listed_anything = !footprints.is_empty();
    assert!(
        listed_anything,
        "search self-heal should be able to plan a lexical rebuild from the legacy search demo fixture"
    );

    let every_conversation_has_messages = footprints.iter().all(|entry| entry.message_count > 0);
    assert!(
        every_conversation_has_messages,
        "legacy fixture conversations should derive message counts from messages when tail caches are absent"
    );
}
20787
/// A stored row with a whitespace-only `source_id` and an `origin_host` of
/// "dev@laptop" must surface with `source_id == "dev@laptop"` from both the
/// regular listing and the lexical-rebuild listing.
#[test]
fn lexical_rebuild_listing_normalizes_host_only_remote_source_from_blank_source_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("legacy-blank-source".into()),
        title: Some("Legacy blank source".into()),
        source_path: PathBuf::from("/tmp/legacy-blank-source.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Overwrite the stored provenance in place, with foreign keys switched off
    // around the raw update.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE id = ?3",
            fparams!["  ", "dev@laptop", conversation_id],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    // Regular listing.
    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let listed_row = &listed[0];
    assert_eq!(listed_row.source_id, "dev@laptop");
    assert_eq!(listed_row.origin_host.as_deref(), Some("dev@laptop"));

    // Lexical-rebuild listing.
    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
    let rebuild_listed = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(rebuild_listed.len(), 1);
    let rebuild_row = &rebuild_listed[0];
    assert_eq!(rebuild_row.source_id, "dev@laptop");
    assert_eq!(rebuild_row.origin_host.as_deref(), Some("dev@laptop"));
}
20856
20857 #[test]
20858 fn seed_canonical_from_best_historical_bundle_copies_data_and_resets_runtime_meta() {
20859 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20860 use std::path::PathBuf;
20861
20862 let dir = TempDir::new().unwrap();
20863 let canonical_db = dir.path().join("agent_search.db");
20864 let source_db = dir
20865 .path()
20866 .join("backups/agent_search.db.20260322T020200.bak");
20867
20868 fs::create_dir_all(source_db.parent().unwrap()).unwrap();
20869
20870 let source = SqliteStorage::open(&source_db).unwrap();
20871 let agent = Agent {
20872 id: None,
20873 slug: "codex".into(),
20874 name: "Codex".into(),
20875 version: Some("0.2.3".into()),
20876 kind: AgentKind::Cli,
20877 };
20878 let agent_id = source.ensure_agent(&agent).unwrap();
20879 let conversation = Conversation {
20880 id: None,
20881 agent_slug: "codex".into(),
20882 workspace: Some(PathBuf::from("/tmp/workspace")),
20883 external_id: Some("seed-conv".into()),
20884 title: Some("Historical seed".into()),
20885 source_path: PathBuf::from("/tmp/historical-seed.jsonl"),
20886 started_at: Some(1_700_000_000_000),
20887 ended_at: Some(1_700_000_000_100),
20888 approx_tokens: Some(42),
20889 metadata_json: serde_json::json!({"seed": true}),
20890 messages: vec![Message {
20891 id: None,
20892 idx: 0,
20893 role: MessageRole::Agent,
20894 author: Some("assistant".into()),
20895 created_at: Some(1_700_000_000_050),
20896 content: "seeded message".into(),
20897 extra_json: serde_json::json!({"usage": {"total_tokens": 12}}),
20898 snippets: Vec::new(),
20899 }],
20900 source_id: LOCAL_SOURCE_ID.into(),
20901 origin_host: None,
20902 };
20903 source
20904 .insert_conversation_tree(agent_id, None, &conversation)
20905 .unwrap();
20906 source.set_last_scan_ts(123).unwrap();
20907 source.set_last_indexed_at(456).unwrap();
20908 source.set_last_embedded_message_id(789).unwrap();
20909 source
20910 .conn
20911 .execute_compat(
20912 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
20913 fparams!["historical_bundle_salvaged:stale", "{\"stale\":true}"],
20914 )
20915 .unwrap();
20916 drop(source);
20917
20918 let legacy_v13_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, content='', tokenize='porter')";
20929 let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
20930 let legacy = rusqlite_test_fixture_conn(&source_db);
20931 legacy
20932 .execute_batch(
20933 "UPDATE meta SET value = '13' WHERE key = 'schema_version';
20934 DELETE FROM _schema_migrations WHERE version = 14;
20935 PRAGMA writable_schema = ON;",
20936 )
20937 .unwrap();
20938 legacy
20939 .execute(
20940 "DELETE FROM meta WHERE key = ?1",
20941 [FTS_FRANKEN_REBUILD_META_KEY],
20942 )
20943 .unwrap();
20944 legacy
20946 .execute(
20947 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
20948 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
20949 [legacy_v13_fts_sql],
20950 )
20951 .unwrap();
20952 legacy
20954 .execute(
20955 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
20956 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
20957 [duplicate_legacy_fts_sql],
20958 )
20959 .unwrap();
20960 legacy
20961 .execute_batch("PRAGMA writable_schema = OFF;")
20962 .unwrap();
20963 drop(legacy);
20964
20965 {
20968 let verify = rusqlite_test_fixture_conn(&source_db);
20969 verify
20970 .execute_batch("PRAGMA writable_schema = ON;")
20971 .unwrap();
20972 let fts_entries: i64 = verify
20973 .query_row(
20974 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
20975 [],
20976 |row| row.get(0),
20977 )
20978 .unwrap();
20979 assert_eq!(
20980 fts_entries, 2,
20981 "test fixture should reproduce the duplicate legacy fts_messages rows"
20982 );
20983 let msg_count: i64 = verify
20984 .query_row("SELECT COUNT(*) FROM messages", [], |row| row.get(0))
20985 .unwrap();
20986 assert_eq!(msg_count, 1);
20987 }
20988
20989 let fresh = SqliteStorage::open(&canonical_db).unwrap();
20990 drop(fresh);
20991
20992 let outcome = seed_canonical_from_best_historical_bundle(&canonical_db)
20993 .unwrap()
20994 .unwrap();
20995 assert_eq!(outcome.bundles_imported, 1);
20996 assert_eq!(outcome.conversations_imported, 1);
20997 assert_eq!(outcome.messages_imported, 1);
20998
20999 let readonly = open_franken_with_flags(
21000 &canonical_db.to_string_lossy(),
21001 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
21002 )
21003 .unwrap();
21004 let readonly_message_count: i64 = readonly
21005 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
21006 row.get_typed(0)
21007 })
21008 .unwrap();
21009 assert_eq!(readonly_message_count, 1);
21010
21011 let seeded = SqliteStorage::open(&canonical_db).unwrap();
21012 assert_eq!(
21013 seeded
21014 .count_sessions_in_range(None, None, None, None)
21015 .unwrap()
21016 .0,
21017 1
21018 );
21019 let message_count: i64 = seeded
21020 .conn
21021 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
21022 row.get_typed(0)
21023 })
21024 .unwrap();
21025 assert_eq!(message_count, 1);
21026 assert_eq!(seeded.get_last_scan_ts().unwrap(), None);
21027 assert_eq!(seeded.get_last_embedded_message_id().unwrap(), None);
21028
21029 let last_indexed: Option<String> = seeded
21030 .conn
21031 .query_row_map(
21032 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
21033 fparams![],
21034 |row| row.get_typed(0),
21035 )
21036 .optional()
21037 .unwrap();
21038 assert!(last_indexed.is_none());
21039
21040 let salvage_keys: Vec<String> = seeded
21041 .conn
21042 .query_map_collect(
21043 "SELECT key FROM meta WHERE key LIKE 'historical_bundle_salvaged:%' ORDER BY key",
21044 fparams![],
21045 |row| row.get_typed(0),
21046 )
21047 .unwrap();
21048 assert_eq!(salvage_keys.len(), 1);
21049
21050 let reopened_readonly = open_franken_with_flags(
21051 &canonical_db.to_string_lossy(),
21052 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
21053 )
21054 .unwrap();
21055 let reopened_fts_entries: i64 = reopened_readonly
21056 .query_row_map(
21057 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
21058 fparams![],
21059 |row| row.get_typed(0),
21060 )
21061 .unwrap();
21062 assert_eq!(
21063 reopened_fts_entries, 1,
21064 "seeded canonical db should keep a single stock-SQLite fts_messages schema row"
21065 );
21066 let reopened_message_count: i64 = reopened_readonly
21067 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
21068 row.get_typed(0)
21069 })
21070 .unwrap();
21071 assert_eq!(reopened_message_count, 1);
21072
21073 let franken_seeded = FrankenStorage::open(&canonical_db).unwrap();
21074 assert_eq!(
21075 franken_seeded.schema_version().unwrap(),
21076 CURRENT_SCHEMA_VERSION
21077 );
21078 franken_seeded
21085 .ensure_search_fallback_fts_consistency()
21086 .expect("ensure FTS consistency after seed");
21087 let post_franken_schema_rows: i64 = franken_seeded
21088 .raw()
21089 .query_row_map(
21090 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
21091 fparams![],
21092 |row| row.get_typed(0),
21093 )
21094 .unwrap();
21095 assert_eq!(post_franken_schema_rows, 1);
21096 let fts_probe = franken_seeded
21097 .raw()
21098 .query("SELECT COUNT(*) FROM fts_messages");
21099 assert!(
21100 fts_probe.is_ok(),
21101 "expected post-seed FTS to be queryable, got {fts_probe:?}"
21102 );
21103 }
21104
21105 #[test]
21106 fn failed_baseline_seed_preserves_existing_canonical_bundle() {
21107 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21108 use std::path::PathBuf;
21109
21110 let dir = TempDir::new().unwrap();
21111 let canonical_db = dir.path().join("agent_search.db");
21112 let source_db = dir
21113 .path()
21114 .join("backups/agent_search.db.20260325T120000Z.bad-seed.bak");
21115
21116 fs::create_dir_all(source_db.parent().unwrap()).unwrap();
21117
21118 let canonical = SqliteStorage::open(&canonical_db).unwrap();
21119 canonical
21120 .conn
21121 .execute_compat(
21122 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
21123 fparams!["sentinel", "keep-me"],
21124 )
21125 .unwrap();
21126 drop(canonical);
21127
21128 let source = SqliteStorage::open(&source_db).unwrap();
21129 let agent = Agent {
21130 id: None,
21131 slug: "codex".into(),
21132 name: "Codex".into(),
21133 version: Some("0.2.3".into()),
21134 kind: AgentKind::Cli,
21135 };
21136 let agent_id = source.ensure_agent(&agent).unwrap();
21137 let conversation = Conversation {
21138 id: None,
21139 agent_slug: "codex".into(),
21140 workspace: Some(PathBuf::from("/tmp/workspace")),
21141 external_id: Some("bad-seed-conv".into()),
21142 title: Some("Bad seed".into()),
21143 source_path: PathBuf::from("/tmp/bad-seed.jsonl"),
21144 started_at: Some(1_700_000_000_000),
21145 ended_at: Some(1_700_000_000_100),
21146 approx_tokens: Some(42),
21147 metadata_json: serde_json::json!({"seed": "bad"}),
21148 messages: vec![Message {
21149 id: None,
21150 idx: 0,
21151 role: MessageRole::Agent,
21152 author: Some("assistant".into()),
21153 created_at: Some(1_700_000_000_050),
21154 content: "this seed should fail".into(),
21155 extra_json: serde_json::Value::Null,
21156 snippets: Vec::new(),
21157 }],
21158 source_id: LOCAL_SOURCE_ID.into(),
21159 origin_host: None,
21160 };
21161 source
21162 .insert_conversation_tree(agent_id, None, &conversation)
21163 .unwrap();
21164 drop(source);
21165
21166 let legacy = FrankenConnection::open(source_db.to_string_lossy().into_owned()).unwrap();
21167 legacy
21168 .execute("UPDATE meta SET value = '12' WHERE key = 'schema_version'")
21169 .unwrap();
21170 drop(legacy);
21171
21172 let err = seed_canonical_from_best_historical_bundle(&canonical_db).unwrap_err();
21173 assert!(
21174 err.to_string()
21175 .contains("schema_version 12 is too old for baseline import"),
21176 "unexpected seed error: {err:#}"
21177 );
21178
21179 let reopened = SqliteStorage::open(&canonical_db).unwrap();
21180 let sentinel: Option<String> = reopened
21181 .conn
21182 .query_row_map(
21183 "SELECT value FROM meta WHERE key = 'sentinel'",
21184 fparams![],
21185 |row| row.get_typed(0),
21186 )
21187 .optional()
21188 .unwrap();
21189 assert_eq!(sentinel.as_deref(), Some("keep-me"));
21190
21191 let conversation_count: i64 = reopened
21192 .conn
21193 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
21194 row.get_typed(0)
21195 })
21196 .unwrap();
21197 assert_eq!(conversation_count, 0);
21198
21199 let readonly = open_franken_with_flags(
21200 &canonical_db.to_string_lossy(),
21201 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
21202 )
21203 .unwrap();
21204 let readonly_conversation_count: i64 = readonly
21205 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
21206 row.get_typed(0)
21207 })
21208 .unwrap();
21209 assert_eq!(readonly_conversation_count, 0);
21210 }
21211
21212 #[test]
21213 fn fetch_messages_for_lexical_rebuild_skips_extra_json() {
21214 let dir = TempDir::new().unwrap();
21215 let db_path = dir.path().join("test.db");
21216 let storage = SqliteStorage::open(&db_path).unwrap();
21217
21218 let agent = Agent {
21219 id: None,
21220 slug: "codex".into(),
21221 name: "Codex".into(),
21222 version: Some("0.2.3".into()),
21223 kind: AgentKind::Cli,
21224 };
21225 let agent_id = storage.ensure_agent(&agent).unwrap();
21226
21227 let conversation = Conversation {
21228 id: None,
21229 agent_slug: "codex".into(),
21230 workspace: Some(PathBuf::from("/tmp/workspace")),
21231 external_id: Some("lexical-rebuild-test".into()),
21232 title: Some("Lexical rebuild".into()),
21233 source_path: PathBuf::from("/tmp/lexical-rebuild.jsonl"),
21234 started_at: Some(1_700_000_000_000),
21235 ended_at: Some(1_700_000_000_100),
21236 approx_tokens: Some(42),
21237 metadata_json: serde_json::Value::Null,
21238 messages: vec![Message {
21239 id: None,
21240 idx: 0,
21241 role: MessageRole::Agent,
21242 author: Some("assistant".into()),
21243 created_at: Some(1_700_000_000_050),
21244 content: "indexed text".into(),
21245 extra_json: serde_json::json!({
21246 "usage": { "total_tokens": 1234 },
21247 "irrelevant_blob": "still preserved in canonical storage"
21248 }),
21249 snippets: Vec::new(),
21250 }],
21251 source_id: LOCAL_SOURCE_ID.into(),
21252 origin_host: None,
21253 };
21254
21255 let inserted = storage
21256 .insert_conversation_tree(agent_id, None, &conversation)
21257 .unwrap();
21258 let conversation_id = inserted.conversation_id;
21259
21260 let stored = storage.fetch_messages(conversation_id).unwrap();
21261 assert_eq!(stored.len(), 1);
21262 assert!(!stored[0].extra_json.is_null());
21263
21264 let lexical = storage
21265 .fetch_messages_for_lexical_rebuild(conversation_id)
21266 .unwrap();
21267 assert_eq!(lexical.len(), 1);
21268 assert_eq!(lexical[0].content, "indexed text");
21269 assert_eq!(lexical[0].author.as_deref(), Some("assistant"));
21270 assert!(lexical[0].extra_json.is_null());
21271 }
21272
21273 #[test]
21274 fn fetch_messages_for_lexical_rebuild_batch_groups_and_orders_messages() {
21275 let dir = TempDir::new().unwrap();
21276 let db_path = dir.path().join("test.db");
21277 let storage = SqliteStorage::open(&db_path).unwrap();
21278
21279 let agent = Agent {
21280 id: None,
21281 slug: "codex".into(),
21282 name: "Codex".into(),
21283 version: Some("0.2.3".into()),
21284 kind: AgentKind::Cli,
21285 };
21286 let agent_id = storage.ensure_agent(&agent).unwrap();
21287
21288 let first = Conversation {
21289 id: None,
21290 agent_slug: "codex".into(),
21291 workspace: Some(PathBuf::from("/tmp/workspace")),
21292 external_id: Some("lexical-batch-1".into()),
21293 title: Some("Lexical batch 1".into()),
21294 source_path: PathBuf::from("/tmp/lexical-batch-1.jsonl"),
21295 started_at: Some(1_700_000_000_000),
21296 ended_at: Some(1_700_000_000_100),
21297 approx_tokens: Some(42),
21298 metadata_json: serde_json::Value::Null,
21299 messages: vec![
21300 Message {
21301 id: None,
21302 idx: 0,
21303 role: MessageRole::User,
21304 author: Some("user".into()),
21305 created_at: Some(1_700_000_000_010),
21306 content: "first-a".into(),
21307 extra_json: serde_json::json!({"opaque": true}),
21308 snippets: Vec::new(),
21309 },
21310 Message {
21311 id: None,
21312 idx: 1,
21313 role: MessageRole::Agent,
21314 author: Some("assistant".into()),
21315 created_at: Some(1_700_000_000_020),
21316 content: "first-b".into(),
21317 extra_json: serde_json::json!({"opaque": true}),
21318 snippets: Vec::new(),
21319 },
21320 ],
21321 source_id: LOCAL_SOURCE_ID.into(),
21322 origin_host: None,
21323 };
21324
21325 let second = Conversation {
21326 id: None,
21327 agent_slug: "codex".into(),
21328 workspace: Some(PathBuf::from("/tmp/workspace")),
21329 external_id: Some("lexical-batch-2".into()),
21330 title: Some("Lexical batch 2".into()),
21331 source_path: PathBuf::from("/tmp/lexical-batch-2.jsonl"),
21332 started_at: Some(1_700_000_000_200),
21333 ended_at: Some(1_700_000_000_300),
21334 approx_tokens: Some(84),
21335 metadata_json: serde_json::Value::Null,
21336 messages: vec![Message {
21337 id: None,
21338 idx: 0,
21339 role: MessageRole::Tool,
21340 author: Some("tool".into()),
21341 created_at: Some(1_700_000_000_210),
21342 content: "second-a".into(),
21343 extra_json: serde_json::json!({"opaque": true}),
21344 snippets: Vec::new(),
21345 }],
21346 source_id: LOCAL_SOURCE_ID.into(),
21347 origin_host: None,
21348 };
21349 let third = Conversation {
21350 external_id: Some("lexical-batch-3".into()),
21351 title: Some("Lexical batch 3".into()),
21352 source_path: PathBuf::from("/tmp/lexical-batch-3.jsonl"),
21353 messages: vec![Message {
21354 id: None,
21355 idx: 0,
21356 role: MessageRole::System,
21357 author: Some("system".into()),
21358 created_at: Some(1_700_000_000_410),
21359 content: "third-a".into(),
21360 extra_json: serde_json::json!({"opaque": true}),
21361 snippets: Vec::new(),
21362 }],
21363 ..second.clone()
21364 };
21365
21366 let first_id = storage
21367 .insert_conversation_tree(agent_id, None, &first)
21368 .unwrap()
21369 .conversation_id;
21370 let second_id = storage
21371 .insert_conversation_tree(agent_id, None, &second)
21372 .unwrap()
21373 .conversation_id;
21374 let third_id = storage
21375 .insert_conversation_tree(agent_id, None, &third)
21376 .unwrap()
21377 .conversation_id;
21378
21379 let lexical = storage
21380 .fetch_messages_for_lexical_rebuild_batch(&[third_id, first_id], None, None)
21381 .unwrap();
21382
21383 let first_messages = lexical.get(&first_id).expect("first conversation");
21384 assert_eq!(first_messages.len(), 2);
21385 assert_eq!(first_messages[0].content, "first-a");
21386 assert_eq!(first_messages[1].content, "first-b");
21387 assert!(
21388 first_messages
21389 .iter()
21390 .all(|message| message.extra_json.is_null())
21391 );
21392
21393 assert!(
21394 !lexical.contains_key(&second_id),
21395 "batch fetch must exclude conversations not requested by the caller"
21396 );
21397
21398 let third_messages = lexical.get(&third_id).expect("third conversation");
21399 assert_eq!(third_messages.len(), 1);
21400 assert_eq!(third_messages[0].content, "third-a");
21401 assert!(third_messages[0].extra_json.is_null());
21402 }
21403
21404 #[test]
21405 fn fetch_messages_for_lexical_rebuild_batch_enforces_content_byte_guardrail() {
21406 let dir = TempDir::new().unwrap();
21407 let db_path = dir.path().join("test.db");
21408 let storage = SqliteStorage::open(&db_path).unwrap();
21409
21410 let agent = Agent {
21411 id: None,
21412 slug: "codex".into(),
21413 name: "Codex".into(),
21414 version: Some("0.2.3".into()),
21415 kind: AgentKind::Cli,
21416 };
21417 let agent_id = storage.ensure_agent(&agent).unwrap();
21418
21419 let conversation = Conversation {
21420 id: None,
21421 agent_slug: "codex".into(),
21422 workspace: Some(PathBuf::from("/tmp/workspace")),
21423 external_id: Some("lexical-batch-guard".into()),
21424 title: Some("Lexical batch guard".into()),
21425 source_path: PathBuf::from("/tmp/lexical-batch-guard.jsonl"),
21426 started_at: Some(1_700_000_000_000),
21427 ended_at: Some(1_700_000_000_100),
21428 approx_tokens: Some(42),
21429 metadata_json: serde_json::Value::Null,
21430 messages: vec![
21431 Message {
21432 id: None,
21433 idx: 0,
21434 role: MessageRole::User,
21435 author: Some("user".into()),
21436 created_at: Some(1_700_000_000_010),
21437 content: "123456".into(),
21438 extra_json: serde_json::Value::Null,
21439 snippets: Vec::new(),
21440 },
21441 Message {
21442 id: None,
21443 idx: 1,
21444 role: MessageRole::Agent,
21445 author: Some("assistant".into()),
21446 created_at: Some(1_700_000_000_020),
21447 content: "abcdef".into(),
21448 extra_json: serde_json::Value::Null,
21449 snippets: Vec::new(),
21450 },
21451 ],
21452 source_id: LOCAL_SOURCE_ID.into(),
21453 origin_host: None,
21454 };
21455
21456 let conversation_id = storage
21457 .insert_conversation_tree(agent_id, None, &conversation)
21458 .unwrap()
21459 .conversation_id;
21460
21461 let error = storage
21462 .fetch_messages_for_lexical_rebuild_batch(&[conversation_id], Some(10), Some(8))
21463 .expect_err("guardrail should reject oversized batch content");
21464
21465 let message = format!("{error:#}");
21466 assert!(
21467 message.contains("content-byte guardrail"),
21468 "expected guardrail reason in error, got {message}"
21469 );
21470 }
21471
21472 #[test]
21473 fn fetch_messages_handles_manual_rows_inserted_via_raw_connection() {
21474 let dir = TempDir::new().unwrap();
21475 let db_path = dir.path().join("manual-rows.db");
21476 let storage = FrankenStorage::open(&db_path).unwrap();
21477 let conn = storage.raw();
21478
21479 conn.execute(
21480 "INSERT INTO agents (id, slug, name, kind, created_at, updated_at)
21481 VALUES (1, 'claude_code', 'Claude Code', 'local', 0, 0)",
21482 )
21483 .unwrap();
21484 conn.execute(
21485 "INSERT INTO conversations
21486 (id, agent_id, external_id, title, source_path, source_id, started_at)
21487 VALUES (1, 1, 'manual-ext', 'Manual Session', '/tmp/manual.jsonl', 'local', 200)",
21488 )
21489 .unwrap();
21490 conn.execute(
21491 "INSERT INTO messages
21492 (id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
21493 VALUES (1, 1, 0, 'user', 'tester', 1700000000000, 'manual body', '{\"k\":1}', NULL)",
21494 )
21495 .unwrap();
21496
21497 let lexical = storage.fetch_messages_for_lexical_rebuild(1).unwrap();
21498 assert_eq!(lexical.len(), 1);
21499 assert_eq!(lexical[0].content, "manual body");
21500
21501 let full = storage.fetch_messages(1).unwrap();
21502 assert_eq!(full.len(), 1);
21503 assert_eq!(full[0].content, "manual body");
21504 assert_eq!(full[0].author.as_deref(), Some("tester"));
21505 assert_eq!(full[0].extra_json, serde_json::json!({ "k": 1 }));
21506 }
21507
21508 #[test]
21509 fn lexical_rebuild_batch_messages_query_avoids_sorter_temp_btrees() {
21510 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21511 use std::path::PathBuf;
21512
21513 let dir = TempDir::new().unwrap();
21514 let db_path = dir.path().join("agent_search.db");
21515 let storage = SqliteStorage::open(&db_path).unwrap();
21516
21517 let agent = Agent {
21518 id: None,
21519 slug: "claude_code".into(),
21520 name: "Claude Code".into(),
21521 version: None,
21522 kind: AgentKind::Cli,
21523 };
21524 let agent_id = storage.ensure_agent(&agent).unwrap();
21525
21526 for (external_id, base_ts) in [
21527 ("conv-1", 1_700_000_000_000_i64),
21528 ("conv-2", 1_700_000_001_000_i64),
21529 ] {
21530 let conversation = Conversation {
21531 id: None,
21532 agent_slug: "claude_code".into(),
21533 workspace: Some(PathBuf::from("/tmp/workspace")),
21534 external_id: Some(external_id.to_string()),
21535 title: Some("Lexical rebuild".into()),
21536 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21537 started_at: Some(base_ts),
21538 ended_at: Some(base_ts + 100),
21539 approx_tokens: None,
21540 metadata_json: serde_json::Value::Null,
21541 messages: vec![
21542 Message {
21543 id: None,
21544 idx: 0,
21545 role: MessageRole::User,
21546 author: Some("user".into()),
21547 created_at: Some(base_ts + 10),
21548 content: format!("{external_id}-first"),
21549 extra_json: serde_json::Value::Null,
21550 snippets: Vec::new(),
21551 },
21552 Message {
21553 id: None,
21554 idx: 1,
21555 role: MessageRole::Agent,
21556 author: Some("assistant".into()),
21557 created_at: Some(base_ts + 20),
21558 content: format!("{external_id}-second"),
21559 extra_json: serde_json::Value::Null,
21560 snippets: Vec::new(),
21561 },
21562 ],
21563 source_id: LOCAL_SOURCE_ID.into(),
21564 origin_host: None,
21565 };
21566 storage
21567 .insert_conversation_tree(agent_id, None, &conversation)
21568 .unwrap();
21569 }
21570
21571 let conversation_ids: Vec<i64> = storage
21572 .conn
21573 .query_map_collect(
21574 "SELECT id FROM conversations ORDER BY id",
21575 fparams![],
21576 |row| row.get_typed(0),
21577 )
21578 .unwrap();
21579 assert_eq!(conversation_ids.len(), 2);
21580
21581 let plan_details: Vec<String> = storage
21582 .conn
21583 .query_map_collect(
21584 "EXPLAIN QUERY PLAN \
21585 SELECT conversation_id, id, idx, role, author, created_at, content \
21586 FROM messages \
21587 WHERE conversation_id IN (?1, ?2) \
21588 ORDER BY conversation_id ASC, idx ASC",
21589 fparams![conversation_ids[0], conversation_ids[1]],
21590 |row| row.get_typed(3),
21591 )
21592 .unwrap();
21593
21594 assert!(
21595 plan_details
21596 .iter()
21597 .any(|detail| detail.contains("sqlite_autoindex_messages_1")),
21598 "expected batched lexical rebuild fetch to use the conversation_id/idx composite index, got {plan_details:?}"
21599 );
21600 assert!(
21601 !plan_details
21602 .iter()
21603 .any(|detail| detail.contains("TEMP B-TREE")),
21604 "expected batched lexical rebuild fetch to avoid sorter temp b-trees, got {plan_details:?}"
21605 );
21606 }
21607
21608 #[test]
21609 fn stream_messages_for_lexical_rebuild_groups_and_orders_messages() {
21610 let dir = TempDir::new().unwrap();
21611 let db_path = dir.path().join("test.db");
21612 let storage = SqliteStorage::open(&db_path).unwrap();
21613
21614 let agent = Agent {
21615 id: None,
21616 slug: "codex".into(),
21617 name: "Codex".into(),
21618 version: Some("0.2.3".into()),
21619 kind: AgentKind::Cli,
21620 };
21621 let agent_id = storage.ensure_agent(&agent).unwrap();
21622
21623 let first = Conversation {
21624 id: None,
21625 agent_slug: "codex".into(),
21626 workspace: Some(PathBuf::from("/tmp/workspace")),
21627 external_id: Some("lexical-stream-1".into()),
21628 title: Some("Lexical stream 1".into()),
21629 source_path: PathBuf::from("/tmp/lexical-stream-1.jsonl"),
21630 started_at: Some(1_700_000_000_000),
21631 ended_at: Some(1_700_000_000_100),
21632 approx_tokens: Some(42),
21633 metadata_json: serde_json::Value::Null,
21634 messages: vec![
21635 Message {
21636 id: None,
21637 idx: 0,
21638 role: MessageRole::User,
21639 author: Some("user".into()),
21640 created_at: Some(1_700_000_000_010),
21641 content: "first-a".into(),
21642 extra_json: serde_json::json!({"opaque": true}),
21643 snippets: Vec::new(),
21644 },
21645 Message {
21646 id: None,
21647 idx: 1,
21648 role: MessageRole::Agent,
21649 author: Some("assistant".into()),
21650 created_at: Some(1_700_000_000_020),
21651 content: "first-b".into(),
21652 extra_json: serde_json::json!({"opaque": true}),
21653 snippets: Vec::new(),
21654 },
21655 ],
21656 source_id: LOCAL_SOURCE_ID.into(),
21657 origin_host: None,
21658 };
21659
21660 let second = Conversation {
21661 id: None,
21662 agent_slug: "codex".into(),
21663 workspace: Some(PathBuf::from("/tmp/workspace")),
21664 external_id: Some("lexical-stream-2".into()),
21665 title: Some("Lexical stream 2".into()),
21666 source_path: PathBuf::from("/tmp/lexical-stream-2.jsonl"),
21667 started_at: Some(1_700_000_000_200),
21668 ended_at: Some(1_700_000_000_300),
21669 approx_tokens: Some(84),
21670 metadata_json: serde_json::Value::Null,
21671 messages: vec![Message {
21672 id: None,
21673 idx: 0,
21674 role: MessageRole::Tool,
21675 author: Some("tool".into()),
21676 created_at: Some(1_700_000_000_210),
21677 content: "second-a".into(),
21678 extra_json: serde_json::json!({"opaque": true}),
21679 snippets: Vec::new(),
21680 }],
21681 source_id: LOCAL_SOURCE_ID.into(),
21682 origin_host: None,
21683 };
21684
21685 let first_id = storage
21686 .insert_conversation_tree(agent_id, None, &first)
21687 .unwrap()
21688 .conversation_id;
21689 let second_id = storage
21690 .insert_conversation_tree(agent_id, None, &second)
21691 .unwrap()
21692 .conversation_id;
21693
21694 let mut streamed = Vec::new();
21695 storage
21696 .stream_messages_for_lexical_rebuild_from_conversation_id(first_id, |row| {
21697 streamed.push((
21698 row.conversation_id,
21699 row.idx,
21700 row.role,
21701 row.author,
21702 row.content,
21703 ));
21704 Ok(())
21705 })
21706 .unwrap();
21707
21708 assert_eq!(
21709 streamed,
21710 vec![
21711 (
21712 first_id,
21713 0,
21714 "user".to_string(),
21715 Some("user".to_string()),
21716 "first-a".to_string(),
21717 ),
21718 (
21719 first_id,
21720 1,
21721 "agent".to_string(),
21722 Some("assistant".to_string()),
21723 "first-b".to_string(),
21724 ),
21725 (
21726 second_id,
21727 0,
21728 "tool".to_string(),
21729 Some("tool".to_string()),
21730 "second-a".to_string(),
21731 ),
21732 ]
21733 );
21734 }
21735
21736 #[test]
21737 fn stream_messages_for_lexical_rebuild_between_conversation_ids_respects_upper_bound() {
21738 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21739 use std::path::PathBuf;
21740
21741 let dir = TempDir::new().unwrap();
21742 let db_path = dir.path().join("agent_search.db");
21743 let storage = SqliteStorage::open(&db_path).unwrap();
21744
21745 let agent = Agent {
21746 id: None,
21747 slug: "claude_code".into(),
21748 name: "Claude Code".into(),
21749 version: Some("1.2.3".into()),
21750 kind: AgentKind::Cli,
21751 };
21752 let agent_id = storage.ensure_agent(&agent).unwrap();
21753
21754 let first = Conversation {
21755 id: None,
21756 agent_slug: "claude_code".into(),
21757 workspace: Some(PathBuf::from("/tmp/workspace")),
21758 external_id: Some("lexical-range-1".into()),
21759 title: Some("Lexical range 1".into()),
21760 source_path: PathBuf::from("/tmp/lexical-range-1.jsonl"),
21761 started_at: Some(1_700_000_000_000),
21762 ended_at: Some(1_700_000_000_100),
21763 approx_tokens: Some(42),
21764 metadata_json: serde_json::Value::Null,
21765 messages: vec![Message {
21766 id: None,
21767 idx: 0,
21768 role: MessageRole::User,
21769 author: Some("user".into()),
21770 created_at: Some(1_700_000_000_010),
21771 content: "first-only".into(),
21772 extra_json: serde_json::json!({"opaque": true}),
21773 snippets: Vec::new(),
21774 }],
21775 source_id: LOCAL_SOURCE_ID.into(),
21776 origin_host: None,
21777 };
21778
21779 let second = Conversation {
21780 id: None,
21781 agent_slug: "claude_code".into(),
21782 workspace: Some(PathBuf::from("/tmp/workspace")),
21783 external_id: Some("lexical-range-2".into()),
21784 title: Some("Lexical range 2".into()),
21785 source_path: PathBuf::from("/tmp/lexical-range-2.jsonl"),
21786 started_at: Some(1_700_000_000_200),
21787 ended_at: Some(1_700_000_000_300),
21788 approx_tokens: Some(84),
21789 metadata_json: serde_json::Value::Null,
21790 messages: vec![Message {
21791 id: None,
21792 idx: 0,
21793 role: MessageRole::Tool,
21794 author: Some("tool".into()),
21795 created_at: Some(1_700_000_000_210),
21796 content: "second-should-not-appear".into(),
21797 extra_json: serde_json::json!({"opaque": true}),
21798 snippets: Vec::new(),
21799 }],
21800 source_id: LOCAL_SOURCE_ID.into(),
21801 origin_host: None,
21802 };
21803
21804 let first_id = storage
21805 .insert_conversation_tree(agent_id, None, &first)
21806 .unwrap()
21807 .conversation_id;
21808 let second_id = storage
21809 .insert_conversation_tree(agent_id, None, &second)
21810 .unwrap()
21811 .conversation_id;
21812
21813 let mut streamed = Vec::new();
21814 storage
21815 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21816 first_id,
21817 first_id,
21818 |row| {
21819 streamed.push((row.conversation_id, row.idx, row.content));
21820 Ok(())
21821 },
21822 )
21823 .unwrap();
21824
21825 assert_eq!(streamed, vec![(first_id, 0, "first-only".to_string())]);
21826 assert!(
21827 streamed
21828 .iter()
21829 .all(|(conversation_id, _, _)| *conversation_id != second_id),
21830 "upper bound should exclude later conversation ids"
21831 );
21832 }
21833
21834 #[test]
21835 fn stream_messages_for_lexical_rebuild_between_conversation_ids_handles_mixed_ranges() {
21836 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21837 use std::path::PathBuf;
21838
21839 let dir = TempDir::new().unwrap();
21840 let db_path = dir.path().join("agent_search.db");
21841 let storage = SqliteStorage::open(&db_path).unwrap();
21842
21843 let claude_agent_id = storage
21844 .ensure_agent(&Agent {
21845 id: None,
21846 slug: "claude_code".into(),
21847 name: "Claude Code".into(),
21848 version: None,
21849 kind: AgentKind::Cli,
21850 })
21851 .unwrap();
21852 let aider_agent_id = storage
21853 .ensure_agent(&Agent {
21854 id: None,
21855 slug: "aider".into(),
21856 name: "Aider".into(),
21857 version: None,
21858 kind: AgentKind::Cli,
21859 })
21860 .unwrap();
21861
21862 type MessageSpec = (i64, MessageRole, Option<String>, Option<i64>, String);
21863
21864 let mut expected = Vec::new();
21865 let mut first_conversation_id = None;
21866 let mut last_conversation_id = None;
21867 let mut insert_conversation =
21868 |agent_id: i64,
21869 external_id: &str,
21870 title: &str,
21871 source_path: &str,
21872 started_at: i64,
21873 message_specs: Vec<MessageSpec>| {
21874 let conversation = Conversation {
21875 id: None,
21876 agent_slug: if agent_id == aider_agent_id {
21877 "aider".into()
21878 } else {
21879 "claude_code".into()
21880 },
21881 workspace: Some(PathBuf::from("/tmp/workspace")),
21882 external_id: Some(external_id.to_string()),
21883 title: Some(title.to_string()),
21884 source_path: PathBuf::from(source_path),
21885 started_at: Some(started_at),
21886 ended_at: Some(started_at + 100),
21887 approx_tokens: None,
21888 metadata_json: serde_json::Value::Null,
21889 messages: message_specs
21890 .iter()
21891 .map(|(idx, role, author, created_at, content)| Message {
21892 id: None,
21893 idx: *idx,
21894 role: role.clone(),
21895 author: author.clone(),
21896 created_at: *created_at,
21897 content: content.clone(),
21898 extra_json: serde_json::Value::Null,
21899 snippets: Vec::new(),
21900 })
21901 .collect(),
21902 source_id: LOCAL_SOURCE_ID.into(),
21903 origin_host: None,
21904 };
21905 let conversation_id = storage
21906 .insert_conversation_tree(agent_id, None, &conversation)
21907 .unwrap()
21908 .conversation_id;
21909 if first_conversation_id.is_none() {
21910 first_conversation_id = Some(conversation_id);
21911 }
21912 last_conversation_id = Some(conversation_id);
21913 expected.extend(message_specs.into_iter().map(
21914 |(idx, role, author, created_at, content)| {
21915 (
21916 conversation_id,
21917 idx,
21918 match role {
21919 MessageRole::User => "user".to_string(),
21920 MessageRole::Agent => "agent".to_string(),
21921 MessageRole::Tool => "tool".to_string(),
21922 MessageRole::System => "system".to_string(),
21923 MessageRole::Other(other) => other,
21924 },
21925 author,
21926 created_at,
21927 content,
21928 )
21929 },
21930 ));
21931 };
21932
21933 for (label, base_ts) in [
21934 ("alpha", 1_700_000_000_000_i64),
21935 ("beta", 1_700_000_001_000_i64),
21936 ("gamma", 1_700_000_002_000_i64),
21937 ("delta", 1_700_000_003_000_i64),
21938 ("epsilon", 1_700_000_004_000_i64),
21939 ] {
21940 insert_conversation(
21941 claude_agent_id,
21942 &format!("lexical-{label}"),
21943 &format!("Lexical {label}"),
21944 &format!("/tmp/{label}.jsonl"),
21945 base_ts,
21946 vec![
21947 (
21948 0,
21949 MessageRole::User,
21950 None,
21951 Some(base_ts + 10),
21952 format!("{label}_content"),
21953 ),
21954 (
21955 1,
21956 MessageRole::Agent,
21957 None,
21958 Some(base_ts + 20),
21959 format!("{label}_content_response"),
21960 ),
21961 ],
21962 );
21963 }
21964
21965 insert_conversation(
21966 aider_agent_id,
21967 "lexical-aider-history",
21968 "Aider Chat: coding_agent_session_search",
21969 "/tmp/.aider.chat.history.md",
21970 1_764_619_673_394,
21971 vec![
21972 (
21973 0,
21974 MessageRole::System,
21975 Some("system".to_string()),
21976 None,
21977 "# aider chat started at 2025-12-01 20:07:47".to_string(),
21978 ),
21979 (
21980 1,
21981 MessageRole::User,
21982 Some("user".to_string()),
21983 None,
21984 "/tmp/workspace/.venv/bin/aider --no-git --message hello world".to_string(),
21985 ),
21986 ],
21987 );
21988 insert_conversation(
21989 aider_agent_id,
21990 "lexical-aider-fixture",
21991 "Aider Chat: aider",
21992 "/tmp/tests/fixtures/aider/.aider.chat.history.md",
21993 1_764_621_401_399,
21994 vec![
21995 (
21996 0,
21997 MessageRole::User,
21998 Some("user".to_string()),
21999 None,
22000 "/add src/main.rs".to_string(),
22001 ),
22002 (
22003 1,
22004 MessageRole::Agent,
22005 Some("assistant".to_string()),
22006 None,
22007 "Added src/main.rs to the chat.
22008
22009#### /add src/main.rs"
22010 .to_string(),
22011 ),
22012 (
22013 2,
22014 MessageRole::User,
22015 Some("user".to_string()),
22016 None,
22017 "Please refactor.".to_string(),
22018 ),
22019 (
22020 3,
22021 MessageRole::Agent,
22022 Some("assistant".to_string()),
22023 None,
22024 "Sure, here is the code.".to_string(),
22025 ),
22026 ],
22027 );
22028
22029 let mut streamed = Vec::new();
22030 storage
22031 .stream_messages_for_lexical_rebuild_between_conversation_ids(
22032 first_conversation_id.unwrap(),
22033 last_conversation_id.unwrap(),
22034 |row| {
22035 streamed.push((
22036 row.conversation_id,
22037 row.idx,
22038 row.role,
22039 row.author,
22040 row.created_at,
22041 row.content,
22042 ));
22043 Ok(())
22044 },
22045 )
22046 .unwrap();
22047
22048 assert_eq!(streamed, expected);
22049 }
22050
/// Checks the query plans behind the streamed lexical rebuild.
///
/// Seeds two conversations (two messages each), then runs `EXPLAIN QUERY PLAN`
/// for the id-range conversation listing and for the per-conversation message
/// fetch. Neither plan may mention a `TEMP B-TREE`, and the message fetch must
/// name `sqlite_autoindex_messages_1` or `idx_messages_conv_idx`.
#[test]
fn lexical_rebuild_stream_queries_use_rowid_and_per_conversation_probes() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();

    let seeds = [
        ("conv-1", 1_700_000_000_000_i64),
        ("conv-2", 1_700_000_001_000_i64),
    ];
    for (external_id, base_ts) in seeds {
        let user_turn = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(base_ts + 10),
            content: format!("{external_id}-first"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let agent_turn = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(base_ts + 20),
            content: format!("{external_id}-second"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conversation = Conversation {
            id: None,
            agent_slug: "claude_code".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some("Lexical rebuild".into()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![user_turn, agent_turn],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Reads the single integer produced by a parameterless query.
    let single_id = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let first_id = single_id("SELECT id FROM conversations ORDER BY id LIMIT 1");
    let last_id = single_id("SELECT id FROM conversations ORDER BY id DESC LIMIT 1");

    // Collects column 3 (the textual plan detail) of every plan row.
    let plan_details = |sql: &str, params: &[ParamValue]| -> Vec<String> {
        storage
            .conn
            .query_map_collect(sql, params, |row| row.get_typed(3))
            .unwrap()
    };
    let mentions_sorter =
        |details: &[String]| details.iter().any(|detail| detail.contains("TEMP B-TREE"));

    let conversation_plan_details = plan_details(
        "EXPLAIN QUERY PLAN SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
        fparams![first_id, last_id],
    );
    assert!(
        !mentions_sorter(&conversation_plan_details),
        "expected streamed lexical rebuild conversation listing to avoid sorter temp b-trees, got {conversation_plan_details:?}"
    );

    let message_plan_details = plan_details(
        "EXPLAIN QUERY PLAN SELECT id, idx, role, author, created_at, content FROM messages INDEXED BY sqlite_autoindex_messages_1 WHERE conversation_id = ?1 ORDER BY idx",
        fparams![first_id],
    );
    let uses_conversation_idx_index = message_plan_details.iter().any(|detail| {
        detail.contains("sqlite_autoindex_messages_1") || detail.contains("idx_messages_conv_idx")
    });
    assert!(
        uses_conversation_idx_index,
        "expected per-conversation lexical rebuild fetch to use the conversation_id/idx index, got {message_plan_details:?}"
    );
    assert!(
        !mentions_sorter(&message_plan_details),
        "expected per-conversation lexical rebuild fetch to avoid sorter temp b-trees, got {message_plan_details:?}"
    );
}
22168
/// Writes a 32-byte `agent_search.corrupt.small` beside the canonical database
/// and a 128-byte `.bak` under `backups/`, then expects discovery to list the
/// 128-byte file ahead of the 32-byte one.
#[test]
fn discover_historical_database_bundles_prefers_larger_archives_first() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();

    let canonical_db = root.join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    let small_archive = root.join("agent_search.corrupt.small");
    fs::write(&small_archive, vec![0_u8; 32]).unwrap();

    let backups_dir = root.join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let large_archive = backups_dir.join("agent_search.db.20260322T020200.bak");
    fs::write(&large_archive, vec![0_u8; 128]).unwrap();

    let discovered: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .map(|bundle| bundle.root_path)
        .collect();

    assert_eq!(discovered, vec![large_archive, small_archive]);
}
22189
/// A 4096-byte zero-filled quarantine file competes with a small backup that
/// holds real `conversations`/`messages` tables. Discovery must list the
/// backup first and flag only it as `supports_direct_readonly`.
#[test]
fn discover_historical_database_bundles_prefers_queryable_direct_bundles_first() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();

    let canonical_db = root.join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    let larger_corrupt = root.join("agent_search.corrupt.20260324_212907");
    fs::write(&larger_corrupt, vec![0_u8; 4096]).unwrap();

    let backups_dir = root.join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let smaller_healthy = backups_dir.join("agent_search.db.20260322T020200.bak");
    {
        // Scope the seeding connection so it is closed before discovery runs.
        let seed_conn =
            FrankenConnection::open(smaller_healthy.to_string_lossy().into_owned()).unwrap();
        seed_conn
            .execute_batch(
                "CREATE TABLE conversations (id INTEGER PRIMARY KEY, source_path TEXT);
                 CREATE TABLE messages (
                     id INTEGER PRIMARY KEY,
                     conversation_id INTEGER NOT NULL,
                     idx INTEGER NOT NULL,
                     content TEXT
                 );
                 INSERT INTO conversations(id, source_path) VALUES (1, '/tmp/history.jsonl');
                 INSERT INTO messages(id, conversation_id, idx, content)
                 VALUES (1, 1, 0, 'seed');",
            )
            .unwrap();
    }

    let bundles = discover_historical_database_bundles(&canonical_db);
    let mut ordered_paths: Vec<PathBuf> = Vec::with_capacity(bundles.len());
    for bundle in &bundles {
        ordered_paths.push(bundle.root_path.clone());
    }

    assert_eq!(ordered_paths, vec![smaller_healthy, larger_corrupt]);
    assert!(bundles[0].supports_direct_readonly);
    assert!(!bundles[1].supports_direct_readonly);
}
22228
/// A quarantine file that is not a SQLite database is still discovered as a
/// bundle, but salvage must count it as considered only: nothing is imported
/// and the canonical storage stays empty.
#[test]
fn salvage_historical_databases_skips_unreadable_quarantined_bundles() {
    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let quarantined = tmp.path().join("agent_search.corrupt.20260324_212907");
    fs::write(&quarantined, b"not a sqlite database").unwrap();

    let bundles = discover_historical_database_bundles(&canonical_db);
    let discovered: Vec<PathBuf> = bundles
        .into_iter()
        .map(|candidate| candidate.root_path)
        .collect();
    assert_eq!(discovered, vec![quarantined]);

    let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(outcome.bundles_considered, 1);
    assert_eq!(outcome.bundles_imported, 0);
    assert_eq!(outcome.conversations_imported, 0);
    assert_eq!(outcome.messages_imported, 0);

    let remaining = storage.list_conversations(10, 0).unwrap();
    assert!(remaining.is_empty());
}
22251
/// Discovery must pick up `agent_search.db` files under `repair-lab/<name>/`
/// and `snapshots/<name>/`, while a sibling `agent_search.rebuild-test.db` in
/// the repair-lab directory must not appear in the results.
#[test]
fn discover_historical_database_bundles_includes_repair_lab_and_snapshots_named_roots() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();

    let canonical_db = root.join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    let repair_lab_dir = root.join("repair-lab").join("live-copy");
    fs::create_dir_all(&repair_lab_dir).unwrap();
    let repair_lab_db = repair_lab_dir.join("agent_search.db");
    fs::write(&repair_lab_db, vec![0_u8; 96]).unwrap();
    let rebuild_test_db = repair_lab_dir.join("agent_search.rebuild-test.db");
    fs::write(rebuild_test_db, vec![0_u8; 192]).unwrap();

    let snapshots_dir = root.join("snapshots").join("20260324T013201Z");
    fs::create_dir_all(&snapshots_dir).unwrap();
    let snapshot_db = snapshots_dir.join("agent_search.db");
    fs::write(&snapshot_db, vec![0_u8; 64]).unwrap();

    let ordered_paths: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .map(|bundle| bundle.root_path)
        .collect();

    assert!(ordered_paths.contains(&repair_lab_db));
    assert!(ordered_paths.contains(&snapshot_db));
    assert!(
        ordered_paths
            .iter()
            .all(|path| path.file_name().and_then(|name| name.to_str())
                != Some("agent_search.rebuild-test.db"))
    );
}
22286
/// A replay bundle under `repair-lab/` is damaged after creation: its recorded
/// schema version is set to 13, migration row 14 and the FTS rebuild meta key
/// are deleted, and a legacy `fts_messages` row is written into
/// `sqlite_master`. An untouched backup with the same conversation must be
/// listed ahead of it.
#[test]
fn discover_historical_database_bundles_prefers_healthy_backup_over_replay_priority() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // Fixture rows shared by the replay bundle and the clean backup.
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let replay_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "replay message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("replay-conv".into()),
        title: Some("Replay bundle".into()),
        source_path: PathBuf::from("/tmp/replay.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![replay_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let replay_dir = tmp
        .path()
        .join("repair-lab")
        .join("replay-20260324T070101Z");
    fs::create_dir_all(&replay_dir).unwrap();
    let replay_db = replay_dir.join("agent_search.db");
    {
        // Populate the replay bundle, closing the storage handle at scope end.
        let replay_storage = SqliteStorage::open(&replay_db).unwrap();
        let replay_agent_id = replay_storage.ensure_agent(&agent).unwrap();
        replay_storage
            .insert_conversation_tree(replay_agent_id, None, &conversation)
            .unwrap();
    }

    let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
    {
        // Rewrite the replay bundle into a legacy-looking state.
        let replay_legacy = rusqlite_test_fixture_conn(&replay_db);
        replay_legacy
            .execute_batch(
                "UPDATE meta SET value = '13' WHERE key = 'schema_version';
                 DELETE FROM _schema_migrations WHERE version = 14;
                 PRAGMA writable_schema = ON;",
            )
            .unwrap();
        replay_legacy
            .execute(
                "DELETE FROM meta WHERE key = ?1",
                [FTS_FRANKEN_REBUILD_META_KEY],
            )
            .unwrap();
        replay_legacy
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [duplicate_legacy_fts_sql],
            )
            .unwrap();
        replay_legacy
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
    }

    let backups_dir = tmp.path().join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let clean_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
    {
        let clean_storage = SqliteStorage::open(&clean_backup).unwrap();
        let clean_agent_id = clean_storage.ensure_agent(&agent).unwrap();
        clean_storage
            .insert_conversation_tree(clean_agent_id, None, &conversation)
            .unwrap();
    }

    let bundles = discover_historical_database_bundles(&canonical_db);
    let mut ordered_paths: Vec<PathBuf> = Vec::with_capacity(bundles.len());
    for bundle in &bundles {
        ordered_paths.push(bundle.root_path.clone());
    }

    assert_eq!(ordered_paths[0], clean_backup);
    assert_eq!(ordered_paths[1], replay_db);

    // NOTE(review): the clean backup is expected to report zero FTS schema rows
    // and a non-queryable FTS; presumably `SqliteStorage::open` alone does not
    // materialize `fts_messages`. Confirm against the probe implementation.
    let clean_probe = &bundles[0].probe;
    assert_eq!(clean_probe.schema_version, Some(CURRENT_SCHEMA_VERSION));
    assert_eq!(clean_probe.fts_schema_rows, Some(0));
    assert!(!clean_probe.fts_queryable);

    let replay_probe = &bundles[1].probe;
    assert_eq!(replay_probe.schema_version, Some(13));
    assert_eq!(replay_probe.fts_schema_rows, Some(1));
}
22404
/// After a full FTS rebuild, a second message (id 2) is inserted straight into
/// `messages`. `ensure_fts_consistency_via_rusqlite` must then report an
/// incremental catch-up of one row out of two, and `fts_messages` must contain
/// rowid 2.
#[test]
fn ensure_fts_consistency_via_rusqlite_catches_up_missing_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("fts-catchup.db");
    {
        // Seed one conversation with one message, then close the storage handle.
        let storage = SqliteStorage::open(&db_path).unwrap();
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: "codex".into(),
                name: "Codex".into(),
                version: Some("0.2.3".into()),
                kind: AgentKind::Cli,
            })
            .unwrap();
        let first_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_050),
            content: "initial message".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("fts-catchup".into()),
            title: Some("FTS catchup".into()),
            source_path: PathBuf::from("/tmp/fts-catchup.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::Value::Null,
            messages: vec![first_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    rebuild_fts_via_rusqlite(&db_path).unwrap();

    let open_franken =
        || FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();

    {
        // Add a message behind the FTS index's back.
        let writer = open_franken();
        let conversation_id: i64 = writer
            .query_row_map("SELECT id FROM conversations LIMIT 1", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        writer
            .execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
                 VALUES(2, ?1, 1, 'assistant', 'assistant', 1700000000060, 'authentication catchup', NULL, NULL)",
                fparams![conversation_id],
            )
            .unwrap();
    }

    let repair = ensure_fts_consistency_via_rusqlite(&db_path).unwrap();
    let expected_repair = FtsConsistencyRepair::IncrementalCatchUp {
        inserted_rows: 1,
        total_rows: 2,
    };
    assert_eq!(repair, expected_repair);

    let reader = open_franken();
    let auth_rows: i64 = reader
        .query_row_map(
            "SELECT COUNT(*) FROM fts_messages WHERE rowid = 2",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(auth_rows, 1);
}
22484
/// Injects a second `fts_messages` row into `sqlite_master` (legacy column
/// layout) next to the freshly materialized FTS schema, then expects
/// `rebuild_fts_via_rusqlite` to index the single message and leave exactly
/// one FTS schema row behind.
#[test]
fn rebuild_fts_via_rusqlite_cleans_duplicate_legacy_schema_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("fts-duplicate-rebuild.db");

    {
        // Seed one conversation with one message, then close the storage handle.
        let storage = SqliteStorage::open(&db_path).unwrap();
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: "codex".into(),
                name: "Codex".into(),
                version: Some("0.2.3".into()),
                kind: AgentKind::Cli,
            })
            .unwrap();
        let retro_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(42),
            content: "retro investigation".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/ws")),
            external_id: Some("retro".into()),
            title: Some("retro".into()),
            source_path: PathBuf::from("/tmp/retro.jsonl"),
            started_at: Some(42),
            ended_at: Some(42),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![retro_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    {
        // Write the duplicate schema row directly into sqlite_master.
        let fixture = rusqlite_test_fixture_conn(&db_path);
        fixture
            .execute_batch("PRAGMA writable_schema = ON;")
            .unwrap();
        fixture
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                ["CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')"],
            )
            .unwrap();
        fixture
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
        let duplicate_rows: i64 = fixture
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(duplicate_rows, 2);
    }

    let inserted = rebuild_fts_via_rusqlite(&db_path).unwrap();
    assert_eq!(inserted, 1);

    let verifier = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let schema_rows = franken_fts_schema_rows(&verifier).unwrap();
    assert_eq!(
        schema_rows, 1,
        "DROP TABLE should leave one clean FTS schema"
    );
    // Plain row count of the rebuilt table; no MATCH query is involved.
    let fts_row_count: i64 = verifier
        .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(fts_row_count, 1);
}
22566
/// `ensure_agent` on a fresh database returns a positive row id.
#[test]
fn ensure_agent_creates_new() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let new_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "test_agent".into(),
            name: "Test Agent".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    assert!(new_id > 0);
}
22588
/// Calling `ensure_agent` twice with the same agent yields the same id.
#[test]
fn ensure_agent_returns_existing_id() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };

    let first_id = storage.ensure_agent(&codex).unwrap();
    let second_id = storage.ensure_agent(&codex).unwrap();

    assert_eq!(first_id, second_id);
}
22607
/// Re-running `ensure_agent` with identical metadata must leave the stored
/// `updated_at` untouched. A 5 ms pause separates the two calls so that a
/// rewritten timestamp would differ.
#[test]
fn ensure_agent_unchanged_preserves_updated_at() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("1.0".into()),
        kind: AgentKind::Cli,
    };

    // Reads the agent row's current `updated_at`.
    let read_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM agents WHERE slug = ?1",
                fparams![agent.slug.as_str()],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    storage.ensure_agent(&agent).unwrap();
    let initial_updated_at = read_updated_at();

    std::thread::sleep(std::time::Duration::from_millis(5));

    storage.ensure_agent(&agent).unwrap();
    let fetched_updated_at = read_updated_at();

    assert_eq!(fetched_updated_at, initial_updated_at);
}
22645
/// When an agent's name and version change between two `ensure_agent` calls,
/// the id stays the same and the stored row reflects the new metadata.
#[test]
fn ensure_agent_changed_metadata_updates_cached_slug() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let mut agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("1.0".into()),
        kind: AgentKind::Cli,
    };
    let original_id = storage.ensure_agent(&agent).unwrap();

    agent.name = "Codex CLI".into();
    agent.version = Some("1.1".into());
    let updated_id = storage.ensure_agent(&agent).unwrap();

    let fetched: (String, Option<String>) = storage
        .conn
        .query_row_map(
            "SELECT name, version FROM agents WHERE slug = ?1",
            fparams![agent.slug.as_str()],
            |row| {
                let name = row.get_typed(0)?;
                let version = row.get_typed(1)?;
                Ok((name, version))
            },
        )
        .unwrap();

    assert_eq!(original_id, updated_id);
    assert_eq!(fetched, ("Codex CLI".into(), Some("1.1".into())));
}
22677
/// An agent registered through `ensure_agent` shows up in `list_agents`.
#[test]
fn list_agents_returns_inserted() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    storage
        .ensure_agent(&Agent {
            id: None,
            slug: "new_agent".into(),
            name: "New Agent".into(),
            version: None,
            kind: AgentKind::VsCode,
        })
        .unwrap();

    let listed = storage.list_agents().unwrap();
    let found = listed.iter().any(|entry| entry.slug == "new_agent");
    assert!(found);
}
22696
/// `ensure_workspace` on a fresh database returns a positive row id.
#[test]
fn ensure_workspace_creates_new() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let workspace_path = Path::new("/home/user/project");
    let new_id = storage
        .ensure_workspace(workspace_path, Some("My Project"))
        .unwrap();

    assert!(new_id > 0);
}
22712
/// Calling `ensure_workspace` twice for the same path yields the same id.
#[test]
fn ensure_workspace_returns_existing() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let workspace_path = Path::new("/home/user/myproject");
    let first_id = storage.ensure_workspace(workspace_path, None).unwrap();
    let second_id = storage.ensure_workspace(workspace_path, None).unwrap();

    assert_eq!(first_id, second_id);
}
22724
/// Re-registering a workspace path with a different display name keeps the id
/// and overwrites the stored `display_name`.
#[test]
fn ensure_workspace_changed_display_name_updates_cached_path() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let path = Path::new("/home/user/myproject");
    let id_before = storage.ensure_workspace(path, Some("Before")).unwrap();
    let id_after = storage.ensure_workspace(path, Some("After")).unwrap();

    let stored_name: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT display_name FROM workspaces WHERE path = ?1",
            fparams![path.to_string_lossy().as_ref()],
            |row| row.get_typed(0),
        )
        .unwrap();

    assert_eq!(id_before, id_after);
    assert_eq!(stored_name.as_deref(), Some("After"));
}
22747
/// A workspace registered through `ensure_workspace` shows up in
/// `list_workspaces` under the same path.
#[test]
fn list_workspaces_returns_inserted() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    storage
        .ensure_workspace(Path::new("/test/workspace"), Some("Test WS"))
        .unwrap();

    let listed = storage.list_workspaces().unwrap();
    let found = listed
        .iter()
        .any(|entry| entry.path.to_str() == Some("/test/workspace"));
    assert!(found);
}
22765
/// A source written with `upsert_source` can be read back through
/// `get_source` with its host label intact.
#[test]
fn upsert_source_creates_new() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let source = Source {
        id: "test-laptop".into(),
        kind: SourceKind::Ssh,
        host_label: Some("test.local".into()),
        machine_id: Some("test-machine-id".into()),
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };

    storage.upsert_source(&source).unwrap();

    // `expect` replaces the former `is_some()` + `unwrap()` pair so a missing
    // row fails with a message that names what was expected.
    let fetched = storage
        .get_source("test-laptop")
        .unwrap()
        .expect("upserted source row should exist");
    assert_eq!(fetched.host_label, Some("test.local".into()));
}
22792
/// Upserting a source twice under the same id overwrites the stored row: the
/// host label and platform from the second write must be what `get_source`
/// returns.
#[test]
fn upsert_source_updates_existing() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let source1 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Original Label".into()),
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&source1).unwrap();

    let source2 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Updated Label".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: Some(SqliteStorage::now_millis()),
    };
    storage.upsert_source(&source2).unwrap();

    let fetched = storage.get_source("my-source").unwrap().unwrap();
    assert_eq!(fetched.host_label, Some("Updated Label".into()));
    // Compare the stored value, not just its presence, so an update that
    // wrote the wrong platform cannot pass.
    assert_eq!(fetched.platform, Some("linux".into()));
}
22827
/// Upserting an identical source a second time (after a 5 ms pause) must not
/// alter any stored field, including `created_at` and `updated_at`.
#[test]
fn upsert_source_unchanged_preserves_updated_at() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let source = Source {
        id: "stable-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("builder.local".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: Some(serde_json::json!({"role": "bench"})),
        created_at: None,
        updated_at: None,
    };

    // Upserts the fixture and returns the row as stored.
    let upsert_and_fetch = || {
        storage.upsert_source(&source).unwrap();
        storage.get_source("stable-source").unwrap().unwrap()
    };

    let before = upsert_and_fetch();
    std::thread::sleep(std::time::Duration::from_millis(5));
    let after = upsert_and_fetch();

    assert_eq!(after.created_at, before.created_at);
    assert_eq!(after.updated_at, before.updated_at);
    assert_eq!(after.host_label, before.host_label);
    assert_eq!(after.platform, before.platform);
    assert_eq!(after.config_json, before.config_json);
}
22858
/// `ensure_source_for_conversation` must recreate a remote source row after it
/// has been deleted: create it once, delete it, call again, and expect the row
/// back with the conversation's `origin_host` as its host label.
#[test]
fn ensure_source_for_conversation_recreates_remote_source_after_delete() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws/cache-recreate")),
        external_id: Some("cache-recreate".into()),
        title: Some("Cache Recreate".into()),
        source_path: PathBuf::from("/log/cache-recreate.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: Some(16),
        metadata_json: serde_json::json!({}),
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("tester".into()),
            created_at: Some(1_700_000_000_000),
            content: "cache recreate".into(),
            extra_json: serde_json::json!({}),
            snippets: Vec::new(),
        }],
        source_id: "cache-remote-source".into(),
        origin_host: Some("builder-cache".into()),
    };

    storage
        .ensure_source_for_conversation(&conversation)
        .unwrap();
    assert!(storage.get_source("cache-remote-source").unwrap().is_some());

    let deleted = storage.delete_source("cache-remote-source", false).unwrap();
    assert!(deleted);
    assert!(storage.get_source("cache-remote-source").unwrap().is_none());

    storage
        .ensure_source_for_conversation(&conversation)
        .unwrap();
    // `expect` replaces the former `is_some()` + `unwrap()` pair so a missing
    // row fails with a message that names what was expected.
    let recreated = storage
        .get_source("cache-remote-source")
        .unwrap()
        .expect("remote source row should be recreated after delete");
    assert_eq!(recreated.host_label.as_deref(), Some("builder-cache"));
}
22909
/// `delete_source` reports success for an existing non-default source id and
/// the row is gone afterwards.
#[test]
fn delete_source_removes_entry() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let doomed = Source {
        id: "to-delete".into(),
        kind: SourceKind::Local,
        host_label: None,
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&doomed).unwrap();

    let was_deleted = storage.delete_source("to-delete", false).unwrap();
    assert!(was_deleted);

    let lookup = storage.get_source("to-delete").unwrap();
    assert!(lookup.is_none());
}
22934
/// Deleting the `LOCAL_SOURCE_ID` source must return an error.
#[test]
fn delete_source_cannot_delete_local() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let outcome = storage.delete_source(LOCAL_SOURCE_ID, false);

    assert!(outcome.is_err());
}
22944
/// A freshly opened database already lists the `LOCAL_SOURCE_ID` source.
#[test]
fn list_sources_includes_local() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let listed = storage.list_sources().unwrap();
    let has_local = listed.iter().any(|entry| entry.id == LOCAL_SOURCE_ID);

    assert!(has_local);
}
22954
/// A conversation whose `source_id` is blank (" ") and has no origin host must
/// be stored under `LOCAL_SOURCE_ID`: no source row exists for the blank id,
/// the local source row is of kind `Local` without a host label, and the
/// listed conversation carries the local id.
#[test]
fn insert_conversation_tree_blank_local_source_normalizes_to_local_id() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-local-source".into()),
        title: Some("Blank local source".into()),
        source_path: tmp.path().join("blank-local.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // The blank id itself must not have been registered as a source.
    let blank_lookup = storage.get_source(" ").unwrap();
    assert!(blank_lookup.is_none());

    let local_source = storage
        .get_source(LOCAL_SOURCE_ID)
        .unwrap()
        .expect("local source row should exist");
    assert_eq!(local_source.kind, SourceKind::Local);
    assert_eq!(local_source.host_label, None);

    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    assert_eq!(listed[0].source_id, LOCAL_SOURCE_ID);
    assert_eq!(listed[0].origin_host, None);
}
23013
23014 #[test]
23015 fn repeated_local_inserts_do_not_touch_bootstrap_source_row() {
23016 let dir = TempDir::new().unwrap();
23017 let db_path = dir.path().join("test.db");
23018 let storage = SqliteStorage::open(&db_path).unwrap();
23019
23020 let agent_id = storage
23021 .ensure_agent(&Agent {
23022 id: None,
23023 slug: "codex".into(),
23024 name: "Codex".into(),
23025 version: None,
23026 kind: AgentKind::Cli,
23027 })
23028 .unwrap();
23029
23030 let bootstrap_updated_at: i64 = storage
23031 .conn
23032 .query_row_map(
23033 "SELECT updated_at FROM sources WHERE id = ?1",
23034 fparams![LOCAL_SOURCE_ID],
23035 |row| row.get_typed(0),
23036 )
23037 .unwrap();
23038
23039 let make_conversation = |external_id: &str, suffix: &str| Conversation {
23040 id: None,
23041 agent_slug: "codex".into(),
23042 workspace: None,
23043 external_id: Some(external_id.into()),
23044 title: Some(format!("Local source {suffix}")),
23045 source_path: dir.path().join(format!("local-{suffix}.jsonl")),
23046 started_at: Some(1_700_000_000_000),
23047 ended_at: Some(1_700_000_000_001),
23048 approx_tokens: None,
23049 metadata_json: serde_json::Value::Null,
23050 messages: vec![Message {
23051 id: None,
23052 idx: 0,
23053 role: MessageRole::User,
23054 author: None,
23055 created_at: Some(1_700_000_000_000),
23056 content: format!("hello-{suffix}"),
23057 extra_json: serde_json::Value::Null,
23058 snippets: Vec::new(),
23059 }],
23060 source_id: LOCAL_SOURCE_ID.into(),
23061 origin_host: None,
23062 };
23063
23064 std::thread::sleep(std::time::Duration::from_millis(5));
23065 storage
23066 .insert_conversation_tree(agent_id, None, &make_conversation("local-source-1", "one"))
23067 .unwrap();
23068 let after_first_insert: i64 = storage
23069 .conn
23070 .query_row_map(
23071 "SELECT updated_at FROM sources WHERE id = ?1",
23072 fparams![LOCAL_SOURCE_ID],
23073 |row| row.get_typed(0),
23074 )
23075 .unwrap();
23076
23077 std::thread::sleep(std::time::Duration::from_millis(5));
23078 storage
23079 .insert_conversation_tree(agent_id, None, &make_conversation("local-source-2", "two"))
23080 .unwrap();
23081 let after_second_insert: i64 = storage
23082 .conn
23083 .query_row_map(
23084 "SELECT updated_at FROM sources WHERE id = ?1",
23085 fparams![LOCAL_SOURCE_ID],
23086 |row| row.get_typed(0),
23087 )
23088 .unwrap();
23089
23090 assert_eq!(after_first_insert, bootstrap_updated_at);
23091 assert_eq!(after_second_insert, bootstrap_updated_at);
23092 }
23093
23094 #[test]
23095 fn insert_conversation_tree_blank_remote_source_normalizes_to_origin_host() {
23096 let dir = TempDir::new().unwrap();
23097 let db_path = dir.path().join("test.db");
23098 let storage = SqliteStorage::open(&db_path).unwrap();
23099
23100 let agent_id = storage
23101 .ensure_agent(&Agent {
23102 id: None,
23103 slug: "codex".into(),
23104 name: "Codex".into(),
23105 version: None,
23106 kind: AgentKind::Cli,
23107 })
23108 .unwrap();
23109
23110 let conversation = Conversation {
23111 id: None,
23112 agent_slug: "codex".into(),
23113 workspace: None,
23114 external_id: Some("blank-remote-source".into()),
23115 title: Some("Blank remote source".into()),
23116 source_path: dir.path().join("blank-remote.jsonl"),
23117 started_at: Some(1_700_000_000_000),
23118 ended_at: Some(1_700_000_000_001),
23119 approx_tokens: None,
23120 metadata_json: serde_json::Value::Null,
23121 messages: vec![Message {
23122 id: None,
23123 idx: 0,
23124 role: MessageRole::User,
23125 author: None,
23126 created_at: Some(1_700_000_000_000),
23127 content: "hello".into(),
23128 extra_json: serde_json::Value::Null,
23129 snippets: Vec::new(),
23130 }],
23131 source_id: " ".into(),
23132 origin_host: Some("user@work-laptop".into()),
23133 };
23134
23135 storage
23136 .insert_conversation_tree(agent_id, None, &conversation)
23137 .unwrap();
23138
23139 assert!(storage.get_source(" ").unwrap().is_none());
23140 let source = storage
23141 .get_source("user@work-laptop")
23142 .unwrap()
23143 .expect("normalized remote source row should exist");
23144 assert_eq!(source.kind, SourceKind::Ssh);
23145 assert_eq!(source.host_label.as_deref(), Some("user@work-laptop"));
23146
23147 let conversations = storage.list_conversations(10, 0).unwrap();
23148 assert_eq!(conversations.len(), 1);
23149 assert_eq!(conversations[0].source_id, "user@work-laptop");
23150 assert_eq!(
23151 conversations[0].origin_host.as_deref(),
23152 Some("user@work-laptop")
23153 );
23154 }
23155
23156 #[test]
23157 fn insert_conversations_batched_normalizes_host_only_remote_source_id() {
23158 let dir = TempDir::new().unwrap();
23159 let db_path = dir.path().join("test.db");
23160 let storage = SqliteStorage::open(&db_path).unwrap();
23161
23162 let agent_id = storage
23163 .ensure_agent(&Agent {
23164 id: None,
23165 slug: "codex".into(),
23166 name: "Codex".into(),
23167 version: None,
23168 kind: AgentKind::Cli,
23169 })
23170 .unwrap();
23171
23172 let conversation = Conversation {
23173 id: None,
23174 agent_slug: "codex".into(),
23175 workspace: None,
23176 external_id: Some("batched-blank-remote-source".into()),
23177 title: Some("Batched blank remote source".into()),
23178 source_path: dir.path().join("batched-blank-remote.jsonl"),
23179 started_at: Some(1_700_000_000_000),
23180 ended_at: Some(1_700_000_000_001),
23181 approx_tokens: None,
23182 metadata_json: serde_json::Value::Null,
23183 messages: vec![Message {
23184 id: None,
23185 idx: 0,
23186 role: MessageRole::User,
23187 author: None,
23188 created_at: Some(1_700_000_000_000),
23189 content: "hello".into(),
23190 extra_json: serde_json::Value::Null,
23191 snippets: Vec::new(),
23192 }],
23193 source_id: " ".into(),
23194 origin_host: Some("user@batch-host".into()),
23195 };
23196
23197 storage
23198 .insert_conversations_batched(&[(agent_id, None, &conversation)])
23199 .unwrap();
23200
23201 assert!(storage.get_source(" ").unwrap().is_none());
23202 let source = storage
23203 .get_source("user@batch-host")
23204 .unwrap()
23205 .expect("normalized batched remote source row should exist");
23206 assert_eq!(source.kind, SourceKind::Ssh);
23207 assert_eq!(source.host_label.as_deref(), Some("user@batch-host"));
23208
23209 let conversations = storage.list_conversations(10, 0).unwrap();
23210 assert_eq!(conversations.len(), 1);
23211 assert_eq!(conversations[0].source_id, "user@batch-host");
23212 assert_eq!(
23213 conversations[0].origin_host.as_deref(),
23214 Some("user@batch-host")
23215 );
23216 }
23217
23218 #[test]
23219 fn get_source_ids_excludes_local() {
23220 let dir = TempDir::new().unwrap();
23221 let db_path = dir.path().join("test.db");
23222 let storage = SqliteStorage::open(&db_path).unwrap();
23223
23224 let source = Source {
23226 id: "remote-1".into(),
23227 kind: SourceKind::Ssh,
23228 host_label: Some("server".into()),
23229 machine_id: None,
23230 platform: None,
23231 config_json: None,
23232 created_at: Some(SqliteStorage::now_millis()),
23233 updated_at: None,
23234 };
23235 storage.upsert_source(&source).unwrap();
23236
23237 let ids = storage.get_source_ids().unwrap();
23238 assert!(!ids.contains(&LOCAL_SOURCE_ID.to_string()));
23239 assert!(ids.contains(&"remote-1".to_string()));
23240 }
23241
23242 #[test]
23247 fn get_last_scan_ts_returns_none_initially() {
23248 let dir = TempDir::new().unwrap();
23249 let db_path = dir.path().join("test.db");
23250 let storage = SqliteStorage::open(&db_path).unwrap();
23251
23252 let ts = storage.get_last_scan_ts().unwrap();
23253 assert!(ts.is_none());
23254 }
23255
23256 #[test]
23257 fn set_and_get_last_scan_ts() {
23258 let dir = TempDir::new().unwrap();
23259 let db_path = dir.path().join("test.db");
23260 let storage = SqliteStorage::open(&db_path).unwrap();
23261
23262 let expected_ts = 1700000000000_i64;
23263 storage.set_last_scan_ts(expected_ts).unwrap();
23264
23265 let actual_ts = storage.get_last_scan_ts().unwrap();
23266 assert_eq!(actual_ts, Some(expected_ts));
23267 }
23268
23269 #[test]
23274 fn now_millis_returns_reasonable_value() {
23275 let ts = SqliteStorage::now_millis();
23276 assert!(ts > 1577836800000);
23278 assert!(ts < 4102444800000);
23280 }
23281
23282 #[test]
23287 fn msgpack_roundtrip_basic_object() {
23288 let value = serde_json::json!({
23289 "key": "value",
23290 "number": 42,
23291 "nested": { "inner": true }
23292 });
23293
23294 let bytes = serialize_json_to_msgpack(&value).expect("should serialize");
23295 let recovered = deserialize_msgpack_to_json(&bytes);
23296
23297 assert_eq!(value, recovered);
23298 }
23299
23300 #[test]
23301 fn msgpack_returns_none_for_null() {
23302 let value = serde_json::Value::Null;
23303 assert!(serialize_json_to_msgpack(&value).is_none());
23304 }
23305
23306 #[test]
23307 fn message_insert_stores_null_extra_json_as_sql_null() {
23308 let dir = TempDir::new().unwrap();
23309 let db_path = dir.path().join("test.db");
23310 let storage = SqliteStorage::open(&db_path).unwrap();
23311 let agent_id = storage
23312 .ensure_agent(&Agent {
23313 id: None,
23314 slug: "codex".into(),
23315 name: "Codex".into(),
23316 version: None,
23317 kind: AgentKind::Cli,
23318 })
23319 .unwrap();
23320 let conversation = Conversation {
23321 id: None,
23322 agent_slug: "codex".into(),
23323 workspace: None,
23324 external_id: Some("null-extra-json".into()),
23325 title: Some("Null extra_json".into()),
23326 source_path: PathBuf::from("/tmp/null-extra-json.jsonl"),
23327 started_at: Some(1_700_000_000_000),
23328 ended_at: Some(1_700_000_000_001),
23329 approx_tokens: None,
23330 metadata_json: serde_json::Value::Null,
23331 messages: vec![Message {
23332 id: None,
23333 idx: 0,
23334 role: MessageRole::User,
23335 author: None,
23336 created_at: Some(1_700_000_000_000),
23337 content: "null metadata message".into(),
23338 extra_json: serde_json::Value::Null,
23339 snippets: Vec::new(),
23340 }],
23341 source_id: LOCAL_SOURCE_ID.into(),
23342 origin_host: None,
23343 };
23344
23345 let conversation_id = storage
23346 .insert_conversation_tree(agent_id, None, &conversation)
23347 .unwrap()
23348 .conversation_id;
23349
23350 let (extra_json, extra_bin): (Option<String>, Option<Vec<u8>>) = storage
23351 .conn
23352 .query_row_map(
23353 "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
23354 fparams![conversation_id],
23355 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23356 )
23357 .unwrap();
23358 assert!(extra_json.is_none());
23359 assert!(extra_bin.is_none());
23360
23361 let stored = storage.fetch_messages(conversation_id).unwrap();
23362 assert!(stored[0].extra_json.is_null());
23363 }
23364
23365 #[test]
23366 fn message_insert_stores_nonempty_extra_json_as_msgpack_only() {
23367 let dir = TempDir::new().unwrap();
23368 let db_path = dir.path().join("test.db");
23369 let storage = SqliteStorage::open(&db_path).unwrap();
23370 let agent_id = storage
23371 .ensure_agent(&Agent {
23372 id: None,
23373 slug: "codex".into(),
23374 name: "Codex".into(),
23375 version: None,
23376 kind: AgentKind::Cli,
23377 })
23378 .unwrap();
23379 let extra_json = serde_json::json!({ "idx": 7, "kind": "profile" });
23380 let conversation = Conversation {
23381 id: None,
23382 agent_slug: "codex".into(),
23383 workspace: None,
23384 external_id: Some("msgpack-extra-json".into()),
23385 title: Some("MessagePack extra_json".into()),
23386 source_path: PathBuf::from("/tmp/msgpack-extra-json.jsonl"),
23387 started_at: Some(1_700_000_000_000),
23388 ended_at: Some(1_700_000_000_001),
23389 approx_tokens: None,
23390 metadata_json: serde_json::Value::Null,
23391 messages: vec![Message {
23392 id: None,
23393 idx: 0,
23394 role: MessageRole::User,
23395 author: None,
23396 created_at: Some(1_700_000_000_000),
23397 content: "msgpack metadata message".into(),
23398 extra_json: extra_json.clone(),
23399 snippets: Vec::new(),
23400 }],
23401 source_id: LOCAL_SOURCE_ID.into(),
23402 origin_host: None,
23403 };
23404
23405 let conversation_id = storage
23406 .insert_conversation_tree(agent_id, None, &conversation)
23407 .unwrap()
23408 .conversation_id;
23409
23410 let (extra_json_text, extra_bin): (Option<String>, Option<Vec<u8>>) = storage
23411 .conn
23412 .query_row_map(
23413 "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
23414 fparams![conversation_id],
23415 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23416 )
23417 .unwrap();
23418 assert!(extra_json_text.is_none());
23419 assert!(extra_bin.is_some());
23420
23421 let stored = storage.fetch_messages(conversation_id).unwrap();
23422 assert_eq!(stored[0].extra_json, extra_json);
23423 }
23424
23425 #[test]
23426 fn conversation_insert_preserves_null_metadata_json_as_json_null() {
23427 let dir = TempDir::new().unwrap();
23428 let db_path = dir.path().join("test.db");
23429 let storage = SqliteStorage::open(&db_path).unwrap();
23430 let agent_id = storage
23431 .ensure_agent(&Agent {
23432 id: None,
23433 slug: "codex".into(),
23434 name: "Codex".into(),
23435 version: None,
23436 kind: AgentKind::Cli,
23437 })
23438 .unwrap();
23439 let conversation = Conversation {
23440 id: None,
23441 agent_slug: "codex".into(),
23442 workspace: None,
23443 external_id: Some("null-conversation-metadata".into()),
23444 title: Some("Null conversation metadata".into()),
23445 source_path: PathBuf::from("/tmp/null-conversation-metadata.jsonl"),
23446 started_at: Some(1_700_000_000_000),
23447 ended_at: Some(1_700_000_000_001),
23448 approx_tokens: None,
23449 metadata_json: serde_json::Value::Null,
23450 messages: vec![Message {
23451 id: None,
23452 idx: 0,
23453 role: MessageRole::User,
23454 author: None,
23455 created_at: Some(1_700_000_000_000),
23456 content: "null conversation metadata message".into(),
23457 extra_json: serde_json::Value::Null,
23458 snippets: Vec::new(),
23459 }],
23460 source_id: LOCAL_SOURCE_ID.into(),
23461 origin_host: None,
23462 };
23463
23464 storage
23465 .insert_conversation_tree(agent_id, None, &conversation)
23466 .unwrap();
23467
23468 let (metadata_json, metadata_bin): (Option<String>, Option<Vec<u8>>) = storage
23469 .conn
23470 .query_row_map(
23471 "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
23472 fparams!["null-conversation-metadata"],
23473 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23474 )
23475 .unwrap();
23476 assert_eq!(metadata_json.as_deref(), Some("null"));
23477 assert!(metadata_bin.is_none());
23478
23479 let listed = storage.list_conversations(10, 0).unwrap();
23480 assert!(listed[0].metadata_json.is_null());
23481 }
23482
23483 #[test]
23484 fn conversation_insert_stores_nonempty_metadata_as_msgpack_only() {
23485 let dir = TempDir::new().unwrap();
23486 let db_path = dir.path().join("test.db");
23487 let storage = SqliteStorage::open(&db_path).unwrap();
23488 let agent_id = storage
23489 .ensure_agent(&Agent {
23490 id: None,
23491 slug: "codex".into(),
23492 name: "Codex".into(),
23493 version: None,
23494 kind: AgentKind::Cli,
23495 })
23496 .unwrap();
23497 let metadata_json = serde_json::json!({ "bench": true, "source": "profile" });
23498 let conversation = Conversation {
23499 id: None,
23500 agent_slug: "codex".into(),
23501 workspace: None,
23502 external_id: Some("msgpack-conversation-metadata".into()),
23503 title: Some("MessagePack conversation metadata".into()),
23504 source_path: PathBuf::from("/tmp/msgpack-conversation-metadata.jsonl"),
23505 started_at: Some(1_700_000_000_000),
23506 ended_at: Some(1_700_000_000_001),
23507 approx_tokens: None,
23508 metadata_json: metadata_json.clone(),
23509 messages: vec![Message {
23510 id: None,
23511 idx: 0,
23512 role: MessageRole::User,
23513 author: None,
23514 created_at: Some(1_700_000_000_000),
23515 content: "msgpack conversation metadata message".into(),
23516 extra_json: serde_json::Value::Null,
23517 snippets: Vec::new(),
23518 }],
23519 source_id: LOCAL_SOURCE_ID.into(),
23520 origin_host: None,
23521 };
23522
23523 storage
23524 .insert_conversation_tree(agent_id, None, &conversation)
23525 .unwrap();
23526
23527 let (metadata_text, metadata_bin): (Option<String>, Option<Vec<u8>>) = storage
23528 .conn
23529 .query_row_map(
23530 "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
23531 fparams!["msgpack-conversation-metadata"],
23532 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23533 )
23534 .unwrap();
23535 assert!(metadata_text.is_none());
23536 assert!(metadata_bin.is_some());
23537
23538 let listed = storage.list_conversations(10, 0).unwrap();
23539 assert_eq!(listed[0].metadata_json, metadata_json);
23540 }
23541
23542 #[test]
23543 fn msgpack_returns_none_for_empty_object() {
23544 let value = serde_json::json!({});
23545 assert!(serialize_json_to_msgpack(&value).is_none());
23546 }
23547
23548 #[test]
23549 fn parse_historical_json_column_preserves_large_payloads_as_raw_json() {
23550 let raw = format!("{{\"blob\":\"{}\"}}", "x".repeat(1_000_000));
23551
23552 let value = parse_historical_json_column(Some(raw.clone()));
23553
23554 assert_eq!(historical_raw_json(&value), Some(raw.as_str()));
23555 assert_eq!(json_value_size_hint(&value), raw.len());
23556 }
23557
23558 #[test]
23559 fn parse_historical_json_column_preserves_small_payloads_as_raw_json() {
23560 let raw = String::from("{\"ok\":true,\"n\":1}");
23561
23562 let value = parse_historical_json_column(Some(raw.clone()));
23563
23564 assert_eq!(historical_raw_json(&value), Some(raw.as_str()));
23565 }
23566
23567 #[test]
23568 fn msgpack_serializes_non_empty_array() {
23569 let value = serde_json::json!([1, 2, 3]);
23570 let bytes = serialize_json_to_msgpack(&value).expect("should serialize array");
23571 let recovered = deserialize_msgpack_to_json(&bytes);
23572 assert_eq!(value, recovered);
23573 }
23574
23575 #[test]
23576 fn msgpack_smaller_than_json() {
23577 let value = serde_json::json!({
23578 "field_name_one": "some_value",
23579 "field_name_two": 123456,
23580 "field_name_three": [1, 2, 3, 4, 5],
23581 "field_name_four": { "nested": true }
23582 });
23583
23584 let json_bytes = serde_json::to_vec(&value).unwrap();
23585 let msgpack_bytes = serialize_json_to_msgpack(&value).unwrap();
23586
23587 assert!(
23589 msgpack_bytes.len() < json_bytes.len(),
23590 "MessagePack ({} bytes) should be smaller than JSON ({} bytes)",
23591 msgpack_bytes.len(),
23592 json_bytes.len()
23593 );
23594 }
23595
23596 #[test]
23597 fn migration_v7_adds_binary_columns() {
23598 let dir = TempDir::new().unwrap();
23599 let db_path = dir.path().join("test.db");
23600 let storage = SqliteStorage::open(&db_path).unwrap();
23601
23602 let has_metadata_bin = storage
23604 .raw()
23605 .query("PRAGMA table_info(conversations)")
23606 .unwrap()
23607 .iter()
23608 .any(|row| row.get_typed::<String>(1).unwrap() == "metadata_bin");
23609 assert!(
23610 has_metadata_bin,
23611 "conversations should have metadata_bin column"
23612 );
23613
23614 let has_extra_bin = storage
23616 .raw()
23617 .query("PRAGMA table_info(messages)")
23618 .unwrap()
23619 .iter()
23620 .any(|row| row.get_typed::<String>(1).unwrap() == "extra_bin");
23621 assert!(has_extra_bin, "messages should have extra_bin column");
23622 }
23623
23624 #[test]
23625 fn insert_conversation_tree_rehydrates_append_tail_state_cache_after_manual_clear() {
23626 let dir = TempDir::new().unwrap();
23627 let db_path = dir.path().join("append-tail-state-cache.db");
23628 let storage = SqliteStorage::open(&db_path).unwrap();
23629 let agent_id = storage
23630 .ensure_agent(&Agent {
23631 id: None,
23632 slug: "codex".into(),
23633 name: "Codex".into(),
23634 version: None,
23635 kind: AgentKind::Cli,
23636 })
23637 .unwrap();
23638 let workspace = PathBuf::from("/ws/profiled-append-remote");
23639 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
23640
23641 let initial = make_profiled_append_remote_merge_conversation(11, 5);
23642 let insert_outcome = storage
23643 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
23644 .unwrap();
23645 let conversation_id = insert_outcome.conversation_id;
23646
23647 let initial_tail: (Option<i64>, Option<i64>, Option<i64>) = storage
23648 .raw()
23649 .query_row_map(
23650 "SELECT ended_at, last_message_idx, last_message_created_at
23651 FROM conversation_tail_state
23652 WHERE conversation_id = ?1",
23653 fparams![conversation_id],
23654 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
23655 )
23656 .unwrap();
23657 assert_eq!(initial_tail, (Some(111_005), Some(4), Some(111_004)));
23658
23659 storage
23660 .raw()
23661 .execute_compat(
23662 "UPDATE conversations SET ended_at = ?1 WHERE id = ?2",
23663 fparams![111_999_i64, conversation_id],
23664 )
23665 .unwrap();
23666 storage
23667 .raw()
23668 .execute_compat(
23669 "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
23670 fparams![conversation_id],
23671 )
23672 .unwrap();
23673
23674 let appended = make_profiled_append_remote_merge_conversation(11, 10);
23675 let append_outcome = storage
23676 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
23677 .unwrap();
23678 assert_eq!(append_outcome.inserted_indices, vec![5, 6, 7, 8, 9]);
23679
23680 let final_tail: (Option<i64>, Option<i64>, Option<i64>) = storage
23681 .raw()
23682 .query_row_map(
23683 "SELECT ended_at, last_message_idx, last_message_created_at
23684 FROM conversation_tail_state
23685 WHERE conversation_id = ?1",
23686 fparams![conversation_id],
23687 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
23688 )
23689 .unwrap();
23690 assert_eq!(final_tail, (Some(111_999), Some(9), Some(111_009)));
23691 }
23692
23693 #[test]
23694 fn msgpack_deserialize_empty_returns_default() {
23695 let recovered = deserialize_msgpack_to_json(&[]);
23696 assert_eq!(recovered, serde_json::Value::Object(serde_json::Map::new()));
23697 }
23698
23699 #[test]
23700 fn msgpack_deserialize_garbage_returns_default() {
23701 let recovered = deserialize_msgpack_to_json(&[0x85]);
23704 assert_eq!(recovered, serde_json::Value::Object(serde_json::Map::new()));
23705 }
23706
23707 #[test]
23708 fn stats_aggregator_collects_and_expands() {
23709 let mut agg = StatsAggregator::new();
23710 assert!(agg.is_empty());
23711
23712 agg.record("claude", "local", 100, 5, 500);
23715 agg.record("codex", "local", 100, 3, 300);
23717 agg.record("claude", "local", 101, 2, 200);
23719
23720 assert!(!agg.is_empty());
23721 assert_eq!(agg.raw_entry_count(), 3);
23722
23723 let entries = agg.expand();
23724 assert_eq!(entries.len(), 10);
23752
23753 let day100_all = entries
23755 .iter()
23756 .find(|(d, a, s, _)| *d == 100 && a == "all" && s == "all")
23757 .unwrap();
23758 assert_eq!(day100_all.3.session_count_delta, 2);
23759 assert_eq!(day100_all.3.message_count_delta, 8);
23760 assert_eq!(day100_all.3.total_chars_delta, 800);
23761 }
23762
23763 #[test]
23768 fn lazy_franken_db_not_open_before_get() {
23769 let dir = TempDir::new().unwrap();
23770 let db_path = dir.path().join("lazy_test.db");
23771
23772 let _storage = SqliteStorage::open(&db_path).unwrap();
23774
23775 let lazy = LazyFrankenDb::new(db_path);
23776 assert!(
23777 !lazy.is_open(),
23778 "LazyFrankenDb must not open on construction"
23779 );
23780 }
23781
23782 #[test]
23783 fn lazy_franken_db_opens_on_first_get() {
23784 let dir = TempDir::new().unwrap();
23785 let db_path = dir.path().join("lazy_test.db");
23786
23787 let _storage = SqliteStorage::open(&db_path).unwrap();
23789 drop(_storage);
23790
23791 let lazy = LazyFrankenDb::new(db_path);
23792 assert!(!lazy.is_open());
23793
23794 let conn = lazy.get("test").expect("should open successfully");
23795 let count: i64 = conn
23796 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
23797 r.get_typed(0)
23798 })
23799 .unwrap();
23800 assert_eq!(count, 0);
23801 drop(conn);
23802
23803 assert!(lazy.is_open(), "LazyFrankenDb must be open after get()");
23804 }
23805
23806 #[test]
23807 fn lazy_franken_db_reuses_connection() {
23808 let dir = TempDir::new().unwrap();
23809 let db_path = dir.path().join("lazy_test.db");
23810 let _storage = SqliteStorage::open(&db_path).unwrap();
23811 drop(_storage);
23812
23813 let lazy = LazyFrankenDb::new(db_path);
23814
23815 {
23817 let conn = lazy.get("first").unwrap();
23818 conn.execute_batch("CREATE TABLE IF NOT EXISTS test_tbl (id INTEGER)")
23819 .unwrap();
23820 }
23821
23822 {
23824 let conn = lazy.get("second").unwrap();
23825 let count: i64 = conn
23826 .query_row_map("SELECT COUNT(*) FROM test_tbl", fparams![], |r| {
23827 r.get_typed(0)
23828 })
23829 .unwrap();
23830 assert_eq!(count, 0);
23831 }
23832 }
23833
23834 #[test]
23835 fn lazy_franken_db_not_found_error() {
23836 let dir = TempDir::new().unwrap();
23837 let db_path = dir.path().join("nonexistent.db");
23838
23839 let lazy = LazyFrankenDb::new(db_path);
23840 let result = lazy.get("test");
23841 assert!(result.is_err());
23842 assert!(
23843 matches!(result.unwrap_err(), LazyDbError::NotFound(_)),
23844 "should return NotFound for missing DB"
23845 );
23846 }
23847
23848 #[test]
23849 fn lazy_franken_db_path_accessor() {
23850 let path = PathBuf::from("/tmp/test_lazy.db");
23851 let lazy = LazyFrankenDb::new(path.clone());
23852 assert_eq!(lazy.path(), path.as_path());
23853 }
23854
23855 #[test]
23860 fn sql_like_match_basic_patterns() {
23861 assert!(sql_like_match("claude-opus-4-20250101", "claude-opus-4%"));
23862 assert!(sql_like_match("claude-opus-4", "claude-opus-4%"));
23863 assert!(!sql_like_match("claude-sonnet-4", "claude-opus-4%"));
23864
23865 assert!(sql_like_match("gemini-2.0-flash-001", "gemini-2%flash%"));
23867 assert!(sql_like_match("gemini-2-flash", "gemini-2%flash%"));
23868 assert!(!sql_like_match("gemini-2-pro", "gemini-2%flash%"));
23869
23870 assert!(sql_like_match("hello", "hello"));
23872 assert!(!sql_like_match("hello!", "hello"));
23873
23874 assert!(sql_like_match("gpt-4o", "gpt-4_"));
23876 assert!(!sql_like_match("gpt-4oo", "gpt-4_"));
23877
23878 assert!(sql_like_match("Claude-Opus-4", "claude-opus-4%"));
23880 }
23881
23882 #[test]
23883 fn date_str_to_day_id_converts_correctly() {
23884 assert_eq!(date_str_to_day_id("2025-10-01").unwrap(), 2100);
23886 assert_eq!(date_str_to_day_id("2024-04-01").unwrap(), 1552);
23888 assert!(date_str_to_day_id("invalid").is_err());
23889 }
23890
23891 #[test]
23892 fn pricing_table_lookup_selects_matching_entry() {
23893 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
23894 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
23895 let table = PricingTable {
23896 entries: vec![
23897 PricingEntry {
23898 model_pattern: "claude-opus-4%".into(),
23899 provider: "anthropic".into(),
23900 input_cost_per_mtok: 15.0,
23901 output_cost_per_mtok: 75.0,
23902 cache_read_cost_per_mtok: Some(1.5),
23903 cache_creation_cost_per_mtok: Some(18.75),
23904 effective_day_id: effective_day,
23905 },
23906 PricingEntry {
23907 model_pattern: "claude-sonnet-4%".into(),
23908 provider: "anthropic".into(),
23909 input_cost_per_mtok: 3.0,
23910 output_cost_per_mtok: 15.0,
23911 cache_read_cost_per_mtok: Some(0.3),
23912 cache_creation_cost_per_mtok: Some(3.75),
23913 effective_day_id: effective_day,
23914 },
23915 ],
23916 };
23917
23918 let result = table.lookup("claude-opus-4-20260101", lookup_day);
23919 assert!(result.is_some());
23920 assert_eq!(result.unwrap().input_cost_per_mtok, 15.0);
23921
23922 let result = table.lookup("claude-sonnet-4-latest", lookup_day);
23923 assert!(result.is_some());
23924 assert_eq!(result.unwrap().input_cost_per_mtok, 3.0);
23925
23926 assert!(table.lookup("unknown-model", lookup_day).is_none());
23927 }
23928
23929 #[test]
23930 fn pricing_table_lookup_respects_effective_date() {
23931 let effective_day_1 = date_str_to_day_id("2025-10-01").unwrap();
23932 let effective_day_2 = date_str_to_day_id("2026-01-01").unwrap();
23933 let table = PricingTable {
23934 entries: vec![
23935 PricingEntry {
23936 model_pattern: "claude-opus-4%".into(),
23937 provider: "anthropic".into(),
23938 input_cost_per_mtok: 15.0,
23939 output_cost_per_mtok: 75.0,
23940 cache_read_cost_per_mtok: None,
23941 cache_creation_cost_per_mtok: None,
23942 effective_day_id: effective_day_1,
23943 },
23944 PricingEntry {
23945 model_pattern: "claude-opus-4%".into(),
23946 provider: "anthropic".into(),
23947 input_cost_per_mtok: 12.0,
23948 output_cost_per_mtok: 60.0,
23949 cache_read_cost_per_mtok: None,
23950 cache_creation_cost_per_mtok: None,
23951 effective_day_id: effective_day_2,
23952 },
23953 ],
23954 };
23955
23956 let result = table.lookup("claude-opus-4", date_str_to_day_id("2025-11-01").unwrap());
23958 assert!(result.is_some());
23959 assert_eq!(result.unwrap().input_cost_per_mtok, 15.0);
23960
23961 let result = table.lookup("claude-opus-4", date_str_to_day_id("2026-02-01").unwrap());
23963 assert!(result.is_some());
23964 assert_eq!(result.unwrap().input_cost_per_mtok, 12.0);
23965
23966 assert!(
23968 table
23969 .lookup("claude-opus-4", date_str_to_day_id("2024-01-01").unwrap())
23970 .is_none()
23971 );
23972 }
23973
23974 #[test]
23975 fn pricing_table_lookup_specificity_tiebreak() {
23976 let effective_day = date_str_to_day_id("2025-01-01").unwrap();
23977 let lookup_day = date_str_to_day_id("2026-01-01").unwrap();
23978 let table = PricingTable {
23979 entries: vec![
23980 PricingEntry {
23981 model_pattern: "gpt-4%".into(),
23982 provider: "openai".into(),
23983 input_cost_per_mtok: 10.0,
23984 output_cost_per_mtok: 30.0,
23985 cache_read_cost_per_mtok: None,
23986 cache_creation_cost_per_mtok: None,
23987 effective_day_id: effective_day,
23988 },
23989 PricingEntry {
23990 model_pattern: "gpt-4-turbo%".into(),
23991 provider: "openai".into(),
23992 input_cost_per_mtok: 5.0,
23993 output_cost_per_mtok: 15.0,
23994 cache_read_cost_per_mtok: None,
23995 cache_creation_cost_per_mtok: None,
23996 effective_day_id: effective_day,
23997 },
23998 ],
23999 };
24000
24001 let result = table.lookup("gpt-4-turbo-2025", lookup_day);
24003 assert!(result.is_some());
24004 assert_eq!(result.unwrap().input_cost_per_mtok, 5.0);
24005
24006 let result = table.lookup("gpt-4o", lookup_day);
24008 assert!(result.is_some());
24009 assert_eq!(result.unwrap().input_cost_per_mtok, 10.0);
24010 }
24011
24012 #[test]
24013 fn pricing_table_compute_cost_basic() {
24014 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
24015 let table = PricingTable {
24016 entries: vec![PricingEntry {
24017 model_pattern: "claude-opus-4%".into(),
24018 provider: "anthropic".into(),
24019 input_cost_per_mtok: 15.0,
24020 output_cost_per_mtok: 75.0,
24021 cache_read_cost_per_mtok: Some(1.5),
24022 cache_creation_cost_per_mtok: Some(18.75),
24023 effective_day_id: effective_day,
24024 }],
24025 };
24026
24027 let cost = table.compute_cost(
24028 Some("claude-opus-4-latest"),
24029 date_str_to_day_id("2026-02-06").unwrap(),
24030 Some(1000),
24031 Some(500),
24032 None,
24033 None,
24034 );
24035 assert!(cost.is_some());
24036 assert!((cost.unwrap() - 0.0525).abs() < 1e-10);
24038 }
24039
24040 #[test]
24041 fn pricing_table_compute_cost_with_cache() {
24042 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
24043 let table = PricingTable {
24044 entries: vec![PricingEntry {
24045 model_pattern: "claude-opus-4%".into(),
24046 provider: "anthropic".into(),
24047 input_cost_per_mtok: 15.0,
24048 output_cost_per_mtok: 75.0,
24049 cache_read_cost_per_mtok: Some(1.5),
24050 cache_creation_cost_per_mtok: Some(18.75),
24051 effective_day_id: effective_day,
24052 }],
24053 };
24054
24055 let cost = table.compute_cost(
24056 Some("claude-opus-4-latest"),
24057 date_str_to_day_id("2026-02-06").unwrap(),
24058 Some(1_000_000),
24059 Some(100_000),
24060 Some(500_000),
24061 Some(200_000),
24062 );
24063 assert!(cost.is_some());
24064 assert!((cost.unwrap() - 16.5).abs() < 1e-10);
24070 }
24071
/// `compute_cost` must return `None` for three inputs: a model name that does
/// not match the only pricing pattern, no model name at all, and a call that
/// supplies no token counts.
#[test]
fn pricing_table_compute_cost_returns_none_for_unknown_model() {
    let opus_pricing = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
    let table = PricingTable {
        entries: vec![opus_pricing],
    };

    // Model name that is not covered by the "claude-opus-4%" pattern.
    let unmatched_model = table.compute_cost(
        Some("unknown-model"),
        lookup_day,
        Some(1000),
        Some(500),
        None,
        None,
    );
    assert!(unmatched_model.is_none());

    // No model name supplied.
    let absent_model = table.compute_cost(None, lookup_day, Some(1000), Some(500), None, None);
    assert!(absent_model.is_none());

    // Every token count is `None`.
    let absent_tokens =
        table.compute_cost(Some("claude-opus-4"), lookup_day, None, None, None, None);
    assert!(absent_tokens.is_none());
}
24111
/// Loads the pricing table from a freshly opened `SqliteStorage` and spot
/// checks the input rate resolved for two model names.
#[test]
fn pricing_table_load_from_db() {
    let dir = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&dir.path().join("test.db")).unwrap();

    let table = PricingTable::load(&storage.conn).unwrap();
    assert!(!table.is_empty());

    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();

    // (model name, expected input cost per million tokens)
    let expectations = [
        ("claude-opus-4-latest", 15.0),
        ("gemini-2.0-flash-001", 0.075),
    ];
    for (model, expected_input_rate) in expectations {
        let entry = table.lookup(model, lookup_day);
        assert!(entry.is_some());
        assert_eq!(entry.unwrap().input_cost_per_mtok, expected_input_rate);
    }
}
24131
/// Inserts a `model_pricing` row whose `effective_date` is not a date and
/// expects `PricingTable::load` to fail with an "invalid effective_date" error.
#[test]
fn pricing_table_load_rejects_invalid_effective_date() {
    let dir = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&dir.path().join("test.db")).unwrap();

    let insert_sql = "INSERT INTO model_pricing (
            model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
            cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
         ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)";
    storage
        .conn
        .execute_compat(
            insert_sql,
            fparams![
                "broken-model%",
                "test",
                1.0_f64,
                2.0_f64,
                None::<f64>,
                None::<f64>,
                "not-a-date"
            ],
        )
        .unwrap();

    let load_err = PricingTable::load(&storage.conn).unwrap_err();
    let rendered = load_err.to_string();
    assert!(rendered.contains("invalid effective_date"));
}
24160
/// `PricingDiagnostics` counts priced and unpriced records and tallies
/// unpriced records per model name, with a missing model name tallied under
/// the `"(none)"` key.
#[test]
fn pricing_diagnostics_tracks_coverage() {
    let mut diag = PricingDiagnostics::default();

    for _ in 0..2 {
        diag.record_priced();
    }
    for model in [Some("custom-model-v1"), Some("custom-model-v1"), None] {
        diag.record_unpriced(model);
    }

    assert_eq!(diag.priced_count, 2);
    assert_eq!(diag.unpriced_count, 3);

    let unknown = &diag.unknown_models;
    assert_eq!(unknown.len(), 2);
    assert_eq!(unknown.get("custom-model-v1"), Some(&2));
    assert_eq!(unknown.get("(none)"), Some(&1));
}
24176
24177 fn franken_storage_in_memory() -> FrankenStorage {
24187 let conn = FrankenConnection::open(":memory:").unwrap();
24188 let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));
24189 storage.run_migrations().unwrap();
24190 storage.apply_config().unwrap();
24191 storage
24192 }
24193
/// Verifies that a freshly migrated in-memory `FrankenStorage` reports the
/// current schema version, exposes every expected table in `sqlite_master`,
/// and records migrations 13 through `CURRENT_SCHEMA_VERSION` in
/// `_schema_migrations`.
#[test]
fn franken_migrations_create_all_tables() {
    let storage = franken_storage_in_memory();

    let version = storage.schema_version().unwrap();
    assert_eq!(
        version, CURRENT_SCHEMA_VERSION,
        "fresh FrankenStorage should be at current schema version"
    );

    let rows = storage
        .raw()
        .query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
        .unwrap();
    // Decode strictly: a row that cannot be read as a string fails the test
    // here instead of being dropped and surfacing later as a misleading
    // "missing table" failure. A set also lets each check borrow the `&str`
    // name rather than allocating a `String` per lookup.
    let table_names: HashSet<String> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();

    for required in [
        "meta",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
        "tags",
        "conversation_tags",
    ] {
        assert!(
            table_names.contains(required),
            "missing table: {required}"
        );
    }

    for required in ["sources", "daily_stats", "embedding_jobs"] {
        assert!(
            table_names.contains(required),
            "missing {required} table"
        );
    }

    for analytics_table in ["message_metrics", "usage_hourly", "usage_daily"] {
        assert!(
            table_names.contains(analytics_table),
            "missing table: {analytics_table}"
        );
    }

    for required in [
        "conversation_tail_state",
        "conversation_external_lookup",
        "conversation_external_tail_lookup",
    ] {
        assert!(
            table_names.contains(required),
            "missing {required} table"
        );
    }

    // One bookkeeping row per version from 13 through the current version.
    let rows = storage
        .raw()
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap();
    let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(
        count,
        (13..=CURRENT_SCHEMA_VERSION).count() as i64,
        "_schema_migrations should record the V13 base schema and post-V13 migrations"
    );

    let rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions,
        (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
        "_schema_migrations should contain v13 through current"
    );
}
24298
/// Running migrations a second time on an already migrated store must
/// succeed and leave the schema version at `CURRENT_SCHEMA_VERSION`.
#[test]
fn franken_migrations_idempotent() {
    let storage = franken_storage_in_memory();
    let version_before = storage.schema_version().unwrap();
    assert_eq!(version_before, CURRENT_SCHEMA_VERSION);

    storage.run_migrations().unwrap();

    let version_after = storage.schema_version().unwrap();
    assert_eq!(version_after, CURRENT_SCHEMA_VERSION);
}
24308
/// Rewinds a populated store to schema version 19 (tail-lookup rows and the
/// v20 bookkeeping row removed), re-runs migrations, and checks that the
/// tail-lookup row for the existing conversation is rebuilt.
#[test]
fn migration_v20_backfills_conversation_external_tail_lookup() {
    let storage = franken_storage_in_memory();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_path = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace_path, None).unwrap();

    // Non-ASCII identifiers feed the lookup key computed below.
    let mut conv = make_profiled_storage_remote_conversation(1919, 2);
    conv.source_id = "profiled-storage-remote-source-東京".into();
    conv.external_id = Some("profiled-storage-remote-☃-1919".into());
    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();
    let lookup_key = conversation_external_lookup_key(
        &conv.source_id,
        agent_id,
        conv.external_id.as_deref().unwrap(),
    );

    // Undo v20: clear the lookup table, forget the migration record, and
    // report schema version 19 in `meta`.
    for rewind_sql in [
        "DELETE FROM conversation_external_tail_lookup",
        "DELETE FROM _schema_migrations WHERE version = 20",
    ] {
        storage.raw().execute(rewind_sql).unwrap();
    }
    storage
        .raw()
        .execute_compat(
            "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
            fparams!["19"],
        )
        .unwrap();

    storage.run_migrations().unwrap();

    let (conversation_id, ended_at, last_message_idx, last_message_created_at): (
        i64,
        Option<i64>,
        Option<i64>,
        Option<i64>,
    ) = storage
        .raw()
        .query_row_map(
            "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
            |row| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(conversation_id, outcome.conversation_id);
    assert_eq!(ended_at, conv.ended_at);
    assert_eq!(last_message_idx, Some(1));
    assert_eq!(last_message_created_at, conv.messages[1].created_at);
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
}
24379
/// Applies the conversation tail-state cache migration (v15) to a minimal
/// hand-built schema. Checks that it applies exactly once, adds the two
/// `conversations` columns, leaves `conversation_tail_state` empty, and
/// records version 15.
#[test]
fn migration_v15_creates_lazy_tail_state_cache() {
    let conn = FrankenConnection::open(":memory:").unwrap();

    // Minimal pre-v15 fixture: two conversations and three messages.
    let fixture_sql = "CREATE TABLE conversations (
            id INTEGER PRIMARY KEY,
            ended_at INTEGER
         );
         CREATE TABLE messages (
            id INTEGER PRIMARY KEY,
            conversation_id INTEGER NOT NULL,
            idx INTEGER NOT NULL,
            created_at INTEGER
         );
         INSERT INTO conversations(id, ended_at) VALUES
            (1, 1710000000300),
            (2, NULL);
         INSERT INTO messages(id, conversation_id, idx, created_at) VALUES
            (10, 1, 0, 1710000000100),
            (11, 1, 1, 1710000000200),
            (12, 2, 0, 1710000000400);";
    conn.execute_batch(fixture_sql).unwrap();

    conn.execute(
        "CREATE TABLE _schema_migrations (
            version INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
         );",
    )
    .unwrap();

    let first_run = apply_conversation_tail_state_cache_migration(&conn).unwrap();
    assert!(first_run, "v15 migration should apply once");
    let second_run = apply_conversation_tail_state_cache_migration(&conn).unwrap();
    assert!(
        !second_run,
        "v15 migration should be idempotent once recorded"
    );

    // Column 1 of `PRAGMA table_info` is read as the column name.
    let column_rows = conn.query("PRAGMA table_info(conversations);").unwrap();
    let column_names: HashSet<String> = column_rows
        .iter()
        .map(|row| row.get_typed(1))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    for added_column in ["last_message_idx", "last_message_created_at"] {
        assert!(column_names.contains(added_column));
    }

    let tail_count_rows = conn
        .query("SELECT COUNT(*) FROM conversation_tail_state;")
        .unwrap();
    let tail_rows = tail_count_rows
        .first()
        .unwrap()
        .get_typed::<i64>(0)
        .unwrap();
    assert_eq!(
        tail_rows, 0,
        "v15 should create the cache without an open-time message scan"
    );

    let applied_count_rows = conn
        .query("SELECT COUNT(*) FROM _schema_migrations WHERE version = 15;")
        .unwrap();
    let applied = applied_count_rows
        .first()
        .unwrap()
        .get_typed::<i64>(0)
        .unwrap();
    assert_eq!(applied, 1);
}
24452
/// Starts from a `conversations` table with only three columns and checks
/// that `repair_missing_conversation_token_columns` adds every column listed
/// in `REQUIRED_CONVERSATION_TOKEN_COLUMNS`, and that a second run succeeds.
#[test]
fn schema_repair_adds_missing_conversations_token_columns() {
    let conn = FrankenConnection::open(":memory:").unwrap();
    let bare_conversations_sql = "CREATE TABLE conversations (
            id INTEGER PRIMARY KEY,
            agent_id INTEGER NOT NULL,
            source_path TEXT NOT NULL
         );";
    conn.execute_batch(bare_conversations_sql).unwrap();
    let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));

    // Run twice: the second pass must succeed on the already repaired table.
    for _ in 0..2 {
        storage.repair_missing_conversation_token_columns().unwrap();
    }

    let columns = franken_table_column_names(&storage.conn, "conversations").unwrap();
    let required_names = REQUIRED_CONVERSATION_TOKEN_COLUMNS
        .iter()
        .map(|&(column_name, _)| column_name);
    for column_name in required_names {
        assert!(
            columns.contains(column_name),
            "schema repair should add conversations.{column_name}"
        );
    }
}
24477
/// The `schema_version` row in `meta` must hold `CURRENT_SCHEMA_VERSION`
/// (as text) after a fresh in-memory store is migrated.
#[test]
fn franken_meta_schema_version_in_sync() {
    let storage = franken_storage_in_memory();

    let rows = storage
        .raw()
        .query("SELECT value FROM meta WHERE key = 'schema_version';")
        .unwrap();
    let stored_version = rows.first().unwrap().get_typed::<String>(0).unwrap();
    let expected_version = CURRENT_SCHEMA_VERSION.to_string();

    assert_eq!(
        stored_version, expected_version,
        "meta.schema_version should match CURRENT_SCHEMA_VERSION"
    );
}
24494
/// Builds a legacy database whose `meta.schema_version` is `10` and checks
/// that `transition_from_meta_version` records versions 1 through 13 in
/// `_schema_migrations`.
#[test]
fn franken_transition_from_meta_version() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition.db");

    // Legacy fixture: a `meta` version marker plus a `conversations` table.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
        .unwrap();
    conn.execute("INSERT INTO meta(key, value) VALUES('schema_version', '10');")
        .unwrap();
    conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
        .unwrap();
    drop(conn);

    // Reopen so the transition runs on a fresh connection to the same file.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as an integer fails the test
    // here instead of being silently dropped from the comparison.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions,
        (1..=13).collect::<Vec<i64>>(),
        "transition should bridge legacy V10 databases through the combined V13 base marker"
    );
}
24529
/// Builds a database whose `meta.schema_version` already equals
/// `CURRENT_SCHEMA_VERSION` and checks that `transition_from_meta_version`
/// records every version from 1 through the current one.
#[test]
fn franken_transition_from_current_meta_backfills_current_schema_marker() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_current_transition.db");

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
        .unwrap();
    conn.execute_compat(
        "INSERT INTO meta(key, value) VALUES('schema_version', ?1);",
        fparams![CURRENT_SCHEMA_VERSION.to_string()],
    )
    .unwrap();
    conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
        .unwrap();
    drop(conn);

    // Reopen so the transition runs on a fresh connection to the same file.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as an integer fails the test
    // here instead of being silently dropped from the comparison.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions,
        (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
        "current meta schema marker should backfill every known migration"
    );
}
24560
/// When `_schema_migrations` already exists with a row in it,
/// `transition_from_meta_version` must not add further rows.
#[test]
fn franken_transition_skips_when_already_done() {
    let dir = TempDir::new().unwrap();
    let db_file = dir.path().join("test_transition_skip.db");
    let conn = FrankenConnection::open(db_file.to_string_lossy().to_string()).unwrap();

    // Pre-existing bookkeeping table holding exactly one row.
    let setup_statements = [
        "CREATE TABLE _schema_migrations (version INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL DEFAULT 'now');",
        "INSERT INTO _schema_migrations (version, name) VALUES (1, 'test');",
    ];
    for statement in setup_statements {
        conn.execute(statement).unwrap();
    }

    transition_from_meta_version(&conn).unwrap();

    let count_rows = conn
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap();
    let recorded = count_rows.first().unwrap().get_typed::<i64>(0).unwrap();
    assert_eq!(
        recorded, 1,
        "transition should not re-run on already-transitioned DB"
    );
}
24587
/// On a brand-new database file `transition_from_meta_version` must succeed
/// without creating `_schema_migrations` (querying the table must fail).
#[test]
fn franken_transition_fresh_db_is_noop() {
    let dir = TempDir::new().unwrap();
    let db_file = dir.path().join("test_fresh_noop.db");
    let conn = FrankenConnection::open(db_file.to_string_lossy().to_string()).unwrap();

    transition_from_meta_version(&conn).unwrap();

    let table_missing = conn
        .query("SELECT * FROM \"_schema_migrations\";")
        .is_err();
    assert!(
        table_missing,
        "transition should not create _schema_migrations on fresh DB"
    );
}
24604
/// Builds a legacy database at `schema_version` 13 that also contains an FTS5
/// virtual table, and checks that `transition_from_meta_version` still
/// records versions 1 through 13.
#[test]
fn franken_transition_with_fts_virtual_table_succeeds() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition_with_fts.db");

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(
        "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
         INSERT INTO meta(key, value) VALUES('schema_version', '13');
         CREATE TABLE conversations (id INTEGER PRIMARY KEY);
         CREATE VIRTUAL TABLE fts_messages USING fts5(
            content,
            title,
            agent,
            workspace,
            source_path,
            created_at,
            content='',
            tokenize='porter unicode61'
         );",
    )
    .unwrap();
    drop(conn);

    // Reopen so the transition runs on a fresh connection to the same file.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as an integer fails the test
    // here instead of being silently dropped from the comparison.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(versions, (1..=13).collect::<Vec<i64>>());
}
24638
/// Builds a populated legacy database at `schema_version` 13 (core tables,
/// one conversation, one message, and an FTS5 virtual table). Checks that
/// `FrankenStorage::open` brings it to `CURRENT_SCHEMA_VERSION` with versions
/// 1 through current recorded in `_schema_migrations`.
#[test]
fn franken_storage_open_legacy_v13_with_fts_virtual_table_succeeds() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_open_legacy_v13_with_fts.db");

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(
        "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
         INSERT INTO meta(key, value) VALUES('schema_version', '13');
         CREATE TABLE agents (
            id INTEGER PRIMARY KEY,
            slug TEXT NOT NULL
         );
         CREATE TABLE workspaces (
            id INTEGER PRIMARY KEY,
            path TEXT NOT NULL
         );
         CREATE TABLE sources (
            id TEXT PRIMARY KEY,
            kind TEXT NOT NULL,
            host_label TEXT,
            machine_id TEXT,
            platform TEXT,
            config_json TEXT,
            created_at INTEGER NOT NULL,
            updated_at INTEGER NOT NULL
         );
         CREATE TABLE conversations (
            id INTEGER PRIMARY KEY,
            agent_id INTEGER NOT NULL,
            workspace_id INTEGER,
            source_id TEXT NOT NULL DEFAULT 'local',
            external_id TEXT,
            title TEXT,
            source_path TEXT NOT NULL,
            started_at INTEGER,
            ended_at INTEGER
         );
         CREATE TABLE messages (
            id INTEGER PRIMARY KEY,
            conversation_id INTEGER NOT NULL,
            idx INTEGER NOT NULL,
            role TEXT NOT NULL,
            author TEXT,
            created_at INTEGER,
            content TEXT NOT NULL,
            extra_json TEXT,
            extra_bin BLOB
         );
         INSERT INTO agents(id, slug) VALUES (1, 'codex');
         INSERT INTO workspaces(id, path) VALUES (1, '/data/projects/coding_agent_session_search');
         INSERT INTO sources(id, kind, host_label, created_at, updated_at)
         VALUES ('local', 'local', NULL, 1710000000000, 1710000000000);
         INSERT INTO conversations(
            id,
            agent_id,
            workspace_id,
            source_id,
            external_id,
            title,
            source_path,
            started_at
         )
         VALUES (
            1,
            1,
            1,
            'local',
            'legacy-session',
            'legacy session',
            '/tmp/legacy.jsonl',
            1710000000000
         );
         INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content)
         VALUES (1, 1, 0, 'user', 'tester', 1710000000000, 'legacy content');
         CREATE VIRTUAL TABLE fts_messages USING fts5(
            content,
            title,
            agent,
            workspace,
            source_path,
            created_at,
            message_id,
            content='',
            tokenize='porter unicode61'
         );",
    )
    .unwrap();
    drop(conn);

    let storage = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as an integer fails the test
    // here instead of being silently dropped from the comparison.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(versions, (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>());
}
24739
/// Injects a second `fts_messages` row into `sqlite_master` and removes the
/// FTS rebuild marker from `meta`. Then checks three things: reopening
/// succeeds without rewriting that marker;
/// `ensure_search_fallback_fts_consistency` leaves a single FTS schema row;
/// and the FTS row count matches the message count.
#[test]
fn franken_storage_open_repairs_duplicate_fts_messages_schema_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_open_repairs_duplicate_fts_schema.db");

    // Seed one conversation with one message; the storage handle is released
    // at the end of this scope, before the file is edited through rusqlite.
    {
        let storage = FrankenStorage::open(&db_path).unwrap();
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: "codex".into(),
                name: "Codex".into(),
                version: None,
                kind: AgentKind::Cli,
            })
            .unwrap();
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_050),
            content: "message that should remain queryable".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("dup-fts-schema".into()),
            title: Some("Duplicate FTS schema".into()),
            source_path: PathBuf::from("/tmp/dup-fts-schema.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    // Corrupt the schema: add a duplicate `fts_messages` entry and delete the
    // rebuild marker, with `writable_schema` switched on for the edit.
    {
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        let fixture = rusqlite_test_fixture_conn(&db_path);
        fixture
            .execute_batch("PRAGMA writable_schema = ON;")
            .unwrap();
        fixture
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [duplicate_legacy_fts_sql],
            )
            .unwrap();
        fixture
            .execute(
                "DELETE FROM meta WHERE key = ?1",
                [FTS_FRANKEN_REBUILD_META_KEY],
            )
            .unwrap();
        fixture
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();

        let duplicate_rows: i64 = fixture
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(duplicate_rows, 2);
    }

    let reopened = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let generation_rows: Vec<String> = reopened
        .raw()
        .query_map_collect(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![FTS_FRANKEN_REBUILD_META_KEY],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert!(
        generation_rows.is_empty(),
        "canonical open should not eagerly rewrite FTS repair metadata"
    );

    reopened.ensure_search_fallback_fts_consistency().unwrap();
    let repaired = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    assert_eq!(franken_fts_schema_rows(&repaired).unwrap(), 1);

    let total_messages = reopened
        .raw()
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed::<i64>(0)
        })
        .unwrap();
    let total_fts_rows = reopened
        .raw()
        .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
            row.get_typed::<i64>(0)
        })
        .unwrap();
    assert_eq!(total_fts_rows, total_messages);
}
24845
/// A healthy database passes `validate_fts_messages_integrity`. A database
/// whose `sqlite_master` holds an `fts_messages` entry with no backing tables
/// fails to open, and that open error maps to an FTS integrity error naming
/// `fts_messages_content` as the missing shadow table.
#[test]
fn fts_messages_integrity_reports_missing_shadow_tables() {
    let dir = TempDir::new().unwrap();

    // Control case: a freshly materialized FTS table validates cleanly.
    let healthy_db_path = dir.path().join("healthy_fts.db");
    {
        let healthy = FrankenStorage::open(&healthy_db_path).unwrap();
        healthy.ensure_search_fallback_fts_consistency().unwrap();
        healthy
            .validate_fts_messages_integrity()
            .expect("freshly materialized fts_messages should pass integrity validation");
    }

    // Corrupt case: write only the schema row for `fts_messages`, via
    // `writable_schema`, so nothing backs the virtual table.
    let corrupt_db_path = dir.path().join("test_corrupt_fts_missing_shadows.db");
    {
        let fixture = rusqlite_test_fixture_conn(&corrupt_db_path);
        fixture
            .execute("CREATE TABLE schema_anchor(id INTEGER PRIMARY KEY)", [])
            .unwrap();
        let orphaned_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        fixture
            .execute_batch("PRAGMA writable_schema = ON;")
            .unwrap();
        fixture
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [orphaned_fts_sql],
            )
            .unwrap();
        fixture
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
    }

    let corrupt_path_text = corrupt_db_path.to_string_lossy().to_string();
    let open_err = FrankenConnection::open(corrupt_path_text)
        .expect_err("orphaned fts_messages schema should fail during connection open");
    let integrity = fts_messages_integrity_error_from_message(open_err.to_string())
        .expect("open-time FTS corruption should map to the typed FTS integrity kind");
    assert_eq!(integrity.missing_shadow_tables(), &["fts_messages_content"]);

    // The rendered error must mention every one of these fragments.
    let rendered = integrity.to_string();
    let mentions_all = [
        "fts_messages",
        "required FTS5 shadow tables",
        "fts_messages_content",
    ]
    .iter()
    .all(|fragment| rendered.contains(fragment));
    assert!(
        mentions_all,
        "error should be an operator-facing FTS corruption diagnosis: {rendered}"
    );
}
24888
/// After a fresh open plus `ensure_search_fallback_fts_consistency`, the
/// database must contain exactly one `fts_messages` schema row, and a later
/// open must be able to query `fts_messages`.
#[test]
fn franken_storage_open_fresh_db_keeps_single_franken_fts_schema_row() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("fresh-franken-storage-open.db");

    // First open: migrate, materialize FTS, then release the handle.
    {
        let storage = FrankenStorage::open(&db_path).unwrap();
        assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
        storage
            .ensure_search_fallback_fts_consistency()
            .expect("ensure FTS consistency after fresh open");
    }

    // Inspect the schema through a plain connection, released before reopening.
    {
        let c_reader = FrankenConnection::open(db_path.to_string_lossy().into_owned())
            .expect("open DB via frankensqlite for sqlite_master inspection");
        let schema_rows = franken_fts_schema_rows(&c_reader).unwrap();
        assert_eq!(
            schema_rows, 1,
            "exactly one fts_messages schema row should exist after ensure_search_fallback_fts_consistency"
        );
    }

    let storage = FrankenStorage::open(&db_path).unwrap();
    let fts_queryable = storage
        .raw()
        .query("SELECT COUNT(*) FROM fts_messages")
        .is_ok();
    assert!(
        fts_queryable,
        "fts_messages must be queryable through frankensqlite after open"
    );
}
24926
/// Drops nine tables from a current-version database while `meta` still
/// claims the current schema version, then checks that `FrankenStorage::open`
/// recreates all nine.
#[test]
fn franken_storage_open_repairs_missing_analytics_tables_when_version_markers_lie() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_repair_missing_analytics.db");

    // Create a database at the current schema version.
    {
        let fresh = FrankenStorage::open(&db_path).unwrap();
        assert_eq!(fresh.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    }

    // Remove the tables but keep the version marker pointing at "current".
    {
        let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
        let doomed_tables = [
            "usage_models_daily",
            "usage_daily",
            "usage_hourly",
            "message_metrics",
            "token_daily_stats",
            "token_usage",
            "model_pricing",
            "embedding_jobs",
            "daily_stats",
        ];
        for table in doomed_tables {
            let drop_sql = format!("DROP TABLE IF EXISTS {table}");
            conn.execute(&drop_sql).unwrap();
        }
        conn.execute_compat(
            "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
            &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
        )
        .unwrap();
    }

    let repaired = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(repaired.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let analytics_count = repaired
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master
             WHERE type='table'
               AND name IN (
                   'daily_stats',
                   'embedding_jobs',
                   'token_usage',
                   'token_daily_stats',
                   'model_pricing',
                   'message_metrics',
                   'usage_hourly',
                   'usage_daily',
                   'usage_models_daily'
               )",
            &[],
            |row| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(
        analytics_count, 9,
        "open() should recreate the missing analytics tables even when schema_version already says current"
    );
}
24988
/// Treats every table in `REQUIRED_CURRENT_SCHEMA_TABLE_PROBES` as missing and
/// checks that the repair batches selected for them list each of those tables.
#[test]
fn current_schema_repair_batches_cover_every_required_probe() {
    let mut missing_tables: Vec<&'static str> = Vec::new();
    for (table_name, _) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES.iter() {
        missing_tables.push(*table_name);
    }

    let batches = current_schema_repair_batches_for_missing_tables(&missing_tables).unwrap();

    // Union of the tables named by every selected batch.
    let mut covered_tables: HashSet<&'static str> = HashSet::new();
    for batch in batches.iter() {
        covered_tables.extend(batch.tables.iter().copied());
    }

    for table_name in missing_tables {
        let is_covered = covered_tables.contains(table_name);
        assert!(is_covered, "missing repair coverage for {table_name}");
    }
}
25009
/// No repair batch may contain SQL that recreates a core table, recreates the
/// FTS virtual table, or drops a table.
#[test]
fn current_schema_repair_batches_do_not_replay_core_schema_bootstrap() {
    // (forbidden SQL fragment, tail of the failure message)
    let forbidden_fragments = [
        ("CREATE TABLE IF NOT EXISTS meta", "should not recreate meta"),
        (
            "CREATE TABLE IF NOT EXISTS agents",
            "should not recreate agents",
        ),
        (
            "CREATE TABLE IF NOT EXISTS workspaces",
            "should not recreate workspaces",
        ),
        (
            "CREATE TABLE IF NOT EXISTS conversations",
            "should not recreate conversations",
        ),
        (
            "CREATE TABLE IF NOT EXISTS messages",
            "should not recreate messages",
        ),
        (
            "CREATE TABLE IF NOT EXISTS snippets",
            "should not recreate snippets",
        ),
        (
            "CREATE VIRTUAL TABLE fts_messages",
            "should not recreate FTS tables",
        ),
        ("DROP TABLE", "should never drop tables"),
    ];

    for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
        for (fragment, complaint) in forbidden_fragments {
            assert!(
                !batch.sql.contains(fragment),
                "repair batch {} {}",
                batch.name,
                complaint
            );
        }
    }
}
25057
/// Runs the three migration stages in order on an empty in-memory database:
/// the pre-tail-cache runner, the tail-state cache migration, and the
/// post-tail-cache runner. Checks that together they apply versions 13
/// through `CURRENT_SCHEMA_VERSION`.
#[test]
fn build_cass_migrations_applies_combined_v13() {
    let conn = FrankenConnection::open(":memory:").unwrap();

    let before_tail_cache = build_cass_migrations_before_tail_cache()
        .run(&conn)
        .unwrap();
    let tail_cache_applied = apply_conversation_tail_state_cache_migration(&conn).unwrap();
    assert!(tail_cache_applied);
    let after_tail_cache = build_cass_migrations_after_tail_cache().run(&conn).unwrap();

    assert!(before_tail_cache.was_fresh);

    // The tail-cache step is applied outside both runners, so its version
    // number (15) is spliced in between their reported lists.
    let applied: Vec<i64> = before_tail_cache
        .applied
        .into_iter()
        .chain(std::iter::once(15))
        .chain(after_tail_cache.applied)
        .collect();
    assert_eq!(
        applied,
        (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
        "should apply combined V13 plus additive post-V13 migrations"
    );

    let max_version_rows = conn
        .query("SELECT MAX(version) FROM _schema_migrations;")
        .unwrap();
    let current = max_version_rows
        .first()
        .unwrap()
        .get_typed::<i64>(0)
        .unwrap();
    assert_eq!(current, CURRENT_SCHEMA_VERSION);
}
25085
25086 #[test]
25087 fn franken_insert_conversations_batched_populates_analytics_rollups() {
25088 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
25089 use frankensqlite::compat::{ConnectionExt, RowExt};
25090 use std::path::PathBuf;
25091
25092 let dir = TempDir::new().unwrap();
25093 let db_path = dir.path().join("franken-index.db");
25094 let storage = FrankenStorage::open(&db_path).unwrap();
25095
25096 let agent = Agent {
25097 id: None,
25098 slug: "claude_code".into(),
25099 name: "Claude Code".into(),
25100 version: Some("1.0".into()),
25101 kind: AgentKind::Cli,
25102 };
25103 let agent_id = storage.ensure_agent(&agent).unwrap();
25104
25105 let ts_ms = 1_770_551_400_000_i64;
25106 let usage_json = serde_json::json!({
25107 "message": {
25108 "model": "claude-opus-4-6",
25109 "usage": {
25110 "input_tokens": 100,
25111 "output_tokens": 50,
25112 "cache_read_input_tokens": 25,
25113 "cache_creation_input_tokens": 10,
25114 "service_tier": "standard"
25115 }
25116 }
25117 });
25118
25119 let conv = Conversation {
25120 id: None,
25121 agent_slug: "claude_code".into(),
25122 workspace: Some(PathBuf::from("/tmp/workspace")),
25123 external_id: Some("franken-batch-upsert".into()),
25124 title: Some("Franken batch upsert".into()),
25125 source_path: PathBuf::from("/tmp/franken.jsonl"),
25126 started_at: Some(ts_ms),
25127 ended_at: Some(ts_ms + 60_000),
25128 approx_tokens: None,
25129 metadata_json: serde_json::Value::Null,
25130 messages: vec![
25131 Message {
25132 id: None,
25133 idx: 0,
25134 role: MessageRole::User,
25135 author: None,
25136 created_at: Some(ts_ms),
25137 content: "Please make a plan.".into(),
25138 extra_json: serde_json::Value::Null,
25139 snippets: vec![],
25140 },
25141 Message {
25142 id: None,
25143 idx: 1,
25144 role: MessageRole::Agent,
25145 author: None,
25146 created_at: Some(ts_ms + 30_000),
25147 content: "## Plan\n\n1. Reproduce\n2. Patch\n3. Verify".into(),
25148 extra_json: usage_json,
25149 snippets: vec![],
25150 },
25151 ],
25152 source_id: "local".into(),
25153 origin_host: None,
25154 };
25155
25156 let outcomes = storage
25157 .insert_conversations_batched(&[(agent_id, None, &conv)])
25158 .unwrap();
25159 assert_eq!(outcomes.len(), 1);
25160 assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
25161
25162 let conn = storage.raw();
25163 let daily_stats_rows: i64 = conn
25164 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
25165 row.get_typed(0)
25166 })
25167 .unwrap();
25168 let token_daily_rows: i64 = conn
25169 .query_row_map(
25170 "SELECT COUNT(*) FROM token_daily_stats",
25171 fparams![],
25172 |row| row.get_typed(0),
25173 )
25174 .unwrap();
25175 let usage_daily_rows: i64 = conn
25176 .query_row_map("SELECT COUNT(*) FROM usage_daily", fparams![], |row| {
25177 row.get_typed(0)
25178 })
25179 .unwrap();
25180 let model_daily_rows: i64 = conn
25181 .query_row_map(
25182 "SELECT COUNT(*) FROM usage_models_daily",
25183 fparams![],
25184 |row| row.get_typed(0),
25185 )
25186 .unwrap();
25187
25188 assert!(daily_stats_rows > 0, "daily_stats should be populated");
25189 assert!(
25190 token_daily_rows > 0,
25191 "token_daily_stats should be populated"
25192 );
25193 assert!(usage_daily_rows > 0, "usage_daily should be populated");
25194 assert!(
25195 model_daily_rows > 0,
25196 "usage_models_daily should be populated"
25197 );
25198 }
25199
25200 #[test]
25205 fn connection_manager_creates_readers() {
25206 let dir = TempDir::new().unwrap();
25207 let db_path = dir.path().join("cm.db");
25208
25209 let fs = FrankenStorage::open(&db_path).unwrap();
25211 drop(fs);
25212
25213 let config = ConnectionManagerConfig {
25214 reader_count: 3,
25215 max_writers: 2,
25216 };
25217 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25218 assert_eq!(mgr.reader_count(), 3);
25219 assert_eq!(mgr.max_writers(), 2);
25220 }
25221
25222 #[test]
25223 fn connection_manager_clamps_zero_writer_limit_to_prevent_deadlock() {
25224 let dir = TempDir::new().unwrap();
25225 let db_path = dir.path().join("cm.db");
25226
25227 let fs = FrankenStorage::open(&db_path).unwrap();
25228 drop(fs);
25229
25230 let mgr = std::sync::Arc::new(
25231 FrankenConnectionManager::new(
25232 &db_path,
25233 ConnectionManagerConfig {
25234 reader_count: 0,
25235 max_writers: 0,
25236 },
25237 )
25238 .unwrap(),
25239 );
25240 assert_eq!(mgr.reader_count(), 1);
25241 assert_eq!(mgr.max_writers(), 1);
25242
25243 let (tx, rx) = std::sync::mpsc::channel();
25244 let mgr_for_thread = std::sync::Arc::clone(&mgr);
25245 std::thread::spawn(move || {
25246 let result = mgr_for_thread.writer().map(|mut guard| {
25247 guard.mark_committed();
25248 });
25249 tx.send(result.is_ok()).expect("writer result send");
25250 });
25251
25252 assert!(
25253 rx.recv_timeout(Duration::from_secs(10)).unwrap(),
25254 "writer acquisition should not block forever when configured with zero writer slots"
25255 );
25256 }
25257
25258 #[test]
25259 fn connection_manager_reader_round_robin() {
25260 let dir = TempDir::new().unwrap();
25261 let db_path = dir.path().join("cm.db");
25262
25263 let fs = FrankenStorage::open(&db_path).unwrap();
25264 drop(fs);
25265
25266 let config = ConnectionManagerConfig {
25267 reader_count: 2,
25268 max_writers: 1,
25269 };
25270 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25271
25272 let idx_before = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
25274 let _r1 = mgr.reader();
25275 let idx_after = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
25276 assert_eq!(idx_after, idx_before + 1, "reader index should advance");
25277 }
25278
25279 #[test]
25280 fn connection_manager_writer_reads_and_writes() {
25281 use frankensqlite::compat::RowExt;
25282
25283 let dir = TempDir::new().unwrap();
25284 let db_path = dir.path().join("cm.db");
25285
25286 let fs = FrankenStorage::open(&db_path).unwrap();
25287 drop(fs);
25288
25289 let mgr = FrankenConnectionManager::new(&db_path, Default::default()).unwrap();
25290
25291 {
25293 let mut guard = mgr.writer().unwrap();
25294 guard
25295 .storage()
25296 .raw()
25297 .execute("CREATE TABLE IF NOT EXISTS cm_test (id INTEGER PRIMARY KEY, val TEXT)")
25298 .unwrap();
25299 guard
25300 .storage()
25301 .raw()
25302 .execute("INSERT INTO cm_test (val) VALUES ('hello')")
25303 .unwrap();
25304 guard.mark_committed();
25305 }
25306
25307 let reader_guard = mgr.reader();
25309 let rows = reader_guard.query("SELECT val FROM cm_test").unwrap();
25310 assert_eq!(rows.len(), 1);
25311 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "hello");
25312 }
25313
25314 #[test]
25315 fn connection_manager_writer_guard_drops_releases_slot() {
25316 let dir = TempDir::new().unwrap();
25317 let db_path = dir.path().join("cm.db");
25318
25319 let fs = FrankenStorage::open(&db_path).unwrap();
25320 drop(fs);
25321
25322 let config = ConnectionManagerConfig {
25323 reader_count: 1,
25324 max_writers: 1,
25325 };
25326 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25327
25328 {
25330 let mut guard = mgr.writer().unwrap();
25331 guard.mark_committed();
25332 }
25333
25334 let mut guard2 = mgr.writer().unwrap();
25336 guard2.mark_committed();
25337 }
25338
25339 #[test]
25340 fn connection_manager_concurrent_writer_works() {
25341 use frankensqlite::compat::RowExt;
25342
25343 let dir = TempDir::new().unwrap();
25344 let db_path = dir.path().join("cm.db");
25345
25346 let fs = FrankenStorage::open(&db_path).unwrap();
25347 drop(fs);
25348
25349 let config = ConnectionManagerConfig {
25350 reader_count: 1,
25351 max_writers: 2,
25352 };
25353 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25354
25355 {
25356 let mut guard = mgr.concurrent_writer().unwrap();
25357 guard
25358 .storage()
25359 .raw()
25360 .execute("CREATE TABLE IF NOT EXISTS cm_conc (id INTEGER PRIMARY KEY, val TEXT)")
25361 .unwrap();
25362 guard
25363 .storage()
25364 .raw()
25365 .execute("INSERT INTO cm_conc (val) VALUES ('concurrent')")
25366 .unwrap();
25367 guard.mark_committed();
25368 }
25369
25370 let reader_guard = mgr.reader();
25371 let rows = reader_guard.query("SELECT val FROM cm_conc").unwrap();
25372 assert_eq!(rows.len(), 1);
25373 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "concurrent");
25374 }
25375
25376 #[test]
25377 fn connection_manager_default_config() {
25378 let config = ConnectionManagerConfig::default();
25379 assert_eq!(config.reader_count, 4);
25380 assert!(config.max_writers > 0);
25381 }
25382
25383 #[test]
25384 fn purge_agent_archive_data_removes_only_target_agent_and_rebuilds_derived_tables() {
25385 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
25386 use std::path::PathBuf;
25387
25388 fn seed_conversation(storage: &FrankenStorage, agent_slug: &str, marker: &str) {
25389 let agent = Agent {
25390 id: None,
25391 slug: agent_slug.into(),
25392 name: agent_slug.into(),
25393 version: None,
25394 kind: AgentKind::Cli,
25395 };
25396 let agent_id = storage.ensure_agent(&agent).unwrap();
25397 let conversation = Conversation {
25398 id: None,
25399 agent_slug: agent_slug.into(),
25400 workspace: Some(PathBuf::from("/tmp/workspace")),
25401 external_id: Some(format!("{agent_slug}-{marker}")),
25402 title: Some(format!("{agent_slug} {marker}")),
25403 source_path: PathBuf::from(format!("/tmp/{agent_slug}-{marker}.jsonl")),
25404 started_at: Some(1_700_000_000_000),
25405 ended_at: Some(1_700_000_000_100),
25406 approx_tokens: None,
25407 metadata_json: serde_json::Value::Null,
25408 messages: vec![
25409 Message {
25410 id: None,
25411 idx: 0,
25412 role: MessageRole::User,
25413 author: Some("user".into()),
25414 created_at: Some(1_700_000_000_010),
25415 content: format!("{agent_slug} {marker} user"),
25416 extra_json: serde_json::Value::Null,
25417 snippets: Vec::new(),
25418 },
25419 Message {
25420 id: None,
25421 idx: 1,
25422 role: MessageRole::Agent,
25423 author: Some("assistant".into()),
25424 created_at: Some(1_700_000_000_020),
25425 content: format!("{agent_slug} {marker} assistant"),
25426 extra_json: serde_json::Value::Null,
25427 snippets: Vec::new(),
25428 },
25429 ],
25430 source_id: LOCAL_SOURCE_ID.into(),
25431 origin_host: None,
25432 };
25433 storage
25434 .insert_conversation_tree(agent_id, None, &conversation)
25435 .unwrap();
25436 }
25437
25438 let dir = TempDir::new().unwrap();
25439 let db_path = dir.path().join("agent_search.db");
25440 let storage = FrankenStorage::open(&db_path).unwrap();
25441
25442 seed_conversation(&storage, "openclaw", "purge-target");
25443 seed_conversation(&storage, "codex", "keep-target");
25444
25445 let purge = storage.purge_agent_archive_data("openclaw").unwrap();
25446 assert_eq!(purge.conversations_deleted, 1);
25447 assert_eq!(purge.messages_deleted, 2);
25448
25449 storage.rebuild_fts().unwrap();
25450 storage.rebuild_analytics().unwrap();
25451 storage.rebuild_daily_stats().unwrap();
25452 storage.rebuild_token_daily_stats().unwrap();
25453
25454 let agents = storage.list_agents().unwrap();
25455 assert_eq!(agents.len(), 1);
25456 assert_eq!(agents[0].slug, "codex");
25457 assert_eq!(storage.total_conversation_count().unwrap(), 1);
25458 assert_eq!(storage.total_message_count().unwrap(), 2);
25459
25460 let fts_rows: i64 = storage
25461 .raw()
25462 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
25463 row.get_typed(0)
25464 })
25465 .unwrap();
25466 assert_eq!(fts_rows, 2);
25467
25468 let total_daily_sessions: i64 = storage
25469 .raw()
25470 .query_row_map(
25471 "SELECT COALESCE(SUM(session_count), 0)
25472 FROM daily_stats
25473 WHERE agent_slug = 'all' AND source_id = 'all'",
25474 fparams![],
25475 |row| row.get_typed(0),
25476 )
25477 .unwrap();
25478 assert_eq!(total_daily_sessions, 1);
25479
25480 let openclaw_token_rows: i64 = storage
25481 .raw()
25482 .query_row_map(
25483 "SELECT COUNT(*) FROM token_daily_stats WHERE agent_slug = 'openclaw'",
25484 fparams![],
25485 |row| row.get_typed(0),
25486 )
25487 .unwrap();
25488 assert_eq!(openclaw_token_rows, 0);
25489 }
25490
25491 #[test]
25498 fn cleanup_orphan_fk_rows_removes_orphans_and_is_noop_on_clean_db() {
25499 let dir = TempDir::new().unwrap();
25500 let db_path = dir.path().join("orphan_fk_self_heal.db");
25501 let storage = FrankenStorage::open(&db_path).unwrap();
25502
25503 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25506
25507 storage
25510 .raw()
25511 .execute_compat(
25512 "INSERT INTO agents(id, slug, name, kind, created_at, updated_at) \
25513 VALUES(1, 'test-agent', 'Test Agent', 'cli', 0, 0)",
25514 fparams![],
25515 )
25516 .unwrap();
25517 storage
25518 .raw()
25519 .execute_compat(
25520 "INSERT INTO conversations(id, agent_id, source_id, source_path, started_at) \
25521 VALUES(1, 1, 'local', '/tmp/real.jsonl', 0)",
25522 fparams![],
25523 )
25524 .unwrap();
25525 storage
25526 .raw()
25527 .execute_compat(
25528 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25529 VALUES(1, 1, 0, 'user', 'real message')",
25530 fparams![],
25531 )
25532 .unwrap();
25533
25534 for (mid, cid, idx) in [(101_i64, 99_999_i64, 0_i64), (102, 0, 0), (103, 0, 1)] {
25538 storage
25539 .raw()
25540 .execute_compat(
25541 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25542 VALUES(?1, ?2, ?3, 'user', 'orphan message')",
25543 fparams![mid, cid, idx],
25544 )
25545 .unwrap();
25546 }
25547
25548 for message_id in [1_i64, 101_i64, 102_i64] {
25553 storage
25554 .raw()
25555 .execute_compat(
25556 "INSERT INTO message_metrics(
25557 message_id, created_at_ms, hour_id, day_id, agent_slug,
25558 role, content_chars, content_tokens_est
25559 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 13, 2)",
25560 fparams![message_id],
25561 )
25562 .unwrap();
25563 storage
25564 .raw()
25565 .execute_compat(
25566 "INSERT INTO token_usage(
25567 message_id, conversation_id, agent_id, timestamp_ms, day_id,
25568 role, content_chars
25569 ) VALUES(?1, 1, 1, 0, 0, 'user', 13)",
25570 fparams![message_id],
25571 )
25572 .unwrap();
25573 }
25574
25575 storage
25579 .raw()
25580 .execute_compat(
25581 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text) \
25582 VALUES(99999, '/tmp/orphan-snippet.rs', 1, 2, 'rust', 'fn main() {}')",
25583 fparams![],
25584 )
25585 .unwrap();
25586
25587 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25588
25589 let messages_before: i64 = storage
25591 .raw()
25592 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25593 row.get_typed(0)
25594 })
25595 .unwrap();
25596 assert_eq!(messages_before, 4); let snippets_before: i64 = storage
25598 .raw()
25599 .query_row_map("SELECT COUNT(*) FROM snippets", fparams![], |row| {
25600 row.get_typed(0)
25601 })
25602 .unwrap();
25603 assert_eq!(snippets_before, 1);
25604 let metrics_before: i64 = storage
25605 .raw()
25606 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25607 row.get_typed(0)
25608 })
25609 .unwrap();
25610 assert_eq!(metrics_before, 3);
25611 let token_usage_before: i64 = storage
25612 .raw()
25613 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
25614 row.get_typed(0)
25615 })
25616 .unwrap();
25617 assert_eq!(token_usage_before, 3);
25618
25619 let report = storage.cleanup_orphan_fk_rows().unwrap();
25621
25622 let messages_after: i64 = storage
25627 .raw()
25628 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25629 row.get_typed(0)
25630 })
25631 .unwrap();
25632 assert_eq!(messages_after, 1, "real message must be preserved");
25633 let snippets_after: i64 = storage
25634 .raw()
25635 .query_row_map("SELECT COUNT(*) FROM snippets", fparams![], |row| {
25636 row.get_typed(0)
25637 })
25638 .unwrap();
25639 assert_eq!(snippets_after, 0);
25640 let metrics_after: i64 = storage
25641 .raw()
25642 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25643 row.get_typed(0)
25644 })
25645 .unwrap();
25646 assert_eq!(metrics_after, 1, "real message metric must be preserved");
25647 let token_usage_after: i64 = storage
25648 .raw()
25649 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
25650 row.get_typed(0)
25651 })
25652 .unwrap();
25653 assert_eq!(token_usage_after, 1, "real token row must be preserved");
25654
25655 assert_eq!(report.total, 4, "report total: {:?}", report);
25656 let messages_count = report
25657 .per_table
25658 .iter()
25659 .find(|(t, _)| *t == "messages")
25660 .map(|(_, c)| *c);
25661 assert_eq!(messages_count, Some(3));
25662 let snippets_count = report
25663 .per_table
25664 .iter()
25665 .find(|(t, _)| *t == "snippets")
25666 .map(|(_, c)| *c);
25667 assert_eq!(snippets_count, Some(1));
25668
25669 let second = storage.cleanup_orphan_fk_rows().unwrap();
25671 assert_eq!(second.total, 0);
25672 assert!(second.per_table.is_empty());
25673 }
25674
25675 #[test]
25676 fn cleanup_orphan_fk_rows_handles_more_than_one_delete_chunk() {
25677 let dir = TempDir::new().unwrap();
25678 let db_path = dir.path().join("orphan_fk_chunked_self_heal.db");
25679 let storage = FrankenStorage::open(&db_path).unwrap();
25680 let orphan_count = ORPHAN_FK_ID_CHUNK_SIZE + 3;
25681
25682 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25683 {
25684 let mut tx = storage.raw().transaction().unwrap();
25685 for idx in 0..orphan_count {
25686 let message_id = 10_000_i64 + i64::try_from(idx).unwrap();
25687 let conversation_id = 20_000_i64 + i64::try_from(idx).unwrap();
25688 tx.execute_compat(
25689 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25690 VALUES(?1, ?2, 0, 'user', 'orphan message')",
25691 fparams![message_id, conversation_id],
25692 )
25693 .unwrap();
25694 tx.execute_compat(
25695 "INSERT INTO message_metrics(
25696 message_id, created_at_ms, hour_id, day_id, agent_slug,
25697 role, content_chars, content_tokens_est
25698 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 14, 2)",
25699 fparams![message_id],
25700 )
25701 .unwrap();
25702 }
25703 tx.commit().unwrap();
25704 }
25705 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25706
25707 let report = storage.cleanup_orphan_fk_rows().unwrap();
25708
25709 assert_eq!(report.total, i64::try_from(orphan_count).unwrap());
25710 let messages_count = report
25711 .per_table
25712 .iter()
25713 .find(|(table, _)| *table == "messages")
25714 .map(|(_, count)| *count);
25715 assert_eq!(messages_count, Some(i64::try_from(orphan_count).unwrap()));
25716 let messages_after: i64 = storage
25717 .raw()
25718 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25719 row.get_typed(0)
25720 })
25721 .unwrap();
25722 assert_eq!(messages_after, 0);
25723 let metrics_after: i64 = storage
25724 .raw()
25725 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25726 row.get_typed(0)
25727 })
25728 .unwrap();
25729 assert_eq!(metrics_after, 0);
25730 }
25731
25732 #[test]
25733 fn cleanup_orphan_fk_rows_pages_direct_child_orphans() {
25734 let dir = TempDir::new().unwrap();
25735 let db_path = dir.path().join("direct_orphan_fk_paged_self_heal.db");
25736 let storage = FrankenStorage::open(&db_path).unwrap();
25737 let orphan_count = (ORPHAN_FK_ID_CHUNK_SIZE * 2) + 5;
25738
25739 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25740 {
25741 let mut tx = storage.raw().transaction().unwrap();
25742 for idx in 0..orphan_count {
25743 let message_id = 50_000_i64 + i64::try_from(idx).unwrap();
25744 tx.execute_compat(
25745 "INSERT INTO message_metrics(
25746 message_id, created_at_ms, hour_id, day_id, agent_slug,
25747 role, content_chars, content_tokens_est
25748 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 21, 3)",
25749 fparams![message_id],
25750 )
25751 .unwrap();
25752 }
25753 tx.commit().unwrap();
25754 }
25755 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25756
25757 let report = storage.cleanup_orphan_fk_rows().unwrap();
25758
25759 assert_eq!(report.total, i64::try_from(orphan_count).unwrap());
25760 let metrics_count = report
25761 .per_table
25762 .iter()
25763 .filter(|(table, _)| *table == "message_metrics")
25764 .map(|(_, count)| *count)
25765 .sum::<i64>();
25766 assert_eq!(metrics_count, i64::try_from(orphan_count).unwrap());
25767 assert_eq!(
25768 report
25769 .per_table
25770 .iter()
25771 .filter(|(table, _)| *table == "message_metrics")
25772 .count(),
25773 1,
25774 "paged cleanup should aggregate report entries by table: {report:?}"
25775 );
25776 let metrics_after: i64 = storage
25777 .raw()
25778 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25779 row.get_typed(0)
25780 })
25781 .unwrap();
25782 assert_eq!(metrics_after, 0);
25783 }
25784}