1use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole, Snippet};
4use crate::sources::provenance::{LOCAL_SOURCE_ID, Source, SourceKind};
5use anyhow::{Context, Result, anyhow, bail};
6use frankensqlite::{
7 Connection as FrankenConnection, Row as FrankenRow, SqliteValue,
8 compat::{
9 ConnectionExt as FrankenConnectionExt, OpenFlags as FrankenOpenFlags,
10 OptionalExtension as FrankenOptionalExtension, ParamValue, RowExt as FrankenRowExt,
11 Transaction as FrankenTransaction, TransactionExt as FrankenTransactionExt,
12 open_with_flags as open_franken_with_flags, param_slice_to_values, params_from_iter,
13 },
14 migrate::MigrationRunner,
15};
16use serde::{Deserialize, Serialize};
17use smallvec::SmallVec;
18use std::borrow::Cow;
19use std::collections::{HashMap, HashSet};
20use std::fs;
21use std::io::{BufRead, BufReader, Write};
22use std::process::{Command, Stdio};
23use std::sync::{
24 Arc,
25 atomic::{AtomicBool, AtomicI8, AtomicI64, AtomicU64, AtomicUsize, Ordering},
26};
27
// Builds a `&[ParamValue]` parameter slice.
//
// * `fparams![]` expands to an empty slice.
// * `fparams![a, b, ...]` converts every expression with `ParamValue::from`
//   (a trailing comma is accepted).
macro_rules! fparams {
    // No arguments: an empty, explicitly typed slice.
    () => {
        &[] as &[ParamValue]
    };
    // One or more expressions, each converted through `ParamValue::from`.
    ($($val:expr),+ $(,)?) => {
        &[$(ParamValue::from($val)),+] as &[ParamValue]
    };
}
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
39use thiserror::Error;
40use tracing::info;
41
// Timeout that `LazyFrankenDb::get` passes to
// `acquire_doctor_mutation_db_open_guard` before opening the database.
const DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
// Upper bound (64 KiB) on how many bytes of the doctor lock file are read when
// looking for its `pid=` metadata line.
const DOCTOR_MUTATION_LOCK_MAX_METADATA_READ: u64 = 64 * 1024;
44
/// Errors produced by [`LazyFrankenDb::get`] and
/// [`LazyFrankenDb::get_with_timeout`].
#[derive(Debug, Error)]
pub enum LazyDbError {
    /// No file exists at the configured database path.
    #[error("Database not found at {0}")]
    NotFound(PathBuf),
    /// Acquiring the doctor mutation lock, or opening the connection itself,
    /// failed (this variant is also used when the timed open gives up).
    #[error("Failed to open FrankenSQLite database at {path}: {source}")]
    FrankenOpenFailed {
        /// Path of the database that could not be opened.
        path: PathBuf,
        /// Underlying frankensqlite error.
        source: frankensqlite::FrankenError,
    },
}
63
64pub struct SendFrankenConnection(FrankenConnection, i64, u64);
75
76unsafe impl Send for SendFrankenConnection {}
79
80impl SendFrankenConnection {
81 pub(crate) fn new(conn: FrankenConnection) -> Self {
82 Self(
83 conn,
84 UNSET_INDEX_WRITER_CHECKPOINT_PAGES,
85 UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS,
86 )
87 }
88
89 pub(crate) fn new_with_index_writer_state(
90 conn: FrankenConnection,
91 checkpoint_pages: i64,
92 busy_timeout_ms: u64,
93 ) -> Self {
94 Self(conn, checkpoint_pages, busy_timeout_ms)
95 }
96
97 pub(crate) fn into_parts(self) -> (FrankenConnection, i64, u64) {
98 (self.0, self.1, self.2)
99 }
100}
101
102impl std::ops::Deref for SendFrankenConnection {
103 type Target = FrankenConnection;
104 fn deref(&self) -> &FrankenConnection {
105 &self.0
106 }
107}
108
/// Database handle that opens its connection on first use.
///
/// The connection slot starts out as `None` and is filled in by
/// [`LazyFrankenDb::get`] / [`LazyFrankenDb::get_with_timeout`].
pub struct LazyFrankenDb {
    /// Filesystem path of the database file.
    path: PathBuf,
    /// Lazily populated connection, guarded by a mutex.
    conn: parking_lot::Mutex<Option<SendFrankenConnection>>,
}
118
119pub struct LazyFrankenDbGuard<'a>(parking_lot::MutexGuard<'a, Option<SendFrankenConnection>>);
121
122impl std::fmt::Debug for LazyFrankenDbGuard<'_> {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.debug_tuple("LazyFrankenDbGuard")
125 .field(&self.0.is_some())
126 .finish()
127 }
128}
129
130impl std::ops::Deref for LazyFrankenDbGuard<'_> {
131 type Target = FrankenConnection;
132 fn deref(&self) -> &FrankenConnection {
133 self.0
134 .as_ref()
135 .expect("LazyFrankenDb connection must be initialized before access")
136 }
137}
138
139impl LazyFrankenDb {
140 pub fn new(path: PathBuf) -> Self {
142 Self {
143 path,
144 conn: parking_lot::Mutex::new(None),
145 }
146 }
147
148 pub fn from_overrides(data_dir: &Option<PathBuf>, db_override: Option<PathBuf>) -> Self {
152 let data_dir = data_dir.clone().unwrap_or_else(crate::default_data_dir);
153 let path = db_override.unwrap_or_else(|| data_dir.join("agent_search.db"));
154 Self::new(path)
155 }
156
157 pub fn get(&self, reason: &str) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
162 let mut guard = self.conn.lock();
163 if guard.is_none() {
164 if !self.path.exists() {
165 return Err(LazyDbError::NotFound(self.path.clone()));
166 }
167 let start = Instant::now();
168 let _doctor_guard = acquire_doctor_mutation_db_open_guard(
169 &self.path,
170 DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT,
171 )
172 .map_err(|err| LazyDbError::FrankenOpenFailed {
173 path: self.path.clone(),
174 source: frankensqlite::FrankenError::Internal(err.to_string()),
175 })?;
176 let conn =
177 FrankenConnection::open(self.path.to_string_lossy().into_owned()).map_err(|e| {
178 LazyDbError::FrankenOpenFailed {
179 path: self.path.clone(),
180 source: e,
181 }
182 })?;
183 let elapsed_ms = start.elapsed().as_millis();
184 info!(
185 path = %self.path.display(),
186 elapsed_ms = elapsed_ms,
187 reason = reason,
188 "lazily opened FrankenSQLite database"
189 );
190 *guard = Some(SendFrankenConnection::new(conn));
191 }
192 Ok(LazyFrankenDbGuard(guard))
193 }
194
195 pub fn get_with_timeout(
201 &self,
202 reason: &str,
203 timeout: Duration,
204 ) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
205 let mut guard = self.conn.lock();
206 if guard.is_none() {
207 if !self.path.exists() {
208 return Err(LazyDbError::NotFound(self.path.clone()));
209 }
210 let start = Instant::now();
211 let path_owned = self.path.to_string_lossy().into_owned();
212 let path_for_guard = self.path.clone();
213 let (tx, rx) = std::sync::mpsc::channel();
214 std::thread::spawn(move || {
215 let _doctor_guard =
216 match acquire_doctor_mutation_db_open_guard(&path_for_guard, timeout) {
217 Ok(guard) => guard,
218 Err(err) => {
219 let _ = tx
220 .send(Err(frankensqlite::FrankenError::Internal(err.to_string())));
221 return;
222 }
223 };
224 let _ =
225 tx.send(FrankenConnection::open(path_owned).map(SendFrankenConnection::new));
226 });
227 let conn = rx
228 .recv_timeout(timeout)
229 .map_err(|_| LazyDbError::FrankenOpenFailed {
230 path: self.path.clone(),
231 source: frankensqlite::FrankenError::Internal(format!(
232 "database open timed out after {}s (possible corruption or lock contention)",
233 timeout.as_secs()
234 )),
235 })?
236 .map_err(|e| LazyDbError::FrankenOpenFailed {
237 path: self.path.clone(),
238 source: e,
239 })?;
240 let elapsed_ms = start.elapsed().as_millis();
241 info!(
242 path = %self.path.display(),
243 elapsed_ms = elapsed_ms,
244 reason = reason,
245 "lazily opened FrankenSQLite database (with timeout)"
246 );
247 *guard = Some(conn);
248 }
249 Ok(LazyFrankenDbGuard(guard))
250 }
251
252 pub fn path(&self) -> &Path {
254 &self.path
255 }
256
257 pub fn is_open(&self) -> bool {
259 self.conn.lock().is_some()
260 }
261}
262
// State word of the retry-jitter generator; advanced with `fetch_add` on each
// call to `next_franken_retry_jitter_ms`.
static FRANKEN_RETRY_JITTER_STATE: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
// Number of live `DoctorMutationDbOpenBypassGuard`s; while non-zero,
// `acquire_doctor_mutation_db_open_guard` skips the doctor lock.
static DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH: AtomicUsize = AtomicUsize::new(0);
// Switch for the message-lookup trace counters below
// (see `set_message_lookup_trace_enabled`).
static MESSAGE_LOOKUP_TRACE_ENABLED: AtomicBool = AtomicBool::new(false);
// Counters reported by `message_lookup_trace_snapshot`.
static MESSAGE_LOOKUP_EXACT_IDX_PROBES: AtomicU64 = AtomicU64::new(0);
static MESSAGE_LOOKUP_BOUNDED_QUERIES: AtomicU64 = AtomicU64::new(0);
static MESSAGE_LOOKUP_FULL_SCAN_QUERIES: AtomicU64 = AtomicU64::new(0);
static MESSAGE_LOOKUP_ROWS_MATERIALIZED: AtomicU64 = AtomicU64::new(0);
// Process-wide flag swapped by `default_defer_analytics_updates_guard` and
// restored when the returned guard is dropped.
static DEFAULT_DEFER_ANALYTICS_UPDATES: AtomicBool = AtomicBool::new(false);
271
272#[derive(Debug, Clone, Copy, Default, Serialize)]
273pub(crate) struct MessageLookupTraceCounters {
274 pub exact_idx_probes: u64,
275 pub bounded_lookup_queries: u64,
276 pub full_scan_queries: u64,
277 pub rows_materialized: u64,
278}
279
280impl MessageLookupTraceCounters {
281 pub(crate) fn saturating_sub(self, before: Self) -> Self {
282 Self {
283 exact_idx_probes: self
284 .exact_idx_probes
285 .saturating_sub(before.exact_idx_probes),
286 bounded_lookup_queries: self
287 .bounded_lookup_queries
288 .saturating_sub(before.bounded_lookup_queries),
289 full_scan_queries: self
290 .full_scan_queries
291 .saturating_sub(before.full_scan_queries),
292 rows_materialized: self
293 .rows_materialized
294 .saturating_sub(before.rows_materialized),
295 }
296 }
297
298 pub(crate) fn lookups_against_global(self) -> u64 {
299 self.exact_idx_probes.saturating_add(self.rows_materialized)
300 }
301}
302
303pub(crate) fn set_message_lookup_trace_enabled(enabled: bool) -> bool {
304 MESSAGE_LOOKUP_TRACE_ENABLED.swap(enabled, Ordering::Relaxed)
305}
306
307pub(crate) fn message_lookup_trace_snapshot() -> MessageLookupTraceCounters {
308 MessageLookupTraceCounters {
309 exact_idx_probes: MESSAGE_LOOKUP_EXACT_IDX_PROBES.load(Ordering::Relaxed),
310 bounded_lookup_queries: MESSAGE_LOOKUP_BOUNDED_QUERIES.load(Ordering::Relaxed),
311 full_scan_queries: MESSAGE_LOOKUP_FULL_SCAN_QUERIES.load(Ordering::Relaxed),
312 rows_materialized: MESSAGE_LOOKUP_ROWS_MATERIALIZED.load(Ordering::Relaxed),
313 }
314}
315
316pub(crate) struct DefaultDeferAnalyticsUpdatesGuard {
317 previous: bool,
318}
319
320impl Drop for DefaultDeferAnalyticsUpdatesGuard {
321 fn drop(&mut self) {
322 DEFAULT_DEFER_ANALYTICS_UPDATES.store(self.previous, Ordering::Relaxed);
323 }
324}
325
326pub(crate) fn default_defer_analytics_updates_guard(
327 enabled: bool,
328) -> DefaultDeferAnalyticsUpdatesGuard {
329 let previous = DEFAULT_DEFER_ANALYTICS_UPDATES.swap(enabled, Ordering::Relaxed);
330 DefaultDeferAnalyticsUpdatesGuard { previous }
331}
332
333fn record_message_lookup_bounded_queries(query_count: u64, rows: usize) {
334 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
335 MESSAGE_LOOKUP_BOUNDED_QUERIES.fetch_add(query_count, Ordering::Relaxed);
336 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
337 }
338}
339
340fn record_message_lookup_full_scan_query(rows: usize) {
341 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
342 MESSAGE_LOOKUP_FULL_SCAN_QUERIES.fetch_add(1, Ordering::Relaxed);
343 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
344 }
345}
346
347pub(crate) struct DoctorMutationDbOpenBypassGuard;
348
349impl Drop for DoctorMutationDbOpenBypassGuard {
350 fn drop(&mut self) {
351 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_sub(1, Ordering::SeqCst);
352 }
353}
354
355pub(crate) fn enter_doctor_mutation_db_open_bypass() -> DoctorMutationDbOpenBypassGuard {
356 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_add(1, Ordering::SeqCst);
357 DoctorMutationDbOpenBypassGuard
358}
359
360fn doctor_mutation_db_open_bypass_active() -> bool {
361 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.load(Ordering::SeqCst) > 0
362}
363
364fn next_franken_retry_jitter_ms(max_inclusive: u64) -> u64 {
365 let mut value = FRANKEN_RETRY_JITTER_STATE.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
366 value ^= value >> 30;
367 value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9);
368 value ^= value >> 27;
369 value = value.wrapping_mul(0x94d0_49bb_1331_11eb);
370 value ^= value >> 31;
371 value % max_inclusive.saturating_add(1)
372}
373
374pub(crate) fn sleep_with_franken_retry_backoff(
377 backoff: &mut Duration,
378 remaining: Duration,
379 max_backoff: Duration,
380) {
381 let capped = (*backoff).min(remaining);
382 let extra_budget = remaining.saturating_sub(capped).min(capped);
383 let extra_ms = extra_budget.as_millis().min(u128::from(u64::MAX)) as u64;
384 let sleep_for = if extra_ms == 0 {
385 capped
386 } else {
387 capped
388 .saturating_add(Duration::from_millis(next_franken_retry_jitter_ms(
389 extra_ms,
390 )))
391 .min(remaining)
392 };
393 std::thread::sleep(sleep_for);
394 *backoff = backoff.saturating_mul(2).min(max_backoff);
395}
396
397struct DoctorMutationDbOpenGuard(Option<fs::File>);
398
399impl Drop for DoctorMutationDbOpenGuard {
400 fn drop(&mut self) {
401 if let Some(file) = self.0.as_ref() {
402 let _ = fs2::FileExt::unlock(file);
403 }
404 }
405}
406
/// Maps a database path to its doctor repair lock path.
///
/// Only files named exactly `agent_search.db` take part; for those the result
/// is `<parent>/doctor/locks/doctor-repair.lock`. Any other file name, or a
/// path without a parent, yields `None`.
fn doctor_mutation_lock_path_for_db_open(db_path: &Path) -> Option<PathBuf> {
    let is_canonical_db = matches!(
        db_path.file_name().and_then(|name| name.to_str()),
        Some("agent_search.db")
    );
    if !is_canonical_db {
        return None;
    }

    let data_dir = db_path.parent()?;
    let lock_path = ["doctor", "locks", "doctor-repair.lock"]
        .iter()
        .fold(data_dir.to_path_buf(), |acc, segment| acc.join(segment));
    Some(lock_path)
}
420
/// Scans `key=value` lines and reports whether any `pid` entry equals the
/// current process id.
///
/// Keys and values are trimmed; lines without `=`, other keys, and values
/// that do not parse as `u32` are ignored.
fn doctor_lock_metadata_pid_is_current_process(raw: &str) -> bool {
    for line in raw.lines() {
        let Some((key, value)) = line.split_once('=') else {
            continue;
        };
        if key.trim() != "pid" {
            continue;
        }
        if let Ok(pid) = value.trim().parse::<u32>() {
            if pid == std::process::id() {
                return true;
            }
        }
    }
    false
}
433
434fn doctor_lock_file_pid_is_current_process(file: &fs::File) -> bool {
435 use std::io::Read as _;
436
437 let Ok(mut file) = file.try_clone() else {
438 return false;
439 };
440 let mut raw = String::new();
441 let _ = std::io::Read::take(&mut file, DOCTOR_MUTATION_LOCK_MAX_METADATA_READ)
442 .read_to_string(&mut raw);
443 doctor_lock_metadata_pid_is_current_process(&raw)
444}
445
/// Decides whether a failed lock attempt means the lock is currently held by
/// someone else (as opposed to a real I/O failure).
///
/// `WouldBlock` always counts. On Windows, raw OS error 33 counts as well.
fn doctor_mutation_lock_error_is_active(err: &std::io::Error) -> bool {
    let would_block = matches!(err.kind(), std::io::ErrorKind::WouldBlock);
    // `cfg!(windows)` is a compile-time constant, so this term is always
    // false on other platforms.
    let windows_lock_violation = cfg!(windows) && err.raw_os_error() == Some(33);
    would_block || windows_lock_violation
}
460
461fn acquire_doctor_mutation_db_open_guard(
462 db_path: &Path,
463 timeout: Duration,
464) -> Result<DoctorMutationDbOpenGuard> {
465 let Some(lock_path) = doctor_mutation_lock_path_for_db_open(db_path) else {
466 return Ok(DoctorMutationDbOpenGuard(None));
467 };
468 if doctor_mutation_db_open_bypass_active() {
469 return Ok(DoctorMutationDbOpenGuard(None));
470 }
471
472 if let Some(parent) = lock_path.parent() {
473 fs::create_dir_all(parent).with_context(|| {
474 format!(
475 "creating doctor mutation lock directory {} before opening {}",
476 parent.display(),
477 db_path.display()
478 )
479 })?;
480 }
481
482 let deadline = Instant::now() + timeout;
483 let mut backoff = Duration::from_millis(4);
484 loop {
485 let file = fs::OpenOptions::new()
486 .create(true)
487 .truncate(false)
488 .read(true)
489 .write(true)
490 .open(&lock_path)
491 .with_context(|| {
492 format!(
493 "opening doctor mutation lock {} before opening {}",
494 lock_path.display(),
495 db_path.display()
496 )
497 })?;
498
499 if doctor_lock_file_pid_is_current_process(&file) {
500 return Ok(DoctorMutationDbOpenGuard(None));
501 }
502
503 match fs2::FileExt::try_lock_shared(&file) {
504 Ok(()) => return Ok(DoctorMutationDbOpenGuard(Some(file))),
505 Err(err) if doctor_mutation_lock_error_is_active(&err) => {
506 let now = Instant::now();
507 if now >= deadline {
508 return Err(anyhow!(
509 "doctor mutation lock {} is active while opening {}; refusing to open during repair after waiting {}ms",
510 lock_path.display(),
511 db_path.display(),
512 timeout.as_millis()
513 ));
514 }
515 let remaining = deadline.saturating_duration_since(now);
516 sleep_with_franken_retry_backoff(
517 &mut backoff,
518 remaining,
519 Duration::from_millis(128),
520 );
521 }
522 Err(err) => {
523 return Err(anyhow!(
524 "failed to acquire shared doctor mutation lock {} before opening {}: {}",
525 lock_path.display(),
526 db_path.display(),
527 err
528 ));
529 }
530 }
531 }
532}
533
534pub(crate) fn open_franken_storage_with_timeout(
535 path: &Path,
536 timeout: Duration,
537) -> Result<FrankenStorage> {
538 if !path.exists() {
539 return Err(anyhow!("Database not found at {}", path.display()));
540 }
541
542 let deadline = Instant::now() + timeout;
543 let mut backoff = Duration::from_millis(4);
544 loop {
545 match FrankenStorage::open(path) {
546 Ok(storage) => return Ok(storage),
547 Err(err) if retryable_franken_anyhow(&err) => {
548 let now = Instant::now();
549 if now >= deadline {
550 return Err(err);
551 }
552 let remaining = deadline.saturating_duration_since(now);
553 sleep_with_franken_retry_backoff(
554 &mut backoff,
555 remaining,
556 Duration::from_millis(128),
557 );
558 }
559 Err(err) => return Err(err),
560 }
561 }
562}
563
564pub(crate) fn open_current_schema_storage_with_timeout(
565 path: &Path,
566 timeout: Duration,
567) -> Result<Option<FrankenStorage>> {
568 if !path.exists() {
569 return Ok(None);
570 }
571
572 let mut storage = FrankenStorage::new(
573 open_franken_raw_connection_with_timeout(path, timeout)?,
574 path.to_path_buf(),
575 );
576 storage.apply_open_stage_busy_timeout();
577
578 let version = storage
579 .raw()
580 .query("SELECT value FROM meta WHERE key = 'schema_version';")
581 .ok()
582 .and_then(|rows| rows.first().cloned())
583 .and_then(|row| row.get_typed::<String>(0).ok())
584 .and_then(|raw| raw.parse::<i64>().ok());
585
586 if version != Some(CURRENT_SCHEMA_VERSION) {
587 if let Err(close_err) = storage.close_without_checkpoint_in_place() {
588 tracing::debug!(
589 error = %close_err,
590 db_path = %path.display(),
591 "open_current_schema_storage_with_timeout: close_without_checkpoint_in_place failed; falling back to best-effort close"
592 );
593 storage.close_best_effort_in_place();
594 }
595 return Ok(None);
596 }
597
598 transition_from_meta_version(&storage.conn)?;
599 storage.repair_missing_current_schema_objects()?;
600 storage.apply_config()?;
601 Ok(Some(storage))
602}
603
604pub(crate) fn open_franken_readonly_storage_with_timeout(
605 path: &Path,
606 timeout: Duration,
607) -> Result<FrankenStorage> {
608 if !path.exists() {
609 return Err(anyhow!("Database not found at {}", path.display()));
610 }
611
612 let deadline = Instant::now() + timeout;
613 let mut backoff = Duration::from_millis(4);
614 loop {
615 match FrankenStorage::open_readonly(path) {
616 Ok(storage) => return Ok(storage),
617 Err(err) if retryable_franken_anyhow(&err) => {
618 let now = Instant::now();
619 if now >= deadline {
620 return Err(err);
621 }
622 let remaining = deadline.saturating_duration_since(now);
623 sleep_with_franken_retry_backoff(
624 &mut backoff,
625 remaining,
626 Duration::from_millis(128),
627 );
628 }
629 Err(err) => return Err(err),
630 }
631 }
632}
633
634pub(crate) fn open_franken_raw_connection_with_timeout(
635 path: &Path,
636 timeout: Duration,
637) -> Result<FrankenConnection> {
638 if !path.exists() {
639 return Err(anyhow!("Database not found at {}", path.display()));
640 }
641
642 let path_str = path.to_string_lossy().to_string();
643 let deadline = Instant::now() + timeout;
644 let mut backoff = Duration::from_millis(4);
645 loop {
646 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
647 match FrankenConnection::open(&path_str)
648 .with_context(|| format!("opening raw frankensqlite db at {}", path.display()))
649 {
650 Ok(conn) => return Ok(conn),
651 Err(err) if retryable_franken_anyhow(&err) => {
652 let now = Instant::now();
653 if now >= deadline {
654 return Err(err);
655 }
656 let remaining = deadline.saturating_duration_since(now);
657 sleep_with_franken_retry_backoff(
658 &mut backoff,
659 remaining,
660 Duration::from_millis(128),
661 );
662 }
663 Err(err) => return Err(err),
664 }
665 }
666}
667
668pub(crate) fn open_franken_raw_readonly_connection_with_timeout(
669 path: &Path,
670 timeout: Duration,
671) -> Result<FrankenConnection> {
672 if !path.exists() {
673 return Err(anyhow!("Database not found at {}", path.display()));
674 }
675
676 let path_str = path.to_string_lossy().to_string();
677 let deadline = Instant::now() + timeout;
678 let mut backoff = Duration::from_millis(4);
679 loop {
680 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
681 match open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
682 .with_context(|| {
683 format!(
684 "opening raw frankensqlite db readonly at {}",
685 path.display()
686 )
687 }) {
688 Ok(conn) => return Ok(conn),
689 Err(err) if retryable_franken_anyhow(&err) => {
690 let now = Instant::now();
691 if now >= deadline {
692 return Err(err);
693 }
694 let remaining = deadline.saturating_duration_since(now);
695 sleep_with_franken_retry_backoff(
696 &mut backoff,
697 remaining,
698 Duration::from_millis(128),
699 );
700 }
701 Err(err) => return Err(err),
702 }
703 }
704}
705
706pub(crate) fn retryable_franken_error(err: &frankensqlite::FrankenError) -> bool {
707 matches!(
708 err,
709 frankensqlite::FrankenError::Busy
710 | frankensqlite::FrankenError::BusyRecovery
711 | frankensqlite::FrankenError::BusySnapshot { .. }
712 | frankensqlite::FrankenError::DatabaseLocked { .. }
713 | frankensqlite::FrankenError::LockFailed { .. }
714 | frankensqlite::FrankenError::WriteConflict { .. }
715 | frankensqlite::FrankenError::SerializationFailure { .. }
716 ) || retryable_storage_error_message(&err.to_string())
717}
718
/// Case-insensitive (ASCII) check for wording that marks a transient storage
/// failure: busy, locked/locking, contention, "temporarily unavailable" or
/// "would block".
pub(crate) fn retryable_storage_error_message(message: &str) -> bool {
    const TRANSIENT_MARKERS: [&str; 6] = [
        "busy",
        "locked",
        "locking",
        "contention",
        "temporarily unavailable",
        "would block",
    ];

    let haystack = message.to_ascii_lowercase();
    TRANSIENT_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker))
}
728
729pub(crate) fn retryable_franken_anyhow(err: &anyhow::Error) -> bool {
730 err.chain().any(|cause| {
731 cause
732 .downcast_ref::<frankensqlite::FrankenError>()
733 .is_some_and(retryable_franken_error)
734 || retryable_storage_error_message(&cause.to_string())
735 })
736}
737
738impl Drop for LazyFrankenDb {
739 fn drop(&mut self) {
740 let Some(mut conn) = self.conn.get_mut().take() else {
741 return;
742 };
743 conn.0.close_best_effort_in_place();
744 }
745}
746
/// Sizing knobs for [`FrankenConnectionManager`].
#[derive(Debug, Clone)]
pub struct ConnectionManagerConfig {
    /// Number of reader connections to open.
    pub reader_count: usize,
    /// Maximum number of writers that may be checked out at once.
    pub max_writers: usize,
}

impl Default for ConnectionManagerConfig {
    /// Four readers; one writer slot per available CPU (4 if the CPU count
    /// cannot be determined).
    fn default() -> Self {
        let max_writers = match std::thread::available_parallelism() {
            Ok(cpus) => cpus.get(),
            Err(_) => 4,
        };
        Self {
            reader_count: 4,
            max_writers,
        }
    }
}
774
/// Owns a fixed set of reader connections and hands out writer connections
/// limited by a token channel.
pub struct FrankenConnectionManager {
    /// Path every reader and writer connection is opened against.
    db_path: PathBuf,
    /// Reader connections opened up front in `new`, each behind its own mutex.
    readers: Vec<parking_lot::Mutex<SendFrankenConnection>>,
    /// Monotonic counter used by `reader()` to pick the next reader slot.
    reader_idx: std::sync::atomic::AtomicUsize,
    /// Bounded channel pre-filled with `max_writers` tokens in `new`; a writer
    /// receives one token on checkout and `WriterGuard`'s drop sends it back.
    writer_tokens: (
        crossbeam_channel::Sender<()>,
        crossbeam_channel::Receiver<()>,
    ),
    /// Effective configuration (both values clamped to at least 1 in `new`).
    config: ConnectionManagerConfig,
}

// SAFETY(review): the justification for these impls is not visible in this
// part of the file. Readers are only reachable through their mutexes here, but
// confirm the rest against frankensqlite's threading contract.
unsafe impl Send for FrankenConnectionManager {}
unsafe impl Sync for FrankenConnectionManager {}
803
804impl FrankenConnectionManager {
805 pub fn new(db_path: impl Into<PathBuf>, config: ConnectionManagerConfig) -> Result<Self> {
810 let db_path = db_path.into();
811 let path_str = db_path.to_string_lossy().to_string();
812
813 let reader_count = config.reader_count.max(1);
814 let mut readers = Vec::with_capacity(reader_count);
815 for _ in 0..reader_count {
816 let conn = FrankenConnection::open(&path_str)
817 .with_context(|| format!("opening reader connection at {}", db_path.display()))?;
818 let _ = conn.execute("PRAGMA busy_timeout = 5000;"); let _ = conn.execute("PRAGMA cache_size = -16384;"); readers.push(parking_lot::Mutex::new(SendFrankenConnection::new(conn)));
822 }
823
824 let max_writers = config.max_writers.max(1);
825
826 let (tx, rx) = crossbeam_channel::bounded(max_writers);
830 for _ in 0..max_writers {
831 tx.send(())
832 .map_err(|_| anyhow!("writer token channel closed during initialization"))?;
833 }
834
835 Ok(Self {
836 db_path,
837 readers,
838 reader_idx: std::sync::atomic::AtomicUsize::new(0),
839 writer_tokens: (tx, rx),
840 config: ConnectionManagerConfig {
841 reader_count,
842 max_writers,
843 },
844 })
845 }
846
847 pub fn reader(&self) -> parking_lot::MutexGuard<'_, SendFrankenConnection> {
852 let idx = self
853 .reader_idx
854 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
855 self.readers[idx % self.readers.len()].lock()
856 }
857
858 pub fn writer(&self) -> Result<WriterGuard<'_>> {
864 self.writer_tokens
865 .1
866 .recv()
867 .map_err(|_| anyhow!("writer token channel closed"))?;
868 let path_str = self.db_path.to_string_lossy().to_string();
869 let conn = match FrankenConnection::open(&path_str) {
870 Ok(c) => c,
871 Err(e) => {
872 let _ = self.writer_tokens.0.send(());
873 return Err(anyhow::Error::from(e).context(format!(
874 "opening writer connection at {}",
875 self.db_path.display()
876 )));
877 }
878 };
879 let storage = FrankenStorage::new(conn, self.db_path.clone());
880 if let Err(e) = storage.apply_config() {
881 let _ = self.writer_tokens.0.send(());
882 return Err(e);
883 }
884 Ok(WriterGuard {
885 storage,
886 mgr: self,
887 committed: false,
888 })
889 }
890
891 pub fn concurrent_writer(&self) -> Result<WriterGuard<'_>> {
896 self.writer_tokens
897 .1
898 .recv()
899 .map_err(|_| anyhow!("writer token channel closed"))?;
900 let path_str = self.db_path.to_string_lossy().to_string();
901 let conn = match FrankenConnection::open(&path_str) {
902 Ok(c) => c,
903 Err(e) => {
904 let _ = self.writer_tokens.0.send(());
905 return Err(anyhow::Error::from(e).context(format!(
906 "opening concurrent writer at {}",
907 self.db_path.display()
908 )));
909 }
910 };
911 let storage = FrankenStorage::new(conn, self.db_path.clone());
912 if let Err(e) = storage.apply_config() {
913 let _ = self.writer_tokens.0.send(());
914 return Err(e);
915 }
916 let _ = storage.raw().execute("PRAGMA cache_size = -4096;");
918 Ok(WriterGuard {
919 storage,
920 mgr: self,
921 committed: false,
922 })
923 }
924
925 pub fn db_path(&self) -> &Path {
927 &self.db_path
928 }
929
930 pub fn reader_count(&self) -> usize {
932 self.readers.len()
933 }
934
935 pub fn max_writers(&self) -> usize {
937 self.config.max_writers
938 }
939}
940
941impl Drop for FrankenConnectionManager {
942 fn drop(&mut self) {
943 for reader in &mut self.readers {
944 reader.get_mut().0.close_best_effort_in_place();
945 }
946 }
947}
948
949pub struct WriterGuard<'a> {
954 storage: FrankenStorage,
955 mgr: &'a FrankenConnectionManager,
956 committed: bool,
957}
958
959impl<'a> WriterGuard<'a> {
960 pub fn storage(&self) -> &FrankenStorage {
962 &self.storage
963 }
964
965 pub fn mark_committed(&mut self) {
970 self.committed = true;
971 }
972}
973
974impl Drop for WriterGuard<'_> {
975 fn drop(&mut self) {
976 if !self.committed {
977 let _ = self.storage.raw().execute("ROLLBACK;");
979 }
980 self.storage.close_best_effort_in_place();
981 let _ = self.mgr.writer_tokens.0.send(());
983 }
984}
985
986fn serialize_json_to_msgpack(value: &serde_json::Value) -> Option<Vec<u8>> {
995 if value.is_null() || value.as_object().is_some_and(|o| o.is_empty()) {
996 return None;
997 }
998 rmp_serde::to_vec(value).ok()
999}
1000
1001fn deserialize_msgpack_to_json(bytes: &[u8]) -> serde_json::Value {
1004 if bytes.is_empty() {
1005 return serde_json::Value::Object(serde_json::Map::new());
1006 }
1007 rmp_serde::from_slice(bytes).unwrap_or_else(|e| {
1008 tracing::debug!(
1009 error = %e,
1010 bytes_len = bytes.len(),
1011 "Failed to deserialize metadata - returning empty object"
1012 );
1013 serde_json::Value::Object(serde_json::Map::new())
1014 })
1015}
1016
1017fn franken_read_metadata_compat(
1019 row: &FrankenRow,
1020 json_idx: usize,
1021 bin_idx: usize,
1022) -> serde_json::Value {
1023 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1025 && !bytes.is_empty()
1026 {
1027 return deserialize_msgpack_to_json(&bytes);
1028 }
1029
1030 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1032 return serde_json::from_str(&json_str)
1033 .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
1034 }
1035
1036 serde_json::Value::Object(serde_json::Map::new())
1037}
1038
1039fn franken_read_message_extra_compat(
1040 row: &FrankenRow,
1041 json_idx: usize,
1042 bin_idx: usize,
1043) -> serde_json::Value {
1044 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1045 && !bytes.is_empty()
1046 {
1047 return deserialize_msgpack_to_json(&bytes);
1048 }
1049
1050 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1051 return serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
1052 }
1053
1054 serde_json::Value::Null
1055}
1056
/// Errors reported by schema migration.
#[derive(Debug, Error)]
pub enum MigrationError {
    /// The database has to be rebuilt rather than migrated in place.
    #[error("Rebuild required: {reason}")]
    RebuildRequired {
        /// Human-readable explanation, shown in the error message.
        reason: String,
        /// Location of a backup, if one is available.
        // NOTE(review): when and how the backup is produced is not visible here.
        backup_path: Option<std::path::PathBuf>,
    },

    /// A frankensqlite error (converted automatically via `From`).
    #[error("Database error: {0}")]
    Database(#[from] frankensqlite::FrankenError),

    /// A filesystem / I/O error (converted automatically via `From`).
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Any other failure, carried as plain text.
    #[error("{0}")]
    Other(String),
}
1083
1084impl From<anyhow::Error> for MigrationError {
1085 fn from(e: anyhow::Error) -> Self {
1086 MigrationError::Other(e.to_string())
1087 }
1088}
1089
// NOTE(review): presumably the number of database backups to retain — the
// code using it is outside this part of the file; confirm.
const MAX_BACKUPS: usize = 3;
// PRAGMA statement that sets `busy_timeout` to 30000.
// NOTE(review): the name suggests it is applied around the backup VACUUM step;
// the call site is not visible here.
const BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA: &str = "PRAGMA busy_timeout = 30000;";
1093
// File names that `is_user_data_file` recognises as user data.
const USER_DATA_FILES: &[&str] = &["bookmarks.db", "tui_state.json", "sources.toml", ".env"];

/// Returns `true` when the final path component (as UTF-8) is one of
/// `USER_DATA_FILES`. Paths without a file name, or with a non-UTF-8 name,
/// yield `false`.
pub fn is_user_data_file(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => USER_DATA_FILES.contains(&name),
        None => false,
    }
}
1104
/// `CREATE VIRTUAL TABLE IF NOT EXISTS` statement for the `fts_messages` FTS5
/// table: five indexed text columns, an unindexed `created_at`, `content=''`
/// and the `porter` tokenizer.
pub const FTS5_REGISTER_SQL: &str = "\
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(\
        content, title, agent, workspace, source_path, \
        created_at UNINDEXED, \
        content='', tokenize='porter'\
    )";

// Meta keys and generation numbers for FTS rebuild and daily-stats health
// bookkeeping.
// NOTE(review): the code reading and writing these is outside this part of the
// file; their exact semantics should be confirmed there.
const FTS_FRANKEN_REBUILD_META_KEY: &str = "fts_frankensqlite_rebuild_generation";
const FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY: &str = "fts_frankensqlite_archive_fingerprint";
const FTS_FRANKEN_REBUILD_GENERATION: i64 = 1;
const DAILY_STATS_HEALTH_META_KEY: &str = "daily_stats_archive_fingerprint";
const DAILY_STATS_HEALTH_GENERATION_META_KEY: &str = "daily_stats_health_generation";
const DAILY_STATS_HEALTH_GENERATION: i64 = 1;

/// Statement that issues the FTS5 `'delete-all'` command against
/// `fts_messages`.
pub const FTS5_DELETE_ALL_SQL: &str =
    "INSERT INTO fts_messages(fts_messages) VALUES('delete-all');";

/// Names of the FTS5 shadow tables looked for when diagnosing `fts_messages`
/// corruption.
pub const FTS_MESSAGES_REQUIRED_SHADOW_TABLES: [&str; 5] = [
    "fts_messages_config",
    "fts_messages_content",
    "fts_messages_data",
    "fts_messages_docsize",
    "fts_messages_idx",
];

/// Zero-row query used to probe whether `fts_messages` can be read at all.
pub const FTS_MESSAGES_INTEGRITY_PROBE_SQL: &str = "SELECT * FROM fts_messages LIMIT 0";
1140
/// Recovery advice appended to every [`FtsMessagesIntegrityError`] message.
pub const FTS_MESSAGES_CORRUPTION_RECOVERY_HINT: &str = "Stop all cass index/watch processes, back up the current database, then run \
     'cass doctor check --json' for a read-only diagnosis before using a supported \
     repair/rebuild path.";

/// Describes a structurally broken `fts_messages` FTS5 index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FtsMessagesIntegrityError {
    /// Shadow tables identified as missing (may be empty).
    missing_shadow_tables: Vec<&'static str>,
    /// SQL statement that failed, if known.
    failed_sql: Option<&'static str>,
    /// Text of the underlying error, if any.
    source_error: Option<String>,
}

impl FtsMessagesIntegrityError {
    /// Assembles an error from its three parts.
    fn new(
        missing_shadow_tables: Vec<&'static str>,
        failed_sql: Option<&'static str>,
        source_error: Option<String>,
    ) -> Self {
        Self {
            missing_shadow_tables,
            failed_sql,
            source_error,
        }
    }

    /// Shadow tables recorded as missing.
    pub fn missing_shadow_tables(&self) -> &[&'static str] {
        self.missing_shadow_tables.as_slice()
    }
}

impl std::fmt::Display for FtsMessagesIntegrityError {
    /// Writes the fixed headline, then each optional detail that is present
    /// (missing tables, failed SQL, source error), then the recovery hint.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(
            "CASS database FTS5 index is corrupt: fts_messages exists, but required FTS5 shadow tables are missing or unreadable",
        )?;

        let missing = self.missing_shadow_tables();
        if !missing.is_empty() {
            write!(f, "; missing shadow tables: {}", missing.join(", "))?;
        }
        if let Some(sql) = self.failed_sql {
            write!(f, "; failed SQL: {sql}")?;
        }
        if let Some(source_error) = self.source_error.as_ref() {
            write!(f, "; error: {source_error}")?;
        }

        write!(
            f,
            ". Suggested recovery: {FTS_MESSAGES_CORRUPTION_RECOVERY_HINT}"
        )
    }
}

impl std::error::Error for FtsMessagesIntegrityError {}
1197
1198pub fn fts_messages_integrity_error_from_message(
1199 source_error: impl Into<String>,
1200) -> Option<FtsMessagesIntegrityError> {
1201 let source_error = source_error.into();
1202 let lower = source_error.to_ascii_lowercase();
1203 if !lower.contains("fts_messages") {
1204 return None;
1205 }
1206
1207 let mentions_structural_fts_failure = lower.contains("shadow table")
1208 || lower.contains("vtable constructor failed")
1209 || lower.contains("sqlite_corrupt")
1210 || lower.contains("databasecorrupt")
1211 || lower.contains("database corrupt")
1212 || lower.contains("missing required");
1213 if !mentions_structural_fts_failure {
1214 return None;
1215 }
1216
1217 let missing_shadow_tables = FTS_MESSAGES_REQUIRED_SHADOW_TABLES
1218 .iter()
1219 .copied()
1220 .filter(|table| lower.contains(&table.to_ascii_lowercase()))
1221 .collect::<Vec<_>>();
1222
1223 Some(FtsMessagesIntegrityError::new(
1224 missing_shadow_tables,
1225 Some(FTS_MESSAGES_INTEGRITY_PROBE_SQL),
1226 Some(source_error),
1227 ))
1228}
1229
/// Reports whether an `fts_messages` schema statement may legitimately lack
/// shadow-table metadata.
///
/// After stripping all whitespace and ASCII-lowercasing the SQL, the schema
/// qualifies when it declares `USING fts5(` with `content=''` and never
/// mentions `message_id`.
fn fts_schema_tolerates_missing_shadow_metadata(sql: &str) -> bool {
    let mut compact = String::with_capacity(sql.len());
    for ch in sql.chars() {
        if !ch.is_whitespace() {
            compact.push(ch.to_ascii_lowercase());
        }
    }

    if compact.contains("message_id") {
        return false;
    }
    compact.contains("usingfts5(") && compact.contains("content=''")
}
1240
1241pub fn validate_fts_messages_integrity_for_connection(conn: &FrankenConnection) -> Result<()> {
1242 let fts_schema_sql: Vec<String> = conn
1243 .query_map_collect(
1244 "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'fts_messages'",
1245 fparams![],
1246 |row: &FrankenRow| row.get_typed::<String>(0),
1247 )
1248 .with_context(|| "checking for fts_messages in sqlite_master")?;
1249 if fts_schema_sql.is_empty() {
1250 return Ok(());
1251 }
1252
1253 let probe_error = conn.query(FTS_MESSAGES_INTEGRITY_PROBE_SQL).err();
1254 if probe_error.is_none()
1255 && fts_schema_sql
1256 .iter()
1257 .all(|sql| fts_schema_tolerates_missing_shadow_metadata(sql))
1258 {
1259 return Ok(());
1260 }
1261
1262 let present_shadow_tables: HashSet<String> = conn
1263 .query_map_collect(
1264 "SELECT name FROM sqlite_master
1265 WHERE type = 'table'
1266 AND name IN (
1267 'fts_messages_config',
1268 'fts_messages_content',
1269 'fts_messages_data',
1270 'fts_messages_docsize',
1271 'fts_messages_idx'
1272 )",
1273 fparams![],
1274 |row: &FrankenRow| row.get_typed::<String>(0),
1275 )
1276 .map(|rows| rows.into_iter().collect())
1277 .map_err(|err| {
1278 FtsMessagesIntegrityError::new(
1279 Vec::new(),
1280 Some(
1281 "SELECT name FROM sqlite_master WHERE name IN \
1282 ('fts_messages_config','fts_messages_content','fts_messages_data','fts_messages_docsize','fts_messages_idx')",
1283 ),
1284 Some(err.to_string()),
1285 )
1286 })?;
1287 let missing_shadow_tables = FTS_MESSAGES_REQUIRED_SHADOW_TABLES
1288 .iter()
1289 .copied()
1290 .filter(|table| !present_shadow_tables.contains(*table))
1291 .collect::<Vec<_>>();
1292
1293 if missing_shadow_tables.is_empty() {
1302 return Ok(());
1303 }
1304
1305 Err(FtsMessagesIntegrityError::new(
1306 missing_shadow_tables,
1307 probe_error
1308 .as_ref()
1309 .map(|_| FTS_MESSAGES_INTEGRITY_PROBE_SQL),
1310 probe_error.map(|err| err.to_string()),
1311 )
1312 .into())
1313}
1314
/// Test helper: opens the database at `db_path` through `FrankenStorage` and
/// rebuilds its FTS index, discarding the rebuild's return value.
#[cfg(test)]
pub(crate) fn materialize_fresh_fts_schema_via_rusqlite(db_path: &Path) -> Result<()> {
    let opened = FrankenStorage::open(db_path);
    let storage = opened.with_context(|| {
        format!(
            "opening frankensqlite db at {} for FTS materialization",
            db_path.display()
        )
    })?;
    storage.rebuild_fts_via_frankensqlite()?;
    Ok(())
}
1330
/// Test helper: rebuilds the FTS index for the database at `db_path`, records
/// the rebuild generation, and returns the value reported by the rebuild.
#[cfg(test)]
pub(crate) fn rebuild_fts_via_rusqlite(db_path: &Path) -> Result<usize> {
    let opened = FrankenStorage::open(db_path);
    let storage = opened.with_context(|| {
        format!(
            "opening frankensqlite db at {} for FTS rebuild",
            db_path.display()
        )
    })?;

    let rebuilt_rows = storage.rebuild_fts_via_frankensqlite()?;
    // The generation marker is written only after the rebuild succeeded.
    storage.record_fts_franken_rebuild_generation()?;
    Ok(rebuilt_rows)
}
1343
1344pub(crate) fn ensure_fts_consistency_via_rusqlite(db_path: &Path) -> Result<FtsConsistencyRepair> {
1345 let storage = FrankenStorage::open(db_path).with_context(|| {
1349 format!(
1350 "opening frankensqlite db at {} for FTS consistency check",
1351 db_path.display()
1352 )
1353 })?;
1354 storage.ensure_search_fallback_fts_consistency()
1355}
1356
1357pub fn create_backup(db_path: &Path) -> Result<Option<std::path::PathBuf>, MigrationError> {
1361 if !bundle_path_exists(db_path)? {
1362 return Ok(None);
1363 }
1364
1365 if !copyable_bundle_file_exists(db_path)? {
1366 return Ok(None);
1367 }
1368 let _ = copyable_bundle_sidecar_sources(db_path)?;
1369
1370 let backup_path = unique_backup_path(db_path);
1371 let vacuum_stage_path = vacuum_stage_backup_path(&backup_path);
1372
1373 match vacuum_into_backup_stage(db_path, &vacuum_stage_path) {
1376 Ok(()) => {
1377 fs::rename(&vacuum_stage_path, &backup_path)?;
1378 }
1379 Err(err) if backup_vacuum_error_requires_consistent_retry(&err) => {
1380 tracing::warn!(
1381 db_path = %db_path.display(),
1382 error = %err,
1383 "create_backup: VACUUM INTO hit transient contention; refusing raw WAL bundle copy"
1384 );
1385 return Err(MigrationError::Database(err));
1386 }
1387 Err(err) => {
1388 tracing::warn!(
1389 db_path = %db_path.display(),
1390 error = %err,
1391 "create_backup: VACUUM INTO failed; falling back to raw evidence copy"
1392 );
1393 }
1394 }
1395
1396 if backup_path.exists() {
1397 sync_file_if_exists(&backup_path)?;
1398 if let Some(parent) = backup_path.parent() {
1399 sync_parent_directory(parent)?;
1400 }
1401 return Ok(Some(backup_path));
1402 }
1403
1404 copy_database_bundle(db_path, &backup_path)?;
1409
1410 Ok(Some(backup_path))
1411}
1412
/// Runs `VACUUM INTO` from a read-only connection on `db_path`, writing the
/// result to `stage_path`.
///
/// The connection is closed before returning whether or not the vacuum
/// succeeded; the caller receives the vacuum outcome, not the close outcome.
///
/// # Errors
///
/// Returns the error from opening the connection, from applying the busy
/// timeout pragma, or from the `VACUUM INTO` statement.
fn vacuum_into_backup_stage(
    db_path: &Path,
    stage_path: &Path,
) -> std::result::Result<(), frankensqlite::FrankenError> {
    let mut conn = open_franken_with_flags(
        &db_path.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )?;
    // The fallible steps run inside a closure so that an early `?` exit still
    // reaches the explicit close below.
    let result = (|| {
        conn.execute(BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA)?;
        let path_str = stage_path.to_string_lossy();
        conn.execute_compat("VACUUM INTO ?", fparams![path_str.as_ref()])?;
        Ok(())
    })();
    // A failed close is logged and followed by a best-effort close; it never
    // replaces the vacuum result.
    if let Err(close_err) = conn.close_in_place() {
        tracing::warn!(
            error = %close_err,
            db_path = %db_path.display(),
            "create_backup: close_in_place failed after VACUUM INTO; falling back to best-effort close"
        );
        conn.close_best_effort_in_place();
    }
    result
}
1437
/// Reports whether a failed backup vacuum must be surfaced to the caller
/// instead of falling back to a raw copy; delegates to `retryable_franken_error`.
fn backup_vacuum_error_requires_consistent_retry(err: &frankensqlite::FrankenError) -> bool {
    retryable_franken_error(err)
}
1441
/// Records which files of a database bundle were moved.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DatabaseBundleMoveResult {
    /// The main database file was moved.
    pub database: bool,
    /// The `-wal` sidecar was moved.
    pub wal: bool,
    /// The `-shm` sidecar was moved.
    pub shm: bool,
}

impl DatabaseBundleMoveResult {
    /// Returns `true` when at least one of the three files was moved.
    pub fn moved_any(&self) -> bool {
        let Self { database, wal, shm } = *self;
        [database, wal, shm].into_iter().any(|was_moved| was_moved)
    }
}
1454
/// Returns `path` with `suffix` appended to its final component, e.g.
/// `foo.db` + `-wal` → `foo.db-wal`.
///
/// The suffix is appended at the `OsString` level so that paths which are not
/// valid UTF-8 keep their exact bytes. A lossy string round-trip would replace
/// invalid sequences with U+FFFD and yield a sidecar path that names a
/// different file than the one sitting next to the database.
fn database_sidecar_path(path: &Path, suffix: &str) -> PathBuf {
    let mut sidecar = path.as_os_str().to_os_string();
    sidecar.push(suffix);
    PathBuf::from(sidecar)
}
1458
1459pub(crate) fn move_database_bundle(
1466 source_root: &Path,
1467 destination_root: &Path,
1468) -> std::io::Result<DatabaseBundleMoveResult> {
1469 let mut moved = DatabaseBundleMoveResult::default();
1470 if let Some(parent) = destination_root.parent() {
1471 fs::create_dir_all(parent)?;
1472 sync_parent_directory(parent)?;
1473 }
1474
1475 if bundle_path_exists(source_root)? {
1476 fs::rename(source_root, destination_root)?;
1477 moved.database = true;
1478 }
1479
1480 let wal_source = database_sidecar_path(source_root, "-wal");
1481 if bundle_path_exists(&wal_source)? {
1482 fs::rename(&wal_source, database_sidecar_path(destination_root, "-wal"))?;
1483 moved.wal = true;
1484 }
1485
1486 let shm_source = database_sidecar_path(source_root, "-shm");
1487 if bundle_path_exists(&shm_source)? {
1488 fs::rename(&shm_source, database_sidecar_path(destination_root, "-shm"))?;
1489 moved.shm = true;
1490 }
1491
1492 if moved.moved_any() {
1493 if let Some(parent) = source_root.parent() {
1494 sync_parent_directory(parent)?;
1495 }
1496 if let Some(parent) = destination_root.parent() {
1497 sync_parent_directory(parent)?;
1498 }
1499 }
1500
1501 Ok(moved)
1502}
1503
/// Reports whether anything exists at `path`, without following symlinks.
///
/// A "not found" error maps to `Ok(false)`; any other metadata error is
/// returned to the caller.
fn bundle_path_exists(path: &Path) -> std::io::Result<bool> {
    let Err(err) = fs::symlink_metadata(path) else {
        return Ok(true);
    };
    if err.kind() == std::io::ErrorKind::NotFound {
        Ok(false)
    } else {
        Err(err)
    }
}
1511
1512fn copy_database_bundle(source_root: &Path, destination_root: &Path) -> Result<()> {
1513 if let Some(parent) = destination_root.parent() {
1514 fs::create_dir_all(parent).with_context(|| {
1515 format!(
1516 "creating destination directory for database bundle copy: {}",
1517 parent.display()
1518 )
1519 })?;
1520 sync_parent_directory(parent)
1521 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1522 }
1523
1524 if !copyable_bundle_file_exists(source_root)? {
1525 bail!(
1526 "database bundle root is missing before copy: {}",
1527 source_root.display()
1528 );
1529 }
1530
1531 let sidecars = copyable_bundle_sidecar_sources(source_root)?;
1532
1533 fs::copy(source_root, destination_root).with_context(|| {
1534 format!(
1535 "copying database bundle {} -> {}",
1536 source_root.display(),
1537 destination_root.display()
1538 )
1539 })?;
1540 sync_file_if_exists(destination_root).with_context(|| {
1541 format!(
1542 "syncing copied database bundle {}",
1543 destination_root.display()
1544 )
1545 })?;
1546
1547 for (source_sidecar, suffix) in sidecars {
1548 let destination_sidecar = database_sidecar_path(destination_root, suffix);
1549 fs::copy(&source_sidecar, &destination_sidecar).with_context(|| {
1550 format!(
1551 "copying database bundle sidecar {} -> {}",
1552 source_sidecar.display(),
1553 destination_sidecar.display()
1554 )
1555 })?;
1556 sync_file_if_exists(&destination_sidecar).with_context(|| {
1557 format!(
1558 "syncing copied database bundle sidecar {}",
1559 destination_sidecar.display()
1560 )
1561 })?;
1562 }
1563
1564 if let Some(parent) = destination_root.parent() {
1565 sync_parent_directory(parent)
1566 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1567 }
1568
1569 Ok(())
1570}
1571
1572fn copyable_bundle_sidecar_sources(source_root: &Path) -> Result<Vec<(PathBuf, &'static str)>> {
1573 let mut sidecars = Vec::new();
1574 for suffix in ["-wal", "-shm"] {
1575 let source_sidecar = database_sidecar_path(source_root, suffix);
1576 if copyable_bundle_file_exists(&source_sidecar)? {
1577 sidecars.push((source_sidecar, suffix));
1578 }
1579 }
1580 Ok(sidecars)
1581}
1582
1583fn copyable_bundle_file_exists(path: &Path) -> Result<bool> {
1584 match fs::symlink_metadata(path) {
1585 Ok(metadata) => {
1586 let file_type = metadata.file_type();
1587 if file_type.is_symlink() {
1588 bail!(
1589 "refusing to copy database bundle symlink: {}",
1590 path.display()
1591 );
1592 }
1593 if !file_type.is_file() {
1594 bail!(
1595 "refusing to copy non-file database bundle path: {}",
1596 path.display()
1597 );
1598 }
1599 Ok(true)
1600 }
1601 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1602 Err(err) => Err(err).with_context(|| {
1603 format!(
1604 "checking database bundle path before copy: {}",
1605 path.display()
1606 )
1607 }),
1608 }
1609}
1610
1611pub(crate) fn remove_database_files(path: &Path) -> std::io::Result<()> {
1613 let mut removed_any = false;
1614
1615 match fs::remove_file(path) {
1616 Ok(()) => removed_any = true,
1617 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1618 Err(err) => return Err(err),
1619 }
1620
1621 for suffix in ["-wal", "-shm"] {
1623 match fs::remove_file(database_sidecar_path(path, suffix)) {
1624 Ok(()) => removed_any = true,
1625 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1626 Err(err) => return Err(err),
1627 }
1628 }
1629
1630 if removed_any && let Some(parent) = path.parent() {
1631 sync_parent_directory(parent)?;
1632 }
1633
1634 Ok(())
1635}
1636
/// Opens the directory at `path` and calls `sync_all` on it.
#[cfg(not(windows))]
fn sync_parent_directory(path: &Path) -> std::io::Result<()> {
    let directory = fs::File::open(path)?;
    directory.sync_all()
}

/// Windows variant: performs no work and always succeeds.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
1646
/// Calls `sync_all` on the file at `path`; does nothing when `path` does not
/// exist.
#[cfg(not(windows))]
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    if !path.exists() {
        return Ok(());
    }
    fs::File::open(path)?.sync_all()
}

/// Windows variant: same contract, but the file is opened read-write before
/// `sync_all` is called.
#[cfg(windows)]
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    if !path.exists() {
        return Ok(());
    }
    let file = fs::OpenOptions::new().read(true).write(true).open(path)?;
    file.sync_all()
}
1666
1667pub fn cleanup_old_backups(db_path: &Path, keep_count: usize) -> Result<(), std::io::Error> {
1669 let parent = match db_path.parent() {
1670 Some(p) => p,
1671 None => return Ok(()),
1672 };
1673
1674 let db_name = db_path.file_name().and_then(|n| n.to_str()).unwrap_or("db");
1675
1676 let prefix = format!("{}.backup.", db_name);
1677
1678 let mut backups: Vec<(std::path::PathBuf, SystemTime)> = Vec::new();
1680
1681 if let Ok(entries) = fs::read_dir(parent) {
1682 for entry in entries.flatten() {
1683 let path = entry.path();
1684 if let Some(name) = path.file_name().and_then(|n| n.to_str())
1685 && is_backup_root_name(name, &prefix)
1686 && let Ok(meta) = fs::metadata(&path)
1687 && meta.is_file()
1688 && let Ok(mtime) = meta.modified()
1689 {
1690 backups.push((path, mtime));
1691 }
1692 }
1693 }
1694
1695 backups.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1697
1698 for (path, _) in backups.into_iter().skip(keep_count) {
1700 let _ = fs::remove_file(&path);
1701
1702 let _ = fs::remove_file(database_sidecar_path(&path, "-wal"));
1704 let _ = fs::remove_file(database_sidecar_path(&path, "-shm"));
1705 }
1706
1707 Ok(())
1708}
1709
/// A historical copy of the database (backup, snapshot, quarantined file, …)
/// discovered on disk, together with the facts gathered about it at discovery.
#[derive(Debug, Clone)]
pub(crate) struct HistoricalDatabaseBundle {
    /// Path of the bundle's main database file.
    root_path: PathBuf,
    /// Combined size in bytes of the main file and its `-wal` / `-shm` sidecars.
    total_bytes: u64,
    /// Modification time of the main file in milliseconds since the Unix
    /// epoch; `0` when it could not be determined.
    modified_at_ms: i64,
    /// Whether the bundle opened read-only and its `conversations` and
    /// `messages` tables could be probed.
    supports_direct_readonly: bool,
    /// Schema and FTS facts read from the bundle.
    probe: HistoricalBundleProbe,
}
1718
/// Facts read from a historical bundle while ranking it for salvage.
#[derive(Debug, Clone, Copy, Default)]
struct HistoricalBundleProbe {
    /// Schema version recorded in the bundle's metadata, if it could be read.
    schema_version: Option<i64>,
    /// Number of `sqlite_master` rows named `fts_messages`, if the count could
    /// be read.
    fts_schema_rows: Option<i64>,
    /// Whether a count query against `fts_messages` succeeded.
    fts_queryable: bool,
    /// Largest `messages.id` in the bundle; `0` when there are no messages or
    /// the query failed.
    max_message_id: i64,
}
1726
/// Test-only snapshot of database health indicators.
// NOTE(review): nothing in this part of the file populates this struct; the
// field meanings below are read off the field names and should be confirmed
// against the code that fills them in.
#[cfg(test)]
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SqliteDatabaseHealthProbe {
    /// Recorded schema version, if any.
    pub schema_version: Option<i64>,
    /// Presumably whether a quick integrity check reported OK.
    pub quick_check_ok: bool,
    /// Presumably the number of schema rows for the FTS table.
    pub fts_schema_rows: i64,
    /// Presumably whether the FTS table could be queried.
    pub fts_queryable: bool,
    /// Presumably the number of rows in `messages`.
    pub message_count: i64,
    /// Presumably the largest `messages.id`.
    pub max_message_id: i64,
}
1738
/// Outcome reported by the FTS consistency check (see
/// `ensure_fts_consistency_via_rusqlite`).
// NOTE(review): the variants are constructed outside this part of the file;
// the per-variant descriptions follow the variant and field names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FtsConsistencyRepair {
    /// No repair was performed.
    AlreadyHealthy {
        /// Row count reported alongside the healthy verdict.
        rows: usize,
    },
    /// Rows were added to an existing index.
    IncrementalCatchUp {
        /// Rows inserted by the catch-up.
        inserted_rows: usize,
        /// Row total reported after the catch-up.
        total_rows: usize,
    },
    /// The index was rebuilt.
    Rebuilt {
        /// Rows inserted by the rebuild.
        inserted_rows: usize,
    },
}
1752
/// Running totals for a historical salvage pass.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HistoricalSalvageOutcome {
    /// Bundles examined.
    pub bundles_considered: usize,
    /// Bundles that were imported.
    pub bundles_imported: usize,
    /// Conversations imported across all bundles.
    pub conversations_imported: usize,
    /// Messages imported across all bundles.
    pub messages_imported: usize,
}

impl HistoricalSalvageOutcome {
    /// Adds every counter of `other` onto the matching counter of `self`.
    pub(crate) fn accumulate(&mut self, other: Self) {
        let Self {
            bundles_considered,
            bundles_imported,
            conversations_imported,
            messages_imported,
        } = other;

        self.bundles_considered += bundles_considered;
        self.bundles_imported += bundles_imported;
        self.conversations_imported += conversations_imported;
        self.messages_imported += messages_imported;
    }
}
1769
/// An open read connection to a historical bundle, plus how it was obtained.
#[derive(Debug)]
struct HistoricalReadConnection {
    /// Connection used to read the historical rows.
    conn: FrankenConnection,
    /// How the connection was obtained: `"direct-readonly"` or
    /// `"sqlite3-recover"`.
    method: &'static str,
    /// Path of the database the connection reads: the bundle itself, or the
    /// recovered scratch database.
    root_path: PathBuf,
    /// Temporary directory holding the recovered scratch database, kept here
    /// so it lives as long as the connection; `None` for direct opens.
    _tempdir: Option<tempfile::TempDir>,
}
1777
/// Schema created in the scratch database before recovered `INSERT`
/// statements are replayed into it.
///
/// Only the six core tables are declared, with no constraints beyond the
/// primary keys.
const HISTORICAL_RECOVERY_CORE_SCHEMA: &str = r"
CREATE TABLE sources (
    id TEXT PRIMARY KEY,
    kind TEXT,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE agents (
    id INTEGER PRIMARY KEY,
    slug TEXT,
    name TEXT,
    version TEXT,
    kind TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT,
    display_name TEXT
);
CREATE TABLE conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER,
    workspace_id INTEGER,
    source_id TEXT,
    external_id TEXT,
    title TEXT,
    source_path TEXT,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
CREATE TABLE messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER,
    idx INTEGER,
    role TEXT,
    author TEXT,
    created_at INTEGER,
    content TEXT,
    extra_json TEXT,
    extra_bin BLOB
);
CREATE TABLE snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);
";
/// Version stamped as `salvage_version` into the per-bundle import record
/// stored in `meta`.
const HISTORICAL_SALVAGE_LEDGER_VERSION: u32 = 2;
/// Version number for salvage progress records.
// NOTE(review): presumably written to `HistoricalBundleProgress::progress_version`;
// the writer is outside this part of the file — confirm.
const HISTORICAL_SALVAGE_PROGRESS_VERSION: u32 = 1;
/// Five minutes, expressed in milliseconds.
// NOTE(review): the name suggests a tolerance on conversation start times when
// merging by source path; the use site is outside this part of the file.
const SOURCE_PATH_MERGE_START_TOLERANCE_MS: i64 = 5 * 60 * 1000;
1855
/// Serializable progress record for the salvage of one historical bundle.
// NOTE(review): this struct is read and written outside this part of the
// file; the field descriptions follow the field names and the matching fields
// of `HistoricalDatabaseBundle` — confirm against the persistence code.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct HistoricalBundleProgress {
    /// Format version of this record.
    progress_version: u32,
    /// Bundle path, as a string.
    path: String,
    /// Bundle size in bytes.
    bytes: u64,
    /// Bundle modification time in milliseconds.
    modified_at_ms: i64,
    /// Read method used for the bundle.
    method: String,
    /// Row id of the last source row that finished importing.
    last_completed_source_row_id: i64,
    /// Conversations imported so far.
    conversations_imported: usize,
    /// Messages imported so far.
    messages_imported: usize,
    /// Time of the last update to this record, in milliseconds.
    updated_at_ms: i64,
}
1868
/// One conversation queued for a batched historical import.
// NOTE(review): built and consumed outside this part of the file; the field
// descriptions follow the field names — confirm which database each id
// refers to.
#[derive(Debug, Clone)]
struct HistoricalBatchEntry {
    /// Row id of the conversation in the source it was read from.
    source_row_id: i64,
    /// Agent id associated with the conversation.
    agent_id: i64,
    /// Workspace id associated with the conversation, if any.
    workspace_id: Option<i64>,
    /// The conversation payload.
    conversation: Conversation,
}
1876
/// Counters describing what a batched historical import inserted.
// NOTE(review): populated outside this part of the file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct HistoricalBatchImportTotals {
    /// Source rows inserted.
    inserted_source_rows: usize,
    /// Messages inserted.
    inserted_messages: usize,
}
1882
1883fn historical_bundle_root_paths(db_path: &Path) -> Vec<PathBuf> {
1884 let mut roots = Vec::new();
1885 let Some(parent) = db_path.parent() else {
1886 return roots;
1887 };
1888 let db_name = db_path
1889 .file_name()
1890 .and_then(|n| n.to_str())
1891 .unwrap_or("agent_search.db");
1892 let db_stem = db_path
1893 .file_stem()
1894 .and_then(|n| n.to_str())
1895 .unwrap_or("agent_search");
1896
1897 let mut push_root = |path: PathBuf| {
1898 if path == db_path {
1899 return;
1900 }
1901 if !roots.iter().any(|existing| existing == &path) {
1902 roots.push(path);
1903 }
1904 };
1905
1906 if let Ok(entries) = fs::read_dir(parent) {
1907 for entry in entries.flatten() {
1908 let path = entry.path();
1909 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1910 continue;
1911 };
1912 if has_db_sidecar_suffix(name) {
1913 continue;
1914 }
1915 if name.starts_with(&format!("{db_name}.backup."))
1916 || name.starts_with(&format!("{db_stem}.corrupt."))
1917 {
1918 push_root(path);
1919 }
1920 }
1921 }
1922
1923 let backups_dir = parent.join("backups");
1924 if let Ok(entries) = fs::read_dir(backups_dir) {
1925 for entry in entries.flatten() {
1926 let path = entry.path();
1927 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1928 continue;
1929 };
1930 if has_db_sidecar_suffix(name) {
1931 continue;
1932 }
1933 if name.starts_with(&format!("{db_name}.")) && name.ends_with(".bak") {
1934 push_root(path);
1935 }
1936 }
1937 }
1938
1939 push_named_database_children(&mut roots, db_path, &parent.join("repair-lab"), db_name);
1940 push_named_database_children(&mut roots, db_path, &parent.join("snapshots"), db_name);
1941
1942 roots
1943}
1944
/// For every entry of `dir`, appends `<entry>/<db_name>` to `roots` when that
/// path exists, is not `canonical_db_path`, and is not already listed.
///
/// An unreadable `dir` and unreadable entries are ignored.
fn push_named_database_children(
    roots: &mut Vec<PathBuf>,
    canonical_db_path: &Path,
    dir: &Path,
    db_name: &str,
) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };
    for candidate in entries.flatten().map(|entry| entry.path().join(db_name)) {
        if candidate == canonical_db_path {
            continue;
        }
        if !candidate.exists() || roots.contains(&candidate) {
            continue;
        }
        roots.push(candidate);
    }
}
1963
/// Returns the modification time of `path` in milliseconds since the Unix
/// epoch, or `0` when the metadata is unavailable or the time predates the
/// epoch.
fn file_mtime_ms(path: &Path) -> i64 {
    let Ok(modified) = fs::metadata(path).and_then(|meta| meta.modified()) else {
        return 0;
    };
    match modified.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as i64,
        Err(_) => 0,
    }
}
1972
1973fn bundle_total_bytes(root_path: &Path) -> u64 {
1974 let mut total = fs::metadata(root_path).map(|meta| meta.len()).unwrap_or(0);
1975 for suffix in ["-wal", "-shm"] {
1976 let sidecar = database_sidecar_path(root_path, suffix);
1977 total = total.saturating_add(fs::metadata(sidecar).map(|meta| meta.len()).unwrap_or(0));
1978 }
1979 total
1980}
1981
1982pub(crate) fn discover_historical_database_bundles(
1983 db_path: &Path,
1984) -> Vec<HistoricalDatabaseBundle> {
1985 let mut bundles: Vec<_> = historical_bundle_root_paths(db_path)
1986 .into_iter()
1987 .filter(|root| root.exists())
1988 .map(|root_path| {
1989 let modified_at_ms = file_mtime_ms(&root_path);
1990 let total_bytes = bundle_total_bytes(&root_path);
1991 let supports_direct_readonly = historical_bundle_supports_direct_readonly(&root_path);
1992 let probe = probe_historical_bundle(&root_path);
1993 HistoricalDatabaseBundle {
1994 modified_at_ms,
1995 total_bytes,
1996 supports_direct_readonly,
1997 root_path,
1998 probe,
1999 }
2000 })
2001 .filter(|bundle| bundle.total_bytes > 0)
2002 .collect();
2003
2004 fn bundle_priority(path: &Path) -> i32 {
2005 let path_str = path.to_string_lossy();
2006 if path_str.contains("/repair-lab/replay-") {
2007 return 5;
2008 }
2009 if path_str.contains("/repair-lab/") {
2010 return 4;
2011 }
2012 if path_str.contains("/snapshots/") {
2013 return 3;
2014 }
2015 if path_str.contains(".corrupt.") || path_str.contains("failed-baseline-seed") {
2016 return 0;
2017 }
2018 1
2019 }
2020
2021 fn bundle_health_rank(bundle: &HistoricalDatabaseBundle) -> i32 {
2022 let fts_clean = match bundle.probe.fts_schema_rows {
2045 Some(1) => bundle.probe.fts_queryable,
2046 Some(0) => bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION),
2047 _ => false,
2048 };
2049
2050 let clean_schema14_fts =
2051 bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION) && fts_clean;
2052 if clean_schema14_fts {
2053 return 5;
2054 }
2055
2056 if fts_clean {
2057 return 4;
2058 }
2059
2060 if bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION)
2061 && bundle.supports_direct_readonly
2062 {
2063 return 3;
2064 }
2065
2066 if bundle.supports_direct_readonly {
2067 return 2;
2068 }
2069
2070 1
2071 }
2072
2073 bundles.sort_by(|left, right| {
2074 bundle_health_rank(right)
2075 .cmp(&bundle_health_rank(left))
2076 .then_with(|| right.probe.max_message_id.cmp(&left.probe.max_message_id))
2077 .then_with(|| bundle_priority(&right.root_path).cmp(&bundle_priority(&left.root_path)))
2078 .then_with(|| {
2079 right
2080 .supports_direct_readonly
2081 .cmp(&left.supports_direct_readonly)
2082 })
2083 .then_with(|| right.total_bytes.cmp(&left.total_bytes))
2084 .then_with(|| right.modified_at_ms.cmp(&left.modified_at_ms))
2085 .then_with(|| right.root_path.cmp(&left.root_path))
2086 });
2087 bundles
2088}
2089
2090fn probe_historical_bundle(root_path: &Path) -> HistoricalBundleProbe {
2091 let Ok(conn) = open_historical_bundle_readonly(root_path) else {
2092 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or_default();
2093 };
2094
2095 let schema_version = read_meta_schema_version(&conn).ok().flatten();
2096 let fts_schema_rows: Option<i64> = conn
2097 .query_row_map(
2098 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
2099 fparams![],
2100 |row| row.get_typed(0),
2101 )
2102 .ok();
2103 let fts_queryable =
2104 historical_bundle_fts_queryable_via_frankensqlite(root_path, fts_schema_rows);
2105 let max_message_id: i64 = conn
2106 .query_row_map(
2107 "SELECT COALESCE(MAX(id), 0) FROM messages",
2108 fparams![],
2109 |row| row.get_typed(0),
2110 )
2111 .unwrap_or(0);
2112
2113 let probe = HistoricalBundleProbe {
2114 schema_version,
2115 fts_schema_rows,
2116 fts_queryable,
2117 max_message_id,
2118 };
2119
2120 if probe.schema_version.is_none()
2121 && probe.fts_schema_rows.is_none()
2122 && probe.max_message_id == 0
2123 {
2124 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or(probe);
2125 }
2126
2127 probe
2128}
2129
2130fn probe_historical_bundle_via_sqlite3_metadata(root_path: &Path) -> Option<HistoricalBundleProbe> {
2131 let bundle_uri = format!("file:{}?immutable=1", root_path.to_string_lossy());
2132 let output = Command::new("sqlite3")
2133 .arg("-batch")
2134 .arg("-noheader")
2135 .arg(&bundle_uri)
2136 .arg(
2137 "PRAGMA writable_schema=ON;
2138 SELECT COALESCE((SELECT value FROM meta WHERE key = 'schema_version'), '');
2139 SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages';
2140 SELECT COALESCE(MAX(id), 0) FROM messages;",
2141 )
2142 .output()
2143 .ok()?;
2144 if !output.status.success() {
2145 return None;
2146 }
2147
2148 let stdout = String::from_utf8(output.stdout).ok()?;
2149 let mut lines = stdout.lines();
2150 let schema_version = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
2151 let fts_schema_rows = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
2152 let max_message_id = lines
2153 .next()
2154 .and_then(|raw| raw.trim().parse::<i64>().ok())
2155 .unwrap_or(0);
2156
2157 Some(HistoricalBundleProbe {
2158 schema_version,
2159 fts_schema_rows,
2160 fts_queryable: false,
2161 max_message_id,
2162 })
2163}
2164
2165fn historical_bundle_fts_queryable_via_frankensqlite(
2166 root_path: &Path,
2167 fts_schema_rows: Option<i64>,
2168) -> bool {
2169 matches!(fts_schema_rows, Some(1))
2170 && FrankenStorage::open_readonly(root_path)
2171 .map(|storage| {
2172 storage
2173 .raw()
2174 .query("SELECT COUNT(*) FROM fts_messages")
2175 .is_ok()
2176 })
2177 .unwrap_or(false)
2178}
2179
2180fn historical_bundle_supports_direct_readonly(root_path: &Path) -> bool {
2181 open_historical_bundle_readonly(root_path)
2182 .and_then(|conn| historical_bundle_has_queryable_core_tables(&conn))
2183 .is_ok()
2184}
2185
2186fn historical_table_exists(conn: &FrankenConnection, table: &str) -> Result<bool> {
2187 let found: Option<i64> = conn
2188 .query_row_map(
2189 "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1 LIMIT 1",
2190 fparams![table],
2191 |row| row.get_typed(0),
2192 )
2193 .optional()
2194 .with_context(|| format!("checking for historical table {table}"))?;
2195 Ok(found.is_some())
2196}
2197
2198fn probe_historical_table_reads(conn: &FrankenConnection, table: &str) -> Result<()> {
2199 if !historical_table_exists(conn, table)? {
2200 return Err(anyhow!(
2201 "historical database missing required table {table}"
2202 ));
2203 }
2204
2205 let sql = format!("SELECT rowid FROM {table} LIMIT 1");
2206 let _: Option<i64> = conn
2207 .query_row_map(&sql, fparams![], |row| row.get_typed(0))
2208 .optional()
2209 .with_context(|| format!("probing rows from historical table {table}"))?;
2210 Ok(())
2211}
2212
2213fn historical_bundle_has_queryable_core_tables(conn: &FrankenConnection) -> Result<()> {
2214 probe_historical_table_reads(conn, "conversations")?;
2215 probe_historical_table_reads(conn, "messages")?;
2216 Ok(())
2217}
2218
2219fn open_historical_bundle_readonly(root_path: &Path) -> Result<FrankenConnection> {
2220 let path_str = root_path.to_string_lossy();
2221 let flags = FrankenOpenFlags::SQLITE_OPEN_READ_ONLY;
2222 let conn = open_franken_with_flags(&path_str, flags)
2223 .with_context(|| format!("opening historical database {}", root_path.display()))?;
2224 Ok(conn)
2225}
2226
/// Reports whether `line` is an `INSERT` statement targeting one of the core
/// tables restored during recovery.
///
/// Accepted shapes are `INSERT INTO <q>table<q>` and
/// `INSERT OR IGNORE INTO <q>table<q>`, where `<q>` is a single or double
/// quote used on both sides of the table name. Matching is case-sensitive and
/// anchored at the very start of the line.
fn is_recoverable_insert_line(line: &str) -> bool {
    const CORE_TABLES: [&str; 6] = [
        "sources",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
    ];

    let Some(after_insert) = line.strip_prefix("INSERT ") else {
        return false;
    };
    let after_conflict_clause = after_insert
        .strip_prefix("OR IGNORE ")
        .unwrap_or(after_insert);
    let Some(target) = after_conflict_clause.strip_prefix("INTO ") else {
        return false;
    };

    let mut chars = target.chars();
    let quote = match chars.next() {
        Some(q @ ('\'' | '"')) => q,
        _ => return false,
    };
    let after_open_quote = chars.as_str();

    // The table name must be followed directly by the same quote character.
    CORE_TABLES.iter().any(|table| {
        after_open_quote
            .strip_prefix(table)
            .is_some_and(|tail| tail.starts_with(quote))
    })
}
2244
/// Recovers the core tables of a damaged bundle into a scratch database by
/// piping `sqlite3 .recover` output into a second `sqlite3` process.
///
/// Only `INSERT` lines accepted by `is_recoverable_insert_line` are forwarded,
/// wrapped in a single transaction. The scratch database lives in a temporary
/// directory that is handed to the returned connection wrapper.
///
/// # Errors
///
/// Fails when the scratch database cannot be set up, when either `sqlite3`
/// process cannot be launched or waited on, when piping fails, when the
/// importer exits unsuccessfully, when the recovered database cannot be opened
/// or its core tables cannot be probed, or when `.recover` exits unsuccessfully
/// and no conversations or messages were recovered.
fn recover_historical_bundle_via_sqlite3(
    bundle: &HistoricalDatabaseBundle,
) -> Result<HistoricalReadConnection> {
    let tempdir = tempfile::TempDir::new().context("creating temporary salvage directory")?;
    let recovered_db = tempdir.path().join("historical-recovered.db");
    // Create the empty core schema first so the replayed INSERTs have tables
    // to land in.
    let temp_conn = FrankenConnection::open(recovered_db.to_string_lossy().as_ref())
        .with_context(|| format!("creating recovered database {}", recovered_db.display()))?;
    temp_conn
        .execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA)
        .with_context(|| format!("initializing recovered schema {}", recovered_db.display()))?;
    // Drop the setup connection before the importer process opens the file.
    drop(temp_conn);

    // The bundle is handed to sqlite3 as a URI with `immutable=1`.
    let bundle_uri = format!("file:{}?immutable=1", bundle.root_path.to_string_lossy());
    let mut recover = Command::new("sqlite3")
        .arg(&bundle_uri)
        .arg(".recover")
        .stdout(Stdio::piped())
        .spawn()
        .with_context(|| {
            format!(
                "launching sqlite3 .recover for historical bundle {}",
                bundle.root_path.display()
            )
        })?;
    let recover_stdout = recover
        .stdout
        .take()
        .context("capturing sqlite3 .recover stdout")?;

    let mut importer = Command::new("sqlite3")
        .arg(&recovered_db)
        .stdin(Stdio::piped())
        .spawn()
        .with_context(|| {
            format!(
                "launching sqlite3 importer for recovered bundle {}",
                recovered_db.display()
            )
        })?;

    // Scoped so the mutable borrow of the importer's stdin ends before the
    // importer is waited on.
    {
        let importer_stdin = importer
            .stdin
            .as_mut()
            .context("opening sqlite3 importer stdin")?;
        importer_stdin
            .write_all(b"BEGIN;\n")
            .context("starting recovery import transaction")?;

        // Stream the recover output line by line, forwarding only INSERTs
        // into the core tables.
        let reader = BufReader::new(recover_stdout);
        for line in reader.lines() {
            let line = line.context("reading sqlite3 .recover output")?;
            if is_recoverable_insert_line(&line) {
                importer_stdin
                    .write_all(line.as_bytes())
                    .context("writing recovered INSERT")?;
                importer_stdin
                    .write_all(b"\n")
                    .context("writing recovered INSERT newline")?;
            }
        }

        importer_stdin
            .write_all(b"COMMIT;\n")
            .context("committing recovery import transaction")?;
    }

    // Both processes are waited on before either exit status is examined.
    let importer_status = importer
        .wait()
        .context("waiting for sqlite3 recovery importer")?;
    let recover_status = recover
        .wait()
        .context("waiting for sqlite3 .recover process")?;
    if !importer_status.success() {
        anyhow::bail!(
            "sqlite3 recovery importer exited with status {} for {} after sqlite3 .recover exited with status {}",
            importer_status,
            recovered_db.display(),
            recover_status
        );
    }

    let conn = open_historical_bundle_readonly(&recovered_db)?;
    historical_bundle_has_queryable_core_tables(&conn)?;
    // A nonzero `.recover` exit is tolerated as long as some core rows made
    // it into the scratch database.
    if !recover_status.success() {
        let (conversations, messages) = historical_bundle_counts(&conn)?;
        if conversations == 0 && messages == 0 {
            anyhow::bail!(
                "sqlite3 .recover exited with status {} for {} and recovered no core rows",
                recover_status,
                bundle.root_path.display()
            );
        }
        tracing::warn!(
            path = %bundle.root_path.display(),
            status = %recover_status,
            conversations,
            messages,
            "sqlite3 .recover exited nonzero after emitting recoverable core rows; continuing with recovered subset"
        );
    }
    // The temp dir travels with the connection so the scratch database
    // outlives this function.
    Ok(HistoricalReadConnection {
        conn,
        method: "sqlite3-recover",
        root_path: recovered_db,
        _tempdir: Some(tempdir),
    })
}
2353
2354fn open_historical_bundle_for_salvage(
2355 bundle: &HistoricalDatabaseBundle,
2356) -> Result<HistoricalReadConnection> {
2357 match open_historical_bundle_readonly(&bundle.root_path) {
2358 Ok(conn) => {
2359 if historical_bundle_has_queryable_core_tables(&conn).is_ok() {
2360 return Ok(HistoricalReadConnection {
2361 conn,
2362 method: "direct-readonly",
2363 root_path: bundle.root_path.clone(),
2364 _tempdir: None,
2365 });
2366 }
2367 }
2368 Err(err) => {
2369 tracing::warn!(
2370 path = %bundle.root_path.display(),
2371 error = %err,
2372 "historical bundle direct open failed; falling back to sqlite3 .recover"
2373 );
2374 }
2375 }
2376
2377 recover_historical_bundle_via_sqlite3(bundle)
2378}
2379
2380fn historical_bundle_counts(conn: &FrankenConnection) -> Result<(usize, usize)> {
2381 let conversations: i64 =
2382 conn.query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
2383 row.get_typed(0)
2384 })?;
2385 let messages: i64 = conn.query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
2386 row.get_typed(0)
2387 })?;
2388 Ok((
2389 usize::try_from(conversations.max(0)).unwrap_or(usize::MAX),
2390 usize::try_from(messages.max(0)).unwrap_or(usize::MAX),
2391 ))
2392}
2393
2394fn clear_seeded_runtime_meta(conn: &FrankenConnection) -> Result<()> {
2395 conn.execute(
2396 "DELETE FROM meta
2397 WHERE key LIKE 'historical_bundle_salvaged:%'
2398 OR key IN ('last_scan_ts', 'last_indexed_at', 'last_embedded_message_id')",
2399 )?;
2400 Ok(())
2401}
2402
2403fn record_historical_bundle_import(
2404 conn: &FrankenConnection,
2405 bundle: &HistoricalDatabaseBundle,
2406 method: &str,
2407 conversations_imported: usize,
2408 messages_imported: usize,
2409) -> Result<()> {
2410 let key = FrankenStorage::historical_bundle_meta_key(bundle);
2411 let value = serde_json::json!({
2412 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
2413 "path": bundle.root_path.display().to_string(),
2414 "bytes": bundle.total_bytes,
2415 "modified_at_ms": bundle.modified_at_ms,
2416 "method": method,
2417 "conversations_imported": conversations_imported,
2418 "messages_imported": messages_imported,
2419 "recorded_at_ms": FrankenStorage::now_millis(),
2420 });
2421 let value_str = serde_json::to_string(&value)?;
2422 conn.execute_compat(
2423 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
2424 fparams![key, value_str],
2425 )?;
2426 Ok(())
2427}
2428
2429fn scrub_staged_derived_fts_metadata_via_sqlite3(staged_db_path: &Path) -> Result<()> {
2430 let scrub_sql = "PRAGMA writable_schema = ON;
2431 DELETE FROM sqlite_master
2432 WHERE name = 'fts_messages'
2433 OR tbl_name = 'fts_messages'
2434 OR name IN (
2435 'fts_messages_config',
2436 'fts_messages_content',
2437 'fts_messages_data',
2438 'fts_messages_docsize',
2439 'fts_messages_idx'
2440 )
2441 OR tbl_name IN (
2442 'fts_messages_config',
2443 'fts_messages_content',
2444 'fts_messages_data',
2445 'fts_messages_docsize',
2446 'fts_messages_idx'
2447 );
2448 PRAGMA writable_schema = OFF;";
2449
2450 let run_scrub = |disable_defensive: bool| -> Result<std::process::Output> {
2451 let mut command = Command::new("sqlite3");
2452 command.arg("-batch").arg(staged_db_path);
2453 if disable_defensive {
2454 command.arg(".dbconfig defensive off");
2455 }
2456 command.arg(scrub_sql).output().with_context(|| {
2457 format!(
2458 "running sqlite3 staged FTS metadata scrub for {}",
2459 staged_db_path.display()
2460 )
2461 })
2462 };
2463 let render_output = |output: &std::process::Output| -> String {
2464 format!(
2465 "status {}; stdout: {}; stderr: {}",
2466 output.status,
2467 String::from_utf8_lossy(&output.stdout).trim(),
2468 String::from_utf8_lossy(&output.stderr).trim()
2469 )
2470 };
2471
2472 let defensive_off_output = run_scrub(true)?;
2473 if defensive_off_output.status.success() {
2474 return Ok(());
2475 }
2476
2477 let fallback_output = run_scrub(false)?;
2478 if !fallback_output.status.success() {
2479 anyhow::bail!(
2480 "sqlite3 staged FTS metadata scrub failed for {}; defensive-off attempt {}; fallback without .dbconfig {}",
2481 staged_db_path.display(),
2482 render_output(&defensive_off_output),
2483 render_output(&fallback_output)
2484 );
2485 }
2486 Ok(())
2487}
2488
2489fn ensure_seeded_canonical_fts_consistency(staged_db_path: &Path) -> Result<FtsConsistencyRepair> {
2490 match ensure_fts_consistency_via_rusqlite(staged_db_path) {
2491 Ok(repair) => Ok(repair),
2492 Err(err) => {
2493 if fts_messages_integrity_error_from_message(format!("{err:#}")).is_none() {
2494 return Err(err).with_context(|| {
2495 format!(
2496 "repairing staged canonical FTS consistency before finalization: {}",
2497 staged_db_path.display()
2498 )
2499 });
2500 }
2501
2502 tracing::warn!(
2503 path = %staged_db_path.display(),
2504 error = %err,
2505 "staged historical seed has malformed derived FTS metadata; scrubbing and rebuilding FTS on staged copy"
2506 );
2507 scrub_staged_derived_fts_metadata_via_sqlite3(staged_db_path).with_context(|| {
2508 format!(
2509 "scrubbing malformed staged FTS metadata before finalization: {}",
2510 staged_db_path.display()
2511 )
2512 })?;
2513 ensure_fts_consistency_via_rusqlite(staged_db_path).with_context(|| {
2514 format!(
2515 "repairing staged canonical FTS consistency after metadata scrub: {}",
2516 staged_db_path.display()
2517 )
2518 })
2519 }
2520 }
2521}
2522
2523fn finalize_seeded_canonical_bundle_via_rusqlite(
2524 canonical_db_path: &Path,
2525 bundle: &HistoricalDatabaseBundle,
2526) -> Result<(usize, usize)> {
2527 let _fts_repair = ensure_seeded_canonical_fts_consistency(canonical_db_path)?;
2528
2529 let path_str = canonical_db_path.to_string_lossy();
2530 let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
2531 format!(
2532 "opening seeded canonical database for post-seed finalization: {}",
2533 canonical_db_path.display()
2534 )
2535 })?;
2536 conn.execute("PRAGMA busy_timeout = 30000;")
2537 .with_context(|| {
2538 format!(
2539 "configuring busy timeout for seeded canonical database {}",
2540 canonical_db_path.display()
2541 )
2542 })?;
2543 let schema_version = read_meta_schema_version(&conn)?;
2544
2545 if let Some(version) = schema_version
2546 && version < CURRENT_SCHEMA_VERSION
2547 && version != 13
2548 {
2549 anyhow::bail!(
2550 "seeded canonical bundle schema_version {version} is too old for baseline import and cannot be finalized automatically"
2551 );
2552 }
2553
2554 clear_seeded_runtime_meta(&conn)?;
2555 let (conversations_imported, messages_imported) = historical_bundle_counts(&conn)?;
2556
2557 conn.execute_compat(
2558 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1)",
2559 fparams![CURRENT_SCHEMA_VERSION.to_string()],
2560 )?;
2561
2562 conn.execute_compat(
2563 "INSERT OR IGNORE INTO _schema_migrations(version, name) VALUES(?1, 'fts_contentless')",
2564 fparams![CURRENT_SCHEMA_VERSION],
2565 )?;
2566 record_historical_bundle_import(
2567 &conn,
2568 bundle,
2569 "baseline-bulk-sql-copy",
2570 conversations_imported,
2571 messages_imported,
2572 )?;
2573 Ok((conversations_imported, messages_imported))
2574}
2575
2576fn read_meta_schema_version(conn: &FrankenConnection) -> Result<Option<i64>> {
2577 let version: Option<String> = conn
2578 .query_row_map(
2579 "SELECT value FROM meta WHERE key = 'schema_version'",
2580 fparams![],
2581 |row| row.get_typed(0),
2582 )
2583 .optional()?;
2584 Ok(version.and_then(|raw| raw.parse::<i64>().ok()))
2585}
2586
/// Counts the `sqlite_master` rows named `fts_messages` (test-only helper).
#[cfg(test)]
fn franken_fts_schema_rows(conn: &FrankenConnection) -> Result<i64> {
    let matching_rows: std::result::Result<i64, _> = conn.query_row_map(
        "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
        fparams![],
        |row| row.get_typed(0),
    );
    matching_rows.context("counting sqlite_master rows for fts_messages via frankensqlite")
}
2596
/// Reports whether `fts_messages` can be queried at all (test-only helper).
///
/// Only the success of the query matters, so the probe asks for at most one
/// row instead of running an unbounded `COUNT(*)` over the whole FTS table.
#[cfg(test)]
fn franken_fts_limit_probe(conn: &FrankenConnection) -> bool {
    conn.query("SELECT rowid FROM fts_messages LIMIT 1").is_ok()
}
2601
/// Probes a database through frankensqlite and summarises its health
/// (test-only helper).
///
/// Collects the stored schema version, the `PRAGMA quick_check(1)` verdict
/// and the FTS schema/queryability state. Message statistics are gathered
/// only when the quick check reports `ok`; otherwise both are reported as 0.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn probe_database_health_via_frankensqlite(
    db_path: &Path,
) -> Result<SqliteDatabaseHealthProbe> {
    let db_path_text = db_path.to_string_lossy();
    let conn = FrankenConnection::open(db_path_text.as_ref()).with_context(|| {
        format!(
            "opening frankensqlite db at {} for database health probe",
            db_path.display()
        )
    })?;
    conn.execute_batch("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for database health probe at {}",
                db_path.display()
            )
        })?;

    let schema_version = read_meta_schema_version(&conn)?;

    let quick_check_status: String = conn
        .query_row_map("PRAGMA quick_check(1)", fparams![], |row| row.get_typed(0))
        .with_context(|| format!("running PRAGMA quick_check(1) for {}", db_path.display()))?;
    let quick_check_ok = quick_check_status.trim().eq_ignore_ascii_case("ok");

    // FTS counts as queryable only with exactly one schema row and a
    // successful probe query.
    let fts_schema_rows = franken_fts_schema_rows(&conn)?;
    let fts_queryable = fts_schema_rows == 1 && franken_fts_limit_probe(&conn);

    let (message_count, max_message_id) = if quick_check_ok {
        let total: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .context("counting messages during frankensqlite database health probe")?;
        let highest_id: i64 = conn
            .query_row_map(
                "SELECT COALESCE(MAX(id), 0) FROM messages",
                fparams![],
                |row| row.get_typed(0),
            )
            .context("reading max message id during frankensqlite database health probe")?;
        (total, highest_id)
    } else {
        // The messages table is not read when the quick check failed.
        (0, 0)
    };

    Ok(SqliteDatabaseHealthProbe {
        schema_version,
        quick_check_ok,
        fts_schema_rows,
        fts_queryable,
        message_count,
        max_message_id,
    })
}
2663
/// A historical bundle copied into a temporary directory, ready to be
/// finalized and promoted into the canonical database path.
struct StagedHistoricalSeed {
    /// Temporary directory that contains the staged database; promotion also
    /// places the pre-seed canonical backup inside it.
    tempdir: tempfile::TempDir,
    /// Path of the staged database file inside `tempdir`.
    db_path: PathBuf,
}
2668
2669fn stage_historical_bundle_for_seed(
2670 canonical_db_path: &Path,
2671 source_root_path: &Path,
2672) -> Result<StagedHistoricalSeed> {
2673 let canonical_parent = canonical_db_path.parent().unwrap_or_else(|| Path::new("."));
2674 fs::create_dir_all(canonical_parent).with_context(|| {
2675 format!(
2676 "creating canonical database directory before bulk historical seed import: {}",
2677 canonical_parent.display()
2678 )
2679 })?;
2680 let tempdir = tempfile::TempDir::new_in(canonical_parent)
2681 .context("creating temporary baseline seed directory")?;
2682 let staged_seed_db = tempdir.path().join("baseline-seed-output.db");
2683 copy_database_bundle(source_root_path, &staged_seed_db)?;
2684
2685 Ok(StagedHistoricalSeed {
2686 tempdir,
2687 db_path: staged_seed_db,
2688 })
2689}
2690
2691fn stage_and_finalize_historical_seed(
2692 canonical_db_path: &Path,
2693 bundle: &HistoricalDatabaseBundle,
2694 source_root_path: &Path,
2695) -> Result<(StagedHistoricalSeed, usize, usize)> {
2696 let staged_seed = stage_historical_bundle_for_seed(canonical_db_path, source_root_path)?;
2697 let (conversations_imported, messages_imported) =
2698 finalize_seeded_canonical_bundle_via_rusqlite(&staged_seed.db_path, bundle)?;
2699 Ok((staged_seed, conversations_imported, messages_imported))
2700}
2701
2702fn promote_staged_historical_seed(
2703 canonical_db_path: &Path,
2704 staged_seed: &StagedHistoricalSeed,
2705) -> Result<()> {
2706 let canonical_backup = staged_seed
2707 .tempdir
2708 .path()
2709 .join("pre-seed-canonical-backup.db");
2710 let had_canonical = canonical_db_path.exists()
2711 || database_sidecar_path(canonical_db_path, "-wal").exists()
2712 || database_sidecar_path(canonical_db_path, "-shm").exists();
2713
2714 if had_canonical {
2715 move_database_bundle(canonical_db_path, &canonical_backup).with_context(|| {
2716 format!(
2717 "backing up canonical database before promoting staged historical seed import: {}",
2718 canonical_db_path.display()
2719 )
2720 })?;
2721 }
2722
2723 if let Err(err) =
2724 move_database_bundle(&staged_seed.db_path, canonical_db_path).with_context(|| {
2725 format!(
2726 "promoting staged historical seed database bundle {} into canonical path {}",
2727 staged_seed.db_path.display(),
2728 canonical_db_path.display()
2729 )
2730 })
2731 {
2732 if had_canonical {
2733 let _ = move_database_bundle(&canonical_backup, canonical_db_path);
2734 }
2735 return Err(err);
2736 }
2737
2738 Ok(())
2739}
2740
2741pub(crate) fn seed_canonical_from_best_historical_bundle(
2742 canonical_db_path: &Path,
2743) -> Result<Option<HistoricalSalvageOutcome>> {
2744 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
2745 let mut last_seed_error: Option<anyhow::Error> = None;
2746 for bundle in ordered_bundles {
2747 if let Some(version) = bundle.probe.schema_version
2748 && version < 13
2749 {
2750 let err = anyhow!(
2751 "historical bundle {} schema_version {version} is too old for baseline import",
2752 bundle.root_path.display()
2753 );
2754 tracing::warn!(
2755 path = %bundle.root_path.display(),
2756 schema_version = version,
2757 "historical bundle is too old for baseline seed import"
2758 );
2759 last_seed_error = Some(err);
2760 continue;
2761 }
2762
2763 let (staged_seed, conversations_imported, messages_imported) =
2764 match stage_and_finalize_historical_seed(canonical_db_path, &bundle, &bundle.root_path)
2765 {
2766 Ok(result) => result,
2767 Err(primary_err) => {
2768 tracing::warn!(
2769 path = %bundle.root_path.display(),
2770 error = %primary_err,
2771 "direct bulk baseline seed from historical bundle failed; trying sqlite3 salvage copy"
2772 );
2773 let source = match open_historical_bundle_for_salvage(&bundle).with_context(
2774 || {
2775 format!(
2776 "opening historical seed bundle {} for baseline import",
2777 bundle.root_path.display()
2778 )
2779 },
2780 ) {
2781 Ok(source) => source,
2782 Err(salvage_err) => {
2783 last_seed_error = Some(anyhow!(
2784 "direct baseline seed from {} failed: {primary_err:#}; sqlite3 salvage open also failed: {salvage_err:#}",
2785 bundle.root_path.display()
2786 ));
2787 continue;
2788 }
2789 };
2790 match stage_and_finalize_historical_seed(
2791 canonical_db_path,
2792 &bundle,
2793 &source.root_path,
2794 ) {
2795 Ok(result) => result,
2796 Err(err) => {
2797 tracing::warn!(
2798 path = %bundle.root_path.display(),
2799 source_path = %source.root_path.display(),
2800 error = %err,
2801 "bulk baseline seed staging from sqlite3-salvaged historical bundle failed; trying next candidate"
2802 );
2803 last_seed_error = Some(err);
2804 continue;
2805 }
2806 }
2807 }
2808 };
2809
2810 if conversations_imported == 0 && messages_imported == 0 {
2811 let err = anyhow!(
2812 "historical bundle {} has no core rows for baseline import",
2813 bundle.root_path.display()
2814 );
2815 tracing::warn!(
2816 path = %bundle.root_path.display(),
2817 "historical bundle has no core rows for baseline seed import"
2818 );
2819 last_seed_error = Some(err);
2820 continue;
2821 }
2822
2823 if let Err(err) = promote_staged_historical_seed(canonical_db_path, &staged_seed) {
2824 tracing::warn!(
2825 path = %bundle.root_path.display(),
2826 error = %err,
2827 "promoting staged historical seed import failed; trying next candidate"
2828 );
2829 last_seed_error = Some(err);
2830 continue;
2831 }
2832
2833 tracing::info!(
2834 path = %bundle.root_path.display(),
2835 conversations_imported,
2836 messages_imported,
2837 "seeded empty canonical database from largest healthy historical bundle"
2838 );
2839
2840 return Ok(Some(HistoricalSalvageOutcome {
2841 bundles_considered: 0,
2842 bundles_imported: 1,
2843 conversations_imported,
2844 messages_imported,
2845 }));
2846 }
2847 if let Some(err) = last_seed_error {
2848 return Err(err);
2849 }
2850 Ok(None)
2851}
2852
2853fn parse_json_column(value: Option<String>) -> serde_json::Value {
2854 value
2855 .and_then(|raw| serde_json::from_str(&raw).ok())
2856 .unwrap_or(serde_json::Value::Null)
2857}
2858
/// Key of the single-entry JSON object used to carry historical JSON column
/// text verbatim (unparsed) inside a `serde_json::Value`.
const HISTORICAL_RAW_JSON_SENTINEL_KEY: &str = "__cass_historical_raw_json__";
2860
2861fn wrap_historical_raw_json(raw: String) -> serde_json::Value {
2862 serde_json::json!({ HISTORICAL_RAW_JSON_SENTINEL_KEY: raw })
2863}
2864
2865fn historical_raw_json(value: &serde_json::Value) -> Option<&str> {
2866 match value {
2867 serde_json::Value::Object(map) if map.len() == 1 => map
2868 .get(HISTORICAL_RAW_JSON_SENTINEL_KEY)
2869 .and_then(serde_json::Value::as_str),
2870 _ => None,
2871 }
2872}
2873
2874fn parse_historical_json_column(value: Option<String>) -> serde_json::Value {
2875 match value {
2876 Some(raw) if raw.trim().is_empty() => serde_json::Value::Null,
2877 Some(raw) => wrap_historical_raw_json(raw),
2878 None => serde_json::Value::Null,
2879 }
2880}
2881
/// Reports whether the `CASS_DEBUG_HISTORICAL_SALVAGE` environment variable
/// is set (to any value, including an empty one).
fn historical_salvage_debug_enabled() -> bool {
    const DEBUG_FLAG: &str = "CASS_DEBUG_HISTORICAL_SALVAGE";
    let flag = std::env::var_os(DEBUG_FLAG);
    flag.is_some()
}
2885
/// Per-batch limits applied while importing historical data.
#[derive(Debug, Clone, Copy)]
struct HistoricalImportBatchLimits {
    /// Batch limit on conversations.
    conversations: usize,
    /// Batch limit on messages.
    messages: usize,
    /// Batch limit on payload size, as measured by the payload size hints.
    payload_chars: usize,
}
2892
2893fn env_positive_usize(key: &str) -> Option<usize> {
2894 dotenvy::var(key)
2895 .ok()
2896 .and_then(|value| value.parse::<usize>().ok())
2897 .filter(|value| *value > 0)
2898}
2899
2900fn historical_import_batch_limits() -> HistoricalImportBatchLimits {
2901 let cpu_count = std::thread::available_parallelism()
2902 .map(std::num::NonZeroUsize::get)
2903 .unwrap_or(1);
2904
2905 let default_limits = if cpu_count >= 32 {
2906 HistoricalImportBatchLimits {
2907 conversations: 128,
2908 messages: 16_384,
2909 payload_chars: 12_000_000,
2910 }
2911 } else {
2912 HistoricalImportBatchLimits {
2913 conversations: 32,
2914 messages: 4_096,
2915 payload_chars: 3_000_000,
2916 }
2917 };
2918
2919 HistoricalImportBatchLimits {
2920 conversations: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS")
2921 .unwrap_or(default_limits.conversations),
2922 messages: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_MESSAGES")
2923 .unwrap_or(default_limits.messages),
2924 payload_chars: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CHARS")
2925 .unwrap_or(default_limits.payload_chars),
2926 }
2927}
2928
2929fn json_value_size_hint(value: &serde_json::Value) -> usize {
2930 if let Some(raw) = historical_raw_json(value) {
2931 return raw.len();
2932 }
2933 match value {
2934 serde_json::Value::Null => 0,
2935 other => serde_json::to_string(other)
2936 .map(|raw| raw.len())
2937 .unwrap_or(0),
2938 }
2939}
2940
2941fn message_payload_size_hint(message: &Message) -> usize {
2942 message
2943 .content
2944 .len()
2945 .saturating_add(json_value_size_hint(&message.extra_json))
2946}
2947
/// Returns `true` when `name` is a backup database root file: it starts with
/// `prefix` and is not one of the sidecar files that accompany a database.
///
/// Sidecars are recognised through [`has_db_sidecar_suffix`], so lock
/// sidecars (`-lock-shared`, `-lock-reserved`, `-lock-pending`) are excluded
/// in addition to `-wal` and `-shm`.
fn is_backup_root_name(name: &str, prefix: &str) -> bool {
    name.starts_with(prefix) && !has_db_sidecar_suffix(name)
}

/// Returns `true` when `name` ends with one of the known database sidecar
/// suffixes: `-wal`, `-shm`, `-lock-shared`, `-lock-reserved` or
/// `-lock-pending`.
fn has_db_sidecar_suffix(name: &str) -> bool {
    const SIDECAR_SUFFIXES: &[&str] = &[
        "-wal",
        "-shm",
        "-lock-shared",
        "-lock-reserved",
        "-lock-pending",
    ];
    SIDECAR_SUFFIXES.iter().any(|suffix| name.ends_with(suffix))
}
2968
/// Schema version that this build treats as current.
pub const CURRENT_SCHEMA_VERSION: i64 = 20;
/// Oldest stored schema version that the compatibility check still classifies
/// as migratable in place; older versions are reported as needing a rebuild.
const MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION: i64 = 13;
2972
/// Verdict of a schema compatibility check on an existing database file.
#[derive(Debug, Clone)]
pub enum SchemaCheck {
    /// The stored schema version equals the supported version.
    Compatible,
    /// The database has no tables yet, or its stored version is older than
    /// the supported one but still within the in-place migration range.
    NeedsMigration,
    /// The database cannot be used or migrated in place; the payload is a
    /// human-readable reason.
    NeedsRebuild(String),
}
2983
2984fn schema_check_error_requires_rebuild(err: &frankensqlite::FrankenError) -> bool {
2985 matches!(
2989 err,
2990 frankensqlite::FrankenError::DatabaseCorrupt { .. }
2991 | frankensqlite::FrankenError::WalCorrupt { .. }
2992 | frankensqlite::FrankenError::NotADatabase { .. }
2993 | frankensqlite::FrankenError::ShortRead { .. }
2994 )
2995}
2996
/// Builds a sibling path of `path` named
/// `<file_name>.backup.<pid>.<nanos since epoch>.<sequence>`.
///
/// `<file_name>` falls back to `db` when the path has no UTF-8 file name,
/// the timestamp falls back to 0 when the clock reads before the Unix epoch,
/// and `<sequence>` is a process-wide counter that increases on every call.
fn unique_backup_path(path: &Path) -> PathBuf {
    static BACKUP_SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let base_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db",
    };
    let nanos_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let sequence = BACKUP_SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let pid = std::process::id();

    path.with_file_name(format!(
        "{base_name}.backup.{pid}.{nanos_since_epoch}.{sequence}"
    ))
}
3014
/// Derives the hidden sibling path `.<file_name>.vacuum-in-progress` for a
/// backup path, using `db.backup` when the path has no UTF-8 file name.
fn vacuum_stage_backup_path(backup_path: &Path) -> PathBuf {
    let base_name = match backup_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db.backup",
    };
    let staged_name = format!(".{base_name}.vacuum-in-progress");
    backup_path.with_file_name(staged_name)
}
3022
3023fn check_schema_compatibility(
3027 path: &Path,
3028) -> std::result::Result<SchemaCheck, frankensqlite::FrankenError> {
3029 let mut conn = open_franken_with_flags(
3030 &path.to_string_lossy(),
3031 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
3032 )?;
3033
3034 let result = (|| {
3035 let meta_exists: i32 = conn.query_row_map(
3037 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='meta'",
3038 fparams![],
3039 |row| row.get_typed(0),
3040 )?;
3041
3042 if meta_exists == 0 {
3043 let table_count: i32 = conn.query_row_map(
3046 "SELECT COUNT(*) FROM sqlite_master WHERE type='table'",
3047 fparams![],
3048 |row| row.get_typed(0),
3049 )?;
3050
3051 if table_count == 0 {
3052 return Ok(SchemaCheck::NeedsMigration);
3054 }
3055
3056 return Ok(SchemaCheck::NeedsRebuild(
3058 "Database missing schema version metadata".to_string(),
3059 ));
3060 }
3061
3062 let version: Option<i64> = conn
3064 .query_row_map(
3065 "SELECT value FROM meta WHERE key = 'schema_version'",
3066 fparams![],
3067 |row| Ok(row.get_typed::<String>(0)?.parse().ok()),
3068 )
3069 .ok()
3070 .flatten();
3071
3072 match version {
3073 Some(v) if v == SCHEMA_VERSION => Ok(SchemaCheck::Compatible),
3074 Some(v) if (MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION).contains(&v) => {
3075 Ok(SchemaCheck::NeedsMigration)
3076 }
3077 Some(v) if v > 0 && v < MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION => {
3078 Ok(SchemaCheck::NeedsRebuild(format!(
3079 "Schema version {} is too old for in-place migration; supported upgrade path starts at version {}",
3080 v, MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION
3081 )))
3082 }
3083 Some(v) => {
3084 Ok(SchemaCheck::NeedsRebuild(format!(
3086 "Schema version {} is newer than supported version {}",
3087 v, SCHEMA_VERSION
3088 )))
3089 }
3090 None => Ok(SchemaCheck::NeedsRebuild(
3091 "Schema version not found or invalid".to_string(),
3092 )),
3093 }
3094 })();
3095
3096 if let Err(close_err) = conn.close_in_place() {
3097 tracing::warn!(
3098 error = %close_err,
3099 db_path = %path.display(),
3100 "check_schema_compatibility: close_in_place failed; falling back to best-effort close"
3101 );
3102 conn.close_best_effort_in_place();
3103 }
3104
3105 result
3106}
3107
/// Module-private alias of [`CURRENT_SCHEMA_VERSION`].
const SCHEMA_VERSION: i64 = CURRENT_SCHEMA_VERSION;
3109
/// Test-only SQL for schema version 1: enables foreign keys and creates the
/// `meta`, `agents`, `workspaces`, `conversations`, `messages`, `snippets`,
/// `tags` and `conversation_tags` tables plus two lookup indexes.
#[cfg(test)]
const MIGRATION_V1: &str = r"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
 key TEXT PRIMARY KEY,
 value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
 id INTEGER PRIMARY KEY,
 slug TEXT NOT NULL UNIQUE,
 name TEXT NOT NULL,
 version TEXT,
 kind TEXT NOT NULL,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
 id INTEGER PRIMARY KEY,
 path TEXT NOT NULL UNIQUE,
 display_name TEXT
);

CREATE TABLE IF NOT EXISTS conversations (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 UNIQUE(agent_id, external_id)
);

CREATE TABLE IF NOT EXISTS messages (
 id INTEGER PRIMARY KEY,
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 idx INTEGER NOT NULL,
 role TEXT NOT NULL,
 author TEXT,
 created_at INTEGER,
 content TEXT NOT NULL,
 extra_json TEXT,
 UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
 id INTEGER PRIMARY KEY,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 file_path TEXT,
 start_line INTEGER,
 end_line INTEGER,
 language TEXT,
 snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
 id INTEGER PRIMARY KEY,
 name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
 PRIMARY KEY (conversation_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_conversations_agent_started
 ON conversations(agent_id, started_at DESC);

CREATE INDEX IF NOT EXISTS idx_messages_conv_idx
 ON messages(conversation_id, idx);

";
3189
/// Test-only SQL for schema version 2: creates the `fts_messages` FTS5
/// virtual table (porter tokenizer) if it does not exist and fills it from
/// `messages` joined with `conversations`, `agents` and `workspaces`.
#[cfg(test)]
const MIGRATION_V2: &str = r"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(
 content,
 title,
 agent,
 workspace,
 source_path,
 created_at UNINDEXED,
 message_id UNINDEXED,
 tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
 m.content,
 c.title,
 a.slug,
 w.path,
 c.source_path,
 m.created_at,
 m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
3216
/// Test-only SQL for schema version 3: drops `fts_messages`, recreates it
/// with the same column set as version 2, and refills it from the core
/// tables.
#[cfg(test)]
#[allow(dead_code)]
const MIGRATION_V3: &str = r"
DROP TABLE IF EXISTS fts_messages;
CREATE VIRTUAL TABLE fts_messages USING fts5(
 content,
 title,
 agent,
 workspace,
 source_path,
 created_at UNINDEXED,
 message_id UNINDEXED,
 tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
 m.content,
 c.title,
 a.slug,
 w.path,
 c.source_path,
 m.created_at,
 m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
3245
/// Test-only SQL for schema version 4: creates the `sources` table and
/// inserts the default `local` source if it is not already present.
#[cfg(test)]
const MIGRATION_V4: &str = r"
-- Sources table for tracking where conversations come from
CREATE TABLE IF NOT EXISTS sources (
 id TEXT PRIMARY KEY, -- source_id (e.g., 'local', 'work-laptop')
 kind TEXT NOT NULL, -- 'local', 'ssh', etc.
 host_label TEXT, -- display label
 machine_id TEXT, -- optional stable machine id
 platform TEXT, -- 'macos', 'linux', 'windows'
 config_json TEXT, -- JSON blob for extra config (SSH params, path rewrites)
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

-- Bootstrap: Insert the default 'local' source
INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
3264
/// Test-only SQL for schema version 5: rebuilds `conversations` with the
/// `source_id` and `origin_host` columns and a
/// `UNIQUE(source_id, agent_id, external_id)` constraint, copying existing
/// rows with `source_id = 'local'`, then recreates its indexes.
#[cfg(test)]
const MIGRATION_V5: &str = r"
-- Add provenance columns to conversations table
-- SQLite cannot alter unique constraints, so we need to recreate the table

-- Create new table with provenance columns and updated unique constraint
CREATE TABLE conversations_new (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 origin_host TEXT,
 UNIQUE(source_id, agent_id, external_id)
);

-- Copy data from old table (all existing conversations get source_id='local')
INSERT INTO conversations_new (id, agent_id, workspace_id, source_id, external_id, title,
 source_path, started_at, ended_at, approx_tokens, metadata_json, origin_host)
SELECT id, agent_id, workspace_id, 'local', external_id, title,
 source_path, started_at, ended_at, approx_tokens, metadata_json, NULL
FROM conversations;

-- Drop old table and rename new
DROP TABLE conversations;
ALTER TABLE conversations_new RENAME TO conversations;

-- Recreate indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
";
3302
/// Test-only SQL for schema version 6: adds an index on
/// `conversations(source_path)`.
#[cfg(test)]
const MIGRATION_V6: &str = r"
-- Optimize lookup by source_path (used by TUI detail view)
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
";
3308
/// Test-only SQL for schema version 7: adds the `metadata_bin` BLOB column
/// to `conversations` and the `extra_bin` BLOB column to `messages`.
#[cfg(test)]
const MIGRATION_V7: &str = r"
-- Add binary columns for MessagePack serialization (Opt 3.1)
-- Binary format is 50-70% smaller than JSON and faster to parse
ALTER TABLE conversations ADD COLUMN metadata_bin BLOB;
ALTER TABLE messages ADD COLUMN extra_bin BLOB;
";
3316
/// Test-only SQL for schema version 8: creates the `daily_stats` aggregate
/// table keyed by `(day_id, agent_slug, source_id)` and two indexes on it.
#[cfg(test)]
const MIGRATION_V8: &str = r"
-- Opt 3.2: Daily stats materialized table for O(1) time-range histograms
-- Provides fast aggregated queries for stats/dashboard without full table scans

CREATE TABLE IF NOT EXISTS daily_stats (
 day_id INTEGER NOT NULL, -- Days since 2020-01-01 (Unix epoch + offset)
 agent_slug TEXT NOT NULL, -- 'all' for totals, or specific agent slug
 source_id TEXT NOT NULL DEFAULT 'all', -- 'all' for totals, or specific source
 session_count INTEGER NOT NULL DEFAULT 0,
 message_count INTEGER NOT NULL DEFAULT 0,
 total_chars INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
3336
/// Test-only SQL for schema version 9: creates the `embedding_jobs` table
/// and a partial unique index that allows only one `pending` or `running`
/// job per `(db_path, model_id)`.
#[cfg(test)]
const MIGRATION_V9: &str = r"
-- Background embedding jobs tracking table
CREATE TABLE IF NOT EXISTS embedding_jobs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 db_path TEXT NOT NULL,
 model_id TEXT NOT NULL,
 status TEXT NOT NULL DEFAULT 'pending',
 total_docs INTEGER NOT NULL DEFAULT 0,
 completed_docs INTEGER NOT NULL DEFAULT 0,
 error_message TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now')),
 started_at TEXT,
 completed_at TEXT
);

-- Only one pending or running job per (db_path, model_id) at a time.
-- Multiple completed/failed/cancelled jobs are allowed for history.
CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
3359
/// SQL script for the `MIGRATION_V10` schema step (compiled only for tests).
///
/// In order, the script:
/// 1. creates the per-message `token_usage` ledger and its indexes,
/// 2. creates the `token_daily_stats` rollup table and its indexes,
/// 3. creates `model_pricing` and seeds it with `INSERT OR IGNORE`,
/// 4. adds token-summary columns to `conversations` via `ALTER TABLE`.
///
/// The `ALTER TABLE ... ADD COLUMN` statements carry no existence guard.
// NOTE(review): re-running step 4 on an already-migrated database would
// presumably fail with a duplicate-column error — confirm against the runner.
#[cfg(test)]
const MIGRATION_V10: &str = r"
-- Token analytics: per-message token usage ledger
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',

    -- Timing
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,

    -- Model identification
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,

    -- Token counts (nullable — not all agents provide all fields)
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,

    -- Cost estimation
    estimated_cost_usd REAL,

    -- Message context
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,

    -- Data quality
    data_source TEXT NOT NULL DEFAULT 'api',

    UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

-- Token analytics: pre-aggregated daily rollups
CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',

    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,

    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,

    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,

    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,

    session_count INTEGER NOT NULL DEFAULT 0,

    last_updated INTEGER NOT NULL,

    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

-- Model pricing lookup table
CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

-- Seed with current pricing (as of 2026-02)
INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Extend conversations table with token summary columns
ALTER TABLE conversations ADD COLUMN total_input_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_output_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_read_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_creation_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN grand_total_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN estimated_cost_usd REAL;
ALTER TABLE conversations ADD COLUMN primary_model TEXT;
ALTER TABLE conversations ADD COLUMN api_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN tool_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN user_message_count INTEGER;
ALTER TABLE conversations ADD COLUMN assistant_message_count INTEGER;
";
3483
/// SQL script for the `MIGRATION_V14` schema step.
///
/// The script only drops `fts_messages` (guarded by `IF EXISTS`); it does not
/// recreate the table. The embedded SQL comment explains the rationale.
const MIGRATION_V14: &str = r"
-- Switch FTS5 from internal-content to contentless mode (CASS #163).
-- Drop the old V13 internal-content fts_messages first so that
-- sqlite_schema does not contain two conflicting CREATE VIRTUAL TABLE
-- entries, which makes the database completely unreadable.
-- The current contentless table is recreated lazily after open() only when the
-- frankensqlite FTS consistency check finds it missing or malformed.
DROP TABLE IF EXISTS fts_messages;
";
3493
/// SQL that creates the `conversation_tail_state` table and nothing else.
///
/// The table definition is the same one embedded in `MIGRATION_V18`; this
/// script contains no backfill `INSERT`.
const MIGRATION_V15_TAIL_STATE_TABLE: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
";
3504
/// SQL script for the `MIGRATION_V16` schema step: drops the
/// `idx_messages_conv_idx` index (guarded by `IF EXISTS`).
const MIGRATION_V16: &str = r"
-- UNIQUE(conversation_id, idx) already creates sqlite_autoindex_messages_1,
-- which covers the same lookup/order key as idx_messages_conv_idx. Keeping both
-- doubles message insert index maintenance on the hot indexing path.
DROP INDEX IF EXISTS idx_messages_conv_idx;
";
3511
/// SQL script for the `MIGRATION_V17` schema step: drops the
/// `idx_messages_created` index (guarded by `IF EXISTS`).
const MIGRATION_V17: &str = r"
-- Drop the global messages(created_at) secondary index from the ingest hot
-- path. Search/time filters are served by the derived search layer and
-- conversation/analytics indexes, while this index is maintained on every
-- message insert.
DROP INDEX IF EXISTS idx_messages_created;
";
3519
/// SQL script for the `MIGRATION_V18` schema step.
///
/// Creates `conversation_tail_state` if it is missing, then copies
/// `(id, ended_at, last_message_idx, last_message_created_at)` from every
/// `conversations` row that has at least one of the three tail fields set.
/// The copy uses `INSERT OR REPLACE`, so existing tail-state rows for the same
/// conversation are overwritten.
const MIGRATION_V18: &str = r"
-- Move append-tail state out of the wide, indexed conversations row. The hot
-- append path updates this cache for every appended conversation; keeping it in
-- a tiny rowid table avoids rewriting the large conversation record.
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_tail_state (
    conversation_id, ended_at, last_message_idx, last_message_created_at
)
SELECT id, ended_at, last_message_idx, last_message_created_at
FROM conversations
WHERE ended_at IS NOT NULL
   OR last_message_idx IS NOT NULL
   OR last_message_created_at IS NOT NULL;
";
3542
/// SQL script for the `MIGRATION_V19` schema step.
///
/// Creates `conversation_external_lookup` and backfills one row per
/// conversation with a non-NULL `external_id`. The lookup key is the
/// length-prefixed concatenation
/// `<length(source_id)>:<source_id>:<agent_id>:<length(external_id)>:<external_id>`.
// NOTE(review): the lengths come from SQL `length()`; any Rust code that builds
// the same key must count the same unit (characters vs. bytes) — confirm.
const MIGRATION_V19: &str = r"
-- Materialize external conversation provenance into one compact lookup key.
-- This keeps the hot append/new-conversation probe on a single primary-key
-- lookup instead of a composite conversations-table predicate.
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
    CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
    CAST(agent_id AS TEXT) || ':' ||
    CAST(length(external_id) AS TEXT) || ':' || external_id,
    id
FROM conversations
WHERE external_id IS NOT NULL;
";
3561
/// SQL script for the `MIGRATION_V20` schema step.
///
/// Creates `conversation_external_tail_lookup`, which stores the same
/// length-prefixed lookup key as `MIGRATION_V19` together with the tail fields
/// (`ended_at`, `last_message_idx`, `last_message_created_at`). The backfill
/// reads the tail fields from `conversation_tail_state` through correlated
/// subqueries, so conversations without a tail-state row get NULL tail values.
const MIGRATION_V20: &str = r"
-- Fuse external conversation lookup with append-tail state. Append-heavy
-- workloads can resolve both the conversation id and tail plan from one
-- primary-key probe.
CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
    lookup_key,
    conversation_id,
    ended_at,
    last_message_idx,
    last_message_created_at
)
SELECT
    CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
    CAST(c.agent_id AS TEXT) || ':' ||
    CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
    c.id,
    (SELECT ts.ended_at
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id),
    (SELECT ts.last_message_idx
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id),
    (SELECT ts.last_message_created_at
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id)
FROM conversations c
WHERE c.external_id IS NOT NULL;
";
3598
/// In-memory form of one embedding job record.
///
/// The field names match the columns of the `embedding_jobs` table created by
/// `MIGRATION_V9`; nullable columns there are `Option`s here.
#[derive(Debug, Clone)]
pub struct EmbeddingJobRow {
    /// Row identifier.
    pub id: i64,
    /// Database path the job belongs to.
    pub db_path: String,
    /// Identifier of the embedding model.
    pub model_id: String,
    /// Job status text (the schema defaults it to `'pending'`).
    pub status: String,
    /// Total number of documents in the job.
    pub total_docs: i64,
    /// Number of documents completed so far.
    pub completed_docs: i64,
    /// Error text, if any was recorded.
    pub error_message: Option<String>,
    /// Creation timestamp, stored as text.
    pub created_at: String,
    /// Start timestamp, if recorded.
    pub started_at: Option<String>,
    /// Completion timestamp, if recorded.
    pub completed_at: Option<String>,
}
3613
/// Conversation metadata carried by the lexical-rebuild code paths.
// NOTE(review): units of `started_at`/`ended_at` are not visible here
// (presumably epoch milliseconds) — confirm against the producer.
#[derive(Debug, Clone)]
pub struct LexicalRebuildConversationRow {
    /// Conversation row id, when known.
    pub id: Option<i64>,
    /// Slug of the agent that produced the conversation.
    pub agent_slug: String,
    /// Workspace path, if any.
    pub workspace: Option<PathBuf>,
    /// External identifier, if any.
    pub external_id: Option<String>,
    /// Conversation title, if any.
    pub title: Option<String>,
    /// Path of the source the conversation was read from.
    pub source_path: PathBuf,
    /// Start timestamp, if known.
    pub started_at: Option<i64>,
    /// End timestamp, if known.
    pub ended_at: Option<i64>,
    /// Identifier of the originating source.
    pub source_id: String,
    /// Host the conversation originated from, if known.
    pub origin_host: Option<String>,
}
3633
/// Size summary for one conversation: how many messages it has and how many
/// bytes they account for.
///
/// When built by `lexical_rebuild_conversation_footprint_from_count`, the byte
/// figure is an estimate (message count times a fixed per-message size), not a
/// measured value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LexicalRebuildConversationFootprintRow {
    /// Conversation the footprint describes.
    pub conversation_id: i64,
    /// Number of messages in the conversation.
    pub message_count: usize,
    /// Message payload size in bytes (possibly estimated).
    pub message_bytes: usize,
}
3642
/// Byte size assumed per message when a footprint is estimated from a message
/// count alone (4 KiB).
pub(crate) const LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE: usize = 4 * 1024;
/// Largest number of conversations that may lack tail metadata while coverage
/// is still considered sufficient.
const LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT: usize = 64;

/// Reports whether tail metadata covers enough conversations.
///
/// Coverage is sufficient when there are no conversations at all, or when the
/// number of uncovered conversations does not exceed
/// `LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT`. A covered count
/// larger than the total is treated as full coverage.
fn lexical_rebuild_tail_metadata_coverage_is_sufficient(
    total_conversations: usize,
    covered_conversations: usize,
) -> bool {
    if total_conversations == 0 {
        return true;
    }
    // Clamp first so the subtraction below can never underflow.
    let covered = covered_conversations.min(total_conversations);
    let uncovered = total_conversations - covered;
    uncovered <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT
}
3654
/// Converts the index of a conversation's last message into a message count
/// (`index + 1`).
///
/// Returns `None` when no index is given, when the index is negative, or when
/// the count cannot be represented as a `usize`.
fn lexical_rebuild_message_count_from_tail_idx(last_message_idx: Option<i64>) -> Option<usize> {
    last_message_idx
        .and_then(|idx| u64::try_from(idx).ok())
        .and_then(|idx| idx.checked_add(1))
        .and_then(|count| usize::try_from(count).ok())
}
3660
3661fn lexical_rebuild_conversation_footprint_from_count(
3662 conversation_id: i64,
3663 message_count: usize,
3664) -> LexicalRebuildConversationFootprintRow {
3665 LexicalRebuildConversationFootprintRow {
3666 conversation_id,
3667 message_count,
3668 message_bytes: message_count
3669 .saturating_mul(LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE),
3670 }
3671}
3672
/// One message together with the id of the conversation it belongs to, as
/// carried by the lexical-rebuild code paths.
#[derive(Debug, Clone)]
pub struct LexicalRebuildMessageRow {
    /// Id of the owning conversation.
    pub conversation_id: i64,
    /// Message row id.
    pub id: i64,
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Role of the message author, as text.
    pub role: String,
    /// Author name, if any.
    pub author: Option<String>,
    /// Creation timestamp, if known.
    pub created_at: Option<i64>,
    /// Message body.
    pub content: String,
}
3684
/// Slimmed-down message record that carries no conversation or message id and
/// reduces the role to a single tool/non-tool flag.
// NOTE(review): presumably stored per conversation in
// `LexicalRebuildGroupedMessageRows` — confirm against the grouping code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexicalRebuildGroupedMessageRow {
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Whether the message has the tool role.
    pub is_tool_role: bool,
    /// Creation timestamp, if known.
    pub created_at: Option<i64>,
    /// Message body.
    pub content: String,
}
3695
/// Collection of grouped message rows; up to 32 rows are stored inline before
/// the `SmallVec` spills to the heap.
pub type LexicalRebuildGroupedMessageRows = SmallVec<[LexicalRebuildGroupedMessageRow; 32]>;

/// Alias for [`FrankenStorage`].
// NOTE(review): presumably kept so code written against the older
// `SqliteStorage` name keeps compiling — confirm.
pub type SqliteStorage = FrankenStorage;
3700
/// Storage handle wrapping one frankensqlite connection plus the per-handle
/// and shared caches that go with it.
pub struct FrankenStorage {
    /// The primary connection owned by this handle.
    conn: FrankenConnection,
    /// Path the connection was opened from; reused to open extra writers.
    db_path: PathBuf,
    /// Starts out `false`.
    // NOTE(review): the code that flips this flag is outside this section.
    ephemeral_writer_preflight_verified: AtomicBool,
    /// WAL auto-checkpoint page count last recorded for this connection, or
    /// `UNSET_INDEX_WRITER_CHECKPOINT_PAGES` when nothing was recorded yet.
    index_writer_checkpoint_pages: AtomicI64,
    /// Busy timeout recorded for index-writer use, or
    /// `UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS` when nothing was recorded yet.
    index_writer_busy_timeout_ms: AtomicU64,
    /// Single slot holding a reusable extra writer connection.
    cached_ephemeral_writer: parking_lot::Mutex<CachedEphemeralWriter>,
    /// Agent key -> agent id cache; shared (via `Arc`) with writers handed out
    /// by `acquire_cached_ephemeral_writer`.
    ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
    /// Workspace key -> workspace id cache; shared the same way.
    ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
    /// Conversation sources already marked as ensured; shared the same way.
    ensured_conversation_sources: Arc<parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>>,
    /// Daily-stats keys already marked as ensured; shared the same way.
    ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
    /// Tri-state cache (`FTS_MESSAGES_PRESENT_*`) of whether the
    /// `fts_messages` table exists.
    fts_messages_present_cache: AtomicI8,
}
3715
/// Page count written to `PRAGMA wal_autocheckpoint` by `apply_config`.
const DEFAULT_WAL_AUTOCHECKPOINT_PAGES: i64 = 4096;
/// Sentinel meaning "no checkpoint page count recorded yet".
const UNSET_INDEX_WRITER_CHECKPOINT_PAGES: i64 = i64::MIN;
/// Sentinel meaning "no index-writer busy timeout recorded yet".
const UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS: u64 = 0;
/// `fts_messages_present_cache` state: presence has not been determined.
const FTS_MESSAGES_PRESENT_UNKNOWN: i8 = 0;
/// `fts_messages_present_cache` state: the table is known to be absent.
const FTS_MESSAGES_PRESENT_ABSENT: i8 = 1;
/// `fts_messages_present_cache` state: the table is known to be present.
const FTS_MESSAGES_PRESENT_PRESENT: i8 = 2;
3725
/// State of the single cached-writer slot on a `FrankenStorage`.
enum CachedEphemeralWriter {
    /// No writer has been cached; the slot is free to be claimed.
    Uninitialized,
    /// An idle writer connection (with its recorded checkpoint/busy-timeout
    /// state) is parked in the slot, ready for reuse.
    Cached(Box<SendFrankenConnection>),
    /// The slot has been claimed by a caller that has not released or
    /// discarded its writer yet.
    InUse,
}
3731
3732#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3733struct EnsuredAgentKey {
3734 slug: String,
3735 name: String,
3736 version: Option<String>,
3737 kind: String,
3738}
3739
3740impl EnsuredAgentKey {
3741 fn from_agent(agent: &Agent) -> Self {
3742 Self {
3743 slug: agent.slug.clone(),
3744 name: agent.name.clone(),
3745 version: agent.version.clone(),
3746 kind: agent_kind_str(agent.kind.clone()),
3747 }
3748 }
3749}
3750
/// Cache key for the ensured-workspaces map: a workspace path together with
/// its optional display name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredWorkspaceKey {
    path: String,
    display_name: Option<String>,
}

impl EnsuredWorkspaceKey {
    /// Builds the key, taking ownership of `path` and copying the display
    /// name when one is given.
    fn new(path: String, display_name: Option<&str>) -> Self {
        let display_name = match display_name {
            Some(label) => Some(label.to_owned()),
            None => None,
        };
        Self { path, display_name }
    }
}
3765
3766#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3767struct EnsuredConversationSourceKey {
3768 id: String,
3769 kind: SourceKind,
3770 host_label: Option<String>,
3771}
3772
3773impl EnsuredConversationSourceKey {
3774 fn from_source(source: &Source) -> Self {
3775 Self {
3776 id: source.id.clone(),
3777 kind: source.kind,
3778 host_label: source.host_label.clone(),
3779 }
3780 }
3781}
3782
/// Cache key for the ensured-daily-stats set: day id, agent slug and source
/// id.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredDailyStatsKey {
    day_id: i64,
    agent_slug: String,
    source_id: String,
}

impl EnsuredDailyStatsKey {
    /// Builds the key, copying both string arguments into owned storage.
    fn new(day_id: i64, agent_slug: &str, source_id: &str) -> Self {
        let agent_slug = String::from(agent_slug);
        let source_id = String::from(source_id);
        Self {
            day_id,
            agent_slug,
            source_id,
        }
    }
}
3799
3800const AUTOCOMMIT_RETAIN_OFF_PRAGMAS: [&str; 2] = [
3801 "PRAGMA fsqlite.autocommit_retain = OFF;",
3802 "PRAGMA autocommit_retain = OFF;",
3803];
3804
3805fn disable_autocommit_retain<E>(
3806 mut execute: impl FnMut(&'static str) -> std::result::Result<(), E>,
3807) -> Result<&'static str>
3808where
3809 E: std::fmt::Display,
3810{
3811 let mut failures = Vec::new();
3812 for pragma in AUTOCOMMIT_RETAIN_OFF_PRAGMAS {
3813 match execute(pragma) {
3814 Ok(()) => return Ok(pragma),
3815 Err(err) => {
3816 let error = err.to_string();
3817 tracing::debug!(
3818 %pragma,
3819 error = %error,
3820 "autocommit_retain PRAGMA variant not supported"
3821 );
3822 failures.push(format!("{pragma}: {error}"));
3823 }
3824 }
3825 }
3826
3827 Err(anyhow!(
3828 "failed to disable autocommit_retain on frankensqlite connection; \
3829 refusing to keep a long-lived MVCC connection that may accumulate \
3830 unbounded write snapshots. Upgrade frankensqlite to a version that \
3831 supports one of these PRAGMAs or use a short-lived connection path. \
3832 attempts: {}",
3833 failures.join("; ")
3834 ))
3835}
3836
/// Recognizes the error text produced when a populated `WITHOUT ROWID`
/// `fts_messages_*` table cannot be (re)loaded because that is "not yet
/// supported".
///
/// Matching is ASCII case-insensitive and requires all three of:
/// a "loading/reloading populated without rowid table" phrase, a reference to
/// a table whose name starts with `fts_messages_` (with or without a leading
/// backtick), and the phrase "not yet supported".
pub(crate) fn error_message_indicates_populated_fts_shadow_without_rowid_reload(
    message: &str,
) -> bool {
    const RELOAD_PHRASES: [&str; 2] = [
        "loading populated without rowid table",
        "reloading populated without rowid table",
    ];
    const SHADOW_TABLE_PHRASES: [&str; 2] = ["table `fts_messages_", "table fts_messages_"];

    let normalized = message.to_ascii_lowercase();
    if !normalized.contains("not yet supported") {
        return false;
    }

    let mentions_reload = RELOAD_PHRASES
        .iter()
        .any(|phrase| normalized.contains(*phrase));
    let mentions_shadow_table = SHADOW_TABLE_PHRASES
        .iter()
        .any(|phrase| normalized.contains(*phrase));
    mentions_reload && mentions_shadow_table
}
3848
3849impl FrankenStorage {
3850 fn new(conn: FrankenConnection, db_path: PathBuf) -> Self {
3851 Self::new_with_shared_caches(
3852 conn,
3853 db_path,
3854 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3855 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3856 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3857 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3858 )
3859 }
3860
3861 fn new_with_shared_caches(
3862 conn: FrankenConnection,
3863 db_path: PathBuf,
3864 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3865 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3866 ensured_conversation_sources: Arc<
3867 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3868 >,
3869 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3870 ) -> Self {
3871 Self {
3872 conn,
3873 db_path,
3874 ephemeral_writer_preflight_verified: AtomicBool::new(false),
3875 index_writer_checkpoint_pages: AtomicI64::new(UNSET_INDEX_WRITER_CHECKPOINT_PAGES),
3876 index_writer_busy_timeout_ms: AtomicU64::new(UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS),
3877 cached_ephemeral_writer: parking_lot::Mutex::new(CachedEphemeralWriter::Uninitialized),
3878 ensured_agents,
3879 ensured_workspaces,
3880 ensured_conversation_sources,
3881 ensured_daily_stats_keys,
3882 fts_messages_present_cache: AtomicI8::new(FTS_MESSAGES_PRESENT_UNKNOWN),
3883 }
3884 }
3885
3886 fn apply_open_stage_busy_timeout(&self) {
3887 if let Err(err) = self.conn.execute("PRAGMA busy_timeout = 5000;") {
3888 tracing::debug!(
3889 error = %err,
3890 "failed to apply open-stage busy_timeout before migrations"
3891 );
3892 }
3893 }
3894
    /// Opens the database at `path`, brings its schema up to date and applies
    /// the standard connection configuration.
    ///
    /// # Errors
    ///
    /// Fails if the parent directory cannot be created, the doctor-mutation
    /// guard cannot be acquired within `DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT`,
    /// the connection cannot be opened, or any migration, schema-repair or
    /// configuration step fails.
    pub fn open(path: &Path) -> Result<Self> {
        // The database file's directory must exist before the open call.
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent)
                .with_context(|| format!("creating db directory {}", parent.display()))?;
        }

        // Lossy conversion: non-UTF-8 path bytes are replaced in `path_str`.
        let path_str = path.to_string_lossy().to_string();
        // Held (not dropped early) until this function returns, so the whole
        // open/migrate/configure sequence runs under the guard.
        let _doctor_guard =
            acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
        let conn = FrankenConnection::open(&path_str)
            .with_context(|| format!("opening frankensqlite db at {}", path.display()))?;
        let storage = Self::new(conn, path.to_path_buf());
        // Busy timeout goes in first so the migration steps below wait on
        // contention instead of failing immediately.
        storage.apply_open_stage_busy_timeout();
        storage.run_migrations()?;
        storage.repair_missing_current_schema_objects()?;
        storage.apply_config()?;
        // Seeds the presence cache as "present" without probing the schema.
        storage.set_fts_messages_present_cache(true);
        Ok(storage)
    }
3919
3920 pub fn open_writer(path: &Path) -> Result<Self> {
3926 Self::open_writer_with_shared_caches(
3927 path,
3928 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3929 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3930 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3931 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3932 )
3933 }
3934
    /// Opens a writer connection at `path` that shares the given
    /// ensured-entity caches.
    ///
    /// Unlike `open`, this neither creates the parent directory nor runs
    /// migrations or schema repair; it only applies the connection
    /// configuration.
    ///
    /// # Errors
    ///
    /// Fails if the doctor-mutation guard cannot be acquired, the connection
    /// cannot be opened, or `apply_config` fails.
    fn open_writer_with_shared_caches(
        path: &Path,
        ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
        ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
        ensured_conversation_sources: Arc<
            parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
        >,
        ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
    ) -> Result<Self> {
        // Lossy conversion: non-UTF-8 path bytes are replaced in `path_str`.
        let path_str = path.to_string_lossy().to_string();
        // Held until this function returns.
        let _doctor_guard =
            acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
        let conn = FrankenConnection::open(&path_str)
            .with_context(|| format!("opening frankensqlite writer at {}", path.display()))?;
        let storage = Self::new_with_shared_caches(
            conn,
            path.to_path_buf(),
            ensured_agents,
            ensured_workspaces,
            ensured_conversation_sources,
            ensured_daily_stats_keys,
        );
        storage.apply_config()?;
        // Seeds the presence cache as "present" without probing the schema.
        storage.set_fts_messages_present_cache(true);
        Ok(storage)
    }
3961
    /// Hands out a writer for this database, reusing the parked connection
    /// when one is available.
    ///
    /// The returned flag is `true` when this call claimed the cached-writer
    /// slot (the slot is now `InUse` on behalf of the caller) and `false` when
    /// the slot was already claimed and a separate, unrelated writer was
    /// opened instead.
    // NOTE(review): callers presumably hand `true` writers back through
    // `release_cached_ephemeral_writer` / `discard_cached_ephemeral_writer`
    // and simply close `false` writers — confirm at the call sites.
    ///
    /// Every writer shares this handle's ensured-entity caches.
    ///
    /// # Errors
    ///
    /// Fails when a new writer connection has to be opened and that open
    /// fails. If the slot was claimed by this call, it is reset to
    /// `Uninitialized` before the error is returned.
    pub(crate) fn acquire_cached_ephemeral_writer(&self) -> Result<(Self, bool)> {
        let mut cached = self.cached_ephemeral_writer.lock();
        // Claim the slot up front: whatever was in it is taken out and the
        // slot reads `InUse` from here on.
        match std::mem::replace(&mut *cached, CachedEphemeralWriter::InUse) {
            CachedEphemeralWriter::Cached(conn) => {
                // Reuse the parked connection and restore the checkpoint and
                // busy-timeout values that were recorded when it was released.
                let (conn, checkpoint_pages, busy_timeout_ms) = (*conn).into_parts();
                let writer = Self::new_with_shared_caches(
                    conn,
                    self.db_path.clone(),
                    Arc::clone(&self.ensured_agents),
                    Arc::clone(&self.ensured_workspaces),
                    Arc::clone(&self.ensured_conversation_sources),
                    Arc::clone(&self.ensured_daily_stats_keys),
                );
                writer
                    .index_writer_checkpoint_pages
                    .store(checkpoint_pages, Ordering::Relaxed);
                writer
                    .index_writer_busy_timeout_ms
                    .store(busy_timeout_ms, Ordering::Relaxed);
                writer.set_fts_messages_present_cache(true);
                Ok((writer, true))
            }
            CachedEphemeralWriter::Uninitialized => {
                // Release the slot lock while opening; the slot already reads
                // `InUse`, so concurrent callers take the `InUse` arm below.
                drop(cached);
                match Self::open_writer_with_shared_caches(
                    &self.db_path,
                    Arc::clone(&self.ensured_agents),
                    Arc::clone(&self.ensured_workspaces),
                    Arc::clone(&self.ensured_conversation_sources),
                    Arc::clone(&self.ensured_daily_stats_keys),
                ) {
                    Ok(writer) => Ok((writer, true)),
                    Err(err) => {
                        // Undo the claim so a later call can try again.
                        let mut cached = self.cached_ephemeral_writer.lock();
                        if matches!(&*cached, CachedEphemeralWriter::InUse) {
                            *cached = CachedEphemeralWriter::Uninitialized;
                        }
                        Err(err)
                    }
                }
            }
            CachedEphemeralWriter::InUse => {
                // Someone else holds the slot. The assignment is redundant with
                // the `replace` above but keeps the intent explicit.
                *cached = CachedEphemeralWriter::InUse;
                drop(cached);
                Ok((
                    Self::open_writer_with_shared_caches(
                        &self.db_path,
                        Arc::clone(&self.ensured_agents),
                        Arc::clone(&self.ensured_workspaces),
                        Arc::clone(&self.ensured_conversation_sources),
                        Arc::clone(&self.ensured_daily_stats_keys),
                    )?,
                    false,
                ))
            }
        }
    }
4019
    /// Parks `writer`'s connection in the cached-writer slot for later reuse,
    /// together with its recorded checkpoint page count and busy timeout.
    ///
    /// The slot is expected to be `InUse`; this is checked only in debug
    /// builds. In release builds the slot is overwritten regardless of its
    /// previous state.
    pub(crate) fn release_cached_ephemeral_writer(&self, writer: Self) {
        // Read the recorded values before `into_raw` consumes the writer.
        let checkpoint_pages = writer.index_writer_checkpoint_pages.load(Ordering::Relaxed);
        let busy_timeout_ms = writer.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
        let conn = writer.into_raw();
        let mut cached = self.cached_ephemeral_writer.lock();
        debug_assert!(
            matches!(&*cached, CachedEphemeralWriter::InUse),
            "cached ephemeral writer state should be in-use when releasing"
        );
        *cached = CachedEphemeralWriter::Cached(Box::new(
            SendFrankenConnection::new_with_index_writer_state(
                conn,
                checkpoint_pages,
                busy_timeout_ms,
            ),
        ));
    }
4037
4038 pub(crate) fn discard_cached_ephemeral_writer(&self, mut writer: Self) {
4039 writer.close_best_effort_in_place();
4040 let mut cached = self.cached_ephemeral_writer.lock();
4041 if matches!(&*cached, CachedEphemeralWriter::InUse) {
4042 *cached = CachedEphemeralWriter::Uninitialized;
4043 }
4044 }
4045
    /// Returns the agent id cached for `key`, if any.
    fn cached_agent_id(&self, key: &EnsuredAgentKey) -> Option<i64> {
        self.ensured_agents.lock().get(key).copied()
    }
4049
    /// Records `id` as the agent id for `key`, replacing any earlier entry.
    fn mark_agent_ensured(&self, key: EnsuredAgentKey, id: i64) {
        self.ensured_agents.lock().insert(key, id);
    }
4053
    /// Returns the workspace id cached for `key`, if any.
    fn cached_workspace_id(&self, key: &EnsuredWorkspaceKey) -> Option<i64> {
        self.ensured_workspaces.lock().get(key).copied()
    }
4057
    /// Records `id` as the workspace id for `key`, replacing any earlier entry.
    fn mark_workspace_ensured(&self, key: EnsuredWorkspaceKey, id: i64) {
        self.ensured_workspaces.lock().insert(key, id);
    }
4061
    /// Reports whether `key` is in the ensured-conversation-sources set.
    fn conversation_source_already_ensured(&self, key: &EnsuredConversationSourceKey) -> bool {
        self.ensured_conversation_sources.lock().contains(key)
    }
4065
    /// Adds `key` to the ensured-conversation-sources set.
    fn mark_conversation_source_ensured(&self, key: EnsuredConversationSourceKey) {
        self.ensured_conversation_sources.lock().insert(key);
    }
4069
    /// Reports whether `key` is in the ensured-daily-stats set.
    fn daily_stats_key_already_ensured(&self, key: &EnsuredDailyStatsKey) -> bool {
        self.ensured_daily_stats_keys.lock().contains(key)
    }
4073
    /// Reports whether all four `keys` are in the ensured-daily-stats set,
    /// checking them under a single lock acquisition.
    fn daily_stats_keys_already_ensured(&self, keys: &[EnsuredDailyStatsKey; 4]) -> bool {
        let ensured = self.ensured_daily_stats_keys.lock();
        keys.iter().all(|key| ensured.contains(key))
    }
4078
    /// Adds `key` to the ensured-daily-stats set.
    fn mark_daily_stats_key_ensured(&self, key: EnsuredDailyStatsKey) {
        self.ensured_daily_stats_keys.lock().insert(key);
    }
4082
4083 fn fts_messages_present_cached(&self, tx: &FrankenTransaction<'_>) -> bool {
4084 match self.fts_messages_present_cache.load(Ordering::Acquire) {
4085 FTS_MESSAGES_PRESENT_PRESENT => return true,
4086 FTS_MESSAGES_PRESENT_ABSENT => return false,
4087 _ => {}
4088 }
4089
4090 let present = tx
4091 .query_row_map(
4092 "SELECT COUNT(*) FROM sqlite_master
4093 WHERE name = 'fts_messages'
4094 AND rootpage > 0",
4095 fparams![],
4096 |row| row.get_typed::<i64>(0),
4097 )
4098 .map(|count| count > 0)
4099 .unwrap_or_else(|err| {
4100 tracing::debug!(
4101 error = %err,
4102 "failed to probe fts_messages presence; skipping db-resident FTS maintenance"
4103 );
4104 false
4105 });
4106 self.set_fts_messages_present_cache(present);
4107 present
4108 }
4109
4110 fn set_fts_messages_present_cache(&self, present: bool) {
4111 self.fts_messages_present_cache.store(
4112 if present {
4113 FTS_MESSAGES_PRESENT_PRESENT
4114 } else {
4115 FTS_MESSAGES_PRESENT_ABSENT
4116 },
4117 Ordering::Release,
4118 );
4119 }
4120
    /// Resets the `fts_messages` presence cache to "unknown", so the next
    /// `fts_messages_present_cached` call probes the schema again.
    fn invalidate_fts_messages_present_cache(&self) {
        self.fts_messages_present_cache
            .store(FTS_MESSAGES_PRESENT_UNKNOWN, Ordering::Release);
    }
4125
    /// Removes every ensured-conversation-source entry whose id equals
    /// `source_id`, whatever its kind or host label.
    fn invalidate_conversation_source_cache(&self, source_id: &str) {
        self.ensured_conversation_sources
            .lock()
            .retain(|key| key.id != source_id);
    }
4131
4132 fn close_cached_ephemeral_writer_best_effort_in_place(&mut self) {
4133 let cached = self.cached_ephemeral_writer.get_mut();
4134 if let CachedEphemeralWriter::Cached(conn) =
4135 std::mem::replace(cached, CachedEphemeralWriter::Uninitialized)
4136 {
4137 let mut conn = conn;
4138 conn.0.close_best_effort_in_place();
4139 }
4140 }
4141
4142 fn close_cached_ephemeral_writer_without_checkpoint_in_place(&mut self) -> Result<()> {
4143 let cached = self.cached_ephemeral_writer.get_mut();
4144 match std::mem::replace(cached, CachedEphemeralWriter::Uninitialized) {
4145 CachedEphemeralWriter::Cached(mut conn) => conn
4146 .0
4147 .close_without_checkpoint_in_place()
4148 .with_context(|| "closing cached frankensqlite writer without final checkpoint"),
4149 CachedEphemeralWriter::Uninitialized | CachedEphemeralWriter::InUse => Ok(()),
4150 }
4151 }
4152
    /// Opens the database at `path` read-only, waiting up to
    /// `DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT` for the doctor-mutation guard.
    pub fn open_readonly(path: &Path) -> Result<Self> {
        Self::open_readonly_with_doctor_lock_timeout(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)
    }
4157
    /// Opens the database at `path` read-only, waiting up to `timeout` for the
    /// doctor-mutation guard.
    ///
    /// No migrations or schema repair are run; only the read-only connection
    /// configuration is applied.
    ///
    /// # Errors
    ///
    /// Fails if the guard cannot be acquired, the connection cannot be opened,
    /// or the read-only configuration cannot be applied.
    pub fn open_readonly_with_doctor_lock_timeout(path: &Path, timeout: Duration) -> Result<Self> {
        // Lossy conversion: non-UTF-8 path bytes are replaced in `path_str`.
        let path_str = path.to_string_lossy().to_string();
        // Held until this function returns.
        let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
        let conn = open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
            .with_context(|| format!("opening frankensqlite db readonly at {}", path.display()))?;
        let storage = Self::new(conn, path.to_path_buf());
        storage.apply_readonly_config()?;
        Ok(storage)
    }
4171
4172 pub fn close(self) -> Result<()> {
4173 let mut this = self;
4174 this.close_cached_ephemeral_writer_best_effort_in_place();
4175 this.conn
4176 .close()
4177 .with_context(|| "closing frankensqlite connection")
4178 }
4179
4180 pub fn close_without_checkpoint(self) -> Result<()> {
4181 let mut this = self;
4182 this.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
4183 this.conn
4184 .close_without_checkpoint()
4185 .with_context(|| "closing frankensqlite connection without final checkpoint")
4186 }
4187
    /// Closes any parked cached writer and then the primary connection, both
    /// on a best-effort basis; no error is reported to the caller.
    pub fn close_best_effort_in_place(&mut self) {
        self.close_cached_ephemeral_writer_best_effort_in_place();
        self.conn.close_best_effort_in_place();
    }
4192
    /// Closes any parked cached writer and then the primary connection, both
    /// without a final checkpoint.
    ///
    /// # Errors
    ///
    /// If closing the cached writer fails, that error is returned immediately
    /// and the primary connection is left untouched. The cached-writer slot has
    /// already been emptied by then, so a second call goes straight to the
    /// primary connection. Otherwise returns the error from closing the
    /// primary connection, if any.
    pub fn close_without_checkpoint_in_place(&mut self) -> Result<()> {
        self.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
        self.conn
            .close_without_checkpoint_in_place()
            .with_context(|| "closing frankensqlite connection without final checkpoint")
    }
4199
    /// Borrows the underlying frankensqlite connection.
    pub fn raw(&self) -> &FrankenConnection {
        &self.conn
    }
4204
4205 pub fn into_raw(self) -> FrankenConnection {
4208 let mut this = self;
4209 this.close_cached_ephemeral_writer_best_effort_in_place();
4210 this.conn
4211 }
4212
    /// Applies the standard read-write connection configuration.
    ///
    /// Required settings (a failure aborts with context): WAL journal mode,
    /// `synchronous = NORMAL`, `cache_size = -65536`, foreign keys on, and a
    /// 5000 ms busy timeout.
    ///
    /// Best-effort settings (failures ignored): `wal_autocheckpoint` and the
    /// two `concurrent_mode` PRAGMA spellings.
    ///
    /// Finally `autocommit_retain` is switched off.
    ///
    /// # Errors
    ///
    /// Fails when a required PRAGMA is rejected, or when `autocommit_retain`
    /// cannot be disabled for any reason other than the known "populated FTS
    /// shadow table cannot be reloaded" condition, which is only logged.
    pub fn apply_config(&self) -> Result<()> {
        self.conn
            .execute("PRAGMA journal_mode = WAL;")
            .with_context(|| "setting journal_mode")?;
        self.conn
            .execute("PRAGMA synchronous = NORMAL;")
            .with_context(|| "setting synchronous")?;

        // In SQLite's PRAGMA semantics a negative cache_size is a size in KiB,
        // i.e. 64 MiB here.
        self.conn
            .execute("PRAGMA cache_size = -65536;")
            .with_context(|| "setting cache_size")?;

        self.conn
            .execute("PRAGMA foreign_keys = ON;")
            .with_context(|| "setting foreign_keys")?;

        self.conn
            .execute("PRAGMA busy_timeout = 5000;")
            .with_context(|| "setting busy_timeout")?;

        let checkpoint_pragma =
            format!("PRAGMA wal_autocheckpoint = {DEFAULT_WAL_AUTOCHECKPOINT_PAGES};");
        // Best effort: the result is ignored, and the default page count is
        // recorded whether or not the PRAGMA was accepted.
        let _ = self.conn.execute(&checkpoint_pragma);
        self.index_writer_checkpoint_pages
            .store(DEFAULT_WAL_AUTOCHECKPOINT_PAGES, Ordering::Relaxed);
        // Best effort: both spellings are attempted and their results ignored.
        let _ = self.conn.execute("PRAGMA fsqlite.concurrent_mode = ON;");
        let _ = self.conn.execute("PRAGMA concurrent_mode = ON;");
        match disable_autocommit_retain(|pragma| self.conn.execute(pragma).map(|_| ())) {
            Ok(autocommit_pragma) => {
                tracing::debug!(
                    pragma = autocommit_pragma,
                    "disabled frankensqlite autocommit_retain for storage connection"
                );
            }
            Err(err) => {
                // `{err:#}` renders the whole context chain so the matcher can
                // see the per-attempt error text.
                let detail = format!("{err:#}");
                if error_message_indicates_populated_fts_shadow_without_rowid_reload(&detail) {
                    tracing::warn!(
                        error = %detail,
                        "frankensqlite could not disable autocommit_retain because a populated derived FTS shadow table cannot yet be reloaded; continuing so canonical indexing can proceed"
                    );
                } else {
                    return Err(err);
                }
            }
        }

        Ok(())
    }
4293
4294 fn apply_readonly_config(&self) -> Result<()> {
4295 self.conn
4296 .execute("PRAGMA query_only = 1;")
4297 .with_context(|| "setting query_only")?;
4298 self.conn
4299 .execute("PRAGMA busy_timeout = 5000;")
4300 .with_context(|| "setting busy_timeout")?;
4301 self.conn
4302 .execute("PRAGMA cache_size = -65536;")
4303 .with_context(|| "setting cache_size")?;
4304 self.conn
4305 .execute("PRAGMA foreign_keys = ON;")
4306 .with_context(|| "setting foreign_keys")?;
4307 Ok(())
4308 }
4309
    /// Brings the database schema up to date.
    ///
    /// Runs, in order: the transition away from the meta-table version
    /// marker, the migrations that precede the tail-state cache, the
    /// tail-state cache migration itself (reported as version 15 when it
    /// applied), and the migrations that follow it. Applied versions are
    /// logged, and the resulting schema version is written back through
    /// `sync_meta_schema_version`.
    ///
    /// # Errors
    ///
    /// Fails when any of the steps above fails.
    pub fn run_migrations(&self) -> Result<()> {
        transition_from_meta_version(&self.conn)?;

        let base_result = build_cass_migrations_before_tail_cache()
            .run(&self.conn)
            .with_context(|| "running base schema migrations")?;

        let mut applied = base_result.applied;
        // The tail-state step is applied outside the migration runner and
        // reports whether it changed anything.
        if apply_conversation_tail_state_cache_migration(&self.conn)
            .with_context(|| "running conversation tail-state cache migration")?
        {
            applied.push(15);
        }

        let post_result = build_cass_migrations_after_tail_cache()
            .run(&self.conn)
            .with_context(|| "running post-tail-cache schema migrations")?;
        applied.extend(post_result.applied);

        let current = self.schema_version()?;
        // Stay quiet when nothing was applied.
        if !applied.is_empty() {
            info!(
                applied = ?applied,
                current,
                was_fresh = base_result.was_fresh,
                "frankensqlite schema migrations applied"
            );
        }

        self.sync_meta_schema_version(current)?;

        Ok(())
    }
4361
    /// Recreates required tables that are missing from the database, then
    /// repairs missing token-summary columns on `conversations`.
    ///
    /// Each entry of `REQUIRED_CURRENT_SCHEMA_TABLE_PROBES` is probed with its
    /// query. Tables whose probe fails with a "missing table" error are
    /// collected, the matching repair batches are executed, and the repaired
    /// tables are probed once more to verify them.
    ///
    /// # Errors
    ///
    /// Fails when a probe fails for any reason other than a missing table,
    /// when a repair batch fails, when a repaired table still cannot be
    /// probed, or when the column repair fails.
    fn repair_missing_current_schema_objects(&self) -> Result<()> {
        let mut missing_tables = Vec::new();
        for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
            if let Err(err) = self.conn.query(probe_sql) {
                if error_indicates_missing_table(&err) {
                    missing_tables.push(table_name);
                    continue;
                }
                // Any other probe failure is unexpected; surface it.
                return Err(err).with_context(|| {
                    format!("probing required schema table {table_name} for completeness")
                });
            }
        }

        if !missing_tables.is_empty() {
            info!(
                missing_tables = ?missing_tables,
                "repairing missing current-schema tables on an already-versioned cass database"
            );

            for batch in current_schema_repair_batches_for_missing_tables(&missing_tables)? {
                self.conn
                    .execute_batch(batch.sql)
                    .with_context(|| format!("repairing current-schema batch {}", batch.name))?;
            }

            // Re-probe only the tables that were reported missing.
            for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
                if !missing_tables.contains(&table_name) {
                    continue;
                }
                self.conn
                    .query(probe_sql)
                    .with_context(|| format!("verifying repaired schema table {table_name}"))?;
            }
        }
        // Runs whether or not any table was missing.
        self.repair_missing_conversation_token_columns()?;
        Ok(())
    }
4404
4405 fn repair_missing_conversation_token_columns(&self) -> Result<()> {
4406 let columns = franken_table_column_names(&self.conn, "conversations")
4407 .with_context(|| "inspecting conversations columns for token-summary repair")?;
4408 let mut missing_columns = Vec::new();
4409 for &(column_name, column_type) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
4410 if columns.contains(column_name) {
4411 continue;
4412 }
4413 let sql = format!("ALTER TABLE conversations ADD COLUMN {column_name} {column_type};");
4414 self.conn.execute(&sql).with_context(|| {
4415 format!("adding missing conversations.{column_name} token-summary column")
4416 })?;
4417 missing_columns.push(column_name);
4418 }
4419 if !missing_columns.is_empty() {
4420 tracing::warn!(
4421 target: "cass::schema_repair",
4422 db_path = %self.db_path.display(),
4423 missing_columns = ?missing_columns,
4424 "cass#222: repaired missing conversations token-summary columns"
4425 );
4426 }
4427 Ok(())
4428 }
4429
4430 pub(crate) fn cleanup_orphan_fk_rows(&self) -> Result<OrphanFkCleanupReport> {
4449 let mut report = OrphanFkCleanupReport::default();
4450 let orphan_message_ids = match collect_orphan_message_ids(&self.conn) {
4451 Ok(ids) => ids,
4452 Err(err) if error_indicates_missing_table(&err) => {
4453 tracing::debug!(
4454 target: "cass::fk_repair",
4455 child_table = "messages",
4456 error = %err,
4457 "skipping orphan-message probe (table or column unavailable)"
4458 );
4459 Vec::new()
4460 }
4461 Err(err) => return Err(err),
4462 };
4463 if !orphan_message_ids.is_empty() {
4464 report.record("messages", orphan_message_ids.len() as i64);
4465 }
4466
4467 if !orphan_message_ids.is_empty() {
4468 delete_orphan_message_ids_bisecting_oom(&self.conn, &orphan_message_ids)
4469 .context("deleting orphan message rows and dependent children")?;
4470 }
4471
4472 for entry in ORPHAN_DIRECT_CHILD_TABLES {
4473 loop {
4474 let ids = match collect_direct_orphan_id_page(&self.conn, entry) {
4475 Ok(ids) => ids,
4476 Err(err)
4477 if error_indicates_missing_table(&err)
4478 || error_indicates_missing_column(&err) =>
4479 {
4480 tracing::debug!(
4484 target: "cass::fk_repair",
4485 child_table = entry.child_table,
4486 error = %err,
4487 "skipping orphan probe (table or column unavailable)"
4488 );
4489 break;
4490 }
4491 Err(err) => {
4492 return Err(err).with_context(|| {
4493 format!("probing orphan rows in {}", entry.child_table)
4494 });
4495 }
4496 };
4497 if ids.is_empty() {
4498 break;
4499 }
4500
4501 let deleted = delete_direct_orphan_ids_bisecting_oom(&self.conn, entry, &ids)
4502 .with_context(|| format!("deleting orphan rows from {}", entry.child_table))?;
4503 if deleted == 0 {
4504 break;
4505 }
4506 report.record(
4507 entry.child_table,
4508 i64::try_from(deleted).unwrap_or(i64::MAX),
4509 );
4510 }
4511 }
4512
4513 if report.total == 0 {
4514 return Ok(report);
4515 }
4516
4517 tracing::warn!(
4522 target: "cass::fk_repair",
4523 db_path = %self.db_path.display(),
4524 total_orphans = report.total,
4525 per_table = ?report.per_table,
4526 "cass#202: removed orphan rows left behind by interrupted index transactions"
4527 );
4528
4529 Ok(report)
4530 }
4531
4532 pub fn schema_version(&self) -> Result<i64> {
4534 let rows = self
4535 .conn
4536 .query("SELECT MAX(version) FROM _schema_migrations;")
4537 .with_context(|| "reading schema version from _schema_migrations")?;
4538
4539 if let Some(row) = rows.first()
4540 && let Ok(v) = row.get_typed::<Option<i64>>(0)
4541 {
4542 return Ok(v.unwrap_or(0));
4543 }
4544 Ok(0)
4545 }
4546
4547 fn sync_meta_schema_version(&self, version: i64) -> Result<()> {
4549 if self.conn.query("SELECT key FROM meta LIMIT 1;").is_err() {
4552 return Ok(());
4553 }
4554
4555 if let Ok(rows) = self
4557 .conn
4558 .query("SELECT value FROM meta WHERE key = 'schema_version';")
4559 && let Some(row) = rows.first()
4560 && let Ok(val) = row.get_typed::<String>(0)
4561 && val == version.to_string()
4562 {
4563 return Ok(()); }
4565
4566 self.conn
4567 .execute_compat(
4568 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1);",
4569 &[ParamValue::from(version.to_string())],
4570 )
4571 .with_context(|| "syncing meta schema_version")?;
4572
4573 Ok(())
4574 }
4575
4576 pub fn database_path(&self) -> Result<PathBuf> {
4578 Ok(self.db_path.clone())
4579 }
4580
4581 pub(crate) fn ephemeral_writer_preflight_verified(&self) -> bool {
4582 self.ephemeral_writer_preflight_verified
4583 .load(Ordering::Relaxed)
4584 }
4585
4586 pub(crate) fn mark_ephemeral_writer_preflight_verified(&self) {
4587 self.ephemeral_writer_preflight_verified
4588 .store(true, Ordering::Relaxed);
4589 }
4590
4591 pub(crate) fn index_writer_checkpoint_pages(&self) -> Option<i64> {
4592 let pages = self.index_writer_checkpoint_pages.load(Ordering::Relaxed);
4593 (pages != UNSET_INDEX_WRITER_CHECKPOINT_PAGES).then_some(pages)
4594 }
4595
4596 pub(crate) fn mark_index_writer_checkpoint_pages(&self, pages: i64) {
4597 self.index_writer_checkpoint_pages
4598 .store(pages, Ordering::Relaxed);
4599 }
4600
4601 pub(crate) fn index_writer_busy_timeout_ms(&self) -> Option<u64> {
4602 let timeout_ms = self.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
4603 (timeout_ms != UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS).then_some(timeout_ms)
4604 }
4605
4606 pub(crate) fn mark_index_writer_busy_timeout_ms(&self, timeout_ms: u64) {
4607 self.index_writer_busy_timeout_ms
4608 .store(timeout_ms, Ordering::Relaxed);
4609 }
4610
4611 pub fn open_or_rebuild(path: &Path) -> std::result::Result<Self, MigrationError> {
4613 if let Some(parent) = path.parent() {
4614 fs::create_dir_all(parent)?;
4615 }
4616
4617 if path.exists() {
4618 let check_result = check_schema_compatibility(path);
4619 match check_result {
4620 Ok(SchemaCheck::Compatible) | Ok(SchemaCheck::NeedsMigration) => {
4621 }
4623 Ok(SchemaCheck::NeedsRebuild(reason)) => {
4624 let backup_path = create_backup(path)?;
4625 cleanup_old_backups(path, MAX_BACKUPS)?;
4626 remove_database_files(path)?;
4627 return Err(MigrationError::RebuildRequired {
4628 reason,
4629 backup_path,
4630 });
4631 }
4632 Err(err) if schema_check_error_requires_rebuild(&err) => {
4633 let backup_path = create_backup(path)?;
4634 cleanup_old_backups(path, MAX_BACKUPS)?;
4635 remove_database_files(path)?;
4636 return Err(MigrationError::RebuildRequired {
4637 reason: format!("Database appears corrupted: {err}"),
4638 backup_path,
4639 });
4640 }
4641 Err(err) => return Err(MigrationError::Database(err)),
4642 }
4643 }
4644
4645 let storage = Self::open(path).map_err(|e| MigrationError::Other(e.to_string()))?;
4646 Ok(storage)
4647 }
4648}
4649
4650fn build_cass_migrations_before_tail_cache() -> MigrationRunner {
4666 MigrationRunner::new()
4667 .add(13, "full_schema_v13", MIGRATION_FRESH_SCHEMA)
4668 .add(14, "fts_contentless", MIGRATION_V14)
4669}
4670
4671fn build_cass_migrations_after_tail_cache() -> MigrationRunner {
4672 MigrationRunner::new()
4673 .add(16, "drop_redundant_message_conv_idx", MIGRATION_V16)
4674 .add(17, "drop_message_created_idx", MIGRATION_V17)
4675 .add(18, "conversation_tail_state_hot_table", MIGRATION_V18)
4676 .add(19, "conversation_external_lookup", MIGRATION_V19)
4677 .add(20, "conversation_external_tail_lookup", MIGRATION_V20)
4678}
4679
4680fn schema_migration_is_applied(conn: &FrankenConnection, version: i64) -> Result<bool> {
4681 let rows = conn
4682 .query_with_params(
4683 "SELECT 1 FROM _schema_migrations WHERE version = ?1 LIMIT 1;",
4684 &[SqliteValue::from(version)],
4685 )
4686 .with_context(|| format!("checking schema migration version {version}"))?;
4687 Ok(!rows.is_empty())
4688}
4689
4690fn apply_conversation_tail_state_cache_migration(conn: &FrankenConnection) -> Result<bool> {
4691 conn.execute("BEGIN IMMEDIATE;")
4692 .with_context(|| "starting v15 conversation tail-state migration transaction")?;
4693
4694 let result = (|| -> Result<bool> {
4695 if schema_migration_is_applied(conn, 15)? {
4696 conn.execute("COMMIT;")
4697 .with_context(|| "committing already-applied v15 migration transaction")?;
4698 return Ok(false);
4699 }
4700
4701 let started = Instant::now();
4702 let conversation_columns = franken_table_column_names(conn, "conversations")
4703 .with_context(|| "inspecting conversations columns before v15 migration")?;
4704 if !conversation_columns.contains("last_message_idx") {
4705 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_idx INTEGER;")
4706 .with_context(|| "adding v15 conversations.last_message_idx column")?;
4707 }
4708 if !conversation_columns.contains("last_message_created_at") {
4709 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_created_at INTEGER;")
4710 .with_context(|| "adding v15 conversations.last_message_created_at column")?;
4711 }
4712 conn.execute_batch(MIGRATION_V15_TAIL_STATE_TABLE)
4713 .with_context(|| "applying v15 conversation tail-state table schema")?;
4714 conn.execute_compat(
4715 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
4716 fparams![15_i64, "conversation_tail_state_cache"],
4717 )
4718 .with_context(|| "recording v15 conversation tail-state migration")?;
4719 conn.execute("COMMIT;")
4720 .with_context(|| "committing v15 conversation tail-state migration")?;
4721 info!(
4722 elapsed_ms = started.elapsed().as_millis(),
4723 "applied v15 conversation tail-state cache migration"
4724 );
4725 Ok(true)
4726 })();
4727
4728 if result.is_err() {
4729 let _ = conn.execute("ROLLBACK;");
4730 }
4731
4732 result
4733}
4734
4735fn franken_table_column_names(
4736 conn: &FrankenConnection,
4737 table_name: &str,
4738) -> Result<HashSet<String>> {
4739 if !table_name
4740 .chars()
4741 .all(|c| c.is_ascii_alphanumeric() || c == '_')
4742 {
4743 return Err(anyhow!(
4744 "unsafe table name for PRAGMA table_info: {table_name}"
4745 ));
4746 }
4747
4748 conn.query_map_collect(
4749 &format!("PRAGMA table_info({table_name})"),
4750 fparams![],
4751 |row: &FrankenRow| row.get_typed::<String>(1),
4752 )
4753 .with_context(|| format!("reading PRAGMA table_info({table_name})"))
4754 .map(|columns| columns.into_iter().collect())
4755}
4756
/// Consolidated schema script registered as migration 13 (`full_schema_v13`)
/// by `build_cass_migrations_before_tail_cache`.
///
/// Every table and index is created with `IF NOT EXISTS`, and the seed rows
/// (the `local` source and the model pricing table) use `INSERT OR IGNORE`.
/// The `(Vn)` markers in the embedded SQL comments appear to note which
/// historical schema version introduced each object.
const MIGRATION_FRESH_SCHEMA: &str = r"
-- Core tables (V1)
CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    version TEXT,
    kind TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    display_name TEXT
);

-- Sources (V4)
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);

-- Conversations: V1 base + V5 provenance + V7 metadata_bin + V10 token summary
CREATE TABLE IF NOT EXISTS conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    -- V15 columns are included in the fresh schema so fresh DB creation does
    -- not need ALTER TABLE on conversations. That ALTER path can duplicate
    -- provenance autoindex state in frankensqlite when the named unique
    -- provenance index already exists.
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

-- Named unique index avoids autoindex issues if table is ever recreated
CREATE UNIQUE INDEX IF NOT EXISTS idx_conversations_provenance
    ON conversations(source_id, agent_id, external_id);

-- Messages: V1 base + V7 extra_bin
CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    idx INTEGER NOT NULL,
    role TEXT NOT NULL,
    author TEXT,
    created_at INTEGER,
    content TEXT NOT NULL,
    extra_json TEXT,
    extra_bin BLOB,
    UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    PRIMARY KEY (conversation_id, tag_id)
);

-- Daily stats (V8)
CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

-- Embedding jobs (V9)
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');

-- Token usage ledger (V10)
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,
    estimated_cost_usd REAL,
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    data_source TEXT NOT NULL DEFAULT 'api',
    UNIQUE(message_id)
);

-- Token daily stats (V10)
CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',
    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,
    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,
    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,
    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
    session_count INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

-- Model pricing (V10)
CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Message metrics: V11 base + V12 model dimensions
CREATE TABLE IF NOT EXISTS message_metrics (
    message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
    created_at_ms INTEGER NOT NULL,
    hour_id INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    content_tokens_est INTEGER NOT NULL,
    api_input_tokens INTEGER,
    api_output_tokens INTEGER,
    api_cache_read_tokens INTEGER,
    api_cache_creation_tokens INTEGER,
    api_thinking_tokens INTEGER,
    api_service_tier TEXT,
    api_data_source TEXT NOT NULL DEFAULT 'estimated',
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    has_plan INTEGER NOT NULL DEFAULT 0,
    model_name TEXT,
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    provider TEXT NOT NULL DEFAULT 'unknown'
);

-- Hourly rollups: V11 base + V13 plan columns
CREATE TABLE IF NOT EXISTS usage_hourly (
    hour_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

-- Daily rollups: V11 base + V13 plan columns
CREATE TABLE IF NOT EXISTS usage_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

-- Model daily rollups (V12)
CREATE TABLE IF NOT EXISTS usage_models_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

-- All indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
5126
/// A named SQL script for recreating current-schema objects.
///
/// NOTE(review): presumably the item type yielded by
/// `current_schema_repair_batches_for_missing_tables`, whose results are run
/// by `repair_missing_current_schema_objects` — confirm.
#[derive(Clone, Copy)]
struct SchemaRepairBatch {
    /// Short label for the batch; presumably what the repair path puts in its
    /// error context when the batch fails — confirm.
    name: &'static str,
    /// Table names associated with this batch — presumably the tables whose
    /// absence selects it; confirm against the batch-selection helper.
    tables: &'static [&'static str],
    /// The SQL script text for this batch.
    sql: &'static str,
}
5133
/// Repair script for the `sources` table.
///
/// Creates the table if it does not exist and seeds the `local` source row
/// with `INSERT OR IGNORE`.
const CURRENT_SCHEMA_REPAIR_SOURCES_SQL: &str = r"
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
5149
/// Repair script for the `daily_stats` table and its two indexes.
///
/// All statements use `IF NOT EXISTS`.
const CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
5165
/// Repair script for the `conversation_external_lookup` table.
///
/// Creates the table if it does not exist, then repopulates it from every
/// `conversations` row that has a non-NULL `external_id`. The lookup key is
/// built as `<len(source_id)>:<source_id>:<agent_id>:<len(external_id)>:<external_id>`.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
    CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
    CAST(agent_id AS TEXT) || ':' ||
    CAST(length(external_id) AS TEXT) || ':' || external_id,
    id
FROM conversations
WHERE external_id IS NOT NULL;
";
5181
/// Repair script for the `conversation_external_tail_lookup` table.
///
/// Also creates `conversation_tail_state` if it does not exist, because the
/// repopulating `INSERT` left-joins against it. Rows are rebuilt from every
/// `conversations` row with a non-NULL `external_id`; tail-state columns are
/// NULL where no `conversation_tail_state` row matches. The lookup key is
/// built as `<len(source_id)>:<source_id>:<agent_id>:<len(external_id)>:<external_id>`.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
    lookup_key,
    conversation_id,
    ended_at,
    last_message_idx,
    last_message_created_at
)
SELECT
    CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
    CAST(c.agent_id AS TEXT) || ':' ||
    CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
    c.id,
    ts.ended_at,
    ts.last_message_idx,
    ts.last_message_created_at
FROM conversations c
LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
WHERE c.external_id IS NOT NULL;
";
5217
/// Repair script for the `embedding_jobs` table.
///
/// Creates the table and the partial unique index
/// `idx_embedding_jobs_active`, which covers `(db_path, model_id)` only for
/// rows whose status is `pending` or `running`. Both use `IF NOT EXISTS`.
const CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
5236
/// Repair script for the token analytics tables: `token_usage`,
/// `token_daily_stats`, and `model_pricing`, plus their indexes.
///
/// Every object is created with `IF NOT EXISTS`, and the pricing rows are
/// seeded with `INSERT OR IGNORE`.
const CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,
    estimated_cost_usd REAL,
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    data_source TEXT NOT NULL DEFAULT 'api',
    UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',
    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,
    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,
    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,
    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
    session_count INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
";
5322
/// DDL executed by the `message_rollups` repair batch.
///
/// Creates the per-message `message_metrics` table, the `usage_hourly`,
/// `usage_daily` and `usage_models_daily` rollup tables, and their secondary
/// indexes. Every statement uses `IF NOT EXISTS`, so objects that are already
/// present are left untouched.
const CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS message_metrics (
    message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
    created_at_ms INTEGER NOT NULL,
    hour_id INTEGER NOT NULL,
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    content_tokens_est INTEGER NOT NULL,
    api_input_tokens INTEGER,
    api_output_tokens INTEGER,
    api_cache_read_tokens INTEGER,
    api_cache_creation_tokens INTEGER,
    api_thinking_tokens INTEGER,
    api_service_tier TEXT,
    api_data_source TEXT NOT NULL DEFAULT 'estimated',
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    has_plan INTEGER NOT NULL DEFAULT 0,
    model_name TEXT,
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    provider TEXT NOT NULL DEFAULT 'unknown'
);

CREATE TABLE IF NOT EXISTS usage_hourly (
    hour_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_models_daily (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    workspace_id INTEGER NOT NULL DEFAULT 0,
    source_id TEXT NOT NULL DEFAULT 'local',
    model_family TEXT NOT NULL DEFAULT 'unknown',
    model_tier TEXT NOT NULL DEFAULT 'unknown',
    message_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,
    plan_message_count INTEGER NOT NULL DEFAULT 0,
    api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
    content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
    api_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
    api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
5448
/// Registry of current-schema repair batches.
///
/// Each entry lists the tables it is responsible for and the SQL that
/// recreates them. `current_schema_repair_batches_for_missing_tables` selects
/// a batch as soon as any one of its `tables` is reported missing, and returns
/// the selected batches in the order they are declared here.
const CURRENT_SCHEMA_REPAIR_BATCHES: &[SchemaRepairBatch] = &[
    SchemaRepairBatch {
        name: "sources",
        tables: &["sources"],
        sql: CURRENT_SCHEMA_REPAIR_SOURCES_SQL,
    },
    SchemaRepairBatch {
        name: "daily_stats",
        tables: &["daily_stats"],
        sql: CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL,
    },
    SchemaRepairBatch {
        name: "conversation_external_lookup",
        tables: &["conversation_external_lookup"],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL,
    },
    // One batch covers two tables: either one missing triggers the same SQL.
    SchemaRepairBatch {
        name: "conversation_external_tail_lookup",
        tables: &[
            "conversation_tail_state",
            "conversation_external_tail_lookup",
        ],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL,
    },
    SchemaRepairBatch {
        name: "embedding_jobs",
        tables: &["embedding_jobs"],
        sql: CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL,
    },
    SchemaRepairBatch {
        name: "token_analytics",
        tables: &["token_usage", "token_daily_stats", "model_pricing"],
        sql: CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL,
    },
    SchemaRepairBatch {
        name: "message_rollups",
        tables: &[
            "message_metrics",
            "usage_hourly",
            "usage_daily",
            "usage_models_daily",
        ],
        sql: CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL,
    },
];
5494
5495fn current_schema_repair_batches_for_missing_tables(
5496 missing_tables: &[&'static str],
5497) -> Result<Vec<&'static SchemaRepairBatch>> {
5498 let missing_set: HashSet<&'static str> = missing_tables.iter().copied().collect();
5499 let mut selected_batches = Vec::new();
5500 let mut covered_tables = HashSet::new();
5501
5502 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
5503 if !batch
5504 .tables
5505 .iter()
5506 .any(|table_name| missing_set.contains(table_name))
5507 {
5508 continue;
5509 }
5510 selected_batches.push(batch);
5511 covered_tables.extend(batch.tables.iter().copied());
5512 }
5513
5514 for &table_name in missing_tables {
5515 if !covered_tables.contains(table_name) {
5516 return Err(anyhow!(
5517 "no current-schema repair batch registered for missing table {table_name}"
5518 ));
5519 }
5520 }
5521
5522 Ok(selected_batches)
5523}
5524
/// `(version, name)` pairs for schema migrations 1 through 20, in ascending
/// version order.
///
/// `transition_from_meta_version` walks this list in order and stops at the
/// first version above its backfill target, so the ascending order matters.
const MIGRATION_NAMES: [(i64, &str); 20] = [
    (1, "core_tables"),
    (2, "fts_messages"),
    (3, "fts_messages_rebuild"),
    (4, "sources"),
    (5, "provenance_columns"),
    (6, "source_path_index"),
    (7, "msgpack_columns"),
    (8, "daily_stats"),
    (9, "embedding_jobs"),
    (10, "token_analytics"),
    (11, "message_metrics"),
    (12, "model_dimensions"),
    (13, "plan_token_rollups"),
    (14, "fts_contentless"),
    (15, "conversation_tail_state_cache"),
    (16, "drop_redundant_message_conv_idx"),
    (17, "drop_message_created_idx"),
    (18, "conversation_tail_state_hot_table"),
    (19, "conversation_external_lookup"),
    (20, "conversation_external_tail_lookup"),
];
5548
5549fn transition_from_meta_version(conn: &FrankenConnection) -> Result<()> {
5568 if conn
5572 .query("SELECT version FROM \"_schema_migrations\";")
5573 .is_ok()
5574 {
5575 return Ok(());
5576 }
5577
5578 if conn.query("SELECT key FROM meta;").is_err() {
5580 return Ok(());
5582 }
5583
5584 let rows = conn
5586 .query("SELECT value FROM meta WHERE key = 'schema_version';")
5587 .with_context(|| "reading schema_version from meta")?;
5588
5589 let current_version: i64 = rows
5590 .first()
5591 .and_then(|row| row.get_typed::<String>(0).ok())
5592 .and_then(|s| s.parse().ok())
5593 .unwrap_or(0);
5594
5595 if current_version == 0 {
5596 if conn.query("SELECT id FROM conversations LIMIT 1;").is_err() {
5598 return Ok(());
5600 }
5601
5602 info!("meta.schema_version=0 but tables exist; skipping transition (corrupted state)");
5605 return Ok(());
5606 }
5607
5608 info!(
5610 current_version,
5611 "transitioning schema tracking from meta table to _schema_migrations"
5612 );
5613
5614 conn.execute(
5615 "CREATE TABLE IF NOT EXISTS _schema_migrations (\
5616 version INTEGER PRIMARY KEY, \
5617 name TEXT NOT NULL, \
5618 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\
5619 );",
5620 )
5621 .with_context(|| "creating _schema_migrations table for transition")?;
5622
5623 let backfill_through_version = if (10..13).contains(¤t_version) {
5624 13
5625 } else {
5626 current_version
5627 };
5628
5629 for &(version, name) in &MIGRATION_NAMES {
5630 if version > backfill_through_version {
5631 break;
5632 }
5633 conn.execute_compat(
5634 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
5635 &[ParamValue::from(version), ParamValue::from(name)],
5636 )
5637 .with_context(|| format!("backfilling _schema_migrations version {version}"))?;
5638 }
5639
5640 info!(
5641 current_version,
5642 backfill_through_version,
5643 "schema version transition complete: backfilled legacy meta schema versions"
5644 );
5645
5646 Ok(())
5647}
5648
/// `(table name, probe query)` pairs for tables the current schema requires.
///
/// Each probe is a `SELECT ... LIMIT 1` against the named table. The table
/// names here match the `tables` lists in `CURRENT_SCHEMA_REPAIR_BATCHES`.
const REQUIRED_CURRENT_SCHEMA_TABLE_PROBES: &[(&str, &str)] = &[
    ("sources", "SELECT id FROM sources LIMIT 1;"),
    ("daily_stats", "SELECT day_id FROM daily_stats LIMIT 1;"),
    (
        "conversation_external_lookup",
        "SELECT lookup_key FROM conversation_external_lookup LIMIT 1;",
    ),
    (
        "conversation_tail_state",
        "SELECT conversation_id FROM conversation_tail_state LIMIT 1;",
    ),
    (
        "conversation_external_tail_lookup",
        "SELECT lookup_key, last_message_idx FROM conversation_external_tail_lookup LIMIT 1;",
    ),
    ("embedding_jobs", "SELECT id FROM embedding_jobs LIMIT 1;"),
    ("token_usage", "SELECT id FROM token_usage LIMIT 1;"),
    (
        "token_daily_stats",
        "SELECT day_id FROM token_daily_stats LIMIT 1;",
    ),
    (
        "model_pricing",
        "SELECT model_pattern FROM model_pricing LIMIT 1;",
    ),
    (
        "message_metrics",
        "SELECT message_id FROM message_metrics LIMIT 1;",
    ),
    ("usage_hourly", "SELECT hour_id FROM usage_hourly LIMIT 1;"),
    ("usage_daily", "SELECT day_id FROM usage_daily LIMIT 1;"),
    (
        "usage_models_daily",
        "SELECT day_id FROM usage_models_daily LIMIT 1;",
    ),
];
5685
/// `(column name, SQL type)` pairs for token/usage summary columns.
///
/// NOTE(review): the name suggests these are columns required on the
/// `conversations` table; the consumer is outside this section — confirm.
const REQUIRED_CONVERSATION_TOKEN_COLUMNS: &[(&str, &str)] = &[
    ("total_input_tokens", "INTEGER"),
    ("total_output_tokens", "INTEGER"),
    ("total_cache_read_tokens", "INTEGER"),
    ("total_cache_creation_tokens", "INTEGER"),
    ("grand_total_tokens", "INTEGER"),
    ("estimated_cost_usd", "REAL"),
    ("primary_model", "TEXT"),
    ("api_call_count", "INTEGER"),
    ("tool_call_count", "INTEGER"),
    ("user_message_count", "INTEGER"),
    ("assistant_message_count", "INTEGER"),
];
5699
/// Returns `true` when the rendered error text contains `no such table`,
/// compared ASCII case-insensitively.
fn error_indicates_missing_table(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.contains("no such table")
}
5705
/// Returns `true` when the rendered error text contains `no such column`,
/// compared ASCII case-insensitively.
fn error_indicates_missing_column(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.contains("no such column")
}
5711
/// Maximum number of ids handled per orphan-cleanup step: the page size for
/// orphan-id queries and the number of placeholders in one `DELETE ... IN (...)`.
const ORPHAN_FK_ID_CHUNK_SIZE: usize = 256;
5713
/// Lists the ids of all `messages` rows whose `conversation_id` matches no
/// row in `conversations`.
///
/// # Errors
///
/// Fails when the query or reading column 0 of a row fails.
fn collect_orphan_message_ids(conn: &FrankenConnection) -> Result<Vec<i64>> {
    conn.query_map_collect(
        "SELECT m.id
         FROM messages AS m
         WHERE NOT EXISTS (
             SELECT 1
             FROM conversations AS c
             WHERE c.id = m.conversation_id
         )",
        fparams![],
        |row| row.get_typed(0),
    )
    .context("listing orphan message ids for orphan FK cleanup")
}
5728
5729fn delete_rows_by_i64_chunks(
5730 tx: &FrankenTransaction<'_>,
5731 delete_many_sql_prefix: &'static str,
5732 ids: &[i64],
5733) -> Result<usize> {
5734 if ids.is_empty() {
5735 return Ok(0);
5736 }
5737
5738 let full_chunk_sql = delete_rows_by_i64_sql(delete_many_sql_prefix, ORPHAN_FK_ID_CHUNK_SIZE);
5739 let tail_len = ids.len() % ORPHAN_FK_ID_CHUNK_SIZE;
5740 let tail_sql =
5741 (tail_len != 0).then(|| delete_rows_by_i64_sql(delete_many_sql_prefix, tail_len));
5742
5743 let mut deleted = 0;
5744 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5745 let sql = if chunk.len() == ORPHAN_FK_ID_CHUNK_SIZE {
5746 &full_chunk_sql
5747 } else {
5748 tail_sql.as_ref().unwrap_or(&full_chunk_sql)
5749 };
5750 let params = chunk
5751 .iter()
5752 .map(|id| SqliteValue::from(*id))
5753 .collect::<Vec<_>>();
5754 deleted += tx.execute_with_params(sql, ¶ms)?;
5755 }
5756 Ok(deleted)
5757}
5758
5759fn delete_rows_by_i64_sql(delete_many_sql_prefix: &'static str, count: usize) -> String {
5760 let placeholders = sql_placeholders(count);
5761 format!("{delete_many_sql_prefix} ({placeholders})")
5762}
5763
/// Returns `count` `?` placeholders separated by `", "`; empty for `count == 0`.
fn sql_placeholders(count: usize) -> String {
    let mut list = String::new();
    for position in 0..count {
        if position > 0 {
            list.push_str(", ");
        }
        list.push('?');
    }
    list
}
5767
5768fn delete_orphan_message_ids_bisecting_oom(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5769 let mut deleted = 0usize;
5770 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5771 deleted = deleted.saturating_add(delete_orphan_message_id_chunk(conn, chunk)?);
5772 }
5773 Ok(deleted)
5774}
5775
5776fn delete_orphan_message_id_chunk(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5777 if ids.is_empty() {
5778 return Ok(0);
5779 }
5780
5781 match delete_orphan_message_id_chunk_once(conn, ids) {
5782 Ok(deleted) => Ok(deleted),
5783 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5784 let split_at = ids.len() / 2;
5785 tracing::warn!(
5786 target: "cass::fk_repair",
5787 rows = ids.len(),
5788 left = split_at,
5789 right = ids.len().saturating_sub(split_at),
5790 error = %err,
5791 "orphan-message cleanup ran out of memory; retrying as smaller batches"
5792 );
5793 let left = delete_orphan_message_id_chunk(conn, &ids[..split_at])?;
5794 let right = delete_orphan_message_id_chunk(conn, &ids[split_at..])?;
5795 Ok(left.saturating_add(right))
5796 }
5797 Err(err) => Err(err),
5798 }
5799}
5800
5801fn delete_orphan_message_id_chunk_once(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5802 let mut tx = conn.transaction()?;
5803 let mut deleted = 0usize;
5804 for entry in ORPHAN_MESSAGE_DEPENDENT_TABLES {
5805 match delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids) {
5806 Ok(count) => {
5807 deleted = deleted.saturating_add(count);
5808 }
5809 Err(err) if error_indicates_missing_table(&err) => {
5810 tracing::debug!(
5811 target: "cass::fk_repair",
5812 child_table = entry.child_table,
5813 error = %err,
5814 "skipping orphan-message dependent cleanup (table unavailable)"
5815 );
5816 }
5817 Err(err) => {
5818 return Err(err).with_context(|| {
5819 format!(
5820 "deleting rows from {} that depend on orphan messages",
5821 entry.child_table
5822 )
5823 });
5824 }
5825 }
5826 }
5827 deleted = deleted.saturating_add(
5828 delete_rows_by_i64_chunks(&tx, "DELETE FROM messages WHERE id IN", ids)
5829 .context("deleting orphan rows from messages")?,
5830 );
5831 tx.commit()?;
5832 Ok(deleted)
5833}
5834
5835fn collect_direct_orphan_id_page(
5836 conn: &FrankenConnection,
5837 entry: &'static OrphanFkTable,
5838) -> Result<Vec<i64>> {
5839 Ok(conn.query_map_collect(
5840 entry.orphan_id_page_sql,
5841 fparams![i64::try_from(ORPHAN_FK_ID_CHUNK_SIZE).unwrap_or(i64::MAX)],
5842 |row| row.get_typed(0),
5843 )?)
5844}
5845
5846fn delete_direct_orphan_ids_bisecting_oom(
5847 conn: &FrankenConnection,
5848 entry: &'static OrphanFkTable,
5849 ids: &[i64],
5850) -> Result<usize> {
5851 let mut deleted = 0usize;
5852 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5853 deleted = deleted.saturating_add(delete_direct_orphan_id_chunk(conn, entry, chunk)?);
5854 }
5855 Ok(deleted)
5856}
5857
5858fn delete_direct_orphan_id_chunk(
5859 conn: &FrankenConnection,
5860 entry: &'static OrphanFkTable,
5861 ids: &[i64],
5862) -> Result<usize> {
5863 if ids.is_empty() {
5864 return Ok(0);
5865 }
5866
5867 match delete_direct_orphan_id_chunk_once(conn, entry, ids) {
5868 Ok(deleted) => Ok(deleted),
5869 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5870 let split_at = ids.len() / 2;
5871 tracing::warn!(
5872 target: "cass::fk_repair",
5873 child_table = entry.child_table,
5874 rows = ids.len(),
5875 left = split_at,
5876 right = ids.len().saturating_sub(split_at),
5877 error = %err,
5878 "direct orphan cleanup ran out of memory; retrying as smaller batches"
5879 );
5880 let left = delete_direct_orphan_id_chunk(conn, entry, &ids[..split_at])?;
5881 let right = delete_direct_orphan_id_chunk(conn, entry, &ids[split_at..])?;
5882 Ok(left.saturating_add(right))
5883 }
5884 Err(err) => Err(err),
5885 }
5886}
5887
5888fn delete_direct_orphan_id_chunk_once(
5889 conn: &FrankenConnection,
5890 entry: &'static OrphanFkTable,
5891 ids: &[i64],
5892) -> Result<usize> {
5893 let mut tx = conn.transaction()?;
5894 let deleted = delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids)?;
5895 tx.commit()?;
5896 Ok(deleted)
5897}
5898
/// Describes one child table that can hold rows whose parent row is gone.
struct OrphanFkTable {
    /// Table name, used in log fields.
    child_table: &'static str,
    /// Query returning one page of orphan ids; takes the page size as `?1`.
    orphan_id_page_sql: &'static str,
    /// `DELETE ... WHERE <column> IN` fragment; a parenthesised placeholder
    /// list is appended before execution.
    delete_many_sql_prefix: &'static str,
}
5909
/// Child tables checked directly for rows whose parent is missing.
///
/// The first three entries key on `message_id` and look for a missing
/// `messages` row; `conversation_tags` keys on `conversation_id` and looks for
/// a missing `conversations` row. Every page query is ordered by that key and
/// limited by `?1`.
const ORPHAN_DIRECT_CHILD_TABLES: &[OrphanFkTable] = &[
    OrphanFkTable {
        child_table: "message_metrics",
        orphan_id_page_sql: "SELECT message_id FROM message_metrics \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM messages \
                                 WHERE messages.id = message_metrics.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "token_usage",
        orphan_id_page_sql: "SELECT message_id FROM token_usage \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM messages \
                                 WHERE messages.id = token_usage.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "snippets",
        orphan_id_page_sql: "SELECT message_id FROM snippets \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM messages \
                                 WHERE messages.id = snippets.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "conversation_tags",
        orphan_id_page_sql: "SELECT conversation_id FROM conversation_tags \
                             WHERE NOT EXISTS (\
                                 SELECT 1 FROM conversations \
                                 WHERE conversations.id = conversation_tags.conversation_id\
                             ) \
                             ORDER BY conversation_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM conversation_tags WHERE conversation_id IN",
    },
];
5956
/// A table whose rows reference messages by id and are deleted together with
/// orphan messages.
struct OrphanMessageDependentTable {
    /// Table name, used in log fields and error context.
    child_table: &'static str,
    /// `DELETE ... WHERE message_id IN` fragment; a parenthesised placeholder
    /// list is appended before execution.
    delete_many_sql_prefix: &'static str,
}
5961
/// Tables cleaned by `delete_orphan_message_id_chunk_once` before the orphan
/// `messages` rows themselves are deleted, in this order.
const ORPHAN_MESSAGE_DEPENDENT_TABLES: &[OrphanMessageDependentTable] = &[
    OrphanMessageDependentTable {
        child_table: "message_metrics",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "token_usage",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "snippets",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
];
5976
/// Running tally of rows removed by orphan foreign-key cleanup.
#[derive(Debug, Default, Clone)]
pub(crate) struct OrphanFkCleanupReport {
    /// Saturating sum of every recorded count.
    pub total: i64,
    /// One `(table, count)` entry per table, in first-recorded order.
    pub per_table: Vec<(&'static str, i64)>,
}

impl OrphanFkCleanupReport {
    /// Adds `count` to the entry for `child_table`, creating the entry when
    /// the table has not been recorded yet, and to the overall total. Both
    /// additions saturate.
    fn record(&mut self, child_table: &'static str, count: i64) {
        let slot = self
            .per_table
            .iter()
            .position(|(table, _)| *table == child_table);
        match slot {
            Some(index) => {
                let tally = &mut self.per_table[index].1;
                *tally = tally.saturating_add(count);
            }
            None => self.per_table.push((child_table, count)),
        }
        self.total = self.total.saturating_add(count);
    }
}
6007
/// Result of inserting a conversation and its messages.
///
/// NOTE(review): field meanings below are inferred from their names; the
/// code that fills this struct is outside this section — confirm.
pub struct InsertOutcome {
    /// Database id of the conversation row.
    pub conversation_id: i64,
    /// Presumably `true` when a new conversation row was created rather than
    /// an existing one reused.
    pub conversation_inserted: bool,
    /// Presumably the message `idx` values that were newly inserted.
    pub inserted_indices: Vec<i64>,
}
6013
/// Test-only timing breakdown of the message-insert stage.
///
/// The five duration fields are printed as the `msg_*_ms` columns by
/// `InsertConversationTreePerfProfile::log_summary`.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct MessageInsertSubstageProfile {
    single_row_calls: usize,
    batch_calls: usize,
    batch_rows: usize,
    payload_duration: Duration,
    sql_build_duration: Duration,
    param_build_duration: Duration,
    execute_duration: Duration,
    rowid_duration: Duration,
}
6026
/// Test-only accumulator of per-stage timings for conversation-tree inserts.
///
/// `log_summary` prints every duration in milliseconds, plus the part of
/// `total_duration` not covered by the individual stage durations.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct InsertConversationTreePerfProfile {
    invocations: usize,
    messages: usize,
    inserted_messages: usize,
    total_duration: Duration,
    source_duration: Duration,
    tx_open_duration: Duration,
    existing_lookup_duration: Duration,
    existing_idx_lookup_duration: Duration,
    existing_replay_lookup_duration: Duration,
    dedupe_filter_duration: Duration,
    conversation_row_duration: Duration,
    message_insert_duration: Duration,
    // Finer-grained split of `message_insert_duration`.
    message_insert_breakdown: MessageInsertSubstageProfile,
    snippet_insert_duration: Duration,
    fts_entry_duration: Duration,
    fts_flush_duration: Duration,
    analytics_duration: Duration,
    commit_duration: Duration,
}
6049
#[cfg(test)]
impl InsertConversationTreePerfProfile {
    /// Converts a duration to fractional milliseconds.
    fn millis(duration: Duration) -> f64 {
        duration.as_secs_f64() * 1000.0
    }

    /// Prints a one-line `CASS_INSERT_TREE_STAGE_PROFILE` summary to stderr.
    ///
    /// The line carries the counters, each stage in milliseconds, the
    /// message-insert sub-stages, the unaccounted residual, and per-call
    /// averages for selected stages.
    fn log_summary(&self, label: &str) {
        // Clamp to 1 so the averages never divide by zero.
        let calls = self.invocations.max(1) as f64;
        let accounted_duration = self.source_duration
            + self.tx_open_duration
            + self.existing_lookup_duration
            + self.existing_idx_lookup_duration
            + self.existing_replay_lookup_duration
            + self.dedupe_filter_duration
            + self.conversation_row_duration
            + self.message_insert_duration
            + self.snippet_insert_duration
            + self.fts_entry_duration
            + self.fts_flush_duration
            + self.analytics_duration
            + self.commit_duration;
        // Time inside `total_duration` that no individual stage claimed;
        // saturates at zero if the stages sum to more than the total.
        let residual_duration = self.total_duration.saturating_sub(accounted_duration);
        eprintln!(
            concat!(
                "CASS_INSERT_TREE_STAGE_PROFILE ",
                "label={} calls={} messages={} inserted_messages={} ",
                "total_ms={:.3} source_ms={:.3} tx_open_ms={:.3} existing_lookup_ms={:.3} ",
                "existing_idx_lookup_ms={:.3} existing_replay_lookup_ms={:.3} dedupe_filter_ms={:.3} ",
                "conversation_row_ms={:.3} message_insert_ms={:.3} snippet_insert_ms={:.3} ",
                "fts_entry_ms={:.3} fts_flush_ms={:.3} analytics_ms={:.3} commit_ms={:.3} ",
                "msg_payload_ms={:.3} msg_sql_ms={:.3} msg_param_ms={:.3} msg_execute_ms={:.3} msg_rowid_ms={:.3} ",
                "residual_ms={:.3} avg_total_ms={:.3} avg_message_insert_ms={:.3} ",
                "avg_msg_execute_ms={:.3} avg_msg_payload_ms={:.3} avg_snippet_insert_ms={:.3} avg_fts_entry_ms={:.3} avg_commit_ms={:.3}"
            ),
            label,
            self.invocations,
            self.messages,
            self.inserted_messages,
            Self::millis(self.total_duration),
            Self::millis(self.source_duration),
            Self::millis(self.tx_open_duration),
            Self::millis(self.existing_lookup_duration),
            Self::millis(self.existing_idx_lookup_duration),
            Self::millis(self.existing_replay_lookup_duration),
            Self::millis(self.dedupe_filter_duration),
            Self::millis(self.conversation_row_duration),
            Self::millis(self.message_insert_duration),
            Self::millis(self.snippet_insert_duration),
            Self::millis(self.fts_entry_duration),
            Self::millis(self.fts_flush_duration),
            Self::millis(self.analytics_duration),
            Self::millis(self.commit_duration),
            Self::millis(self.message_insert_breakdown.payload_duration),
            Self::millis(self.message_insert_breakdown.sql_build_duration),
            Self::millis(self.message_insert_breakdown.param_build_duration),
            Self::millis(self.message_insert_breakdown.execute_duration),
            Self::millis(self.message_insert_breakdown.rowid_duration),
            Self::millis(residual_duration),
            Self::millis(self.total_duration) / calls,
            Self::millis(self.message_insert_duration) / calls,
            Self::millis(self.message_insert_breakdown.execute_duration) / calls,
            Self::millis(self.message_insert_breakdown.payload_duration) / calls,
            Self::millis(self.snippet_insert_duration) / calls,
            Self::millis(self.fts_entry_duration) / calls,
            Self::millis(self.commit_duration) / calls,
        );
    }
}
6118
/// Hashable identity of a conversation, in one of two forms.
///
/// NOTE(review): presumably used to key conversations that are queued but
/// not yet written; the consumer is outside this section — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PendingConversationKey {
    /// Identified by source, agent, and the conversation's external id.
    External {
        source_id: String,
        agent_id: i64,
        external_id: String,
    },
    /// Identified by source, agent, source path, and optional start time.
    SourcePath {
        source_id: String,
        agent_id: i64,
        source_path: String,
        started_at: Option<i64>,
    },
}
6133
/// Builds the lookup key
/// `<source chars>:<source_id>:<agent_id>:<external chars>:<external_id>`.
///
/// Both lengths are counted in Unicode scalar values (`chars().count()`), not
/// bytes. The length prefixes keep the key unambiguous when an id itself
/// contains `:`.
fn conversation_external_lookup_key(source_id: &str, agent_id: i64, external_id: &str) -> String {
    let source_chars = source_id.chars().count();
    let external_chars = external_id.chars().count();
    format!("{source_chars}:{source_id}:{agent_id}:{external_chars}:{external_id}")
}
6141
6142fn conversation_external_lookup_key_for_conv(agent_id: i64, conv: &Conversation) -> Option<String> {
6143 conv.external_id
6144 .as_deref()
6145 .map(|external_id| conversation_external_lookup_key(&conv.source_id, agent_id, external_id))
6146}
6147
/// Position-sensitive identity of a message: its index, timestamp, role,
/// author, and content hash. Built by `message_merge_fingerprint`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageMergeFingerprint {
    idx: i64,
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
6156
/// Position-independent identity of a message: the same fields as
/// `MessageMergeFingerprint` minus `idx`, so two copies of a message at
/// different positions compare equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageReplayFingerprint {
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
6164
/// Measurements used when deciding whether two conversations are the same.
///
/// NOTE(review): the producer is outside this section; the field notes below
/// are inferred from names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ConversationMergeEvidence {
    // Presumably the count of shared `MessageMergeFingerprint`s.
    exact_overlap: usize,
    // Presumably the count of shared `MessageReplayFingerprint`s.
    replay_overlap: usize,
    // Presumably the size of the smaller of the two replay-fingerprint sets.
    smaller_replay_set: usize,
    started_close: bool,
    start_distance_ms: i64,
}
6173
/// Return value of `collect_new_messages_for_existing_conversation`.
///
/// NOTE(review): that function's body runs past this section; the field
/// notes below are inferred from names — confirm.
struct ExistingConversationNewMessages<'a> {
    // Borrowed messages from the incoming conversation; presumably those not
    // yet stored.
    messages: Vec<&'a Message>,
    // Presumably the total content length of `messages`.
    new_chars: i64,
    // Presumably how many incoming messages reused an index already stored
    // with different content.
    idx_collision_count: usize,
    first_collision_idx: Option<i64>,
}
6180
/// Tail of a stored conversation where both the last message index and its
/// timestamp are known; only `ended_at` is optional.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailState {
    last_message_idx: i64,
    last_message_created_at: i64,
    ended_at: Option<i64>,
}
6187
/// Tail metadata of a stored conversation in which every field may be absent.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailMetadata {
    last_message_idx: Option<i64>,
    last_message_created_at: Option<i64>,
    ended_at: Option<i64>,
}

impl ExistingConversationTailMetadata {
    /// Converts to an `ExistingConversationTailState` by passing the three
    /// fields to `existing_conversation_tail_state_from_cached`, which decides
    /// whether a state can be built.
    fn complete_tail_state(self) -> Option<ExistingConversationTailState> {
        existing_conversation_tail_state_from_cached(
            self.last_message_idx,
            self.last_message_created_at,
            self.ended_at,
        )
    }
}
6204
/// A stored conversation's id paired with its tail state, when one is known.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationWithTail {
    id: i64,
    tail_state: Option<ExistingConversationTailState>,
}
6210
6211fn conversation_effective_started_at(conv: &Conversation) -> Option<i64> {
6212 conv.started_at
6213 .or_else(|| conv.messages.iter().filter_map(|msg| msg.created_at).min())
6214}
6215
6216fn conversation_tail_state(conv: &Conversation) -> (Option<i64>, Option<i64>) {
6217 (
6218 conv.messages.iter().map(|msg| msg.idx).max(),
6219 conv.messages.iter().filter_map(|msg| msg.created_at).max(),
6220 )
6221}
6222
6223fn conversation_tail_ended_at_candidate(conv: &Conversation) -> Option<i64> {
6224 let max_message_created_at = conv.messages.iter().filter_map(|msg| msg.created_at).max();
6225 max_message_created_at.max(conv.ended_at)
6226}
6227
6228fn borrowed_messages_tail_state(messages: &[&Message]) -> (Option<i64>, Option<i64>) {
6229 (
6230 messages.iter().map(|msg| msg.idx).max(),
6231 messages.iter().filter_map(|msg| msg.created_at).max(),
6232 )
6233}
6234
6235fn role_from_str(role: &str) -> MessageRole {
6236 match role {
6237 "user" => MessageRole::User,
6238 "agent" | "assistant" => MessageRole::Agent,
6239 "tool" => MessageRole::Tool,
6240 "system" => MessageRole::System,
6241 other => MessageRole::Other(other.to_string()),
6242 }
6243}
6244
6245fn message_merge_fingerprint(msg: &Message) -> MessageMergeFingerprint {
6246 MessageMergeFingerprint {
6247 idx: msg.idx,
6248 created_at: msg.created_at,
6249 role: msg.role.clone(),
6250 author: msg.author.clone(),
6251 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
6252 }
6253}
6254
6255fn message_replay_fingerprint(msg: &Message) -> MessageReplayFingerprint {
6256 MessageReplayFingerprint {
6257 created_at: msg.created_at,
6258 role: msg.role.clone(),
6259 author: msg.author.clone(),
6260 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
6261 }
6262}
6263
6264fn conversation_message_fingerprints(conv: &Conversation) -> HashSet<MessageMergeFingerprint> {
6265 conv.messages
6266 .iter()
6267 .map(message_merge_fingerprint)
6268 .collect()
6269}
6270
6271fn conversation_message_replay_fingerprints(
6272 conv: &Conversation,
6273) -> HashSet<MessageReplayFingerprint> {
6274 conv.messages
6275 .iter()
6276 .map(message_replay_fingerprint)
6277 .collect()
6278}
6279
6280fn replay_fingerprint_from_merge(
6281 fingerprint: &MessageMergeFingerprint,
6282) -> MessageReplayFingerprint {
6283 MessageReplayFingerprint {
6284 created_at: fingerprint.created_at,
6285 role: fingerprint.role.clone(),
6286 author: fingerprint.author.clone(),
6287 content_hash: fingerprint.content_hash,
6288 }
6289}
6290
6291fn replay_fingerprints_from_merge_set(
6292 fingerprints: &HashSet<MessageMergeFingerprint>,
6293) -> HashSet<MessageReplayFingerprint> {
6294 fingerprints
6295 .iter()
6296 .map(replay_fingerprint_from_merge)
6297 .collect()
6298}
6299
6300fn collect_new_messages_for_existing_conversation<'a>(
6301 conversation_id: i64,
6302 conv: &'a Conversation,
6303 existing_messages: &mut HashMap<i64, MessageMergeFingerprint>,
6304 existing_replay_fingerprints: &mut HashSet<MessageReplayFingerprint>,
6305 replay_skip_log: &'static str,
6306) -> ExistingConversationNewMessages<'a> {
6307 let mut idx_collision_count = 0usize;
6308 let mut first_collision_idx: Option<i64> = None;
6309 let mut new_chars: i64 = 0;
6310 let mut messages = Vec::new();
6311
6312 for msg in &conv.messages {
6313 let incoming_fingerprint = message_merge_fingerprint(msg);
6314 if let Some(existing_fingerprint) = existing_messages.get(&msg.idx) {
6315 if existing_fingerprint != &incoming_fingerprint {
6316 idx_collision_count = idx_collision_count.saturating_add(1);
6317 first_collision_idx.get_or_insert(msg.idx);
6318 }
6319 continue;
6320 }
6321
6322 let incoming_replay = replay_fingerprint_from_merge(&incoming_fingerprint);
6323 if existing_replay_fingerprints.contains(&incoming_replay) {
6324 tracing::debug!(
6325 conversation_id,
6326 idx = msg.idx,
6327 source_path = %conv.source_path.display(),
6328 "{replay_skip_log}"
6329 );
6330 continue;
6331 }
6332
6333 existing_messages.insert(msg.idx, incoming_fingerprint);
6334 existing_replay_fingerprints.insert(incoming_replay);
6335 new_chars += msg.content.len() as i64;
6336 messages.push(msg);
6337 }
6338
6339 ExistingConversationNewMessages {
6340 messages,
6341 new_chars,
6342 idx_collision_count,
6343 first_collision_idx,
6344 }
6345}
6346
6347fn franken_existing_conversation_append_tail_state(
6348 tx: &FrankenTransaction<'_>,
6349 conversation_id: i64,
6350) -> Result<Option<ExistingConversationTailState>> {
6351 let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
6352 .query_row_map(
6353 "SELECT last_message_idx, last_message_created_at, ended_at
6354 FROM conversation_tail_state
6355 WHERE conversation_id = ?1",
6356 fparams![conversation_id],
6357 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6358 )
6359 .optional()?;
6360 if let Some(cached) = cached {
6361 let (_, _, cached_ended_at) = cached;
6362 if let Some(tail_state) =
6363 existing_conversation_tail_state_from_cached(cached.0, cached.1, cached_ended_at)
6364 {
6365 return Ok(Some(tail_state));
6366 }
6367 }
6368
6369 let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
6370 "SELECT last_message_idx, last_message_created_at, ended_at
6371 FROM conversations
6372 WHERE id = ?1",
6373 fparams![conversation_id],
6374 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6375 )?;
6376 let (_, _, cached_ended_at) = legacy_cached;
6377 if let Some(tail_state) = existing_conversation_tail_state_from_cached(
6378 legacy_cached.0,
6379 legacy_cached.1,
6380 cached_ended_at,
6381 ) {
6382 franken_insert_conversation_tail_state(
6383 tx,
6384 conversation_id,
6385 cached_ended_at,
6386 Some(tail_state.last_message_idx),
6387 Some(tail_state.last_message_created_at),
6388 )?;
6389 return Ok(Some(tail_state));
6390 }
6391
6392 let (max_idx, max_created_at): (Option<i64>, Option<i64>) = tx.query_row_map(
6393 "SELECT MAX(idx), MAX(created_at)
6394 FROM messages
6395 WHERE conversation_id = ?1",
6396 fparams![conversation_id],
6397 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
6398 )?;
6399 if let Some((last_message_idx, last_message_created_at)) = max_idx.zip(max_created_at) {
6400 franken_update_conversation_tail_state(
6401 tx,
6402 conversation_id,
6403 None,
6404 Some(last_message_idx),
6405 Some(last_message_created_at),
6406 )?;
6407 return Ok(Some(ExistingConversationTailState {
6408 last_message_idx,
6409 last_message_created_at,
6410 ended_at: cached_ended_at,
6411 }));
6412 }
6413 Ok(None)
6414}
6415
6416fn franken_cached_existing_conversation_tail_metadata(
6417 tx: &FrankenTransaction<'_>,
6418 conversation_id: i64,
6419) -> Result<ExistingConversationTailMetadata> {
6420 let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
6421 .query_row_map(
6422 "SELECT last_message_idx, last_message_created_at, ended_at
6423 FROM conversation_tail_state
6424 WHERE conversation_id = ?1",
6425 fparams![conversation_id],
6426 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6427 )
6428 .optional()?;
6429 if let Some(cached) = cached {
6430 return Ok(ExistingConversationTailMetadata {
6431 last_message_idx: cached.0,
6432 last_message_created_at: cached.1,
6433 ended_at: cached.2,
6434 });
6435 }
6436
6437 let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
6438 "SELECT last_message_idx, last_message_created_at, ended_at
6439 FROM conversations
6440 WHERE id = ?1",
6441 fparams![conversation_id],
6442 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6443 )?;
6444 Ok(ExistingConversationTailMetadata {
6445 last_message_idx: legacy_cached.0,
6446 last_message_created_at: legacy_cached.1,
6447 ended_at: legacy_cached.2,
6448 })
6449}
6450
6451fn existing_conversation_tail_state_from_cached(
6452 last_message_idx: Option<i64>,
6453 last_message_created_at: Option<i64>,
6454 ended_at: Option<i64>,
6455) -> Option<ExistingConversationTailState> {
6456 let (last_message_idx, last_message_created_at) =
6457 last_message_idx.zip(last_message_created_at)?;
6458 Some(ExistingConversationTailState {
6459 last_message_idx,
6460 last_message_created_at,
6461 ended_at,
6462 })
6463}
6464
6465fn franken_find_existing_conversation_with_tail_by_key(
6466 tx: &FrankenTransaction<'_>,
6467 key: &PendingConversationKey,
6468 conv: Option<&Conversation>,
6469) -> Result<Option<ExistingConversationWithTail>> {
6470 if let PendingConversationKey::External {
6471 source_id,
6472 agent_id,
6473 external_id,
6474 } = key
6475 {
6476 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
6477 if let Some(existing) = franken_find_external_conversation_tail_lookup(tx, &lookup_key)? {
6478 return Ok(Some(existing));
6479 }
6480 return Ok(None);
6481 }
6482
6483 let Some(id) = franken_find_existing_conversation_by_key(tx, key, conv)? else {
6484 return Ok(None);
6485 };
6486 let tail_state = franken_existing_conversation_append_tail_state(tx, id)?;
6487 Ok(Some(ExistingConversationWithTail { id, tail_state }))
6488}
6489
6490fn franken_insert_conversation_tail_state(
6491 tx: &FrankenTransaction<'_>,
6492 conversation_id: i64,
6493 ended_at: Option<i64>,
6494 last_message_idx: Option<i64>,
6495 last_message_created_at: Option<i64>,
6496) -> Result<()> {
6497 if ended_at.is_none() && last_message_idx.is_none() && last_message_created_at.is_none() {
6498 return Ok(());
6499 }
6500 tx.execute_compat(
6501 "INSERT OR REPLACE INTO conversation_tail_state (
6502 conversation_id, ended_at, last_message_idx, last_message_created_at
6503 ) VALUES (?1, ?2, ?3, ?4)",
6504 fparams![
6505 conversation_id,
6506 ended_at,
6507 last_message_idx,
6508 last_message_created_at
6509 ],
6510 )?;
6511 Ok(())
6512}
6513
6514fn franken_update_conversation_tail_columns(
6515 tx: &FrankenTransaction<'_>,
6516 conversation_id: i64,
6517 ended_at_candidate: Option<i64>,
6518 last_message_idx_candidate: Option<i64>,
6519 last_message_created_at_candidate: Option<i64>,
6520) -> Result<()> {
6521 if ended_at_candidate.is_none()
6522 && last_message_idx_candidate.is_none()
6523 && last_message_created_at_candidate.is_none()
6524 {
6525 return Ok(());
6526 }
6527
6528 tx.execute_compat(
6529 "UPDATE conversations
6530 SET ended_at = CASE
6531 WHEN ?1 IS NULL THEN ended_at
6532 WHEN ended_at IS NULL OR ended_at < ?1 THEN ?1
6533 ELSE ended_at
6534 END,
6535 last_message_idx = CASE
6536 WHEN ?2 IS NULL THEN last_message_idx
6537 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6538 ELSE last_message_idx
6539 END,
6540 last_message_created_at = CASE
6541 WHEN ?3 IS NULL THEN last_message_created_at
6542 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6543 ELSE last_message_created_at
6544 END
6545 WHERE id = ?4",
6546 fparams![
6547 ended_at_candidate,
6548 last_message_idx_candidate,
6549 last_message_created_at_candidate,
6550 conversation_id
6551 ],
6552 )?;
6553 Ok(())
6554}
6555
6556fn franken_tail_state_insert_ended_at(
6557 tx: &FrankenTransaction<'_>,
6558 conversation_id: i64,
6559 candidate: Option<i64>,
6560) -> Result<Option<i64>> {
6561 let canonical: Option<i64> = tx
6562 .query_row_map(
6563 "SELECT ended_at FROM conversations WHERE id = ?1",
6564 fparams![conversation_id],
6565 |row| row.get_typed(0),
6566 )
6567 .optional()?
6568 .flatten();
6569 Ok(canonical.max(candidate))
6570}
6571
6572fn franken_update_conversation_tail_state(
6573 tx: &FrankenTransaction<'_>,
6574 conversation_id: i64,
6575 ended_at_candidate: Option<i64>,
6576 last_message_idx_candidate: Option<i64>,
6577 last_message_created_at_candidate: Option<i64>,
6578) -> Result<()> {
6579 if ended_at_candidate.is_none()
6580 && last_message_idx_candidate.is_none()
6581 && last_message_created_at_candidate.is_none()
6582 {
6583 return Ok(());
6584 }
6585
6586 let changed = tx.execute_compat(
6587 "UPDATE conversation_tail_state
6588 SET ended_at = CASE
6589 WHEN ?1 IS NULL THEN ended_at
6590 ELSE MAX(IFNULL(ended_at, 0), ?1)
6591 END,
6592 last_message_idx = CASE
6593 WHEN ?2 IS NULL THEN last_message_idx
6594 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6595 ELSE last_message_idx
6596 END,
6597 last_message_created_at = CASE
6598 WHEN ?3 IS NULL THEN last_message_created_at
6599 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6600 ELSE last_message_created_at
6601 END
6602 WHERE conversation_id = ?4",
6603 fparams![
6604 ended_at_candidate,
6605 last_message_idx_candidate,
6606 last_message_created_at_candidate,
6607 conversation_id
6608 ],
6609 )?;
6610 if changed == 0 {
6611 let insert_ended_at =
6612 franken_tail_state_insert_ended_at(tx, conversation_id, ended_at_candidate)?;
6613 franken_insert_conversation_tail_state(
6614 tx,
6615 conversation_id,
6616 insert_ended_at,
6617 last_message_idx_candidate,
6618 last_message_created_at_candidate,
6619 )?;
6620 }
6621 franken_update_conversation_tail_columns(
6622 tx,
6623 conversation_id,
6624 ended_at_candidate,
6625 last_message_idx_candidate,
6626 last_message_created_at_candidate,
6627 )?;
6628 Ok(())
6629}
6630
6631fn franken_set_conversation_tail_state_after_append(
6632 tx: &FrankenTransaction<'_>,
6633 conversation_id: i64,
6634 ended_at: i64,
6635 last_message_idx: i64,
6636 last_message_created_at: i64,
6637) -> Result<()> {
6638 let changed = tx.execute_compat(
6639 "UPDATE conversation_tail_state
6640 SET ended_at = ?1,
6641 last_message_idx = ?2,
6642 last_message_created_at = ?3
6643 WHERE conversation_id = ?4",
6644 fparams![
6645 ended_at,
6646 last_message_idx,
6647 last_message_created_at,
6648 conversation_id
6649 ],
6650 )?;
6651 if changed == 0 {
6652 let insert_ended_at =
6653 franken_tail_state_insert_ended_at(tx, conversation_id, Some(ended_at))?;
6654 franken_insert_conversation_tail_state(
6655 tx,
6656 conversation_id,
6657 insert_ended_at,
6658 Some(last_message_idx),
6659 Some(last_message_created_at),
6660 )?;
6661 }
6662 franken_update_conversation_tail_columns(
6663 tx,
6664 conversation_id,
6665 Some(ended_at),
6666 Some(last_message_idx),
6667 Some(last_message_created_at),
6668 )?;
6669 Ok(())
6670}
6671
6672fn collect_append_only_tail_messages<'a>(
6673 conv: &'a Conversation,
6674 existing_max_idx: i64,
6675 existing_max_created_at: i64,
6676) -> Option<ExistingConversationNewMessages<'a>> {
6677 if conv.messages.is_empty() {
6678 return Some(ExistingConversationNewMessages {
6679 messages: Vec::new(),
6680 new_chars: 0,
6681 idx_collision_count: 0,
6682 first_collision_idx: None,
6683 });
6684 }
6685
6686 let mut split_idx = None;
6687 let mut prev_idx = None;
6688 for (pos, msg) in conv.messages.iter().enumerate() {
6689 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6690 return None;
6691 }
6692 prev_idx = Some(msg.idx);
6693 if split_idx.is_none() && msg.idx > existing_max_idx {
6694 split_idx = Some(pos);
6695 }
6696 }
6697 let split_idx = split_idx?;
6698 if split_idx != 0 {
6699 return None;
6700 }
6701
6702 let mut seen_tail_idx = HashSet::new();
6703 let mut seen_tail_replay = HashSet::new();
6704 let mut new_chars = 0i64;
6705 let mut messages = Vec::new();
6706 for msg in &conv.messages[split_idx..] {
6707 let created_at = msg.created_at?;
6708 if created_at <= existing_max_created_at {
6709 return None;
6710 }
6711
6712 if !seen_tail_idx.insert(msg.idx) {
6713 return None;
6714 }
6715
6716 let replay_fingerprint = message_replay_fingerprint(msg);
6717 if !seen_tail_replay.insert(replay_fingerprint) {
6718 return None;
6719 }
6720
6721 new_chars += msg.content.len() as i64;
6722 messages.push(msg);
6723 }
6724
6725 Some(ExistingConversationNewMessages {
6726 messages,
6727 new_chars,
6728 idx_collision_count: 0,
6729 first_collision_idx: None,
6730 })
6731}
6732
6733fn collect_existing_conversation_noop_from_idx_tail<'a>(
6734 conv: &'a Conversation,
6735 _existing_max_idx: i64,
6736) -> Option<ExistingConversationNewMessages<'a>> {
6737 if conv.messages.is_empty() {
6738 return Some(ExistingConversationNewMessages {
6739 messages: Vec::new(),
6740 new_chars: 0,
6741 idx_collision_count: 0,
6742 first_collision_idx: None,
6743 });
6744 }
6745
6746 None
6750}
6751
6752fn collect_existing_conversation_noop_from_conversation_ended_at<'a>(
6753 conv: &'a Conversation,
6754 existing_ended_at: i64,
6755) -> Option<ExistingConversationNewMessages<'a>> {
6756 if conv.messages.is_empty()
6757 && conv
6758 .ended_at
6759 .is_none_or(|ended_at| ended_at <= existing_ended_at)
6760 {
6761 return Some(ExistingConversationNewMessages {
6762 messages: Vec::new(),
6763 new_chars: 0,
6764 idx_collision_count: 0,
6765 first_collision_idx: None,
6766 });
6767 }
6768
6769 None
6772}
6773
6774fn collect_existing_conversation_tail_from_ended_at<'a>(
6775 conv: &'a Conversation,
6776 existing_ended_at: i64,
6777) -> Option<ExistingConversationNewMessages<'a>> {
6778 if conv.messages.is_empty() {
6779 return Some(ExistingConversationNewMessages {
6780 messages: Vec::new(),
6781 new_chars: 0,
6782 idx_collision_count: 0,
6783 first_collision_idx: None,
6784 });
6785 }
6786
6787 let mut prev_idx = None;
6788 for msg in conv.messages.iter() {
6789 if prev_idx.is_some_and(|prev| msg.idx <= prev) {
6790 return None;
6791 }
6792 prev_idx = Some(msg.idx);
6793 if msg.created_at? <= existing_ended_at {
6794 return None;
6795 }
6796 }
6797
6798 let mut seen_tail_replay = HashSet::new();
6799 let mut new_chars = 0i64;
6800 let mut messages = Vec::new();
6801 for msg in &conv.messages {
6802 let replay_fingerprint = message_replay_fingerprint(msg);
6803 if !seen_tail_replay.insert(replay_fingerprint) {
6804 return None;
6805 }
6806
6807 new_chars += msg.content.len() as i64;
6808 messages.push(msg);
6809 }
6810
6811 Some(ExistingConversationNewMessages {
6812 messages,
6813 new_chars,
6814 idx_collision_count: 0,
6815 first_collision_idx: None,
6816 })
6817}
6818
6819fn trace_existing_conversation_lookup_fallback(
6820 conversation_id: i64,
6821 conv: &Conversation,
6822 tail_state: Option<ExistingConversationTailState>,
6823 existing_ended_at: Option<i64>,
6824) {
6825 if !MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
6826 return;
6827 }
6828
6829 let mut prev_idx = None;
6830 let mut idx_order_violations = 0usize;
6831 let mut duplicate_idx_count = 0usize;
6832 let mut seen_idx = HashSet::new();
6833 let mut missing_created_at = 0usize;
6834 let mut min_idx = None;
6835 let mut max_idx = None;
6836 let mut min_created_at = None;
6837 let mut max_created_at = None;
6838 for msg in &conv.messages {
6839 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6840 idx_order_violations = idx_order_violations.saturating_add(1);
6841 }
6842 prev_idx = Some(msg.idx);
6843 if !seen_idx.insert(msg.idx) {
6844 duplicate_idx_count = duplicate_idx_count.saturating_add(1);
6845 }
6846 min_idx = Some(min_idx.map_or(msg.idx, |current: i64| current.min(msg.idx)));
6847 max_idx = Some(max_idx.map_or(msg.idx, |current: i64| current.max(msg.idx)));
6848 if let Some(created_at) = msg.created_at {
6849 min_created_at =
6850 Some(min_created_at.map_or(created_at, |current: i64| current.min(created_at)));
6851 max_created_at =
6852 Some(max_created_at.map_or(created_at, |current: i64| current.max(created_at)));
6853 } else {
6854 missing_created_at = missing_created_at.saturating_add(1);
6855 }
6856 }
6857
6858 let first_idx_after_tail = tail_state.and_then(|state| {
6859 conv.messages
6860 .iter()
6861 .find(|msg| msg.idx > state.last_message_idx)
6862 .map(|msg| msg.idx)
6863 });
6864 let first_created_after_tail = tail_state.and_then(|state| {
6865 conv.messages
6866 .iter()
6867 .find(|msg| {
6868 msg.created_at
6869 .is_some_and(|created_at| created_at > state.last_message_created_at)
6870 })
6871 .and_then(|msg| msg.created_at)
6872 });
6873 let first_created_after_ended_at = existing_ended_at.and_then(|ended_at| {
6874 conv.messages
6875 .iter()
6876 .find(|msg| {
6877 msg.created_at
6878 .is_some_and(|created_at| created_at > ended_at)
6879 })
6880 .and_then(|msg| msg.created_at)
6881 });
6882
6883 let payload = serde_json::json!({
6884 "event": "existing_conversation_message_lookup_fallback",
6885 "conversation_id": conversation_id,
6886 "agent_slug": conv.agent_slug,
6887 "source_path": conv.source_path,
6888 "external_id": conv.external_id,
6889 "messages": conv.messages.len(),
6890 "min_idx": min_idx,
6891 "max_idx": max_idx,
6892 "missing_created_at": missing_created_at,
6893 "min_created_at": min_created_at,
6894 "max_created_at": max_created_at,
6895 "idx_order_violations": idx_order_violations,
6896 "duplicate_idx_count": duplicate_idx_count,
6897 "tail_state": tail_state.map(|state| {
6898 serde_json::json!({
6899 "last_message_idx": state.last_message_idx,
6900 "last_message_created_at": state.last_message_created_at,
6901 "ended_at": state.ended_at,
6902 })
6903 }),
6904 "existing_ended_at": existing_ended_at,
6905 "first_idx_after_tail": first_idx_after_tail,
6906 "first_created_after_tail": first_created_after_tail,
6907 "first_created_after_ended_at": first_created_after_ended_at,
6908 });
6909 if let Ok(line) = serde_json::to_string(&payload) {
6910 eprintln!("{line}");
6911 }
6912}
6913
6914fn franken_existing_conversation_ended_at(
6915 tx: &FrankenTransaction<'_>,
6916 conversation_id: i64,
6917) -> Result<Option<i64>> {
6918 let ended_at: Option<Option<i64>> = tx
6919 .query_row_map(
6920 "SELECT ended_at
6921 FROM conversations
6922 WHERE id = ?1",
6923 fparams![conversation_id],
6924 |row| row.get_typed(0),
6925 )
6926 .optional()?;
6927 Ok(ended_at.flatten())
6928}
6929
/// Absolute distance between two timestamps.
///
/// Returns `i64::MAX` when either timestamp is missing or when the distance
/// does not fit into an `i64`.
fn start_distance_ms(left: Option<i64>, right: Option<i64>) -> i64 {
    let (Some(left), Some(right)) = (left, right) else {
        return i64::MAX;
    };
    // `abs_diff` yields the exact distance as `u64`, so it cannot overflow.
    i64::try_from(left.abs_diff(right)).unwrap_or(i64::MAX)
}
6939
6940fn conversation_merge_evidence(
6941 incoming_exact: &HashSet<MessageMergeFingerprint>,
6942 incoming_replay: &HashSet<MessageReplayFingerprint>,
6943 existing_exact: &HashSet<MessageMergeFingerprint>,
6944 existing_replay: &HashSet<MessageReplayFingerprint>,
6945 incoming_started_at: Option<i64>,
6946 existing_started_at: Option<i64>,
6947) -> Option<ConversationMergeEvidence> {
6948 let exact_overlap = incoming_exact.intersection(existing_exact).count();
6949 let replay_overlap = incoming_replay.intersection(existing_replay).count();
6950 if exact_overlap == 0 && replay_overlap == 0 {
6951 return None;
6952 }
6953
6954 let smaller_replay_set = incoming_replay.len().min(existing_replay.len());
6955 let started_close = timestamps_within_tolerance(
6956 incoming_started_at,
6957 existing_started_at,
6958 SOURCE_PATH_MERGE_START_TOLERANCE_MS,
6959 );
6960 let full_replay_subset_match = smaller_replay_set >= 2 && replay_overlap == smaller_replay_set;
6961
6962 let merge_allowed = if started_close {
6963 exact_overlap >= 1 || replay_overlap >= 2
6964 } else {
6965 exact_overlap >= 2 || full_replay_subset_match
6966 };
6967
6968 merge_allowed.then_some(ConversationMergeEvidence {
6969 exact_overlap,
6970 replay_overlap,
6971 smaller_replay_set,
6972 started_close,
6973 start_distance_ms: start_distance_ms(incoming_started_at, existing_started_at),
6974 })
6975}
6976
/// Returns `true` when both timestamps are present and differ by at most
/// `tolerance_ms`.
///
/// A missing timestamp, or a negative tolerance, always yields `false`.
fn timestamps_within_tolerance(left: Option<i64>, right: Option<i64>, tolerance_ms: i64) -> bool {
    left.zip(right).is_some_and(|(left, right)| {
        // Compare in i128 so neither the distance nor a negative tolerance can wrap.
        i128::from(left.abs_diff(right)) <= i128::from(tolerance_ms)
    })
}
6985
6986fn conversation_merge_key(agent_id: i64, conv: &Conversation) -> PendingConversationKey {
6987 if let Some(external_id) = conv.external_id.clone() {
6988 PendingConversationKey::External {
6989 source_id: conv.source_id.clone(),
6990 agent_id,
6991 external_id,
6992 }
6993 } else {
6994 PendingConversationKey::SourcePath {
6995 source_id: conv.source_id.clone(),
6996 agent_id,
6997 source_path: path_to_string(&conv.source_path),
6998 started_at: conversation_effective_started_at(conv),
6999 }
7000 }
7001}
7002
/// A message row together with the metadata carried alongside it for embedding.
pub struct MessageForEmbedding {
    /// Row id of the message.
    pub message_id: i64,
    /// Message timestamp, if known.
    pub created_at: Option<i64>,
    /// Row id of the agent the message belongs to.
    pub agent_id: i64,
    /// Row id of the workspace, if any.
    pub workspace_id: Option<i64>,
    /// Hash of the source id (NOTE(review): hashing scheme is defined by the
    /// producer of this struct — confirm before relying on it).
    pub source_id_hash: u32,
    /// Message role label.
    pub role: String,
    /// Message text.
    pub content: String,
}
7013
7014impl FrankenStorage {
7019 pub fn ensure_agent(&self, agent: &Agent) -> Result<i64> {
7021 let cache_key = EnsuredAgentKey::from_agent(agent);
7022 if let Some(id) = self.cached_agent_id(&cache_key) {
7023 return Ok(id);
7024 }
7025
7026 let now = Self::now_millis();
7027 self.conn.execute_compat(
7028 "INSERT INTO agents(slug, name, version, kind, created_at, updated_at)
7029 VALUES(?1, ?2, ?3, ?4, ?5, ?6)
7030 ON CONFLICT(slug) DO UPDATE SET
7031 name = excluded.name,
7032 version = excluded.version,
7033 kind = excluded.kind,
7034 updated_at = excluded.updated_at
7035 WHERE NOT (
7036 agents.name IS excluded.name
7037 AND agents.version IS excluded.version
7038 AND agents.kind IS excluded.kind
7039 )",
7040 fparams![
7041 agent.slug.as_str(),
7042 agent.name.as_str(),
7043 agent.version.as_deref(),
7044 cache_key.kind.as_str(),
7045 now,
7046 now
7047 ],
7048 )?;
7049
7050 let id = self
7051 .conn
7052 .query_row_map(
7053 "SELECT id FROM agents WHERE slug = ?1 LIMIT 1",
7054 fparams![agent.slug.as_str()],
7055 |row| row.get_typed(0),
7056 )
7057 .with_context(|| format!("fetching agent id for {}", agent.slug))?;
7058 self.mark_agent_ensured(cache_key, id);
7059 Ok(id)
7060 }
7061
7062 pub fn ensure_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
7064 let path_str = path.to_string_lossy().to_string();
7065 let cache_key = EnsuredWorkspaceKey::new(path_str.clone(), display_name);
7066 if let Some(id) = self.cached_workspace_id(&cache_key) {
7067 return Ok(id);
7068 }
7069
7070 if let Some(display_name) = display_name {
7071 self.conn.execute_compat(
7072 "INSERT INTO workspaces(path, display_name)
7073 VALUES(?1, ?2)
7074 ON CONFLICT(path) DO UPDATE SET
7075 display_name = excluded.display_name
7076 WHERE NOT (workspaces.display_name IS excluded.display_name)",
7077 fparams![path_str.as_str(), display_name],
7078 )?;
7079 } else {
7080 self.conn.execute_compat(
7081 "INSERT OR IGNORE INTO workspaces(path, display_name) VALUES(?1, NULL)",
7082 fparams![path_str.as_str()],
7083 )?;
7084 }
7085
7086 let id = self
7087 .conn
7088 .query_row_map(
7089 "SELECT id FROM workspaces WHERE path = ?1 LIMIT 1",
7090 fparams![path_str.as_str()],
7091 |row| row.get_typed(0),
7092 )
7093 .with_context(|| format!("fetching workspace id for {path_str}"))?;
7094 self.mark_workspace_ensured(cache_key, id);
7095 Ok(id)
7096 }
7097
7098 pub fn now_millis() -> i64 {
7100 SystemTime::now()
7101 .duration_since(UNIX_EPOCH)
7102 .map(|d| i64::try_from(d.as_millis()).unwrap_or(i64::MAX))
7103 .unwrap_or(0)
7104 }
7105
7106 pub fn day_id_from_millis(timestamp_ms: i64) -> i64 {
7108 const EPOCH_2020_SECS: i64 = 1_577_836_800;
7109 let secs = timestamp_ms.div_euclid(1000);
7110 (secs - EPOCH_2020_SECS).div_euclid(86400)
7111 }
7112
7113 pub fn hour_id_from_millis(timestamp_ms: i64) -> i64 {
7115 const EPOCH_2020_SECS: i64 = 1_577_836_800;
7116 let secs = timestamp_ms.div_euclid(1000);
7117 (secs - EPOCH_2020_SECS).div_euclid(3600)
7118 }
7119
7120 pub fn millis_from_day_id(day_id: i64) -> i64 {
7122 const EPOCH_2020_SECS: i64 = 1_577_836_800;
7123 (EPOCH_2020_SECS + day_id * 86400) * 1000
7124 }
7125
7126 pub fn millis_from_hour_id(hour_id: i64) -> i64 {
7128 const EPOCH_2020_SECS: i64 = 1_577_836_800;
7129 (EPOCH_2020_SECS + hour_id * 3600) * 1000
7130 }
7131
7132 pub fn get_last_scan_ts(&self) -> Result<Option<i64>> {
7134 let result: Result<String, _> = self.conn.query_row_map(
7135 "SELECT value FROM meta WHERE key = 'last_scan_ts'",
7136 fparams![],
7137 |row| row.get_typed(0),
7138 );
7139 match result.optional() {
7140 Ok(Some(s)) => Ok(s.parse().ok()),
7141 Ok(None) => Ok(None),
7142 Err(e) => Err(e.into()),
7143 }
7144 }
7145
7146 pub fn set_last_scan_ts(&self, ts: i64) -> Result<()> {
7148 self.conn.execute_compat(
7149 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_scan_ts', ?1)",
7150 fparams![ts.to_string()],
7151 )?;
7152 Ok(())
7153 }
7154
7155 fn connector_last_scan_ts_meta_key(connector_name: &str) -> String {
7156 format!(
7157 "last_scan_ts:connector:{}",
7158 connector_name.trim().to_ascii_lowercase()
7159 )
7160 }
7161
7162 fn connector_agent_slug_candidates(connector_name: &str) -> SmallVec<[String; 3]> {
7163 let normalized = connector_name.trim().to_ascii_lowercase();
7164 let mut candidates = SmallVec::<[String; 3]>::new();
7165 if normalized.is_empty() {
7166 return candidates;
7167 }
7168
7169 candidates.push(normalized.clone());
7170 match normalized.as_str() {
7171 "claude" | "claude-code" | "claude_code" => {
7172 candidates.push("claude_code".to_string());
7173 candidates.push("claude-code".to_string());
7174 }
7175 _ => {}
7176 }
7177 candidates.sort();
7178 candidates.dedup();
7179 candidates
7180 }
7181
7182 pub fn get_connector_last_scan_ts(&self, connector_name: &str) -> Result<Option<i64>> {
7184 let key = Self::connector_last_scan_ts_meta_key(connector_name);
7185 let result: Result<String, _> = self.conn.query_row_map(
7186 "SELECT value FROM meta WHERE key = ?1",
7187 fparams![key.as_str()],
7188 |row| row.get_typed(0),
7189 );
7190 match result.optional() {
7191 Ok(Some(s)) => Ok(s.parse().ok()),
7192 Ok(None) => Ok(None),
7193 Err(e) => Err(e.into()),
7194 }
7195 }
7196
7197 pub fn set_connector_last_scan_ts(&self, connector_name: &str, ts: i64) -> Result<()> {
7199 let key = Self::connector_last_scan_ts_meta_key(connector_name);
7200 self.conn.execute_compat(
7201 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7202 fparams![key.as_str(), ts.to_string()],
7203 )?;
7204 Ok(())
7205 }
7206
7207 pub fn connector_scan_states(
7217 &self,
7218 connector_names: &[&str],
7219 ) -> Result<HashMap<String, (Option<i64>, bool)>> {
7220 let requested = connector_names
7221 .iter()
7222 .map(|name| name.trim().to_ascii_lowercase())
7223 .filter(|name| !name.is_empty())
7224 .collect::<HashSet<_>>();
7225 let mut states = requested
7226 .iter()
7227 .map(|name| (name.clone(), (None, false)))
7228 .collect::<HashMap<_, _>>();
7229 if states.is_empty() {
7230 return Ok(states);
7231 }
7232
7233 let mut tx = self.conn.transaction()?;
7234 let watermark_rows: Vec<(String, String)> = tx.query_map_collect(
7235 "SELECT key, value FROM meta WHERE key LIKE 'last_scan_ts:connector:%'",
7236 fparams![],
7237 |row| {
7238 let key: String = row.get_typed(0)?;
7239 let value: String = row.get_typed(1)?;
7240 Ok((key, value))
7241 },
7242 )?;
7243
7244 for (key, value) in watermark_rows {
7245 let Some(connector_name) = key.strip_prefix("last_scan_ts:connector:") else {
7246 continue;
7247 };
7248 if let Some((last_scan_ts, _)) =
7249 states.get_mut(connector_name.trim().to_ascii_lowercase().as_str())
7250 {
7251 *last_scan_ts = value.parse().ok();
7252 }
7253 }
7254
7255 let archived_agent_slugs = tx
7256 .query_map_collect(
7257 "SELECT DISTINCT a.slug
7258 FROM agents a
7259 JOIN conversations c ON c.agent_id = a.id",
7260 fparams![],
7261 |row| row.get_typed::<String>(0),
7262 )?
7263 .into_iter()
7264 .map(|slug| slug.trim().to_ascii_lowercase())
7265 .collect::<HashSet<_>>();
7266
7267 for connector_name in requested {
7268 if Self::connector_agent_slug_candidates(&connector_name)
7269 .iter()
7270 .any(|slug| archived_agent_slugs.contains(slug))
7271 && let Some((_, has_conversations)) = states.get_mut(connector_name.as_str())
7272 {
7273 *has_conversations = true;
7274 }
7275 }
7276
7277 tx.rollback()?;
7278 Ok(states)
7279 }
7280
7281 pub fn connector_has_conversations(&self, connector_name: &str) -> Result<bool> {
7283 let candidate_slugs = Self::connector_agent_slug_candidates(connector_name);
7284 if candidate_slugs.is_empty() {
7285 return Ok(false);
7286 }
7287
7288 for slug in candidate_slugs {
7289 let exists: i64 = self.conn.query_row_map(
7290 "SELECT EXISTS(
7291 SELECT 1
7292 FROM conversations c
7293 JOIN agents a ON a.id = c.agent_id
7294 WHERE a.slug = ?1
7295 LIMIT 1
7296 )",
7297 fparams![slug.as_str()],
7298 |row| row.get_typed(0),
7299 )?;
7300 if exists != 0 {
7301 return Ok(true);
7302 }
7303 }
7304 Ok(false)
7305 }
7306
7307 pub fn get_last_indexed_at(&self) -> Result<Option<i64>> {
7309 let result: Result<String, _> = self.conn.query_row_map(
7310 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
7311 fparams![],
7312 |row| row.get_typed(0),
7313 );
7314 match result.optional() {
7315 Ok(Some(s)) => Ok(s.parse().ok()),
7316 Ok(None) => Ok(None),
7317 Err(e) => Err(e.into()),
7318 }
7319 }
7320
7321 pub fn set_last_indexed_at(&self, ts: i64) -> Result<()> {
7323 self.conn.execute_compat(
7324 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_indexed_at', ?1)",
7325 fparams![ts.to_string()],
7326 )?;
7327 Ok(())
7328 }
7329
7330 pub fn list_agents(&self) -> Result<Vec<Agent>> {
7332 self.conn
7333 .query_map_collect(
7334 "SELECT id, slug, name, version, kind FROM agents ORDER BY slug",
7335 fparams![],
7336 |row| {
7337 let kind: String = row.get_typed(4)?;
7338 Ok(Agent {
7339 id: Some(row.get_typed(0)?),
7340 slug: row.get_typed(1)?,
7341 name: row.get_typed(2)?,
7342 version: row.get_typed(3)?,
7343 kind: match kind.as_str() {
7344 "cli" => AgentKind::Cli,
7345 "vscode" => AgentKind::VsCode,
7346 _ => AgentKind::Hybrid,
7347 },
7348 })
7349 },
7350 )
7351 .with_context(|| "listing agents")
7352 }
7353
7354 pub fn total_conversation_count(&self) -> Result<usize> {
7356 let count: i64 =
7357 self.conn
7358 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
7359 row.get_typed(0)
7360 })?;
7361 Ok(count.max(0) as usize)
7362 }
7363
7364 pub fn total_message_count(&self) -> Result<usize> {
7366 let count: i64 =
7367 self.conn
7368 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
7369 row.get_typed(0)
7370 })?;
7371 Ok(count.max(0) as usize)
7372 }
7373
7374 pub fn purge_agent_archive_data(&self, agent_slug: &str) -> Result<AgentArchivePurgeResult> {
7379 let normalized = agent_slug.trim().to_ascii_lowercase();
7380 if normalized.is_empty() {
7381 return Err(anyhow!("agent slug cannot be empty"));
7382 }
7383
7384 let Some(agent_id) = self
7385 .conn
7386 .query_row_map(
7387 "SELECT id FROM agents WHERE slug = ?1",
7388 fparams![normalized.as_str()],
7389 |row| row.get_typed::<i64>(0),
7390 )
7391 .optional()?
7392 else {
7393 return Ok(AgentArchivePurgeResult::default());
7394 };
7395
7396 let conversations_deleted: i64 = self.conn.query_row_map(
7397 "SELECT COUNT(*) FROM conversations WHERE agent_id = ?1",
7398 fparams![agent_id],
7399 |row| row.get_typed(0),
7400 )?;
7401 if conversations_deleted == 0 {
7402 return Ok(AgentArchivePurgeResult::default());
7403 }
7404
7405 let messages_deleted: i64 = self.conn.query_row_map(
7406 "SELECT COUNT(*)
7407 FROM messages
7408 WHERE conversation_id IN (
7409 SELECT id FROM conversations WHERE agent_id = ?1
7410 )",
7411 fparams![agent_id],
7412 |row| row.get_typed(0),
7413 )?;
7414
7415 let mut tx = self.conn.transaction()?;
7416 tx.execute_compat(
7417 "DELETE FROM conversation_external_lookup
7418 WHERE conversation_id IN (
7419 SELECT id FROM conversations WHERE agent_id = ?1
7420 )",
7421 fparams![agent_id],
7422 )?;
7423 tx.execute_compat(
7424 "DELETE FROM conversation_external_tail_lookup
7425 WHERE conversation_id IN (
7426 SELECT id FROM conversations WHERE agent_id = ?1
7427 )",
7428 fparams![agent_id],
7429 )?;
7430 tx.execute_compat(
7431 "DELETE FROM conversations WHERE agent_id = ?1",
7432 fparams![agent_id],
7433 )?;
7434 tx.execute_compat(
7435 "DELETE FROM agents
7436 WHERE id = ?1
7437 AND NOT EXISTS (
7438 SELECT 1 FROM conversations WHERE agent_id = ?1
7439 )",
7440 fparams![agent_id],
7441 )?;
7442 tx.commit()?;
7443
7444 Ok(AgentArchivePurgeResult {
7445 conversations_deleted: conversations_deleted.max(0) as usize,
7446 messages_deleted: messages_deleted.max(0) as usize,
7447 })
7448 }
7449
7450 pub fn list_workspaces(&self) -> Result<Vec<crate::model::types::Workspace>> {
7452 self.conn
7453 .query_map_collect(
7454 "SELECT id, path, display_name FROM workspaces ORDER BY path",
7455 fparams![],
7456 |row| {
7457 let path_str: String = row.get_typed(1)?;
7458 Ok(crate::model::types::Workspace {
7459 id: Some(row.get_typed(0)?),
7460 path: Path::new(&path_str).to_path_buf(),
7461 display_name: row.get_typed(2)?,
7462 })
7463 },
7464 )
7465 .with_context(|| "listing workspaces")
7466 }
7467
7468 pub fn list_conversations(&self, limit: i64, offset: i64) -> Result<Vec<Conversation>> {
7470 self.conn
7477 .query_map_collect(
7478 r"SELECT c.id,
7479 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
7480 (SELECT w.path FROM workspaces w WHERE w.id = c.workspace_id),
7481 c.external_id, c.title, c.source_path,
7482 c.started_at,
7483 COALESCE(
7484 (SELECT ts.ended_at
7485 FROM conversation_tail_state ts
7486 WHERE ts.conversation_id = c.id),
7487 c.ended_at
7488 ),
7489 c.approx_tokens, c.metadata_json,
7490 c.source_id, c.origin_host, c.metadata_bin
7491 FROM conversations c
7492 ORDER BY CASE WHEN c.started_at IS NULL THEN 1 ELSE 0 END, c.started_at DESC, c.id DESC
7493 LIMIT ?1 OFFSET ?2",
7494 fparams![limit, offset],
7495 |row| {
7496 let workspace_path: Option<String> = row.get_typed(2)?;
7497 let source_path: String = row.get_typed(5)?;
7498 let raw_source_id: Option<String> = row.get_typed(10)?;
7499 let raw_origin_host: Option<String> = row.get_typed(11)?;
7500 let (source_id, _, origin_host) = normalized_storage_source_parts(
7501 raw_source_id.as_deref(),
7502 None,
7503 raw_origin_host.as_deref(),
7504 );
7505 Ok(Conversation {
7506 id: Some(row.get_typed(0)?),
7507 agent_slug: row.get_typed(1)?,
7508 workspace: workspace_path.map(|p| Path::new(&p).to_path_buf()),
7509 external_id: row.get_typed(3)?,
7510 title: row.get_typed(4)?,
7511 source_path: Path::new(&source_path).to_path_buf(),
7512 started_at: row.get_typed(6)?,
7513 ended_at: row.get_typed(7)?,
7514 approx_tokens: row.get_typed(8)?,
7515 metadata_json: franken_read_metadata_compat(row, 9, 12),
7516 messages: Vec::new(),
7517 source_id,
7518 origin_host,
7519 })
7520 },
7521 )
7522 .with_context(|| "listing conversations")
7523 }
7524
7525 pub fn build_lexical_rebuild_lookups(
7529 &self,
7530 ) -> Result<(HashMap<i64, String>, HashMap<i64, PathBuf>)> {
7531 let agents: HashMap<i64, String> = self
7532 .conn
7533 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
7534 Ok((row.get_typed::<i64>(0)?, row.get_typed::<String>(1)?))
7535 })
7536 .with_context(|| "loading agent lookup for lexical rebuild")?
7537 .into_iter()
7538 .collect();
7539 let workspaces: HashMap<i64, PathBuf> = self
7540 .conn
7541 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
7542 let path_str: String = row.get_typed(1)?;
7543 Ok((row.get_typed::<i64>(0)?, PathBuf::from(path_str)))
7544 })
7545 .with_context(|| "loading workspace lookup for lexical rebuild")?
7546 .into_iter()
7547 .collect();
7548 Ok((agents, workspaces))
7549 }
7550
7551 pub fn list_conversation_footprints_for_lexical_rebuild(
7564 &self,
7565 ) -> Result<Vec<LexicalRebuildConversationFootprintRow>> {
7566 let tail_state_rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
7567 "SELECT conversation_id, last_message_idx
7568 FROM conversation_tail_state
7569 ORDER BY conversation_id ASC",
7570 fparams![],
7571 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
7572 ) {
7573 Ok(rows) => rows,
7574 Err(err) if error_indicates_missing_table(&err) => Vec::new(),
7575 Err(err) => {
7576 return Err(err).with_context(|| "listing lexical rebuild tail-state estimates");
7577 }
7578 };
7579 let tail_state_by_conversation: HashMap<i64, Option<i64>> =
7580 tail_state_rows.into_iter().collect();
7581
7582 let rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
7583 "SELECT id, last_message_idx
7584 FROM conversations
7585 ORDER BY id ASC",
7586 fparams![],
7587 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
7588 ) {
7589 Ok(rows) => rows,
7590 Err(err) if error_indicates_missing_column(&err) => self
7591 .conn
7592 .query_map_collect(
7593 "SELECT id
7594 FROM conversations
7595 ORDER BY id ASC",
7596 fparams![],
7597 |row| Ok((row.get_typed::<i64>(0)?, None)),
7598 )
7599 .with_context(|| {
7600 "listing lexical rebuild conversation ids after missing tail column fallback"
7601 })?,
7602 Err(err) => {
7603 return Err(err)
7604 .with_context(|| "listing lexical rebuild conversation footprint estimates");
7605 }
7606 };
7607
7608 let mut footprints = Vec::with_capacity(rows.len());
7609 let mut missing_tail_positions = HashMap::new();
7610 for (conversation_id, conversation_last_message_idx) in rows {
7611 let last_message_idx = tail_state_by_conversation
7612 .get(&conversation_id)
7613 .copied()
7614 .flatten()
7615 .or(conversation_last_message_idx);
7616 let Some(message_count) = lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7617 else {
7618 missing_tail_positions.insert(conversation_id, footprints.len());
7619 footprints.push(LexicalRebuildConversationFootprintRow {
7620 conversation_id,
7621 message_count: 0,
7622 message_bytes: 0,
7623 });
7624 continue;
7625 };
7626 footprints.push(lexical_rebuild_conversation_footprint_from_count(
7627 conversation_id,
7628 message_count,
7629 ));
7630 }
7631
7632 let every_footprint_was_missing_tail = missing_tail_positions.len() == footprints.len();
7633 if !missing_tail_positions.is_empty() {
7634 self.fill_missing_lexical_rebuild_footprint_tails(
7635 &mut footprints,
7636 &missing_tail_positions,
7637 )?;
7638 }
7639 if !every_footprint_was_missing_tail {
7640 self.raise_lexical_rebuild_footprints_to_exact_message_counts(&mut footprints)?;
7641 }
7642
7643 Ok(footprints)
7644 }
7645
7646 pub fn lexical_rebuild_has_tail_footprint_metadata(&self) -> Result<bool> {
7647 let total_conversations: i64 = self
7648 .conn
7649 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
7650 row.get_typed(0)
7651 })
7652 .with_context(|| "counting conversations for lexical rebuild tail metadata coverage")?;
7653 let total_conversations = usize::try_from(total_conversations.max(0)).unwrap_or(usize::MAX);
7654 if total_conversations == 0 {
7655 return Ok(true);
7656 }
7657
7658 let conversation_columns = franken_table_column_names(&self.conn, "conversations")?;
7659 let conversations_have_tail_column = conversation_columns.contains("last_message_idx");
7660 let tail_state_has_tail_column =
7661 match franken_table_column_names(&self.conn, "conversation_tail_state") {
7662 Ok(columns) => columns.contains("last_message_idx"),
7663 Err(err) if error_indicates_missing_table(&err) => false,
7664 Err(err) => {
7665 return Err(err)
7666 .with_context(|| "reading lexical rebuild tail-state metadata columns");
7667 }
7668 };
7669 if !conversations_have_tail_column && !tail_state_has_tail_column {
7670 return Ok(false);
7671 }
7672
7673 let covered_sql = match (conversations_have_tail_column, tail_state_has_tail_column) {
7674 (true, true) => {
7675 "SELECT COUNT(*)
7676 FROM conversations c
7677 LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
7678 WHERE c.last_message_idx IS NOT NULL
7679 OR ts.last_message_idx IS NOT NULL"
7680 }
7681 (true, false) => {
7682 "SELECT COUNT(*)
7683 FROM conversations
7684 WHERE last_message_idx IS NOT NULL"
7685 }
7686 (false, true) => {
7687 "SELECT COUNT(*)
7688 FROM conversations c
7689 WHERE EXISTS (
7690 SELECT 1
7691 FROM conversation_tail_state ts
7692 WHERE ts.conversation_id = c.id
7693 AND ts.last_message_idx IS NOT NULL
7694 )"
7695 }
7696 (false, false) => unreachable!("checked before covered_sql selection"),
7697 };
7698 let covered_conversations: i64 = self
7699 .conn
7700 .query_row_map(covered_sql, fparams![], |row| row.get_typed(0))
7701 .with_context(
7702 || "counting conversations covered by lexical rebuild tail footprint metadata",
7703 )?;
7704 let covered_conversations =
7705 usize::try_from(covered_conversations.max(0)).unwrap_or(usize::MAX);
7706
7707 Ok(lexical_rebuild_tail_metadata_coverage_is_sufficient(
7708 total_conversations,
7709 covered_conversations,
7710 ))
7711 }
7712
7713 fn raise_lexical_rebuild_footprints_to_exact_message_counts(
7714 &self,
7715 footprints: &mut [LexicalRebuildConversationFootprintRow],
7716 ) -> Result<()> {
7717 if footprints.is_empty() {
7718 return Ok(());
7719 }
7720
7721 let positions_by_conversation: HashMap<i64, usize> = footprints
7722 .iter()
7723 .enumerate()
7724 .map(|(position, footprint)| (footprint.conversation_id, position))
7725 .collect();
7726 self.conn
7727 .query_with_params_for_each(
7728 "SELECT conversation_id, COUNT(*) AS message_count
7729 FROM messages
7730 GROUP BY conversation_id
7731 ORDER BY conversation_id ASC",
7732 &[] as &[SqliteValue],
7733 |row| {
7734 let conversation_id: i64 = row.get_typed(0)?;
7735 let exact_count: i64 = row.get_typed(1)?;
7736 let Some(position) = positions_by_conversation.get(&conversation_id) else {
7737 return Ok(());
7738 };
7739 let exact_count = usize::try_from(exact_count.max(0)).unwrap_or(usize::MAX);
7740 let footprint = &mut footprints[*position];
7741 if exact_count > footprint.message_count {
7742 footprint.message_count = exact_count;
7743 footprint.message_bytes =
7744 footprint.message_bytes.max(exact_count.saturating_mul(
7745 LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
7746 ));
7747 }
7748 Ok(())
7749 },
7750 )
7751 .with_context(|| "raising lexical rebuild footprints to exact message counts")?;
7752 Ok(())
7753 }
7754
7755 fn fill_missing_lexical_rebuild_footprint_tails(
7756 &self,
7757 footprints: &mut [LexicalRebuildConversationFootprintRow],
7758 missing_tail_positions: &HashMap<i64, usize>,
7759 ) -> Result<()> {
7760 self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7761 footprints,
7762 missing_tail_positions,
7763 "SELECT conversation_id, MAX(idx) AS last_message_idx
7764 FROM messages INDEXED BY idx_messages_conv_idx
7765 GROUP BY conversation_id
7766 ORDER BY conversation_id ASC",
7767 )
7768 .or_else(|err| {
7769 if err
7770 .to_string()
7771 .contains("no such index: idx_messages_conv_idx")
7772 {
7773 return self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7774 footprints,
7775 missing_tail_positions,
7776 "SELECT conversation_id, MAX(idx) AS last_message_idx
7777 FROM messages
7778 GROUP BY conversation_id
7779 ORDER BY conversation_id ASC",
7780 );
7781 }
7782 Err(err)
7783 })
7784 .with_context(|| "grouping missing lexical rebuild tail estimates from messages")?;
7785
7786 Ok(())
7787 }
7788
7789 fn fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7790 &self,
7791 footprints: &mut [LexicalRebuildConversationFootprintRow],
7792 missing_tail_positions: &HashMap<i64, usize>,
7793 sql: &str,
7794 ) -> Result<()> {
7795 self.conn
7796 .query_with_params_for_each(sql, &[] as &[SqliteValue], |row| {
7797 let conversation_id: i64 = row.get_typed(0)?;
7798 let last_message_idx: Option<i64> = row.get_typed(1)?;
7799 let Some(position) = missing_tail_positions.get(&conversation_id) else {
7800 return Ok(());
7801 };
7802 if let Some(message_count) =
7803 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7804 {
7805 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7806 conversation_id,
7807 message_count,
7808 );
7809 }
7810 Ok(())
7811 })
7812 .with_context(|| "grouping lexical rebuild missing tail estimates")
7813 }
7814
7815 pub fn list_conversation_ids_for_lexical_rebuild(&self) -> Result<Vec<i64>> {
7817 self.conn
7818 .query_map_collect(
7819 "SELECT id FROM conversations ORDER BY id ASC",
7820 fparams![],
7821 |row| row.get_typed(0),
7822 )
7823 .with_context(|| "listing conversation ids for lexical rebuild")
7824 }
7825 pub fn list_conversations_for_lexical_rebuild_by_offset(
7830 &self,
7831 limit: i64,
7832 offset: i64,
7833 agent_slugs: &HashMap<i64, String>,
7834 workspace_paths: &HashMap<i64, PathBuf>,
7835 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7836 self.conn
7839 .query_map_collect(
7840 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7841 started_at,
7842 COALESCE(
7843 (SELECT ts.ended_at
7844 FROM conversation_tail_state ts
7845 WHERE ts.conversation_id = conversations.id),
7846 ended_at
7847 ),
7848 source_id, origin_host
7849 FROM conversations
7850 ORDER BY id ASC
7851 LIMIT ?1 OFFSET ?2",
7852 fparams![limit, offset],
7853 |row| {
7854 let agent_id: Option<i64> = row.get_typed(1)?;
7855 let workspace_id: Option<i64> = row.get_typed(2)?;
7856 let source_path: String = row.get_typed(5)?;
7857 let raw_source_id: Option<String> = row.get_typed(8)?;
7858 let raw_origin_host: Option<String> = row.get_typed(9)?;
7859 let (source_id, _, origin_host) = normalized_storage_source_parts(
7860 raw_source_id.as_deref(),
7861 None,
7862 raw_origin_host.as_deref(),
7863 );
7864 Ok(LexicalRebuildConversationRow {
7865 id: Some(row.get_typed(0)?),
7866 agent_slug: agent_id
7867 .and_then(|aid| agent_slugs.get(&aid).cloned())
7868 .unwrap_or_else(|| "unknown".to_string()),
7869 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7870 external_id: row.get_typed(3)?,
7871 title: row.get_typed(4)?,
7872 source_path: Path::new(&source_path).to_path_buf(),
7873 started_at: row.get_typed(6)?,
7874 ended_at: row.get_typed(7)?,
7875 source_id,
7876 origin_host,
7877 })
7878 },
7879 )
7880 .with_context(|| "listing conversations for lexical rebuild")
7881 }
7882
7883 pub fn list_conversations_for_lexical_rebuild_after_id(
7888 &self,
7889 limit: i64,
7890 after_conversation_id: i64,
7891 agent_slugs: &HashMap<i64, String>,
7892 workspace_paths: &HashMap<i64, PathBuf>,
7893 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7894 self.conn
7895 .query_map_collect(
7896 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7897 started_at,
7898 COALESCE(
7899 (SELECT ts.ended_at
7900 FROM conversation_tail_state ts
7901 WHERE ts.conversation_id = conversations.id),
7902 ended_at
7903 ),
7904 source_id, origin_host
7905 FROM conversations
7906 WHERE id > ?2
7907 ORDER BY id ASC
7908 LIMIT ?1",
7909 fparams![limit, after_conversation_id],
7910 |row| {
7911 let agent_id: Option<i64> = row.get_typed(1)?;
7912 let workspace_id: Option<i64> = row.get_typed(2)?;
7913 let source_path: String = row.get_typed(5)?;
7914 let raw_source_id: Option<String> = row.get_typed(8)?;
7915 let raw_origin_host: Option<String> = row.get_typed(9)?;
7916 let (source_id, _, origin_host) = normalized_storage_source_parts(
7917 raw_source_id.as_deref(),
7918 None,
7919 raw_origin_host.as_deref(),
7920 );
7921 Ok(LexicalRebuildConversationRow {
7922 id: Some(row.get_typed(0)?),
7923 agent_slug: agent_id
7924 .and_then(|aid| agent_slugs.get(&aid).cloned())
7925 .unwrap_or_else(|| "unknown".to_string()),
7926 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7927 external_id: row.get_typed(3)?,
7928 title: row.get_typed(4)?,
7929 source_path: Path::new(&source_path).to_path_buf(),
7930 started_at: row.get_typed(6)?,
7931 ended_at: row.get_typed(7)?,
7932 source_id,
7933 origin_host,
7934 })
7935 },
7936 )
7937 .with_context(|| {
7938 format!(
7939 "listing conversations for lexical rebuild after id {after_conversation_id}"
7940 )
7941 })
7942 }
7943
7944 pub fn list_conversations_for_lexical_rebuild_after_id_through_id(
7950 &self,
7951 limit: i64,
7952 after_conversation_id: i64,
7953 through_conversation_id: i64,
7954 agent_slugs: &HashMap<i64, String>,
7955 workspace_paths: &HashMap<i64, PathBuf>,
7956 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7957 if through_conversation_id <= after_conversation_id {
7958 return Ok(Vec::new());
7959 }
7960 self.conn
7961 .query_map_collect(
7962 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7963 started_at,
7964 COALESCE(
7965 (SELECT ts.ended_at
7966 FROM conversation_tail_state ts
7967 WHERE ts.conversation_id = conversations.id),
7968 ended_at
7969 ),
7970 source_id, origin_host
7971 FROM conversations
7972 WHERE id > ?2 AND id <= ?3
7973 ORDER BY id ASC
7974 LIMIT ?1",
7975 fparams![limit, after_conversation_id, through_conversation_id],
7976 |row| {
7977 let agent_id: Option<i64> = row.get_typed(1)?;
7978 let workspace_id: Option<i64> = row.get_typed(2)?;
7979 let source_path: String = row.get_typed(5)?;
7980 let raw_source_id: Option<String> = row.get_typed(8)?;
7981 let raw_origin_host: Option<String> = row.get_typed(9)?;
7982 let (source_id, _, origin_host) = normalized_storage_source_parts(
7983 raw_source_id.as_deref(),
7984 None,
7985 raw_origin_host.as_deref(),
7986 );
7987 Ok(LexicalRebuildConversationRow {
7988 id: Some(row.get_typed(0)?),
7989 agent_slug: agent_id
7990 .and_then(|aid| agent_slugs.get(&aid).cloned())
7991 .unwrap_or_else(|| "unknown".to_string()),
7992 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7993 external_id: row.get_typed(3)?,
7994 title: row.get_typed(4)?,
7995 source_path: Path::new(&source_path).to_path_buf(),
7996 started_at: row.get_typed(6)?,
7997 ended_at: row.get_typed(7)?,
7998 source_id,
7999 origin_host,
8000 })
8001 },
8002 )
8003 .with_context(|| {
8004 format!(
8005 "listing conversations for lexical rebuild after id {after_conversation_id} through id {through_conversation_id}"
8006 )
8007 })
8008 }
8009
8010 pub fn fetch_messages(&self, conversation_id: i64) -> Result<Vec<Message>> {
8012 let hinted_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
8013 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
8014 WHERE conversation_id = ?1 ORDER BY idx";
8015 let fallback_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
8016 FROM messages \
8017 WHERE conversation_id = ?1 ORDER BY idx";
8018
8019 self.conn
8020 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
8021 let role: String = row.get_typed(2)?;
8022 Ok(Message {
8023 id: Some(row.get_typed(0)?),
8024 idx: row.get_typed(1)?,
8025 role: match role.as_str() {
8026 "user" => MessageRole::User,
8027 "agent" | "assistant" => MessageRole::Agent,
8028 "tool" => MessageRole::Tool,
8029 "system" => MessageRole::System,
8030 other => MessageRole::Other(other.to_string()),
8031 },
8032 author: row.get_typed(3)?,
8033 created_at: row.get_typed(4)?,
8034 content: row.get_typed(5)?,
8035 extra_json: franken_read_message_extra_compat(row, 6, 7),
8036 snippets: Vec::new(),
8037 })
8038 })
8039 .or_else(|err| {
8040 if err
8041 .to_string()
8042 .contains("no such index: sqlite_autoindex_messages_1")
8043 {
8044 return self.conn.query_map_collect(
8045 fallback_sql,
8046 fparams![conversation_id],
8047 |row| {
8048 let role: String = row.get_typed(2)?;
8049 Ok(Message {
8050 id: Some(row.get_typed(0)?),
8051 idx: row.get_typed(1)?,
8052 role: match role.as_str() {
8053 "user" => MessageRole::User,
8054 "agent" | "assistant" => MessageRole::Agent,
8055 "tool" => MessageRole::Tool,
8056 "system" => MessageRole::System,
8057 other => MessageRole::Other(other.to_string()),
8058 },
8059 author: row.get_typed(3)?,
8060 created_at: row.get_typed(4)?,
8061 content: row.get_typed(5)?,
8062 extra_json: franken_read_message_extra_compat(row, 6, 7),
8063 snippets: Vec::new(),
8064 })
8065 },
8066 );
8067 }
8068 Err(err)
8069 })
8070 .with_context(|| format!("fetching messages for conversation {conversation_id}"))
8071 }
8072
8073 pub fn fetch_messages_for_lexical_rebuild(&self, conversation_id: i64) -> Result<Vec<Message>> {
8079 let hinted_sql = "SELECT id, idx, role, author, created_at, content \
8080 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
8081 WHERE conversation_id = ?1 ORDER BY idx";
8082 let fallback_sql = "SELECT id, idx, role, author, created_at, content \
8083 FROM messages \
8084 WHERE conversation_id = ?1 ORDER BY idx";
8085
8086 self.conn
8087 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
8088 let role: String = row.get_typed(2)?;
8089 Ok(Message {
8090 id: Some(row.get_typed(0)?),
8091 idx: row.get_typed(1)?,
8092 role: match role.as_str() {
8093 "user" => MessageRole::User,
8094 "agent" | "assistant" => MessageRole::Agent,
8095 "tool" => MessageRole::Tool,
8096 "system" => MessageRole::System,
8097 other => MessageRole::Other(other.to_string()),
8098 },
8099 author: row.get_typed(3)?,
8100 created_at: row.get_typed(4)?,
8101 content: row.get_typed(5)?,
8102 extra_json: serde_json::Value::Null,
8103 snippets: Vec::new(),
8104 })
8105 })
8106 .or_else(|err| {
8107 if err
8108 .to_string()
8109 .contains("no such index: sqlite_autoindex_messages_1")
8110 {
8111 return self.conn.query_map_collect(
8112 fallback_sql,
8113 fparams![conversation_id],
8114 |row| {
8115 let role: String = row.get_typed(2)?;
8116 Ok(Message {
8117 id: Some(row.get_typed(0)?),
8118 idx: row.get_typed(1)?,
8119 role: match role.as_str() {
8120 "user" => MessageRole::User,
8121 "agent" | "assistant" => MessageRole::Agent,
8122 "tool" => MessageRole::Tool,
8123 "system" => MessageRole::System,
8124 other => MessageRole::Other(other.to_string()),
8125 },
8126 author: row.get_typed(3)?,
8127 created_at: row.get_typed(4)?,
8128 content: row.get_typed(5)?,
8129 extra_json: serde_json::Value::Null,
8130 snippets: Vec::new(),
8131 })
8132 },
8133 );
8134 }
8135 Err(err)
8136 })
8137 .with_context(|| {
8138 format!("fetching messages for lexical rebuild of conversation {conversation_id}")
8139 })
8140 }
8141
8142 pub fn fetch_messages_for_lexical_rebuild_batch(
8147 &self,
8148 conversation_ids: &[i64],
8149 max_messages: Option<usize>,
8150 max_content_bytes: Option<usize>,
8151 ) -> Result<HashMap<i64, Vec<Message>>> {
8152 if conversation_ids.is_empty() {
8153 return Ok(HashMap::new());
8154 }
8155
8156 let mut grouped: HashMap<i64, Vec<Message>> =
8157 HashMap::with_capacity(conversation_ids.len());
8158 let mut fetched_conversation_ids = HashSet::with_capacity(conversation_ids.len());
8159 let mut total_messages = 0usize;
8160 let mut total_content_bytes = 0usize;
8161
8162 for conversation_id in conversation_ids {
8167 if !fetched_conversation_ids.insert(*conversation_id) {
8168 continue;
8169 }
8170
8171 let messages = self
8172 .fetch_messages_for_lexical_rebuild(*conversation_id)
8173 .with_context(|| {
8174 format!("fetching lexical rebuild messages for conversation {conversation_id}")
8175 })?;
8176 total_messages = total_messages.saturating_add(messages.len());
8177 if let Some(limit) = max_messages
8178 && total_messages > limit
8179 {
8180 return Err(anyhow!(
8181 "lexical rebuild batch fetch exceeded message guardrail: messages={total_messages} limit={limit} conversations={}",
8182 conversation_ids.len()
8183 ));
8184 }
8185
8186 let message_bytes = messages
8187 .iter()
8188 .map(|message| message.content.len())
8189 .sum::<usize>();
8190 total_content_bytes = total_content_bytes.saturating_add(message_bytes);
8191 if let Some(limit) = max_content_bytes
8192 && total_content_bytes > limit
8193 {
8194 return Err(anyhow!(
8195 "lexical rebuild batch fetch exceeded content-byte guardrail: bytes={total_content_bytes} limit={limit} conversations={}",
8196 conversation_ids.len()
8197 ));
8198 }
8199
8200 if !messages.is_empty() {
8201 grouped.insert(*conversation_id, messages);
8202 }
8203 }
8204
8205 Ok(grouped)
8206 }
8207
8208 pub fn stream_messages_for_lexical_rebuild_between_conversation_ids<F>(
8211 &self,
8212 start_conversation_id: i64,
8213 end_conversation_id: i64,
8214 mut f: F,
8215 ) -> Result<()>
8216 where
8217 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
8218 {
8219 if end_conversation_id < start_conversation_id {
8220 return Ok(());
8221 }
8222
8223 let conversation_ids: Vec<i64> = self
8224 .conn
8225 .query_map_collect(
8226 "SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
8227 fparams![start_conversation_id, end_conversation_id],
8228 |row| row.get_typed(0),
8229 )
8230 .with_context(|| "listing conversation ids for streamed lexical rebuild")?;
8231
8232 for conversation_id in conversation_ids {
8233 let messages = self
8234 .fetch_messages_for_lexical_rebuild(conversation_id)
8235 .with_context(|| {
8236 format!("streaming lexical rebuild messages for conversation {conversation_id}")
8237 })?;
8238
8239 for message in messages {
8240 let message_id = message.id.ok_or_else(|| {
8241 anyhow!(
8242 "lexical rebuild message missing id for conversation {conversation_id} idx {}",
8243 message.idx
8244 )
8245 })?;
8246 f(LexicalRebuildMessageRow {
8247 conversation_id,
8248 id: message_id,
8249 idx: message.idx,
8250 role: role_str(&message.role),
8251 author: message.author,
8252 created_at: message.created_at,
8253 content: message.content,
8254 })?;
8255 }
8256 }
8257
8258 Ok(())
8259 }
8260
8261 pub fn stream_grouped_messages_for_lexical_rebuild_between_conversation_ids<F>(
8265 &self,
8266 start_conversation_id: i64,
8267 end_conversation_id: i64,
8268 mut f: F,
8269 ) -> Result<()>
8270 where
8271 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
8272 {
8273 if end_conversation_id < start_conversation_id {
8274 return Ok(());
8275 }
8276
8277 let mut current_conversation_id: Option<i64> = None;
8278 let mut current_messages: LexicalRebuildGroupedMessageRows = SmallVec::new();
8279 let mut current_last_message_id = 0i64;
8280 let mut flush_current = |current_conversation_id: &mut Option<i64>,
8281 current_messages: &mut LexicalRebuildGroupedMessageRows,
8282 current_last_message_id: &mut i64|
8283 -> Result<()> {
8284 let Some(conversation_id) = current_conversation_id.take() else {
8285 return Ok(());
8286 };
8287 let messages = std::mem::take(current_messages);
8288 let last_message_id = std::mem::take(current_last_message_id);
8289 f(conversation_id, messages, last_message_id)
8290 };
8291
8292 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
8293 start_conversation_id,
8294 end_conversation_id,
8295 |row| {
8296 if current_conversation_id != Some(row.conversation_id) {
8297 flush_current(
8298 &mut current_conversation_id,
8299 &mut current_messages,
8300 &mut current_last_message_id,
8301 )?;
8302 current_conversation_id = Some(row.conversation_id);
8303 }
8304 current_last_message_id = row.id;
8305 current_messages.push(LexicalRebuildGroupedMessageRow {
8306 idx: row.idx,
8307 is_tool_role: row.role == "tool",
8308 created_at: row.created_at,
8309 content: row.content,
8310 });
8311 Ok(())
8312 },
8313 )
8314 .with_context(|| "streaming grouped lexical rebuild messages")?;
8315
8316 flush_current(
8317 &mut current_conversation_id,
8318 &mut current_messages,
8319 &mut current_last_message_id,
8320 )
8321 .with_context(|| "flushing grouped lexical rebuild messages")
8322 }
8323
8324 pub fn stream_grouped_messages_for_lexical_rebuild_from_conversation_id<F>(
8327 &self,
8328 start_conversation_id: i64,
8329 f: F,
8330 ) -> Result<()>
8331 where
8332 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
8333 {
8334 self.stream_grouped_messages_for_lexical_rebuild_between_conversation_ids(
8335 start_conversation_id,
8336 i64::MAX,
8337 f,
8338 )
8339 }
8340
8341 pub fn stream_messages_for_lexical_rebuild_from_conversation_id<F>(
8344 &self,
8345 start_conversation_id: i64,
8346 f: F,
8347 ) -> Result<()>
8348 where
8349 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
8350 {
8351 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
8352 start_conversation_id,
8353 i64::MAX,
8354 f,
8355 )
8356 }
8357
8358 pub fn get_source(&self, id: &str) -> Result<Option<Source>> {
8360 let result = self.conn.query_row_map(
8361 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources WHERE id = ?1",
8362 fparams![id],
8363 |row| {
8364 let kind_str: String = row.get_typed(1)?;
8365 let config_json_str: Option<String> = row.get_typed(5)?;
8366 Ok(Source {
8367 id: row.get_typed(0)?,
8368 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
8369 host_label: row.get_typed(2)?,
8370 machine_id: row.get_typed(3)?,
8371 platform: row.get_typed(4)?,
8372 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
8373 created_at: row.get_typed(6)?,
8374 updated_at: row.get_typed(7)?,
8375 })
8376 },
8377 );
8378 Ok(result.optional()?)
8379 }
8380
8381 pub fn list_sources(&self) -> Result<Vec<Source>> {
8383 self.conn
8384 .query_map_collect(
8385 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources ORDER BY id",
8386 fparams![],
8387 |row| {
8388 let kind_str: String = row.get_typed(1)?;
8389 let config_json_str: Option<String> = row.get_typed(5)?;
8390 Ok(Source {
8391 id: row.get_typed(0)?,
8392 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
8393 host_label: row.get_typed(2)?,
8394 machine_id: row.get_typed(3)?,
8395 platform: row.get_typed(4)?,
8396 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
8397 created_at: row.get_typed(6)?,
8398 updated_at: row.get_typed(7)?,
8399 })
8400 },
8401 )
8402 .with_context(|| "listing sources")
8403 }
8404
8405 pub fn get_source_ids(&self) -> Result<Vec<String>> {
8407 self.conn
8408 .query_map_collect(
8409 "SELECT id FROM sources WHERE id != 'local' ORDER BY id",
8410 fparams![],
8411 |row| row.get_typed(0),
8412 )
8413 .with_context(|| "listing source ids")
8414 }
8415
8416 pub fn upsert_source(&self, source: &Source) -> Result<()> {
8418 self.invalidate_conversation_source_cache(source.id.as_str());
8419 let now = Self::now_millis();
8420 let kind_str = source.kind.to_string();
8421 let config_json_str = source
8422 .config_json
8423 .as_ref()
8424 .map(serde_json::to_string)
8425 .transpose()?;
8426
8427 self.conn.execute_compat(
8431 "INSERT INTO sources(id, kind, host_label, machine_id, platform, config_json, created_at, updated_at)
8432 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
8433 ON CONFLICT(id) DO UPDATE SET
8434 kind = excluded.kind,
8435 host_label = excluded.host_label,
8436 machine_id = excluded.machine_id,
8437 platform = excluded.platform,
8438 config_json = excluded.config_json,
8439 updated_at = excluded.updated_at
8440 WHERE NOT (
8441 sources.kind IS excluded.kind
8442 AND sources.host_label IS excluded.host_label
8443 AND sources.machine_id IS excluded.machine_id
8444 AND sources.platform IS excluded.platform
8445 AND sources.config_json IS excluded.config_json
8446 )",
8447 fparams![
8448 source.id.as_str(),
8449 kind_str.as_str(),
8450 source.host_label.as_deref(),
8451 source.machine_id.as_deref(),
8452 source.platform.as_deref(),
8453 config_json_str.as_deref(),
8454 source.created_at.unwrap_or(now),
8455 now
8456 ],
8457 )?;
8458 Ok(())
8459 }
8460
8461 fn historical_bundle_key_hash(
8462 version: u32,
8463 bundle: &HistoricalDatabaseBundle,
8464 include_bundle_stats: bool,
8465 ) -> String {
8466 let signature = if include_bundle_stats {
8467 format!(
8468 "{}:{}:{}:{}",
8469 version,
8470 bundle.root_path.display(),
8471 bundle.total_bytes,
8472 bundle.modified_at_ms
8473 )
8474 } else {
8475 format!("{}:{}", version, bundle.root_path.display())
8476 };
8477 blake3::hash(signature.as_bytes()).to_hex().to_string()
8478 }
8479
8480 fn historical_bundle_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
8481 format!(
8482 "historical_bundle_salvaged:{}",
8483 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_LEDGER_VERSION, bundle, false)
8484 )
8485 }
8486
8487 fn historical_bundle_legacy_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
8488 let signature = format!(
8489 "{}:{}:{}:{}",
8490 HISTORICAL_SALVAGE_LEDGER_VERSION,
8491 bundle.root_path.display(),
8492 bundle.total_bytes,
8493 bundle.modified_at_ms
8494 );
8495 format!(
8496 "historical_bundle_salvaged:{}",
8497 blake3::hash(signature.as_bytes()).to_hex()
8498 )
8499 }
8500
8501 fn historical_bundle_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
8502 format!(
8503 "historical_bundle_progress:{}",
8504 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_PROGRESS_VERSION, bundle, false)
8505 )
8506 }
8507
8508 fn historical_bundle_legacy_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
8509 let signature = format!(
8510 "{}:{}:{}:{}",
8511 HISTORICAL_SALVAGE_PROGRESS_VERSION,
8512 bundle.root_path.display(),
8513 bundle.total_bytes,
8514 bundle.modified_at_ms
8515 );
8516 format!(
8517 "historical_bundle_progress:{}",
8518 blake3::hash(signature.as_bytes()).to_hex()
8519 )
8520 }
8521
8522 fn historical_bundle_already_imported(
8523 &self,
8524 bundle: &HistoricalDatabaseBundle,
8525 ) -> Result<bool> {
8526 for key in [
8527 Self::historical_bundle_meta_key(bundle),
8528 Self::historical_bundle_legacy_meta_key(bundle),
8529 ] {
8530 let existing: Option<String> = self
8531 .conn
8532 .query_row_map(
8533 "SELECT value FROM meta WHERE key = ?1",
8534 fparams![key.as_str()],
8535 |row| row.get_typed(0),
8536 )
8537 .optional()?;
8538 if existing.is_some() {
8539 return Ok(true);
8540 }
8541 }
8542 Ok(false)
8543 }
8544
8545 pub(crate) fn has_pending_historical_bundles(&self, canonical_db_path: &Path) -> Result<bool> {
8546 for bundle in discover_historical_database_bundles(canonical_db_path) {
8547 if !self.historical_bundle_already_imported(&bundle)? {
8548 return Ok(true);
8549 }
8550 }
8551 Ok(false)
8552 }
8553
8554 fn load_historical_bundle_progress(
8555 &self,
8556 bundle: &HistoricalDatabaseBundle,
8557 ) -> Result<Option<HistoricalBundleProgress>> {
8558 for key in [
8559 Self::historical_bundle_progress_key(bundle),
8560 Self::historical_bundle_legacy_progress_key(bundle),
8561 ] {
8562 let raw: Option<String> = self
8563 .conn
8564 .query_row_map(
8565 "SELECT value FROM meta WHERE key = ?1",
8566 fparams![key.as_str()],
8567 |row| row.get_typed(0),
8568 )
8569 .optional()?;
8570 let Some(raw) = raw else {
8571 continue;
8572 };
8573 let parsed: HistoricalBundleProgress =
8574 serde_json::from_str(&raw).with_context(|| {
8575 format!(
8576 "parsing historical salvage progress checkpoint for {}",
8577 bundle.root_path.display()
8578 )
8579 })?;
8580 if parsed.progress_version == HISTORICAL_SALVAGE_PROGRESS_VERSION {
8581 return Ok(Some(parsed));
8582 }
8583 }
8584 Ok(None)
8585 }
8586
8587 fn record_historical_bundle_progress(
8588 &self,
8589 bundle: &HistoricalDatabaseBundle,
8590 method: &str,
8591 last_completed_source_row_id: i64,
8592 conversations_imported: usize,
8593 messages_imported: usize,
8594 ) -> Result<()> {
8595 let key = Self::historical_bundle_progress_key(bundle);
8596 let value = HistoricalBundleProgress {
8597 progress_version: HISTORICAL_SALVAGE_PROGRESS_VERSION,
8598 path: bundle.root_path.display().to_string(),
8599 bytes: bundle.total_bytes,
8600 modified_at_ms: bundle.modified_at_ms,
8601 method: method.to_string(),
8602 last_completed_source_row_id,
8603 conversations_imported,
8604 messages_imported,
8605 updated_at_ms: Self::now_millis(),
8606 };
8607 let value_str = serde_json::to_string(&value)?;
8608 self.conn.execute_compat(
8609 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
8610 fparams![key.as_str(), value_str.as_str()],
8611 )?;
8612 Ok(())
8613 }
8614
8615 fn clear_historical_bundle_progress(&self, bundle: &HistoricalDatabaseBundle) -> Result<()> {
8616 for key in [
8617 Self::historical_bundle_progress_key(bundle),
8618 Self::historical_bundle_legacy_progress_key(bundle),
8619 ] {
8620 self.conn
8621 .execute_compat("DELETE FROM meta WHERE key = ?1", fparams![key.as_str()])?;
8622 }
8623 Ok(())
8624 }
8625
8626 fn record_historical_bundle_import(
8627 &self,
8628 bundle: &HistoricalDatabaseBundle,
8629 method: &str,
8630 conversations_imported: usize,
8631 messages_imported: usize,
8632 ) -> Result<()> {
8633 let key = Self::historical_bundle_meta_key(bundle);
8634 let value = serde_json::json!({
8635 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
8636 "path": bundle.root_path.display().to_string(),
8637 "bytes": bundle.total_bytes,
8638 "modified_at_ms": bundle.modified_at_ms,
8639 "method": method,
8640 "conversations_imported": conversations_imported,
8641 "messages_imported": messages_imported,
8642 "recorded_at_ms": Self::now_millis(),
8643 });
8644 let value_str = serde_json::to_string(&value)?;
8645 self.conn.execute_compat(
8646 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
8647 fparams![key.as_str(), value_str.as_str()],
8648 )?;
8649 Ok(())
8650 }
8651
8652 fn historical_import_error_is_split_retryable(err: &anyhow::Error) -> bool {
8653 const RETRYABLE_PATTERNS: &[&str] = &[
8654 "out of memory",
8655 "string or blob too big",
8656 "too many sql variables",
8657 ];
8658 err.chain().any(|cause| {
8659 let rendered = cause.to_string().to_ascii_lowercase();
8660 RETRYABLE_PATTERNS
8661 .iter()
8662 .any(|pattern| rendered.contains(pattern))
8663 })
8664 }
8665
8666 fn split_historical_batch_entry_messages(
8667 entry: &HistoricalBatchEntry,
8668 ) -> Option<(HistoricalBatchEntry, HistoricalBatchEntry)> {
8669 if entry.conversation.messages.len() < 2 {
8670 return None;
8671 }
8672 let split_at = entry.conversation.messages.len() / 2;
8673 if split_at == 0 || split_at >= entry.conversation.messages.len() {
8674 return None;
8675 }
8676
8677 let mut left = entry.clone();
8678 left.conversation.messages = entry.conversation.messages[..split_at].to_vec();
8679
8680 let mut right = entry.clone();
8681 right.conversation.messages = entry.conversation.messages[split_at..].to_vec();
8682
8683 Some((left, right))
8684 }
8685
8686 fn import_historical_batch_with_retry<F>(
8687 entries: &[HistoricalBatchEntry],
8688 insert_batch: &mut F,
8689 ) -> Result<HistoricalBatchImportTotals>
8690 where
8691 F: FnMut(&[HistoricalBatchEntry]) -> Result<HistoricalBatchImportTotals>,
8692 {
8693 match insert_batch(entries) {
8694 Ok(totals) => Ok(totals),
8695 Err(err) if Self::historical_import_error_is_split_retryable(&err) => {
8696 if entries.len() > 1 {
8697 let mid = entries.len() / 2;
8698 tracing::warn!(
8699 batch_entries = entries.len(),
8700 split_left = mid,
8701 split_right = entries.len() - mid,
8702 error = %err,
8703 "historical salvage batch failed; retrying in smaller sub-batches"
8704 );
8705 let left =
8706 Self::import_historical_batch_with_retry(&entries[..mid], insert_batch)?;
8707 let right =
8708 Self::import_historical_batch_with_retry(&entries[mid..], insert_batch)?;
8709 return Ok(HistoricalBatchImportTotals {
8710 inserted_source_rows: left.inserted_source_rows
8711 + right.inserted_source_rows,
8712 inserted_messages: left.inserted_messages + right.inserted_messages,
8713 });
8714 }
8715
8716 if let Some(entry) = entries.first()
8717 && let Some((left, right)) = Self::split_historical_batch_entry_messages(entry)
8718 {
8719 tracing::warn!(
8720 source_row_id = entry.source_row_id,
8721 message_count = entry.conversation.messages.len(),
8722 error = %err,
8723 "historical salvage conversation failed; retrying in smaller message slices"
8724 );
8725 let left_totals = Self::import_historical_batch_with_retry(
8726 std::slice::from_ref(&left),
8727 insert_batch,
8728 )?;
8729 let right_totals = Self::import_historical_batch_with_retry(
8730 std::slice::from_ref(&right),
8731 insert_batch,
8732 )?;
8733 return Ok(HistoricalBatchImportTotals {
8734 inserted_source_rows: usize::from(
8735 left_totals.inserted_source_rows > 0
8736 || right_totals.inserted_source_rows > 0,
8737 ),
8738 inserted_messages: left_totals
8739 .inserted_messages
8740 .saturating_add(right_totals.inserted_messages),
8741 });
8742 }
8743
8744 Err(err)
8745 }
8746 Err(err) => Err(err),
8747 }
8748 }
8749
8750 fn import_historical_sources(&self, source_conn: &FrankenConnection) -> Result<()> {
8751 let sources: Vec<Source> = match source_conn.query_map_collect(
8752 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at
8753 FROM sources",
8754 fparams![],
8755 |row| {
8756 let raw_source_id: String = row.get_typed(0)?;
8757 let kind_str: String = row.get_typed(1)?;
8758 let raw_host_label: Option<String> = row.get_typed(2)?;
8759 let config_json_raw: Option<String> = row.get_typed(5)?;
8760 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
8761 Some(raw_source_id.as_str()),
8762 Some(kind_str.as_str()),
8763 raw_host_label.as_deref(),
8764 );
8765 Ok(Source {
8766 id: source_id,
8767 kind: source_kind,
8768 host_label,
8769 machine_id: row.get_typed(3)?,
8770 platform: row.get_typed(4)?,
8771 config_json: config_json_raw.and_then(|raw| serde_json::from_str(&raw).ok()),
8772 created_at: row.get_typed(6)?,
8773 updated_at: row.get_typed(7)?,
8774 })
8775 },
8776 ) {
8777 Ok(rows) => rows,
8778 Err(err) => {
8779 tracing::warn!(error = %err, "historical sources table unavailable; skipping source import");
8780 return Ok(());
8781 }
8782 };
8783
8784 for source in sources {
8785 self.upsert_source(&source)?;
8786 }
8787 Ok(())
8788 }
8789
    /// Imports the conversations and messages of a historical database bundle
    /// into this storage, in size-limited batches with durable checkpoints.
    ///
    /// Flow:
    /// 1. Load batch limits, the known source ids and any stored checkpoint
    ///    for `bundle`.
    /// 2. Read the conversation rows from `source_conn` ordered by id, and
    ///    only those after the checkpointed row id when resuming.
    /// 3. For each conversation, load its messages, make sure its source,
    ///    agent and workspace exist here, and queue it in the pending batch.
    /// 4. Flush the pending batch whenever a limit is reached. Each flush
    ///    inserts via `import_historical_batch_with_retry` and then records a
    ///    progress checkpoint at the batch's last source row id.
    ///
    /// Returns `(conversations_imported, messages_imported)`. When resuming,
    /// both counters start from the values stored in the checkpoint.
    ///
    /// # Errors
    /// Propagates query, insert, upsert and checkpoint-write failures.
    fn import_historical_conversations(
        &self,
        bundle: &HistoricalDatabaseBundle,
        salvage_method: &str,
        source_conn: &FrankenConnection,
    ) -> Result<(usize, usize)> {
        let batch_limits = historical_import_batch_limits();
        let cache_enabled = IndexingCache::is_enabled();
        let mut indexing_cache = IndexingCache::new();
        // Source ids already present in this storage; used to avoid repeated
        // upserts for the same source.
        let mut known_sources: HashSet<String> = self
            .list_sources()?
            .into_iter()
            .map(|source| source.id)
            .collect();
        let resume_progress = self.load_historical_bundle_progress(bundle)?;
        // A checkpoint only narrows the scan when its row id is positive.
        let resume_after_row_id = resume_progress
            .as_ref()
            .map(|progress| progress.last_completed_source_row_id)
            .filter(|row_id| *row_id > 0);

        tracing::info!(
            target: "cass::historical_salvage",
            batch_conversations = batch_limits.conversations,
            batch_messages = batch_limits.messages,
            batch_payload_chars = batch_limits.payload_chars,
            cache_enabled,
            resume_after_row_id,
            "configured historical salvage batch limits"
        );

        if let Some(progress) = &resume_progress {
            tracing::info!(
                target: "cass::historical_salvage",
                path = %bundle.root_path.display(),
                resume_after_row_id = progress.last_completed_source_row_id,
                prior_conversations_imported = progress.conversations_imported,
                prior_messages_imported = progress.messages_imported,
                "resuming historical salvage bundle from durable checkpoint"
            );
        }

        // The two statements differ only in the `WHERE c.id > ?1` resume
        // filter; the parameter list below is chosen to match.
        let conv_sql = if resume_after_row_id.is_some() {
            "SELECT
                 c.id,
                 COALESCE(a.slug, 'unknown'),
                 w.path,
                 c.external_id,
                 c.title,
                 c.source_path,
                 c.started_at,
                 c.ended_at,
                 c.approx_tokens,
                 c.metadata_json,
                 c.source_id,
                 c.origin_host
             FROM conversations c
             LEFT JOIN agents a ON c.agent_id = a.id
             LEFT JOIN workspaces w ON c.workspace_id = w.id
             WHERE c.id > ?1
             ORDER BY c.id"
        } else {
            "SELECT
                 c.id,
                 COALESCE(a.slug, 'unknown'),
                 w.path,
                 c.external_id,
                 c.title,
                 c.source_path,
                 c.started_at,
                 c.ended_at,
                 c.approx_tokens,
                 c.metadata_json,
                 c.source_id,
                 c.origin_host
             FROM conversations c
             LEFT JOIN agents a ON c.agent_id = a.id
             LEFT JOIN workspaces w ON c.workspace_id = w.id
             ORDER BY c.id"
        };
        let conv_params: &[ParamValue] =
            if let Some(last_completed_source_row_id) = resume_after_row_id {
                &[ParamValue::from(last_completed_source_row_id)]
            } else {
                &[]
            };

        // All selected conversation rows are collected into memory up front;
        // messages are fetched per conversation further below.
        #[allow(clippy::type_complexity)]
        let conv_rows: Vec<(
            i64,
            String,
            Option<String>,
            Option<String>,
            Option<String>,
            String,
            Option<i64>,
            Option<i64>,
            Option<i64>,
            Option<String>,
            Option<String>,
            Option<String>,
        )> = source_conn
            .query_map_collect(conv_sql, conv_params, |row| {
                Ok((
                    row.get_typed::<i64>(0)?,
                    row.get_typed::<String>(1)?,
                    row.get_typed::<Option<String>>(2)?,
                    row.get_typed::<Option<String>>(3)?,
                    row.get_typed::<Option<String>>(4)?,
                    row.get_typed::<String>(5)?,
                    row.get_typed::<Option<i64>>(6)?,
                    row.get_typed::<Option<i64>>(7)?,
                    row.get_typed::<Option<i64>>(8)?,
                    row.get_typed::<Option<String>>(9)?,
                    row.get_typed::<Option<String>>(10)?,
                    row.get_typed::<Option<String>>(11)?,
                ))
            })
            .context("querying historical conversations")?;

        let msg_sql = "SELECT idx, role, author, created_at, content, extra_json
             FROM messages
             WHERE conversation_id = ?1
             ORDER BY idx";

        // Running totals continue from the checkpoint when resuming.
        let mut imported_conversations = resume_progress
            .as_ref()
            .map(|progress| progress.conversations_imported)
            .unwrap_or(0);
        let mut imported_messages = resume_progress
            .as_ref()
            .map(|progress| progress.messages_imported)
            .unwrap_or(0);
        // State of the batch currently being accumulated.
        let mut pending_batch: Vec<HistoricalBatchEntry> = Vec::new();
        let mut pending_batch_messages = 0usize;
        let mut pending_batch_chars = 0usize;
        let mut pending_batch_first_row_id: Option<i64> = None;
        let mut pending_batch_last_row_id: Option<i64> = None;

        // Inserts the pending batch, updates the running totals, records a
        // checkpoint and resets the batch state. All mutable state is passed
        // in explicitly rather than captured; only `bundle` and
        // `salvage_method` are captured. An empty batch is a no-op.
        let flush_batch = |storage: &FrankenStorage,
                           batch: &mut Vec<HistoricalBatchEntry>,
                           pending_messages: &mut usize,
                           pending_chars: &mut usize,
                           first_row_id: &mut Option<i64>,
                           last_row_id: &mut Option<i64>,
                           imported_conversations: &mut usize,
                           imported_messages: &mut usize|
         -> Result<()> {
            if batch.is_empty() {
                return Ok(());
            }

            let batch_first_row_id = *first_row_id;
            let batch_last_row_id = *last_row_id;
            if historical_salvage_debug_enabled() {
                eprintln!(
                    "[historical-salvage] flushing batch rows {:?}..{:?} conversations={} messages={} payload_chars={}",
                    batch_first_row_id,
                    batch_last_row_id,
                    batch.len(),
                    *pending_messages,
                    *pending_chars
                );
            }
            tracing::info!(
                target: "cass::historical_salvage",
                batch_conversations = batch.len(),
                batch_messages = *pending_messages,
                batch_payload_chars = *pending_chars,
                first_source_row_id = batch_first_row_id,
                last_source_row_id = batch_last_row_id,
                "flushing historical salvage batch"
            );

            // Inserts one slice of entries and tallies what was inserted. The
            // retry helper may call this several times with smaller slices.
            let mut insert_batch =
                |entries: &[HistoricalBatchEntry]| -> Result<HistoricalBatchImportTotals> {
                    let borrowed_batch: Vec<(i64, Option<i64>, &Conversation)> = entries
                        .iter()
                        .map(|entry| (entry.agent_id, entry.workspace_id, &entry.conversation))
                        .collect();
                    let outcomes = storage
                        .insert_conversations_batched(&borrowed_batch)
                        .with_context(|| {
                            let first_source_row_id =
                                entries.first().map(|entry| entry.source_row_id);
                            let last_source_row_id =
                                entries.last().map(|entry| entry.source_row_id);
                            format!(
                                "inserting historical salvage batch source rows {:?}..{:?}",
                                first_source_row_id, last_source_row_id
                            )
                        })?;
                    let mut totals = HistoricalBatchImportTotals::default();
                    for outcome in outcomes {
                        // Only outcomes that inserted at least one message
                        // count as an imported source row.
                        if !outcome.inserted_indices.is_empty() {
                            totals.inserted_source_rows += 1;
                            totals.inserted_messages += outcome.inserted_indices.len();
                        }
                    }
                    Ok(totals)
                };
            let totals =
                Self::import_historical_batch_with_retry(batch.as_slice(), &mut insert_batch)?;
            *imported_conversations =
                (*imported_conversations).saturating_add(totals.inserted_source_rows);
            *imported_messages = (*imported_messages).saturating_add(totals.inserted_messages);
            // Checkpoint after the insert so a later run can resume after this
            // batch's last source row id.
            if let Some(last_completed_row_id) = batch_last_row_id {
                storage.record_historical_bundle_progress(
                    bundle,
                    salvage_method,
                    last_completed_row_id,
                    *imported_conversations,
                    *imported_messages,
                )?;
            }
            tracing::info!(
                target: "cass::historical_salvage",
                batch_conversations = batch.len(),
                batch_messages = *pending_messages,
                imported_conversations = *imported_conversations,
                imported_messages = *imported_messages,
                first_source_row_id = batch_first_row_id,
                last_source_row_id = batch_last_row_id,
                "historical salvage batch committed"
            );
            if historical_salvage_debug_enabled() {
                eprintln!(
                    "[historical-salvage] committed batch rows {:?}..{:?} imported_conversations={} imported_messages={}",
                    batch_first_row_id,
                    batch_last_row_id,
                    *imported_conversations,
                    *imported_messages
                );
            }
            // Reset the accumulator for the next batch.
            batch.clear();
            *pending_messages = 0;
            *pending_chars = 0;
            *first_row_id = None;
            *last_row_id = None;
            Ok(())
        };

        for (
            conversation_row_id,
            agent_slug,
            workspace_path,
            external_id,
            title,
            source_path,
            started_at,
            ended_at,
            approx_tokens,
            metadata_json_raw,
            raw_source_id,
            raw_origin_host,
        ) in conv_rows
        {
            // Normalize provenance with the same helpers the search index uses.
            let source_id = crate::search::tantivy::normalized_index_source_id(
                raw_source_id.as_deref(),
                None,
                raw_origin_host.as_deref(),
            );
            let origin_host =
                crate::search::tantivy::normalized_index_origin_host(raw_origin_host.as_deref());

            let messages: Vec<Message> = source_conn
                .query_map_collect(msg_sql, fparams![conversation_row_id], |msg_row| {
                    let role: String = msg_row.get_typed(1)?;
                    Ok(Message {
                        id: None,
                        idx: msg_row.get_typed(0)?,
                        // "agent" and "assistant" both map to the agent role;
                        // unrecognized roles are preserved verbatim.
                        role: match role.as_str() {
                            "user" => MessageRole::User,
                            "agent" | "assistant" => MessageRole::Agent,
                            "tool" => MessageRole::Tool,
                            "system" => MessageRole::System,
                            other => MessageRole::Other(other.to_string()),
                        },
                        author: msg_row.get_typed(2)?,
                        created_at: msg_row.get_typed(3)?,
                        content: msg_row.get_typed(4)?,
                        extra_json: parse_historical_json_column(msg_row.get_typed(5)?),
                        snippets: Vec::new(),
                    })
                })
                .context("collecting historical message rows")?;

            // Conversations without messages are not imported and do not
            // advance the checkpoint row id.
            if messages.is_empty() {
                continue;
            }

            let conversation_message_count = messages.len();
            let conversation_chars = messages
                .iter()
                .map(message_payload_size_hint)
                .sum::<usize>();

            let conversation = Conversation {
                id: None,
                agent_slug: agent_slug.clone(),
                workspace: workspace_path.map(PathBuf::from),
                external_id,
                title,
                source_path: PathBuf::from(source_path),
                started_at,
                ended_at,
                approx_tokens,
                metadata_json: parse_json_column(metadata_json_raw),
                messages,
                source_id,
                origin_host,
            };

            // Make sure the conversation's source row exists here. Unknown
            // non-local sources get a placeholder of kind `Ssh` labelled with
            // the origin host.
            if !known_sources.contains(&conversation.source_id) {
                let placeholder = if conversation.source_id == LOCAL_SOURCE_ID {
                    Source::local()
                } else {
                    Source {
                        id: conversation.source_id.clone(),
                        kind: SourceKind::Ssh,
                        host_label: conversation.origin_host.clone(),
                        machine_id: None,
                        platform: None,
                        config_json: None,
                        created_at: None,
                        updated_at: None,
                    }
                };
                self.upsert_source(&placeholder)?;
                known_sources.insert(conversation.source_id.clone());
            }

            // The historical schema only yields the slug, so it doubles as the
            // agent name.
            let agent = Agent {
                id: None,
                slug: agent_slug.clone(),
                name: agent_slug,
                version: None,
                kind: AgentKind::Cli,
            };
            let agent_id = if cache_enabled {
                indexing_cache.get_or_insert_agent(self, &agent)?
            } else {
                self.ensure_agent(&agent)?
            };
            let workspace_id = if let Some(workspace) = &conversation.workspace {
                if cache_enabled {
                    Some(indexing_cache.get_or_insert_workspace(self, workspace, None)?)
                } else {
                    Some(self.ensure_workspace(workspace, None)?)
                }
            } else {
                None
            };

            // Flush first if adding this conversation would push a non-empty
            // batch past the message or payload limits, or if the batch is
            // already at the conversation limit.
            let exceeds_pending_limits = !pending_batch.is_empty()
                && (pending_batch.len() >= batch_limits.conversations
                    || pending_batch_messages.saturating_add(conversation_message_count)
                        > batch_limits.messages
                    || pending_batch_chars.saturating_add(conversation_chars)
                        > batch_limits.payload_chars);
            if exceeds_pending_limits {
                flush_batch(
                    self,
                    &mut pending_batch,
                    &mut pending_batch_messages,
                    &mut pending_batch_chars,
                    &mut pending_batch_first_row_id,
                    &mut pending_batch_last_row_id,
                    &mut imported_conversations,
                    &mut imported_messages,
                )?;
            }

            if pending_batch_first_row_id.is_none() {
                pending_batch_first_row_id = Some(conversation_row_id);
            }
            pending_batch_last_row_id = Some(conversation_row_id);
            pending_batch_messages =
                pending_batch_messages.saturating_add(conversation_message_count);
            pending_batch_chars = pending_batch_chars.saturating_add(conversation_chars);
            pending_batch.push(HistoricalBatchEntry {
                source_row_id: conversation_row_id,
                agent_id,
                workspace_id,
                conversation,
            });

            // Flush immediately once the batch has reached any limit. A single
            // oversized conversation therefore forms a batch of its own.
            if pending_batch.len() >= batch_limits.conversations
                || pending_batch_messages >= batch_limits.messages
                || pending_batch_chars >= batch_limits.payload_chars
            {
                flush_batch(
                    self,
                    &mut pending_batch,
                    &mut pending_batch_messages,
                    &mut pending_batch_chars,
                    &mut pending_batch_first_row_id,
                    &mut pending_batch_last_row_id,
                    &mut imported_conversations,
                    &mut imported_messages,
                )?;
            }
        }

        // Flush whatever is left after the last conversation.
        flush_batch(
            self,
            &mut pending_batch,
            &mut pending_batch_messages,
            &mut pending_batch_chars,
            &mut pending_batch_first_row_id,
            &mut pending_batch_last_row_id,
            &mut imported_conversations,
            &mut imported_messages,
        )?;

        if cache_enabled {
            let (hits, misses, hit_rate) = indexing_cache.stats();
            tracing::info!(
                target: "cass::historical_salvage",
                hits,
                misses,
                hit_rate = format!("{:.1}%", hit_rate * 100.0),
                agents = indexing_cache.agent_count(),
                workspaces = indexing_cache.workspace_count(),
                sources = known_sources.len(),
                "historical salvage cache stats"
            );
        }

        Ok((imported_conversations, imported_messages))
    }
9225
    /// Salvages every historical database bundle discovered for
    /// `canonical_db_path` that has not been imported yet.
    ///
    /// Per bundle:
    /// * Already recorded as salvaged: clear any leftover checkpoint and skip.
    /// * Cannot be opened: log a warning, clear its checkpoint and skip.
    /// * Has a checkpoint that already covers the bundle's highest
    ///   conversation id: record it as salvaged using the checkpoint's totals
    ///   and skip the import.
    /// * Otherwise: import sources and conversations, record the ledger entry
    ///   and clear the checkpoint.
    ///
    /// The returned outcome counts all discovered bundles in
    /// `bundles_considered`. The `*_imported` counters only include bundles
    /// that went through the full import path in this call.
    ///
    /// # Errors
    /// Propagates ledger, checkpoint and import failures. An unreadable
    /// bundle is not an error.
    pub fn salvage_historical_databases(
        &self,
        canonical_db_path: &Path,
    ) -> Result<HistoricalSalvageOutcome> {
        let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
        let mut outcome = HistoricalSalvageOutcome {
            bundles_considered: ordered_bundles.len(),
            ..HistoricalSalvageOutcome::default()
        };

        for bundle in ordered_bundles {
            if self.historical_bundle_already_imported(&bundle)? {
                // Nothing to import; drop any stale checkpoint.
                self.clear_historical_bundle_progress(&bundle)?;
                continue;
            }

            let source = match open_historical_bundle_for_salvage(&bundle).with_context(|| {
                format!(
                    "opening historical bundle {} for salvage",
                    bundle.root_path.display()
                )
            }) {
                Ok(source) => source,
                Err(err) => {
                    // Best effort: an unreadable bundle is skipped rather than
                    // failing the whole salvage run.
                    tracing::warn!(
                        path = %bundle.root_path.display(),
                        error = %err,
                        "skipping unreadable historical cass database bundle during salvage"
                    );
                    self.clear_historical_bundle_progress(&bundle)?;
                    continue;
                }
            };

            if let Some(progress) = self.load_historical_bundle_progress(&bundle)? {
                // Fast path: if the checkpoint already reached the bundle's
                // highest conversation id, there is nothing left to scan. A
                // failed MAX(id) query is treated as 0, which disables the
                // fast path and falls through to the regular import.
                let backup_max_conversation_id: i64 = source
                    .conn
                    .query_row_map(
                        "SELECT COALESCE(MAX(id), 0) FROM conversations",
                        fparams![],
                        |row| row.get_typed(0),
                    )
                    .unwrap_or(0);
                if backup_max_conversation_id > 0
                    && progress.last_completed_source_row_id >= backup_max_conversation_id
                {
                    self.record_historical_bundle_import(
                        &bundle,
                        source.method,
                        progress.conversations_imported,
                        progress.messages_imported,
                    )?;
                    self.clear_historical_bundle_progress(&bundle)?;
                    tracing::info!(
                        path = %bundle.root_path.display(),
                        last_completed_source_row_id = progress.last_completed_source_row_id,
                        backup_max_conversation_id,
                        conversations_imported = progress.conversations_imported,
                        messages_imported = progress.messages_imported,
                        "historical bundle already fully imported per checkpoint; marking salvaged and skipping O(n) re-scan"
                    );
                    continue;
                }
            }

            // Regular path: sources first, then conversations.
            self.import_historical_sources(&source.conn)?;
            let (imported_conversations, imported_messages) =
                self.import_historical_conversations(&bundle, source.method, &source.conn)?;
            // The ledger entry is written before the checkpoint is cleared.
            self.record_historical_bundle_import(
                &bundle,
                source.method,
                imported_conversations,
                imported_messages,
            )?;
            self.clear_historical_bundle_progress(&bundle)?;

            outcome.bundles_imported += 1;
            outcome.conversations_imported += imported_conversations;
            outcome.messages_imported += imported_messages;

            tracing::info!(
                path = %bundle.root_path.display(),
                bytes = bundle.total_bytes,
                method = source.method,
                imported_conversations,
                imported_messages,
                "salvaged historical cass database bundle"
            );
        }

        Ok(outcome)
    }
9325
9326 pub fn delete_source(&self, id: &str, _cascade: bool) -> Result<bool> {
9328 if id == LOCAL_SOURCE_ID {
9329 anyhow::bail!("cannot delete the local source");
9330 }
9331 let count = self
9332 .conn
9333 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams![id])?;
9334 if count > 0 {
9335 self.invalidate_conversation_source_cache(id);
9336 }
9337 Ok(count > 0)
9338 }
9339
9340 pub fn insert_conversation_tree(
9342 &self,
9343 agent_id: i64,
9344 workspace_id: Option<i64>,
9345 conv: &Conversation,
9346 ) -> Result<InsertOutcome> {
9347 let normalized_conv = normalized_conversation_for_storage(conv);
9348 let conv = normalized_conv.as_ref();
9349 self.ensure_source_for_conversation(conv)?;
9350 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
9351 let defer_analytics_updates = defer_analytics_updates_enabled();
9352 let conversation_key = conversation_merge_key(agent_id, conv);
9353 let mut tx = self.conn.transaction()?;
9354 let existing = franken_find_existing_conversation_with_tail_by_key(
9355 &tx,
9356 &conversation_key,
9357 Some(conv),
9358 )?;
9359 if let Some(existing) = existing {
9360 let outcome = self.franken_append_messages_with_tail_in_tx(
9361 &tx,
9362 agent_id,
9363 existing.id,
9364 conv,
9365 existing.tail_state,
9366 defer_lexical_updates,
9367 defer_analytics_updates,
9368 )?;
9369 tx.commit()?;
9370 return Ok(outcome);
9371 }
9372
9373 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
9374 &tx,
9375 agent_id,
9376 workspace_id,
9377 conv,
9378 &conversation_key,
9379 )? {
9380 ConversationInsertStatus::Inserted(conv_id) => conv_id,
9381 ConversationInsertStatus::Existing(existing_id) => {
9382 let ExistingMessageLookup {
9383 by_idx: mut existing_messages,
9384 replay: mut existing_replay_fingerprints,
9385 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
9386 let ExistingConversationNewMessages {
9387 messages: new_messages,
9388 new_chars: _planned_new_chars,
9389 idx_collision_count,
9390 first_collision_idx,
9391 } = collect_new_messages_for_existing_conversation(
9392 existing_id,
9393 conv,
9394 &mut existing_messages,
9395 &mut existing_replay_fingerprints,
9396 "skipping replay-equivalent recovered message with shifted idx",
9397 );
9398 let (inserted_last_idx, inserted_last_created_at) =
9399 borrowed_messages_tail_state(&new_messages);
9400 let mut inserted_indices = Vec::new();
9401 let mut fts_entries = Vec::new();
9402 let mut fts_pending_chars = 0usize;
9403 let mut _fts_inserted_total = 0usize;
9404 let inserted_messages =
9405 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
9406 let inserted_chars = inserted_messages
9407 .iter()
9408 .map(|(_, msg)| msg.content.len() as i64)
9409 .sum::<i64>();
9410 for (msg_id, msg) in inserted_messages {
9411 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9412 if !defer_lexical_updates {
9413 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9414 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9415 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9416 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9417 {
9418 flush_pending_fts_entries(
9419 self,
9420 &tx,
9421 &mut fts_entries,
9422 &mut fts_pending_chars,
9423 &mut _fts_inserted_total,
9424 )?;
9425 }
9426 }
9427 inserted_indices.push(msg.idx);
9428 }
9429
9430 if idx_collision_count > 0 {
9431 tracing::warn!(
9432 conversation_id = existing_id,
9433 collision_count = idx_collision_count,
9434 first_idx = first_collision_idx,
9435 source_path = %conv.source_path.display(),
9436 "message idx collisions encountered while merging recovered conversation; retaining canonical message variants"
9437 );
9438 }
9439
9440 if !defer_lexical_updates {
9441 flush_pending_fts_entries(
9442 self,
9443 &tx,
9444 &mut fts_entries,
9445 &mut fts_pending_chars,
9446 &mut _fts_inserted_total,
9447 )?;
9448 }
9449
9450 let conv_last_ts = conversation_tail_ended_at_candidate(conv);
9451 franken_update_conversation_tail_state(
9452 &tx,
9453 existing_id,
9454 conv_last_ts,
9455 inserted_last_idx,
9456 inserted_last_created_at,
9457 )?;
9458 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
9459 {
9460 franken_update_external_conversation_tail_lookup_key(
9461 &tx,
9462 &lookup_key,
9463 conv_last_ts,
9464 inserted_last_idx,
9465 inserted_last_created_at,
9466 )?;
9467 }
9468
9469 if !defer_analytics_updates && !inserted_indices.is_empty() {
9470 franken_update_daily_stats_in_tx(
9471 self,
9472 &tx,
9473 &conv.agent_slug,
9474 &conv.source_id,
9475 conversation_effective_started_at(conv),
9476 StatsDelta {
9477 session_count_delta: 0,
9478 message_count_delta: inserted_indices.len() as i64,
9479 total_chars_delta: inserted_chars,
9480 },
9481 )?;
9482 }
9483
9484 tx.commit()?;
9485 return Ok(InsertOutcome {
9486 conversation_id: existing_id,
9487 conversation_inserted: false,
9488 inserted_indices,
9489 });
9490 }
9491 };
9492 let mut fts_entries = Vec::new();
9493 let mut fts_pending_chars = 0usize;
9494 let mut _fts_inserted_total = 0usize;
9495 let mut total_chars: i64 = 0;
9496 let mut inserted_indices = Vec::new();
9497 let mut pending_messages = HashMap::new();
9498 let mut pending_replay_fingerprints = HashSet::new();
9499 let mut idx_collision_count = 0usize;
9500 let mut first_collision_idx: Option<i64> = None;
9501 let mut new_messages = Vec::new();
9502 for msg in &conv.messages {
9503 let incoming_fingerprint = message_merge_fingerprint(msg);
9504 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
9505 if existing_fingerprint != &incoming_fingerprint {
9506 idx_collision_count = idx_collision_count.saturating_add(1);
9507 first_collision_idx.get_or_insert(msg.idx);
9508 }
9509 continue;
9510 }
9511 let incoming_replay = message_replay_fingerprint(msg);
9512 if pending_replay_fingerprints.contains(&incoming_replay) {
9513 tracing::debug!(
9514 conversation_id = conv_id,
9515 idx = msg.idx,
9516 source_path = %conv.source_path.display(),
9517 "skipping replay-equivalent duplicate message within new conversation insert"
9518 );
9519 continue;
9520 }
9521 pending_messages.insert(msg.idx, incoming_fingerprint);
9522 pending_replay_fingerprints.insert(incoming_replay);
9523 new_messages.push(msg);
9524 }
9525 let inserted_message_ids = franken_batch_insert_new_messages(&tx, conv_id, &new_messages)?;
9526 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9527 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9528 if !defer_lexical_updates {
9529 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9530 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9531 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9532 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9533 {
9534 flush_pending_fts_entries(
9535 self,
9536 &tx,
9537 &mut fts_entries,
9538 &mut fts_pending_chars,
9539 &mut _fts_inserted_total,
9540 )?;
9541 }
9542 }
9543 total_chars += msg.content.len() as i64;
9544 inserted_indices.push(msg.idx);
9545 }
9546 if idx_collision_count > 0 {
9547 tracing::warn!(
9548 conversation_id = conv_id,
9549 collision_count = idx_collision_count,
9550 first_idx = first_collision_idx,
9551 source_path = %conv.source_path.display(),
9552 "message idx collisions encountered while inserting a new conversation; retaining the first canonical variant per idx"
9553 );
9554 }
9555 if !defer_lexical_updates {
9556 flush_pending_fts_entries(
9557 self,
9558 &tx,
9559 &mut fts_entries,
9560 &mut fts_pending_chars,
9561 &mut _fts_inserted_total,
9562 )?;
9563 }
9564
9565 if !defer_analytics_updates {
9566 franken_update_daily_stats_in_tx(
9567 self,
9568 &tx,
9569 &conv.agent_slug,
9570 &conv.source_id,
9571 conversation_effective_started_at(conv),
9572 StatsDelta {
9573 session_count_delta: 1,
9574 message_count_delta: inserted_indices.len() as i64,
9575 total_chars_delta: total_chars,
9576 },
9577 )?;
9578 }
9579
9580 tx.commit()?;
9581 Ok(InsertOutcome {
9582 conversation_id: conv_id,
9583 conversation_inserted: true,
9584 inserted_indices,
9585 })
9586 }
9587
9588 #[cfg(test)]
9589 fn insert_conversation_tree_with_profile(
9590 &self,
9591 agent_id: i64,
9592 workspace_id: Option<i64>,
9593 conv: &Conversation,
9594 profile: &mut InsertConversationTreePerfProfile,
9595 ) -> Result<InsertOutcome> {
9596 let total_start = Instant::now();
9597 let normalized_conv = normalized_conversation_for_storage(conv);
9598 let conv = normalized_conv.as_ref();
9599
9600 let source_start = Instant::now();
9601 self.ensure_source_for_conversation(conv)?;
9602 profile.source_duration += source_start.elapsed();
9603
9604 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
9605 let defer_analytics_updates = defer_analytics_updates_enabled();
9606 let conversation_key = conversation_merge_key(agent_id, conv);
9607
9608 let tx_open_start = Instant::now();
9609 let mut tx = self.conn.transaction()?;
9610 profile.tx_open_duration += tx_open_start.elapsed();
9611
9612 let existing_lookup_start = Instant::now();
9613 let existing =
9614 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
9615 profile.existing_lookup_duration += existing_lookup_start.elapsed();
9616 if let Some(existing_id) = existing {
9617 return Err(anyhow!(
9618 "profile helper expects new conversation path, found existing id {existing_id}"
9619 ));
9620 }
9621
9622 let conversation_row_start = Instant::now();
9623 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
9624 &tx,
9625 agent_id,
9626 workspace_id,
9627 conv,
9628 &conversation_key,
9629 )? {
9630 ConversationInsertStatus::Inserted(conv_id) => conv_id,
9631 ConversationInsertStatus::Existing(existing_id) => {
9632 return Err(anyhow!(
9633 "profile helper expected inserted conversation row, reused existing id {existing_id}"
9634 ));
9635 }
9636 };
9637 profile.conversation_row_duration += conversation_row_start.elapsed();
9638
9639 let mut fts_entries = Vec::new();
9640 let mut fts_pending_chars = 0usize;
9641 let mut fts_inserted_total = 0usize;
9642 let mut total_chars: i64 = 0;
9643 let mut inserted_indices = Vec::new();
9644 let mut pending_messages = HashMap::new();
9645 let mut pending_replay_fingerprints = HashSet::new();
9646 let mut idx_collision_count = 0usize;
9647 let mut first_collision_idx: Option<i64> = None;
9648 let mut new_messages = Vec::new();
9649
9650 for msg in &conv.messages {
9651 let incoming_fingerprint = message_merge_fingerprint(msg);
9652 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
9653 if existing_fingerprint != &incoming_fingerprint {
9654 idx_collision_count = idx_collision_count.saturating_add(1);
9655 first_collision_idx.get_or_insert(msg.idx);
9656 }
9657 continue;
9658 }
9659
9660 let incoming_replay = message_replay_fingerprint(msg);
9661 if pending_replay_fingerprints.contains(&incoming_replay) {
9662 tracing::debug!(
9663 conversation_id = conv_id,
9664 idx = msg.idx,
9665 source_path = %conv.source_path.display(),
9666 "skipping replay-equivalent duplicate message within profiled new conversation insert"
9667 );
9668 continue;
9669 }
9670
9671 pending_messages.insert(msg.idx, incoming_fingerprint);
9672 pending_replay_fingerprints.insert(incoming_replay);
9673 new_messages.push(msg);
9674 }
9675
9676 let message_insert_start = Instant::now();
9677 let inserted_message_ids = franken_batch_insert_new_messages_with_profile(
9678 &tx,
9679 conv_id,
9680 &new_messages,
9681 &mut profile.message_insert_breakdown,
9682 )?;
9683 profile.message_insert_duration += message_insert_start.elapsed();
9684
9685 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9686 let snippet_insert_start = Instant::now();
9687 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9688 profile.snippet_insert_duration += snippet_insert_start.elapsed();
9689
9690 if !defer_lexical_updates {
9691 let fts_entry_start = Instant::now();
9692 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9693 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9694 profile.fts_entry_duration += fts_entry_start.elapsed();
9695 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9696 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9697 {
9698 let fts_flush_start = Instant::now();
9699 flush_pending_fts_entries(
9700 self,
9701 &tx,
9702 &mut fts_entries,
9703 &mut fts_pending_chars,
9704 &mut fts_inserted_total,
9705 )?;
9706 profile.fts_flush_duration += fts_flush_start.elapsed();
9707 }
9708 }
9709
9710 total_chars += msg.content.len() as i64;
9711 inserted_indices.push(msg.idx);
9712 }
9713
9714 if idx_collision_count > 0 {
9715 tracing::warn!(
9716 conversation_id = conv_id,
9717 collision_count = idx_collision_count,
9718 first_idx = first_collision_idx,
9719 source_path = %conv.source_path.display(),
9720 "message idx collisions encountered while profiling a new conversation insert; retaining the first canonical variant per idx"
9721 );
9722 }
9723
9724 if !defer_lexical_updates {
9725 let fts_flush_start = Instant::now();
9726 flush_pending_fts_entries(
9727 self,
9728 &tx,
9729 &mut fts_entries,
9730 &mut fts_pending_chars,
9731 &mut fts_inserted_total,
9732 )?;
9733 profile.fts_flush_duration += fts_flush_start.elapsed();
9734 }
9735
9736 if !defer_analytics_updates {
9737 let analytics_start = Instant::now();
9738 franken_update_daily_stats_in_tx(
9739 self,
9740 &tx,
9741 &conv.agent_slug,
9742 &conv.source_id,
9743 conversation_effective_started_at(conv),
9744 StatsDelta {
9745 session_count_delta: 1,
9746 message_count_delta: inserted_indices.len() as i64,
9747 total_chars_delta: total_chars,
9748 },
9749 )?;
9750 profile.analytics_duration += analytics_start.elapsed();
9751 }
9752
9753 let commit_start = Instant::now();
9754 tx.commit()?;
9755 profile.commit_duration += commit_start.elapsed();
9756 profile.invocations += 1;
9757 profile.messages += conv.messages.len();
9758 profile.inserted_messages += inserted_indices.len();
9759 profile.total_duration += total_start.elapsed();
9760
9761 Ok(InsertOutcome {
9762 conversation_id: conv_id,
9763 conversation_inserted: true,
9764 inserted_indices,
9765 })
9766 }
9767
    /// Test-only helper that appends messages to an already-stored conversation
    /// while accumulating per-phase wall-clock timings into `profile`.
    ///
    /// Unlike the insert helper, this one requires that a conversation already
    /// exists for the merge key and returns an error otherwise. `_workspace_id`
    /// is accepted but unused.
    ///
    /// On success the transaction is committed and the returned `InsertOutcome`
    /// has `conversation_inserted: false` and lists the `idx` of every message
    /// that was written.
    #[cfg(test)]
    fn append_existing_conversation_with_profile(
        &self,
        agent_id: i64,
        _workspace_id: Option<i64>,
        conv: &Conversation,
        profile: &mut InsertConversationTreePerfProfile,
    ) -> Result<InsertOutcome> {
        let total_start = Instant::now();
        // From here on `conv` refers to the normalized conversation.
        let normalized_conv = normalized_conversation_for_storage(conv);
        let conv = normalized_conv.as_ref();

        let source_start = Instant::now();
        self.ensure_source_for_conversation(conv)?;
        profile.source_duration += source_start.elapsed();

        let defer_lexical_updates = defer_storage_lexical_updates_enabled();
        let defer_analytics_updates = defer_analytics_updates_enabled();
        let conversation_key = conversation_merge_key(agent_id, conv);

        let tx_open_start = Instant::now();
        let mut tx = self.conn.transaction()?;
        profile.tx_open_duration += tx_open_start.elapsed();

        // Look up the existing conversation together with its stored tail state.
        let existing_lookup_start = Instant::now();
        let existing = franken_find_existing_conversation_with_tail_by_key(
            &tx,
            &conversation_key,
            Some(conv),
        )?;
        profile.existing_lookup_duration += existing_lookup_start.elapsed();
        // A miss means the caller used the wrong helper.
        let existing = existing.ok_or_else(|| {
            anyhow!("append profile helper expects existing conversation for {conversation_key:?}")
        })?;
        let existing_id = existing.id;

        // Try to build the list of new messages from the stored tail state alone.
        // NOTE(review): presumably `collect_append_only_tail_messages` returns
        // `Some` only when the incoming messages are a pure append after the
        // stored tail; confirm against its definition.
        let existing_idx_lookup_start = Instant::now();
        let append_tail_state = existing.tail_state;
        let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
        let existing_plan = append_tail_state.as_ref().and_then(|state| {
            collect_append_only_tail_messages(
                conv,
                state.last_message_idx,
                state.last_message_created_at,
            )
        });
        let used_append_tail_plan = existing_plan.is_some();
        profile.existing_idx_lookup_duration += existing_idx_lookup_start.elapsed();

        // Use the tail-based plan when available; otherwise load the existing
        // message lookup from the database and filter the incoming messages
        // against it.
        let dedupe_filter_start = Instant::now();
        let ExistingConversationNewMessages {
            messages: new_messages,
            new_chars,
            idx_collision_count,
            first_collision_idx,
        } = if let Some(existing_plan) = existing_plan {
            existing_plan
        } else {
            let ExistingMessageLookup {
                by_idx: mut existing_messages,
                replay: mut existing_replay_fingerprints,
            } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
            collect_new_messages_for_existing_conversation(
                existing_id,
                conv,
                &mut existing_messages,
                &mut existing_replay_fingerprints,
                "skipping replay-equivalent profiled append message with shifted idx",
            )
        };
        profile.dedupe_filter_duration += dedupe_filter_start.elapsed();

        let mut inserted_indices = Vec::new();
        let mut fts_entries = Vec::new();
        let mut fts_pending_chars = 0usize;
        let mut fts_inserted_total = 0usize;
        // Captured before the insert loop because `new_messages` is consumed by
        // the `zip` below.
        let (inserted_last_idx, inserted_last_created_at) =
            borrowed_messages_tail_state(&new_messages);

        let message_insert_start = Instant::now();
        let inserted_message_ids = franken_append_insert_new_messages_with_profile(
            &tx,
            existing_id,
            &new_messages,
            &mut profile.message_insert_breakdown,
        )?;
        profile.message_insert_duration += message_insert_start.elapsed();

        // Pair each returned message id with its message, write its snippets and,
        // unless lexical updates are deferred, queue an FTS entry for it.
        for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
            let snippet_insert_start = Instant::now();
            franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
            profile.snippet_insert_duration += snippet_insert_start.elapsed();

            if !defer_lexical_updates {
                let fts_entry_start = Instant::now();
                fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
                fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
                profile.fts_entry_duration += fts_entry_start.elapsed();
                // Flush once either the document-count or character budget is reached.
                if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                    || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
                {
                    let fts_flush_start = Instant::now();
                    flush_pending_fts_entries(
                        self,
                        &tx,
                        &mut fts_entries,
                        &mut fts_pending_chars,
                        &mut fts_inserted_total,
                    )?;
                    profile.fts_flush_duration += fts_flush_start.elapsed();
                }
            }

            inserted_indices.push(msg.idx);
        }

        if idx_collision_count > 0 {
            tracing::warn!(
                conversation_id = existing_id,
                collision_count = idx_collision_count,
                first_idx = first_collision_idx,
                source_path = %conv.source_path.display(),
                "message idx collisions encountered while profiling append merge; retaining canonical message variants"
            );
        }

        // Final flush for any entries still pending after the loop.
        if !defer_lexical_updates {
            let fts_flush_start = Instant::now();
            flush_pending_fts_entries(
                self,
                &tx,
                &mut fts_entries,
                &mut fts_pending_chars,
                &mut fts_inserted_total,
            )?;
            profile.fts_flush_duration += fts_flush_start.elapsed();
        }

        // Update the stored tail state of the conversation row.
        let conversation_row_start = Instant::now();
        let mut exact_append_tail_set = false;
        if used_append_tail_plan {
            // With the tail-based plan, the tail is only touched when at least one
            // message with a creation timestamp was inserted.
            if let (Some(last_message_idx), Some(last_message_created_at)) =
                (inserted_last_idx, inserted_last_created_at)
            {
                // If the previously stored `ended_at` is absent or not later than
                // the last inserted message, set the tail state directly from that
                // message; otherwise go through the general update helper.
                if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
                    franken_set_conversation_tail_state_after_append(
                        &tx,
                        existing_id,
                        last_message_created_at,
                        last_message_idx,
                        last_message_created_at,
                    )?;
                    exact_append_tail_set = true;
                } else {
                    franken_update_conversation_tail_state(
                        &tx,
                        existing_id,
                        Some(last_message_created_at),
                        inserted_last_idx,
                        inserted_last_created_at,
                    )?;
                }
            }
        } else {
            // Lookup-based path: derive the ended-at candidate from the whole
            // incoming conversation.
            let conv_last_ts = conversation_tail_ended_at_candidate(conv);
            franken_update_conversation_tail_state(
                &tx,
                existing_id,
                conv_last_ts,
                inserted_last_idx,
                inserted_last_created_at,
            )?;
        }
        franken_update_external_conversation_tail_after_append(
            &tx,
            agent_id,
            conv,
            used_append_tail_plan,
            exact_append_tail_set,
            inserted_last_idx,
            inserted_last_created_at,
        )?;
        profile.conversation_row_duration += conversation_row_start.elapsed();

        // Daily stats only change when something was actually inserted; the
        // session count is unchanged because the conversation already existed.
        if !defer_analytics_updates && !inserted_indices.is_empty() {
            let analytics_start = Instant::now();
            franken_update_daily_stats_in_tx(
                self,
                &tx,
                &conv.agent_slug,
                &conv.source_id,
                conversation_effective_started_at(conv),
                StatsDelta {
                    session_count_delta: 0,
                    message_count_delta: inserted_indices.len() as i64,
                    // Uses the character count reported by the new-message plan.
                    total_chars_delta: new_chars,
                },
            )?;
            profile.analytics_duration += analytics_start.elapsed();
        }

        let commit_start = Instant::now();
        tx.commit()?;
        profile.commit_duration += commit_start.elapsed();
        profile.invocations += 1;
        profile.messages += conv.messages.len();
        profile.inserted_messages += inserted_indices.len();
        profile.total_duration += total_start.elapsed();

        Ok(InsertOutcome {
            conversation_id: existing_id,
            conversation_inserted: false,
            inserted_indices,
        })
    }
9983
    /// Append the not-yet-stored messages of `conv` to the existing conversation
    /// `conversation_id`, inside the caller-supplied transaction `tx`.
    ///
    /// This function never commits; the caller owns the transaction.
    ///
    /// * `append_tail_state` - previously stored tail state of the conversation,
    ///   if known. When present it is used to try to plan the append without
    ///   loading the existing messages.
    /// * `defer_lexical_updates` - when `true`, no FTS entries are written.
    /// * `defer_analytics_updates` - when `true`, daily stats are not updated.
    ///
    /// Returns an `InsertOutcome` with `conversation_inserted: false` and the
    /// `idx` of every message that was written.
    #[allow(clippy::too_many_arguments)]
    fn franken_append_messages_with_tail_in_tx(
        &self,
        tx: &FrankenTransaction<'_>,
        agent_id: i64,
        conversation_id: i64,
        conv: &Conversation,
        append_tail_state: Option<ExistingConversationTailState>,
        defer_lexical_updates: bool,
        defer_analytics_updates: bool,
    ) -> Result<InsertOutcome> {
        let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
        // NOTE(review): presumably `collect_append_only_tail_messages` returns
        // `Some` only when the incoming messages are a pure append after the
        // stored tail; confirm against its definition.
        let append_plan = append_tail_state.as_ref().and_then(|state| {
            collect_append_only_tail_messages(
                conv,
                state.last_message_idx,
                state.last_message_created_at,
            )
        });
        let used_append_tail_plan = append_plan.is_some();
        // Use the tail-based plan when available; otherwise load the existing
        // message lookup and filter the incoming messages against it. The planned
        // character count is ignored here; the stats delta below is computed from
        // the messages actually returned by the insert.
        let ExistingConversationNewMessages {
            messages: new_messages,
            new_chars: _planned_new_chars,
            idx_collision_count,
            first_collision_idx,
        } = if let Some(append_plan) = append_plan {
            append_plan
        } else {
            let ExistingMessageLookup {
                by_idx: mut existing_messages,
                replay: mut existing_replay_fingerprints,
            } = franken_existing_message_lookup(tx, conversation_id, &conv.messages)?;
            collect_new_messages_for_existing_conversation(
                conversation_id,
                conv,
                &mut existing_messages,
                &mut existing_replay_fingerprints,
                "skipping replay-equivalent recovered message with shifted idx",
            )
        };

        let mut inserted_indices = Vec::new();
        let mut fts_entries = Vec::new();
        let mut fts_pending_chars = 0usize;
        let mut _fts_inserted_total = 0usize;
        // Tail position of the planned new messages, used for the tail-state
        // updates further down.
        let (inserted_last_idx, inserted_last_created_at) =
            borrowed_messages_tail_state(&new_messages);
        let inserted_messages =
            franken_append_insert_new_messages(tx, conversation_id, &new_messages)?;
        // Sum content lengths over the (id, message) pairs the insert returned.
        let inserted_chars = inserted_messages
            .iter()
            .map(|(_, msg)| msg.content.len() as i64)
            .sum::<i64>();
        for (msg_id, msg) in inserted_messages {
            franken_insert_snippets(tx, msg_id, &msg.snippets)?;
            if !defer_lexical_updates {
                fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
                fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
                // Flush once either the document-count or character budget is reached.
                if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                    || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
                {
                    flush_pending_fts_entries(
                        self,
                        tx,
                        &mut fts_entries,
                        &mut fts_pending_chars,
                        &mut _fts_inserted_total,
                    )?;
                }
            }
            inserted_indices.push(msg.idx);
        }

        if idx_collision_count > 0 {
            tracing::warn!(
                conversation_id,
                collision_count = idx_collision_count,
                first_idx = first_collision_idx,
                source_path = %conv.source_path.display(),
                "message idx collisions encountered while appending to an existing conversation; retaining canonical message variants"
            );
        }

        // Final flush for any entries still pending after the loop.
        if !defer_lexical_updates {
            flush_pending_fts_entries(
                self,
                tx,
                &mut fts_entries,
                &mut fts_pending_chars,
                &mut _fts_inserted_total,
            )?;
        }

        let mut exact_append_tail_set = false;
        if used_append_tail_plan {
            // With the tail-based plan, the tail is only touched when the planned
            // messages yield both a last idx and a last creation timestamp.
            if let (Some(last_message_idx), Some(last_message_created_at)) =
                (inserted_last_idx, inserted_last_created_at)
            {
                // If the previously stored `ended_at` is absent or not later than
                // the last message's timestamp, set the tail state directly from
                // that message; otherwise go through the general update helper.
                if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
                    franken_set_conversation_tail_state_after_append(
                        tx,
                        conversation_id,
                        last_message_created_at,
                        last_message_idx,
                        last_message_created_at,
                    )?;
                    exact_append_tail_set = true;
                } else {
                    franken_update_conversation_tail_state(
                        tx,
                        conversation_id,
                        Some(last_message_created_at),
                        inserted_last_idx,
                        inserted_last_created_at,
                    )?;
                }
            }
        } else {
            // Lookup-based path: derive the ended-at candidate from the whole
            // incoming conversation.
            let conv_last_ts = conversation_tail_ended_at_candidate(conv);
            franken_update_conversation_tail_state(
                tx,
                conversation_id,
                conv_last_ts,
                inserted_last_idx,
                inserted_last_created_at,
            )?;
        }
        franken_update_external_conversation_tail_after_append(
            tx,
            agent_id,
            conv,
            used_append_tail_plan,
            exact_append_tail_set,
            inserted_last_idx,
            inserted_last_created_at,
        )?;

        // Daily stats only change when something was actually inserted; the
        // session count is unchanged because the conversation already existed.
        if !defer_analytics_updates && !inserted_indices.is_empty() {
            let message_count = inserted_indices.len() as i64;
            franken_update_daily_stats_in_tx(
                self,
                tx,
                &conv.agent_slug,
                &conv.source_id,
                conversation_effective_started_at(conv),
                StatsDelta {
                    session_count_delta: 0,
                    message_count_delta: message_count,
                    total_chars_delta: inserted_chars,
                },
            )?;
        }

        Ok(InsertOutcome {
            conversation_id,
            conversation_inserted: false,
            inserted_indices,
        })
    }
10144
10145 pub fn rebuild_fts(&self) -> Result<()> {
10147 self.rebuild_fts_via_frankensqlite().map(|_| ())
10148 }
10149
10150 pub(crate) fn ensure_search_fallback_fts_consistency(&self) -> Result<FtsConsistencyRepair> {
10155 self.ensure_fts_consistency_via_frankensqlite()
10156 }
10157
10158 pub(crate) fn validate_fts_messages_integrity(&self) -> Result<()> {
10159 validate_fts_messages_integrity_for_connection(&self.conn)
10160 }
10161
10162 pub(crate) fn fallback_fts_is_known_healthy_for_archive_fingerprint(
10163 &self,
10164 archive_fingerprint: &str,
10165 ) -> Result<bool> {
10166 Ok(
10167 self.read_fts_franken_rebuild_generation()? == Some(FTS_FRANKEN_REBUILD_GENERATION)
10168 && self
10169 .read_fts_franken_rebuild_archive_fingerprint()?
10170 .as_deref()
10171 == Some(archive_fingerprint),
10172 )
10173 }
10174
10175 pub(crate) fn record_search_fallback_fts_archive_fingerprint(
10176 &self,
10177 archive_fingerprint: &str,
10178 ) -> Result<()> {
10179 self.conn
10180 .execute_compat(
10181 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
10182 fparams![
10183 FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY,
10184 archive_fingerprint.to_string()
10185 ],
10186 )
10187 .with_context(|| "recording frankensqlite FTS archive fingerprint")?;
10188 Ok(())
10189 }
10190
10191 pub(crate) fn daily_stats_is_known_healthy_for_archive_fingerprint(
10192 &self,
10193 archive_fingerprint: &str,
10194 ) -> Result<bool> {
10195 Ok(
10196 self.read_daily_stats_health_generation()? == Some(DAILY_STATS_HEALTH_GENERATION)
10197 && self.read_daily_stats_archive_fingerprint()?.as_deref()
10198 == Some(archive_fingerprint),
10199 )
10200 }
10201
10202 pub(crate) fn record_daily_stats_archive_fingerprint(
10203 &self,
10204 archive_fingerprint: &str,
10205 ) -> Result<()> {
10206 self.conn
10207 .execute_compat(
10208 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
10209 fparams![
10210 DAILY_STATS_HEALTH_GENERATION_META_KEY,
10211 DAILY_STATS_HEALTH_GENERATION.to_string()
10212 ],
10213 )
10214 .with_context(|| "recording daily_stats health generation")?;
10215 self.conn
10216 .execute_compat(
10217 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
10218 fparams![DAILY_STATS_HEALTH_META_KEY, archive_fingerprint.to_string()],
10219 )
10220 .with_context(|| "recording daily_stats archive fingerprint")?;
10221 Ok(())
10222 }
10223
10224 fn read_fts_franken_rebuild_generation(&self) -> Result<Option<i64>> {
10225 let value: Option<String> = self
10226 .conn
10227 .query_row_map(
10228 "SELECT value FROM meta WHERE key = ?1",
10229 fparams![FTS_FRANKEN_REBUILD_META_KEY],
10230 |row| row.get_typed(0),
10231 )
10232 .optional()?;
10233 Ok(value.and_then(|v| v.parse::<i64>().ok()))
10234 }
10235
10236 fn read_fts_franken_rebuild_archive_fingerprint(&self) -> Result<Option<String>> {
10237 Ok(self
10238 .conn
10239 .query_row_map(
10240 "SELECT value FROM meta WHERE key = ?1",
10241 fparams![FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY],
10242 |row| row.get_typed(0),
10243 )
10244 .optional()?)
10245 }
10246
10247 fn read_daily_stats_health_generation(&self) -> Result<Option<i64>> {
10248 let value: Option<String> = self
10249 .conn
10250 .query_row_map(
10251 "SELECT value FROM meta WHERE key = ?1",
10252 fparams![DAILY_STATS_HEALTH_GENERATION_META_KEY],
10253 |row| row.get_typed(0),
10254 )
10255 .optional()?;
10256 Ok(value.and_then(|value| value.parse::<i64>().ok()))
10257 }
10258
10259 fn read_daily_stats_archive_fingerprint(&self) -> Result<Option<String>> {
10260 Ok(self
10261 .conn
10262 .query_row_map(
10263 "SELECT value FROM meta WHERE key = ?1",
10264 fparams![DAILY_STATS_HEALTH_META_KEY],
10265 |row| row.get_typed(0),
10266 )
10267 .optional()?)
10268 }
10269
10270 fn record_fts_franken_rebuild_generation(&self) -> Result<()> {
10271 self.conn
10272 .execute_compat(
10273 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
10274 fparams![
10275 FTS_FRANKEN_REBUILD_META_KEY,
10276 FTS_FRANKEN_REBUILD_GENERATION.to_string()
10277 ],
10278 )
10279 .with_context(|| "recording frankensqlite FTS rebuild generation")?;
10280 Ok(())
10281 }
10282
10283 fn ensure_fts_consistency_via_frankensqlite(&self) -> Result<FtsConsistencyRepair> {
10284 if self.read_fts_franken_rebuild_generation()? != Some(FTS_FRANKEN_REBUILD_GENERATION) {
10285 let fts_already_healthy = (|| -> Result<bool> {
10290 let fts_exists: i64 = self.conn.query_row_map(
10291 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
10292 fparams![],
10293 |row| row.get_typed(0),
10294 )?;
10295 if fts_exists != 1 {
10296 return Ok(false);
10297 }
10298 let total: i64 = self.conn.query_row_map(
10299 "SELECT COUNT(*) FROM messages",
10300 fparams![],
10301 |row| row.get_typed(0),
10302 )?;
10303 if total == 0 {
10304 return Ok(false);
10305 }
10306 let indexed: i64 = self.conn.query_row_map(
10307 "SELECT COUNT(*) FROM fts_messages",
10308 fparams![],
10309 |row| row.get_typed(0),
10310 )?;
10311 Ok(indexed > 0 && indexed * 100 >= total * 90)
10313 })()
10314 .unwrap_or(false);
10315
10316 if fts_already_healthy {
10317 tracing::info!(
10318 target: "cass::fts_rebuild",
10319 "FTS already populated and consistent; setting generation marker without rebuild"
10320 );
10321 self.record_fts_franken_rebuild_generation()?;
10322 self.set_fts_messages_present_cache(true);
10323 } else {
10324 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10325 self.record_fts_franken_rebuild_generation()?;
10326 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10327 }
10328 }
10329
10330 let inspection = (|| -> Result<(i64, bool)> {
10331 let fts_schema_rows = self.conn.query_row_map(
10332 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
10333 fparams![],
10334 |row| row.get_typed::<i64>(0),
10335 )?;
10336 let fts_queryable = fts_schema_rows == 1
10337 && self.conn.query("SELECT COUNT(*) FROM fts_messages").is_ok();
10338 Ok((fts_schema_rows, fts_queryable))
10339 })();
10340
10341 let (fts_schema_rows, fts_queryable) = match inspection {
10342 Ok(result) => result,
10343 Err(err) => {
10344 tracing::warn!(
10345 error = %err,
10346 "frankensqlite FTS consistency probe failed; rebuilding authoritative FTS"
10347 );
10348 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10349 self.record_fts_franken_rebuild_generation()?;
10350 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10351 }
10352 };
10353
10354 if fts_schema_rows != 1 || !fts_queryable {
10355 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10356 self.record_fts_franken_rebuild_generation()?;
10357 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10358 }
10359
10360 let total_messages =
10361 self.conn
10362 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
10363 row.get_typed::<i64>(0)
10364 })?;
10365 let indexed_messages =
10366 self.conn
10367 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
10368 row.get_typed::<i64>(0)
10369 })?;
10370
10371 if indexed_messages == total_messages {
10372 self.set_fts_messages_present_cache(true);
10373 return Ok(FtsConsistencyRepair::AlreadyHealthy {
10374 rows: usize::try_from(total_messages.max(0)).unwrap_or(usize::MAX),
10375 });
10376 }
10377
10378 if indexed_messages > total_messages {
10379 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10380 self.record_fts_franken_rebuild_generation()?;
10381 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10382 }
10383
10384 let inserted_rows = self
10385 .stream_fts_rows_via_frankensqlite(true)
10386 .with_context(|| "incrementally repairing missing FTS rows via frankensqlite")?;
10387 let repaired_rows =
10388 self.conn
10389 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
10390 row.get_typed::<i64>(0)
10391 })?;
10392 if repaired_rows == total_messages {
10393 self.set_fts_messages_present_cache(true);
10394 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
10395 inserted_rows,
10396 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
10397 });
10398 }
10399
10400 if inserted_rows == 0 {
10408 tracing::debug!(
10409 target: "cass::fts_rebuild",
10410 indexed_messages = repaired_rows,
10411 total_messages,
10412 un_indexable_gap = total_messages.saturating_sub(repaired_rows),
10413 "FTS catch-up inserted 0 rows; remaining gap is un-indexable (likely orphaned messages with dangling conversation_id)"
10414 );
10415 self.set_fts_messages_present_cache(true);
10416 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
10417 inserted_rows: 0,
10418 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
10419 });
10420 }
10421
10422 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10425 self.record_fts_franken_rebuild_generation()?;
10426 Ok(FtsConsistencyRepair::Rebuilt { inserted_rows })
10427 }
10428
10429 pub(crate) fn rebuild_fts_via_frankensqlite(&self) -> Result<usize> {
10430 self.invalidate_fts_messages_present_cache();
10431 self.conn
10432 .execute("DROP TABLE IF EXISTS fts_messages;")
10433 .with_context(|| "dropping derived fts_messages before frankensqlite rebuild")?;
10434 self.conn
10435 .execute_compat(FTS5_REGISTER_SQL, fparams![])
10436 .with_context(|| "creating derived fts_messages via frankensqlite rebuild")?;
10437 self.set_fts_messages_present_cache(true);
10438
10439 self.stream_fts_rows_via_frankensqlite(false)
10440 }
10441
10442 fn stream_fts_rows_via_frankensqlite(&self, missing_only: bool) -> Result<usize> {
10443 let batch_size = fts_rebuild_batch_size().max(1);
10444 let batch_limit = i64::try_from(batch_size).unwrap_or(i64::MAX);
10445 let mut total_inserted: usize = 0;
10446 let mut total_skipped_orphans: usize = 0;
10447 let mut total_skipped_existing: usize = 0;
10448 let mut last_rowid: i64 = 0;
10449 let conversation_by_id = self.load_fts_conversation_projection_map()?;
10450 let agent_slug_by_id = self.load_fts_agent_slug_map()?;
10451 let workspace_path_by_id = self.load_fts_workspace_path_map()?;
10452 let existing_fts_rowids = if missing_only {
10453 Some(self.load_fts_message_rowid_set()?)
10454 } else {
10455 None
10456 };
10457 let mut entries = Vec::new();
10458 let mut pending_chars = 0usize;
10459
10460 loop {
10461 let rows = self.fetch_fts_rebuild_message_rows(last_rowid, batch_limit)?;
10462 let fetched_count = rows.len();
10463 if fetched_count == 0 {
10464 break;
10465 }
10466
10467 let inserted_before_batch = total_inserted;
10468 let skipped_before_batch = total_skipped_orphans;
10469 let existing_before_batch = total_skipped_existing;
10470
10471 for row in rows {
10472 last_rowid = row.rowid;
10473 if existing_fts_rowids
10474 .as_ref()
10475 .is_some_and(|rowids| rowids.contains(&row.message_id))
10476 {
10477 total_skipped_existing = total_skipped_existing.saturating_add(1);
10478 continue;
10479 }
10480 let Some(conversation) = conversation_by_id.get(&row.conversation_id) else {
10481 total_skipped_orphans = total_skipped_orphans.saturating_add(1);
10482 continue;
10483 };
10484 let agent = conversation
10485 .agent_id
10486 .and_then(|agent_id| agent_slug_by_id.get(&agent_id))
10487 .filter(|slug| !slug.is_empty())
10488 .cloned()
10489 .unwrap_or_else(|| "unknown".to_string());
10490 let workspace = conversation
10491 .workspace_id
10492 .and_then(|workspace_id| workspace_path_by_id.get(&workspace_id))
10493 .cloned()
10494 .unwrap_or_default();
10495 pending_chars = pending_chars.saturating_add(row.content.len());
10496 entries.push(FtsEntry {
10497 content: row.content,
10498 title: conversation.title.clone(),
10499 agent,
10500 workspace,
10501 source_path: conversation.source_path.clone(),
10502 created_at: row.created_at,
10503 message_id: row.message_id,
10504 });
10505 if entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10506 || pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10507 {
10508 total_inserted = total_inserted.saturating_add(
10509 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
10510 );
10511 entries.clear();
10512 pending_chars = 0;
10513 }
10514 }
10515
10516 if !entries.is_empty() {
10517 total_inserted = total_inserted.saturating_add(
10518 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
10519 );
10520 entries.clear();
10521 pending_chars = 0;
10522 }
10523
10524 tracing::debug!(
10525 target: "cass::fts_rebuild",
10526 batch_rows = fetched_count,
10527 batch_inserted = total_inserted.saturating_sub(inserted_before_batch),
10528 batch_skipped_orphans = total_skipped_orphans.saturating_sub(skipped_before_batch),
10529 batch_skipped_existing = total_skipped_existing.saturating_sub(existing_before_batch),
10530 total_inserted,
10531 total_skipped_orphans,
10532 total_skipped_existing,
10533 last_rowid,
10534 missing_only,
10535 "FTS streaming maintenance batch complete"
10536 );
10537
10538 if fetched_count < batch_size {
10539 break;
10540 }
10541 }
10542
10543 Ok(total_inserted)
10544 }
10545
10546 fn fetch_fts_rebuild_message_rows(
10547 &self,
10548 last_rowid: i64,
10549 batch_limit: i64,
10550 ) -> Result<Vec<FtsRebuildMessageRow>> {
10551 self.conn
10552 .query_map_collect(
10553 "SELECT m.rowid, m.id, m.conversation_id, m.content, m.created_at
10554 FROM messages m
10555 WHERE m.rowid > ?1
10556 ORDER BY m.rowid
10557 LIMIT ?2",
10558 fparams![last_rowid, batch_limit],
10559 |row| {
10560 Ok(FtsRebuildMessageRow {
10561 rowid: row.get_typed(0)?,
10562 message_id: row.get_typed(1)?,
10563 conversation_id: row.get_typed(2)?,
10564 content: row.get_typed::<Option<String>>(3)?.unwrap_or_default(),
10565 created_at: row.get_typed(4)?,
10566 })
10567 },
10568 )
10569 .with_context(|| format!("fetching FTS maintenance messages after rowid {last_rowid}"))
10570 }
10571
10572 fn load_fts_message_rowid_set(&self) -> Result<HashSet<i64>> {
10573 let rows: Vec<i64> = self
10574 .conn
10575 .query_map_collect("SELECT rowid FROM fts_messages", fparams![], |row| {
10576 row.get_typed(0)
10577 })
10578 .with_context(|| "loading existing FTS message rowids")?;
10579 Ok(rows.into_iter().collect())
10580 }
10581
10582 fn load_fts_conversation_projection_map(
10583 &self,
10584 ) -> Result<HashMap<i64, FtsConversationProjection>> {
10585 let rows: Vec<(i64, FtsConversationProjection)> = self
10586 .conn
10587 .query_map_collect(
10588 "SELECT id, title, agent_id, workspace_id, source_path
10589 FROM conversations",
10590 fparams![],
10591 |row| {
10592 Ok((
10593 row.get_typed(0)?,
10594 FtsConversationProjection {
10595 title: row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
10596 agent_id: row.get_typed(2)?,
10597 workspace_id: row.get_typed(3)?,
10598 source_path: row.get_typed::<Option<String>>(4)?.unwrap_or_default(),
10599 },
10600 ))
10601 },
10602 )
10603 .with_context(|| "loading FTS conversation projection map")?;
10604 Ok(rows.into_iter().collect())
10605 }
10606
10607 fn load_fts_agent_slug_map(&self) -> Result<HashMap<i64, String>> {
10608 let rows: Vec<(i64, String)> = self
10609 .conn
10610 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
10611 Ok((
10612 row.get_typed(0)?,
10613 row.get_typed::<Option<String>>(1)?
10614 .unwrap_or_else(|| "unknown".to_string()),
10615 ))
10616 })
10617 .with_context(|| "loading FTS agent slug map")?;
10618 Ok(rows.into_iter().collect())
10619 }
10620
10621 fn load_fts_workspace_path_map(&self) -> Result<HashMap<i64, String>> {
10622 let rows: Vec<(i64, String)> = self
10623 .conn
10624 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
10625 Ok((
10626 row.get_typed(0)?,
10627 row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
10628 ))
10629 })
10630 .with_context(|| "loading FTS workspace path map")?;
10631 Ok(rows.into_iter().collect())
10632 }
10633
10634 pub fn fetch_messages_for_embedding(&self) -> Result<Vec<MessageForEmbedding>> {
10636 self.conn
10641 .query_map_collect(
10642 "SELECT m.id, m.created_at, COALESCE(c.agent_id, 0), c.workspace_id, c.source_id, m.role, m.content
10643 FROM messages m
10644 JOIN conversations c ON m.conversation_id = c.id
10645 ORDER BY m.id",
10646 fparams![],
10647 |row| {
10648 let source_id: String = row.get_typed::<Option<String>>(4)?
10649 .unwrap_or_else(|| "local".to_string());
10650 Ok(MessageForEmbedding {
10651 message_id: row.get_typed(0)?,
10652 created_at: row.get_typed(1)?,
10653 agent_id: row.get_typed(2)?,
10654 workspace_id: row.get_typed(3)?,
10655 source_id_hash: crc32fast::hash(source_id.as_bytes()),
10656 role: row.get_typed(5)?,
10657 content: row.get_typed(6)?,
10658 })
10659 },
10660 )
10661 .with_context(|| "fetching messages for embedding")
10662 }
10663
10664 pub fn get_last_embedded_message_id(&self) -> Result<Option<i64>> {
10666 let result: Result<String, _> = self.conn.query_row_map(
10667 "SELECT value FROM meta WHERE key = 'last_embedded_message_id'",
10668 fparams![],
10669 |row| row.get_typed(0),
10670 );
10671 match result.optional() {
10672 Ok(Some(s)) => Ok(s.parse().ok()),
10673 Ok(None) => Ok(None),
10674 Err(e) => Err(e.into()),
10675 }
10676 }
10677
10678 pub fn set_last_embedded_message_id(&self, id: i64) -> Result<()> {
10680 self.conn.execute_compat(
10681 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_embedded_message_id', ?1)",
10682 fparams![id.to_string()],
10683 )?;
10684 Ok(())
10685 }
10686
10687 pub fn get_embedding_jobs(&self, db_path: &str) -> Result<Vec<EmbeddingJobRow>> {
10689 self.conn
10690 .query_map_collect(
10691 "SELECT id, db_path, model_id, status, total_docs, completed_docs, error_message, created_at, started_at, completed_at
10692 FROM embedding_jobs WHERE db_path = ?1 ORDER BY id DESC",
10693 fparams![db_path],
10694 |row| {
10695 Ok(EmbeddingJobRow {
10696 id: row.get_typed(0)?,
10697 db_path: row.get_typed(1)?,
10698 model_id: row.get_typed(2)?,
10699 status: row.get_typed(3)?,
10700 total_docs: row.get_typed(4)?,
10701 completed_docs: row.get_typed(5)?,
10702 error_message: row.get_typed(6)?,
10703 created_at: row.get_typed(7)?,
10704 started_at: row.get_typed(8)?,
10705 completed_at: row.get_typed(9)?,
10706 })
10707 },
10708 )
10709 .with_context(|| format!("fetching embedding jobs for {db_path}"))
10710 }
10711
10712 pub fn upsert_embedding_job(
10714 &self,
10715 db_path: &str,
10716 model_id: &str,
10717 total_docs: i64,
10718 ) -> Result<i64> {
10719 let updated = self.conn.execute_compat(
10720 "UPDATE embedding_jobs
10721 SET total_docs = ?3
10722 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10723 fparams![db_path, model_id, total_docs],
10724 )?;
10725 if updated == 0 {
10726 let insert_result = self.conn.execute_compat(
10727 "INSERT INTO embedding_jobs(db_path, model_id, total_docs) VALUES(?1,?2,?3)",
10728 fparams![db_path, model_id, total_docs],
10729 );
10730 if let Err(err) = insert_result {
10731 if !matches!(err, frankensqlite::FrankenError::UniqueViolation { .. }) {
10732 return Err(err.into());
10733 }
10734 self.conn.execute_compat(
10735 "UPDATE embedding_jobs
10736 SET total_docs = ?3
10737 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10738 fparams![db_path, model_id, total_docs],
10739 )?;
10740 }
10741 }
10742 self.conn
10743 .query_row_map(
10744 "SELECT id FROM embedding_jobs
10745 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')
10746 ORDER BY id DESC
10747 LIMIT 1",
10748 fparams![db_path, model_id],
10749 |row| row.get_typed(0),
10750 )
10751 .with_context(|| "resolving embedding job id after upsert")
10752 }
10753
10754 pub fn start_embedding_job(&self, job_id: i64) -> Result<()> {
10756 self.conn.execute_compat(
10757 "UPDATE embedding_jobs SET status = 'running', started_at = datetime('now') WHERE id = ?1",
10758 fparams![job_id],
10759 )?;
10760 Ok(())
10761 }
10762
10763 pub fn complete_embedding_job(&self, job_id: i64) -> Result<()> {
10765 self.conn.execute_compat(
10766 "UPDATE embedding_jobs SET status = 'completed', completed_at = datetime('now') WHERE id = ?1",
10767 fparams![job_id],
10768 )?;
10769 Ok(())
10770 }
10771
10772 pub fn fail_embedding_job(&self, job_id: i64, error: &str) -> Result<()> {
10774 self.conn.execute_compat(
10775 "UPDATE embedding_jobs SET status = 'failed', error_message = ?2, completed_at = datetime('now') WHERE id = ?1",
10776 fparams![job_id, error],
10777 )?;
10778 Ok(())
10779 }
10780
10781 pub fn cancel_embedding_jobs(&self, db_path: &str, model_id: Option<&str>) -> Result<usize> {
10783 if let Some(mid) = model_id {
10784 Ok(self.conn.execute_compat(
10785 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10786 fparams![db_path, mid],
10787 )?)
10788 } else {
10789 Ok(self.conn.execute_compat(
10790 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND status IN ('pending', 'running')",
10791 fparams![db_path],
10792 )?)
10793 }
10794 }
10795
10796 pub fn update_job_progress(&self, job_id: i64, completed_docs: i64) -> Result<()> {
10798 self.conn.execute_compat(
10799 "UPDATE embedding_jobs SET completed_docs = ?2 WHERE id = ?1",
10800 fparams![job_id, completed_docs],
10801 )?;
10802 Ok(())
10803 }
10804
10805 pub fn count_sessions_in_range(
10814 &self,
10815 start_ts_ms: Option<i64>,
10816 end_ts_ms: Option<i64>,
10817 agent_slug: Option<&str>,
10818 source_id: Option<&str>,
10819 ) -> Result<(i64, bool)> {
10820 let agent = agent_slug.unwrap_or("all");
10821 let source = source_id.unwrap_or("all");
10822
10823 let stats_count: i64 = self
10825 .conn
10826 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10827 row.get_typed(0)
10828 })
10829 .unwrap_or(0);
10830
10831 if stats_count == 0 {
10832 return self.count_sessions_direct(start_ts_ms, end_ts_ms, agent_slug, source_id);
10833 }
10834
10835 let start_day = start_ts_ms.map(Self::day_id_from_millis);
10837 let end_day = end_ts_ms.map(Self::day_id_from_millis);
10838
10839 let count: i64 = match (start_day, end_day) {
10840 (Some(start), Some(end)) => self.conn.query_row_map(
10841 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10842 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4",
10843 fparams![start, end, agent, source],
10844 |row| row.get_typed(0),
10845 )?,
10846 (Some(start), None) => self.conn.query_row_map(
10847 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10848 WHERE day_id >= ?1 AND agent_slug = ?2 AND source_id = ?3",
10849 fparams![start, agent, source],
10850 |row| row.get_typed(0),
10851 )?,
10852 (None, Some(end)) => self.conn.query_row_map(
10853 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10854 WHERE day_id <= ?1 AND agent_slug = ?2 AND source_id = ?3",
10855 fparams![end, agent, source],
10856 |row| row.get_typed(0),
10857 )?,
10858 (None, None) => self.conn.query_row_map(
10859 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10860 WHERE agent_slug = ?1 AND source_id = ?2",
10861 fparams![agent, source],
10862 |row| row.get_typed(0),
10863 )?,
10864 };
10865
10866 Ok((count, true))
10867 }
10868
10869 fn count_sessions_direct(
10871 &self,
10872 start_ts_ms: Option<i64>,
10873 end_ts_ms: Option<i64>,
10874 agent_slug: Option<&str>,
10875 source_id: Option<&str>,
10876 ) -> Result<(i64, bool)> {
10877 let mut sql = "SELECT COUNT(*) FROM conversations c WHERE 1=1".to_string();
10884 let mut param_values: Vec<ParamValue> = Vec::new();
10885 let mut idx = 1;
10886
10887 if let Some(start) = start_ts_ms {
10888 sql.push_str(&format!(" AND c.started_at >= ?{idx}"));
10889 param_values.push(ParamValue::from(start));
10890 idx += 1;
10891 }
10892 if let Some(end) = end_ts_ms {
10893 sql.push_str(&format!(" AND c.started_at <= ?{idx}"));
10894 param_values.push(ParamValue::from(end));
10895 idx += 1;
10896 }
10897 if let Some(agent) = agent_slug
10898 && agent != "all"
10899 {
10900 sql.push_str(&format!(
10901 " AND EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug = ?{idx})"
10902 ));
10903 param_values.push(ParamValue::from(agent));
10904 idx += 1;
10905 }
10906 if let Some(source) = source_id
10907 && source != "all"
10908 {
10909 sql.push_str(&format!(" AND c.source_id = ?{idx}"));
10910 param_values.push(ParamValue::from(source));
10911 let _ = idx; }
10913
10914 let count: i64 = self
10915 .conn
10916 .query_row_map(&sql, ¶m_values, |row| row.get_typed(0))?;
10917 Ok((count, false))
10918 }
10919
10920 pub fn get_daily_histogram(
10922 &self,
10923 start_ts_ms: i64,
10924 end_ts_ms: i64,
10925 agent_slug: Option<&str>,
10926 source_id: Option<&str>,
10927 ) -> Result<Vec<DailyCount>> {
10928 let start_day = Self::day_id_from_millis(start_ts_ms);
10929 let end_day = Self::day_id_from_millis(end_ts_ms);
10930 let agent = agent_slug.unwrap_or("all");
10931 let source = source_id.unwrap_or("all");
10932
10933 let rows = self.conn.query_map_collect(
10934 "SELECT day_id, session_count, message_count, total_chars
10935 FROM daily_stats
10936 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4
10937 ORDER BY day_id",
10938 fparams![start_day, end_day, agent, source],
10939 |row| {
10940 Ok(DailyCount {
10941 day_id: row.get_typed(0)?,
10942 sessions: row.get_typed(1)?,
10943 messages: row.get_typed(2)?,
10944 chars: row.get_typed(3)?,
10945 })
10946 },
10947 )?;
10948
10949 Ok(rows)
10950 }
10951
10952 pub fn daily_stats_health(&self) -> Result<DailyStatsHealth> {
10954 let row_count: i64 =
10955 self.conn
10956 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10957 row.get_typed(0)
10958 })?;
10959
10960 let oldest_update: Option<i64> = self.conn.query_row_map(
10961 "SELECT MIN(last_updated) FROM daily_stats",
10962 fparams![],
10963 |row| row.get_typed(0),
10964 )?;
10965
10966 let conversation_count: i64 =
10967 self.conn
10968 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
10969 row.get_typed(0)
10970 })?;
10971
10972 let materialized_total: i64 = self.conn.query_row_map(
10973 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10974 WHERE agent_slug = 'all' AND source_id = 'all'",
10975 fparams![],
10976 |row| row.get_typed(0),
10977 )?;
10978
10979 Ok(DailyStatsHealth {
10980 populated: row_count > 0,
10981 row_count,
10982 oldest_update_ms: oldest_update,
10983 conversation_count,
10984 materialized_total,
10985 drift: (conversation_count - materialized_total).abs(),
10986 })
10987 }
10988
10989 pub fn insert_conversations_batched(
10993 &self,
10994 conversations: &[(i64, Option<i64>, &Conversation)],
10995 ) -> Result<Vec<InsertOutcome>> {
10996 if conversations.is_empty() {
10997 return Ok(Vec::new());
10998 }
10999
11000 self.ensure_sources_for_batch(conversations)?;
11001
11002 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
11003 let defer_analytics_updates = defer_analytics_updates_enabled();
11004
11005 let pricing_table = PricingTable::franken_load(&self.conn).unwrap_or_else(|e| {
11006 tracing::warn!(target: "cass::analytics::pricing", error = %e, "failed to load pricing table");
11007 PricingTable { entries: Vec::new() }
11008 });
11009 let mut pricing_diag = PricingDiagnostics::default();
11010
11011 let mut tx = self.conn.transaction()?;
11012
11013 ensure_agents_in_tx(&tx, conversations)?;
11020 ensure_workspaces_in_tx(&tx, conversations)?;
11021 ensure_sources_in_tx(&tx, conversations)?;
11022
11023 let mut outcomes = Vec::with_capacity(conversations.len());
11024 let mut fts_entries = Vec::new();
11025 let mut fts_pending_chars = 0usize;
11026 let mut fts_inserted_total = 0usize;
11027 let mut fts_count_total = 0usize;
11028 let mut stats = StatsAggregator::new();
11029 let mut token_stats = TokenStatsAggregator::new();
11030 let mut token_entries: Vec<TokenUsageEntry> = Vec::new();
11031 let mut metrics_entries: Vec<MessageMetricsEntry> = Vec::new();
11032 let mut rollup_agg = AnalyticsRollupAggregator::new();
11033 let mut conv_ids_to_summarize: Vec<i64> = Vec::new();
11034 let mut pending_conversation_ids: HashMap<PendingConversationKey, i64> = HashMap::new();
11035 let mut pending_message_fingerprints: HashMap<i64, HashMap<i64, MessageMergeFingerprint>> =
11036 HashMap::new();
11037 let mut pending_message_replay_fingerprints: HashMap<
11038 i64,
11039 HashSet<MessageReplayFingerprint>,
11040 > = HashMap::new();
11041
11042 for &(agent_id, workspace_id, raw_conv) in conversations {
11043 let normalized_conv = normalized_conversation_for_storage(raw_conv);
11044 let conv = normalized_conv.as_ref();
11045 let mut total_chars: i64 = 0;
11046 let mut inserted_indices = Vec::with_capacity(conv.messages.len());
11047 let mut inserted_messages: Vec<(i64, &Message)> =
11048 Vec::with_capacity(conv.messages.len());
11049 let mut session_count_delta = 1_i64;
11050 let conversation_key = conversation_merge_key(agent_id, conv);
11051
11052 let existing_conv_id = if let Some(existing_id) =
11053 pending_conversation_ids.get(&conversation_key)
11054 {
11055 Some(*existing_id)
11056 } else {
11057 let existing_id =
11058 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
11059 if let Some(existing_id) = existing_id {
11060 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
11061 }
11062 existing_id
11063 };
11064
11065 let conv_id = if let Some(existing_id) = existing_conv_id {
11066 session_count_delta = 0;
11067 let (
11068 ExistingConversationNewMessages {
11069 messages: new_messages,
11070 new_chars: _planned_new_chars,
11071 idx_collision_count,
11072 first_collision_idx,
11073 },
11074 existing_messages,
11075 existing_replay_fingerprints,
11076 ) = franken_collect_batched_existing_new_messages(
11077 &tx,
11078 existing_id,
11079 conv,
11080 &mut pending_message_fingerprints,
11081 &mut pending_message_replay_fingerprints,
11082 "skipping replay-equivalent recovered message with shifted idx during batched merge",
11083 )?;
11084 let (inserted_last_idx, inserted_last_created_at) =
11085 borrowed_messages_tail_state(&new_messages);
11086 let inserted_append_messages =
11087 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
11088 total_chars += inserted_append_messages
11089 .iter()
11090 .map(|(_, msg)| msg.content.len() as i64)
11091 .sum::<i64>();
11092 for (msg_id, msg) in inserted_append_messages {
11093 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
11094 if !defer_lexical_updates {
11095 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
11096 fts_count_total += 1;
11097 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
11098 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
11099 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
11100 {
11101 flush_pending_fts_entries(
11102 self,
11103 &tx,
11104 &mut fts_entries,
11105 &mut fts_pending_chars,
11106 &mut fts_inserted_total,
11107 )?;
11108 }
11109 }
11110 inserted_indices.push(msg.idx);
11111 inserted_messages.push((msg_id, msg));
11112 }
11113
11114 if idx_collision_count > 0 {
11115 tracing::warn!(
11116 conversation_id = existing_id,
11117 collision_count = idx_collision_count,
11118 first_idx = first_collision_idx,
11119 source_path = %conv.source_path.display(),
11120 "message idx collisions encountered during batched conversation merge; retaining canonical message variants"
11121 );
11122 }
11123
11124 let conv_last_ts = conversation_tail_ended_at_candidate(conv);
11125 franken_update_conversation_tail_state(
11126 &tx,
11127 existing_id,
11128 conv_last_ts,
11129 inserted_last_idx,
11130 inserted_last_created_at,
11131 )?;
11132 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
11133 {
11134 franken_update_external_conversation_tail_lookup_key(
11135 &tx,
11136 &lookup_key,
11137 conv_last_ts,
11138 inserted_last_idx,
11139 inserted_last_created_at,
11140 )?;
11141 }
11142
11143 pending_message_fingerprints.insert(existing_id, existing_messages);
11144 pending_message_replay_fingerprints
11145 .insert(existing_id, existing_replay_fingerprints);
11146
11147 existing_id
11148 } else {
11149 match franken_insert_conversation_or_get_existing(
11150 &tx,
11151 agent_id,
11152 workspace_id,
11153 conv,
11154 )? {
11155 ConversationInsertStatus::Inserted(new_conv_id) => {
11156 pending_conversation_ids.insert(conversation_key.clone(), new_conv_id);
11157 let pending_messages =
11158 pending_message_fingerprints.entry(new_conv_id).or_default();
11159 let pending_replay_fingerprints = pending_message_replay_fingerprints
11160 .entry(new_conv_id)
11161 .or_default();
11162 let mut new_messages = Vec::new();
11163 for msg in &conv.messages {
11164 let incoming_replay = message_replay_fingerprint(msg);
11165 if pending_messages.contains_key(&msg.idx)
11166 || pending_replay_fingerprints.contains(&incoming_replay)
11167 {
11168 continue;
11169 }
11170 pending_messages.insert(msg.idx, message_merge_fingerprint(msg));
11171 pending_replay_fingerprints.insert(incoming_replay);
11172 new_messages.push(msg);
11173 }
11174 let inserted_message_ids =
11175 franken_batch_insert_new_messages(&tx, new_conv_id, &new_messages)?;
11176 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
11177 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
11178 if !defer_lexical_updates {
11179 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
11180 fts_count_total += 1;
11181 fts_pending_chars =
11182 fts_pending_chars.saturating_add(msg.content.len());
11183 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
11184 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
11185 {
11186 flush_pending_fts_entries(
11187 self,
11188 &tx,
11189 &mut fts_entries,
11190 &mut fts_pending_chars,
11191 &mut fts_inserted_total,
11192 )?;
11193 }
11194 }
11195 total_chars += msg.content.len() as i64;
11196 inserted_indices.push(msg.idx);
11197 inserted_messages.push((msg_id, msg));
11198 }
11199 new_conv_id
11200 }
11201 ConversationInsertStatus::Existing(existing_id) => {
11202 session_count_delta = 0;
11203 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
11204 let (
11205 ExistingConversationNewMessages {
11206 messages: new_messages,
11207 new_chars: _planned_new_chars,
11208 idx_collision_count,
11209 first_collision_idx,
11210 },
11211 existing_messages,
11212 existing_replay_fingerprints,
11213 ) = franken_collect_batched_existing_new_messages(
11214 &tx,
11215 existing_id,
11216 conv,
11217 &mut pending_message_fingerprints,
11218 &mut pending_message_replay_fingerprints,
11219 "skipping replay-equivalent recovered message with shifted idx after duplicate conversation recovery",
11220 )?;
11221 let (inserted_last_idx, inserted_last_created_at) =
11222 borrowed_messages_tail_state(&new_messages);
11223 let inserted_append_messages =
11224 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
11225 total_chars += inserted_append_messages
11226 .iter()
11227 .map(|(_, msg)| msg.content.len() as i64)
11228 .sum::<i64>();
11229 for (msg_id, msg) in inserted_append_messages {
11230 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
11231 if !defer_lexical_updates {
11232 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
11233 fts_count_total += 1;
11234 fts_pending_chars =
11235 fts_pending_chars.saturating_add(msg.content.len());
11236 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
11237 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
11238 {
11239 flush_pending_fts_entries(
11240 self,
11241 &tx,
11242 &mut fts_entries,
11243 &mut fts_pending_chars,
11244 &mut fts_inserted_total,
11245 )?;
11246 }
11247 }
11248 inserted_indices.push(msg.idx);
11249 inserted_messages.push((msg_id, msg));
11250 }
11251
11252 if idx_collision_count > 0 {
11253 tracing::warn!(
11254 conversation_id = existing_id,
11255 collision_count = idx_collision_count,
11256 first_idx = first_collision_idx,
11257 source_path = %conv.source_path.display(),
11258 "message idx collisions encountered after duplicate conversation recovery; retaining canonical message variants"
11259 );
11260 }
11261
11262 let conv_last_ts = conversation_tail_ended_at_candidate(conv);
11263 franken_update_conversation_tail_state(
11264 &tx,
11265 existing_id,
11266 conv_last_ts,
11267 inserted_last_idx,
11268 inserted_last_created_at,
11269 )?;
11270 if let Some(lookup_key) =
11271 conversation_external_lookup_key_for_conv(agent_id, conv)
11272 {
11273 franken_update_external_conversation_tail_lookup_key(
11274 &tx,
11275 &lookup_key,
11276 conv_last_ts,
11277 inserted_last_idx,
11278 inserted_last_created_at,
11279 )?;
11280 }
11281
11282 pending_message_fingerprints.insert(existing_id, existing_messages);
11283 pending_message_replay_fingerprints
11284 .insert(existing_id, existing_replay_fingerprints);
11285
11286 existing_id
11287 }
11288 }
11289 };
11290
11291 if !defer_analytics_updates {
11292 let delta = StatsDelta {
11293 session_count_delta,
11294 message_count_delta: inserted_messages.len() as i64,
11295 total_chars_delta: total_chars,
11296 };
11297
11298 let effective_started_at = conversation_effective_started_at(conv);
11299 let day_id = effective_started_at
11300 .map(FrankenStorage::day_id_from_millis)
11301 .unwrap_or(0);
11302 stats.record_delta(
11303 &conv.agent_slug,
11304 &conv.source_id,
11305 day_id,
11306 delta.session_count_delta,
11307 delta.message_count_delta,
11308 delta.total_chars_delta,
11309 );
11310
11311 let conv_day_id = day_id;
11312 let mut session_model_family = String::from("unknown");
11313 let mut has_any_tokens = false;
11314
11315 for &(message_id, msg) in &inserted_messages {
11316 let role_s = role_str(&msg.role);
11317 let usage = if historical_raw_json(&msg.extra_json).is_some() {
11318 crate::connectors::extract_tokens_for_agent(
11319 &conv.agent_slug,
11320 &serde_json::Value::Null,
11321 &msg.content,
11322 &role_s,
11323 )
11324 } else {
11325 crate::connectors::extract_tokens_for_agent(
11326 &conv.agent_slug,
11327 &msg.extra_json,
11328 &msg.content,
11329 &role_s,
11330 )
11331 };
11332
11333 let msg_ts = msg
11334 .created_at
11335 .or(conversation_effective_started_at(conv))
11336 .unwrap_or(0);
11337 let msg_day_id = if msg_ts > 0 {
11338 FrankenStorage::day_id_from_millis(msg_ts)
11339 } else {
11340 conv_day_id
11341 };
11342
11343 let model_info = usage
11344 .model_name
11345 .as_deref()
11346 .map(crate::connectors::normalize_model);
11347
11348 let model_family = model_info
11349 .as_ref()
11350 .map(|i| i.family.clone())
11351 .unwrap_or_else(|| "unknown".into());
11352 let model_tier = model_info
11353 .as_ref()
11354 .map(|i| i.tier.clone())
11355 .unwrap_or_else(|| "unknown".into());
11356 let provider = usage
11357 .provider
11358 .clone()
11359 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
11360 .unwrap_or_else(|| "unknown".into());
11361
11362 if model_family != "unknown" {
11363 session_model_family = model_family.clone();
11364 }
11365
11366 let estimated_cost = pricing_table.compute_cost(
11367 usage.model_name.as_deref(),
11368 msg_day_id,
11369 usage.input_tokens,
11370 usage.output_tokens,
11371 usage.cache_read_tokens,
11372 usage.cache_creation_tokens,
11373 );
11374 if estimated_cost.is_some() {
11375 pricing_diag.record_priced();
11376 } else if usage.has_token_data() {
11377 pricing_diag.record_unpriced(usage.model_name.as_deref());
11378 }
11379
11380 token_stats.record(
11381 &conv.agent_slug,
11382 &conv.source_id,
11383 msg_day_id,
11384 &model_family,
11385 &role_s,
11386 &usage,
11387 msg.content.len() as i64,
11388 estimated_cost.unwrap_or(0.0),
11389 );
11390
11391 if usage.has_token_data() {
11392 has_any_tokens = true;
11393 }
11394
11395 let content_chars = msg.content.len() as i64;
11396 let content_tokens_est = content_chars / 4;
11397 let msg_hour_id = FrankenStorage::hour_id_from_millis(msg_ts);
11398 let has_plan = has_plan_for_role(&role_s, &msg.content);
11399
11400 token_entries.push(TokenUsageEntry {
11401 message_id,
11402 conversation_id: conv_id,
11403 agent_id,
11404 workspace_id,
11405 source_id: conv.source_id.clone(),
11406 timestamp_ms: msg_ts,
11407 day_id: msg_day_id,
11408 model_name: usage.model_name.clone(),
11409 model_family: Some(model_family.clone()),
11410 model_tier: Some(model_tier.clone()),
11411 service_tier: usage.service_tier.clone(),
11412 provider: Some(provider.clone()),
11413 input_tokens: usage.input_tokens,
11414 output_tokens: usage.output_tokens,
11415 cache_read_tokens: usage.cache_read_tokens,
11416 cache_creation_tokens: usage.cache_creation_tokens,
11417 thinking_tokens: usage.thinking_tokens,
11418 total_tokens: usage.total_tokens(),
11419 estimated_cost_usd: estimated_cost,
11420 role: role_s.to_string(),
11421 content_chars,
11422 has_tool_calls: usage.has_tool_calls,
11423 tool_call_count: usage.tool_call_count,
11424 data_source: usage.data_source.as_str().to_string(),
11425 });
11426
11427 let mm = MessageMetricsEntry {
11428 message_id,
11429 created_at_ms: msg_ts,
11430 hour_id: msg_hour_id,
11431 day_id: msg_day_id,
11432 agent_slug: conv.agent_slug.clone(),
11433 workspace_id: workspace_id.unwrap_or(0),
11434 source_id: conv.source_id.clone(),
11435 role: role_s.to_string(),
11436 content_chars,
11437 content_tokens_est,
11438 model_name: usage.model_name.clone(),
11439 model_family: model_family.clone(),
11440 model_tier: model_tier.clone(),
11441 provider,
11442 api_input_tokens: usage.input_tokens,
11443 api_output_tokens: usage.output_tokens,
11444 api_cache_read_tokens: usage.cache_read_tokens,
11445 api_cache_creation_tokens: usage.cache_creation_tokens,
11446 api_thinking_tokens: usage.thinking_tokens,
11447 api_service_tier: usage.service_tier.clone(),
11448 api_data_source: usage.data_source.as_str().to_string(),
11449 tool_call_count: usage.tool_call_count as i64,
11450 has_tool_calls: usage.has_tool_calls,
11451 has_plan,
11452 };
11453 rollup_agg.record(&mm);
11454 metrics_entries.push(mm);
11455 }
11456
11457 if session_count_delta > 0 {
11458 token_stats.record_session(
11459 &conv.agent_slug,
11460 &conv.source_id,
11461 conv_day_id,
11462 &session_model_family,
11463 );
11464 }
11465
11466 if has_any_tokens {
11467 conv_ids_to_summarize.push(conv_id);
11468 }
11469 }
11470
11471 outcomes.push(InsertOutcome {
11472 conversation_id: conv_id,
11473 conversation_inserted: session_count_delta > 0,
11474 inserted_indices,
11475 });
11476 }
11477
11478 if !defer_lexical_updates {
11480 flush_pending_fts_entries(
11481 self,
11482 &tx,
11483 &mut fts_entries,
11484 &mut fts_pending_chars,
11485 &mut fts_inserted_total,
11486 )?;
11487 }
11488 if !defer_lexical_updates && fts_count_total > 0 {
11489 tracing::debug!(
11490 target: "cass::perf::fts5",
11491 total = fts_count_total,
11492 inserted = fts_inserted_total,
11493 conversations = conversations.len(),
11494 "franken_batch_fts_insert_complete"
11495 );
11496 }
11497
11498 if !defer_analytics_updates && !stats.is_empty() {
11500 let entries = stats.expand();
11501 let affected = franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
11502 tracing::debug!(
11503 target: "cass::perf::daily_stats",
11504 raw = stats.raw_entry_count(),
11505 expanded = entries.len(),
11506 affected = affected,
11507 "franken_batched_stats_update_complete"
11508 );
11509 }
11510
11511 if !defer_analytics_updates && !token_entries.is_empty() {
11513 let token_count = token_entries.len();
11514 let inserted = franken_insert_token_usage_batched_in_tx(&tx, &token_entries)?;
11515 tracing::debug!(
11516 target: "cass::perf::token_usage",
11517 total = token_count,
11518 inserted = inserted,
11519 "franken_batch_token_usage_insert_complete"
11520 );
11521 }
11522
11523 if !defer_analytics_updates && !token_stats.is_empty() {
11525 let entries = token_stats.expand();
11526 let affected = franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
11527 tracing::debug!(
11528 target: "cass::perf::token_daily_stats",
11529 raw = token_stats.raw_entry_count(),
11530 expanded = entries.len(),
11531 affected = affected,
11532 "franken_batched_token_stats_update_complete"
11533 );
11534 }
11535
11536 if !defer_analytics_updates && !metrics_entries.is_empty() {
11538 let mm_count = metrics_entries.len();
11539 let inserted = franken_insert_message_metrics_batched_in_tx(&tx, &metrics_entries)?;
11540 tracing::debug!(
11541 target: "cass::perf::message_metrics",
11542 total = mm_count,
11543 inserted = inserted,
11544 "franken_batch_message_metrics_insert_complete"
11545 );
11546 }
11547
11548 if !defer_analytics_updates && !rollup_agg.is_empty() {
11550 let (hourly, daily, models_daily) =
11551 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
11552 tracing::debug!(
11553 target: "cass::perf::usage_rollups",
11554 hourly_buckets = rollup_agg.hourly_entry_count(),
11555 daily_buckets = rollup_agg.daily_entry_count(),
11556 models_daily_buckets = rollup_agg.models_daily_entry_count(),
11557 hourly_affected = hourly,
11558 daily_affected = daily,
11559 models_daily_affected = models_daily,
11560 "franken_batched_usage_rollups_complete"
11561 );
11562 }
11563
11564 if !defer_analytics_updates {
11566 for conv_id in &conv_ids_to_summarize {
11567 franken_update_conversation_token_summaries_in_tx(&tx, *conv_id)?;
11568 }
11569 }
11570
11571 tx.commit()?;
11572
11573 pricing_diag.log_summary();
11574
11575 Ok(outcomes)
11576 }
11577}
11578
11579fn normalized_storage_source_parts(
11580 source_id: Option<&str>,
11581 origin_kind: Option<&str>,
11582 origin_host: Option<&str>,
11583) -> (String, SourceKind, Option<String>) {
11584 let host_label = crate::search::tantivy::normalized_index_origin_host(origin_host);
11585 let source_id = crate::search::tantivy::normalized_index_source_id(
11586 source_id,
11587 origin_kind,
11588 host_label.as_deref(),
11589 );
11590
11591 if source_id == LOCAL_SOURCE_ID {
11592 (source_id, SourceKind::Local, None)
11593 } else {
11594 (source_id, SourceKind::Ssh, host_label)
11595 }
11596}
11597
11598fn normalized_source_for_conversation(conv: &Conversation) -> Source {
11599 let (id, kind, host_label) = normalized_storage_source_parts(
11600 Some(conv.source_id.as_str()),
11601 None,
11602 conv.origin_host.as_deref(),
11603 );
11604 Source {
11605 id,
11606 kind,
11607 host_label,
11608 machine_id: None,
11609 platform: None,
11610 config_json: None,
11611 created_at: None,
11612 updated_at: None,
11613 }
11614}
11615
11616fn is_bootstrap_local_source(source: &Source) -> bool {
11617 source.id == LOCAL_SOURCE_ID
11618 && matches!(source.kind, SourceKind::Local)
11619 && source.host_label.is_none()
11620 && source.machine_id.is_none()
11621 && source.platform.is_none()
11622 && source.config_json.is_none()
11623 && source.created_at.is_none()
11624 && source.updated_at.is_none()
11625}
11626
11627fn normalized_conversation_for_storage<'a>(conv: &'a Conversation) -> Cow<'a, Conversation> {
11628 let normalized_source = normalized_source_for_conversation(conv);
11629 if normalized_source.id == conv.source_id && normalized_source.host_label == conv.origin_host {
11630 Cow::Borrowed(conv)
11631 } else {
11632 let mut normalized = conv.clone();
11633 normalized.source_id = normalized_source.id;
11634 normalized.origin_host = normalized_source.host_label;
11635 Cow::Owned(normalized)
11636 }
11637}
11638
11639impl FrankenStorage {
11640 fn ensure_source_for_conversation(&self, conv: &Conversation) -> Result<()> {
11641 let source = normalized_source_for_conversation(conv);
11642 if is_bootstrap_local_source(&source) {
11643 return Ok(());
11646 }
11647 let cache_key = EnsuredConversationSourceKey::from_source(&source);
11648 if self.conversation_source_already_ensured(&cache_key) {
11649 return Ok(());
11650 }
11651 self.upsert_source(&source)?;
11652 self.mark_conversation_source_ensured(cache_key);
11653 Ok(())
11654 }
11655
11656 fn ensure_sources_for_batch(
11657 &self,
11658 conversations: &[(i64, Option<i64>, &Conversation)],
11659 ) -> Result<()> {
11660 let mut seen = HashSet::with_capacity(conversations.len());
11661 for &(_, _, conv) in conversations {
11662 let source = normalized_source_for_conversation(conv);
11663 if seen.insert(source.id.clone()) {
11664 if is_bootstrap_local_source(&source) {
11665 continue;
11666 }
11667 self.upsert_source(&source)?;
11668 self.mark_conversation_source_ensured(EnsuredConversationSourceKey::from_source(
11669 &source,
11670 ));
11671 }
11672 }
11673 Ok(())
11674 }
11675}
11676
11677fn franken_last_rowid(tx: &FrankenTransaction<'_>) -> Result<i64> {
11683 tx.last_insert_rowid()
11684 .ok()
11685 .filter(|&id| id > 0)
11686 .with_context(|| "last_insert_rowid() returned NULL or 0 after INSERT")
11687}
11688
11689fn ensure_agents_in_tx(
11695 tx: &FrankenTransaction<'_>,
11696 conversations: &[(i64, Option<i64>, &Conversation)],
11697) -> Result<()> {
11698 let mut seen = HashSet::new();
11699 let now = FrankenStorage::now_millis();
11700 for &(agent_id, _, conv) in conversations {
11701 if !seen.insert(agent_id) {
11702 continue;
11703 }
11704 let exists: i64 = tx.query_row_map(
11705 "SELECT COUNT(*) FROM agents WHERE id = ?1",
11706 fparams![agent_id],
11707 |row| row.get_typed(0),
11708 )?;
11709 if exists == 0 {
11710 tracing::debug!(
11711 target: "cass::fk_guard",
11712 agent_id,
11713 slug = %conv.agent_slug,
11714 "inserting agent row inside transaction to satisfy FK constraint"
11715 );
11716 tx.execute_compat(
11720 "INSERT OR IGNORE INTO agents(id, slug, name, kind, created_at, updated_at)
11721 VALUES(?1, ?2, ?3, 'cli', ?4, ?5)",
11722 fparams![
11723 agent_id,
11724 conv.agent_slug.as_str(),
11725 conv.agent_slug.as_str(),
11726 now,
11727 now
11728 ],
11729 )?;
11730 }
11731 }
11732 Ok(())
11733}
11734
11735fn ensure_workspaces_in_tx(
11738 tx: &FrankenTransaction<'_>,
11739 conversations: &[(i64, Option<i64>, &Conversation)],
11740) -> Result<()> {
11741 let mut seen = HashSet::new();
11742 for &(_, workspace_id, conv) in conversations {
11743 let ws_id = match workspace_id {
11744 Some(id) => id,
11745 None => continue,
11746 };
11747 if !seen.insert(ws_id) {
11748 continue;
11749 }
11750 let exists: i64 = tx.query_row_map(
11751 "SELECT COUNT(*) FROM workspaces WHERE id = ?1",
11752 fparams![ws_id],
11753 |row| row.get_typed(0),
11754 )?;
11755 if exists == 0 {
11756 let path_str = conv
11757 .workspace
11758 .as_ref()
11759 .map(|p| p.to_string_lossy().to_string())
11760 .unwrap_or_default();
11761 tracing::debug!(
11762 target: "cass::fk_guard",
11763 workspace_id = ws_id,
11764 path = %path_str,
11765 "inserting workspace row inside transaction to satisfy FK constraint"
11766 );
11767 tx.execute_compat(
11768 "INSERT OR IGNORE INTO workspaces(id, path) VALUES(?1, ?2)",
11769 fparams![ws_id, path_str.as_str()],
11770 )?;
11771 }
11772 }
11773 Ok(())
11774}
11775
11776fn ensure_sources_in_tx(
11780 tx: &FrankenTransaction<'_>,
11781 conversations: &[(i64, Option<i64>, &Conversation)],
11782) -> Result<()> {
11783 let mut seen = HashSet::new();
11784 for &(_, _, conv) in conversations {
11785 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
11786 Some(conv.source_id.as_str()),
11787 None,
11788 conv.origin_host.as_deref(),
11789 );
11790 if !seen.insert(source_id.clone()) {
11791 continue;
11792 }
11793 let exists: i64 = tx.query_row_map(
11794 "SELECT COUNT(*) FROM sources WHERE id = ?1",
11795 fparams![source_id.as_str()],
11796 |row| row.get_typed(0),
11797 )?;
11798 if exists == 0 {
11799 let kind_str = source_kind.to_string();
11800 let now = FrankenStorage::now_millis();
11801 tracing::debug!(
11802 target: "cass::fk_guard",
11803 source_id = %source_id,
11804 kind = kind_str.as_str(),
11805 "inserting source row inside transaction to satisfy FK constraint"
11806 );
11807 tx.execute_compat(
11808 "INSERT OR IGNORE INTO sources(id, kind, host_label, created_at, updated_at)
11809 VALUES(?1, ?2, ?3, ?4, ?5)",
11810 fparams![
11811 source_id.as_str(),
11812 kind_str.as_str(),
11813 host_label.as_deref(),
11814 now,
11815 now
11816 ],
11817 )?;
11818 }
11819 }
11820 Ok(())
11821}
11822
11823fn env_flag_enabled(name: &str) -> bool {
11824 dotenvy::var(name).ok().is_some_and(|v| {
11825 matches!(
11826 v.trim(),
11827 "1" | "true" | "TRUE" | "yes" | "YES" | "on" | "ON"
11828 )
11829 })
11830}
11831
11832fn defer_storage_lexical_updates_enabled() -> bool {
11833 env_flag_enabled("CASS_DEFER_LEXICAL_UPDATES")
11834}
11835
11836fn defer_analytics_updates_enabled() -> bool {
11837 if env_flag_enabled("CASS_DEFER_ANALYTICS_UPDATES") {
11838 return true;
11839 }
11840 if env_flag_enabled("CASS_INLINE_ANALYTICS_UPDATES") {
11841 return false;
11842 }
11843 DEFAULT_DEFER_ANALYTICS_UPDATES.load(Ordering::Relaxed)
11844}
11845
/// Outcome of trying to store a conversation row.
enum ConversationInsertStatus {
    /// A new conversation row was created; carries the id of the new row.
    Inserted(i64),
    /// A matching conversation row already existed; carries the id of that row.
    Existing(i64),
}
11850
11851fn franken_find_external_conversation_tail_lookup(
11852 tx: &FrankenTransaction<'_>,
11853 lookup_key: &str,
11854) -> Result<Option<ExistingConversationWithTail>> {
11855 let params = [SqliteValue::from(lookup_key)];
11856 let row = tx
11857 .query_row_with_params(
11858 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
11859 FROM conversation_external_tail_lookup
11860 WHERE lookup_key = ?1",
11861 ¶ms,
11862 )
11863 .optional()?;
11864 let Some(row) = row else {
11865 return Ok(None);
11866 };
11867 let id = row.get_typed(0)?;
11868 let ended_at = row.get_typed(1)?;
11869 let last_message_idx = row.get_typed(2)?;
11870 let last_message_created_at = row.get_typed(3)?;
11871 Ok(Some(ExistingConversationWithTail {
11872 id,
11873 tail_state: existing_conversation_tail_state_from_cached(
11874 last_message_idx,
11875 last_message_created_at,
11876 ended_at,
11877 ),
11878 }))
11879}
11880
11881fn franken_find_external_conversation_lookup(
11882 tx: &FrankenTransaction<'_>,
11883 lookup_key: &str,
11884) -> Result<Option<i64>> {
11885 Ok(franken_find_external_conversation_tail_lookup(tx, lookup_key)?.map(|existing| existing.id))
11886}
11887
11888fn franken_insert_external_conversation_tail_lookup_key(
11889 tx: &FrankenTransaction<'_>,
11890 lookup_key: &str,
11891 conversation_id: i64,
11892 ended_at: Option<i64>,
11893 last_message_idx: Option<i64>,
11894 last_message_created_at: Option<i64>,
11895) -> Result<()> {
11896 let params = [
11897 SqliteValue::from(lookup_key),
11898 SqliteValue::from(conversation_id),
11899 SqliteValue::from(ended_at),
11900 SqliteValue::from(last_message_idx),
11901 SqliteValue::from(last_message_created_at),
11902 ];
11903 tx.execute_with_params(
11904 "INSERT OR REPLACE INTO conversation_external_tail_lookup(
11905 lookup_key, conversation_id, ended_at, last_message_idx, last_message_created_at
11906 ) VALUES(?1, ?2, ?3, ?4, ?5)",
11907 ¶ms,
11908 )?;
11909 Ok(())
11910}
11911
11912fn franken_insert_external_conversation_tail_lookup(
11913 tx: &FrankenTransaction<'_>,
11914 source_id: &str,
11915 agent_id: i64,
11916 external_id: &str,
11917 existing: ExistingConversationWithTail,
11918) -> Result<()> {
11919 let lookup_key = conversation_external_lookup_key(source_id, agent_id, external_id);
11920 let ended_at = existing.tail_state.and_then(|state| state.ended_at);
11921 let last_message_idx = existing.tail_state.map(|state| state.last_message_idx);
11922 let last_message_created_at = existing
11923 .tail_state
11924 .map(|state| state.last_message_created_at);
11925 franken_insert_external_conversation_tail_lookup_key(
11926 tx,
11927 &lookup_key,
11928 existing.id,
11929 ended_at,
11930 last_message_idx,
11931 last_message_created_at,
11932 )
11933}
11934
11935fn franken_update_external_conversation_tail_lookup_key(
11936 tx: &FrankenTransaction<'_>,
11937 lookup_key: &str,
11938 ended_at_candidate: Option<i64>,
11939 last_message_idx_candidate: Option<i64>,
11940 last_message_created_at_candidate: Option<i64>,
11941) -> Result<()> {
11942 if ended_at_candidate.is_none()
11943 && last_message_idx_candidate.is_none()
11944 && last_message_created_at_candidate.is_none()
11945 {
11946 return Ok(());
11947 }
11948 tx.execute_compat(
11949 "UPDATE conversation_external_tail_lookup
11950 SET ended_at = CASE
11951 WHEN ?1 IS NULL THEN ended_at
11952 ELSE MAX(IFNULL(ended_at, 0), ?1)
11953 END,
11954 last_message_idx = CASE
11955 WHEN ?2 IS NULL THEN last_message_idx
11956 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
11957 ELSE last_message_idx
11958 END,
11959 last_message_created_at = CASE
11960 WHEN ?3 IS NULL THEN last_message_created_at
11961 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
11962 ELSE last_message_created_at
11963 END
11964 WHERE lookup_key = ?4",
11965 fparams![
11966 ended_at_candidate,
11967 last_message_idx_candidate,
11968 last_message_created_at_candidate,
11969 lookup_key
11970 ],
11971 )?;
11972 Ok(())
11973}
11974
11975fn franken_set_external_conversation_tail_lookup_after_append(
11976 tx: &FrankenTransaction<'_>,
11977 lookup_key: &str,
11978 ended_at: i64,
11979 last_message_idx: i64,
11980 last_message_created_at: i64,
11981) -> Result<()> {
11982 tx.execute_compat(
11983 "UPDATE conversation_external_tail_lookup
11984 SET ended_at = ?1,
11985 last_message_idx = ?2,
11986 last_message_created_at = ?3
11987 WHERE lookup_key = ?4",
11988 fparams![
11989 ended_at,
11990 last_message_idx,
11991 last_message_created_at,
11992 lookup_key
11993 ],
11994 )?;
11995 Ok(())
11996}
11997
11998fn franken_update_external_conversation_tail_after_append(
11999 tx: &FrankenTransaction<'_>,
12000 agent_id: i64,
12001 conv: &Conversation,
12002 used_append_tail_plan: bool,
12003 exact_append_set: bool,
12004 inserted_last_idx: Option<i64>,
12005 inserted_last_created_at: Option<i64>,
12006) -> Result<()> {
12007 let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv) else {
12008 return Ok(());
12009 };
12010
12011 if exact_append_set
12012 && let (Some(last_message_idx), Some(last_message_created_at)) =
12013 (inserted_last_idx, inserted_last_created_at)
12014 {
12015 return franken_set_external_conversation_tail_lookup_after_append(
12016 tx,
12017 &lookup_key,
12018 last_message_created_at,
12019 last_message_idx,
12020 last_message_created_at,
12021 );
12022 }
12023
12024 let ended_at_candidate = if used_append_tail_plan {
12025 inserted_last_created_at
12026 } else {
12027 conv.messages.iter().filter_map(|m| m.created_at).max()
12028 };
12029 franken_update_external_conversation_tail_lookup_key(
12030 tx,
12031 &lookup_key,
12032 ended_at_candidate,
12033 inserted_last_idx,
12034 inserted_last_created_at,
12035 )
12036}
12037
12038fn franken_find_existing_conversation_by_key(
12039 tx: &FrankenTransaction<'_>,
12040 key: &PendingConversationKey,
12041 conv: Option<&Conversation>,
12042) -> Result<Option<i64>> {
12043 franken_find_existing_conversation_by_key_impl(tx, key, conv, false)
12044}
12045
12046fn franken_find_existing_conversation_by_key_after_conflict(
12047 tx: &FrankenTransaction<'_>,
12048 key: &PendingConversationKey,
12049 conv: Option<&Conversation>,
12050) -> Result<Option<i64>> {
12051 franken_find_existing_conversation_by_key_impl(tx, key, conv, true)
12052}
12053
12054fn franken_find_existing_conversation_by_key_impl(
12055 tx: &FrankenTransaction<'_>,
12056 key: &PendingConversationKey,
12057 conv: Option<&Conversation>,
12058 allow_legacy_external_scan: bool,
12059) -> Result<Option<i64>> {
12060 match key {
12061 PendingConversationKey::External {
12062 source_id,
12063 agent_id,
12064 external_id,
12065 } => {
12066 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
12067 if let Some(existing_id) = franken_find_external_conversation_lookup(tx, &lookup_key)? {
12068 return Ok(Some(existing_id));
12069 }
12070 if !allow_legacy_external_scan {
12071 return Ok(None);
12072 }
12073
12074 let existing_id = tx
12075 .query_row_map(
12076 "SELECT id
12077 FROM conversations
12078 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
12079 fparams![source_id.as_str(), *agent_id, external_id.as_str()],
12080 |row| row.get_typed(0),
12081 )
12082 .optional()?;
12083 if let Some(existing_id) = existing_id {
12084 let tail_state = franken_existing_conversation_append_tail_state(tx, existing_id)?;
12085 franken_insert_external_conversation_tail_lookup_key(
12086 tx,
12087 &lookup_key,
12088 existing_id,
12089 tail_state.and_then(|state| state.ended_at),
12090 tail_state.map(|state| state.last_message_idx),
12091 tail_state.map(|state| state.last_message_created_at),
12092 )?;
12093 Ok(Some(existing_id))
12094 } else {
12095 Ok(None)
12096 }
12097 }
12098 PendingConversationKey::SourcePath {
12099 source_id,
12100 agent_id,
12101 source_path,
12102 started_at,
12103 } => {
12104 let exact_match = tx
12105 .query_row_map(
12106 "SELECT c.id
12107 FROM conversations c
12108 WHERE c.source_id = ?1
12109 AND c.agent_id = ?2
12110 AND c.source_path = ?3
12111 AND ((
12112 COALESCE(
12113 c.started_at,
12114 (SELECT MIN(created_at)
12115 FROM messages
12116 WHERE conversation_id = c.id
12117 AND created_at IS NOT NULL)
12118 ) IS NULL
12119 AND ?4 IS NULL
12120 ) OR COALESCE(
12121 c.started_at,
12122 (SELECT MIN(created_at)
12123 FROM messages
12124 WHERE conversation_id = c.id
12125 AND created_at IS NOT NULL)
12126 ) = ?4)
12127 ORDER BY c.id
12128 LIMIT 1",
12129 fparams![
12130 source_id.as_str(),
12131 *agent_id,
12132 source_path.as_str(),
12133 *started_at
12134 ],
12135 |row| row.get_typed(0),
12136 )
12137 .optional()?;
12138 if exact_match.is_some() {
12139 return Ok(exact_match);
12140 }
12141
12142 let Some(conv) = conv else {
12143 return Ok(None);
12144 };
12145 let incoming_fingerprints = conversation_message_fingerprints(conv);
12146 if incoming_fingerprints.is_empty() {
12147 return Ok(None);
12148 }
12149 let incoming_replay_fingerprints = conversation_message_replay_fingerprints(conv);
12150
12151 let candidates: Vec<(i64, Option<i64>)> = tx.query_map_collect(
12152 "SELECT
12153 c.id,
12154 COALESCE(
12155 c.started_at,
12156 (SELECT MIN(created_at)
12157 FROM messages
12158 WHERE conversation_id = c.id
12159 AND created_at IS NOT NULL)
12160 ) AS effective_started_at
12161 FROM conversations c
12162 WHERE c.source_id = ?1
12163 AND c.agent_id = ?2
12164 AND c.source_path = ?3
12165 ORDER BY c.id",
12166 fparams![source_id.as_str(), *agent_id, source_path.as_str()],
12167 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
12168 )?;
12169
12170 let mut best_candidate: Option<(i64, ConversationMergeEvidence)> = None;
12171 for (candidate_id, candidate_started_at) in candidates {
12172 let existing_fingerprints =
12173 franken_existing_message_fingerprints(tx, candidate_id)?;
12174 let existing_replay_fingerprints =
12175 replay_fingerprints_from_merge_set(&existing_fingerprints);
12176 let Some(evidence) = conversation_merge_evidence(
12177 &incoming_fingerprints,
12178 &incoming_replay_fingerprints,
12179 &existing_fingerprints,
12180 &existing_replay_fingerprints,
12181 *started_at,
12182 candidate_started_at,
12183 ) else {
12184 continue;
12185 };
12186
12187 let candidate_key = (
12188 evidence.exact_overlap,
12189 evidence.replay_overlap,
12190 evidence.started_close,
12191 evidence.smaller_replay_set,
12192 std::cmp::Reverse(evidence.start_distance_ms),
12193 );
12194 let should_replace = best_candidate
12195 .as_ref()
12196 .map(|(_, best_evidence)| {
12197 candidate_key
12198 > (
12199 best_evidence.exact_overlap,
12200 best_evidence.replay_overlap,
12201 best_evidence.started_close,
12202 best_evidence.smaller_replay_set,
12203 std::cmp::Reverse(best_evidence.start_distance_ms),
12204 )
12205 })
12206 .unwrap_or(true);
12207
12208 if should_replace {
12209 best_candidate = Some((candidate_id, evidence));
12210 }
12211 }
12212
12213 Ok(best_candidate.map(|(candidate_id, _)| candidate_id))
12214 }
12215 }
12216}
12217
12218fn franken_insert_conversation_or_get_existing(
12219 tx: &FrankenTransaction<'_>,
12220 agent_id: i64,
12221 workspace_id: Option<i64>,
12222 conv: &Conversation,
12223) -> Result<ConversationInsertStatus> {
12224 let conversation_key = conversation_merge_key(agent_id, conv);
12225 if let Some(existing_id) =
12226 franken_find_existing_conversation_by_key(tx, &conversation_key, Some(conv))?
12227 {
12228 return Ok(ConversationInsertStatus::Existing(existing_id));
12229 }
12230
12231 franken_insert_conversation_or_get_existing_after_miss(
12232 tx,
12233 agent_id,
12234 workspace_id,
12235 conv,
12236 &conversation_key,
12237 )
12238}
12239
12240fn franken_insert_conversation_or_get_existing_after_miss(
12241 tx: &FrankenTransaction<'_>,
12242 agent_id: i64,
12243 workspace_id: Option<i64>,
12244 conv: &Conversation,
12245 conversation_key: &PendingConversationKey,
12246) -> Result<ConversationInsertStatus> {
12247 match franken_insert_conversation(tx, agent_id, workspace_id, conv) {
12248 Ok(Some(conv_id)) => Ok(ConversationInsertStatus::Inserted(conv_id)),
12249 Ok(None) => {
12250 let existing_id =
12253 franken_find_existing_conversation_by_key_after_conflict(
12254 tx,
12255 conversation_key,
12256 Some(conv),
12257 )?
12258 .with_context(|| {
12259 format!(
12260 "conversation INSERT produced a duplicate conflict but existing row was not found for source_id={} agent_id={} external_id={:?} source_path={}",
12261 conv.source_id,
12262 agent_id,
12263 conv.external_id,
12264 conv.source_path.display()
12265 )
12266 })?;
12267 tracing::warn!(
12268 source_id = %conv.source_id,
12269 agent_id,
12270 external_id = ?conv.external_id,
12271 existing_id,
12272 source_path = %conv.source_path.display(),
12273 "conversation INSERT: duplicate gracefully recovered, reusing existing row"
12274 );
12275 Ok(ConversationInsertStatus::Existing(existing_id))
12276 }
12277 Err(error) => {
12278 tracing::error!(
12279 source_id = %conv.source_id,
12280 agent_id,
12281 external_id = ?conv.external_id,
12282 error = %error,
12283 source_path = %conv.source_path.display(),
12284 "franken_insert_conversation failed"
12285 );
12286 Err(error)
12287 }
12288 }
12289}
12290
12291fn franken_insert_conversation(
12297 tx: &FrankenTransaction<'_>,
12298 agent_id: i64,
12299 workspace_id: Option<i64>,
12300 conv: &Conversation,
12301) -> Result<Option<i64>> {
12302 let (metadata_json_str, metadata_bin) = franken_metadata_insert_payload(&conv.metadata_json)?;
12303 let (last_message_idx, last_message_created_at) = conversation_tail_state(conv);
12304 let metadata_bin_bytes = metadata_bin.as_deref();
12305
12306 match tx.execute_compat(
12307 "INSERT INTO conversations(
12308 agent_id, workspace_id, source_id, external_id, title, source_path,
12309 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin,
12310 last_message_idx, last_message_created_at
12311 ) VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14)",
12312 fparams![
12313 agent_id,
12314 workspace_id,
12315 conv.source_id.as_str(),
12316 conv.external_id.as_deref(),
12317 conv.title.as_deref(),
12318 path_to_string(&conv.source_path),
12319 conv.started_at,
12320 conv.ended_at,
12321 conv.approx_tokens,
12322 metadata_json_str.as_deref(),
12323 conv.origin_host.as_deref(),
12324 metadata_bin_bytes,
12325 last_message_idx,
12326 last_message_created_at
12327 ],
12328 ) {
12329 Ok(_) => {
12330 let conv_id = franken_last_rowid(tx)?;
12331 franken_insert_conversation_tail_state(
12332 tx,
12333 conv_id,
12334 conv.ended_at,
12335 last_message_idx,
12336 last_message_created_at,
12337 )?;
12338 if let Some(external_id) = conv.external_id.as_deref() {
12339 franken_insert_external_conversation_tail_lookup(
12340 tx,
12341 conv.source_id.as_str(),
12342 agent_id,
12343 external_id,
12344 ExistingConversationWithTail {
12345 id: conv_id,
12346 tail_state: existing_conversation_tail_state_from_cached(
12347 last_message_idx,
12348 last_message_created_at,
12349 conv.ended_at,
12350 ),
12351 },
12352 )?;
12353 }
12354 Ok(Some(conv_id))
12355 }
12356 Err(frankensqlite::FrankenError::UniqueViolation { .. }) => {
12357 tracing::debug!(
12358 source_id = %conv.source_id,
12359 agent_id,
12360 external_id = ?conv.external_id,
12361 source_path = %conv.source_path.display(),
12362 "conversation INSERT: duplicate provenance conflict"
12363 );
12364 Ok(None)
12365 }
12366 Err(error) => Err(error.into()),
12367 }
12368}
12369
12370type MetadataInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
12371
12372fn franken_metadata_insert_payload(value: &serde_json::Value) -> Result<MetadataInsertPayload<'_>> {
12373 if let Some(raw) = historical_raw_json(value) {
12374 Ok((Some(Cow::Borrowed(raw)), None))
12375 } else if value.is_null() {
12376 Ok((Some(Cow::Borrowed("null")), None))
12377 } else if value.as_object().is_some_and(|object| object.is_empty()) {
12378 Ok((None, None))
12379 } else if let Some(metadata_bin) = serialize_json_to_msgpack(value) {
12380 Ok((None, Some(metadata_bin)))
12381 } else {
12382 Ok((Some(Cow::Owned(serde_json::to_string(value)?)), None))
12383 }
12384}
12385
12386fn franken_insert_new_message(
12387 tx: &FrankenTransaction<'_>,
12388 conversation_id: i64,
12389 msg: &Message,
12390) -> Result<i64> {
12391 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
12392 let extra_bin_bytes = extra_bin.as_deref();
12393
12394 tx.execute_compat(
12395 "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
12396 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
12397 fparams![
12398 conversation_id,
12399 msg.idx,
12400 role_as_str(&msg.role),
12401 msg.author.as_deref(),
12402 msg.created_at,
12403 msg.content.as_str(),
12404 extra_json_str.as_deref(),
12405 extra_bin_bytes
12406 ],
12407 )?;
12408 franken_last_rowid(tx)
12409}
12410
12411fn franken_insert_new_message_ignore_duplicate(
12412 tx: &FrankenTransaction<'_>,
12413 conversation_id: i64,
12414 msg: &Message,
12415) -> Result<Option<i64>> {
12416 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
12417 let extra_bin_bytes = extra_bin.as_deref();
12418
12419 let changed = tx.execute_compat(
12420 "INSERT OR IGNORE INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
12421 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
12422 fparams![
12423 conversation_id,
12424 msg.idx,
12425 role_as_str(&msg.role),
12426 msg.author.as_deref(),
12427 msg.created_at,
12428 msg.content.as_str(),
12429 extra_json_str.as_deref(),
12430 extra_bin_bytes
12431 ],
12432 )?;
12433 if changed == 0 {
12434 return Ok(None);
12435 }
12436 franken_last_rowid(tx).map(Some)
12437}
12438
12439type MessageInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
12440
12441fn franken_message_insert_payload(msg: &Message) -> Result<MessageInsertPayload<'_>> {
12442 if let Some(raw) = historical_raw_json(&msg.extra_json) {
12443 Ok((Some(Cow::Borrowed(raw)), None))
12444 } else if msg.extra_json.is_null() {
12445 Ok((None, None))
12446 } else {
12447 let extra_bin = serialize_json_to_msgpack(&msg.extra_json);
12448 if extra_bin.is_some() {
12449 Ok((None, extra_bin))
12450 } else {
12451 Ok((
12452 Some(Cow::Owned(serde_json::to_string(&msg.extra_json)?)),
12453 None,
12454 ))
12455 }
12456 }
12457}
12458
/// Maximum number of rows per multi-row message INSERT on the regular insert path.
const MESSAGE_INSERT_BATCH_SIZE: usize = 100;

/// Batch-size constant for the append path; together with
/// `MESSAGE_INSERT_BATCH_SIZE` it determines how many statements are cached.
const APPEND_MESSAGE_INSERT_BATCH_SIZE: usize = 50;

/// Returns the cached multi-row `INSERT INTO messages` statement for `row_count` rows.
///
/// Statements for every row count from 1 up to the larger of the two batch
/// size constants are built once on first use. Each row contributes eight
/// consecutively numbered placeholders. Index 0 holds an empty string.
///
/// # Panics
/// Panics when `row_count` exceeds the largest cached batch size.
fn message_insert_batch_sql(row_count: usize) -> &'static str {
    const COLUMNS_PER_ROW: usize = 8;
    static STATEMENTS_BY_ROW_COUNT: std::sync::OnceLock<Vec<String>> = std::sync::OnceLock::new();

    let statements = STATEMENTS_BY_ROW_COUNT.get_or_init(|| {
        let largest_batch = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
        let mut table = Vec::with_capacity(largest_batch + 1);
        // Slot 0 is a placeholder so a statement's index equals its row count.
        table.push(String::new());

        // Each statement extends the previous one by exactly one row group,
        // so the VALUES list is grown incrementally.
        let mut values_list = String::new();
        for row in 0..largest_batch {
            if row > 0 {
                values_list.push(',');
            }
            let first = row * COLUMNS_PER_ROW + 1;
            let group = (first..first + COLUMNS_PER_ROW)
                .map(|n| format!("?{n}"))
                .collect::<Vec<_>>()
                .join(",");
            values_list.push('(');
            values_list.push_str(&group);
            values_list.push(')');
            table.push(format!(
                "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin) VALUES {values_list}"
            ));
        }
        table
    });

    statements
        .get(row_count)
        .map(String::as_str)
        .expect("message insert batch size must be covered by the cached SQL table")
}
12509
12510fn franken_batch_insert_new_messages(
12511 tx: &FrankenTransaction<'_>,
12512 conversation_id: i64,
12513 messages: &[&Message],
12514) -> Result<Vec<i64>> {
12515 franken_batch_insert_new_messages_with_batch_size(
12516 tx,
12517 conversation_id,
12518 messages,
12519 MESSAGE_INSERT_BATCH_SIZE,
12520 )
12521}
12522
12523fn franken_append_insert_new_messages<'a>(
12524 tx: &FrankenTransaction<'_>,
12525 conversation_id: i64,
12526 messages: &[&'a Message],
12527) -> Result<Vec<(i64, &'a Message)>> {
12528 let mut inserted = Vec::with_capacity(messages.len());
12529 for msg in messages {
12530 if let Some(message_id) =
12531 franken_insert_new_message_ignore_duplicate(tx, conversation_id, msg)?
12532 {
12533 inserted.push((message_id, *msg));
12534 }
12535 }
12536 Ok(inserted)
12537}
12538
12539fn franken_batch_insert_new_messages_with_batch_size(
12540 tx: &FrankenTransaction<'_>,
12541 conversation_id: i64,
12542 messages: &[&Message],
12543 batch_size: usize,
12544) -> Result<Vec<i64>> {
12545 let batch_size = batch_size.max(1);
12546 let mut inserted_ids = Vec::with_capacity(messages.len());
12547 for chunk in messages.chunks(batch_size) {
12548 if chunk.len() == 1 {
12549 inserted_ids.push(franken_insert_new_message(tx, conversation_id, chunk[0])?);
12550 continue;
12551 }
12552 let sql = message_insert_batch_sql(chunk.len());
12553
12554 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
12555 for msg in chunk {
12556 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
12557 param_values.push(SqliteValue::from(conversation_id));
12558 param_values.push(SqliteValue::from(msg.idx));
12559 param_values.push(SqliteValue::from(role_as_str(&msg.role)));
12560 param_values.push(SqliteValue::from(msg.author.as_deref()));
12561 param_values.push(SqliteValue::from(msg.created_at));
12562 param_values.push(SqliteValue::from(msg.content.as_str()));
12563 param_values.push(SqliteValue::from(extra_json_str.as_deref()));
12564 param_values.push(SqliteValue::from(extra_bin.as_deref()));
12565 }
12566
12567 tx.execute_with_params(sql, ¶m_values)?;
12568
12569 let last_id = franken_last_rowid(tx)?;
12570 let first_id = last_id
12571 .checked_sub((chunk.len() - 1) as i64)
12572 .with_context(|| {
12573 format!(
12574 "inferring rowid range for {}-row message batch ending at {last_id}",
12575 chunk.len()
12576 )
12577 })?;
12578 inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
12579 }
12580
12581 Ok(inserted_ids)
12582}
12583
/// Test-only single-row message insert that records per-stage timings.
///
/// Bumps `profile.single_row_calls` and `profile.batch_rows` by one, then times
/// three stages separately into `profile`: payload encoding, statement
/// execution, and reading back the rowid. Returns the rowid reported by
/// `franken_last_rowid`.
///
/// # Errors
///
/// Propagates failures from payload encoding, the INSERT, or the rowid lookup;
/// the duration of a failed stage is not recorded.
#[cfg(test)]
fn franken_insert_new_message_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    msg: &Message,
    profile: &mut MessageInsertSubstageProfile,
) -> Result<i64> {
    const INSERT_MESSAGE_SQL: &str = "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(?1,?2,?3,?4,?5,?6,?7,?8)";

    profile.single_row_calls += 1;
    profile.batch_rows += 1;

    let payload_timer = Instant::now();
    let (extra_json, extra_bin) = franken_message_insert_payload(msg)?;
    profile.payload_duration += payload_timer.elapsed();
    // Borrowed outside the timed region so only the statement itself is measured.
    let extra_bin_slice = extra_bin.as_deref();

    let execute_timer = Instant::now();
    tx.execute_compat(
        INSERT_MESSAGE_SQL,
        fparams![
            conversation_id,
            msg.idx,
            role_as_str(&msg.role),
            msg.author.as_deref(),
            msg.created_at,
            msg.content.as_str(),
            extra_json.as_deref(),
            extra_bin_slice
        ],
    )?;
    profile.execute_duration += execute_timer.elapsed();

    let rowid_timer = Instant::now();
    let message_id = franken_last_rowid(tx)?;
    profile.rowid_duration += rowid_timer.elapsed();

    Ok(message_id)
}
12621
/// Test-only profiled batch insert using `MESSAGE_INSERT_BATCH_SIZE`.
///
/// Forwards to `franken_batch_insert_new_messages_with_profile_batch_size` and
/// returns its result unchanged; timings and counters accumulate in `profile`.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
12637
/// Test-only profiled batch insert using `APPEND_MESSAGE_INSERT_BATCH_SIZE`.
///
/// Forwards to `franken_batch_insert_new_messages_with_profile_batch_size` and
/// returns its result unchanged; timings and counters accumulate in `profile`.
#[cfg(test)]
fn franken_append_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = APPEND_MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
12653
/// Test-only variant of the batched message insert that records per-stage
/// timings and counters in `profile`.
///
/// A `batch_size` of zero is treated as one. Single-message chunks are handed
/// to `franken_insert_new_message_with_profile`. For larger chunks this
/// function bumps `batch_calls` / `batch_rows` and separately times SQL lookup,
/// payload encoding, parameter building, statement execution, and rowid
/// inference (which includes appending the ids to the result).
///
/// Returns one id per message, in input order; multi-row ids are inferred as
/// the contiguous range ending at the value from `franken_last_rowid`.
///
/// # Errors
///
/// Propagates payload-encoding and statement failures, and fails if the
/// inferred first rowid would underflow `i64`.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile_batch_size(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
    batch_size: usize,
) -> Result<Vec<i64>> {
    // Number of bound parameters each message row contributes.
    const COLUMNS_PER_ROW: usize = 8;

    let rows_per_statement = batch_size.max(1);
    let mut inserted_ids = Vec::with_capacity(messages.len());

    for chunk in messages.chunks(rows_per_statement) {
        if let &[only] = chunk {
            let message_id =
                franken_insert_new_message_with_profile(tx, conversation_id, only, profile)?;
            inserted_ids.push(message_id);
            continue;
        }

        let row_count = chunk.len();
        profile.batch_calls += 1;
        profile.batch_rows += row_count;

        let sql_timer = Instant::now();
        let sql = message_insert_batch_sql(row_count);
        profile.sql_build_duration += sql_timer.elapsed();

        let mut bound_values: Vec<SqliteValue> = Vec::with_capacity(row_count * COLUMNS_PER_ROW);
        for msg in chunk {
            let payload_timer = Instant::now();
            let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
            profile.payload_duration += payload_timer.elapsed();

            let param_timer = Instant::now();
            // Order must match the column list of the batch INSERT statement.
            bound_values.extend([
                SqliteValue::from(conversation_id),
                SqliteValue::from(msg.idx),
                SqliteValue::from(role_as_str(&msg.role)),
                SqliteValue::from(msg.author.as_deref()),
                SqliteValue::from(msg.created_at),
                SqliteValue::from(msg.content.as_str()),
                SqliteValue::from(extra_json_str.as_deref()),
                SqliteValue::from(extra_bin.as_deref()),
            ]);
            profile.param_build_duration += param_timer.elapsed();
        }

        let execute_timer = Instant::now();
        tx.execute_with_params(sql, &bound_values)?;
        profile.execute_duration += execute_timer.elapsed();

        let rowid_timer = Instant::now();
        // The batch is assumed to occupy consecutive rowids ending at `last_id`.
        let last_id = franken_last_rowid(tx)?;
        let preceding_rows = (row_count - 1) as i64;
        let first_id = last_id.checked_sub(preceding_rows).with_context(|| {
            format!(
                "inferring rowid range for {}-row message batch ending at {last_id}",
                row_count
            )
        })?;
        inserted_ids.extend(first_id..=last_id);
        profile.rowid_duration += rowid_timer.elapsed();
    }

    Ok(inserted_ids)
}
12720
12721fn franken_insert_snippets(
12723 tx: &FrankenTransaction<'_>,
12724 message_id: i64,
12725 snippets: &[Snippet],
12726) -> Result<()> {
12727 for snip in snippets {
12728 let file_path_str = snip.file_path.as_ref().map(path_to_string);
12729 tx.execute_compat(
12730 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text)
12731 VALUES(?1,?2,?3,?4,?5,?6)",
12732 fparams![
12733 message_id,
12734 file_path_str.as_deref(),
12735 snip.start_line,
12736 snip.end_line,
12737 snip.language.as_deref(),
12738 snip.snippet_text.as_deref()
12739 ],
12740 )?;
12741 }
12742 Ok(())
12743}
12744
12745fn franken_existing_message_fingerprints(
12746 tx: &FrankenTransaction<'_>,
12747 conversation_id: i64,
12748) -> Result<HashSet<MessageMergeFingerprint>> {
12749 let rows = tx.query_params(
12750 "SELECT idx, role, author, created_at, content
12751 FROM messages
12752 WHERE conversation_id = ?1",
12753 fparams![conversation_id],
12754 )?;
12755 let mut fingerprints = HashSet::with_capacity(rows.len());
12756 for row in rows {
12757 let role: String = row.get_typed(1)?;
12758 let content: String = row.get_typed(4)?;
12759 fingerprints.insert(MessageMergeFingerprint {
12760 idx: row.get_typed(0)?,
12761 created_at: row.get_typed(3)?,
12762 role: role_from_str(&role),
12763 author: row.get_typed(2)?,
12764 content_hash: *blake3::hash(content.as_bytes()).as_bytes(),
12765 });
12766 }
12767 Ok(fingerprints)
12768}
12769
/// Fingerprints of already-known messages, as gathered for one conversation.
struct ExistingMessageLookup {
    /// Merge fingerprints keyed by message `idx`.
    by_idx: HashMap<i64, MessageMergeFingerprint>,
    /// Replay fingerprints (no `idx` component), used for membership checks.
    replay: HashSet<MessageReplayFingerprint>,
}
12774
12775fn existing_message_lookup_from_rows(
12776 rows: Vec<FrankenRow>,
12777 min_idx: i64,
12778 max_idx: i64,
12779 created_bounds: Option<(i64, i64)>,
12780 replay_full_scan: bool,
12781) -> Result<ExistingMessageLookup> {
12782 let mut by_idx = HashMap::with_capacity(rows.len());
12783 let mut replay = HashSet::with_capacity(rows.len());
12784 for row in rows {
12785 let idx: i64 = row.get_typed(0)?;
12786 let role: String = row.get_typed(1)?;
12787 let author: Option<String> = row.get_typed(2)?;
12788 let created_at: Option<i64> = row.get_typed(3)?;
12789 let content: String = row.get_typed(4)?;
12790 let role = role_from_str(&role);
12791 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12792
12793 if idx >= min_idx && idx <= max_idx {
12794 by_idx.insert(
12795 idx,
12796 MessageMergeFingerprint {
12797 idx,
12798 created_at,
12799 role: role.clone(),
12800 author: author.clone(),
12801 content_hash,
12802 },
12803 );
12804 }
12805
12806 let replay_matches = if replay_full_scan {
12807 true
12808 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12809 created_at.is_some_and(|ts| ts >= min_created_at && ts <= max_created_at)
12810 } else {
12811 true
12812 };
12813 if replay_matches {
12814 replay.insert(MessageReplayFingerprint {
12815 created_at,
12816 role,
12817 author,
12818 content_hash,
12819 });
12820 }
12821 }
12822 Ok(ExistingMessageLookup { by_idx, replay })
12823}
12824
12825fn franken_existing_message_lookup(
12826 tx: &FrankenTransaction<'_>,
12827 conversation_id: i64,
12828 incoming_messages: &[Message],
12829) -> Result<ExistingMessageLookup> {
12830 if incoming_messages.is_empty() {
12831 return Ok(ExistingMessageLookup {
12832 by_idx: HashMap::new(),
12833 replay: HashSet::new(),
12834 });
12835 }
12836
12837 let min_idx = incoming_messages
12838 .iter()
12839 .map(|msg| msg.idx)
12840 .min()
12841 .unwrap_or(0);
12842 let max_idx = incoming_messages
12843 .iter()
12844 .map(|msg| msg.idx)
12845 .max()
12846 .unwrap_or(min_idx);
12847 let idx_rows = tx.query_params(
12848 "SELECT idx
12849 FROM messages INDEXED BY sqlite_autoindex_messages_1
12850 WHERE conversation_id = ?1
12851 AND idx >= ?2
12852 AND idx <= ?3",
12853 fparams![conversation_id, min_idx, max_idx],
12854 )?;
12855 record_message_lookup_bounded_queries(1, idx_rows.len());
12856
12857 let mut existing_indices = HashSet::with_capacity(idx_rows.len());
12858 for row in idx_rows {
12859 let idx: i64 = row.get_typed(0)?;
12860 existing_indices.insert(idx);
12861 }
12862
12863 let mut by_idx = HashMap::with_capacity(incoming_messages.len().min(existing_indices.len()));
12864 let mut missing_messages = Vec::new();
12865 for msg in incoming_messages {
12866 if existing_indices.contains(&msg.idx) {
12867 by_idx.insert(msg.idx, message_merge_fingerprint(msg));
12871 } else {
12872 missing_messages.push(msg);
12873 }
12874 }
12875
12876 if missing_messages.is_empty() {
12877 return Ok(ExistingMessageLookup {
12878 by_idx,
12879 replay: HashSet::new(),
12880 });
12881 }
12882
12883 let requires_full_scan = missing_messages.iter().any(|msg| msg.created_at.is_none());
12884 let created_bounds = missing_messages
12885 .iter()
12886 .filter_map(|msg| msg.created_at)
12887 .fold(None, |bounds: Option<(i64, i64)>, created_at| {
12888 Some(match bounds {
12889 Some((min_created_at, max_created_at)) => (
12890 min_created_at.min(created_at),
12891 max_created_at.max(created_at),
12892 ),
12893 None => (created_at, created_at),
12894 })
12895 });
12896
12897 let mut replay = HashSet::new();
12898 if requires_full_scan {
12899 let rows = tx.query_params(
12900 "SELECT idx, role, author, created_at, content
12901 FROM messages INDEXED BY sqlite_autoindex_messages_1
12902 WHERE conversation_id = ?1",
12903 fparams![conversation_id],
12904 )?;
12905 record_message_lookup_full_scan_query(rows.len());
12906 let content_lookup =
12907 existing_message_lookup_from_rows(rows, min_idx, max_idx, created_bounds, true)?;
12908 by_idx.extend(content_lookup.by_idx);
12909 replay.extend(content_lookup.replay);
12910 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12911 let rows = tx.query_params(
12912 "SELECT idx, role, author, created_at, content
12913 FROM messages INDEXED BY sqlite_autoindex_messages_1
12914 WHERE conversation_id = ?1
12915 AND created_at IS NOT NULL
12916 AND created_at >= ?2
12917 AND created_at <= ?3",
12918 fparams![conversation_id, min_created_at, max_created_at],
12919 )?;
12920 record_message_lookup_bounded_queries(1, rows.len());
12921 let created_lookup =
12922 existing_message_lookup_from_rows(rows, min_idx, max_idx, created_bounds, false)?;
12923 by_idx.extend(created_lookup.by_idx);
12924 replay.extend(created_lookup.replay);
12925 }
12926
12927 Ok(ExistingMessageLookup { by_idx, replay })
12928}
12929
12930fn franken_existing_message_lookup_with_pending(
12931 tx: &FrankenTransaction<'_>,
12932 conversation_id: i64,
12933 incoming_messages: &[Message],
12934 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12935 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12936) -> Result<ExistingMessageLookup> {
12937 if let (Some(by_idx), Some(replay)) = (
12938 pending_message_fingerprints.get(&conversation_id),
12939 pending_message_replay_fingerprints.get(&conversation_id),
12940 ) {
12941 if incoming_messages.iter().all(|msg| {
12942 by_idx.contains_key(&msg.idx) || replay.contains(&message_replay_fingerprint(msg))
12943 }) {
12944 return Ok(ExistingMessageLookup {
12945 by_idx: by_idx.clone(),
12946 replay: replay.clone(),
12947 });
12948 }
12949
12950 let fresh = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12951 let mut merged_by_idx = by_idx.clone();
12952 let mut merged_replay = replay.clone();
12953 merged_by_idx.extend(fresh.by_idx);
12954 merged_replay.extend(fresh.replay);
12955 pending_message_fingerprints.insert(conversation_id, merged_by_idx.clone());
12956 pending_message_replay_fingerprints.insert(conversation_id, merged_replay.clone());
12957 return Ok(ExistingMessageLookup {
12958 by_idx: merged_by_idx,
12959 replay: merged_replay,
12960 });
12961 }
12962
12963 let lookup = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12964 pending_message_fingerprints.insert(conversation_id, lookup.by_idx.clone());
12965 pending_message_replay_fingerprints.insert(conversation_id, lookup.replay.clone());
12966 Ok(lookup)
12967}
12968
12969fn franken_collect_batched_existing_new_messages<'a>(
12970 tx: &FrankenTransaction<'_>,
12971 conversation_id: i64,
12972 conv: &'a Conversation,
12973 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12974 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12975 replay_skip_log: &'static str,
12976) -> Result<(
12977 ExistingConversationNewMessages<'a>,
12978 HashMap<i64, MessageMergeFingerprint>,
12979 HashSet<MessageReplayFingerprint>,
12980)> {
12981 let tail_metadata = franken_cached_existing_conversation_tail_metadata(tx, conversation_id)?;
12982 let tail_state = tail_metadata.complete_tail_state();
12983 if let Some(tail_state) = tail_state
12984 && let Some(tail_plan) = collect_append_only_tail_messages(
12985 conv,
12986 tail_state.last_message_idx,
12987 tail_state.last_message_created_at,
12988 )
12989 {
12990 let mut by_idx = pending_message_fingerprints
12991 .remove(&conversation_id)
12992 .unwrap_or_default();
12993 let mut replay = pending_message_replay_fingerprints
12994 .remove(&conversation_id)
12995 .unwrap_or_default();
12996 for msg in &tail_plan.messages {
12997 let fingerprint = message_merge_fingerprint(msg);
12998 by_idx.insert(msg.idx, fingerprint.clone());
12999 replay.insert(replay_fingerprint_from_merge(&fingerprint));
13000 }
13001 return Ok((tail_plan, by_idx, replay));
13002 }
13003
13004 let timestamp_data_incomplete = tail_metadata.last_message_created_at.is_none()
13005 || conv.messages.iter().any(|msg| msg.created_at.is_none());
13006 if timestamp_data_incomplete
13007 && let Some(existing_ended_at) = tail_metadata.ended_at
13008 && let Some(noop_plan) =
13009 collect_existing_conversation_noop_from_conversation_ended_at(conv, existing_ended_at)
13010 {
13011 let by_idx = pending_message_fingerprints
13012 .remove(&conversation_id)
13013 .unwrap_or_default();
13014 let replay = pending_message_replay_fingerprints
13015 .remove(&conversation_id)
13016 .unwrap_or_default();
13017 return Ok((noop_plan, by_idx, replay));
13018 }
13019
13020 if timestamp_data_incomplete
13021 && let Some(last_message_idx) = tail_metadata.last_message_idx
13022 && let Some(tail_plan) =
13023 collect_existing_conversation_noop_from_idx_tail(conv, last_message_idx)
13024 {
13025 let mut by_idx = pending_message_fingerprints
13026 .remove(&conversation_id)
13027 .unwrap_or_default();
13028 let mut replay = pending_message_replay_fingerprints
13029 .remove(&conversation_id)
13030 .unwrap_or_default();
13031 for msg in &tail_plan.messages {
13032 let fingerprint = message_merge_fingerprint(msg);
13033 by_idx.insert(msg.idx, fingerprint.clone());
13034 replay.insert(replay_fingerprint_from_merge(&fingerprint));
13035 }
13036 return Ok((tail_plan, by_idx, replay));
13037 }
13038
13039 let existing_ended_at = if tail_metadata.ended_at.is_some() {
13040 tail_metadata.ended_at
13041 } else {
13042 franken_existing_conversation_ended_at(tx, conversation_id)?
13043 };
13044 if let Some(existing_ended_at) = existing_ended_at
13045 && let Some(tail_plan) =
13046 collect_existing_conversation_tail_from_ended_at(conv, existing_ended_at)
13047 {
13048 let mut by_idx = pending_message_fingerprints
13049 .remove(&conversation_id)
13050 .unwrap_or_default();
13051 let mut replay = pending_message_replay_fingerprints
13052 .remove(&conversation_id)
13053 .unwrap_or_default();
13054 for msg in &tail_plan.messages {
13055 let fingerprint = message_merge_fingerprint(msg);
13056 by_idx.insert(msg.idx, fingerprint.clone());
13057 replay.insert(replay_fingerprint_from_merge(&fingerprint));
13058 }
13059 return Ok((tail_plan, by_idx, replay));
13060 }
13061
13062 trace_existing_conversation_lookup_fallback(
13063 conversation_id,
13064 conv,
13065 tail_state,
13066 existing_ended_at,
13067 );
13068
13069 let ExistingMessageLookup {
13070 by_idx: mut existing_messages,
13071 replay: mut existing_replay_fingerprints,
13072 } = franken_existing_message_lookup_with_pending(
13073 tx,
13074 conversation_id,
13075 &conv.messages,
13076 pending_message_fingerprints,
13077 pending_message_replay_fingerprints,
13078 )?;
13079 let new_messages = collect_new_messages_for_existing_conversation(
13080 conversation_id,
13081 conv,
13082 &mut existing_messages,
13083 &mut existing_replay_fingerprints,
13084 replay_skip_log,
13085 );
13086 Ok((
13087 new_messages,
13088 existing_messages,
13089 existing_replay_fingerprints,
13090 ))
13091}
13092
13093fn franken_batch_insert_fts(tx: &FrankenTransaction<'_>, entries: &[FtsEntry]) -> Result<usize> {
13095 if entries.is_empty() {
13096 return Ok(0);
13097 }
13098
13099 let mut inserted = 0;
13100
13101 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
13102 let placeholders: String = chunk
13103 .iter()
13104 .enumerate()
13105 .map(|(i, _)| {
13106 let base = i * 7 + 1; format!(
13108 "(?{},?{},?{},?{},?{},?{},?{})",
13109 base,
13110 base + 1,
13111 base + 2,
13112 base + 3,
13113 base + 4,
13114 base + 5,
13115 base + 6
13116 )
13117 })
13118 .collect::<Vec<_>>()
13119 .join(",");
13120
13121 let sql = format!(
13122 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
13123 );
13124
13125 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
13126 for entry in chunk {
13127 param_values.push(SqliteValue::from(entry.message_id));
13128 param_values.push(SqliteValue::from(entry.content.as_str()));
13129 param_values.push(SqliteValue::from(entry.title.as_str()));
13130 param_values.push(SqliteValue::from(entry.agent.as_str()));
13131 param_values.push(SqliteValue::from(entry.workspace.as_str()));
13132 param_values.push(SqliteValue::from(entry.source_path.as_str()));
13133 param_values.push(SqliteValue::from(entry.created_at));
13134 }
13135
13136 match tx.execute_with_params(&sql, ¶m_values) {
13137 Ok(_) => {
13138 inserted += chunk.len();
13139 }
13140 Err(err) => {
13141 tracing::warn!(
13142 error = %err,
13143 chunk_docs = chunk.len(),
13144 "frankensqlite FTS batch insert failed; skipping db-resident FTS maintenance because Tantivy is authoritative"
13145 );
13146 return Ok(inserted);
13147 }
13148 }
13149 }
13150
13151 Ok(inserted)
13152}
13153
13154fn franken_batch_insert_fts_on_connection(
13155 conn: &FrankenConnection,
13156 entries: &[FtsEntry],
13157) -> Result<usize> {
13158 if entries.is_empty() {
13159 return Ok(0);
13160 }
13161
13162 let mut inserted = 0;
13163
13164 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
13165 let placeholders: String = chunk
13166 .iter()
13167 .enumerate()
13168 .map(|(i, _)| {
13169 let base = i * 7 + 1;
13170 format!(
13171 "(?{},?{},?{},?{},?{},?{},?{})",
13172 base,
13173 base + 1,
13174 base + 2,
13175 base + 3,
13176 base + 4,
13177 base + 5,
13178 base + 6
13179 )
13180 })
13181 .collect::<Vec<_>>()
13182 .join(",");
13183
13184 let sql = format!(
13185 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
13186 );
13187
13188 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
13189 for entry in chunk {
13190 param_values.push(SqliteValue::from(entry.message_id));
13191 param_values.push(SqliteValue::from(entry.content.as_str()));
13192 param_values.push(SqliteValue::from(entry.title.as_str()));
13193 param_values.push(SqliteValue::from(entry.agent.as_str()));
13194 param_values.push(SqliteValue::from(entry.workspace.as_str()));
13195 param_values.push(SqliteValue::from(entry.source_path.as_str()));
13196 param_values.push(SqliteValue::from(entry.created_at));
13197 }
13198
13199 conn.execute_with_params(&sql, ¶m_values)
13200 .with_context(|| {
13201 format!(
13202 "inserting {} rows into fts_messages during streaming FTS maintenance",
13203 chunk.len()
13204 )
13205 })?;
13206 inserted += chunk.len();
13207 }
13208
13209 Ok(inserted)
13210}
13211
13212fn franken_update_daily_stats_in_tx(
13214 storage: &FrankenStorage,
13215 tx: &FrankenTransaction<'_>,
13216 agent_slug: &str,
13217 source_id: &str,
13218 started_at: Option<i64>,
13219 delta: StatsDelta,
13220) -> Result<()> {
13221 let day_id = started_at
13222 .map(FrankenStorage::day_id_from_millis)
13223 .unwrap_or(0);
13224 let now = FrankenStorage::now_millis();
13225
13226 let targets = [
13227 DailyStatsTarget {
13228 day_id,
13229 agent_slug,
13230 source_id,
13231 },
13232 DailyStatsTarget {
13233 day_id,
13234 agent_slug: "all",
13235 source_id,
13236 },
13237 DailyStatsTarget {
13238 day_id,
13239 agent_slug,
13240 source_id: "all",
13241 },
13242 DailyStatsTarget {
13243 day_id,
13244 agent_slug: "all",
13245 source_id: "all",
13246 },
13247 ];
13248
13249 if agent_slug != "all"
13250 && source_id != "all"
13251 && franken_update_ensured_daily_stats_targets_in_tx(storage, tx, &targets, now, delta)?
13252 {
13253 return Ok(());
13254 }
13255
13256 for target in targets {
13257 franken_apply_daily_stats_delta_in_tx(storage, tx, target, now, delta)?;
13258 }
13259
13260 Ok(())
13261}
13262
/// Identifies one `daily_stats` row by its `(day_id, agent_slug, source_id)` key.
#[derive(Clone, Copy)]
struct DailyStatsTarget<'a> {
    /// Day bucket of the row.
    day_id: i64,
    /// Agent slug, or the literal `"all"` for the cross-agent rollup.
    agent_slug: &'a str,
    /// Source id, or the literal `"all"` for the cross-source rollup.
    source_id: &'a str,
}
13269
13270fn franken_update_ensured_daily_stats_targets_in_tx(
13271 storage: &FrankenStorage,
13272 tx: &FrankenTransaction<'_>,
13273 targets: &[DailyStatsTarget<'_>; 4],
13274 now: i64,
13275 delta: StatsDelta,
13276) -> Result<bool> {
13277 let cache_keys = targets.map(|target| {
13278 EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id)
13279 });
13280 if !storage.daily_stats_keys_already_ensured(&cache_keys) {
13281 return Ok(false);
13282 }
13283
13284 let primary = targets[0];
13285 let rows_changed = tx.execute_compat(
13286 "UPDATE daily_stats
13287 SET session_count = session_count + ?4,
13288 message_count = message_count + ?5,
13289 total_chars = total_chars + ?6,
13290 last_updated = ?7
13291 WHERE day_id = ?1
13292 AND ((agent_slug = ?2 AND source_id = ?3)
13293 OR (agent_slug = 'all' AND source_id = ?3)
13294 OR (agent_slug = ?2 AND source_id = 'all')
13295 OR (agent_slug = 'all' AND source_id = 'all'))",
13296 fparams![
13297 primary.day_id,
13298 primary.agent_slug,
13299 primary.source_id,
13300 delta.session_count_delta,
13301 delta.message_count_delta,
13302 delta.total_chars_delta,
13303 now
13304 ],
13305 )?;
13306 if rows_changed == targets.len() {
13307 return Ok(true);
13308 }
13309
13310 for (target, cache_key) in targets.iter().copied().zip(cache_keys) {
13311 let exists = tx
13312 .query_row_map(
13313 "SELECT 1 FROM daily_stats
13314 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3
13315 LIMIT 1",
13316 fparams![target.day_id, target.agent_slug, target.source_id],
13317 |row| row.get_typed::<i64>(0),
13318 )
13319 .optional()?
13320 .is_some();
13321 if exists {
13322 continue;
13323 }
13324
13325 tx.execute_compat(
13326 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
13327 VALUES(?1,?2,?3,?4,?5,?6,?7)",
13328 fparams![
13329 target.day_id,
13330 target.agent_slug,
13331 target.source_id,
13332 delta.session_count_delta,
13333 delta.message_count_delta,
13334 delta.total_chars_delta,
13335 now
13336 ],
13337 )?;
13338 storage.mark_daily_stats_key_ensured(cache_key);
13339 }
13340
13341 Ok(true)
13342}
13343
13344fn franken_apply_daily_stats_delta_in_tx(
13345 storage: &FrankenStorage,
13346 tx: &FrankenTransaction<'_>,
13347 target: DailyStatsTarget<'_>,
13348 now: i64,
13349 delta: StatsDelta,
13350) -> Result<()> {
13351 let cache_key = EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id);
13352 if storage.daily_stats_key_already_ensured(&cache_key) {
13353 let rows_changed = tx.execute_compat(
13354 "UPDATE daily_stats
13355 SET session_count = session_count + ?4,
13356 message_count = message_count + ?5,
13357 total_chars = total_chars + ?6,
13358 last_updated = ?7
13359 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3",
13360 fparams![
13361 target.day_id,
13362 target.agent_slug,
13363 target.source_id,
13364 delta.session_count_delta,
13365 delta.message_count_delta,
13366 delta.total_chars_delta,
13367 now
13368 ],
13369 )?;
13370 if rows_changed > 0 {
13371 return Ok(());
13372 }
13373 }
13374
13375 tx.execute_compat(
13376 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
13377 VALUES(?1,?2,?3,?4,?5,?6,?7)
13378 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
13379 session_count = session_count + excluded.session_count,
13380 message_count = message_count + excluded.message_count,
13381 total_chars = total_chars + excluded.total_chars,
13382 last_updated = excluded.last_updated",
13383 fparams![
13384 target.day_id,
13385 target.agent_slug,
13386 target.source_id,
13387 delta.session_count_delta,
13388 delta.message_count_delta,
13389 delta.total_chars_delta,
13390 now
13391 ],
13392 )?;
13393 storage.mark_daily_stats_key_ensured(cache_key);
13394 Ok(())
13395}
13396
13397fn franken_update_daily_stats_batched_in_tx(
13403 tx: &FrankenTransaction<'_>,
13404 entries: &[(i64, String, String, StatsDelta)],
13405) -> Result<usize> {
13406 if entries.is_empty() {
13407 return Ok(0);
13408 }
13409
13410 let now = FrankenStorage::now_millis();
13411 let mut total_affected = 0;
13412
13413 for (day_id, agent, source, delta) in entries {
13418 total_affected += tx.execute_compat(
13419 "INSERT INTO daily_stats (day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
13420 VALUES(?1,?2,?3,?4,?5,?6,?7)
13421 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
13422 session_count = session_count + excluded.session_count,
13423 message_count = message_count + excluded.message_count,
13424 total_chars = total_chars + excluded.total_chars,
13425 last_updated = excluded.last_updated",
13426 fparams![
13427 *day_id,
13428 agent.as_str(),
13429 source.as_str(),
13430 delta.session_count_delta,
13431 delta.message_count_delta,
13432 delta.total_chars_delta,
13433 now
13434 ],
13435 )?;
13436 }
13437
13438 Ok(total_affected)
13439}
13440
13441fn franken_insert_token_usage_batched_in_tx(
13447 tx: &FrankenTransaction<'_>,
13448 entries: &[TokenUsageEntry],
13449) -> Result<usize> {
13450 if entries.is_empty() {
13451 return Ok(0);
13452 }
13453
13454 let mut total_inserted = 0;
13455
13456 for e in entries {
13457 let params_vec: Vec<ParamValue> = vec![
13458 ParamValue::from(e.message_id),
13459 ParamValue::from(e.conversation_id),
13460 ParamValue::from(e.agent_id),
13461 ParamValue::from(e.workspace_id),
13462 ParamValue::from(e.source_id.clone()),
13463 ParamValue::from(e.timestamp_ms),
13464 ParamValue::from(e.day_id),
13465 ParamValue::from(e.model_name.clone()),
13466 ParamValue::from(e.model_family.clone()),
13467 ParamValue::from(e.model_tier.clone()),
13468 ParamValue::from(e.service_tier.clone()),
13469 ParamValue::from(e.provider.clone()),
13470 ParamValue::from(e.input_tokens),
13471 ParamValue::from(e.output_tokens),
13472 ParamValue::from(e.cache_read_tokens),
13473 ParamValue::from(e.cache_creation_tokens),
13474 ParamValue::from(e.thinking_tokens),
13475 ParamValue::from(e.total_tokens),
13476 ParamValue::from(e.estimated_cost_usd),
13477 ParamValue::from(e.role.clone()),
13478 ParamValue::from(e.content_chars),
13479 ParamValue::from(e.has_tool_calls as i64),
13480 ParamValue::from(e.tool_call_count as i64),
13481 ParamValue::from(e.data_source.clone()),
13482 ];
13483
13484 let values = param_slice_to_values(¶ms_vec);
13485 total_inserted += tx.execute_with_params(
13486 "INSERT OR IGNORE INTO token_usage (
13487 message_id, conversation_id, agent_id, workspace_id, source_id,
13488 timestamp_ms, day_id,
13489 model_name, model_family, model_tier, service_tier, provider,
13490 input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
13491 thinking_tokens, total_tokens, estimated_cost_usd,
13492 role, content_chars, has_tool_calls, tool_call_count, data_source
13493 )
13494 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
13495 &values,
13496 )?;
13497 }
13498
13499 Ok(total_inserted)
13500}
13501
13502fn franken_update_token_daily_stats_batched_in_tx(
13504 tx: &FrankenTransaction<'_>,
13505 entries: &[(i64, String, String, String, TokenStatsDelta)],
13506) -> Result<usize> {
13507 if entries.is_empty() {
13508 return Ok(0);
13509 }
13510
13511 let now = FrankenStorage::now_millis();
13512 let mut total_affected = 0;
13513
13514 for (day_id, agent, source, model, delta) in entries {
13515 total_affected += tx.execute_compat(
13516 "INSERT INTO token_daily_stats (
13517 day_id, agent_slug, source_id, model_family,
13518 api_call_count, user_message_count, assistant_message_count, tool_message_count,
13519 total_input_tokens, total_output_tokens, total_cache_read_tokens,
13520 total_cache_creation_tokens, total_thinking_tokens, grand_total_tokens,
13521 total_content_chars, total_tool_calls, estimated_cost_usd, session_count,
13522 last_updated
13523 )
13524 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19)
13525 ON CONFLICT(day_id, agent_slug, source_id, model_family) DO UPDATE SET
13526 api_call_count = api_call_count + excluded.api_call_count,
13527 user_message_count = user_message_count + excluded.user_message_count,
13528 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13529 tool_message_count = tool_message_count + excluded.tool_message_count,
13530 total_input_tokens = total_input_tokens + excluded.total_input_tokens,
13531 total_output_tokens = total_output_tokens + excluded.total_output_tokens,
13532 total_cache_read_tokens = total_cache_read_tokens + excluded.total_cache_read_tokens,
13533 total_cache_creation_tokens = total_cache_creation_tokens + excluded.total_cache_creation_tokens,
13534 total_thinking_tokens = total_thinking_tokens + excluded.total_thinking_tokens,
13535 grand_total_tokens = grand_total_tokens + excluded.grand_total_tokens,
13536 total_content_chars = total_content_chars + excluded.total_content_chars,
13537 total_tool_calls = total_tool_calls + excluded.total_tool_calls,
13538 estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd,
13539 session_count = session_count + excluded.session_count,
13540 last_updated = excluded.last_updated",
13541 fparams![
13542 *day_id,
13543 agent.as_str(),
13544 source.as_str(),
13545 model.as_str(),
13546 delta.api_call_count,
13547 delta.user_message_count,
13548 delta.assistant_message_count,
13549 delta.tool_message_count,
13550 delta.total_input_tokens,
13551 delta.total_output_tokens,
13552 delta.total_cache_read_tokens,
13553 delta.total_cache_creation_tokens,
13554 delta.total_thinking_tokens,
13555 delta.grand_total_tokens,
13556 delta.total_content_chars,
13557 delta.total_tool_calls,
13558 delta.estimated_cost_usd,
13559 delta.session_count,
13560 now
13561 ],
13562 )?;
13563 }
13564
13565 Ok(total_affected)
13566}
13567
13568fn franken_insert_message_metrics_batched_in_tx(
13574 tx: &FrankenTransaction<'_>,
13575 entries: &[MessageMetricsEntry],
13576) -> Result<usize> {
13577 if entries.is_empty() {
13578 return Ok(0);
13579 }
13580
13581 let mut total_inserted = 0;
13582
13583 for e in entries {
13584 let params_vec: Vec<ParamValue> = vec![
13585 ParamValue::from(e.message_id),
13586 ParamValue::from(e.created_at_ms),
13587 ParamValue::from(e.hour_id),
13588 ParamValue::from(e.day_id),
13589 ParamValue::from(e.agent_slug.clone()),
13590 ParamValue::from(e.workspace_id),
13591 ParamValue::from(e.source_id.clone()),
13592 ParamValue::from(e.role.clone()),
13593 ParamValue::from(e.content_chars),
13594 ParamValue::from(e.content_tokens_est),
13595 ParamValue::from(e.model_name.clone()),
13596 ParamValue::from(e.model_family.clone()),
13597 ParamValue::from(e.model_tier.clone()),
13598 ParamValue::from(e.provider.clone()),
13599 ParamValue::from(e.api_input_tokens),
13600 ParamValue::from(e.api_output_tokens),
13601 ParamValue::from(e.api_cache_read_tokens),
13602 ParamValue::from(e.api_cache_creation_tokens),
13603 ParamValue::from(e.api_thinking_tokens),
13604 ParamValue::from(e.api_service_tier.clone()),
13605 ParamValue::from(e.api_data_source.clone()),
13606 ParamValue::from(e.tool_call_count),
13607 ParamValue::from(e.has_tool_calls as i64),
13608 ParamValue::from(e.has_plan as i64),
13609 ];
13610
13611 let values = param_slice_to_values(¶ms_vec);
13612 total_inserted += tx.execute_with_params(
13613 "INSERT OR IGNORE INTO message_metrics (
13614 message_id, created_at_ms, hour_id, day_id,
13615 agent_slug, workspace_id, source_id, role,
13616 content_chars, content_tokens_est,
13617 model_name, model_family, model_tier, provider,
13618 api_input_tokens, api_output_tokens, api_cache_read_tokens,
13619 api_cache_creation_tokens, api_thinking_tokens,
13620 api_service_tier, api_data_source,
13621 tool_call_count, has_tool_calls, has_plan
13622 )
13623 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
13624 &values,
13625 )?;
13626 }
13627
13628 Ok(total_inserted)
13629}
13630
13631fn franken_flush_rollup_table(
13633 tx: &FrankenTransaction<'_>,
13634 table: &str,
13635 bucket_col: &str,
13636 deltas: &HashMap<(i64, String, i64, String), UsageRollupDelta>,
13637 now: i64,
13638) -> Result<usize> {
13639 if deltas.is_empty() {
13640 return Ok(0);
13641 }
13642
13643 let mut total_affected = 0;
13644
13645 for ((bucket_id, agent, workspace_id, source), d) in deltas {
13646 let sql = format!(
13647 "INSERT INTO {table} (
13648 {bucket_col}, agent_slug, workspace_id, source_id,
13649 message_count, user_message_count, assistant_message_count,
13650 tool_call_count, plan_message_count, plan_content_tokens_est_total,
13651 plan_api_tokens_total, api_coverage_message_count,
13652 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
13653 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
13654 api_cache_read_tokens_total, api_cache_creation_tokens_total,
13655 api_thinking_tokens_total, last_updated
13656 )
13657 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
13658 ON CONFLICT({bucket_col}, agent_slug, workspace_id, source_id) DO UPDATE SET
13659 message_count = message_count + excluded.message_count,
13660 user_message_count = user_message_count + excluded.user_message_count,
13661 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13662 tool_call_count = tool_call_count + excluded.tool_call_count,
13663 plan_message_count = plan_message_count + excluded.plan_message_count,
13664 plan_content_tokens_est_total = plan_content_tokens_est_total + excluded.plan_content_tokens_est_total,
13665 plan_api_tokens_total = plan_api_tokens_total + excluded.plan_api_tokens_total,
13666 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
13667 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
13668 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
13669 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
13670 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
13671 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
13672 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
13673 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
13674 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
13675 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
13676 last_updated = excluded.last_updated"
13677 );
13678
13679 total_affected += tx.execute_compat(
13680 &sql,
13681 fparams![
13682 *bucket_id,
13683 agent.as_str(),
13684 *workspace_id,
13685 source.as_str(),
13686 d.message_count,
13687 d.user_message_count,
13688 d.assistant_message_count,
13689 d.tool_call_count,
13690 d.plan_message_count,
13691 d.plan_content_tokens_est_total,
13692 d.plan_api_tokens_total,
13693 d.api_coverage_message_count,
13694 d.content_tokens_est_total,
13695 d.content_tokens_est_user,
13696 d.content_tokens_est_assistant,
13697 d.api_tokens_total,
13698 d.api_input_tokens_total,
13699 d.api_output_tokens_total,
13700 d.api_cache_read_tokens_total,
13701 d.api_cache_creation_tokens_total,
13702 d.api_thinking_tokens_total,
13703 now
13704 ],
13705 )?;
13706 }
13707
13708 Ok(total_affected)
13709}
13710
13711fn franken_flush_model_daily_rollup_table(
13713 tx: &FrankenTransaction<'_>,
13714 deltas: &HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
13715 now: i64,
13716) -> Result<usize> {
13717 if deltas.is_empty() {
13718 return Ok(0);
13719 }
13720
13721 let mut total_affected = 0;
13722
13723 for ((day_id, agent, workspace_id, source, model_family, model_tier), d) in deltas {
13724 total_affected += tx.execute_compat(
13725 "INSERT INTO usage_models_daily (
13726 day_id, agent_slug, workspace_id, source_id, model_family, model_tier,
13727 message_count, user_message_count, assistant_message_count,
13728 tool_call_count, plan_message_count, api_coverage_message_count,
13729 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
13730 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
13731 api_cache_read_tokens_total, api_cache_creation_tokens_total,
13732 api_thinking_tokens_total, last_updated
13733 )
13734 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
13735 ON CONFLICT(day_id, agent_slug, workspace_id, source_id, model_family, model_tier) DO UPDATE SET
13736 message_count = message_count + excluded.message_count,
13737 user_message_count = user_message_count + excluded.user_message_count,
13738 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13739 tool_call_count = tool_call_count + excluded.tool_call_count,
13740 plan_message_count = plan_message_count + excluded.plan_message_count,
13741 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
13742 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
13743 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
13744 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
13745 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
13746 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
13747 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
13748 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
13749 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
13750 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
13751 last_updated = excluded.last_updated",
13752 fparams![
13753 *day_id,
13754 agent.as_str(),
13755 *workspace_id,
13756 source.as_str(),
13757 model_family.as_str(),
13758 model_tier.as_str(),
13759 d.message_count,
13760 d.user_message_count,
13761 d.assistant_message_count,
13762 d.tool_call_count,
13763 d.plan_message_count,
13764 d.api_coverage_message_count,
13765 d.content_tokens_est_total,
13766 d.content_tokens_est_user,
13767 d.content_tokens_est_assistant,
13768 d.api_tokens_total,
13769 d.api_input_tokens_total,
13770 d.api_output_tokens_total,
13771 d.api_cache_read_tokens_total,
13772 d.api_cache_creation_tokens_total,
13773 d.api_thinking_tokens_total,
13774 now
13775 ],
13776 )?;
13777 }
13778
13779 Ok(total_affected)
13780}
13781
13782fn franken_flush_analytics_rollups_in_tx(
13784 tx: &FrankenTransaction<'_>,
13785 agg: &AnalyticsRollupAggregator,
13786) -> Result<(usize, usize, usize)> {
13787 let now = FrankenStorage::now_millis();
13788
13789 let hourly_affected =
13790 franken_flush_rollup_table(tx, "usage_hourly", "hour_id", &agg.hourly, now)?;
13791 let daily_affected = franken_flush_rollup_table(tx, "usage_daily", "day_id", &agg.daily, now)?;
13792 let models_daily_affected = franken_flush_model_daily_rollup_table(tx, &agg.models_daily, now)?;
13793
13794 Ok((hourly_affected, daily_affected, models_daily_affected))
13795}
13796
13797fn franken_update_conversation_token_summaries_in_tx(
13799 tx: &FrankenTransaction<'_>,
13800 conversation_id: i64,
13801) -> Result<()> {
13802 tx.execute_compat(
13803 "UPDATE conversations SET
13804 total_input_tokens = (SELECT SUM(input_tokens) FROM token_usage WHERE conversation_id = ?1),
13805 total_output_tokens = (SELECT SUM(output_tokens) FROM token_usage WHERE conversation_id = ?1),
13806 total_cache_read_tokens = (SELECT SUM(cache_read_tokens) FROM token_usage WHERE conversation_id = ?1),
13807 total_cache_creation_tokens = (SELECT SUM(cache_creation_tokens) FROM token_usage WHERE conversation_id = ?1),
13808 grand_total_tokens = (SELECT SUM(total_tokens) FROM token_usage WHERE conversation_id = ?1),
13809 estimated_cost_usd = (SELECT SUM(estimated_cost_usd) FROM token_usage WHERE conversation_id = ?1),
13810 primary_model = (SELECT model_name FROM token_usage WHERE conversation_id = ?1
13811 AND model_name IS NOT NULL
13812 GROUP BY model_name ORDER BY COUNT(*) DESC LIMIT 1),
13813 api_call_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13814 AND data_source = 'api'),
13815 tool_call_count = (SELECT SUM(tool_call_count) FROM token_usage WHERE conversation_id = ?1),
13816 user_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13817 AND role = 'user'),
13818 assistant_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13819 AND role IN ('assistant', 'agent'))
13820 WHERE id = ?1",
13821 fparams![conversation_id],
13822 )?;
13823 Ok(())
13824}
13825
13826impl FrankenStorage {
13827 pub fn rebuild_token_daily_stats(&self) -> Result<usize> {
13829 const CONVERSATION_BATCH_SIZE: usize = 1_000;
13830 const TOKEN_USAGE_BATCH_SIZE: usize = 10_000;
13831
13832 let total_usage_rows: i64 =
13833 self.conn
13834 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
13835 row.get_typed(0)
13836 })?;
13837 tracing::info!(
13838 target: "cass::analytics",
13839 total_usage_rows,
13840 "token_daily_stats_rebuild_start"
13841 );
13842
13843 let mut tx = self.conn.transaction()?;
13844 tx.execute("DELETE FROM token_daily_stats")?;
13845
13846 let mut last_conversation_id = 0_i64;
13847 let mut rows_created = 0_usize;
13848
13849 loop {
13850 let conversation_rows = tx.query_map_collect(
13851 "SELECT c.id, c.started_at, c.source_id,
13852 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown')
13853 FROM conversations c
13854 WHERE c.id > ?1
13855 ORDER BY c.id
13856 LIMIT ?2",
13857 fparams![last_conversation_id, CONVERSATION_BATCH_SIZE as i64],
13858 |row| {
13859 Ok((
13860 row.get_typed::<i64>(0)?,
13861 row.get_typed::<Option<i64>>(1)?,
13862 row.get_typed::<String>(2)?,
13863 row.get_typed::<String>(3)?,
13864 ))
13865 },
13866 )?;
13867 if conversation_rows.is_empty() {
13868 break;
13869 }
13870
13871 let mut aggregate = TokenStatsAggregator::new();
13872
13873 for (conversation_id, started_at, source_id, agent_slug) in conversation_rows {
13874 last_conversation_id = conversation_id;
13875 let conversation_day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13876 let mut last_token_usage_id = 0_i64;
13877 let mut session_model_family = String::from("unknown");
13878
13879 loop {
13880 let usage_rows = tx.query_map_collect(
13881 "SELECT id, day_id, role,
13882 COALESCE(model_family, 'unknown'),
13883 input_tokens, output_tokens, cache_read_tokens,
13884 cache_creation_tokens, thinking_tokens,
13885 has_tool_calls, tool_call_count,
13886 content_chars, estimated_cost_usd
13887 FROM token_usage
13888 WHERE conversation_id = ?1
13889 AND id > ?2
13890 ORDER BY id
13891 LIMIT ?3",
13892 fparams![
13893 conversation_id,
13894 last_token_usage_id,
13895 TOKEN_USAGE_BATCH_SIZE as i64
13896 ],
13897 |row| {
13898 Ok((
13899 row.get_typed::<i64>(0)?,
13900 row.get_typed::<i64>(1)?,
13901 row.get_typed::<String>(2)?,
13902 row.get_typed::<String>(3)?,
13903 row.get_typed::<Option<i64>>(4)?,
13904 row.get_typed::<Option<i64>>(5)?,
13905 row.get_typed::<Option<i64>>(6)?,
13906 row.get_typed::<Option<i64>>(7)?,
13907 row.get_typed::<Option<i64>>(8)?,
13908 row.get_typed::<i64>(9)?,
13909 row.get_typed::<i64>(10)?,
13910 row.get_typed::<i64>(11)?,
13911 row.get_typed::<Option<f64>>(12)?,
13912 ))
13913 },
13914 )?;
13915 if usage_rows.is_empty() {
13916 break;
13917 }
13918
13919 for (
13920 token_usage_id,
13921 day_id,
13922 role,
13923 model_family,
13924 input_tokens,
13925 output_tokens,
13926 cache_read_tokens,
13927 cache_creation_tokens,
13928 thinking_tokens,
13929 has_tool_calls,
13930 tool_call_count,
13931 content_chars,
13932 estimated_cost_usd,
13933 ) in usage_rows
13934 {
13935 last_token_usage_id = token_usage_id;
13936 if model_family != "unknown" {
13937 session_model_family = model_family.clone();
13938 }
13939 let usage = crate::connectors::ExtractedTokenUsage {
13940 model_name: None,
13941 provider: None,
13942 input_tokens,
13943 output_tokens,
13944 cache_read_tokens,
13945 cache_creation_tokens,
13946 thinking_tokens,
13947 service_tier: None,
13948 has_tool_calls: has_tool_calls != 0,
13949 tool_call_count: u32::try_from(tool_call_count.max(0)).unwrap_or(0),
13950 data_source: franken_agent_detection::TokenDataSource::Api,
13951 };
13952 aggregate.record(
13953 &agent_slug,
13954 &source_id,
13955 day_id,
13956 &model_family,
13957 &role,
13958 &usage,
13959 content_chars,
13960 estimated_cost_usd.unwrap_or(0.0),
13961 );
13962 }
13963 }
13964
13965 aggregate.record_session(
13966 &agent_slug,
13967 &source_id,
13968 conversation_day_id,
13969 &session_model_family,
13970 );
13971 }
13972
13973 let entries = aggregate.expand();
13974 rows_created = rows_created.saturating_add(entries.len());
13975 franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
13976 }
13977
13978 tx.commit()?;
13979
13980 tracing::info!(
13981 target: "cass::analytics",
13982 rows_created,
13983 "token_daily_stats_rebuild_complete"
13984 );
13985
13986 Ok(rows_created)
13987 }
13988
13989 pub fn rebuild_analytics(&self) -> Result<AnalyticsRebuildResult> {
13992 let start = Instant::now();
13993
13994 let total_messages: i64 =
13995 self.conn
13996 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13997 row.get_typed(0)
13998 })?;
13999 tracing::info!(
14000 target: "cass::analytics",
14001 total_messages,
14002 "analytics_rebuild_start"
14003 );
14004
14005 let mut tx = self.conn.transaction()?;
14006
14007 tx.execute("DELETE FROM message_metrics")?;
14008 tx.execute("DELETE FROM usage_hourly")?;
14009 tx.execute("DELETE FROM usage_daily")?;
14010 tx.execute("DELETE FROM usage_models_daily")?;
14011
14012 const CHUNK_SIZE: i64 = 10_000;
14013 let mut offset: i64 = 0;
14014 let mut total_inserted: usize = 0;
14015 let mut usage_hourly_rows: usize = 0;
14016 let mut usage_daily_rows: usize = 0;
14017 let mut usage_models_daily_rows: usize = 0;
14018
14019 loop {
14020 #[allow(clippy::type_complexity)]
14021 let rows: Vec<(
14022 i64,
14023 String,
14024 String,
14025 Option<serde_json::Value>,
14026 Option<i64>,
14027 Option<i64>,
14028 String,
14029 Option<i64>,
14030 String,
14031 )> = tx.query_map_collect(
14032 "SELECT m.id, m.idx, m.role, m.content, m.extra_json, m.extra_bin,
14038 m.created_at,
14039 c.id AS conv_id, c.started_at AS conv_started_at,
14040 c.source_id, c.workspace_id,
14041 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown') AS agent_slug
14042 FROM messages m
14043 JOIN conversations c ON m.conversation_id = c.id
14044 ORDER BY m.id
14045 LIMIT ?1 OFFSET ?2",
14046 fparams![CHUNK_SIZE, offset],
14047 |row| {
14048 let msg_id: i64 = row.get_typed(0)?;
14049 let role: String = row.get_typed(2)?;
14050 let content: String = row.get_typed(3)?;
14051 let extra_json = row
14052 .get_typed::<Option<String>>(4)?
14053 .and_then(|s| serde_json::from_str(&s).ok())
14054 .or_else(|| {
14055 row.get_typed::<Option<Vec<u8>>>(5)
14056 .ok()
14057 .flatten()
14058 .and_then(|b| rmp_serde::from_slice(&b).ok())
14059 });
14060 let msg_ts: Option<i64> = row.get_typed(6)?;
14061 let conv_started_at: Option<i64> = row.get_typed(8)?;
14062 let source_id: String = row.get_typed(9)?;
14063 let workspace_id: Option<i64> = row.get_typed(10)?;
14064 let agent_slug: String = row.get_typed(11)?;
14065 let effective_ts = msg_ts.or(conv_started_at).unwrap_or(0);
14066
14067 Ok((
14068 msg_id,
14069 role,
14070 content,
14071 extra_json,
14072 Some(effective_ts),
14073 workspace_id,
14074 source_id,
14075 conv_started_at,
14076 agent_slug,
14077 ))
14078 },
14079 )?;
14080
14081 if rows.is_empty() {
14082 break;
14083 }
14084
14085 let chunk_len = rows.len();
14086 let mut entries = Vec::with_capacity(chunk_len);
14087 let mut rollup_agg = AnalyticsRollupAggregator::new();
14088
14089 for (
14090 msg_id,
14091 role,
14092 content,
14093 extra_json,
14094 effective_ts,
14095 workspace_id,
14096 source_id,
14097 _conv_started_at,
14098 agent_slug,
14099 ) in &rows
14100 {
14101 let ts = effective_ts.unwrap_or(0);
14102 let day_id = Self::day_id_from_millis(ts);
14103 let hour_id = Self::hour_id_from_millis(ts);
14104 let content_chars = content.len() as i64;
14105 let content_tokens_est = content_chars / 4;
14106 let extra = extra_json
14107 .as_ref()
14108 .cloned()
14109 .unwrap_or(serde_json::Value::Null);
14110 let usage =
14111 crate::connectors::extract_tokens_for_agent(agent_slug, &extra, content, role);
14112 let model_info = usage
14113 .model_name
14114 .as_deref()
14115 .map(crate::connectors::normalize_model);
14116 let model_family = model_info
14117 .as_ref()
14118 .map(|i| i.family.clone())
14119 .unwrap_or_else(|| "unknown".into());
14120 let model_tier = model_info
14121 .as_ref()
14122 .map(|i| i.tier.clone())
14123 .unwrap_or_else(|| "unknown".into());
14124 let provider = usage
14125 .provider
14126 .clone()
14127 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
14128 .unwrap_or_else(|| "unknown".into());
14129
14130 let entry = MessageMetricsEntry {
14131 message_id: *msg_id,
14132 created_at_ms: ts,
14133 hour_id,
14134 day_id,
14135 agent_slug: agent_slug.clone(),
14136 workspace_id: workspace_id.unwrap_or(0),
14137 source_id: source_id.clone(),
14138 role: role.clone(),
14139 content_chars,
14140 content_tokens_est,
14141 model_name: usage.model_name.clone(),
14142 model_family,
14143 model_tier,
14144 provider,
14145 api_input_tokens: usage.input_tokens,
14146 api_output_tokens: usage.output_tokens,
14147 api_cache_read_tokens: usage.cache_read_tokens,
14148 api_cache_creation_tokens: usage.cache_creation_tokens,
14149 api_thinking_tokens: usage.thinking_tokens,
14150 api_service_tier: usage.service_tier,
14151 api_data_source: usage.data_source.as_str().to_string(),
14152 tool_call_count: usage.tool_call_count as i64,
14153 has_tool_calls: usage.has_tool_calls,
14154 has_plan: has_plan_for_role(role, content),
14155 };
14156 rollup_agg.record(&entry);
14157 entries.push(entry);
14158 }
14159
14160 total_inserted += franken_insert_message_metrics_batched_in_tx(&tx, &entries)?;
14161 let (hourly, daily, models_daily) =
14162 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
14163 usage_hourly_rows += hourly;
14164 usage_daily_rows += daily;
14165 usage_models_daily_rows += models_daily;
14166 offset += chunk_len as i64;
14167
14168 tracing::debug!(
14169 target: "cass::analytics",
14170 offset,
14171 chunk = chunk_len,
14172 inserted = entries.len(),
14173 total = total_inserted,
14174 "analytics_rebuild_chunk"
14175 );
14176
14177 if (chunk_len as i64) < CHUNK_SIZE {
14178 break;
14179 }
14180 }
14181
14182 tx.commit()?;
14183
14184 let elapsed = start.elapsed();
14185 let elapsed_ms = elapsed.as_millis() as u64;
14186 let msgs_per_sec = if elapsed_ms > 0 {
14187 (total_inserted as f64) / (elapsed_ms as f64 / 1000.0)
14188 } else {
14189 0.0
14190 };
14191
14192 tracing::info!(
14193 target: "cass::analytics",
14194 message_metrics_rows = total_inserted,
14195 usage_hourly_rows,
14196 usage_daily_rows,
14197 usage_models_daily_rows,
14198 elapsed_ms,
14199 messages_per_sec = format!("{:.0}", msgs_per_sec),
14200 "analytics_rebuild_complete"
14201 );
14202
14203 Ok(AnalyticsRebuildResult {
14204 message_metrics_rows: total_inserted,
14205 usage_hourly_rows,
14206 usage_daily_rows,
14207 usage_models_daily_rows,
14208 elapsed_ms,
14209 messages_per_sec: msgs_per_sec,
14210 })
14211 }
14212
14213 pub fn rebuild_daily_stats(&self) -> Result<DailyStatsRebuildResult> {
14215 const DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE: usize = 1_000;
14216 const DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE: usize = 10_000;
14217
14218 let mut conversation_batch_size = rebuild_batch_size_env(
14219 "CASS_DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE",
14220 DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE,
14221 );
14222 let mut message_batch_size = rebuild_batch_size_env(
14223 "CASS_DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE",
14224 DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE,
14225 );
14226
14227 let total_messages: i64 =
14228 self.conn
14229 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
14230 row.get_typed(0)
14231 })?;
14232 let message_metrics_rows: i64 =
14233 self.conn
14234 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
14235 row.get_typed(0)
14236 })?;
14237 let use_message_metrics = total_messages > 0 && total_messages == message_metrics_rows;
14238
14239 tracing::info!(
14240 target: "cass::perf::daily_stats",
14241 total_messages,
14242 message_metrics_rows,
14243 use_message_metrics,
14244 "daily_stats rebuild selected message source"
14245 );
14246
14247 let mut tx = self.conn.transaction()?;
14248 tx.execute("DELETE FROM daily_stats")?;
14249
14250 let mut last_conversation_id = 0_i64;
14251 let mut conversation_batch_count = 0_usize;
14252 let mut conversations_processed = 0_usize;
14253 let mut messages_processed = 0_usize;
14254 let mut message_batch_count = 0_usize;
14255 let mut raw_entries_flushed = 0_usize;
14256 let mut expanded_entries_flushed = 0_usize;
14257 let message_scan_sql = if use_message_metrics {
14258 "SELECT m.idx, mm.content_chars
14259 FROM messages m
14260 JOIN message_metrics mm ON mm.message_id = m.id
14261 WHERE m.conversation_id = ?1
14262 AND m.idx > ?2
14263 ORDER BY m.conversation_id, m.idx
14264 LIMIT ?3"
14265 } else {
14266 "SELECT m.idx, COALESCE(LENGTH(CAST(m.content AS BLOB)), 0)
14267 FROM messages m
14268 WHERE m.conversation_id = ?1
14269 AND m.idx > ?2
14270 ORDER BY m.conversation_id, m.idx
14271 LIMIT ?3"
14272 };
14273
14274 loop {
14275 let conversation_rows = match self.conn.query_with_params(
14281 "SELECT c.id, c.started_at,
14282 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
14283 c.source_id
14284 FROM conversations c
14285 WHERE c.id > ?1
14286 ORDER BY c.id
14287 LIMIT ?2",
14288 ¶ms_from_iter([
14289 ParamValue::from(last_conversation_id),
14290 ParamValue::from(conversation_batch_size as i64),
14291 ]),
14292 ) {
14293 Ok(rows) => rows,
14294 Err(err) if is_out_of_memory_error(&err) && conversation_batch_size > 1 => {
14295 let previous_batch_size = conversation_batch_size;
14296 conversation_batch_size = (conversation_batch_size / 2).max(1);
14297 tracing::warn!(
14298 previous_batch_size,
14299 conversation_batch_size,
14300 last_conversation_id,
14301 "daily_stats conversation scan ran out of memory; retrying with smaller batch"
14302 );
14303 continue;
14304 }
14305 Err(err) => return Err(err.into()),
14306 };
14307 if conversation_rows.is_empty() {
14308 break;
14309 }
14310
14311 let mut aggregate = StatsAggregator::new();
14312 let mut conversation_batch_meta: Vec<(i64, i64, String, String)> =
14313 Vec::with_capacity(conversation_rows.len());
14314 for row in &conversation_rows {
14315 let conversation_id: i64 = row.get_typed(0)?;
14316 let started_at: Option<i64> = row.get_typed(1)?;
14317 let agent_slug: String = row.get_typed(2)?;
14318 let source_id: String = row.get_typed(3)?;
14319 last_conversation_id = conversation_id;
14320 let day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
14321 aggregate.record_delta(&agent_slug, &source_id, day_id, 1, 0, 0);
14322 conversation_batch_meta.push((conversation_id, day_id, agent_slug, source_id));
14323 conversations_processed += 1;
14324 }
14325
14326 conversation_batch_count += 1;
14327 raw_entries_flushed += aggregate.raw_entry_count();
14328 let entries = aggregate.expand();
14329 expanded_entries_flushed += entries.len();
14330 if !entries.is_empty() {
14331 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
14332 }
14333 if conversation_batch_count.is_multiple_of(25) {
14334 tracing::info!(
14335 target: "cass::perf::daily_stats",
14336 conversations_processed,
14337 batches = conversation_batch_count,
14338 batch_size = conversation_batch_size,
14339 last_conversation_id,
14340 "daily_stats rebuild conversation scan progress"
14341 );
14342 }
14343 if conversation_batch_meta.is_empty() {
14344 continue;
14345 }
14346
14347 for (conversation_id, day_id, agent_slug, source_id) in conversation_batch_meta {
14348 let mut cursor_message_idx = -1_i64;
14349 loop {
14350 let message_rows = match self.conn.query_with_params(
14351 message_scan_sql,
14352 ¶ms_from_iter([
14353 ParamValue::from(conversation_id),
14354 ParamValue::from(cursor_message_idx),
14355 ParamValue::from(message_batch_size as i64),
14356 ]),
14357 ) {
14358 Ok(rows) => rows,
14359 Err(err) if is_out_of_memory_error(&err) && message_batch_size > 1 => {
14360 let previous_batch_size = message_batch_size;
14361 message_batch_size = (message_batch_size / 2).max(1);
14362 tracing::warn!(
14363 previous_batch_size,
14364 message_batch_size,
14365 conversation_id,
14366 cursor_message_idx,
14367 "daily_stats message scan ran out of memory; retrying with smaller batch"
14368 );
14369 continue;
14370 }
14371 Err(err) => return Err(err.into()),
14372 };
14373 if message_rows.is_empty() {
14374 break;
14375 }
14376
14377 let mut aggregate = StatsAggregator::new();
14378 for row in &message_rows {
14379 let message_idx: i64 = row.get_typed(0)?;
14380 let content_len: i64 = row.get_typed(1)?;
14381 cursor_message_idx = message_idx;
14382 aggregate.record_delta(&agent_slug, &source_id, day_id, 0, 1, content_len);
14383 messages_processed += 1;
14384 }
14385
14386 message_batch_count += 1;
14387 raw_entries_flushed += aggregate.raw_entry_count();
14388 let entries = aggregate.expand();
14389 expanded_entries_flushed += entries.len();
14390 if !entries.is_empty() {
14391 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
14392 }
14393 if message_batch_count.is_multiple_of(50) {
14394 tracing::info!(
14395 target: "cass::perf::daily_stats",
14396 messages_processed,
14397 batches = message_batch_count,
14398 batch_size = message_batch_size,
14399 source = if use_message_metrics {
14400 "message_metrics"
14401 } else {
14402 "messages"
14403 },
14404 conversation_id,
14405 cursor_message_idx,
14406 "daily_stats rebuild message scan progress"
14407 );
14408 }
14409 }
14410 }
14411 }
14412
14413 let rows_created: i64 =
14414 tx.query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
14415 row.get_typed(0)
14416 })?;
14417 let total_sessions: i64 = tx.query_row_map(
14418 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
14419 fparams![],
14420 |row| row.get_typed(0),
14421 )?;
14422
14423 tx.commit()?;
14424
14425 tracing::info!(
14426 target: "cass::perf::daily_stats",
14427 rows_created,
14428 total_sessions,
14429 conversations_processed,
14430 conversation_batches = conversation_batch_count,
14431 conversation_batch_size,
14432 message_batches = message_batch_count,
14433 message_batch_size,
14434 messages_processed,
14435 use_message_metrics,
14436 raw_entries_flushed,
14437 expanded_entries_flushed,
14438 "Daily stats rebuilt from conversations"
14439 );
14440
14441 Ok(DailyStatsRebuildResult {
14442 rows_created,
14443 total_sessions,
14444 })
14445 }
14446}
14447
/// In-memory cache of agent and workspace ids with hit/miss counters.
///
/// Lookups go through `get_or_insert_agent` / `get_or_insert_workspace`, which
/// consult the maps first and only call into storage on a miss.
#[derive(Debug, Default)]
pub struct IndexingCache {
    /// Cached agent ids, keyed by agent slug.
    agent_ids: HashMap<String, i64>,
    /// Cached workspace ids, keyed by workspace path.
    workspace_ids: HashMap<PathBuf, i64>,
    /// Number of lookups answered from the cache.
    hits: u64,
    /// Number of lookups that had to go to storage.
    misses: u64,
}
14481
/// Storage operations that [`IndexingCache`] calls on a cache miss.
pub trait IndexingCacheStorage {
    /// Returns the id to cache for `agent`.
    ///
    /// NOTE(review): presumably creates the agent when it does not exist yet;
    /// confirm against the implementations.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64>;
    /// Returns the id to cache for the workspace at `path`.
    ///
    /// NOTE(review): presumably creates the workspace when it does not exist
    /// yet, using `display_name` if given; confirm against the implementations.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64>;
}
14486
/// Lets [`IndexingCache`] use a `FrankenStorage` as its backing store.
impl IndexingCacheStorage for FrankenStorage {
    /// Delegates to `FrankenStorage::ensure_agent`.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64> {
        self.ensure_agent(agent)
    }

    /// Delegates to `FrankenStorage::ensure_workspace`.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
        self.ensure_workspace(path, display_name)
    }
}
14496
14497impl IndexingCache {
14500 pub fn new() -> Self {
14502 Self {
14503 agent_ids: HashMap::new(),
14504 workspace_ids: HashMap::new(),
14505 hits: 0,
14506 misses: 0,
14507 }
14508 }
14509
14510 pub fn is_enabled() -> bool {
14513 dotenvy::var("CASS_SQLITE_CACHE")
14514 .map(|v| v != "0" && v.to_lowercase() != "false")
14515 .unwrap_or(true)
14516 }
14517
14518 pub fn get_or_insert_agent<S>(&mut self, storage: &S, agent: &Agent) -> Result<i64>
14523 where
14524 S: IndexingCacheStorage + ?Sized,
14525 {
14526 if let Some(&cached) = self.agent_ids.get(&agent.slug) {
14527 self.hits += 1;
14528 return Ok(cached);
14529 }
14530
14531 self.misses += 1;
14532 let id = storage.ensure_indexing_agent(agent)?;
14533 self.agent_ids.insert(agent.slug.clone(), id);
14534 Ok(id)
14535 }
14536
14537 pub fn get_or_insert_workspace(
14542 &mut self,
14543 storage: &(impl IndexingCacheStorage + ?Sized),
14544 path: &Path,
14545 display_name: Option<&str>,
14546 ) -> Result<i64> {
14547 if let Some(&cached) = self.workspace_ids.get(path) {
14548 self.hits += 1;
14549 return Ok(cached);
14550 }
14551
14552 self.misses += 1;
14553 let id = storage.ensure_indexing_workspace(path, display_name)?;
14554 self.workspace_ids.insert(path.to_path_buf(), id);
14555 Ok(id)
14556 }
14557
14558 pub fn stats(&self) -> (u64, u64, f64) {
14560 let total = self.hits + self.misses;
14561 let hit_rate = if total > 0 {
14562 self.hits as f64 / total as f64
14563 } else {
14564 0.0
14565 };
14566 (self.hits, self.misses, hit_rate)
14567 }
14568
14569 pub fn clear(&mut self) {
14571 self.agent_ids.clear();
14572 self.workspace_ids.clear();
14573 self.hits = 0;
14574 self.misses = 0;
14575 }
14576
14577 pub fn agent_count(&self) -> usize {
14579 self.agent_ids.len()
14580 }
14581
14582 pub fn workspace_count(&self) -> usize {
14584 self.workspace_ids.len()
14585 }
14586}
14587
/// Signed adjustment to a stats bucket; every field defaults to zero.
#[derive(Clone, Copy, Debug, Default)]
pub struct StatsDelta {
    /// Change in the number of sessions.
    pub session_count_delta: i64,
    /// Change in the number of messages.
    pub message_count_delta: i64,
    /// Change in the total character count.
    pub total_chars_delta: i64,
}
14602
14603#[derive(Debug, Default)]
14619pub struct StatsAggregator {
14620 deltas: HashMap<(i64, String, String), StatsDelta>,
14623}
14624
14625impl StatsAggregator {
14626 pub fn new() -> Self {
14628 Self {
14629 deltas: HashMap::new(),
14630 }
14631 }
14632
14633 pub fn record(
14644 &mut self,
14645 agent_slug: &str,
14646 source_id: &str,
14647 day_id: i64,
14648 message_count: i64,
14649 total_chars: i64,
14650 ) {
14651 self.record_delta(agent_slug, source_id, day_id, 1, message_count, total_chars);
14652 }
14653
14654 pub fn record_delta(
14657 &mut self,
14658 agent_slug: &str,
14659 source_id: &str,
14660 day_id: i64,
14661 session_count_delta: i64,
14662 message_count_delta: i64,
14663 total_chars_delta: i64,
14664 ) {
14665 if session_count_delta == 0 && message_count_delta == 0 && total_chars_delta == 0 {
14666 return;
14667 }
14668 let key = (day_id, agent_slug.to_owned(), source_id.to_owned());
14669 let delta = self.deltas.entry(key).or_default();
14670 delta.session_count_delta += session_count_delta;
14671 delta.message_count_delta += message_count_delta;
14672 delta.total_chars_delta += total_chars_delta;
14673 }
14674
14675 pub fn expand(&self) -> Vec<(i64, String, String, StatsDelta)> {
14683 let mut expanded: HashMap<(i64, String, String), StatsDelta> = HashMap::new();
14684
14685 for ((day_id, agent, source), delta) in &self.deltas {
14686 let permutations = [
14687 (agent.as_str(), source.as_str()),
14688 ("all", source.as_str()),
14689 (agent.as_str(), "all"),
14690 ("all", "all"),
14691 ];
14692
14693 for idx in 0..permutations.len() {
14695 let (a, s) = permutations[idx];
14696 if permutations[..idx].contains(&(a, s)) {
14697 continue;
14698 }
14699 let key = (*day_id, a.to_owned(), s.to_owned());
14700 let entry = expanded.entry(key).or_default();
14701 entry.session_count_delta += delta.session_count_delta;
14702 entry.message_count_delta += delta.message_count_delta;
14703 entry.total_chars_delta += delta.total_chars_delta;
14704 }
14705 }
14706
14707 let mut out: Vec<(i64, String, String, StatsDelta)> = expanded
14708 .into_iter()
14709 .map(|((d, a, s), delta)| (d, a, s, delta))
14710 .collect();
14711 out.sort_by(|(d1, a1, s1, _), (d2, a2, s2, _)| {
14712 d1.cmp(d2).then_with(|| a1.cmp(a2)).then_with(|| s1.cmp(s2))
14713 });
14714 out
14715 }
14716
14717 pub fn is_empty(&self) -> bool {
14719 self.deltas.is_empty()
14720 }
14721
14722 pub fn raw_entry_count(&self) -> usize {
14724 self.deltas.len()
14725 }
14726}
14727
/// Accumulated token-usage totals for one aggregation bucket.
///
/// Every field defaults to zero.
#[derive(Clone, Debug, Default)]
pub struct TokenStatsDelta {
    /// Number of recorded usage entries.
    pub api_call_count: i64,
    /// Recorded entries whose role was `user`.
    pub user_message_count: i64,
    /// Recorded entries whose role was `assistant` or `agent`.
    pub assistant_message_count: i64,
    /// Recorded entries whose role was `tool`.
    pub tool_message_count: i64,
    /// Sum of input tokens.
    pub total_input_tokens: i64,
    /// Sum of output tokens.
    pub total_output_tokens: i64,
    /// Sum of cache-read tokens.
    pub total_cache_read_tokens: i64,
    /// Sum of cache-creation tokens.
    pub total_cache_creation_tokens: i64,
    /// Sum of thinking tokens.
    pub total_thinking_tokens: i64,
    /// Sum of the per-entry token totals.
    pub grand_total_tokens: i64,
    /// Sum of content character counts.
    pub total_content_chars: i64,
    /// Sum of tool-call counts.
    pub total_tool_calls: i64,
    /// Sum of estimated costs in USD.
    pub estimated_cost_usd: f64,
    /// Number of sessions attributed to the bucket.
    pub session_count: i64,
}
14753
14754#[derive(Debug, Default)]
14760pub struct TokenStatsAggregator {
14761 deltas: HashMap<(i64, String, String, String), TokenStatsDelta>,
14763}
14764
14765impl TokenStatsAggregator {
14766 pub fn new() -> Self {
14767 Self {
14768 deltas: HashMap::new(),
14769 }
14770 }
14771
14772 #[allow(clippy::too_many_arguments)]
14774 pub fn record(
14775 &mut self,
14776 agent_slug: &str,
14777 source_id: &str,
14778 day_id: i64,
14779 model_family: &str,
14780 role: &str,
14781 usage: &crate::connectors::ExtractedTokenUsage,
14782 content_chars: i64,
14783 estimated_cost_usd: f64,
14784 ) {
14785 let key = (
14786 day_id,
14787 agent_slug.to_owned(),
14788 source_id.to_owned(),
14789 model_family.to_owned(),
14790 );
14791 let delta = self.deltas.entry(key).or_default();
14792
14793 delta.api_call_count += 1;
14794 match role {
14795 "user" => delta.user_message_count += 1,
14796 "assistant" | "agent" => delta.assistant_message_count += 1,
14797 "tool" => delta.tool_message_count += 1,
14798 _ => {}
14799 }
14800
14801 delta.total_input_tokens += usage.input_tokens.unwrap_or(0);
14802 delta.total_output_tokens += usage.output_tokens.unwrap_or(0);
14803 delta.total_cache_read_tokens += usage.cache_read_tokens.unwrap_or(0);
14804 delta.total_cache_creation_tokens += usage.cache_creation_tokens.unwrap_or(0);
14805 delta.total_thinking_tokens += usage.thinking_tokens.unwrap_or(0);
14806 delta.grand_total_tokens += usage.total_tokens().unwrap_or(0);
14807 delta.total_content_chars += content_chars;
14808 delta.total_tool_calls += usage.tool_call_count as i64;
14809 delta.estimated_cost_usd += estimated_cost_usd;
14810 }
14811
14812 pub fn record_session(
14814 &mut self,
14815 agent_slug: &str,
14816 source_id: &str,
14817 day_id: i64,
14818 model_family: &str,
14819 ) {
14820 let key = (
14821 day_id,
14822 agent_slug.to_owned(),
14823 source_id.to_owned(),
14824 model_family.to_owned(),
14825 );
14826 self.deltas.entry(key).or_default().session_count += 1;
14827 }
14828
14829 pub fn expand(&self) -> Vec<(i64, String, String, String, TokenStatsDelta)> {
14836 let mut expanded: HashMap<(i64, String, String, String), TokenStatsDelta> = HashMap::new();
14837
14838 for ((day_id, agent, source, model), delta) in &self.deltas {
14839 let permutations = [
14840 (agent.as_str(), source.as_str(), model.as_str()),
14841 ("all", source.as_str(), model.as_str()),
14842 (agent.as_str(), "all", model.as_str()),
14843 (agent.as_str(), source.as_str(), "all"),
14844 ("all", "all", "all"),
14845 ];
14846
14847 for idx in 0..permutations.len() {
14848 let (a, s, m) = permutations[idx];
14849 if permutations[..idx].contains(&(a, s, m)) {
14851 continue;
14852 }
14853 let key = (*day_id, a.to_owned(), s.to_owned(), m.to_owned());
14854 let entry = expanded.entry(key).or_default();
14855 entry.api_call_count += delta.api_call_count;
14856 entry.user_message_count += delta.user_message_count;
14857 entry.assistant_message_count += delta.assistant_message_count;
14858 entry.tool_message_count += delta.tool_message_count;
14859 entry.total_input_tokens += delta.total_input_tokens;
14860 entry.total_output_tokens += delta.total_output_tokens;
14861 entry.total_cache_read_tokens += delta.total_cache_read_tokens;
14862 entry.total_cache_creation_tokens += delta.total_cache_creation_tokens;
14863 entry.total_thinking_tokens += delta.total_thinking_tokens;
14864 entry.grand_total_tokens += delta.grand_total_tokens;
14865 entry.total_content_chars += delta.total_content_chars;
14866 entry.total_tool_calls += delta.total_tool_calls;
14867 entry.estimated_cost_usd += delta.estimated_cost_usd;
14868 entry.session_count += delta.session_count;
14869 }
14870 }
14871
14872 let mut out: Vec<(i64, String, String, String, TokenStatsDelta)> = expanded
14873 .into_iter()
14874 .map(|((d, a, s, m), delta)| (d, a, s, m, delta))
14875 .collect();
14876 out.sort_by(|(d1, a1, s1, m1, _), (d2, a2, s2, m2, _)| {
14877 d1.cmp(d2)
14878 .then_with(|| a1.cmp(a2))
14879 .then_with(|| s1.cmp(s2))
14880 .then_with(|| m1.cmp(m2))
14881 });
14882 out
14883 }
14884
14885 pub fn is_empty(&self) -> bool {
14886 self.deltas.is_empty()
14887 }
14888
14889 pub fn raw_entry_count(&self) -> usize {
14890 self.deltas.len()
14891 }
14892}
14893
/// Accumulated usage counters for one analytics rollup bucket.
///
/// Every field defaults to zero. The `api_*` totals only include messages
/// whose data source is `"api"`.
#[derive(Clone, Debug, Default)]
pub struct UsageRollupDelta {
    /// Messages folded into the bucket.
    pub message_count: i64,
    /// Messages with role `user`.
    pub user_message_count: i64,
    /// Messages with role `assistant` or `agent`.
    pub assistant_message_count: i64,
    /// Sum of per-message tool-call counts.
    pub tool_call_count: i64,
    /// Messages flagged as containing a plan.
    pub plan_message_count: i64,
    /// Estimated content tokens of plan messages.
    pub plan_content_tokens_est_total: i64,
    /// API token total of plan messages that have API data.
    pub plan_api_tokens_total: i64,
    /// Messages that have API token data.
    pub api_coverage_message_count: i64,
    /// Estimated content tokens across all messages.
    pub content_tokens_est_total: i64,
    /// Estimated content tokens of user messages.
    pub content_tokens_est_user: i64,
    /// Estimated content tokens of assistant/agent messages.
    pub content_tokens_est_assistant: i64,
    /// Sum of all API token categories.
    pub api_tokens_total: i64,
    /// Sum of API input tokens.
    pub api_input_tokens_total: i64,
    /// Sum of API output tokens.
    pub api_output_tokens_total: i64,
    /// Sum of API cache-read tokens.
    pub api_cache_read_tokens_total: i64,
    /// Sum of API cache-creation tokens.
    pub api_cache_creation_tokens_total: i64,
    /// Sum of API thinking tokens.
    pub api_thinking_tokens_total: i64,
}
14921
/// Per-message metrics record fed into [`AnalyticsRollupAggregator::record`].
///
/// NOTE(review): fields without a comment are not read in this part of the
/// file; their exact meaning should be confirmed where the entry is built.
#[derive(Debug, Clone)]
pub struct MessageMetricsEntry {
    pub message_id: i64,
    pub created_at_ms: i64,
    /// Bucket id used for the hourly rollup.
    pub hour_id: i64,
    /// Bucket id used for the daily and per-model daily rollups.
    pub day_id: i64,
    pub agent_slug: String,
    pub workspace_id: i64,
    pub source_id: String,
    /// Message role; `user`, `assistant` and `agent` get dedicated counters.
    pub role: String,
    pub content_chars: i64,
    /// Estimated token count of the message content.
    pub content_tokens_est: i64,
    pub model_name: Option<String>,
    pub model_family: String,
    pub model_tier: String,
    pub provider: String,
    pub api_input_tokens: Option<i64>,
    pub api_output_tokens: Option<i64>,
    pub api_cache_read_tokens: Option<i64>,
    pub api_cache_creation_tokens: Option<i64>,
    pub api_thinking_tokens: Option<i64>,
    pub api_service_tier: Option<String>,
    /// The API token totals are only counted when this equals `"api"`.
    pub api_data_source: String,
    pub tool_call_count: i64,
    pub has_tool_calls: bool,
    /// Whether the message was flagged as containing a plan.
    pub has_plan: bool,
}
14950
14951#[derive(Debug, Default)]
14956pub struct AnalyticsRollupAggregator {
14957 hourly: HashMap<(i64, String, i64, String), UsageRollupDelta>,
14958 daily: HashMap<(i64, String, i64, String), UsageRollupDelta>,
14959 models_daily: HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
14960}
14961
14962impl AnalyticsRollupAggregator {
14963 pub fn new() -> Self {
14964 Self::default()
14965 }
14966
14967 pub fn record(&mut self, entry: &MessageMetricsEntry) {
14969 let content_est = entry.content_tokens_est;
14970 let api_total = entry.api_input_tokens.unwrap_or(0)
14971 + entry.api_output_tokens.unwrap_or(0)
14972 + entry.api_cache_read_tokens.unwrap_or(0)
14973 + entry.api_cache_creation_tokens.unwrap_or(0)
14974 + entry.api_thinking_tokens.unwrap_or(0);
14975 let is_api = entry.api_data_source == "api";
14976 let is_user = entry.role == "user";
14977 let is_assistant = entry.role == "assistant" || entry.role == "agent";
14978
14979 for (map, bucket_id) in [
14981 (&mut self.hourly, entry.hour_id),
14982 (&mut self.daily, entry.day_id),
14983 ] {
14984 let key = (
14985 bucket_id,
14986 entry.agent_slug.clone(),
14987 entry.workspace_id,
14988 entry.source_id.clone(),
14989 );
14990 let d = map.entry(key).or_default();
14991 d.message_count += 1;
14992 if is_user {
14993 d.user_message_count += 1;
14994 d.content_tokens_est_user += content_est;
14995 }
14996 if is_assistant {
14997 d.assistant_message_count += 1;
14998 d.content_tokens_est_assistant += content_est;
14999 }
15000 d.tool_call_count += entry.tool_call_count;
15001 if entry.has_plan {
15002 d.plan_message_count += 1;
15003 d.plan_content_tokens_est_total += content_est;
15004 if is_api {
15005 d.plan_api_tokens_total += api_total;
15006 }
15007 }
15008 if is_api {
15009 d.api_coverage_message_count += 1;
15010 d.api_tokens_total += api_total;
15011 d.api_input_tokens_total += entry.api_input_tokens.unwrap_or(0);
15012 d.api_output_tokens_total += entry.api_output_tokens.unwrap_or(0);
15013 d.api_cache_read_tokens_total += entry.api_cache_read_tokens.unwrap_or(0);
15014 d.api_cache_creation_tokens_total += entry.api_cache_creation_tokens.unwrap_or(0);
15015 d.api_thinking_tokens_total += entry.api_thinking_tokens.unwrap_or(0);
15016 }
15017 d.content_tokens_est_total += content_est;
15018 }
15019
15020 let model_key = (
15021 entry.day_id,
15022 entry.agent_slug.clone(),
15023 entry.workspace_id,
15024 entry.source_id.clone(),
15025 entry.model_family.clone(),
15026 entry.model_tier.clone(),
15027 );
15028 let d = self.models_daily.entry(model_key).or_default();
15029 d.message_count += 1;
15030 if is_user {
15031 d.user_message_count += 1;
15032 d.content_tokens_est_user += content_est;
15033 }
15034 if is_assistant {
15035 d.assistant_message_count += 1;
15036 d.content_tokens_est_assistant += content_est;
15037 }
15038 d.tool_call_count += entry.tool_call_count;
15039 if entry.has_plan {
15040 d.plan_message_count += 1;
15041 d.plan_content_tokens_est_total += content_est;
15042 if is_api {
15043 d.plan_api_tokens_total += api_total;
15044 }
15045 }
15046 if is_api {
15047 d.api_coverage_message_count += 1;
15048 d.api_tokens_total += api_total;
15049 d.api_input_tokens_total += entry.api_input_tokens.unwrap_or(0);
15050 d.api_output_tokens_total += entry.api_output_tokens.unwrap_or(0);
15051 d.api_cache_read_tokens_total += entry.api_cache_read_tokens.unwrap_or(0);
15052 d.api_cache_creation_tokens_total += entry.api_cache_creation_tokens.unwrap_or(0);
15053 d.api_thinking_tokens_total += entry.api_thinking_tokens.unwrap_or(0);
15054 }
15055 d.content_tokens_est_total += content_est;
15056 }
15057
15058 pub fn is_empty(&self) -> bool {
15059 self.hourly.is_empty() && self.daily.is_empty() && self.models_daily.is_empty()
15060 }
15061
15062 pub fn hourly_entry_count(&self) -> usize {
15063 self.hourly.len()
15064 }
15065
15066 pub fn daily_entry_count(&self) -> usize {
15067 self.daily.len()
15068 }
15069
15070 pub fn models_daily_entry_count(&self) -> usize {
15071 self.models_daily.len()
15072 }
15073}
15074
15075fn has_plan_for_role(role: &str, content: &str) -> bool {
15079 let role = role.trim();
15080 (role.eq_ignore_ascii_case("assistant") || role.eq_ignore_ascii_case("agent"))
15081 && has_plan_heuristic(content)
15082}
15083
15084fn has_plan_heuristic(content: &str) -> bool {
15091 if content.len() < 24 {
15092 return false;
15093 }
15094
15095 let lower = content.to_lowercase();
15096
15097 let looks_like_tool_blob = lower.contains("```")
15099 || lower.contains("\"tool\"")
15100 || lower.contains("stdout:")
15101 || lower.contains("stderr:")
15102 || lower.contains("exit code:");
15103
15104 let mut lines: Vec<&str> = Vec::with_capacity(60);
15105 let mut in_fenced_code = false;
15106 for raw in lower.lines() {
15107 let line = raw.trim();
15108 if line.starts_with("```") {
15109 in_fenced_code = !in_fenced_code;
15110 continue;
15111 }
15112 if in_fenced_code || line.is_empty() {
15113 continue;
15114 }
15115 lines.push(line);
15116 if lines.len() >= 60 {
15117 break;
15118 }
15119 }
15120
15121 let header_pos = lines.iter().position(|line| {
15122 line.starts_with("## plan")
15123 || line.starts_with("# plan")
15124 || line.starts_with("plan:")
15125 || line.starts_with("implementation plan")
15126 || line.starts_with("next steps:")
15127 || line.starts_with("action plan:")
15128 });
15129 let preview_top = lines.iter().take(8).copied().collect::<Vec<_>>().join("\n");
15130 let header_near_top = header_pos.is_some_and(|idx| idx <= 6) || preview_top.contains("plan:");
15131
15132 if !header_near_top {
15133 return false;
15134 }
15135 if looks_like_tool_blob && header_pos.is_none() {
15136 return false;
15137 }
15138
15139 let numbered_steps = lines
15140 .iter()
15141 .filter(|line| is_numbered_step_line(line))
15142 .count();
15143 let bullet_steps = lines
15144 .iter()
15145 .filter(|line| {
15146 line.starts_with("- ")
15147 || line.starts_with("* ")
15148 || line.starts_with("+ ")
15149 || line.starts_with("- [ ] ")
15150 || line.starts_with("- [x] ")
15151 })
15152 .count();
15153
15154 numbered_steps >= 2 || (numbered_steps >= 1 && bullet_steps >= 1) || bullet_steps >= 3
15155}
15156
/// Returns `true` for lines such as `1. foo` or `12) bar`.
///
/// After optional leading whitespace the line must start with one to three
/// ASCII digits, followed by `". "` or `") "`.
fn is_numbered_step_line(line: &str) -> bool {
    let candidate = line.trim_start();
    let digits = candidate
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    if !(1..=3).contains(&digits) {
        return false;
    }
    // ASCII digits are one byte each, so `digits` is a valid split point.
    let tail = &candidate[digits..];
    [". ", ") "].iter().any(|marker| tail.starts_with(*marker))
}
15166
/// Flat per-message token usage record.
///
/// NOTE(review): nothing in this part of the file reads or builds this type,
/// so the per-field semantics below are left to the field names; confirm them
/// where the entries are produced and persisted.
#[derive(Debug, Clone)]
pub struct TokenUsageEntry {
    // Identity of the message and what it belongs to.
    pub message_id: i64,
    pub conversation_id: i64,
    pub agent_id: i64,
    pub workspace_id: Option<i64>,
    pub source_id: String,
    // Time bucketing.
    pub timestamp_ms: i64,
    pub day_id: i64,
    // Model attribution; all optional.
    pub model_name: Option<String>,
    pub model_family: Option<String>,
    pub model_tier: Option<String>,
    pub service_tier: Option<String>,
    pub provider: Option<String>,
    // Token counts and cost; `None` when not available.
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_creation_tokens: Option<i64>,
    pub thinking_tokens: Option<i64>,
    pub total_tokens: Option<i64>,
    pub estimated_cost_usd: Option<f64>,
    // Message shape.
    pub role: String,
    pub content_chars: i64,
    pub has_tool_calls: bool,
    pub tool_call_count: u32,
    pub data_source: String,
}
15195
/// One row of the `model_pricing` table.
#[derive(Debug, Clone)]
pub struct PricingEntry {
    /// SQL `LIKE`-style pattern matched against model names.
    pub model_pattern: String,
    pub provider: String,
    /// Cost per million input tokens.
    pub input_cost_per_mtok: f64,
    /// Cost per million output tokens.
    pub output_cost_per_mtok: f64,
    /// Cost per million cache-read tokens, when priced.
    pub cache_read_cost_per_mtok: Option<f64>,
    /// Cost per million cache-creation tokens, when priced.
    pub cache_creation_cost_per_mtok: Option<f64>,
    /// First day the entry applies, as days since 2020-01-01.
    pub effective_day_id: i64,
}
15212
/// Running tally of how many cost lookups found a price.
#[derive(Debug, Clone, Default)]
pub struct PricingDiagnostics {
    /// Lookups that produced a price.
    pub priced_count: u64,
    /// Lookups that produced no price.
    pub unpriced_count: u64,
    /// Unpriced lookups per model name; a missing name is stored as `"(none)"`.
    pub unknown_models: HashMap<String, u64>,
}
15221
15222impl PricingDiagnostics {
15223 fn record_priced(&mut self) {
15224 self.priced_count += 1;
15225 }
15226
15227 fn record_unpriced(&mut self, model_name: Option<&str>) {
15228 self.unpriced_count += 1;
15229 let key = model_name.unwrap_or("(none)").to_string();
15230 *self.unknown_models.entry(key).or_insert(0) += 1;
15231 }
15232
15233 pub fn log_summary(&self) {
15235 let total = self.priced_count + self.unpriced_count;
15236 if total == 0 {
15237 return;
15238 }
15239 let pct = (self.priced_count as f64 / total as f64) * 100.0;
15240 tracing::info!(
15241 target: "cass::analytics::pricing",
15242 priced = self.priced_count,
15243 unpriced = self.unpriced_count,
15244 total = total,
15245 coverage_pct = format!("{pct:.1}%"),
15246 "pricing coverage"
15247 );
15248 if !self.unknown_models.is_empty() {
15249 let mut sorted: Vec<_> = self.unknown_models.iter().collect();
15250 sorted.sort_by(|a, b| b.1.cmp(a.1));
15251 for (model, count) in sorted.iter().take(5) {
15252 tracing::debug!(
15253 target: "cass::analytics::pricing",
15254 model = model.as_str(),
15255 count = count,
15256 "unknown model (no pricing)"
15257 );
15258 }
15259 }
15260 }
15261}
15262
15263#[derive(Debug, Clone)]
15265pub struct PricingTable {
15266 entries: Vec<PricingEntry>,
15267}
15268
15269impl PricingTable {
15270 pub fn load(conn: &FrankenConnection) -> Result<Self> {
15272 Self::franken_load(conn)
15273 }
15274
15275 pub fn franken_load(conn: &FrankenConnection) -> Result<Self> {
15277 let rows = conn.query(
15278 "SELECT model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
15279 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
15280 FROM model_pricing
15281 ORDER BY effective_date DESC",
15282 )?;
15283 let mut entries = Vec::with_capacity(rows.len());
15284 for row in &rows {
15285 let effective_date: String = row.get_typed(6)?;
15286 let effective_day_id = date_str_to_day_id(&effective_date)?;
15287 entries.push(PricingEntry {
15288 model_pattern: row.get_typed(0)?,
15289 provider: row.get_typed(1)?,
15290 input_cost_per_mtok: row.get_typed(2)?,
15291 output_cost_per_mtok: row.get_typed(3)?,
15292 cache_read_cost_per_mtok: row.get_typed(4)?,
15293 cache_creation_cost_per_mtok: row.get_typed(5)?,
15294 effective_day_id,
15295 });
15296 }
15297 Ok(Self { entries })
15298 }
15299
15300 pub fn lookup(&self, model_name: &str, message_day_id: i64) -> Option<&PricingEntry> {
15308 let mut best: Option<&PricingEntry> = None;
15309
15310 for entry in &self.entries {
15311 if entry.effective_day_id > message_day_id {
15312 continue;
15313 }
15314 if !sql_like_match(model_name, &entry.model_pattern) {
15315 continue;
15316 }
15317
15318 match best {
15319 None => best = Some(entry),
15320 Some(current) => {
15321 if entry.effective_day_id > current.effective_day_id
15322 || (entry.effective_day_id == current.effective_day_id
15323 && entry.model_pattern.len() > current.model_pattern.len())
15324 {
15325 best = Some(entry);
15326 }
15327 }
15328 }
15329 }
15330
15331 best
15332 }
15333
15334 pub fn compute_cost(
15338 &self,
15339 model_name: Option<&str>,
15340 message_day_id: i64,
15341 input_tokens: Option<i64>,
15342 output_tokens: Option<i64>,
15343 cache_read_tokens: Option<i64>,
15344 cache_creation_tokens: Option<i64>,
15345 ) -> Option<f64> {
15346 let model = model_name?;
15347 let pricing = self.lookup(model, message_day_id)?;
15348
15349 if input_tokens.is_none() && output_tokens.is_none() {
15350 return None;
15351 }
15352
15353 let mut cost = 0.0;
15354 let cache_read = cache_read_tokens.unwrap_or(0);
15355 let cache_creation = cache_creation_tokens.unwrap_or(0);
15356 let non_cache_input = input_tokens
15359 .unwrap_or(0)
15360 .saturating_sub(cache_read)
15361 .saturating_sub(cache_creation)
15362 .max(0);
15363 cost += non_cache_input as f64 * pricing.input_cost_per_mtok / 1_000_000.0;
15364 cost += output_tokens.unwrap_or(0) as f64 * pricing.output_cost_per_mtok / 1_000_000.0;
15365
15366 if let Some(cache_price) = pricing.cache_read_cost_per_mtok {
15367 cost += cache_read as f64 * cache_price / 1_000_000.0;
15368 }
15369 if let Some(cache_price) = pricing.cache_creation_cost_per_mtok {
15370 cost += cache_creation as f64 * cache_price / 1_000_000.0;
15371 }
15372
15373 Some(cost)
15374 }
15375
15376 pub fn is_empty(&self) -> bool {
15378 self.entries.is_empty()
15379 }
15380}
15381
15382fn date_str_to_day_id(s: &str) -> Result<i64> {
15385 use chrono::NaiveDate;
15386 const EPOCH_2020: NaiveDate = match NaiveDate::from_ymd_opt(2020, 1, 1) {
15387 Some(d) => d,
15388 None => unreachable!(),
15389 };
15390 NaiveDate::parse_from_str(s, "%Y-%m-%d")
15391 .map(|d| (d - EPOCH_2020).num_days())
15392 .with_context(|| format!("invalid effective_date '{s}'"))
15393}
15394
15395fn sql_like_match(value: &str, pattern: &str) -> bool {
15397 sql_like_match_bytes(
15398 value.to_ascii_lowercase().as_bytes(),
15399 pattern.to_ascii_lowercase().as_bytes(),
15400 )
15401}
15402
/// Returns the byte length implied by `b` when read as a UTF-8 lead byte.
///
/// The ranges are plain thresholds: bytes below `0x80` give 1, below `0xE0`
/// give 2 (continuation bytes included), below `0xF0` give 3, everything else
/// gives 4. No validation is performed.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0x80..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
15415
15416fn sql_like_match_bytes(val: &[u8], pat: &[u8]) -> bool {
15417 if pat.is_empty() {
15418 return val.is_empty();
15419 }
15420 match pat[0] {
15421 b'%' => {
15422 let mut p = 1;
15423 while p < pat.len() && pat[p] == b'%' {
15424 p += 1;
15425 }
15426 let rest = &pat[p..];
15427 let mut i = 0;
15429 while i <= val.len() {
15430 if sql_like_match_bytes(&val[i..], rest) {
15431 return true;
15432 }
15433 if i < val.len() {
15434 i += utf8_char_len(val[i]);
15435 } else {
15436 break;
15437 }
15438 }
15439 false
15440 }
15441 b'_' => {
15442 if val.is_empty() {
15444 return false;
15445 }
15446 let char_len = utf8_char_len(val[0]);
15447 val.len() >= char_len && sql_like_match_bytes(&val[char_len..], &pat[1..])
15448 }
15449 c => !val.is_empty() && val[0] == c && sql_like_match_bytes(&val[1..], &pat[1..]),
15450 }
15451}
15452
15453fn rebuild_batch_size_env(var: &str, default: usize) -> usize {
15454 dotenvy::var(var)
15455 .ok()
15456 .and_then(|raw| raw.parse::<usize>().ok())
15457 .filter(|value| *value > 0)
15458 .unwrap_or(default)
15459}
15460
15461fn is_out_of_memory_error<E: OutOfMemoryProbe + ?Sized>(err: &E) -> bool {
15471 err.is_out_of_memory()
15472}
15473
/// Implemented by error types that can tell whether they represent an
/// out-of-memory failure.
trait OutOfMemoryProbe {
    /// Returns `true` when this error is an out-of-memory failure.
    fn is_out_of_memory(&self) -> bool;
}
15477
15478impl OutOfMemoryProbe for anyhow::Error {
15479 fn is_out_of_memory(&self) -> bool {
15480 self.chain().any(|cause| {
15481 if cause
15482 .downcast_ref::<frankensqlite::FrankenError>()
15483 .is_some_and(|err| matches!(err, frankensqlite::FrankenError::OutOfMemory))
15484 {
15485 return true;
15486 }
15487 is_exact_out_of_memory_message(&cause.to_string())
15488 })
15489 }
15490}
15491
15492impl OutOfMemoryProbe for frankensqlite::FrankenError {
15493 fn is_out_of_memory(&self) -> bool {
15494 matches!(self, frankensqlite::FrankenError::OutOfMemory)
15495 }
15496}
15497
/// Returns `true` when `message`, after trimming, is exactly
/// `out of memory` or `not enough memory` (ASCII case-insensitive).
///
/// Longer messages that merely contain one of the phrases do not match.
fn is_exact_out_of_memory_message(message: &str) -> bool {
    let normalized = message.trim();
    ["out of memory", "not enough memory"]
        .iter()
        .any(|phrase| normalized.eq_ignore_ascii_case(phrase))
}
15504
/// Session, message and character counts for a single day bucket.
///
/// NOTE(review): presumably read back from the daily stats tables — confirm
/// against the query that builds it.
#[derive(Debug, Clone)]
pub struct DailyCount {
    /// Day bucket identifier.
    pub day_id: i64,
    pub sessions: i64,
    pub messages: i64,
    pub chars: i64,
}
15517
/// Summary of an analytics rebuild: row counts per table plus timing.
///
/// NOTE(review): the producer is outside this part of the file; field
/// meanings follow the names and should be confirmed there.
#[derive(Debug, Clone)]
pub struct AnalyticsRebuildResult {
    pub message_metrics_rows: usize,
    pub usage_hourly_rows: usize,
    pub usage_daily_rows: usize,
    pub usage_models_daily_rows: usize,
    /// Elapsed time in milliseconds.
    pub elapsed_ms: u64,
    /// Throughput in messages per second.
    pub messages_per_sec: f64,
}
15528
/// Outcome of rebuilding the daily stats from conversations.
#[derive(Debug, Clone)]
pub struct DailyStatsRebuildResult {
    /// Number of rows created by the rebuild.
    pub rows_created: i64,
    /// Total number of sessions seen by the rebuild.
    pub total_sessions: i64,
}
15535
/// Counts of what an agent archive purge removed; defaults to all zeros.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AgentArchivePurgeResult {
    /// Conversations deleted.
    pub conversations_deleted: usize,
    /// Messages deleted.
    pub messages_deleted: usize,
}
15542
/// Health snapshot of the materialized daily stats.
///
/// NOTE(review): the producer is outside this part of the file; in particular
/// how `drift` relates `conversation_count` to `materialized_total` should be
/// confirmed there.
#[derive(Debug, Clone)]
pub struct DailyStatsHealth {
    /// Whether the stats table has any data.
    pub populated: bool,
    pub row_count: i64,
    /// Oldest update timestamp in milliseconds, when one exists.
    pub oldest_update_ms: Option<i64>,
    pub conversation_count: i64,
    pub materialized_total: i64,
    pub drift: i64,
}
15553
/// Batch size for FTS5 writes.
///
/// NOTE(review): the consumer is outside this part of the file — presumably
/// the number of rows per multi-row FTS insert; confirm at the use site.
const FTS5_BATCH_SIZE: usize = 100;
15562
/// Message columns carried through an FTS rebuild.
///
/// NOTE(review): the query that fills this row is outside this part of the
/// file; confirm how `rowid` and `message_id` differ there.
#[derive(Debug, Clone)]
struct FtsRebuildMessageRow {
    rowid: i64,
    message_id: i64,
    conversation_id: i64,
    content: String,
    /// Message timestamp, when one is recorded.
    created_at: Option<i64>,
}
15571
/// Conversation-level columns needed to build FTS rows.
///
/// NOTE(review): the code that fills and consumes this projection is outside
/// this part of the file.
#[derive(Debug, Clone)]
struct FtsConversationProjection {
    title: String,
    agent_id: Option<i64>,
    workspace_id: Option<i64>,
    source_path: String,
}
15579
/// One document destined for the full-text search index.
#[derive(Debug, Clone)]
pub struct FtsEntry {
    /// Message content.
    pub content: String,
    /// Conversation title; empty when the conversation has none.
    pub title: String,
    /// Agent slug of the conversation.
    pub agent: String,
    /// Workspace path as a string; empty when the conversation has none.
    pub workspace: String,
    /// Conversation source path as a string.
    pub source_path: String,
    /// Message timestamp, falling back to the conversation start.
    pub created_at: Option<i64>,
    /// Id of the message this entry indexes.
    pub message_id: i64,
}
15591
15592impl FtsEntry {
15593 pub fn from_message(message_id: i64, msg: &Message, conv: &Conversation) -> Self {
15595 FtsEntry {
15596 content: msg.content.clone(),
15597 title: conv.title.clone().unwrap_or_default(),
15598 agent: conv.agent_slug.clone(),
15599 workspace: conv
15600 .workspace
15601 .as_ref()
15602 .map(|p| p.to_string_lossy().into_owned())
15603 .unwrap_or_default(),
15604 source_path: path_to_string(&conv.source_path),
15605 created_at: msg.created_at.or(conv.started_at),
15606 message_id,
15607 }
15608 }
15609}
15610
/// Upper bound on the number of pending FTS entries in one batch.
///
/// NOTE(review): the batching loop is outside this part of the file —
/// presumably a flush is triggered once this many entries are pending.
const FTS_ENTRY_BATCH_MAX_DOCS: usize = 512;
/// Upper bound on the accumulated content size of one pending FTS batch.
///
/// NOTE(review): presumably compared against the running `pending_chars`
/// total — confirm at the use site.
const FTS_ENTRY_BATCH_MAX_CHARS: usize = 1024 * 1024;

/// Batch size used for FTS rebuilds when `CASS_FTS_REBUILD_BATCH_SIZE` is
/// unset or not a positive integer.
const FTS_REBUILD_BATCH_SIZE_DEFAULT: usize = 5_000;
15619
15620fn fts_rebuild_batch_size() -> usize {
15623 dotenvy::var("CASS_FTS_REBUILD_BATCH_SIZE")
15624 .ok()
15625 .and_then(|v| v.parse::<usize>().ok())
15626 .filter(|&n| n > 0)
15627 .unwrap_or(FTS_REBUILD_BATCH_SIZE_DEFAULT)
15628}
15629
15630fn flush_pending_fts_entries(
15631 storage: &FrankenStorage,
15632 tx: &FrankenTransaction<'_>,
15633 entries: &mut Vec<FtsEntry>,
15634 pending_chars: &mut usize,
15635 inserted_total: &mut usize,
15636) -> Result<()> {
15637 if entries.is_empty() {
15638 return Ok(());
15639 }
15640
15641 if storage.fts_messages_present_cached(tx) {
15642 *inserted_total += franken_batch_insert_fts(tx, entries)?;
15643 }
15644 entries.clear();
15645 *pending_chars = 0;
15646 Ok(())
15647}
15648
/// Converts a path into an owned `String`, replacing any non-UTF-8 sequences
/// lossily.
fn path_to_string<P: AsRef<Path>>(p: P) -> String {
    let path: &Path = p.as_ref();
    path.to_string_lossy().into_owned()
}
15652
15653fn role_str(role: &MessageRole) -> String {
15654 role_as_str(role).to_owned()
15655}
15656
15657fn role_as_str(role: &MessageRole) -> &str {
15658 match role {
15659 MessageRole::User => "user",
15660 MessageRole::Agent => "agent",
15661 MessageRole::Tool => "tool",
15662 MessageRole::System => "system",
15663 MessageRole::Other(v) => v.as_str(),
15664 }
15665}
15666
15667fn agent_kind_str(kind: AgentKind) -> String {
15668 match kind {
15669 AgentKind::Cli => "cli".into(),
15670 AgentKind::VsCode => "vscode".into(),
15671 AgentKind::Hybrid => "hybrid".into(),
15672 }
15673}
15674
15675#[cfg(test)]
15680mod tests {
15681 use super::*;
15682 use serial_test::serial;
15683 use tempfile::TempDir;
15684
15685 struct EnvGuard {
15686 key: &'static str,
15687 previous: Option<String>,
15688 }
15689
15690 impl Drop for EnvGuard {
15691 fn drop(&mut self) {
15692 if let Some(value) = &self.previous {
15693 unsafe {
15695 std::env::set_var(self.key, value);
15696 }
15697 } else {
15698 unsafe {
15700 std::env::remove_var(self.key);
15701 }
15702 }
15703 }
15704 }
15705
15706 fn set_env_var(key: &'static str, value: impl AsRef<str>) -> EnvGuard {
15707 let previous = dotenvy::var(key).ok();
15708 unsafe {
15710 std::env::set_var(key, value.as_ref());
15711 }
15712 EnvGuard { key, previous }
15713 }
15714
15715 fn unset_env_var(key: &'static str) -> EnvGuard {
15716 let previous = dotenvy::var(key).ok();
15717 unsafe {
15719 std::env::remove_var(key);
15720 }
15721 EnvGuard { key, previous }
15722 }
15723
15724 #[test]
15725 #[serial]
15726 fn storage_env_flags_are_truthy_only() {
15727 for value in ["1", "true", "TRUE", "yes", "YES", "on", "ON"] {
15728 let _guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", value);
15729 assert!(
15730 defer_storage_lexical_updates_enabled(),
15731 "{value:?} should enable the lexical defer toggle"
15732 );
15733 }
15734
15735 for value in ["0", "false", "FALSE", "no", "NO", "", "maybe"] {
15736 let _guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", value);
15737 assert!(
15738 !defer_storage_lexical_updates_enabled(),
15739 "{value:?} should not enable the lexical defer toggle"
15740 );
15741 }
15742 }
15743
/// Exercises how `defer_analytics_updates_enabled` combines the index-run
/// default guard with the `CASS_DEFER_ANALYTICS_UPDATES` and
/// `CASS_INLINE_ANALYTICS_UPDATES` environment variables.
#[test]
#[serial]
fn analytics_defer_default_can_be_overridden_explicitly() {
    const DEFER_KEY: &str = "CASS_DEFER_ANALYTICS_UPDATES";
    const INLINE_KEY: &str = "CASS_INLINE_ANALYTICS_UPDATES";

    // Reads the toggle while `key` is temporarily set to `raw`; the override
    // guard is released as soon as the reading has been taken.
    let enabled_with = |key: &'static str, raw: &'static str| {
        let _scoped = set_env_var(key, raw);
        defer_analytics_updates_enabled()
    };

    // Phase 1: default guard says "do not defer".
    {
        let _clear_defer = unset_env_var(DEFER_KEY);
        let _clear_inline = unset_env_var(INLINE_KEY);
        let _inline_default = default_defer_analytics_updates_guard(false);
        assert!(
            !defer_analytics_updates_enabled(),
            "analytics should stay inline when neither env nor index-run default requests deferral"
        );
        assert!(
            !enabled_with(DEFER_KEY, "no"),
            "false-like explicit defer value must not force analytics deferral"
        );
    }

    // Phase 2: default guard says "defer"; these guards live to the end of the test.
    let _clear_defer = unset_env_var(DEFER_KEY);
    let _clear_inline = unset_env_var(INLINE_KEY);
    let _deferred_default = default_defer_analytics_updates_guard(true);
    assert!(
        defer_analytics_updates_enabled(),
        "index-run default should defer analytics when no explicit env override is set"
    );
    assert!(
        !enabled_with(INLINE_KEY, "1"),
        "truthy inline override should restore inline analytics writes"
    );
    assert!(
        enabled_with(INLINE_KEY, "no"),
        "false-like inline override must not accidentally force inline analytics"
    );
    assert!(
        enabled_with(DEFER_KEY, "no"),
        "false-like explicit defer value should leave the index-run default in effect"
    );
}
15795
15796 fn frontier_test_conversation(idx_created_at: &[(i64, Option<i64>)]) -> Conversation {
15797 Conversation {
15798 id: None,
15799 agent_slug: "codex".into(),
15800 workspace: None,
15801 external_id: Some("frontier-test".into()),
15802 title: Some("Frontier test".into()),
15803 source_path: PathBuf::from("/tmp/frontier-test.jsonl"),
15804 started_at: Some(1_700_000_000_000),
15805 ended_at: None,
15806 approx_tokens: None,
15807 metadata_json: serde_json::Value::Null,
15808 messages: idx_created_at
15809 .iter()
15810 .map(|(idx, created_at)| Message {
15811 id: None,
15812 idx: *idx,
15813 role: MessageRole::User,
15814 author: None,
15815 created_at: *created_at,
15816 content: format!("message-{idx}"),
15817 extra_json: serde_json::Value::Null,
15818 snippets: Vec::new(),
15819 })
15820 .collect(),
15821 source_id: LOCAL_SOURCE_ID.into(),
15822 origin_host: None,
15823 }
15824 }
15825
/// `conversation_tail_ended_at_candidate` must report the later of the
/// conversation-level `ended_at` and the message timestamps, and fall back to
/// the conversation-level value when no message carries a timestamp.
#[test]
fn conversation_tail_ended_at_candidate_uses_latest_known_end() {
    // Builds a fixture with the given conversation-level end and asks for the candidate.
    let candidate_for = |stamps: &[(i64, Option<i64>)], conversation_end: i64| {
        let mut conv = frontier_test_conversation(stamps);
        conv.ended_at = Some(conversation_end);
        conversation_tail_ended_at_candidate(&conv)
    };

    assert_eq!(
        candidate_for(&[(0, Some(100)), (1, Some(110))], 250),
        Some(250),
        "conversation-level ended_at can be later than the final message timestamp"
    );

    assert_eq!(
        candidate_for(&[(0, Some(100)), (1, Some(300))], 250),
        Some(300),
        "message timestamps can be later than a stale conversation-level ended_at"
    );

    assert_eq!(candidate_for(&[(0, None), (1, None)], 200), Some(200));
}
15852
/// `collect_existing_conversation_tail_from_ended_at` may only produce a plan
/// for an input whose messages are all newer than the supplied `ended_at`
/// (120 here); every other shape checked below must yield `None`.
#[test]
fn ended_at_shortcut_splits_safe_append_tail() {
    // Asserts that the shortcut declines the fixture built from `stamps`.
    let assert_needs_lookup = |stamps: &[(i64, Option<i64>)], reason: &str| {
        let conv = frontier_test_conversation(stamps);
        assert!(
            collect_existing_conversation_tail_from_ended_at(&conv, 120).is_none(),
            "{reason}"
        );
    };

    assert_needs_lookup(
        &[(0, Some(100)), (1, Some(110)), (2, Some(120))],
        "ended_at coverage alone does not prove all lower idx rows exist",
    );
    assert_needs_lookup(
        &[(0, Some(100)), (1, Some(110)), (2, Some(130))],
        "mixed covered-prefix plus append-tail input needs lookup to fill possible gaps",
    );

    // The one accepted shape: every message is strictly newer than 120.
    let pure_append = frontier_test_conversation(&[(2, Some(130)), (3, Some(140))]);
    let plan = collect_existing_conversation_tail_from_ended_at(&pure_append, 120)
        .expect("all-new timestamp tail can append without message lookup");
    assert_eq!(plan.messages.len(), 2);
    let planned_idxs = plan.messages.iter().map(|msg| msg.idx).collect::<Vec<_>>();
    assert_eq!(planned_idxs, vec![2, 3]);
    let expected_chars = "message-2".len() + "message-3".len();
    assert_eq!(plan.new_chars, expected_chars as i64);

    assert_needs_lookup(
        &[(1, Some(110)), (0, Some(100))],
        "out-of-order input must not use the append/no-op shortcut",
    );
    assert_needs_lookup(
        &[(0, Some(100)), (1, None)],
        "missing timestamps require replay-aware lookup",
    );
    assert_needs_lookup(
        &[(0, Some(100)), (1, Some(130)), (2, Some(110))],
        "covered messages after the append split mean the input is not a safe tail",
    );
    assert_needs_lookup(
        &[(0, Some(100)), (0, Some(130))],
        "duplicate idx values can collide with archived rows and require robust lookup",
    );
}
15905
/// For timestamp-less fixtures, `collect_existing_conversation_noop_from_idx_tail`
/// (called with tail idx 1) must return `None` for every shape listed here.
#[test]
fn idx_tail_shortcut_handles_no_timestamp_legacy_sources() {
    // Asserts that the idx-tail shortcut declines the fixture built from `idxs`.
    let assert_needs_lookup = |idxs: &[(i64, Option<i64>)], reason: &str| {
        let conv = frontier_test_conversation(idxs);
        assert!(
            collect_existing_conversation_noop_from_idx_tail(&conv, 1).is_none(),
            "{reason}"
        );
    };

    assert_needs_lookup(
        &[(0, None), (1, None)],
        "idx tail coverage alone does not prove all lower rows exist",
    );
    assert_needs_lookup(
        &[(0, None), (1, None), (2, None)],
        "partial timestamp tail metadata is not trusted for appends",
    );
    assert_needs_lookup(
        &[(1, None), (0, None), (2, None)],
        "out-of-order legacy messages need the robust lookup",
    );
    assert_needs_lookup(
        &[(0, None), (2, None), (2, None)],
        "duplicate tail idx values can collide and require robust lookup",
    );
    assert_needs_lookup(
        &[(0, None), (1, None), (1, None)],
        "duplicate covered idx values still need collision-aware lookup",
    );
}
15938
/// `collect_existing_conversation_noop_from_conversation_ended_at` (called with
/// 120) must return `None` for each fixture below, whatever its
/// conversation-level `ended_at` is set to.
#[test]
fn conversation_ended_at_shortcut_handles_stale_partial_idx_tail() {
    // Builds the fixture, stamps its conversation-level end, and asserts the
    // shortcut declines it.
    let assert_needs_lookup =
        |stamps: &[(i64, Option<i64>)], conversation_end: i64, reason: &str| {
            let mut conv = frontier_test_conversation(stamps);
            conv.ended_at = Some(conversation_end);
            assert!(
                collect_existing_conversation_noop_from_conversation_ended_at(&conv, 120)
                    .is_none(),
                "{reason}"
            );
        };

    assert_needs_lookup(
        &[(0, Some(100)), (1, Some(110)), (2, Some(120))],
        120,
        "conversation ended_at coverage alone does not prove all message rows exist",
    );
    assert_needs_lookup(
        &[(0, None), (1, None), (2, None)],
        120,
        "no-timestamp messages need replay-aware lookup even when conversation ended_at is unchanged",
    );
    assert_needs_lookup(
        &[(0, Some(100)), (1, Some(110)), (2, Some(121))],
        121,
        "newer conversations need robust append handling",
    );
    assert_needs_lookup(
        &[(1, Some(110)), (0, Some(100))],
        120,
        "out-of-order unchanged conversations still use the robust path",
    );
    assert_needs_lookup(
        &[(0, Some(100)), (1, Some(110)), (1, Some(111))],
        120,
        "duplicate covered idx values still need collision-aware lookup",
    );
}
15981
/// The classifier must accept "populated WITHOUT ROWID" reload/load failures
/// that name an `fts_messages_*` table and reject the same failure for an
/// unrelated table.
#[test]
fn populated_fts_shadow_without_rowid_reload_errors_are_classified() {
    let fts_shadow_failures = [
        "not implemented: reloading populated WITHOUT ROWID table `fts_messages_config` into MemDatabase is not yet supported",
        "not implemented: loading populated WITHOUT ROWID table fts_messages_data is not yet supported",
    ];
    for failure in fts_shadow_failures {
        assert!(error_message_indicates_populated_fts_shadow_without_rowid_reload(failure));
    }

    let unrelated_table_failure = "not implemented: reloading populated WITHOUT ROWID table `user_table` into MemDatabase is not yet supported";
    assert!(
        !error_message_indicates_populated_fts_shadow_without_rowid_reload(
            unrelated_table_failure
        )
    );
}
16000
/// Only a database named `agent_search.db` maps to a doctor mutation lock path
/// (`doctor/locks/doctor-repair.lock` beside it); other database names map to
/// `None`.
#[test]
fn doctor_mutation_open_guard_only_targets_canonical_archive_db() {
    let temp = TempDir::new().unwrap();
    let root = temp.path();

    let canonical_db = root.join("agent_search.db");
    let expected_lock = root.join("doctor/locks/doctor-repair.lock");
    assert_eq!(
        doctor_mutation_lock_path_for_db_open(&canonical_db),
        Some(expected_lock)
    );

    let scratch_db = root.join("scratch.db");
    assert_eq!(doctor_mutation_lock_path_for_db_open(&scratch_db), None);
}
16013
/// Lock metadata counts as "current process" only when its `pid=` line holds
/// exactly this process's id; a non-numeric pid and the next pid up must both
/// be rejected.
#[test]
fn doctor_lock_metadata_pid_detection_is_exact() {
    let own_pid = std::process::id();

    let own_metadata = format!("schema_version=1\npid={own_pid}\nmode=safe_auto_run\n");
    assert!(doctor_lock_metadata_pid_is_current_process(&own_metadata));

    let garbage_pid_metadata = "schema_version=1\npid=not-a-pid\n";
    assert!(!doctor_lock_metadata_pid_is_current_process(
        garbage_pid_metadata
    ));

    let neighbour_metadata = format!("pid={}\n", own_pid.saturating_add(1));
    assert!(!doctor_lock_metadata_pid_is_current_process(
        &neighbour_metadata
    ));
}
16029
/// While the doctor mutation lock file is exclusively locked and its metadata
/// names a different pid (1), a read-only open of the canonical database must
/// fail with an error mentioning the active doctor mutation lock.
#[test]
#[cfg(not(windows))]
fn doctor_storage_open_refuses_active_doctor_mutation_lock_from_other_process() {
    use std::io::Write as _;

    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("agent_search.db");
    // Create the database, then close it again before taking the lock.
    FrankenStorage::open(&db_path).unwrap().close().unwrap();

    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut open_options = fs::OpenOptions::new();
    open_options
        .create(true)
        .truncate(false)
        .read(true)
        .write(true);
    let mut held_lock = open_options.open(&lock_path).unwrap();
    fs2::FileExt::try_lock_exclusive(&held_lock).unwrap();
    // Replace whatever metadata was there with a foreign pid.
    held_lock.set_len(0).unwrap();
    held_lock.write_all(b"schema_version=1\npid=1\n").unwrap();
    held_lock.sync_all().unwrap();

    let open_attempt =
        open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25));
    let message = open_attempt
        .expect_err("active doctor mutation lock must block canonical DB opens")
        .to_string();
    let names_lock = message.contains("doctor mutation lock");
    assert!(
        names_lock && message.contains("active"),
        "error should identify the active doctor mutation lock: {message}"
    );

    fs2::FileExt::unlock(&held_lock).unwrap();
}
16066
/// When the doctor mutation lock metadata names this very process, a read-only
/// open of the canonical database must still succeed (on Windows the test also
/// enters the explicit open bypass first).
#[test]
fn doctor_storage_open_allows_current_doctor_process_probe() {
    use std::io::Write as _;

    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("agent_search.db");
    // Create the database, then close it again before taking the lock.
    FrankenStorage::open(&db_path).unwrap().close().unwrap();

    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut open_options = fs::OpenOptions::new();
    open_options
        .create(true)
        .truncate(false)
        .read(true)
        .write(true);
    let mut held_lock = open_options.open(&lock_path).unwrap();
    fs2::FileExt::try_lock_exclusive(&held_lock).unwrap();
    // Replace whatever metadata was there with our own pid.
    held_lock.set_len(0).unwrap();
    write!(held_lock, "schema_version=1\npid={}\n", std::process::id()).unwrap();
    held_lock.sync_all().unwrap();

    #[cfg(windows)]
    let _bypass = enter_doctor_mutation_db_open_bypass();

    let open_attempt =
        open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25));
    let probe = open_attempt
        .expect("doctor process must be able to run post-repair read probes under its own lock");
    drop(probe);

    fs2::FileExt::unlock(&held_lock).unwrap();
}
16103
/// When the `fsqlite.`-prefixed pragma is rejected, `disable_autocommit_retain`
/// must go on to the unprefixed pragma, return it as the selected one, and have
/// attempted exactly `AUTOCOMMIT_RETAIN_OFF_PRAGMAS`.
#[test]
fn autocommit_retain_disable_tries_compat_then_canonical_pragma() {
    let mut attempts = Vec::new();

    let outcome = disable_autocommit_retain(|pragma| {
        attempts.push(pragma);
        match pragma {
            "PRAGMA fsqlite.autocommit_retain = OFF;" => Err("compat namespace unavailable"),
            _ => Ok(()),
        }
    });
    let selected = outcome.expect("canonical pragma should disable autocommit retain");

    assert_eq!(selected, "PRAGMA autocommit_retain = OFF;");
    assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
}
16121
/// When every pragma is rejected, `disable_autocommit_retain` must return an
/// error that refuses the long-lived connection and lists both attempted
/// pragmas.
#[test]
fn autocommit_retain_disable_fails_closed_when_no_pragma_works() {
    let mut attempts = Vec::new();

    let outcome = disable_autocommit_retain(|pragma| {
        attempts.push(pragma);
        Err("unsupported pragma")
    });
    let err = outcome.expect_err("unsupported autocommit retain controls should fail closed");

    assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);

    let message = err.to_string();
    assert!(
        message.contains("refusing to keep a long-lived MVCC connection"),
        "error should force callers away from unbounded snapshot retention: {message}"
    );

    let lists_compat = message.contains("PRAGMA fsqlite.autocommit_retain = OFF;");
    assert!(
        lists_compat && message.contains("PRAGMA autocommit_retain = OFF;"),
        "error should preserve attempted PRAGMAs for diagnostics: {message}"
    );
}
16144
16145 fn rusqlite_test_fixture_conn(db_path: &Path) -> rusqlite::Connection {
16154 rusqlite::Connection::open(db_path).expect("open rusqlite test fixture connection")
16155 }
16156
16157 fn seed_historical_db_direct(
16158 db_path: &Path,
16159 conversations: &[crate::model::types::Conversation],
16160 ) {
16161 if let Some(parent) = db_path.parent() {
16162 fs::create_dir_all(parent).unwrap();
16163 }
16164
16165 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
16166 conn.execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA).unwrap();
16167 conn.execute_compat(
16168 "INSERT INTO agents(id, slug, name, version, kind, created_at, updated_at)
16169 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
16170 fparams![1_i64, "codex", "Codex", "0.2.3", "cli", 0_i64, 0_i64],
16171 )
16172 .unwrap();
16173
16174 let mut next_message_id = 1_i64;
16175 for (conv_index, conv) in conversations.iter().enumerate() {
16176 let conversation_id = i64::try_from(conv_index + 1).unwrap();
16177 let workspace_id = conv.workspace.as_ref().map(|workspace| {
16178 let workspace_id = conversation_id;
16179 let workspace_path = workspace.to_string_lossy().into_owned();
16180 conn.execute_compat(
16181 "INSERT INTO workspaces(id, path, display_name) VALUES(?1, ?2, ?3)",
16182 fparams![
16183 workspace_id,
16184 workspace_path.as_str(),
16185 workspace_path.as_str()
16186 ],
16187 )
16188 .unwrap();
16189 workspace_id
16190 });
16191 let source_path = conv.source_path.to_string_lossy().into_owned();
16192 let metadata_json = conv.metadata_json.to_string();
16193 conn.execute_compat(
16194 "INSERT INTO conversations (
16195 id, agent_id, workspace_id, source_id, external_id, title, source_path,
16196 started_at, ended_at, approx_tokens, metadata_json, origin_host
16197 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
16198 fparams![
16199 conversation_id,
16200 1_i64,
16201 workspace_id,
16202 conv.source_id.as_str(),
16203 conv.external_id.as_deref(),
16204 conv.title.as_deref(),
16205 source_path.as_str(),
16206 conv.started_at,
16207 conv.ended_at,
16208 conv.approx_tokens,
16209 metadata_json.as_str(),
16210 conv.origin_host.as_deref()
16211 ],
16212 )
16213 .unwrap();
16214
16215 for msg in &conv.messages {
16216 let extra_json = msg.extra_json.to_string();
16217 let role = role_str(&msg.role);
16218 conn.execute_compat(
16219 "INSERT INTO messages(
16220 id, conversation_id, idx, role, author, created_at, content, extra_json
16221 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
16222 fparams![
16223 next_message_id,
16224 conversation_id,
16225 msg.idx,
16226 role.as_str(),
16227 msg.author.as_deref(),
16228 msg.created_at,
16229 msg.content.as_str(),
16230 extra_json.as_str()
16231 ],
16232 )
16233 .unwrap();
16234 next_message_id += 1;
16235 }
16236 }
16237 }
16238
/// `bookmarks.db` is recognised as user data both with and without a directory prefix.
#[test]
fn is_user_data_file_detects_bookmarks() {
    for candidate in ["/data/bookmarks.db", "bookmarks.db"] {
        assert!(is_user_data_file(Path::new(candidate)));
    }
}
16248
/// `tui_state.json` is recognised as user data.
#[test]
fn is_user_data_file_detects_tui_state() {
    let tui_state = Path::new("/data/tui_state.json");
    assert!(is_user_data_file(tui_state));
}
16253
/// `sources.toml` is recognised as user data.
#[test]
fn is_user_data_file_detects_sources_toml() {
    let sources_config = Path::new("/config/sources.toml");
    assert!(is_user_data_file(sources_config));
}
16258
/// A bare `.env` file is recognised as user data.
#[test]
fn is_user_data_file_detects_env() {
    let dotenv_file = Path::new(".env");
    assert!(is_user_data_file(dotenv_file));
}
16263
/// File names outside the user-data set must not be classified as user data.
#[test]
fn is_user_data_file_rejects_other_files() {
    for candidate in ["index.db", "conversations.db", "random.txt"] {
        assert!(!is_user_data_file(Path::new(candidate)));
    }
}
16270
/// Backing up a path that does not exist succeeds and yields `None`.
#[test]
fn create_backup_returns_none_for_nonexistent() {
    let temp = TempDir::new().unwrap();
    let missing_db = temp.path().join("nonexistent.db");

    let outcome = create_backup(&missing_db).unwrap();

    assert!(outcome.is_none());
}
16282
/// Backing up an existing file yields a path that exists on disk and whose
/// file name contains `backup`.
#[test]
fn create_backup_creates_named_file() {
    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("test.db");
    std::fs::write(&db_path, b"test data").unwrap();

    let created = create_backup(&db_path).unwrap();
    assert!(created.is_some());

    let backup = created.unwrap();
    assert!(backup.exists());

    let backup_name = backup.file_name().unwrap().to_str().unwrap();
    assert!(backup_name.contains("backup"));
}
16302
/// Two back-to-back backups of the same file must land on different paths, and
/// both must exist afterwards.
#[test]
fn create_backup_paths_are_unique() {
    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("test.db");
    std::fs::write(&db_path, b"test data").unwrap();

    let earlier = create_backup(&db_path).unwrap().unwrap();
    let later = create_backup(&db_path).unwrap().unwrap();

    assert_ne!(earlier, later);
    for backup in [&earlier, &later] {
        assert!(backup.exists());
    }
}
16316
/// Inserts a two-message conversation, runs `EXPLAIN` on the per-conversation
/// message query ordered by `idx`, and checks the opcode column contains
/// `SeekGE` and does not contain `SorterOpen`.
#[test]
fn lexical_rebuild_messages_query_uses_conversation_idx_access_path() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Small builder so the two fixture messages differ only where it matters.
    let message = |idx: i64,
                   role: MessageRole,
                   author: &'static str,
                   created_at: i64,
                   content: &'static str| Message {
        id: None,
        idx,
        role,
        author: Some(author.into()),
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("conv-1".into()),
        title: Some("Lexical rebuild".into()),
        source_path: PathBuf::from("/tmp/conv-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            message(0, MessageRole::User, "user", 1_700_000_000_010, "first"),
            message(1, MessageRole::Agent, "assistant", 1_700_000_000_020, "second"),
        ],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    let conversation_id = storage
        .conn
        .query_row_map(
            "SELECT id FROM conversations WHERE external_id = ?1",
            fparams!["conv-1"],
            |row| row.get_typed::<i64>(0),
        )
        .unwrap();

    // Column 1 of each EXPLAIN row is read as the opcode name.
    let opcodes: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN \
             SELECT id, idx, role, author, created_at, content \
             FROM messages \
             WHERE conversation_id = ?1 ORDER BY idx",
            fparams![conversation_id],
            |row| row.get_typed(1),
        )
        .unwrap();

    let seeks_into_index = opcodes.iter().any(|opcode| opcode == "SeekGE");
    assert!(
        seeks_into_index,
        "expected lexical rebuild message fetch to seek into the conversation_id/idx access path, got {opcodes:?}"
    );
    let opens_sorter = opcodes.iter().any(|opcode| opcode == "SorterOpen");
    assert!(
        !opens_sorter,
        "expected lexical rebuild message fetch to avoid sorter temp b-trees, got {opcodes:?}"
    );
}
16403
/// Busy, locked, cannot-open and I/O errors must not be classified as
/// requiring a rebuild.
#[test]
fn schema_check_rebuild_classification_ignores_transient_errors() {
    let transient_errors = [
        frankensqlite::FrankenError::Busy,
        frankensqlite::FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::CannotOpen {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::Io(std::io::Error::other("disk hiccup")),
    ];

    for transient in &transient_errors {
        assert!(!schema_check_error_requires_rebuild(transient));
    }
}
16423
/// Corrupt-database, corrupt-WAL, not-a-database and short-read errors must
/// all be classified as requiring a rebuild.
#[test]
fn schema_check_rebuild_classification_keeps_corruption_errors() {
    let corruption_errors = [
        frankensqlite::FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string(),
        },
        frankensqlite::FrankenError::WalCorrupt {
            detail: "bad wal".to_string(),
        },
        frankensqlite::FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::ShortRead {
            expected: 4096,
            actual: 64,
        },
    ];

    for corruption in &corruption_errors {
        assert!(schema_check_error_requires_rebuild(corruption));
    }
}
16448
/// Contention-style VACUUM failures must be flagged by
/// `backup_vacuum_error_requires_consistent_retry`; not-a-database and
/// corrupt-database failures must not.
#[test]
fn create_backup_refuses_raw_copy_after_retryable_vacuum_errors() {
    let retryable_errors = [
        frankensqlite::FrankenError::Busy,
        frankensqlite::FrankenError::BusyRecovery,
        frankensqlite::FrankenError::BusySnapshot {
            conflicting_pages: "1,2".to_string(),
        },
        frankensqlite::FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::LockFailed {
            detail: "fcntl lock still held".to_string(),
        },
        frankensqlite::FrankenError::WriteConflict { page: 7, holder: 9 },
        frankensqlite::FrankenError::SerializationFailure { page: 11 },
        frankensqlite::FrankenError::Internal("database is locked".to_string()),
    ];
    retryable_errors.iter().for_each(|err| {
        assert!(
            backup_vacuum_error_requires_consistent_retry(err),
            "retryable VACUUM failure must not fall back to raw bundle copy: {err}"
        );
    });

    let not_retryable = [
        frankensqlite::FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string(),
        },
    ];
    for err in &not_retryable {
        assert!(!backup_vacuum_error_requires_consistent_retry(err));
    }
}
16486
/// The VACUUM staging path for a backup must be a dot-file ending in
/// `.vacuum-in-progress` that `is_backup_root_name` does not treat as a backup
/// root.
#[test]
fn create_backup_uses_hidden_vacuum_stage_path() {
    let backup_path = PathBuf::from("/tmp/test.db.backup.123.456.0");
    let stage_path = vacuum_stage_backup_path(&backup_path);
    // A missing or non-UTF-8 file name degrades to "" so the asserts below fail cleanly.
    let stage_name = match stage_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "",
    };

    assert!(stage_name.starts_with('.'));
    assert!(stage_name.ends_with(".vacuum-in-progress"));
    let discoverable = is_backup_root_name(stage_name, "test.db.backup.");
    assert!(
        !discoverable,
        "incomplete VACUUM output must not be discoverable as a backup root"
    );
}
16503
/// The backup of a file holding arbitrary (non-SQLite) bytes must contain
/// exactly those bytes.
#[test]
fn create_backup_preserves_content() {
    let original_content = b"test database content 12345";
    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("test.db");
    std::fs::write(&db_path, original_content).unwrap();

    let backup_path = create_backup(&db_path).unwrap().unwrap();

    assert_eq!(std::fs::read(&backup_path).unwrap(), original_content);
}
16515
/// `-wal` and `-shm` sidecars next to the database must be copied next to the
/// backup with their contents intact.
#[test]
fn create_backup_copies_sidecars_when_present() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("test.db");
    std::fs::write(&db_path, b"db").unwrap();
    for (suffix, contents) in sidecars {
        std::fs::write(database_sidecar_path(&db_path, suffix), contents).unwrap();
    }

    let backup_path = create_backup(&db_path).unwrap().unwrap();

    for (suffix, contents) in sidecars {
        assert_eq!(
            std::fs::read(database_sidecar_path(&backup_path, suffix)).unwrap(),
            contents
        );
    }
}
16535
/// A database path that is a symlink must make `create_backup` fail with a
/// "bundle symlink" error, leave the link target untouched, and publish no
/// `test.db.backup.*` entries.
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_root_during_raw_fallback() {
    use std::os::unix::fs::symlink;

    let temp = TempDir::new().unwrap();
    let root = temp.path();
    let outside_db = root.join("outside.db");
    let db_path = root.join("test.db");
    std::fs::write(&outside_db, b"not sqlite").unwrap();
    symlink(&outside_db, &db_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    let mentions_symlink = err.to_string().contains("bundle symlink");
    assert!(mentions_symlink, "unexpected error: {err:#}");
    assert_eq!(std::fs::read(&outside_db).unwrap(), b"not sqlite");

    let backup_roots: Vec<_> = std::fs::read_dir(root)
        .unwrap()
        .flatten()
        .filter_map(|entry| {
            let name = entry.file_name().to_string_lossy().into_owned();
            name.starts_with("test.db.backup.").then_some(name)
        })
        .collect();
    assert!(
        backup_roots.is_empty(),
        "symlinked backup source must not publish backup roots: {backup_roots:?}"
    );
}
16565
/// A `-wal` sidecar that is a symlink must make `create_backup` fail with a
/// "bundle symlink" error, leave the link target untouched, and leave no
/// `test.db.backup.*` entries behind.
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_sidecar_without_partial_backup() {
    use std::os::unix::fs::symlink;

    let temp = TempDir::new().unwrap();
    let root = temp.path();
    let db_path = root.join("test.db");
    let outside_wal = root.join("outside.wal");
    let wal_path = database_sidecar_path(&db_path, "-wal");
    std::fs::write(&db_path, b"not sqlite").unwrap();
    std::fs::write(&outside_wal, b"outside wal").unwrap();
    symlink(&outside_wal, &wal_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    let mentions_symlink = err.to_string().contains("bundle symlink");
    assert!(mentions_symlink, "unexpected error: {err:#}");
    assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");

    let backup_roots: Vec<_> = std::fs::read_dir(root)
        .unwrap()
        .flatten()
        .filter_map(|entry| {
            let name = entry.file_name().to_string_lossy().into_owned();
            name.starts_with("test.db.backup.").then_some(name)
        })
        .collect();
    assert!(
        backup_roots.is_empty(),
        "sidecar preflight failure must not leave a partial backup root: {backup_roots:?}"
    );
}
16597
/// With five `test.db.backup.*` files present and a keep count of 3,
/// `cleanup_old_backups` must leave exactly three entries whose name contains
/// `backup`.
#[test]
fn cleanup_old_backups_keeps_recent() {
    let temp = TempDir::new().unwrap();
    let root = temp.path();
    let db_path = root.join("test.db");

    for i in 0..5 {
        let backup_file = root.join(format!("test.db.backup.{}", 1000 + i));
        std::fs::write(backup_file, format!("backup {i}")).unwrap();
    }

    cleanup_old_backups(&db_path, 3).unwrap();

    let remaining = std::fs::read_dir(root)
        .unwrap()
        .flatten()
        .filter(|entry| entry.file_name().to_str().unwrap_or("").contains("backup"))
        .count();

    assert_eq!(remaining, 3);
}
16624
/// Three backups, each with `-wal` and `-shm` sidecars, pruned to a keep count
/// of 2 must leave two backup roots, two WAL sidecars and two SHM sidecars.
#[test]
fn cleanup_old_backups_ignores_wal_and_shm_sidecars() {
    let temp = TempDir::new().unwrap();
    let root = temp.path();
    let db_path = root.join("test.db");

    for i in 0..3 {
        let backup_path = root.join(format!("test.db.backup.{}", 1000 + i));
        std::fs::write(&backup_path, format!("backup {i}")).unwrap();
        std::fs::write(format!("{}-wal", backup_path.display()), b"wal").unwrap();
        std::fs::write(format!("{}-shm", backup_path.display()), b"shm").unwrap();
        // Space the backups out in time before creating the next one.
        std::thread::sleep(std::time::Duration::from_millis(20));
    }

    cleanup_old_backups(&db_path, 2).unwrap();

    // Tally what survived; sidecar suffixes are checked before the root match
    // because sidecar names also contain "backup".
    let (mut roots, mut wals, mut shms) = (0_usize, 0_usize, 0_usize);
    for entry in std::fs::read_dir(root).unwrap().flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        match name.as_str() {
            n if n.ends_with("-wal") => wals += 1,
            n if n.ends_with("-shm") => shms += 1,
            n if n.contains("backup") => roots += 1,
            _ => {}
        }
    }

    assert_eq!(roots, 2, "should keep two backup roots");
    assert_eq!(
        wals, 2,
        "should keep WAL sidecars only for retained backups"
    );
    assert_eq!(
        shms, 2,
        "should keep SHM sidecars only for retained backups"
    );
}
16670
/// Moving a bundle made of a database plus `-wal` and `-shm` sidecars must
/// report all three as moved, remove them from the source location, and
/// preserve their contents at the destination.
#[test]
fn move_database_bundle_moves_database_and_sidecars() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("test.db");
    let backup_path = temp.path().join("test.db.corrupt");

    std::fs::write(&db_path, b"db").unwrap();
    for (suffix, contents) in sidecars {
        std::fs::write(database_sidecar_path(&db_path, suffix), contents).unwrap();
    }

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    let everything_moved = DatabaseBundleMoveResult {
        database: true,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, everything_moved);
    assert!(moved.moved_any());

    // Nothing is left behind at the source.
    assert!(!db_path.exists());
    for (suffix, _) in sidecars {
        assert!(!database_sidecar_path(&db_path, suffix).exists());
    }

    // Everything arrived intact at the destination.
    assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
    for (suffix, contents) in sidecars {
        assert_eq!(
            std::fs::read(database_sidecar_path(&backup_path, suffix)).unwrap(),
            contents
        );
    }
}
16706
/// When only `-wal` and `-shm` sidecars exist (no main database file), the move
/// must report `database: false`, still relocate both sidecars, and preserve
/// their contents.
#[test]
fn move_database_bundle_preserves_orphan_sidecars_without_main_db() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let temp = TempDir::new().unwrap();
    let db_path = temp.path().join("test.db");
    let backup_path = temp.path().join("test.db.corrupt");

    for (suffix, contents) in sidecars {
        std::fs::write(database_sidecar_path(&db_path, suffix), contents).unwrap();
    }

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    let sidecars_only = DatabaseBundleMoveResult {
        database: false,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, sidecars_only);
    assert!(moved.moved_any());

    assert!(!db_path.exists());
    for (suffix, _) in sidecars {
        assert!(!database_sidecar_path(&db_path, suffix).exists());
    }
    for (suffix, contents) in sidecars {
        assert_eq!(
            std::fs::read(database_sidecar_path(&backup_path, suffix)).unwrap(),
            contents
        );
    }
}
16738
#[test]
#[cfg(unix)]
fn move_database_bundle_moves_dangling_symlink_database_root() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let link_path = tmp.path().join("test.db");
    let destination = tmp.path().join("test.db.corrupt");
    let absent_target = tmp.path().join("missing-target.db");

    // The database root is a symlink whose target was never created.
    symlink(&absent_target, &link_path).unwrap();

    let outcome = move_database_bundle(&link_path, &destination).unwrap();

    let root_only = DatabaseBundleMoveResult {
        database: true,
        wal: false,
        shm: false,
    };
    assert_eq!(outcome, root_only);

    // The link itself is gone from the source and is still a symlink at the
    // destination; `symlink_metadata` inspects the link, not its target.
    assert!(std::fs::symlink_metadata(&link_path).is_err());
    let moved_kind = std::fs::symlink_metadata(&destination)
        .unwrap()
        .file_type();
    assert!(moved_kind.is_symlink());
    assert!(!absent_target.exists());
}
16770
#[test]
#[cfg(unix)]
fn move_database_bundle_moves_dangling_symlink_sidecars_without_main_db() {
    use std::os::unix::fs::symlink;

    // Reports whether the entry at `path` is itself a symlink, panicking when
    // no entry exists there.
    fn is_symlink(path: impl AsRef<std::path::Path>) -> bool {
        std::fs::symlink_metadata(path)
            .unwrap()
            .file_type()
            .is_symlink()
    }

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("test.db.corrupt");
    let absent_wal_target = tmp.path().join("missing-wal");
    let absent_shm_target = tmp.path().join("missing-shm");
    let source_wal = database_sidecar_path(&source, "-wal");
    let source_shm = database_sidecar_path(&source, "-shm");

    // Both sidecars are symlinks to targets that do not exist, and there is
    // no main database file at all.
    symlink(&absent_wal_target, &source_wal).unwrap();
    symlink(&absent_shm_target, &source_shm).unwrap();

    let outcome = move_database_bundle(&source, &destination).unwrap();

    let sidecars_only = DatabaseBundleMoveResult {
        database: false,
        wal: true,
        shm: true,
    };
    assert_eq!(outcome, sidecars_only);

    assert!(std::fs::symlink_metadata(&source_wal).is_err());
    assert!(std::fs::symlink_metadata(&source_shm).is_err());
    assert!(is_symlink(database_sidecar_path(&destination, "-wal")));
    assert!(is_symlink(database_sidecar_path(&destination, "-shm")));
    assert!(!absent_wal_target.exists());
    assert!(!absent_shm_target.exists());
}
16814
#[test]
fn copy_database_bundle_copies_database_and_sidecars() {
    // Reads a file's bytes, panicking when the file cannot be read.
    fn contents(path: impl AsRef<std::path::Path>) -> Vec<u8> {
        std::fs::read(path).unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let original = tmp.path().join("test.db");
    let duplicate = tmp.path().join("copy.db");

    std::fs::write(&original, b"db").unwrap();
    std::fs::write(database_sidecar_path(&original, "-wal"), b"wal").unwrap();
    std::fs::write(database_sidecar_path(&original, "-shm"), b"shm").unwrap();

    copy_database_bundle(&original, &duplicate).unwrap();

    // The copy carries the database and both sidecars...
    assert_eq!(contents(&duplicate), b"db");
    assert_eq!(
        contents(database_sidecar_path(&duplicate, "-wal")),
        b"wal"
    );
    assert_eq!(
        contents(database_sidecar_path(&duplicate, "-shm")),
        b"shm"
    );
    // ...while the source database stays in place with its contents.
    assert_eq!(contents(&original), b"db");
}
16838
#[test]
fn copy_database_bundle_creates_destination_parent() {
    let tmp = TempDir::new().unwrap();
    let original = tmp.path().join("test.db");
    // The destination lives under directories that do not exist yet.
    let duplicate = tmp.path().join("nested/copies/copy.db");

    std::fs::write(&original, b"db").unwrap();
    std::fs::write(database_sidecar_path(&original, "-wal"), b"wal").unwrap();

    copy_database_bundle(&original, &duplicate).unwrap();

    let created_parent = duplicate.parent().unwrap();
    assert!(created_parent.is_dir());

    let copied_db = std::fs::read(&duplicate).unwrap();
    assert_eq!(copied_db, b"db");
    let copied_wal = std::fs::read(database_sidecar_path(&duplicate, "-wal")).unwrap();
    assert_eq!(copied_wal, b"wal");
}
16857
#[test]
#[cfg(unix)]
fn copy_database_bundle_rejects_symlink_source_root() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let real_file = tmp.path().join("outside.db");
    let linked_root = tmp.path().join("test.db");
    let duplicate = tmp.path().join("copy.db");

    // The database path handed to the copy is a symlink to another file.
    std::fs::write(&real_file, b"outside").unwrap();
    symlink(&real_file, &linked_root).unwrap();

    let err = copy_database_bundle(&linked_root, &duplicate).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    // The refused copy leaves no destination file and the link target untouched.
    assert!(!duplicate.exists());
    assert_eq!(std::fs::read(&real_file).unwrap(), b"outside");
}
16880
#[test]
#[cfg(unix)]
fn copy_database_bundle_rejects_symlink_sidecar() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let original = tmp.path().join("test.db");
    let duplicate = tmp.path().join("copy.db");
    let real_wal = tmp.path().join("outside.wal");
    let linked_wal = database_sidecar_path(&original, "-wal");

    // A regular database file whose WAL sidecar is a symlink to another file.
    std::fs::write(&original, b"db").unwrap();
    std::fs::write(&real_wal, b"outside wal").unwrap();
    symlink(&real_wal, &linked_wal).unwrap();

    let err = copy_database_bundle(&original, &duplicate).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    // The link target is untouched and nothing was written at the destination.
    assert_eq!(std::fs::read(&real_wal).unwrap(), b"outside wal");
    assert!(!duplicate.exists());
    let duplicate_wal = database_sidecar_path(&duplicate, "-wal");
    assert!(!duplicate_wal.exists());
}
16906
#[test]
fn move_database_bundle_creates_destination_parent_and_moves_sidecars() {
    // Reads a file's bytes, panicking when the file cannot be read.
    fn contents(path: impl AsRef<std::path::Path>) -> Vec<u8> {
        std::fs::read(path).unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    // The destination lives under directories that do not exist yet.
    let destination = tmp.path().join("nested/backups/test.db.corrupt");

    std::fs::write(&source, b"db").unwrap();
    std::fs::write(database_sidecar_path(&source, "-wal"), b"wal").unwrap();
    std::fs::write(database_sidecar_path(&source, "-shm"), b"shm").unwrap();

    let outcome = move_database_bundle(&source, &destination).unwrap();
    let everything_moved = DatabaseBundleMoveResult {
        database: true,
        wal: true,
        shm: true,
    };
    assert_eq!(outcome, everything_moved);

    let created_parent = destination.parent().unwrap();
    assert!(created_parent.is_dir());
    assert_eq!(contents(&destination), b"db");
    assert_eq!(
        contents(database_sidecar_path(&destination, "-wal")),
        b"wal"
    );
    assert_eq!(
        contents(database_sidecar_path(&destination, "-shm")),
        b"shm"
    );
}
16937
#[test]
fn remove_database_files_removes_orphan_sidecars_without_main_db() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let wal_file = database_sidecar_path(&db_file, "-wal");
    let shm_file = database_sidecar_path(&db_file, "-shm");

    // Only the sidecars are created; the main database file never exists.
    std::fs::write(&wal_file, b"wal").unwrap();
    std::fs::write(&shm_file, b"shm").unwrap();

    remove_database_files(&db_file).unwrap();

    assert!(!db_file.exists());
    assert!(!wal_file.exists());
    assert!(!shm_file.exists());
}
16952
#[test]
fn cleanup_old_backups_ignores_backup_named_directories() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");

    // Three backup files plus one directory that merely shares the backup prefix.
    for (i, stamp) in (1000..1003).enumerate() {
        let file_name = format!("test.db.backup.{}", stamp);
        std::fs::write(tmp.path().join(&file_name), format!("backup {i}")).unwrap();
    }
    std::fs::create_dir(tmp.path().join("test.db.backup.directory")).unwrap();

    cleanup_old_backups(&db_file, 2).unwrap();

    // Split the surviving backup-prefixed entries into directories and files.
    let (backup_dirs, backup_files): (Vec<_>, Vec<_>) = std::fs::read_dir(tmp.path())
        .unwrap()
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .starts_with("test.db.backup.")
        })
        .partition(|entry| entry.path().is_dir());

    assert_eq!(
        backup_files.len(),
        2,
        "only real backup files count toward retention"
    );
    assert_eq!(
        backup_dirs.len(),
        1,
        "backup-named directories should be ignored"
    );
}
16994
#[test]
fn open_creates_new_database() {
    let tmp = TempDir::new().unwrap();
    let fresh_path = tmp.path().join("new.db");
    assert!(!fresh_path.exists());

    // Opening a path with no file behind it must create the database file.
    let opened = SqliteStorage::open(&fresh_path).unwrap();
    assert!(fresh_path.exists());
    opened.close().unwrap();
}
17009
#[test]
fn open_readonly_fails_for_nonexistent() {
    let tmp = TempDir::new().unwrap();
    let absent_path = tmp.path().join("nonexistent.db");

    // A read-only open of a path with no file behind it must be an error.
    assert!(SqliteStorage::open_readonly(&absent_path).is_err());
}
17017
#[test]
fn open_readonly_succeeds_for_existing() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("existing.db");

    // Create the database with a normal open and release that handle
    // before attempting the read-only open.
    {
        let _creator = SqliteStorage::open(&db_file).unwrap();
    }

    let readonly = SqliteStorage::open_readonly(&db_file).unwrap();
    assert!(readonly.schema_version().is_ok());
}
17031
#[test]
fn reopen_existing_current_schema_is_idempotent() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("existing.db");

    // First open creates the database at the current schema version; the
    // handle is dropped before the second open.
    let first = SqliteStorage::open(&db_file).unwrap();
    assert_eq!(first.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(first);

    let second = SqliteStorage::open(&db_file).unwrap();
    let version_after_reopen = second.schema_version().unwrap();
    assert_eq!(
        version_after_reopen, CURRENT_SCHEMA_VERSION,
        "reopening current schema DB should be idempotent"
    );
}
17051
#[test]
fn open_or_rebuild_current_schema_does_not_trigger_rebuild() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("existing.db");

    // Create the database at the current schema version, then release the
    // handle before going through the rebuild-capable entry point.
    let initial = SqliteStorage::open(&db_file).unwrap();
    assert_eq!(initial.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(initial);

    let via_rebuild_path = SqliteStorage::open_or_rebuild(&db_file)
        .expect("current schema DB should open without rebuild");
    let version = via_rebuild_path.schema_version().unwrap();
    assert_eq!(version, CURRENT_SCHEMA_VERSION);
}
17068
#[test]
fn open_or_rebuild_does_not_treat_non_database_paths_as_corruption() {
    let tmp = TempDir::new().unwrap();
    // The "database" path is actually a directory.
    let dir_as_db = tmp.path().join("db_dir");
    std::fs::create_dir(&dir_as_db).unwrap();

    let outcome = SqliteStorage::open_or_rebuild(&dir_as_db);

    let is_plain_open_error = matches!(
        outcome,
        Err(MigrationError::Database(_) | MigrationError::Io(_))
    );
    assert!(
        is_plain_open_error,
        "non-database path should report the underlying open error without rebuild"
    );

    assert!(
        dir_as_db.is_dir(),
        "non-database directory must be left in place"
    );
}
17090
#[test]
fn schema_version_returns_current() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // A freshly opened database must report a schema version of 5 or later.
    let reported = storage.schema_version().unwrap();
    assert!(reported >= 5, "Schema version should be at least 5");
}
17103
#[test]
fn migration_v13_creates_analytics_tables() {
    // Runs `PRAGMA <pragma>(<table>)` and collects result column 1 of every
    // row, which holds the name for both `table_info` and `index_list`.
    fn pragma_names(conn: &FrankenConnection, pragma: &str, table: &str) -> Vec<String> {
        let sql = format!("PRAGMA {pragma}({table})");
        conn.query_map_collect(&sql, fparams![], |row: &FrankenRow| row.get_typed(1))
            .unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Schema version must match CURRENT_SCHEMA_VERSION after migration"
    );

    let conn = storage.raw();

    // Columns each analytics table is required to expose.
    let required_columns: [(&str, &[&str]); 4] = [
        (
            "message_metrics",
            &[
                "message_id",
                "hour_id",
                "day_id",
                "content_tokens_est",
                "model_name",
                "model_family",
                "model_tier",
                "provider",
                "api_input_tokens",
                "has_plan",
                "agent_slug",
                "role",
                "api_data_source",
            ],
        ),
        (
            "usage_hourly",
            &[
                "hour_id",
                "plan_message_count",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_coverage_message_count",
                "content_tokens_est_user",
                "api_thinking_tokens_total",
            ],
        ),
        (
            "usage_daily",
            &[
                "day_id",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_thinking_tokens_total",
                "content_tokens_est_assistant",
                "message_count",
            ],
        ),
        (
            "usage_models_daily",
            &[
                "day_id",
                "model_family",
                "model_tier",
                "message_count",
                "api_tokens_total",
                "api_coverage_message_count",
            ],
        ),
    ];
    for (table, columns) in required_columns {
        let present = pragma_names(conn, "table_info", table);
        for column in columns {
            assert!(
                present.iter().any(|name| name.as_str() == *column),
                "{table} missing column: {column}"
            );
        }
    }

    // Index-name fragments each table must carry, paired with the failure
    // message reported when the fragment is absent.
    let required_indexes: [(&str, &[(&str, &str)]); 4] = [
        (
            "message_metrics",
            &[
                ("idx_mm_hour", "message_metrics must have hour index"),
                (
                    "idx_mm_agent_day",
                    "message_metrics must have agent+day index",
                ),
                (
                    "idx_mm_model_family_day",
                    "message_metrics must have model_family+day index",
                ),
            ],
        ),
        (
            "usage_hourly",
            &[("idx_uh_agent", "usage_hourly must have agent index")],
        ),
        (
            "usage_daily",
            &[("idx_ud_agent", "usage_daily must have agent index")],
        ),
        (
            "usage_models_daily",
            &[(
                "idx_umd_model_day",
                "usage_models_daily must have model+day index",
            )],
        ),
    ];
    for (table, expectations) in required_indexes {
        let present = pragma_names(conn, "index_list", table);
        for (fragment, failure) in expectations {
            assert!(
                present.iter().any(|name| name.contains(*fragment)),
                "{failure}"
            );
        }
    }

    let conversation_cols = pragma_names(conn, "table_info", "conversations");
    let has_tail_columns = ["last_message_idx", "last_message_created_at"]
        .iter()
        .all(|wanted| conversation_cols.iter().any(|col| col.as_str() == *wanted));
    assert!(
        has_tail_columns,
        "fresh schema must include V15 tail columns without ALTER TABLE on conversations"
    );

    // A fresh schema must carry no `fts_messages` entry in sqlite_master.
    let fts_schema_rows: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row: &FrankenRow| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        fts_schema_rows, 0,
        "fresh schema should not create and immediately drop derived fts_messages"
    );

    let integrity_report: Vec<String> = conn
        .query_map_collect("PRAGMA integrity_check;", fparams![], |row: &FrankenRow| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(
        integrity_report,
        vec![String::from("ok")],
        "fresh schema must pass SQLite integrity_check"
    );
}
17281
#[test]
fn hour_id_round_trip() {
    let sample_ms = 1_770_508_800_000_i64;
    let hour_bucket = SqliteStorage::hour_id_from_millis(sample_ms);
    let day_bucket = SqliteStorage::day_id_from_millis(sample_ms);

    assert_eq!(hour_bucket / 24, day_bucket, "hour_id/24 should equal day_id");

    // Converting the hour bucket back must give a timestamp at or before the
    // sample and less than one hour (3_600_000 ms) away from it.
    let recovered_ms = SqliteStorage::millis_from_hour_id(hour_bucket);
    let drift_ms = sample_ms - recovered_ms;
    assert!(
        (0..3_600_000).contains(&drift_ms),
        "Round-trip should land within the same hour"
    );
}
17299
#[test]
fn day_and_hour_ids_floor_negative_millis() {
    // One millisecond before the Unix epoch is expected to floor to second -1.
    let ts_ms = -1_i64;
    let floored_secs = -1_i64;
    let epoch_2020_secs = 1_577_836_800_i64;

    // Both ids are expected to be floor divisions of the offset from the 2020 epoch.
    let secs_since_2020 = floored_secs - epoch_2020_secs;
    let expected_day = secs_since_2020.div_euclid(86_400);
    let expected_hour = secs_since_2020.div_euclid(3_600);

    assert_eq!(SqliteStorage::day_id_from_millis(ts_ms), expected_day);
    assert_eq!(SqliteStorage::hour_id_from_millis(ts_ms), expected_hour);
}
17317
#[test]
fn migration_v13_from_v10() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");

    // Hand-build a database stamped as schema version 10. The raw connection
    // goes out of scope at the end of this block, before the storage layer
    // opens the same file.
    {
        let raw = FrankenConnection::open(db_file.to_string_lossy().into_owned()).unwrap();
        raw.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
        raw.execute_batch("CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);")
            .unwrap();
        raw.execute("INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', '10')")
            .unwrap();

        // Apply the migration scripts in ascending order; MIGRATION_V3 is not
        // part of this hand-built sequence.
        let scripts = [
            MIGRATION_V1,
            MIGRATION_V2,
            MIGRATION_V4,
            MIGRATION_V5,
            MIGRATION_V6,
            MIGRATION_V7,
            MIGRATION_V8,
            MIGRATION_V9,
            MIGRATION_V10,
        ];
        for script in scripts {
            raw.execute_batch(script).unwrap();
        }

        // Re-stamp the version after the scripts have run.
        raw.execute("UPDATE meta SET value = '10' WHERE key = 'schema_version'")
            .unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_file).unwrap();

    // Opening through the storage layer must bring the schema up to date.
    let storage = SqliteStorage::open(&db_file).unwrap();
    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Should have migrated from v10 to the current schema"
    );

    let analytics_tables: i64 = storage
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('message_metrics', 'usage_hourly', 'usage_daily', 'usage_models_daily')",
            fparams![],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(analytics_tables, 4, "All 4 analytics tables should exist");
}
17370
#[test]
fn analytics_ingest_populates_metrics_and_rollups() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One `message_metrics` row as selected below: hour_id, day_id, role,
    // content_tokens_est, has_plan, api_data_source, model_family,
    // model_tier, provider.
    type MetricRow = (i64, i64, String, i64, i64, String, String, String, String);

    const USER_PROMPT: &str = "Hello, can you help me with a plan?";

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // Usage payload attached to the assistant message; the four token counts
    // sum to 380, which the per-model rollup assertion below relies on.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 200,
                "cache_creation_input_tokens": 30,
                "service_tier": "standard"
            }
        }
    });

    // Builds one message positioned `offset_ms` after the conversation start.
    let message = |idx, role, offset_ms: i64, content: &'static str, extra_json| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(ts_ms + offset_ms),
        content: content.into(),
        extra_json,
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-conv-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            message(
                0,
                MessageRole::User,
                0,
                USER_PROMPT,
                serde_json::Value::Null,
            ),
            message(
                1,
                MessageRole::Agent,
                30_000,
                "## Plan\n\n1. First step\n2. Second step\n3. Third step",
                usage_json,
            ),
            message(
                2,
                MessageRole::User,
                60_000,
                "Great, let's proceed!",
                serde_json::Value::Null,
            ),
        ],
        source_id: "local".into(),
        origin_host: None,
    };

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), 3);

    let conn = storage.raw();

    // Runs a single-row query whose first column is an integer.
    let scalar = |sql: &str, params: &[ParamValue]| -> i64 {
        conn.query_row_map(sql, params, |row: &FrankenRow| row.get_typed::<i64>(0))
            .unwrap()
    };

    // --- message_metrics: one row per inserted message ---
    let metric_row_count = scalar("SELECT COUNT(*) FROM message_metrics", fparams![]);
    assert_eq!(metric_row_count, 3, "Should have 3 message_metrics rows");

    let rows: Vec<MetricRow> = conn
        .query_map_collect(
            "SELECT hour_id, day_id, role, content_tokens_est, has_plan, api_data_source, model_family, model_tier, provider FROM message_metrics ORDER BY message_id",
            fparams![],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                    row.get_typed(7)?,
                    row.get_typed(8)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(rows.len(), 3);
    let (first_user, assistant, second_user) = (&rows[0], &rows[1], &rows[2]);
    assert_eq!(first_user.0, expected_hour);
    assert_eq!(first_user.1, expected_day);
    assert_eq!(first_user.2, "user");
    assert_eq!(
        assistant.4, 1,
        "Assistant message with plan should have has_plan=1"
    );
    assert_eq!(
        assistant.5, "api",
        "Claude Code assistant message should have api data source"
    );
    assert_eq!(first_user.5, "estimated");
    assert_eq!(second_user.5, "estimated");
    assert_eq!(assistant.6, "claude");
    assert_eq!(assistant.7, "opus");
    assert_eq!(assistant.8, "anthropic");
    assert_eq!(first_user.6, "unknown");
    // The stored content token estimate is expected to be length / 4.
    let user_chars = USER_PROMPT.len() as i64;
    assert_eq!(first_user.3, user_chars / 4);

    // --- usage_hourly rollup ---
    let (
        hourly_messages,
        hourly_user,
        hourly_assistant,
        hourly_plans,
        hourly_plan_content,
        hourly_plan_api,
        hourly_api_coverage,
    ): (i64, i64, i64, i64, i64, i64, i64) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
 plan_content_tokens_est_total, plan_api_tokens_total, api_coverage_message_count
 FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                ))
            },
        )
        .unwrap();
    assert_eq!(hourly_messages, 3, "Hourly rollup should have 3 messages");
    assert_eq!(hourly_user, 2, "Hourly rollup should have 2 user messages");
    assert_eq!(
        hourly_assistant, 1,
        "Hourly rollup should have 1 assistant message"
    );
    assert_eq!(hourly_plans, 1, "Hourly rollup should have 1 plan message");
    assert!(
        hourly_plan_content > 0,
        "Hourly rollup should include plan content tokens"
    );
    assert!(
        hourly_plan_api > 0,
        "Hourly rollup should include plan API tokens"
    );
    assert_eq!(
        hourly_api_coverage, 1,
        "Hourly rollup should have 1 API-covered message"
    );

    // --- usage_daily rollup ---
    let (daily_messages, daily_api_coverage): (i64, i64) = conn
        .query_row_map(
            "SELECT message_count, api_coverage_message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(daily_messages, 3, "Daily rollup should match hourly");
    assert_eq!(
        daily_api_coverage, 1,
        "Daily api_coverage should be 1 (only assistant msg has real API data)"
    );

    let api_only_input = scalar(
        "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE day_id = ? AND api_data_source = 'api'",
        fparams![expected_day],
    );
    assert_eq!(
        api_only_input, 100,
        "Only API-sourced input tokens should be 100"
    );

    // --- daily rollup totals must agree with sums over message_metrics ---
    let metrics_content_total = scalar(
        "SELECT SUM(content_tokens_est) FROM message_metrics WHERE day_id = ?",
        fparams![expected_day],
    );
    let metrics_plan_content = scalar(
        "SELECT COALESCE(SUM(content_tokens_est), 0) FROM message_metrics WHERE day_id = ? AND has_plan = 1",
        fparams![expected_day],
    );
    let metrics_plan_api = scalar(
        "SELECT COALESCE(SUM(COALESCE(api_input_tokens, 0) + COALESCE(api_output_tokens, 0) + COALESCE(api_cache_read_tokens, 0) + COALESCE(api_cache_creation_tokens, 0) + COALESCE(api_thinking_tokens, 0)), 0)
 FROM message_metrics WHERE day_id = ? AND has_plan = 1 AND api_data_source = 'api'",
        fparams![expected_day],
    );
    let daily_content_total = scalar(
        "SELECT content_tokens_est_total FROM usage_daily WHERE day_id = ?",
        fparams![expected_day],
    );
    let (daily_plan_content, daily_plan_api): (i64, i64) = conn
        .query_row_map(
            "SELECT plan_content_tokens_est_total, plan_api_tokens_total FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(
        metrics_content_total, daily_content_total,
        "Daily rollup content_tokens_est_total must equal SUM of message_metrics"
    );
    assert_eq!(
        metrics_plan_content, daily_plan_content,
        "Daily rollup plan_content_tokens_est_total must equal planned message_metrics content sum"
    );
    assert_eq!(
        metrics_plan_api, daily_plan_api,
        "Daily rollup plan_api_tokens_total must equal planned message_metrics API token sum"
    );

    // --- usage_models_daily rollup ---
    let (opus_messages, opus_user, opus_assistant, opus_api_total, opus_api_coverage): (
        i64,
        i64,
        i64,
        i64,
        i64,
    ) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, api_tokens_total, api_coverage_message_count
 FROM usage_models_daily
 WHERE day_id = ? AND model_family = 'claude' AND model_tier = 'opus'",
            fparams![expected_day],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                ))
            },
        )
        .unwrap();
    assert_eq!(opus_messages, 1);
    assert_eq!(opus_user, 0);
    assert_eq!(opus_assistant, 1);
    assert_eq!(opus_api_total, 380);
    assert_eq!(opus_api_coverage, 1);

    let unknown_bucket_messages = scalar(
        "SELECT message_count FROM usage_models_daily
 WHERE day_id = ? AND model_family = 'unknown' AND model_tier = 'unknown'",
        fparams![expected_day],
    );
    assert_eq!(
        unknown_bucket_messages, 2,
        "user messages should land in unknown model bucket"
    );
}
17683
#[test]
fn has_plan_heuristic_detects_plans() {
    // Texts the heuristic must classify as containing a plan.
    let plan_texts = [
        "## Plan\n\n1. First step\n2. Second step",
        "# Plan\nHere is what we will do:\n1. Step one\n2. Step two",
        "Plan:\n- Gather baseline\n- Implement changes\n- Validate with tests",
        "Next steps:\n1. Update schema\n2. Rebuild rollups",
    ];
    for text in plan_texts {
        assert!(has_plan_heuristic(text), "expected a plan in {text:?}");
    }

    // Texts the heuristic must reject, including a numbered list that only
    // appears inside a fenced JSON block.
    let non_plan_texts = [
        "Hello world",
        "Short",
        "This is a regular message without plans",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. install\\n2. run\"}\n```",
    ];
    for text in non_plan_texts {
        assert!(!has_plan_heuristic(text), "expected no plan in {text:?}");
    }
}
17707
#[test]
fn has_plan_for_role_only_counts_assistant_messages() {
    let plan_text = "## Plan\n1. First\n2. Second";

    // The same plan text counts for these role spellings...
    for role in ["assistant", "agent", "Assistant"] {
        assert!(
            has_plan_for_role(role, plan_text),
            "role {role:?} should count as a plan author"
        );
    }
    // ...and never for these roles.
    for role in ["user", "tool"] {
        assert!(
            !has_plan_for_role(role, plan_text),
            "role {role:?} should not count as a plan author"
        );
    }
}
17717
#[test]
fn api_rollups_require_api_data_source() {
    // Builds an assistant plan message in hour 1 / day 1; only the id, content
    // sizes, input/output token counts and data source differ between the two
    // entries recorded below.
    let plan_entry = |message_id,
                      content_chars,
                      content_tokens_est,
                      input_tokens,
                      output_tokens,
                      data_source: &'static str| MessageMetricsEntry {
        message_id,
        created_at_ms: 0,
        hour_id: 1,
        day_id: 1,
        agent_slug: "codex".into(),
        workspace_id: 0,
        source_id: "local".into(),
        role: "assistant".into(),
        content_chars,
        content_tokens_est,
        model_name: None,
        model_family: "unknown".into(),
        model_tier: "unknown".into(),
        provider: "unknown".into(),
        api_input_tokens: Some(input_tokens),
        api_output_tokens: Some(output_tokens),
        api_cache_read_tokens: Some(0),
        api_cache_creation_tokens: Some(0),
        api_thinking_tokens: Some(0),
        api_service_tier: None,
        api_data_source: data_source.into(),
        tool_call_count: 0,
        has_tool_calls: false,
        has_plan: true,
    };

    let mut agg = AnalyticsRollupAggregator::new();
    // Carries token counts, but its data source is only "estimated".
    agg.record(&plan_entry(1, 120, 30, 100, 50, "estimated"));
    // Carries token counts with the "api" data source.
    agg.record(&plan_entry(2, 80, 20, 40, 10, "api"));

    let bucket_key = (1_i64, "codex".to_string(), 0_i64, "local".to_string());
    let hourly = agg
        .hourly
        .get(&bucket_key)
        .expect("hourly rollup key must exist");
    let daily = agg
        .daily
        .get(&bucket_key)
        .expect("daily rollup key must exist");
    let model_bucket_key = (
        1_i64,
        "codex".to_string(),
        0_i64,
        "local".to_string(),
        "unknown".to_string(),
        "unknown".to_string(),
    );
    let models_daily = agg
        .models_daily
        .get(&model_bucket_key)
        .expect("model rollup key must exist");

    // Both messages count as plans and contribute their content estimates.
    assert_eq!(hourly.plan_message_count, 2);
    assert_eq!(hourly.plan_content_tokens_est_total, 50);
    // API token totals reflect the "api"-sourced entry only (40 + 10).
    assert_eq!(hourly.plan_api_tokens_total, 50);
    assert_eq!(daily.plan_api_tokens_total, 50);
    assert_eq!(models_daily.plan_api_tokens_total, 50);
    assert_eq!(hourly.api_tokens_total, 50);
    assert_eq!(hourly.api_input_tokens_total, 40);
    assert_eq!(hourly.api_output_tokens_total, 10);
    assert_eq!(hourly.api_coverage_message_count, 1);
    assert_eq!(daily.api_tokens_total, 50);
    assert_eq!(models_daily.api_tokens_total, 50);
}
17809
17810 #[test]
17811 fn has_plan_heuristic_curated_corpus_thresholds() {
17812 let positives = [
17814 "## Plan\n1. Inspect current schema\n2. Add migration\n3. Verify rebuild",
17815 "Plan:\n1) Reproduce\n2) Patch\n3) Add tests",
17816 "Implementation plan:\n- Parse inputs\n- Update rollups\n- Run checks",
17817 "Next steps:\n1. Reserve file\n2. Implement\n3. Report status",
17818 "# Plan\n1. Gather requirements\n2. Ship changes",
17819 "Action plan:\n- Identify root cause\n- Fix it\n- Validate",
17820 ];
17821
17822 let negatives = [
17824 "The plan is to move fast and fix things later.",
17825 "```json\n{\"tool\":\"shell\",\"stdout\":\"1. ls\\n2. cat\"}\n```",
17826 "stdout:\n1. Build started\n2. Build finished\nexit code: 0",
17827 "I can help with that request. Let me know if you want details.",
17828 "Here is a list:\n- apples\n- oranges",
17829 "Status update: completed tasks and blockers below.",
17830 ];
17831
17832 let tp = positives
17833 .iter()
17834 .filter(|msg| has_plan_heuristic(msg))
17835 .count();
17836 let fp = negatives
17837 .iter()
17838 .filter(|msg| has_plan_heuristic(msg))
17839 .count();
17840
17841 let recall = tp as f64 / positives.len() as f64;
17842 let false_positive_rate = fp as f64 / negatives.len() as f64;
17843
17844 assert!(
17845 recall >= 0.80,
17846 "plan heuristic recall too low: got {recall:.2}"
17847 );
17848 assert!(
17849 false_positive_rate <= 0.20,
17850 "plan heuristic false-positive rate too high: got {false_positive_rate:.2}"
17851 );
17852 }
17853
17854 #[test]
17855 fn rebuild_analytics_repopulates_from_messages() {
17856 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
17857 use std::path::PathBuf;
17858
17859 let dir = TempDir::new().unwrap();
17860 let db_path = dir.path().join("test.db");
17861 let storage = SqliteStorage::open(&db_path).unwrap();
17862
17863 let agent = Agent {
17865 id: None,
17866 slug: "claude_code".into(),
17867 name: "Claude Code".into(),
17868 version: Some("1.0".into()),
17869 kind: AgentKind::Cli,
17870 };
17871 let agent_id = storage.ensure_agent(&agent).unwrap();
17872
17873 let ts_ms = 1_770_551_400_000_i64;
17875 let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
17876 let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);
17877
17878 let usage_json = serde_json::json!({
17879 "message": {
17880 "model": "claude-opus-4-6",
17881 "usage": {
17882 "input_tokens": 100,
17883 "output_tokens": 50,
17884 "cache_read_input_tokens": 200,
17885 "cache_creation_input_tokens": 30,
17886 "service_tier": "standard"
17887 }
17888 }
17889 });
17890
17891 let conv = Conversation {
17892 id: None,
17893 agent_slug: "claude_code".into(),
17894 workspace: None,
17895 external_id: Some("test-rebuild-1".into()),
17896 title: Some("Test conversation".into()),
17897 source_path: PathBuf::from("/tmp/test.jsonl"),
17898 started_at: Some(ts_ms),
17899 ended_at: Some(ts_ms + 60_000),
17900 approx_tokens: None,
17901 metadata_json: serde_json::Value::Null,
17902 messages: vec![
17903 Message {
17904 id: None,
17905 idx: 0,
17906 role: MessageRole::User,
17907 author: None,
17908 created_at: Some(ts_ms),
17909 content: "Hello, can you help me with a plan?".into(),
17910 extra_json: serde_json::Value::Null,
17911 snippets: vec![],
17912 },
17913 Message {
17914 id: None,
17915 idx: 1,
17916 role: MessageRole::Agent,
17917 author: None,
17918 created_at: Some(ts_ms + 30_000),
17919 content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
17920 extra_json: usage_json,
17921 snippets: vec![],
17922 },
17923 Message {
17924 id: None,
17925 idx: 2,
17926 role: MessageRole::User,
17927 author: None,
17928 created_at: Some(ts_ms + 60_000),
17929 content: "Great, let's proceed!".into(),
17930 extra_json: serde_json::Value::Null,
17931 snippets: vec![],
17932 },
17933 ],
17934 source_id: "local".into(),
17935 origin_host: None,
17936 };
17937
17938 storage
17939 .insert_conversations_batched(&[(agent_id, None, &conv)])
17940 .unwrap();
17941
17942 let conn = storage.raw();
17944 let orig_mm: i64 = conn
17945 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
17946 row.get_typed(0)
17947 })
17948 .unwrap();
17949 let orig_hourly: i64 = conn
17950 .query_row_map("SELECT COUNT(*) FROM usage_hourly", &[], |row| {
17951 row.get_typed(0)
17952 })
17953 .unwrap();
17954 let orig_daily: i64 = conn
17955 .query_row_map("SELECT COUNT(*) FROM usage_daily", &[], |row| {
17956 row.get_typed(0)
17957 })
17958 .unwrap();
17959 let orig_models_daily: i64 = conn
17960 .query_row_map("SELECT COUNT(*) FROM usage_models_daily", &[], |row| {
17961 row.get_typed(0)
17962 })
17963 .unwrap();
17964 let orig_api_input: i64 = conn
17965 .query_row_map(
17966 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
17967 &[],
17968 |row: &FrankenRow| row.get_typed(0),
17969 )
17970 .unwrap();
17971
17972 assert_eq!(orig_mm, 3);
17973 assert!(orig_hourly > 0);
17974 assert!(orig_daily > 0);
17975 assert!(orig_models_daily > 0);
17976
17977 conn.execute("DELETE FROM message_metrics").unwrap();
17979 conn.execute("DELETE FROM usage_hourly").unwrap();
17980 conn.execute("DELETE FROM usage_daily").unwrap();
17981 conn.execute("DELETE FROM usage_models_daily").unwrap();
17982
17983 let zero: i64 = conn
17985 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
17986 row.get_typed(0)
17987 })
17988 .unwrap();
17989 assert_eq!(zero, 0);
17990
17991 let result = storage.rebuild_analytics().unwrap();
17993
17994 assert_eq!(result.message_metrics_rows, 3);
17995 assert!(result.usage_hourly_rows > 0);
17996 assert!(result.usage_daily_rows > 0);
17997 assert!(result.usage_models_daily_rows > 0);
17998 assert!(
17999 result.elapsed_ms < 10_000,
18000 "Rebuild should be fast for 3 msgs"
18001 );
18002
18003 let conn = storage.raw();
18005 let rebuilt_mm: i64 = conn
18006 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
18007 row.get_typed(0)
18008 })
18009 .unwrap();
18010 assert_eq!(
18011 rebuilt_mm, orig_mm,
18012 "Rebuilt message_metrics count should match"
18013 );
18014
18015 let rebuilt_hourly: i64 = conn
18016 .query_row_map("SELECT COUNT(*) FROM usage_hourly", &[], |row| {
18017 row.get_typed(0)
18018 })
18019 .unwrap();
18020 assert_eq!(
18021 rebuilt_hourly, orig_hourly,
18022 "Rebuilt hourly rows should match"
18023 );
18024
18025 let rebuilt_daily: i64 = conn
18026 .query_row_map("SELECT COUNT(*) FROM usage_daily", &[], |row| {
18027 row.get_typed(0)
18028 })
18029 .unwrap();
18030 assert_eq!(rebuilt_daily, orig_daily, "Rebuilt daily rows should match");
18031
18032 let rebuilt_models_daily: i64 = conn
18033 .query_row_map("SELECT COUNT(*) FROM usage_models_daily", &[], |row| {
18034 row.get_typed(0)
18035 })
18036 .unwrap();
18037 assert_eq!(
18038 rebuilt_models_daily, orig_models_daily,
18039 "Rebuilt model rollup rows should match"
18040 );
18041
18042 let rebuilt_api_input: i64 = conn
18044 .query_row_map(
18045 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
18046 &[],
18047 |row: &FrankenRow| row.get_typed(0),
18048 )
18049 .unwrap();
18050 assert_eq!(
18051 rebuilt_api_input, orig_api_input,
18052 "Rebuilt API input tokens should match original"
18053 );
18054
18055 let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api): (
18057 i64,
18058 i64,
18059 i64,
18060 i64,
18061 i64,
18062 i64,
18063 ) = conn
18064 .query_row_map(
18065 "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
18066 plan_content_tokens_est_total, plan_api_tokens_total
18067 FROM usage_hourly WHERE hour_id = ?",
18068 fparams![expected_hour],
18069 |row: &FrankenRow| {
18070 Ok((
18071 row.get_typed(0)?,
18072 row.get_typed(1)?,
18073 row.get_typed(2)?,
18074 row.get_typed(3)?,
18075 row.get_typed(4)?,
18076 row.get_typed(5)?,
18077 ))
18078 },
18079 )
18080 .unwrap();
18081 assert_eq!(uh_msg, 3);
18082 assert_eq!(uh_user, 2);
18083 assert_eq!(uh_asst, 1);
18084 assert_eq!(uh_plan, 1);
18085 assert!(uh_plan_content > 0);
18086 assert!(uh_plan_api > 0);
18087
18088 let ud_msg: i64 = conn
18089 .query_row_map(
18090 "SELECT message_count FROM usage_daily WHERE day_id = ?",
18091 fparams![expected_day],
18092 |row| row.get_typed(0),
18093 )
18094 .unwrap();
18095 assert_eq!(ud_msg, 3);
18096 }
18097
18098 #[test]
18099 fn insert_conversations_batched_flushes_large_fts_batches() {
18100 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18101 use std::path::PathBuf;
18102
18103 let dir = TempDir::new().unwrap();
18104 let db_path = dir.path().join("test.db");
18105 let storage = SqliteStorage::open(&db_path).unwrap();
18106 storage
18111 .ensure_search_fallback_fts_consistency()
18112 .expect("ensure FTS consistency before insert");
18113
18114 let agent = Agent {
18115 id: None,
18116 slug: "codex".into(),
18117 name: "Codex".into(),
18118 version: Some("0.2.3".into()),
18119 kind: AgentKind::Cli,
18120 };
18121 let agent_id = storage.ensure_agent(&agent).unwrap();
18122
18123 let content = "y".repeat((FTS_ENTRY_BATCH_MAX_CHARS / 2) + 1);
18124 let messages: Vec<_> = (0_i64..2)
18125 .map(|i| Message {
18126 id: None,
18127 idx: i,
18128 role: MessageRole::Agent,
18129 author: None,
18130 created_at: Some(1_700_000_000_000 + i),
18131 content: format!("{i}-{content}"),
18132 extra_json: serde_json::Value::Null,
18133 snippets: Vec::new(),
18134 })
18135 .collect();
18136 let conv = Conversation {
18137 id: None,
18138 agent_slug: "codex".into(),
18139 workspace: Some(PathBuf::from("/tmp/workspace")),
18140 external_id: Some("fts-large-batch".into()),
18141 title: Some("FTS Large Batch".into()),
18142 source_path: PathBuf::from("/tmp/rollout.jsonl"),
18143 started_at: Some(1_700_000_000_000),
18144 ended_at: Some(1_700_000_000_999),
18145 approx_tokens: None,
18146 metadata_json: serde_json::Value::Null,
18147 messages,
18148 source_id: "local".into(),
18149 origin_host: None,
18150 };
18151
18152 let outcomes = storage
18153 .insert_conversations_batched(&[(agent_id, None, &conv)])
18154 .unwrap();
18155 assert_eq!(outcomes.len(), 1);
18156 assert_eq!(outcomes[0].inserted_indices.len(), conv.messages.len());
18157
18158 let message_count: i64 = storage
18159 .conn
18160 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
18161 row.get_typed(0)
18162 })
18163 .unwrap();
18164 let fts_count: i64 = storage
18165 .conn
18166 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
18167 row.get_typed(0)
18168 })
18169 .unwrap();
18170
18171 assert_eq!(message_count, conv.messages.len() as i64);
18172 assert_eq!(fts_count, conv.messages.len() as i64);
18173 }
18174
18175 fn make_profiled_storage_remote_conversation(
18176 external_id: i64,
18177 msg_count: usize,
18178 ) -> Conversation {
18179 Conversation {
18180 id: None,
18181 agent_slug: "codex".into(),
18182 workspace: Some(PathBuf::from("/ws/profiled-storage-remote")),
18183 external_id: Some(format!("profiled-storage-remote-{external_id}")),
18184 title: Some(format!(
18185 "Profiled storage remote conversation {external_id}"
18186 )),
18187 source_path: PathBuf::from(format!("/log/profiled-storage-remote-{external_id}.jsonl")),
18188 started_at: Some(10_000 + external_id * 100),
18189 ended_at: Some(10_000 + external_id * 100 + msg_count as i64),
18190 approx_tokens: Some(msg_count as i64 * 32),
18191 metadata_json: serde_json::json!({ "bench": true }),
18192 messages: (0..msg_count)
18193 .map(|idx| Message {
18194 id: None,
18195 idx: idx as i64,
18196 role: if idx % 2 == 0 {
18197 MessageRole::User
18198 } else {
18199 MessageRole::Agent
18200 },
18201 author: Some("tester".into()),
18202 created_at: Some(20_000 + external_id * 100 + idx as i64),
18203 content: format!(
18204 "profiled storage remote content ext={external_id} idx={idx} {}",
18205 "x".repeat(64)
18206 ),
18207 extra_json: serde_json::json!({ "idx": idx }),
18208 snippets: Vec::new(),
18209 })
18210 .collect(),
18211 source_id: "profiled-storage-remote-source".into(),
18212 origin_host: Some("builder-profile".into()),
18213 }
18214 }
18215
18216 fn make_profiled_append_remote_merge_conversation(
18217 external_id: i64,
18218 msg_count: usize,
18219 ) -> Conversation {
18220 let base_ts = 100_000 + external_id * 1_000;
18221 Conversation {
18222 id: None,
18223 agent_slug: "codex".into(),
18224 workspace: Some(PathBuf::from("/ws/profiled-append-remote")),
18225 external_id: Some(format!("profiled-append-remote-{external_id}")),
18226 title: Some(format!("Profiled append remote conversation {external_id}")),
18227 source_path: PathBuf::from(format!("/log/profiled-append-remote-{external_id}.jsonl")),
18228 started_at: Some(base_ts),
18229 ended_at: Some(base_ts + msg_count as i64),
18230 approx_tokens: Some(msg_count as i64 * 50),
18231 metadata_json: serde_json::json!({ "bench": true }),
18232 messages: (0..msg_count)
18233 .map(|idx| Message {
18234 id: None,
18235 idx: idx as i64,
18236 role: if idx % 2 == 0 {
18237 MessageRole::User
18238 } else {
18239 MessageRole::Agent
18240 },
18241 author: Some(format!("model-{}", external_id % 5)),
18242 created_at: Some(base_ts + idx as i64),
18243 content: format!(
18244 "Profiled append remote conversation {} message {}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
18245 external_id, idx
18246 ),
18247 extra_json: serde_json::json!({ "bench": true }),
18248 snippets: Vec::new(),
18249 })
18250 .collect(),
18251 source_id: "profiled-append-remote-source".into(),
18252 origin_host: Some("builder-profile".into()),
18253 }
18254 }
18255
18256 #[test]
18257 fn insert_conversation_tree_batched_new_message_ids_match_snippet_rows() {
18258 let dir = TempDir::new().unwrap();
18259 let db_path = dir.path().join("batched-message-ids.db");
18260 let storage = SqliteStorage::open(&db_path).unwrap();
18261 let agent_id = storage
18262 .ensure_agent(&Agent {
18263 id: None,
18264 slug: "codex".into(),
18265 name: "Codex".into(),
18266 version: None,
18267 kind: AgentKind::Cli,
18268 })
18269 .unwrap();
18270 let workspace_id = storage
18271 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
18272 .unwrap();
18273 let mut conv = make_profiled_storage_remote_conversation(42, 5);
18274 for (idx, msg) in conv.messages.iter_mut().enumerate() {
18275 msg.snippets.push(Snippet {
18276 id: None,
18277 file_path: Some(PathBuf::from(format!("src/file_{idx}.rs"))),
18278 start_line: Some((idx + 1) as i64),
18279 end_line: Some((idx + 2) as i64),
18280 language: Some("rust".into()),
18281 snippet_text: Some(format!("fn snippet_{idx}() {{}}")),
18282 });
18283 }
18284 let outcome = storage
18285 .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
18286 .unwrap();
18287
18288 let message_count: i64 = storage
18289 .conn
18290 .query_row_map(
18291 "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
18292 fparams![outcome.conversation_id],
18293 |row| row.get_typed(0),
18294 )
18295 .unwrap();
18296 let joined_snippet_count: i64 = storage
18297 .conn
18298 .query_row_map(
18299 "SELECT COUNT(*)
18300 FROM snippets s
18301 JOIN messages m ON s.message_id = m.id
18302 WHERE m.conversation_id = ?1",
18303 fparams![outcome.conversation_id],
18304 |row| row.get_typed(0),
18305 )
18306 .unwrap();
18307
18308 assert_eq!(message_count, conv.messages.len() as i64);
18309 assert_eq!(joined_snippet_count, conv.messages.len() as i64);
18310 }
18311
18312 #[test]
18313 fn insert_conversation_tree_batched_appended_message_ids_match_snippet_rows() {
18314 let dir = TempDir::new().unwrap();
18315 let db_path = dir.path().join("batched-append-message-ids.db");
18316 let storage = SqliteStorage::open(&db_path).unwrap();
18317 let agent_id = storage
18318 .ensure_agent(&Agent {
18319 id: None,
18320 slug: "codex".into(),
18321 name: "Codex".into(),
18322 version: None,
18323 kind: AgentKind::Cli,
18324 })
18325 .unwrap();
18326 let workspace_id = storage
18327 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
18328 .unwrap();
18329
18330 let mut initial = make_profiled_storage_remote_conversation(77, 2);
18331 for (idx, msg) in initial.messages.iter_mut().enumerate() {
18332 msg.snippets.push(Snippet {
18333 id: None,
18334 file_path: Some(PathBuf::from(format!("src/append_initial_{idx}.rs"))),
18335 start_line: Some((idx + 1) as i64),
18336 end_line: Some((idx + 2) as i64),
18337 language: Some("rust".into()),
18338 snippet_text: Some(format!("fn append_initial_{idx}() {{}}")),
18339 });
18340 }
18341 let first = storage
18342 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
18343 .unwrap();
18344 assert_eq!(first.inserted_indices, vec![0, 1]);
18345
18346 let mut appended = make_profiled_storage_remote_conversation(77, 5);
18347 for (idx, msg) in appended.messages.iter_mut().enumerate() {
18348 msg.snippets.push(Snippet {
18349 id: None,
18350 file_path: Some(PathBuf::from(format!("src/append_full_{idx}.rs"))),
18351 start_line: Some((idx + 10) as i64),
18352 end_line: Some((idx + 11) as i64),
18353 language: Some("rust".into()),
18354 snippet_text: Some(format!("fn append_full_{idx}() {{}}")),
18355 });
18356 }
18357 let second = storage
18358 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
18359 .unwrap();
18360 assert_eq!(second.conversation_id, first.conversation_id);
18361 assert_eq!(second.inserted_indices, vec![2, 3, 4]);
18362
18363 let message_count: i64 = storage
18364 .conn
18365 .query_row_map(
18366 "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
18367 fparams![first.conversation_id],
18368 |row| row.get_typed(0),
18369 )
18370 .unwrap();
18371 let joined_snippets: Vec<(i64, String)> = storage
18372 .conn
18373 .query_map_collect(
18374 "SELECT m.idx, s.file_path
18375 FROM snippets s
18376 JOIN messages m ON s.message_id = m.id
18377 WHERE m.conversation_id = ?1
18378 ORDER BY m.idx, s.id",
18379 fparams![first.conversation_id],
18380 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18381 )
18382 .unwrap();
18383
18384 assert_eq!(message_count, 5);
18385 assert_eq!(
18386 joined_snippets,
18387 vec![
18388 (0, "src/append_initial_0.rs".to_string()),
18389 (1, "src/append_initial_1.rs".to_string()),
18390 (2, "src/append_full_2.rs".to_string()),
18391 (3, "src/append_full_3.rs".to_string()),
18392 (4, "src/append_full_4.rs".to_string()),
18393 ]
18394 );
18395 }
18396
18397 #[test]
18398 fn insert_conversation_tree_rehydrates_external_lookup_after_manual_clear() {
18399 let dir = TempDir::new().unwrap();
18400 let db_path = dir.path().join("external-lookup-rehydrate.db");
18401 let storage = SqliteStorage::open(&db_path).unwrap();
18402 let agent_id = storage
18403 .ensure_agent(&Agent {
18404 id: None,
18405 slug: "codex".into(),
18406 name: "Codex".into(),
18407 version: None,
18408 kind: AgentKind::Cli,
18409 })
18410 .unwrap();
18411 let workspace_id = storage
18412 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
18413 .unwrap();
18414
18415 let initial = make_profiled_storage_remote_conversation(88, 2);
18416 let first = storage
18417 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
18418 .unwrap();
18419 let external_id = initial.external_id.as_deref().unwrap();
18420 let lookup_key =
18421 conversation_external_lookup_key(&initial.source_id, agent_id, external_id);
18422 let lookup_id: i64 = storage
18423 .conn
18424 .query_row_map(
18425 "SELECT conversation_id
18426 FROM conversation_external_tail_lookup
18427 WHERE lookup_key = ?1",
18428 fparams![lookup_key.as_str()],
18429 |row| row.get_typed(0),
18430 )
18431 .unwrap();
18432 assert_eq!(lookup_id, first.conversation_id);
18433
18434 storage
18435 .conn
18436 .execute_compat(
18437 "DELETE FROM conversation_external_tail_lookup WHERE lookup_key = ?1",
18438 fparams![lookup_key.as_str()],
18439 )
18440 .unwrap();
18441
18442 let appended = make_profiled_storage_remote_conversation(88, 4);
18443 let second = storage
18444 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
18445 .unwrap();
18446 assert_eq!(second.conversation_id, first.conversation_id);
18447 assert_eq!(second.inserted_indices, vec![2, 3]);
18448
18449 let conversation_count: i64 = storage
18450 .conn
18451 .query_row_map(
18452 "SELECT COUNT(*)
18453 FROM conversations
18454 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
18455 fparams![initial.source_id.as_str(), agent_id, external_id],
18456 |row| row.get_typed(0),
18457 )
18458 .unwrap();
18459 let restored_lookup: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
18460 .conn
18461 .query_row_map(
18462 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
18463 FROM conversation_external_tail_lookup
18464 WHERE lookup_key = ?1",
18465 fparams![lookup_key.as_str()],
18466 |row| {
18467 Ok((
18468 row.get_typed(0)?,
18469 row.get_typed(1)?,
18470 row.get_typed(2)?,
18471 row.get_typed(3)?,
18472 ))
18473 },
18474 )
18475 .unwrap();
18476 let tail_state: (Option<i64>, Option<i64>, Option<i64>) = storage
18477 .conn
18478 .query_row_map(
18479 "SELECT ended_at, last_message_idx, last_message_created_at
18480 FROM conversation_tail_state
18481 WHERE conversation_id = ?1",
18482 fparams![first.conversation_id],
18483 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
18484 )
18485 .unwrap();
18486 assert_eq!(conversation_count, 1);
18487 assert_eq!(
18488 restored_lookup,
18489 (
18490 first.conversation_id,
18491 tail_state.0,
18492 tail_state.1,
18493 tail_state.2
18494 )
18495 );
18496 assert_eq!(
18497 tail_state,
18498 (
18499 appended.messages[3].created_at,
18500 Some(3),
18501 appended.messages[3].created_at
18502 )
18503 );
18504 }
18505
18506 #[test]
18507 fn insert_conversation_tree_recreates_daily_stats_after_manual_clear() {
18508 let dir = TempDir::new().unwrap();
18509 let db_path = dir.path().join("test.db");
18510 let storage = SqliteStorage::open(&db_path).unwrap();
18511 let agent_id = storage
18512 .ensure_agent(&Agent {
18513 id: None,
18514 slug: "codex".into(),
18515 name: "Codex".into(),
18516 version: None,
18517 kind: AgentKind::Cli,
18518 })
18519 .unwrap();
18520 let workspace = PathBuf::from("/ws/profiled-storage-remote");
18521 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
18522
18523 storage
18524 .insert_conversation_tree(
18525 agent_id,
18526 Some(workspace_id),
18527 &make_profiled_storage_remote_conversation(0, 3),
18528 )
18529 .unwrap();
18530 storage.conn.execute("DELETE FROM daily_stats").unwrap();
18531
18532 storage
18533 .insert_conversation_tree(
18534 agent_id,
18535 Some(workspace_id),
18536 &make_profiled_storage_remote_conversation(1, 2),
18537 )
18538 .unwrap();
18539
18540 let row_count: i64 = storage
18541 .conn
18542 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
18543 row.get_typed(0)
18544 })
18545 .unwrap();
18546 let (session_count, message_count): (i64, i64) = storage
18547 .conn
18548 .query_row_map(
18549 "SELECT session_count, message_count
18550 FROM daily_stats
18551 WHERE agent_slug = 'all' AND source_id = 'all'",
18552 fparams![],
18553 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18554 )
18555 .unwrap();
18556
18557 assert_eq!(row_count, 4);
18558 assert_eq!(session_count, 1);
18559 assert_eq!(message_count, 2);
18560 }
18561
18562 #[test]
18563 #[serial]
18564 fn insert_conversation_tree_stage_profile_tracks_steady_state_remote_reuse() {
18565 let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");
18566
18567 for &(msg_count, iterations) in &[(5usize, 80usize), (20, 50), (50, 24)] {
18568 let dir = TempDir::new().unwrap();
18569 let db_path = dir.path().join(format!("profile-{msg_count}.db"));
18570 let storage = SqliteStorage::open(&db_path).unwrap();
18571 let agent_id = storage
18572 .ensure_agent(&Agent {
18573 id: None,
18574 slug: "codex".into(),
18575 name: "Codex".into(),
18576 version: None,
18577 kind: AgentKind::Cli,
18578 })
18579 .unwrap();
18580 let workspace = PathBuf::from("/ws/profiled-storage-remote");
18581 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
18582
18583 storage
18584 .insert_conversation_tree(
18585 agent_id,
18586 Some(workspace_id),
18587 &make_profiled_storage_remote_conversation(0, msg_count),
18588 )
18589 .unwrap();
18590
18591 let mut profile = InsertConversationTreePerfProfile::default();
18592 for external_id in 1..=iterations {
18593 storage
18594 .insert_conversation_tree_with_profile(
18595 agent_id,
18596 Some(workspace_id),
18597 &make_profiled_storage_remote_conversation(external_id as i64, msg_count),
18598 &mut profile,
18599 )
18600 .unwrap();
18601 }
18602
18603 let accounted_duration = profile.source_duration
18604 + profile.tx_open_duration
18605 + profile.existing_lookup_duration
18606 + profile.conversation_row_duration
18607 + profile.message_insert_duration
18608 + profile.snippet_insert_duration
18609 + profile.fts_entry_duration
18610 + profile.fts_flush_duration
18611 + profile.analytics_duration
18612 + profile.commit_duration;
18613 assert_eq!(profile.invocations, iterations);
18614 assert_eq!(profile.messages, iterations * msg_count);
18615 assert_eq!(profile.inserted_messages, iterations * msg_count);
18616 assert!(
18617 profile.total_duration >= accounted_duration,
18618 "accounted stage durations cannot exceed total duration"
18619 );
18620
18621 profile.log_summary(&format!("remote_reuse_{msg_count}_msgs"));
18622 }
18623 }
18624
18625 #[test]
18626 #[serial]
18627 fn insert_conversation_tree_stage_profile_tracks_append_remote_source_merge() {
18628 let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");
18629
18630 for &(msg_count, iterations) in &[(5usize, 80usize), (20, 50), (50, 24)] {
18631 let dir = TempDir::new().unwrap();
18632 let db_path = dir.path().join(format!("append-profile-{msg_count}.db"));
18633 let storage = SqliteStorage::open(&db_path).unwrap();
18634 let agent_id = storage
18635 .ensure_agent(&Agent {
18636 id: None,
18637 slug: "codex".into(),
18638 name: "Codex".into(),
18639 version: None,
18640 kind: AgentKind::Cli,
18641 })
18642 .unwrap();
18643 let workspace = PathBuf::from("/ws/profiled-append-remote");
18644 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
18645
18646 for external_id in 0..iterations {
18647 storage
18648 .insert_conversation_tree(
18649 agent_id,
18650 Some(workspace_id),
18651 &make_profiled_append_remote_merge_conversation(
18652 external_id as i64,
18653 msg_count,
18654 ),
18655 )
18656 .unwrap();
18657 }
18658
18659 let mut profile = InsertConversationTreePerfProfile::default();
18660 for external_id in 0..iterations {
18661 storage
18662 .append_existing_conversation_with_profile(
18663 agent_id,
18664 Some(workspace_id),
18665 &make_profiled_append_remote_merge_conversation(
18666 external_id as i64,
18667 msg_count * 2,
18668 ),
18669 &mut profile,
18670 )
18671 .unwrap();
18672 }
18673
18674 let accounted_duration = profile.source_duration
18675 + profile.tx_open_duration
18676 + profile.existing_lookup_duration
18677 + profile.existing_idx_lookup_duration
18678 + profile.existing_replay_lookup_duration
18679 + profile.dedupe_filter_duration
18680 + profile.conversation_row_duration
18681 + profile.message_insert_duration
18682 + profile.snippet_insert_duration
18683 + profile.fts_entry_duration
18684 + profile.fts_flush_duration
18685 + profile.analytics_duration
18686 + profile.commit_duration;
18687 assert_eq!(profile.invocations, iterations);
18688 assert_eq!(profile.messages, iterations * msg_count * 2);
18689 assert_eq!(profile.inserted_messages, iterations * msg_count);
18690 assert!(
18691 profile.total_duration >= accounted_duration,
18692 "accounted append stage durations cannot exceed total duration"
18693 );
18694
18695 profile.log_summary(&format!("append_remote_merge_{msg_count}_msgs"));
18696 }
18697 }
18698
18699 #[test]
18700 fn rebuild_daily_stats_recomputes_materialized_totals_without_monolithic_group_by() {
18701 let dir = TempDir::new().unwrap();
18702 let db_path = dir.path().join("test.db");
18703 let storage = SqliteStorage::open(&db_path).unwrap();
18704 let started_at = 1_700_000_000_000_i64;
18705 let day_id = FrankenStorage::day_id_from_millis(started_at);
18706 let hour_id = FrankenStorage::hour_id_from_millis(started_at);
18707
18708 storage
18709 .conn
18710 .execute_compat(
18711 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
18712 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
18713 fparams![1_i64, "codex", "Codex", "cli"],
18714 )
18715 .unwrap();
18716 storage
18717 .conn
18718 .execute_compat(
18719 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
18720 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
18721 fparams![2_i64, "claude", "Claude", "cli"],
18722 )
18723 .unwrap();
18724
18725 storage
18726 .conn
18727 .execute_compat(
18728 "INSERT INTO conversations (
18729 id, agent_id, workspace_id, source_id, external_id, title, source_path,
18730 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
18731 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
18732 fparams![
18733 1_i64,
18734 1_i64,
18735 LOCAL_SOURCE_ID,
18736 "daily-a",
18737 "Daily A",
18738 "/tmp/daily-a.jsonl",
18739 started_at,
18740 started_at + 200,
18741 "{}"
18742 ],
18743 )
18744 .unwrap();
18745 storage
18746 .conn
18747 .execute_compat(
18748 "INSERT INTO conversations (
18749 id, agent_id, workspace_id, source_id, external_id, title, source_path,
18750 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
18751 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
18752 fparams![
18753 2_i64,
18754 2_i64,
18755 LOCAL_SOURCE_ID,
18756 "daily-b",
18757 "Daily B",
18758 "/tmp/daily-b.jsonl",
18759 started_at,
18760 started_at + 300,
18761 "{}"
18762 ],
18763 )
18764 .unwrap();
18765
18766 storage
18767 .conn
18768 .execute_compat(
18769 "INSERT INTO messages (
18770 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
18771 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
18772 fparams![1_i64, 1_i64, 0_i64, "user", started_at, "hello"],
18773 )
18774 .unwrap();
18775 storage
18776 .conn
18777 .execute_compat(
18778 "INSERT INTO messages (
18779 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
18780 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
18781 fparams![2_i64, 1_i64, 1_i64, "assistant", started_at + 100, "response"],
18782 )
18783 .unwrap();
18784 storage
18785 .conn
18786 .execute_compat(
18787 "INSERT INTO messages (
18788 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
18789 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
18790 fparams![3_i64, 2_i64, 0_i64, "user", started_at + 50, "abc"],
18791 )
18792 .unwrap();
18793
18794 for (message_id, agent_slug, role, content_len) in [
18795 (1_i64, "codex", "user", 5_i64),
18796 (2_i64, "codex", "assistant", 8_i64),
18797 (3_i64, "claude", "user", 3_i64),
18798 ] {
18799 storage
18800 .conn
18801 .execute_compat(
18802 "INSERT INTO message_metrics (
18803 message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
18804 role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
18805 api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
18806 api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
18807 model_name, model_family, model_tier, provider
18808 ) VALUES (
18809 ?1, ?2, ?3, ?4, ?5, ?6, ?7,
18810 ?8, ?9, ?10, ?11, ?12,
18811 ?13, ?14, ?15,
18812 ?16, ?17, ?18, ?19, ?20,
18813 ?21, ?22, ?23, ?24
18814 )",
18815 fparams![
18816 message_id,
18817 started_at,
18818 hour_id,
18819 day_id,
18820 agent_slug,
18821 0_i64,
18822 LOCAL_SOURCE_ID,
18823 role,
18824 content_len,
18825 content_len / 4,
18826 0_i64,
18827 0_i64,
18828 0_i64,
18829 0_i64,
18830 0_i64,
18831 "",
18832 "estimated",
18833 0_i64,
18834 0_i64,
18835 0_i64,
18836 "",
18837 "unknown",
18838 "unknown",
18839 "unknown"
18840 ],
18841 )
18842 .unwrap();
18843 }
18844
18845 storage.conn.execute("DELETE FROM daily_stats").unwrap();
18846
18847 let rebuilt = storage.rebuild_daily_stats().unwrap();
18848 assert_eq!(rebuilt.total_sessions, 2);
18849
18850 let health = storage.daily_stats_health().unwrap();
18851 assert_eq!(health.conversation_count, 2);
18852 assert_eq!(health.materialized_total, 2);
18853 assert_eq!(health.drift, 0);
18854
18855 let total_messages: i64 = storage
18856 .conn
18857 .query_row_map(
18858 "SELECT message_count FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
18859 fparams![],
18860 |row| row.get_typed(0),
18861 )
18862 .unwrap();
18863 assert_eq!(total_messages, 3);
18864 }
18865
/// Verifies that `total_chars` in the `'all'`/`'all'` `daily_stats` row equals the
/// UTF-8 byte length of a multi-byte message, first after an inline delta update
/// and again after `daily_stats` is wiped and rebuilt while a matching
/// `message_metrics` row exists.
#[test]
fn rebuild_daily_stats_preserves_byte_counts_with_message_metrics() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    // ASCII + emoji + accented + CJK text: the byte length differs from the char count.
    let content = "ASCII🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    // Reads the aggregated character total from the 'all'/'all' bucket.
    let read_all_total_chars = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
                fparams![],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    // --- fixture rows: one agent, one conversation, one message, one metrics row ---
    let insert_agent_sql = "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
             VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    storage
        .conn
        .execute_compat(insert_agent_sql, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();

    let insert_conversation_sql = "INSERT INTO conversations (
                id, agent_id, workspace_id, source_id, external_id, title, source_path,
                started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
             ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_conversation_sql,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-metrics",
                "Unicode Metrics",
                "/tmp/unicode-metrics.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();

    let insert_message_sql = "INSERT INTO messages (
                id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
             ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_message_sql,
            fparams![1_i64, 1_i64, 0_i64, "user", started_at, content],
        )
        .unwrap();

    let insert_metrics_sql = "INSERT INTO message_metrics (
                message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
                role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
                api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
                api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
                model_name, model_family, model_tier, provider
             ) VALUES (
                ?1, ?2, ?3, ?4, ?5, ?6, ?7,
                ?8, ?9, ?10, ?11, ?12,
                ?13, ?14, ?15,
                ?16, ?17, ?18, ?19, ?20,
                ?21, ?22, ?23, ?24
             )";
    storage
        .conn
        .execute_compat(
            insert_metrics_sql,
            fparams![
                1_i64,
                started_at,
                hour_id,
                day_id,
                "tester",
                0_i64,
                LOCAL_SOURCE_ID,
                "user",
                expected_bytes,
                expected_bytes / 4,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                "",
                "estimated",
                0_i64,
                0_i64,
                0_i64,
                "",
                "unknown",
                "unknown",
                "unknown"
            ],
        )
        .unwrap();

    // --- inline path: apply the delta inside a transaction and commit it ---
    let delta = StatsDelta {
        session_count_delta: 1,
        message_count_delta: 1,
        total_chars_delta: expected_bytes,
    };
    let mut tx = storage.conn.transaction().unwrap();
    franken_update_daily_stats_in_tx(
        &storage,
        &tx,
        "tester",
        LOCAL_SOURCE_ID,
        Some(started_at),
        delta,
    )
    .unwrap();
    tx.commit().unwrap();

    let inline_total = read_all_total_chars();
    assert_eq!(inline_total, expected_bytes);

    // --- rebuild path: drop the materialized rows and regenerate them ---
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total = read_all_total_chars();
    assert_eq!(rebuilt_total, expected_bytes);
}
19000
/// Verifies that rebuilding `daily_stats` reports the UTF-8 byte length of a
/// multi-byte message in `total_chars` when only `messages` rows exist.
///
/// No `message_metrics` row is inserted here; going by the test name, the rebuild
/// is presumably expected to take its raw-message fallback path in that case.
#[test]
fn rebuild_daily_stats_raw_fallback_preserves_byte_counts() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    // Multi-byte text: the byte length differs from the char count.
    let content = "fallback🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;

    // --- fixture rows: one agent, one conversation, one assistant message ---
    let insert_agent_sql = "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
             VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    storage
        .conn
        .execute_compat(insert_agent_sql, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();

    let insert_conversation_sql = "INSERT INTO conversations (
                id, agent_id, workspace_id, source_id, external_id, title, source_path,
                started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
             ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_conversation_sql,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-fallback",
                "Unicode Fallback",
                "/tmp/unicode-fallback.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();

    let insert_message_sql = "INSERT INTO messages (
                id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
             ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_message_sql,
            fparams![1_i64, 1_i64, 0_i64, "assistant", started_at, content],
        )
        .unwrap();

    // Apply and commit the inline delta, mirroring the sibling metrics-backed test.
    let delta = StatsDelta {
        session_count_delta: 1,
        message_count_delta: 1,
        total_chars_delta: expected_bytes,
    };
    let mut tx = storage.conn.transaction().unwrap();
    franken_update_daily_stats_in_tx(
        &storage,
        &tx,
        "tester",
        LOCAL_SOURCE_ID,
        Some(started_at),
        delta,
    )
    .unwrap();
    tx.commit().unwrap();

    // Discard the materialized rows so the totals below can only come from the rebuild.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total: i64 = storage
        .conn
        .query_row_map(
            "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(rebuilt_total, expected_bytes);
}
19078
/// Submits two batch items that share one external id, the second extending the
/// first with two extra messages, and checks that they collapse into a single
/// conversation row holding four messages.
///
/// The conversation row count is read through the default plan, `NOT INDEXED`,
/// and `INDEXED BY idx_conversations_source_id`, both on the writing handle and
/// on a freshly reopened one.
#[test]
fn insert_conversations_batched_appends_duplicate_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message at `idx`, timestamped 100 ms per index after the session start.
    let turn = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    // Every conversation built here carries the same external id and source path.
    let shared_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session".into()),
        title: Some("Shared Session".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let conv_a = shared_session(vec![
        turn(0, MessageRole::User, "first"),
        turn(1, MessageRole::Agent, "second"),
    ]);
    let conv_b = shared_session(vec![
        turn(0, MessageRole::User, "first"),
        turn(1, MessageRole::Agent, "second"),
        turn(2, MessageRole::User, "third"),
        turn(3, MessageRole::Agent, "fourth"),
    ]);

    let batch = [(agent_id, None, &conv_a), (agent_id, None, &conv_b)];
    let outcomes = storage.insert_conversations_batched(&batch).unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[1].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    // Single-value COUNT query against the handle that performed the writes.
    let count_on_writer = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let conversation_count = count_on_writer("SELECT COUNT(*) FROM conversations");
    let conversation_count_not_indexed =
        count_on_writer("SELECT COUNT(*) FROM conversations NOT INDEXED");
    let conversation_count_source_index = count_on_writer(
        "SELECT COUNT(*) FROM conversations INDEXED BY idx_conversations_source_id",
    );
    let message_count = count_on_writer("SELECT COUNT(*) FROM messages");

    // Second handle on the same file, opened after the batch was written.
    let reopened_storage = SqliteStorage::open(&db_path).unwrap();
    let count_on_reopened = |sql: &str| -> i64 {
        reopened_storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let ids_on_reopened = |sql: &str| -> Vec<i64> {
        reopened_storage
            .conn
            .query_map_collect(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let reopened_conversation_count = count_on_reopened("SELECT COUNT(*) FROM conversations");
    let reopened_conversation_count_not_indexed =
        count_on_reopened("SELECT COUNT(*) FROM conversations NOT INDEXED");
    let reopened_conversation_ids = ids_on_reopened("SELECT id FROM conversations ORDER BY id");
    let reopened_conversation_ids_not_indexed =
        ids_on_reopened("SELECT id FROM conversations NOT INDEXED ORDER BY id");
    let reopened_conversation_ids_source_index = ids_on_reopened(
        "SELECT id FROM conversations INDEXED BY idx_conversations_source_id ORDER BY id",
    );

    let merged_id = outcomes[0].conversation_id;
    assert_eq!(reopened_conversation_ids, vec![merged_id]);
    assert_eq!(reopened_conversation_ids_not_indexed, vec![merged_id]);
    assert_eq!(reopened_conversation_ids_source_index, vec![merged_id]);
    assert_eq!(reopened_conversation_count, 1);
    assert_eq!(reopened_conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_source_index, 1);
    assert_eq!(conversation_count, 1);
    assert_eq!(message_count, 4);
}
19270
/// Inserts a conversation inside a transaction, then asks
/// `franken_insert_conversation_or_get_existing_after_miss` to handle the same
/// conversation again in that transaction. The call must resolve to
/// `ConversationInsertStatus::Existing` with the id of the first insert, and the
/// transaction must still see exactly one conversation row.
#[test]
fn franken_insert_conversation_or_get_existing_recovers_unique_conflict() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("recover-duplicate".into()),
        title: Some("Recover Duplicate".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: "local".into(),
        origin_host: None,
    };

    // Everything below runs in one transaction; it is never committed.
    let tx = storage.conn.transaction().unwrap();
    let first_insert = franken_insert_conversation(&tx, agent_id, None, &conv).unwrap();
    let inserted_id = first_insert.expect("first insert should succeed");

    let conversation_key = conversation_merge_key(agent_id, &conv);
    let resolved = franken_insert_conversation_or_get_existing_after_miss(
        &tx,
        agent_id,
        None,
        &conv,
        &conversation_key,
    )
    .unwrap();

    let resolved_to_first_insert = matches!(
        resolved,
        ConversationInsertStatus::Existing(existing_id)
            if existing_id.cmp(&inserted_id).is_eq()
    );
    assert!(
        resolved_to_first_insert,
        "expected existing conversation id {inserted_id}"
    );

    let conversation_count: i64 = tx
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversation_count, 1);
}
19345
/// Batches two conversations with the same external id whose message indices
/// arrive out of order: the first carries idx 2 and 3, the second carries idx 0
/// and 1 plus a repeat of idx 3.
///
/// Expected: the first outcome reports `[2, 3]`, the second reports only the
/// missing `[0, 1]`, both point at the same conversation, and the stored
/// indices are `0..=3`.
#[test]
fn insert_conversations_batched_merges_duplicate_external_id_with_gaps() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message at `idx`, timestamped 100 ms per index after the session start.
    let turn = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    // Both halves of the session share this external id and source path.
    let gap_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session-gap".into()),
        title: Some("Shared Session Gap".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Tail of the session first ...
    let conv_a = gap_session(vec![
        turn(2, MessageRole::User, "third"),
        turn(3, MessageRole::Agent, "fourth"),
    ]);
    // ... then the head, overlapping the tail at idx 3.
    let conv_b = gap_session(vec![
        turn(0, MessageRole::User, "first"),
        turn(1, MessageRole::Agent, "second"),
        turn(3, MessageRole::Agent, "fourth"),
    ]);

    let batch = [(agent_id, None, &conv_a), (agent_id, None, &conv_b)];
    let outcomes = storage.insert_conversations_batched(&batch).unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[1].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    let stored_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_indices, vec![0, 1, 2, 3]);
}
19451
/// Seeds a conversation with messages at idx 0 and idx 20 through
/// `insert_conversation_tree`, then batches two items for the same external id:
/// one repeating idx 0 verbatim and one offering different content at idx 20.
///
/// Neither batch item may insert anything, and the stored rows must remain the
/// originally seeded ("canonical") ones.
#[test]
fn insert_conversations_batched_refreshes_partial_pending_message_lookup() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // idx 0 is the user turn; every other index is an agent turn.
    let make_message = |idx: i64, content: &str| Message {
        id: None,
        idx,
        role: match idx {
            0 => MessageRole::User,
            _ => MessageRole::Agent,
        },
        author: None,
        created_at: Some(1_700_000_000_000 + idx),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // All three conversations below share the same external id and source path.
    let session_with = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("partial-cache-session".into()),
        title: Some("Partial cache session".into()),
        source_path: PathBuf::from("/tmp/partial-cache.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let canonical = session_with(vec![
        make_message(0, "canonical zero"),
        make_message(20, "canonical twenty"),
    ]);
    storage
        .insert_conversation_tree(agent_id, None, &canonical)
        .unwrap();

    let exact_prefix = session_with(vec![make_message(0, "canonical zero")]);
    let conflicting_tail = session_with(vec![make_message(20, "conflicting twenty")]);

    let batch = [
        (agent_id, None, &exact_prefix),
        (agent_id, None, &conflicting_tail),
    ];
    let outcomes = storage.insert_conversations_batched(&batch).unwrap();

    assert_eq!(outcomes.len(), 2);
    assert!(outcomes[0].inserted_indices.is_empty());
    assert!(
        outcomes[1].inserted_indices.is_empty(),
        "the second batch item must refresh the partial pending lookup and retain the canonical idx=20 row"
    );

    let stored_messages: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected_rows = vec![
        (0, "canonical zero".to_string()),
        (20, "canonical twenty".to_string()),
    ];
    assert_eq!(stored_messages, expected_rows);
}
19542
/// Sends the same 64-message conversation through `insert_conversations_batched`
/// twice. The first pass must insert every message; the second must insert none
/// and resolve to the same conversation id, leaving one conversation row and 64
/// message rows.
#[test]
fn insert_conversations_batched_reprocessing_conversation_is_idempotent() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    const MESSAGE_COUNT: i64 = 64;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Alternate user/agent turns, one millisecond apart.
    let mut messages: Vec<Message> = Vec::with_capacity(MESSAGE_COUNT as usize);
    for idx in 0..MESSAGE_COUNT {
        let role = if idx % 2 == 0 {
            MessageRole::User
        } else {
            MessageRole::Agent
        };
        messages.push(Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("large-reprocess-session".into()),
        title: Some("Large Reprocess Session".into()),
        source_path: PathBuf::from("/tmp/large-reprocess-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_000 + MESSAGE_COUNT - 1),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Two identical single-item batches, back to back.
    let batch = [(agent_id, None, &conversation)];
    let first = storage.insert_conversations_batched(&batch).unwrap();
    let second = storage.insert_conversations_batched(&batch).unwrap();

    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 1);
    assert_eq!(first[0].inserted_indices.len(), MESSAGE_COUNT as usize);
    assert!(
        second[0].inserted_indices.is_empty(),
        "full reprocessing of a large conversation must not attempt duplicate idx inserts"
    );
    assert_eq!(first[0].conversation_id, second[0].conversation_id);

    let count_rows = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let conversation_count = count_rows("SELECT COUNT(*) FROM conversations");
    let message_count = count_rows("SELECT COUNT(*) FROM messages");

    assert_eq!(conversation_count, 1);
    assert_eq!(message_count, MESSAGE_COUNT);
}
19628
/// Inserts ten conversations with distinct external ids from rayon workers
/// (chunks of three, one writer handle per chunk) and checks that each insert
/// reports message indices `[0, 1, 2]`, that all conversation ids are distinct,
/// and that a reader opened afterwards sees one row per conversation.
#[test]
fn parallel_insert_conversation_tree_keeps_unique_external_ids_distinct() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use frankensqlite::compat::{ConnectionExt, RowExt};
    use rand::RngExt;
    use rayon::prelude::*;

    /// True when `err`, or failing that its root cause, is a `FrankenError`
    /// variant this test treats as transient (busy / conflict / serialization).
    fn retryable_franken_error(err: &anyhow::Error) -> bool {
        let franken = match err.downcast_ref::<frankensqlite::FrankenError>() {
            Some(direct) => Some(direct),
            None => err
                .root_cause()
                .downcast_ref::<frankensqlite::FrankenError>(),
        };
        let Some(inner) = franken else {
            return false;
        };
        matches!(
            inner,
            frankensqlite::FrankenError::Busy
                | frankensqlite::FrankenError::BusyRecovery
                | frankensqlite::FrankenError::BusySnapshot { .. }
                | frankensqlite::FrankenError::WriteConflict { .. }
                | frankensqlite::FrankenError::SerializationFailure { .. }
        )
    }

    /// Calls `f` until it succeeds, fails with a non-retryable error, or has
    /// been retried 24 times (25 calls in total). Sleeps between attempts for
    /// the current backoff plus a random extra of up to the same amount; the
    /// backoff starts at 4 ms and doubles up to a 512 ms cap.
    fn with_retry<F, T>(mut f: F) -> anyhow::Result<T>
    where
        F: FnMut() -> anyhow::Result<T>,
    {
        const MAX_RETRIES: u32 = 24;

        let mut rng = rand::rng();
        let mut backoff_ms = 4_u64;
        let mut retries_used = 0_u32;
        loop {
            match f() {
                Ok(value) => return Ok(value),
                Err(err) if retries_used < MAX_RETRIES && retryable_franken_error(&err) => {
                    let sleep_ms = backoff_ms + rng.random_range(0..=backoff_ms);
                    std::thread::sleep(Duration::from_millis(sleep_ms));
                    backoff_ms = (backoff_ms * 2).min(512);
                    retries_used += 1;
                }
                Err(err) => return Err(err),
            }
        }
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("parallel_insert_conversation_tree.db");
    // Open and immediately drop a handle before any worker opens the file.
    drop(FrankenStorage::open(&db_path).unwrap());

    // Ten conversations spread over three agent slugs, three messages each.
    let conversations: Vec<NormalizedConversation> = (0..10)
        .map(|i| {
            let messages = (0..3)
                .map(|j| NormalizedMessage {
                    idx: j,
                    role: if j % 2 == 0 { "user" } else { "assistant" }.to_string(),
                    author: Some("tester".into()),
                    created_at: Some(1_000 + i * 100 + j * 10),
                    content: format!("parallel-distinct-test conv={i} msg={j}"),
                    extra: serde_json::json!({}),
                    snippets: vec![],
                    invocations: Vec::new(),
                })
                .collect();
            NormalizedConversation {
                agent_slug: format!("agent-{}", i % 3),
                external_id: Some(format!("conv-{i}")),
                title: Some(format!("Conversation {i}")),
                workspace: Some(PathBuf::from(format!("/ws/{i}"))),
                source_path: PathBuf::from(format!("/log/{i}.jsonl")),
                started_at: Some(1_000 + i * 100),
                ended_at: Some(1_000 + i * 100 + 50),
                metadata: serde_json::json!({}),
                messages,
            }
        })
        .collect();

    let mut outcomes: Vec<(String, i64, Vec<i64>)> = conversations
        .par_chunks(3)
        .map(|chunk| {
            // Each chunk gets its own writer handle and its own id caches.
            let storage = FrankenStorage::open_writer(&db_path).unwrap();
            let mut agent_ids: HashMap<String, i64> = HashMap::new();
            let mut workspace_ids: HashMap<PathBuf, i64> = HashMap::new();
            let mut per_chunk = Vec::with_capacity(chunk.len());

            for conv in chunk {
                let agent_slug = conv.agent_slug.clone();
                let workspace = conv.workspace.clone();
                let external_id = conv.external_id.clone().expect("external id");
                let internal = map_to_internal(conv);

                // The whole resolve-ids-then-insert sequence is retried as a unit;
                // ids cached by an earlier attempt are reused on the next one.
                let outcome = with_retry(|| {
                    let agent_id = match agent_ids.get(&agent_slug).copied() {
                        Some(cached) => cached,
                        None => {
                            let id = storage.ensure_agent(&Agent {
                                id: None,
                                slug: agent_slug.clone(),
                                name: agent_slug.clone(),
                                version: None,
                                kind: AgentKind::Cli,
                            })?;
                            agent_ids.insert(agent_slug.clone(), id);
                            id
                        }
                    };
                    let workspace_id = match &workspace {
                        None => None,
                        Some(path) => match workspace_ids.get(path).copied() {
                            Some(cached) => Some(cached),
                            None => {
                                let id = storage.ensure_workspace(path, None)?;
                                workspace_ids.insert(path.clone(), id);
                                Some(id)
                            }
                        },
                    };
                    storage.insert_conversation_tree(agent_id, workspace_id, &internal)
                })
                .unwrap();

                per_chunk.push((
                    external_id,
                    outcome.conversation_id,
                    outcome.inserted_indices,
                ));
            }

            storage.close().unwrap();
            per_chunk
        })
        .flatten()
        .collect();
    // Order by external id so failure output is stable across runs.
    outcomes.sort_by(|a, b| a.0.cmp(&b.0));

    let every_insert_was_full = outcomes
        .iter()
        .all(|(_, _, inserted_indices)| *inserted_indices == [0, 1, 2]);
    assert!(
        every_insert_was_full,
        "unique external ids must not be routed through the existing-conversation merge path: {outcomes:?}"
    );

    let distinct_ids: HashSet<i64> = outcomes.iter().map(|entry| entry.1).collect();
    assert_eq!(
        distinct_ids.len(),
        conversations.len(),
        "unique external ids must produce distinct conversation ids: {outcomes:?}"
    );

    // Fresh handle, opened only after every worker has closed its writer.
    let reader = FrankenStorage::open(&db_path).unwrap();
    let stored_rows: Vec<(i64, String)> = reader
        .raw()
        .query_map_collect(
            "SELECT id, external_id FROM conversations ORDER BY id",
            &[],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let stored_count: i64 = reader
        .raw()
        .query_row_map("SELECT COUNT(*) FROM conversations", &[], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(
        stored_count as usize,
        conversations.len(),
        "parallel distinct inserts must persist one row per external id; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
    assert_eq!(
        stored_rows.len(),
        conversations.len(),
        "parallel distinct inserts must remain visible after reopening; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
}
19806
/// Two imports that share one external id must land in a single stored
/// conversation, with the second import contributing only the message
/// indices that are not stored yet.
#[test]
fn insert_conversation_tree_merges_duplicate_external_id_with_gaps() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one normalized message; only the fields that differ between
    // fixtures are parameters.
    let turn = |idx, role: &str, created_at, content: &str| NormalizedMessage {
        idx,
        role: role.into(),
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra: serde_json::Value::Null,
        snippets: Vec::new(),
        invocations: Vec::new(),
    };

    // Every fixture carries the same external id and source path; only the
    // message list varies. The result is already converted to the internal model.
    let session = |messages: Vec<NormalizedMessage>| {
        map_to_internal(&NormalizedConversation {
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("tree-gap-session".into()),
            title: Some("Tree Gap Session".into()),
            source_path: PathBuf::from("/tmp/tree.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            metadata: serde_json::Value::Null,
            messages,
        })
    };

    // The first import holds the tail of the session (indices 2 and 3).
    let tail_only = session(vec![
        turn(2, "user", 1_700_000_000_200, "third"),
        turn(3, "assistant", 1_700_000_000_300, "fourth"),
    ]);
    // The second import holds the head (0 and 1) plus an already-stored index 3.
    let head_with_overlap = session(vec![
        turn(0, "user", 1_700_000_000_000, "first"),
        turn(1, "assistant", 1_700_000_000_100, "second"),
        turn(3, "assistant", 1_700_000_000_300, "fourth"),
    ]);

    let first = storage
        .insert_conversation_tree(agent_id, None, &tail_only)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &head_with_overlap)
        .unwrap();

    assert_eq!(first.inserted_indices, vec![2, 3]);
    assert_eq!(second.inserted_indices, vec![0, 1]);
    assert_eq!(first.conversation_id, second.conversation_id);

    // The table as a whole must now hold the gap-free sequence.
    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 2, 3]);
}
19913
/// A brand-new conversation whose payload repeats a message index must store
/// only the first message seen for that index.
#[test]
fn insert_conversation_tree_skips_duplicate_message_indices_for_new_conversation() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Fixture message; everything not passed in is left empty.
    fn msg(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Index 0 appears twice; the second occurrence has different content and
    // a later timestamp so the test can tell which one was kept.
    let messages = vec![
        msg(0, MessageRole::User, 1_700_000_000_000, "first canonical"),
        msg(
            0,
            MessageRole::User,
            1_700_000_000_001,
            "duplicate idx should be skipped",
        ),
        msg(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ];
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("duplicate-new-session".into()),
        title: Some("Duplicate New Session".into()),
        source_path: PathBuf::from("/tmp/duplicate-new-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    assert_eq!(outcome.inserted_indices, vec![0, 1]);

    let persisted: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| {
                let idx = row.get_typed(0)?;
                let content = row.get_typed(1)?;
                Ok((idx, content))
            },
        )
        .unwrap();
    let expected = vec![
        (0, "first canonical".to_string()),
        (1, "second".to_string()),
    ];
    assert_eq!(persisted, expected);
}
20001
/// Without an external id, two imports with the same source path and start
/// time must merge: the overlapping index is skipped and only the new one is
/// inserted.
#[test]
fn insert_conversation_tree_merges_duplicate_source_path_without_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Fixture message; everything not passed in is left empty.
    fn msg(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Both imports are identical apart from their message lists.
    fn shared_session(messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Source Path Merge".into()),
            source_path: PathBuf::from("/tmp/shared-session.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let initial = shared_session(vec![
        msg(0, MessageRole::User, 1_700_000_000_000, "first"),
        msg(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ]);
    let first = storage
        .insert_conversation_tree(agent_id, None, &initial)
        .unwrap();

    // Index 1 repeats what is already stored; index 2 is new.
    let follow_up = shared_session(vec![
        msg(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        msg(2, MessageRole::User, 1_700_000_000_200, "third"),
    ]);
    let second = storage
        .insert_conversation_tree(agent_id, None, &follow_up)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(first.inserted_indices, vec![0, 1]);
    assert_eq!(second.inserted_indices, vec![2]);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 2]);
}
20106
/// Same source path, no external id, but the second import reports a start
/// time four seconds later: the two must still merge, and only the message
/// index that was not stored yet is inserted.
#[test]
fn insert_conversation_tree_merges_source_path_duplicates_with_start_drift() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Fixture message; everything not passed in is left empty.
    fn msg(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // The start time and the message list are the only things that vary.
    fn drifting_session(started_at: Option<i64>, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Drift Merge".into()),
            source_path: PathBuf::from("/tmp/drift-session.jsonl"),
            started_at,
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let original = drifting_session(
        Some(1_700_000_000_000),
        vec![
            msg(0, MessageRole::User, 1_700_000_000_000, "first"),
            msg(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    // Start time drifted by 4_000 ms; message 1 is unchanged, message 2 is new.
    let drifted = drifting_session(
        Some(1_700_000_004_000),
        vec![
            msg(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            msg(2, MessageRole::User, 1_700_000_004_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &drifted)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![2]);
}
20208
/// A later session that reuses the source path and shares only its opening
/// message content with an earlier session must be stored as its own
/// conversation.
#[test]
fn insert_conversation_tree_keeps_single_message_overlap_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Fixture message; everything not passed in is left empty.
    fn msg(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Both sessions share the path and title; the end time is derived from the start.
    fn reused_path_session(started_at: i64, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Partial overlap".into()),
            source_path: PathBuf::from("/tmp/reused-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Earlier session: the shared opener followed by a reply unique to it.
    let earlier = reused_path_session(
        1_700_000_000_000,
        vec![
            msg(0, MessageRole::User, 1_700_000_000_000, "shared opener"),
            msg(
                1,
                MessageRole::Agent,
                1_700_000_000_100,
                "first session unique",
            ),
        ],
    );
    storage
        .insert_conversation_tree(agent_id, None, &earlier)
        .unwrap();

    // Later session: just the opener text again, stamped with the later start time.
    let later_start = 1_700_000_900_000;
    let later = reused_path_session(
        later_start,
        vec![msg(0, MessageRole::User, later_start, "shared opener")],
    );
    storage
        .insert_conversation_tree(agent_id, None, &later)
        .unwrap();

    let stored_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_conversations, 2);
}
20299
/// Two sessions written to the same source path, with different start times
/// and different content, must end up as two stored conversations.
#[test]
fn insert_conversation_tree_keeps_distinct_source_path_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // One-message session whose message is stamped with the session start time.
    let single_message_session = |stamp: i64, content: &str| {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(stamp),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Same Path Different Session".into()),
            source_path: PathBuf::from("/tmp/reused-session.jsonl"),
            started_at: Some(stamp),
            ended_at: Some(stamp + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: "local".into(),
            origin_host: None,
        }
    };

    // Insert in chronological order, exactly one call per session.
    for (stamp, content) in [
        (1_700_000_000_000, "first session"),
        (1_700_000_900_000, "second session"),
    ] {
        storage
            .insert_conversation_tree(agent_id, None, &single_message_session(stamp, content))
            .unwrap();
    }

    let stored_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_conversations, 2);
}
20366
/// A replay of the same session whose messages carry shifted indices (10, 11,
/// 12 instead of 0, 1, ...) must merge into the existing conversation; only
/// the message with no stored equivalent is inserted, under its shifted index.
#[test]
fn insert_conversation_tree_merges_replay_equivalent_messages_with_shifted_idx() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Fixture message; everything not passed in is left empty.
    fn msg(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Session fixture; the end time is always 500 ms after the start.
    fn replay_session(started_at: i64, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Shifted replay".into()),
            source_path: PathBuf::from("/tmp/replay-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let original = replay_session(
        1_700_000_000_000,
        vec![
            msg(0, MessageRole::User, 1_700_000_000_000, "first"),
            msg(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    // Same roles, timestamps and content for the first two messages, but with
    // indices offset by ten and a much later session start; "third" is new.
    let replay = replay_session(
        1_700_000_900_000,
        vec![
            msg(10, MessageRole::User, 1_700_000_000_000, "first"),
            msg(11, MessageRole::Agent, 1_700_000_000_100, "second"),
            msg(12, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &replay)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![12]);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect(
            "SELECT idx FROM messages WHERE conversation_id = ?1 ORDER BY idx",
            fparams![first.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 12]);
}
20488
/// Salvaging two historical database files must import each bundle once,
/// merge conversations that overlap across bundles, and report nothing new
/// when salvage is run a second time.
#[test]
fn salvage_historical_databases_imports_backups_once_and_merges_overlap() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Fixture message; everything not passed in is left empty.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation fixture with fixed timestamps and no external id.
    fn recovered(source_path: &str, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Recovered".into()),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    // Two views of the same session: indices {0, 1} and {1, 2}.
    let overlapping_a = recovered(
        "/tmp/shared-history.jsonl",
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let overlapping_b = recovered(
        "/tmp/shared-history.jsonl",
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    // An unrelated session that exists only in the second historical file.
    let mut unique = recovered(
        "/tmp/unique-history.jsonl",
        vec![message(0, MessageRole::User, 1_700_000_001_000, "unique")],
    );
    unique.started_at = Some(1_700_000_001_000);
    unique.ended_at = Some(1_700_000_001_100);

    let backup_file = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&backup_file, std::slice::from_ref(&overlapping_a));
    let corrupt_file = tmp.path().join("agent_search.corrupt.20260324_212907");
    seed_historical_db_direct(&corrupt_file, &[overlapping_b, unique]);

    let first = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(first.bundles_considered, 2);
    assert_eq!(first.bundles_imported, 2);
    assert_eq!(first.messages_imported, 4);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 2);

    // The overlapping session must hold the union of both views.
    let shared_id = conversations
        .iter()
        .find(|conv| conv.source_path == std::path::Path::new("/tmp/shared-history.jsonl"))
        .and_then(|conv| conv.id)
        .unwrap();
    let shared_messages = storage.fetch_messages(shared_id).unwrap();
    let shared_indices: Vec<i64> = shared_messages.into_iter().map(|m| m.idx).collect();
    assert_eq!(shared_indices, vec![0, 1, 2]);

    // A second pass must import nothing.
    let second = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(second.bundles_imported, 0);
    assert_eq!(second.messages_imported, 0);
}
20618
/// A historical conversation whose source id is blank but which carries an
/// origin host must be salvaged with the host name as its source id.
#[test]
fn salvage_historical_databases_normalizes_host_only_remote_provenance() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "host-only remote".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let host_only_remote = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Recovered Host Only Remote".into()),
        source_path: PathBuf::from("/tmp/host-only-history.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: Some("builder-5".into()),
    };

    let historical_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&historical_db, std::slice::from_ref(&host_only_remote));

    // Rewrite the seeded file directly so the stored row has the blank source
    // id and no "builder-5" source row. The statements run in this order:
    // add the blank source, repoint the conversation, then delete "builder-5".
    // The connection is closed at the end of this scope, before salvage runs.
    {
        let historical_conn =
            FrankenConnection::open(historical_db.to_string_lossy().into_owned()).unwrap();
        historical_conn
            .execute_compat(
                "INSERT INTO sources(id, kind, host_label, created_at, updated_at) VALUES(?1, ?2, ?3, ?4, ?5)",
                fparams![" ", "ssh", "builder-5", 0_i64, 0_i64],
            )
            .unwrap();
        historical_conn
            .execute_compat(
                "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE source_path = ?3",
                fparams![" ", "builder-5", "/tmp/host-only-history.jsonl"],
            )
            .unwrap();
        historical_conn
            .execute_compat("DELETE FROM sources WHERE id = ?1", fparams!["builder-5"])
            .unwrap();
    }

    let report = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(report.bundles_imported, 1);
    assert_eq!(report.messages_imported, 1);

    // The only reported source id must be the host name.
    let source_ids = storage.get_source_ids().unwrap();
    assert_eq!(source_ids, vec!["builder-5".to_string()]);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 1);
    let salvaged = &conversations[0];
    assert_eq!(salvaged.source_id, "builder-5");
    assert_eq!(salvaged.origin_host.as_deref(), Some("builder-5"));
}
20689
/// Drives `import_historical_batch_with_retry` with an importer that rejects
/// any attempt carrying more than one message, and checks that the retry
/// logic still gets all four messages of a single conversation imported.
#[test]
fn historical_salvage_retry_splits_single_conversation_until_it_fits() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Four user messages, indices 0..=3, each stamped one millisecond apart.
    let messages: Vec<Message> = (0..4)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message-{idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let entry = HistoricalBatchEntry {
        source_row_id: 77,
        agent_id: 1,
        workspace_id: None,
        conversation: Conversation {
            id: None,
            agent_slug: "gemini".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("conv-77".into()),
            title: Some("Large recovered conversation".into()),
            source_path: PathBuf::from("/tmp/history.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        },
    };

    // Per attempt: the message count of every entry the importer was handed.
    let mut attempts: Vec<Vec<usize>> = Vec::new();
    let totals = SqliteStorage::import_historical_batch_with_retry(
        std::slice::from_ref(&entry),
        &mut |batch| {
            let sizes: Vec<usize> = batch
                .iter()
                .map(|item| item.conversation.messages.len())
                .collect();
            let total_messages: usize = sizes.iter().sum();
            attempts.push(sizes);

            // Only attempts carrying at most one message succeed.
            if total_messages <= 1 {
                Ok(HistoricalBatchImportTotals {
                    inserted_source_rows: batch.len(),
                    inserted_messages: total_messages,
                })
            } else {
                Err(anyhow!("out of memory"))
            }
        },
    )
    .unwrap();

    let expected_totals = HistoricalBatchImportTotals {
        inserted_source_rows: 1,
        inserted_messages: 4,
    };
    assert_eq!(totals, expected_totals);

    // The first attempt is the whole conversation.
    assert_eq!(attempts.first(), Some(&vec![4]));
    // Later attempts must include at least four single-message slices.
    let single_message_attempts = attempts
        .iter()
        .filter(|sizes| matches!(sizes.as_slice(), [1]))
        .count();
    assert!(
        single_message_attempts >= 4,
        "expected recursive fallback to reach one-message slices"
    );
}
20766
/// With a progress checkpoint recorded for a backup bundle, salvage must
/// finish the bundle, report totals that include the checkpointed counts,
/// clear the checkpoint, and import nothing on a second run.
#[test]
fn salvage_historical_databases_resumes_from_progress_checkpoint() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation whose id, title, timestamps and content are
    // all derived from `seed`.
    fn seeded_conv(source_path: &str, seed: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + seed),
            content: format!("message-{seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{seed}")),
            title: Some(format!("Recovered {seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000 + seed),
            ended_at: Some(1_700_000_000_100 + seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    // The backup holds three conversations.
    let conv_a = seeded_conv("/tmp/one.jsonl", 1);
    let conv_b = seeded_conv("/tmp/two.jsonl", 2);
    let conv_c = seeded_conv("/tmp/three.jsonl", 3);
    seed_historical_db_direct(&backup_db, &[conv_a.clone(), conv_b, conv_c]);

    // The canonical database already holds the first one.
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    storage
        .insert_conversation_tree(agent_id, None, &conv_a)
        .unwrap();

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Record a checkpoint at the backup row of the already-present conversation.
    // NOTE(review): 50 and 99 are presumably the conversation and message
    // counts carried by the checkpoint; the totals asserted below are those
    // values plus two. Confirm against `record_historical_bundle_progress`.
    let first_row_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
        .unwrap()
        .query_row_map(
            "SELECT id FROM conversations WHERE source_path = ?1",
            fparams!["/tmp/one.jsonl"],
            |row| row.get_typed(0),
        )
        .unwrap();
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", first_row_id, 50, 99)
        .unwrap();

    let resumed = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(resumed.bundles_imported, 1);
    assert_eq!(resumed.conversations_imported, 52);
    assert_eq!(resumed.messages_imported, 101);
    assert_eq!(storage.list_conversations(10, 0).unwrap().len(), 3);

    // Once the bundle is complete its progress row must be gone from `meta`.
    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let leftover_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        leftover_progress.is_none(),
        "completed salvage should clear bundle progress"
    );

    let rerun = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(rerun.bundles_imported, 0);
    assert_eq!(rerun.messages_imported, 0);
}
20866
/// Salvage must skip a historical bundle whose stored progress checkpoint
/// already reaches the backup's highest conversation id.
///
/// The test seeds a backup database with three conversations, records bundle
/// progress at the backup's `MAX(id)`, and then checks that salvage imports
/// nothing, reports the bundle as already imported, and leaves no progress
/// row behind in `meta`.
#[test]
fn salvage_historical_databases_skips_bundle_when_checkpoint_covers_backup() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Builds a single-message "codex" conversation whose external id, title,
    /// timestamps and message content are all derived from `idx_seed`.
    fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx_seed),
            content: format!("message-{idx_seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{idx_seed}")),
            title: Some(format!("Recovered {idx_seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000 + idx_seed),
            ended_at: Some(1_700_000_000_100 + idx_seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    // Canonical database plus a timestamped backup under `backups/`.
    let temp_root = TempDir::new().unwrap();
    let canonical_db = temp_root.path().join("agent_search.db");
    let backup_db = temp_root
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let seeded_conversations = [
        make_conv("/tmp/one.jsonl", 1),
        make_conv("/tmp/two.jsonl", 2),
        make_conv("/tmp/three.jsonl", 3),
    ];
    seed_historical_db_direct(&backup_db, &seeded_conversations);

    // Pick the discovered bundle that is rooted at the backup we just seeded.
    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Read the backup's highest conversation id through a short-lived
    // connection; the connection is a temporary and is gone before salvage runs.
    let backup_db_path_string = backup_db.to_string_lossy().into_owned();
    let backup_max_id: i64 = FrankenConnection::open(backup_db_path_string)
        .unwrap()
        .query_row_map(
            "SELECT COALESCE(MAX(id), 0) FROM conversations",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert!(backup_max_id > 0, "seeded backup should have conversations");

    // Checkpoint the bundle at the backup's max id before salvage ever runs.
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", backup_max_id, 3, 3)
        .unwrap();

    let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(
        outcome.bundles_imported, 0,
        "fully-checkpointed bundle must not be re-scanned"
    );
    assert_eq!(outcome.conversations_imported, 0);
    assert_eq!(outcome.messages_imported, 0);

    let canonical_conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(
        canonical_conversations.len(),
        0,
        "skip path must not import anything"
    );

    let already_imported = storage.historical_bundle_already_imported(&bundle).unwrap();
    assert!(
        already_imported,
        "skipped bundle must be ledgered as salvaged so future runs short-circuit"
    );

    // The per-bundle progress row must no longer exist in `meta`.
    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let stored_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        stored_progress.is_none(),
        "skip path must clear the bundle progress checkpoint"
    );
}
20970
/// The lexical-rebuild listing must page in insertion (id) order, unlike the
/// user-facing `list_conversations`, which this test observes returning the
/// newest `started_at` first.
///
/// Three conversations are inserted as a, b, c with start times 3000, 1000,
/// 2000, so the two orderings are distinguishable. The test then pages the
/// rebuild listing with a limit of two and checks the `through_id` bound.
#[test]
fn list_conversations_for_lexical_rebuild_uses_stable_id_order() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Builds a one-message "codex" conversation keyed by `source_path`.
    fn make_conv(source_path: &str, started_at: i64) -> Conversation {
        let opening_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(started_at),
            content: format!("message for {source_path}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(source_path.to_string()),
            title: Some(source_path.to_string()),
            source_path: PathBuf::from(source_path),
            started_at: Some(started_at),
            ended_at: Some(started_at + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opening_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Insertion order is a, b, c; start times are deliberately out of order.
    for (source_path, started_at) in [
        ("/tmp/a.jsonl", 3_000),
        ("/tmp/b.jsonl", 1_000),
        ("/tmp/c.jsonl", 2_000),
    ] {
        storage
            .insert_conversation_tree(agent_id, None, &make_conv(source_path, started_at))
            .unwrap();
    }

    // User-facing listing: observed order is descending `started_at`.
    let listed_for_user = storage.list_conversations(10, 0).unwrap();
    let user_order: Vec<PathBuf> = listed_for_user
        .into_iter()
        .map(|conv| conv.source_path)
        .collect();
    let expected_user_order = vec![
        PathBuf::from("/tmp/a.jsonl"),
        PathBuf::from("/tmp/c.jsonl"),
        PathBuf::from("/tmp/b.jsonl"),
    ];
    assert_eq!(user_order, expected_user_order);

    // Rebuild listing: insertion order, regardless of timestamps.
    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
    let listed_for_rebuild = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    let rebuild_order: Vec<PathBuf> = listed_for_rebuild
        .into_iter()
        .map(|conv| conv.source_path)
        .collect();
    let expected_rebuild_order = vec![
        PathBuf::from("/tmp/a.jsonl"),
        PathBuf::from("/tmp/b.jsonl"),
        PathBuf::from("/tmp/c.jsonl"),
    ];
    assert_eq!(rebuild_order, expected_rebuild_order);

    // Page 1 with a limit of two.
    let first_page = storage
        .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    let first_page_paths: Vec<PathBuf> = first_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(
        first_page_paths,
        vec![PathBuf::from("/tmp/a.jsonl"), PathBuf::from("/tmp/b.jsonl")]
    );

    // The id of the last row on page 1 is both the resume cursor for page 2
    // and the upper bound for the bounded query below.
    let first_page_last_id = first_page
        .last()
        .and_then(|conv| conv.id)
        .expect("first page should include an id");

    let second_page = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            first_page_last_id,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let second_page_paths: Vec<PathBuf> = second_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(second_page_paths, vec![PathBuf::from("/tmp/c.jsonl")]);

    // Bounded variant: everything after id 0 up to and including the bound.
    let bounded_page = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            10,
            0,
            first_page_last_id,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let bounded_paths: Vec<PathBuf> = bounded_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(
        bounded_paths,
        vec![PathBuf::from("/tmp/a.jsonl"), PathBuf::from("/tmp/b.jsonl")]
    );
}
21108
/// Keyset paging over the lexical-rebuild listing must step across gaps in
/// the conversation id sequence without skipping or repeating rows.
///
/// Six conversations are inserted, ids 2 and 4 are deleted, and the listing
/// is then walked two rows at a time; the test expects ids `[1, 3]`,
/// `[5, 6]`, and finally an empty page.
#[test]
fn keyset_traversal_handles_sparse_holey_conversation_ids() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Builds a one-message "codex" conversation named after `label`.
    fn make_conv(label: &str, ts: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    for i in 0..6 {
        let conversation = make_conv(&format!("conv-{i}"), 1000 + i);
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Punch holes at ids 2 and 4. Foreign keys are switched off around the
    // deletes because the parent rows are removed before their messages.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (2, 4)",
        "DELETE FROM messages WHERE conversation_id IN (2, 4)",
    ] {
        storage
            .conn
            .execute_compat(delete_sql, fparams![])
            .unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Page 1: start from cursor 0.
    let first_page = storage
        .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(first_page.len(), 2);
    let page1_ids: Vec<i64> = first_page.iter().map(|row| row.id.unwrap()).collect();
    assert_eq!(page1_ids, vec![1, 3]);

    // Page 2: resume after the last id seen on page 1.
    let cursor_after_page1 = *page1_ids.last().unwrap();
    let second_page = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            cursor_after_page1,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    assert_eq!(second_page.len(), 2);
    let page2_ids: Vec<i64> = second_page.iter().map(|row| row.id.unwrap()).collect();
    assert_eq!(page2_ids, vec![5, 6]);

    // Page 3: nothing left after the last id seen on page 2.
    let cursor_after_page2 = *page2_ids.last().unwrap();
    let third_page = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            cursor_after_page2,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    assert!(third_page.is_empty());

    // Concatenated pages cover every surviving id exactly once, in order.
    let mut all_ids: Vec<i64> = Vec::with_capacity(page1_ids.len() + page2_ids.len());
    all_ids.extend(page1_ids.iter().copied());
    all_ids.extend(page2_ids.iter().copied());
    assert_eq!(all_ids, vec![1, 3, 5, 6]);
}
21209
/// The bounded (`after_id` .. `through_id`) rebuild listing must respect both
/// ends of the range when the id sequence has gaps.
///
/// Ten conversations are inserted and ids 3, 5, 7 and 8 are deleted, leaving
/// `1, 2, 4, 6, 9, 10`. The expected results below show the lower bound being
/// exclusive and the upper bound inclusive.
#[test]
fn keyset_traversal_through_id_with_sparse_ranges() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Builds a one-message "codex" conversation named after `label`.
    fn make_conv(label: &str, ts: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    for i in 0..10 {
        let conversation = make_conv(&format!("conv-{i}"), 1000 + i);
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Remove ids 3, 5, 7, 8. Foreign keys are switched off around the deletes
    // because the parent rows are removed before their messages.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (3, 5, 7, 8)",
        "DELETE FROM messages WHERE conversation_id IN (3, 5, 7, 8)",
    ] {
        storage
            .conn
            .execute_compat(delete_sql, fparams![])
            .unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Runs one bounded query (limit 100) and returns the ids it produced.
    let ids_between = |after_id, through_id| -> Vec<i64> {
        storage
            .list_conversations_for_lexical_rebuild_after_id_through_id(
                100,
                after_id,
                through_id,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap()
            .iter()
            .map(|row| row.id.unwrap())
            .collect()
    };

    // Upper bound 5 was deleted; the range stops at the last surviving id.
    assert_eq!(ids_between(0, 5), vec![1, 2, 4]);

    // Lower bound 4 itself is excluded; upper bound 10 is included.
    assert_eq!(ids_between(4, 10), vec![6, 9, 10]);

    // Nothing exists past the highest id.
    assert!(ids_between(10, 20).is_empty());
}
21316
/// Footprint rows must be produced for every conversation, including one
/// with no messages, and `message_bytes` must equal `message_count` times the
/// planner's per-message estimate.
///
/// Four conversations are inserted: two ASCII messages, no messages, one
/// multi-byte UTF-8 message (whose `conversation_tail_state` row is then
/// deleted), and one message at the sparse index 10. The expected counts are
/// 2, 0, 1 and 11 respectively.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_estimates_bytes_and_keeps_empty_conversations()
{
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Builds an author-less message with no extra JSON and no snippets.
    fn message_at(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Inserts a conversation carrying `messages` and returns its row id.
    let insert = |external_id: &str, base_ts: i64, messages: Vec<Message>| {
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some(external_id.to_string()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap()
            .conversation_id
    };

    let ascii_id = insert(
        "footprint-ascii",
        1_700_000_000_000,
        vec![
            message_at(0, MessageRole::User, 1_700_000_000_001, "abc"),
            message_at(1, MessageRole::Agent, 1_700_000_000_002, "defg"),
        ],
    );
    let empty_id = insert("footprint-empty", 1_700_000_001_000, Vec::new());
    let utf8_id = insert(
        "footprint-utf8",
        1_700_000_002_000,
        vec![message_at(0, MessageRole::Tool, 1_700_000_002_001, "hé🙂")],
    );
    let sparse_id = insert(
        "footprint-sparse",
        1_700_000_003_000,
        vec![message_at(10, MessageRole::User, 1_700_000_003_010, "sparse")],
    );

    // Drop the tail-state row for the UTF-8 conversation; its footprint is
    // still expected to report one message.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![utf8_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let per_message = LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE;
    let expected = vec![
        LexicalRebuildConversationFootprintRow {
            conversation_id: ascii_id,
            message_count: 2,
            message_bytes: 2 * per_message,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: empty_id,
            message_count: 0,
            message_bytes: 0,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: utf8_id,
            message_count: 1,
            message_bytes: per_message,
        },
        // A lone message at idx 10 is planned as 11 messages.
        LexicalRebuildConversationFootprintRow {
            conversation_id: sparse_id,
            message_count: 11,
            message_bytes: 11 * per_message,
        },
    ];
    assert_eq!(footprints, expected);
}
21452
/// When both tail caches are gone (the `last_message_*` columns on
/// `conversations` are NULL and the `conversation_tail_state` row is
/// deleted), the footprint must still report the conversation's messages
/// instead of treating it as empty.
///
/// The single message sits at idx 10, so the expected count is 11.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_falls_back_for_missing_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail".to_string()),
        title: Some("footprint-missing-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Erase the cached tail on the conversation row itself...
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = NULL, last_message_created_at = NULL
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    // ...and the matching row in the separate tail-state table.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "missing tail-cache metadata should fall back to messages MAX(idx) instead of treating legacy conversations as empty"
    );
}
21532
/// A tail cache that lags behind the real messages must not shrink the
/// planned footprint.
///
/// Three messages (idx 0..=2) are inserted, then both cached tails are
/// rewound to idx 0; the footprint is still expected to report three
/// messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_raises_stale_low_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Three consecutive user messages, one millisecond apart.
    let three_messages = (0..3)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_010 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-stale-tail".to_string()),
        title: Some("footprint-stale-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-stale-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: three_messages,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Rewind the cached tail on the conversation row to the first message...
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = 0, last_message_created_at = 1700000000010
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    // ...and do the same in the tail-state table.
    storage
        .conn
        .execute_compat(
            "UPDATE conversation_tail_state
             SET last_message_idx = 0, last_message_created_at = 1700000000010
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 3,
        message_bytes: 3 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "stale-low tail caches must not under-plan lexical shards and trip doc>plan invariants"
    );
}
21616
/// Footprint listing must keep working when the `conversation_tail_state`
/// table does not exist at all.
///
/// The cached tail columns on `conversations` are nulled and the table is
/// dropped; the single message at idx 10 must still yield a count of 11.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_missing_tail_state_table() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail without hot table".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail-table".to_string()),
        title: Some("footprint-missing-tail-table".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail-table.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Null out the cached tail on the conversation row.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = NULL, last_message_created_at = NULL
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    // Remove the tail-state table entirely.
    storage
        .conn
        .execute_compat("DROP TABLE conversation_tail_state", fparams![])
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "read-only lexical self-heal must tolerate pre-tail-cache databases and use messages MAX(idx)"
    );
}
21693
/// Footprint listing must succeed against the checked-in search demo fixture
/// opened read-only, and every conversation in it must report at least one
/// message.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_legacy_search_demo_fixture() {
    // <crate root>/tests/fixtures/search_demo_data/agent_search.db
    let mut fixture_db = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_db.extend([
        "tests",
        "fixtures",
        "search_demo_data",
        "agent_search.db",
    ]);
    let storage = FrankenStorage::open_readonly(&fixture_db).unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert!(
        !footprints.is_empty(),
        "search self-heal should be able to plan a lexical rebuild from the legacy search demo fixture"
    );
    for footprint in &footprints {
        assert!(
            footprint.message_count > 0,
            "legacy fixture conversations should derive message counts from messages when tail caches are absent"
        );
    }
}
21718
/// A stored row with a blank `source_id` but a populated `origin_host` must
/// be surfaced with the host as its source id by both the user-facing
/// listing and the lexical-rebuild listing.
///
/// The row is inserted normally and then rewritten in place, with foreign
/// keys switched off around the update.
#[test]
fn lexical_rebuild_listing_normalizes_host_only_remote_source_from_blank_source_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_root = TempDir::new().unwrap();
    let db_path = temp_root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("legacy-blank-source".into()),
        title: Some("Legacy blank source".into()),
        source_path: PathBuf::from("/tmp/legacy-blank-source.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Rewrite the stored row to the legacy shape: blank source id plus host.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE id = ?3",
            fparams![" ", "dev@laptop", conversation_id],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    // User-facing listing.
    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let listed_row = &listed[0];
    assert_eq!(listed_row.source_id, "dev@laptop");
    assert_eq!(listed_row.origin_host.as_deref(), Some("dev@laptop"));

    // Lexical-rebuild listing.
    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
    let rebuild_listed = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(rebuild_listed.len(), 1);
    let rebuild_row = &rebuild_listed[0];
    assert_eq!(rebuild_row.source_id, "dev@laptop");
    assert_eq!(rebuild_row.origin_host.as_deref(), Some("dev@laptop"));
}
21787
21788 #[test]
21789 fn seed_canonical_from_best_historical_bundle_copies_data_and_resets_runtime_meta() {
21790 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21791 use std::path::PathBuf;
21792
21793 let dir = TempDir::new().unwrap();
21794 let canonical_db = dir.path().join("agent_search.db");
21795 let source_db = dir
21796 .path()
21797 .join("backups/agent_search.db.20260322T020200.bak");
21798
21799 fs::create_dir_all(source_db.parent().unwrap()).unwrap();
21800
21801 let source = SqliteStorage::open(&source_db).unwrap();
21802 let agent = Agent {
21803 id: None,
21804 slug: "codex".into(),
21805 name: "Codex".into(),
21806 version: Some("0.2.3".into()),
21807 kind: AgentKind::Cli,
21808 };
21809 let agent_id = source.ensure_agent(&agent).unwrap();
21810 let conversation = Conversation {
21811 id: None,
21812 agent_slug: "codex".into(),
21813 workspace: Some(PathBuf::from("/tmp/workspace")),
21814 external_id: Some("seed-conv".into()),
21815 title: Some("Historical seed".into()),
21816 source_path: PathBuf::from("/tmp/historical-seed.jsonl"),
21817 started_at: Some(1_700_000_000_000),
21818 ended_at: Some(1_700_000_000_100),
21819 approx_tokens: Some(42),
21820 metadata_json: serde_json::json!({"seed": true}),
21821 messages: vec![Message {
21822 id: None,
21823 idx: 0,
21824 role: MessageRole::Agent,
21825 author: Some("assistant".into()),
21826 created_at: Some(1_700_000_000_050),
21827 content: "seeded message".into(),
21828 extra_json: serde_json::json!({"usage": {"total_tokens": 12}}),
21829 snippets: Vec::new(),
21830 }],
21831 source_id: LOCAL_SOURCE_ID.into(),
21832 origin_host: None,
21833 };
21834 source
21835 .insert_conversation_tree(agent_id, None, &conversation)
21836 .unwrap();
21837 source.set_last_scan_ts(123).unwrap();
21838 source.set_last_indexed_at(456).unwrap();
21839 source.set_last_embedded_message_id(789).unwrap();
21840 source
21841 .conn
21842 .execute_compat(
21843 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
21844 fparams!["historical_bundle_salvaged:stale", "{\"stale\":true}"],
21845 )
21846 .unwrap();
21847 drop(source);
21848
21849 #[cfg(not(windows))]
21850 {
21851 let legacy_v13_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, content='', tokenize='porter')";
21862 let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
21863 let legacy = rusqlite_test_fixture_conn(&source_db);
21864 legacy
21865 .execute_batch(
21866 "UPDATE meta SET value = '13' WHERE key = 'schema_version';
21867 DELETE FROM _schema_migrations WHERE version = 14;
21868 PRAGMA writable_schema = ON;",
21869 )
21870 .unwrap();
21871 legacy
21872 .execute(
21873 "DELETE FROM meta WHERE key = ?1",
21874 [FTS_FRANKEN_REBUILD_META_KEY],
21875 )
21876 .unwrap();
21877 legacy
21879 .execute(
21880 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
21881 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
21882 [legacy_v13_fts_sql],
21883 )
21884 .unwrap();
21885 legacy
21887 .execute(
21888 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
21889 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
21890 [duplicate_legacy_fts_sql],
21891 )
21892 .unwrap();
21893 legacy
21894 .execute_batch("PRAGMA writable_schema = OFF;")
21895 .unwrap();
21896 drop(legacy);
21897
21898 {
21901 let verify = rusqlite_test_fixture_conn(&source_db);
21902 verify
21903 .execute_batch("PRAGMA writable_schema = ON;")
21904 .unwrap();
21905 let fts_entries: i64 = verify
21906 .query_row(
21907 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
21908 [],
21909 |row| row.get(0),
21910 )
21911 .unwrap();
21912 assert_eq!(
21913 fts_entries, 2,
21914 "test fixture should reproduce the duplicate legacy fts_messages rows"
21915 );
21916 let msg_count: i64 = verify
21917 .query_row("SELECT COUNT(*) FROM messages", [], |row| row.get(0))
21918 .unwrap();
21919 assert_eq!(msg_count, 1);
21920 }
21921 }
21922
21923 let fresh = SqliteStorage::open(&canonical_db).unwrap();
21924 drop(fresh);
21925
21926 let outcome = seed_canonical_from_best_historical_bundle(&canonical_db)
21927 .unwrap()
21928 .unwrap();
21929 assert_eq!(outcome.bundles_imported, 1);
21930 assert_eq!(outcome.conversations_imported, 1);
21931 assert_eq!(outcome.messages_imported, 1);
21932
21933 let readonly = open_franken_with_flags(
21934 &canonical_db.to_string_lossy(),
21935 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
21936 )
21937 .unwrap();
21938 let readonly_message_count: i64 = readonly
21939 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
21940 row.get_typed(0)
21941 })
21942 .unwrap();
21943 assert_eq!(readonly_message_count, 1);
21944
21945 let seeded = SqliteStorage::open(&canonical_db).unwrap();
21946 assert_eq!(
21947 seeded
21948 .count_sessions_in_range(None, None, None, None)
21949 .unwrap()
21950 .0,
21951 1
21952 );
21953 let message_count: i64 = seeded
21954 .conn
21955 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
21956 row.get_typed(0)
21957 })
21958 .unwrap();
21959 assert_eq!(message_count, 1);
21960 assert_eq!(seeded.get_last_scan_ts().unwrap(), None);
21961 assert_eq!(seeded.get_last_embedded_message_id().unwrap(), None);
21962
21963 let last_indexed: Option<String> = seeded
21964 .conn
21965 .query_row_map(
21966 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
21967 fparams![],
21968 |row| row.get_typed(0),
21969 )
21970 .optional()
21971 .unwrap();
21972 assert!(last_indexed.is_none());
21973
21974 let salvage_keys: Vec<String> = seeded
21975 .conn
21976 .query_map_collect(
21977 "SELECT key FROM meta WHERE key LIKE 'historical_bundle_salvaged:%' ORDER BY key",
21978 fparams![],
21979 |row| row.get_typed(0),
21980 )
21981 .unwrap();
21982 assert_eq!(salvage_keys.len(), 1);
21983
21984 let reopened_readonly = open_franken_with_flags(
21985 &canonical_db.to_string_lossy(),
21986 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
21987 )
21988 .unwrap();
21989 let reopened_fts_entries: i64 = reopened_readonly
21990 .query_row_map(
21991 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
21992 fparams![],
21993 |row| row.get_typed(0),
21994 )
21995 .unwrap();
21996 assert_eq!(
21997 reopened_fts_entries, 1,
21998 "seeded canonical db should keep a single stock-SQLite fts_messages schema row"
21999 );
22000 let reopened_message_count: i64 = reopened_readonly
22001 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
22002 row.get_typed(0)
22003 })
22004 .unwrap();
22005 assert_eq!(reopened_message_count, 1);
22006
22007 let franken_seeded = FrankenStorage::open(&canonical_db).unwrap();
22008 assert_eq!(
22009 franken_seeded.schema_version().unwrap(),
22010 CURRENT_SCHEMA_VERSION
22011 );
22012 franken_seeded
22019 .ensure_search_fallback_fts_consistency()
22020 .expect("ensure FTS consistency after seed");
22021 let post_franken_schema_rows: i64 = franken_seeded
22022 .raw()
22023 .query_row_map(
22024 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
22025 fparams![],
22026 |row| row.get_typed(0),
22027 )
22028 .unwrap();
22029 assert_eq!(post_franken_schema_rows, 1);
22030 let fts_probe = franken_seeded
22031 .raw()
22032 .query("SELECT COUNT(*) FROM fts_messages");
22033 assert!(
22034 fts_probe.is_ok(),
22035 "expected post-seed FTS to be queryable, got {fts_probe:?}"
22036 );
22037 }
22038
/// A historical bundle whose recorded schema version is too old must make the
/// baseline seed fail, and that failure must leave the canonical database
/// exactly as it was: sentinel row present, no conversations imported.
#[test]
fn failed_baseline_seed_preserves_existing_canonical_bundle() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Shared by the read-write and read-only verification queries below.
    const COUNT_CONVERSATIONS: &str = "SELECT COUNT(*) FROM conversations";

    let workdir = TempDir::new().unwrap();
    let canonical_db = workdir.path().join("agent_search.db");
    let source_db = workdir
        .path()
        .join("backups/agent_search.db.20260325T120000Z.bad-seed.bak");

    fs::create_dir_all(source_db.parent().unwrap()).unwrap();

    // Canonical database: freshly opened, then marked with a sentinel meta row.
    // The handle is released at the end of the scope.
    {
        let canonical = SqliteStorage::open(&canonical_db).unwrap();
        canonical
            .conn
            .execute_compat(
                "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
                fparams!["sentinel", "keep-me"],
            )
            .unwrap();
    }

    // Backup bundle: a single conversation holding a single message.
    {
        let source = SqliteStorage::open(&source_db).unwrap();
        let codex = Agent {
            kind: AgentKind::Cli,
            version: Some("0.2.3".into()),
            name: "Codex".into(),
            slug: "codex".into(),
            id: None,
        };
        let agent_id = source.ensure_agent(&codex).unwrap();

        let only_message = Message {
            content: "this seed should fail".into(),
            created_at: Some(1_700_000_000_050),
            author: Some("assistant".into()),
            role: MessageRole::Agent,
            idx: 0,
            id: None,
            snippets: Vec::new(),
            extra_json: serde_json::Value::Null,
        };
        let conversation = Conversation {
            external_id: Some("bad-seed-conv".into()),
            title: Some("Bad seed".into()),
            agent_slug: "codex".into(),
            id: None,
            source_path: PathBuf::from("/tmp/bad-seed.jsonl"),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::json!({"seed": "bad"}),
            messages: vec![only_message],
            origin_host: None,
            source_id: LOCAL_SOURCE_ID.into(),
        };
        source
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Rewrite the bundle's recorded schema version to 12 so the seed rejects it.
    {
        let legacy = FrankenConnection::open(source_db.to_string_lossy().into_owned()).unwrap();
        legacy
            .execute("UPDATE meta SET value = '12' WHERE key = 'schema_version'")
            .unwrap();
    }

    let err = seed_canonical_from_best_historical_bundle(&canonical_db).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("schema_version 12 is too old for baseline import"),
        "unexpected seed error: {err:#}"
    );

    // The canonical database must still open and still carry the sentinel.
    let reopened = SqliteStorage::open(&canonical_db).unwrap();
    let sentinel: Option<String> = reopened
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'sentinel'",
            fparams![],
            |r| r.get_typed(0),
        )
        .optional()
        .unwrap();
    assert_eq!(sentinel.as_deref(), Some("keep-me"));

    // Nothing from the rejected bundle may have leaked in.
    let conversation_count: i64 = reopened
        .conn
        .query_row_map(COUNT_CONVERSATIONS, fparams![], |r| r.get_typed(0))
        .unwrap();
    assert_eq!(conversation_count, 0);

    // Same check through a read-only handle opened alongside the first one.
    let readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let readonly_conversation_count: i64 = readonly
        .query_row_map(COUNT_CONVERSATIONS, fparams![], |r| r.get_typed(0))
        .unwrap();
    assert_eq!(readonly_conversation_count, 0);
}
22145
/// The regular fetch returns the stored `extra_json`, while the
/// lexical-rebuild fetch returns the same message with `extra_json` null.
#[test]
fn fetch_messages_for_lexical_rebuild_skips_extra_json() {
    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            kind: AgentKind::Cli,
            version: Some("0.2.3".into()),
            name: "Codex".into(),
            slug: "codex".into(),
            id: None,
        })
        .unwrap();

    // The only message carries a non-null JSON payload.
    let rich_message = Message {
        content: "indexed text".into(),
        author: Some("assistant".into()),
        role: MessageRole::Agent,
        created_at: Some(1_700_000_000_050),
        idx: 0,
        id: None,
        snippets: Vec::new(),
        extra_json: serde_json::json!({
            "usage": { "total_tokens": 1234 },
            "irrelevant_blob": "still preserved in canonical storage"
        }),
    };
    let conversation = Conversation {
        external_id: Some("lexical-rebuild-test".into()),
        title: Some("Lexical rebuild".into()),
        agent_slug: "codex".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-rebuild.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![rich_message],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Full fetch: the payload is still there.
    let stored = storage.fetch_messages(conversation_id).unwrap();
    assert_eq!(stored.len(), 1);
    assert!(!stored[0].extra_json.is_null());

    // Lexical fetch: text and author come back, the payload does not.
    let lexical = storage
        .fetch_messages_for_lexical_rebuild(conversation_id)
        .unwrap();
    assert_eq!(lexical.len(), 1);
    let slim = &lexical[0];
    assert_eq!(slim.content, "indexed text");
    assert_eq!(slim.author.as_deref(), Some("assistant"));
    assert!(slim.extra_json.is_null());
}
22206
/// Inserts three conversations, batch-fetches the third and the first, and
/// checks that the result is keyed by conversation, keeps each conversation's
/// messages in `idx` order, omits the conversation that was not requested, and
/// returns every message with a null `extra_json`.
#[test]
fn fetch_messages_for_lexical_rebuild_batch_groups_and_orders_messages() {
    // All fixture messages share the same opaque JSON payload and no snippets.
    let opaque_message =
        |idx: i64, role: MessageRole, author: &str, created_at: i64, content: &str| Message {
            id: None,
            idx,
            role,
            author: Some(author.into()),
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        };

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            kind: AgentKind::Cli,
            version: Some("0.2.3".into()),
            name: "Codex".into(),
            slug: "codex".into(),
            id: None,
        })
        .unwrap();

    // Two messages, so in-conversation ordering can be observed.
    let first = Conversation {
        external_id: Some("lexical-batch-1".into()),
        title: Some("Lexical batch 1".into()),
        agent_slug: "codex".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-batch-1.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![
            opaque_message(0, MessageRole::User, "user", 1_700_000_000_010, "first-a"),
            opaque_message(
                1,
                MessageRole::Agent,
                "assistant",
                1_700_000_000_020,
                "first-b",
            ),
        ],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    // Inserted but deliberately left out of the batch request.
    let second = Conversation {
        external_id: Some("lexical-batch-2".into()),
        title: Some("Lexical batch 2".into()),
        agent_slug: "codex".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-batch-2.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_200),
        ended_at: Some(1_700_000_000_300),
        approx_tokens: Some(84),
        metadata_json: serde_json::Value::Null,
        messages: vec![opaque_message(
            0,
            MessageRole::Tool,
            "tool",
            1_700_000_000_210,
            "second-a",
        )],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    // Same envelope as `second`, with its own identity and message.
    let third = Conversation {
        messages: vec![opaque_message(
            0,
            MessageRole::System,
            "system",
            1_700_000_000_410,
            "third-a",
        )],
        source_path: PathBuf::from("/tmp/lexical-batch-3.jsonl"),
        title: Some("Lexical batch 3".into()),
        external_id: Some("lexical-batch-3".into()),
        ..second.clone()
    };

    // Insert in first, second, third order and keep the assigned ids.
    let [first_id, second_id, third_id] = [&first, &second, &third].map(|conversation| {
        storage
            .insert_conversation_tree(agent_id, None, conversation)
            .unwrap()
            .conversation_id
    });

    // Request out of insertion order and skip the middle conversation.
    let lexical = storage
        .fetch_messages_for_lexical_rebuild_batch(&[third_id, first_id], None, None)
        .unwrap();

    let first_messages = lexical.get(&first_id).expect("first conversation");
    assert_eq!(first_messages.len(), 2);
    assert_eq!(first_messages[0].content, "first-a");
    assert_eq!(first_messages[1].content, "first-b");
    assert!(
        first_messages
            .iter()
            .all(|message| message.extra_json.is_null())
    );

    assert!(
        !lexical.contains_key(&second_id),
        "batch fetch must exclude conversations not requested by the caller"
    );

    let third_messages = lexical.get(&third_id).expect("third conversation");
    assert_eq!(third_messages.len(), 1);
    assert_eq!(third_messages[0].content, "third-a");
    assert!(third_messages[0].extra_json.is_null());
}
22337
/// A batch fetch called with limits `Some(10)` and `Some(8)` over a
/// conversation holding two six-byte messages must fail, and the error text
/// must name the content-byte guardrail.
#[test]
fn fetch_messages_for_lexical_rebuild_batch_enforces_content_byte_guardrail() {
    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            kind: AgentKind::Cli,
            version: Some("0.2.3".into()),
            name: "Codex".into(),
            slug: "codex".into(),
            id: None,
        })
        .unwrap();

    // Two messages of six bytes each: twelve bytes of content in total.
    let user_message = Message {
        content: "123456".into(),
        author: Some("user".into()),
        role: MessageRole::User,
        created_at: Some(1_700_000_000_010),
        idx: 0,
        id: None,
        snippets: Vec::new(),
        extra_json: serde_json::Value::Null,
    };
    let agent_message = Message {
        content: "abcdef".into(),
        author: Some("assistant".into()),
        role: MessageRole::Agent,
        created_at: Some(1_700_000_000_020),
        idx: 1,
        id: None,
        snippets: Vec::new(),
        extra_json: serde_json::Value::Null,
    };
    let conversation = Conversation {
        external_id: Some("lexical-batch-guard".into()),
        title: Some("Lexical batch guard".into()),
        agent_slug: "codex".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-batch-guard.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![user_message, agent_message],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // NOTE(review): the last argument (8) is presumably the content-byte cap
    // that the twelve bytes above exceed — confirm against the method signature.
    let requested = [conversation_id];
    let error = storage
        .fetch_messages_for_lexical_rebuild_batch(&requested, Some(10), Some(8))
        .expect_err("guardrail should reject oversized batch content");

    // Render with the alternate formatter before searching for the reason.
    let message = format!("{error:#}");
    assert!(
        message.contains("content-byte guardrail"),
        "expected guardrail reason in error, got {message}"
    );
}
22405
/// Rows written with plain SQL through the raw connection must be readable by
/// both the lexical-rebuild fetch and the full message fetch.
#[test]
fn fetch_messages_handles_manual_rows_inserted_via_raw_connection() {
    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("manual-rows.db");
    let storage = FrankenStorage::open(&database_file).unwrap();
    let conn = storage.raw();

    // Agent first, then its conversation, then the conversation's message.
    let manual_inserts = [
        "INSERT INTO agents (id, slug, name, kind, created_at, updated_at)
         VALUES (1, 'claude_code', 'Claude Code', 'local', 0, 0)",
        "INSERT INTO conversations
         (id, agent_id, external_id, title, source_path, source_id, started_at)
         VALUES (1, 1, 'manual-ext', 'Manual Session', '/tmp/manual.jsonl', 'local', 200)",
        "INSERT INTO messages
         (id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES (1, 1, 0, 'user', 'tester', 1700000000000, 'manual body', '{\"k\":1}', NULL)",
    ];
    for sql in manual_inserts {
        conn.execute(sql).unwrap();
    }

    // Lexical path: only the text is checked.
    let lexical = storage.fetch_messages_for_lexical_rebuild(1).unwrap();
    assert_eq!(lexical.len(), 1);
    assert_eq!(lexical[0].content, "manual body");

    // Full path: text, author and the JSON written into the `extra_json` column.
    let full = storage.fetch_messages(1).unwrap();
    assert_eq!(full.len(), 1);
    let manual = &full[0];
    assert_eq!(manual.content, "manual body");
    assert_eq!(manual.author.as_deref(), Some("tester"));
    assert_eq!(manual.extra_json, serde_json::json!({ "k": 1 }));
}
22441
/// Runs `EXPLAIN QUERY PLAN` on the batched message query and checks that the
/// plan mentions `sqlite_autoindex_messages_1` and never mentions a temp
/// b-tree.
#[test]
fn lexical_rebuild_batch_messages_query_avoids_sorter_temp_btrees() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            kind: AgentKind::Cli,
            version: None,
            name: "Claude Code".into(),
            slug: "claude_code".into(),
            id: None,
        })
        .unwrap();

    // Two conversations with two messages each.
    let fixtures = [
        ("conv-1", 1_700_000_000_000_i64),
        ("conv-2", 1_700_000_001_000_i64),
    ];
    for (external_id, base_ts) in fixtures {
        let opening = Message {
            content: format!("{external_id}-first"),
            author: Some("user".into()),
            role: MessageRole::User,
            created_at: Some(base_ts + 10),
            idx: 0,
            id: None,
            snippets: Vec::new(),
            extra_json: serde_json::Value::Null,
        };
        let reply = Message {
            content: format!("{external_id}-second"),
            author: Some("assistant".into()),
            role: MessageRole::Agent,
            created_at: Some(base_ts + 20),
            idx: 1,
            id: None,
            snippets: Vec::new(),
            extra_json: serde_json::Value::Null,
        };
        let conversation = Conversation {
            external_id: Some(external_id.to_string()),
            title: Some("Lexical rebuild".into()),
            agent_slug: "claude_code".into(),
            id: None,
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opening, reply],
            origin_host: None,
            source_id: LOCAL_SOURCE_ID.into(),
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    let conversation_ids: Vec<i64> = storage
        .conn
        .query_map_collect(
            "SELECT id FROM conversations ORDER BY id",
            fparams![],
            |r| r.get_typed(0),
        )
        .unwrap();
    assert_eq!(conversation_ids.len(), 2);
    let (lower_id, upper_id) = (conversation_ids[0], conversation_ids[1]);

    // Column 3 of each EXPLAIN QUERY PLAN row is the human-readable detail text.
    let plan_details: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN QUERY PLAN \
             SELECT conversation_id, id, idx, role, author, created_at, content \
             FROM messages \
             WHERE conversation_id IN (?1, ?2) \
             ORDER BY conversation_id ASC, idx ASC",
            fparams![lower_id, upper_id],
            |r| r.get_typed(3),
        )
        .unwrap();

    let uses_autoindex = plan_details
        .iter()
        .any(|detail| detail.contains("sqlite_autoindex_messages_1"));
    assert!(
        uses_autoindex,
        "expected batched lexical rebuild fetch to use the conversation_id/idx composite index, got {plan_details:?}"
    );

    let sorter_free = plan_details
        .iter()
        .all(|detail| !detail.contains("TEMP B-TREE"));
    assert!(
        sorter_free,
        "expected batched lexical rebuild fetch to avoid sorter temp b-trees, got {plan_details:?}"
    );
}
22541
/// Streaming from the first conversation id must yield every message of both
/// conversations, ordered by conversation and then by message index.
#[test]
fn stream_messages_for_lexical_rebuild_groups_and_orders_messages() {
    // All fixture messages share the same opaque JSON payload and no snippets.
    let opaque_message =
        |idx: i64, role: MessageRole, author: &str, created_at: i64, content: &str| Message {
            id: None,
            idx,
            role,
            author: Some(author.into()),
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        };

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            kind: AgentKind::Cli,
            version: Some("0.2.3".into()),
            name: "Codex".into(),
            slug: "codex".into(),
            id: None,
        })
        .unwrap();

    let first = Conversation {
        external_id: Some("lexical-stream-1".into()),
        title: Some("Lexical stream 1".into()),
        agent_slug: "codex".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-stream-1.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![
            opaque_message(0, MessageRole::User, "user", 1_700_000_000_010, "first-a"),
            opaque_message(
                1,
                MessageRole::Agent,
                "assistant",
                1_700_000_000_020,
                "first-b",
            ),
        ],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    let second = Conversation {
        external_id: Some("lexical-stream-2".into()),
        title: Some("Lexical stream 2".into()),
        agent_slug: "codex".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-stream-2.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_200),
        ended_at: Some(1_700_000_000_300),
        approx_tokens: Some(84),
        metadata_json: serde_json::Value::Null,
        messages: vec![opaque_message(
            0,
            MessageRole::Tool,
            "tool",
            1_700_000_000_210,
            "second-a",
        )],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    // Insert in first, second order and keep the assigned ids.
    let [first_id, second_id] = [&first, &second].map(|conversation| {
        storage
            .insert_conversation_tree(agent_id, None, conversation)
            .unwrap()
            .conversation_id
    });

    // Record the fields under test for every row the stream hands over.
    let mut streamed = Vec::new();
    storage
        .stream_messages_for_lexical_rebuild_from_conversation_id(first_id, |row| {
            let observed = (
                row.conversation_id,
                row.idx,
                row.role,
                row.author,
                row.content,
            );
            streamed.push(observed);
            Ok(())
        })
        .unwrap();

    assert_eq!(
        streamed,
        vec![
            (
                first_id,
                0,
                String::from("user"),
                Some(String::from("user")),
                String::from("first-a"),
            ),
            (
                first_id,
                1,
                String::from("agent"),
                Some(String::from("assistant")),
                String::from("first-b"),
            ),
            (
                second_id,
                0,
                String::from("tool"),
                Some(String::from("tool")),
                String::from("second-a"),
            ),
        ]
    );
}
22669
/// Streaming the range `first_id..=first_id` must return only the first
/// conversation's message and nothing from the later conversation.
#[test]
fn stream_messages_for_lexical_rebuild_between_conversation_ids_respects_upper_bound() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            kind: AgentKind::Cli,
            version: Some("1.2.3".into()),
            name: "Claude Code".into(),
            slug: "claude_code".into(),
            id: None,
        })
        .unwrap();

    // Inside the requested range.
    let in_range_message = Message {
        content: "first-only".into(),
        author: Some("user".into()),
        role: MessageRole::User,
        created_at: Some(1_700_000_000_010),
        idx: 0,
        id: None,
        snippets: Vec::new(),
        extra_json: serde_json::json!({"opaque": true}),
    };
    let first = Conversation {
        external_id: Some("lexical-range-1".into()),
        title: Some("Lexical range 1".into()),
        agent_slug: "claude_code".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-range-1.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![in_range_message],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    // Beyond the upper bound; must never be streamed.
    let out_of_range_message = Message {
        content: "second-should-not-appear".into(),
        author: Some("tool".into()),
        role: MessageRole::Tool,
        created_at: Some(1_700_000_000_210),
        idx: 0,
        id: None,
        snippets: Vec::new(),
        extra_json: serde_json::json!({"opaque": true}),
    };
    let second = Conversation {
        external_id: Some("lexical-range-2".into()),
        title: Some("Lexical range 2".into()),
        agent_slug: "claude_code".into(),
        id: None,
        source_path: PathBuf::from("/tmp/lexical-range-2.jsonl"),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        started_at: Some(1_700_000_000_200),
        ended_at: Some(1_700_000_000_300),
        approx_tokens: Some(84),
        metadata_json: serde_json::Value::Null,
        messages: vec![out_of_range_message],
        origin_host: None,
        source_id: LOCAL_SOURCE_ID.into(),
    };

    // Insert in first, second order and keep the assigned ids.
    let [first_id, second_id] = [&first, &second].map(|conversation| {
        storage
            .insert_conversation_tree(agent_id, None, conversation)
            .unwrap()
            .conversation_id
    });

    // Lower and upper bound are both the first conversation's id.
    let mut streamed = Vec::new();
    storage
        .stream_messages_for_lexical_rebuild_between_conversation_ids(
            first_id,
            first_id,
            |row| {
                let observed = (row.conversation_id, row.idx, row.content);
                streamed.push(observed);
                Ok(())
            },
        )
        .unwrap();

    assert_eq!(streamed, vec![(first_id, 0, String::from("first-only"))]);

    let leaked_second = streamed
        .iter()
        .any(|(conversation_id, ..)| *conversation_id == second_id);
    assert!(
        !leaked_second,
        "upper bound should exclude later conversation ids"
    );
}
22767
/// Inserts five two-message conversations for one agent followed by two
/// conversations for a second agent (some messages without author or
/// timestamp), streams the whole id range, and expects the rows to equal the
/// inserted messages in insertion order.
#[test]
fn stream_messages_for_lexical_rebuild_between_conversation_ids_handles_mixed_ranges() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // (idx, role, author, created_at, content) describing one fixture message.
    type MessageSpec = (i64, MessageRole, Option<String>, Option<i64>, String);

    // Role text the streamed rows are expected to carry for each role.
    fn role_label(role: MessageRole) -> String {
        match role {
            MessageRole::User => "user".to_string(),
            MessageRole::Agent => "agent".to_string(),
            MessageRole::Tool => "tool".to_string(),
            MessageRole::System => "system".to_string(),
            MessageRole::Other(other) => other,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    // Both agents are unversioned CLI agents that differ only in slug and name.
    let cli_agent = |slug: &str, name: &str| Agent {
        id: None,
        slug: slug.into(),
        name: name.into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let claude_agent_id = storage
        .ensure_agent(&cli_agent("claude_code", "Claude Code"))
        .unwrap();
    let aider_agent_id = storage
        .ensure_agent(&cli_agent("aider", "Aider"))
        .unwrap();

    // State the inserting closure fills in: the rows the stream should yield,
    // plus the first and last conversation ids that bound the streamed range.
    let mut expected = Vec::new();
    let mut first_conversation_id = None;
    let mut last_conversation_id = None;
    let mut insert_conversation =
        |agent_id: i64,
         external_id: &str,
         title: &str,
         source_path: &str,
         started_at: i64,
         message_specs: Vec<MessageSpec>| {
            let slug = if agent_id == aider_agent_id {
                "aider"
            } else {
                "claude_code"
            };
            let messages: Vec<Message> = message_specs
                .iter()
                .map(|(idx, role, author, created_at, content)| Message {
                    content: content.clone(),
                    created_at: *created_at,
                    author: author.clone(),
                    role: role.clone(),
                    idx: *idx,
                    id: None,
                    snippets: Vec::new(),
                    extra_json: serde_json::Value::Null,
                })
                .collect();
            let conversation = Conversation {
                external_id: Some(external_id.to_string()),
                title: Some(title.to_string()),
                agent_slug: slug.into(),
                id: None,
                source_path: PathBuf::from(source_path),
                workspace: Some(PathBuf::from("/tmp/workspace")),
                started_at: Some(started_at),
                ended_at: Some(started_at + 100),
                approx_tokens: None,
                metadata_json: serde_json::Value::Null,
                messages,
                origin_host: None,
                source_id: LOCAL_SOURCE_ID.into(),
            };
            let conversation_id = storage
                .insert_conversation_tree(agent_id, None, &conversation)
                .unwrap()
                .conversation_id;

            // Only the very first insert sets the lower bound; every insert
            // moves the upper bound.
            first_conversation_id = first_conversation_id.or(Some(conversation_id));
            last_conversation_id = Some(conversation_id);

            for (idx, role, author, created_at, content) in message_specs {
                expected.push((
                    conversation_id,
                    idx,
                    role_label(role),
                    author,
                    created_at,
                    content,
                ));
            }
        };

    // Five conversations for the first agent: timestamps present, no authors.
    let claude_fixtures = [
        ("alpha", 1_700_000_000_000_i64),
        ("beta", 1_700_000_001_000_i64),
        ("gamma", 1_700_000_002_000_i64),
        ("delta", 1_700_000_003_000_i64),
        ("epsilon", 1_700_000_004_000_i64),
    ];
    for (label, base_ts) in claude_fixtures {
        let prompt: MessageSpec = (
            0,
            MessageRole::User,
            None,
            Some(base_ts + 10),
            format!("{label}_content"),
        );
        let response: MessageSpec = (
            1,
            MessageRole::Agent,
            None,
            Some(base_ts + 20),
            format!("{label}_content_response"),
        );
        insert_conversation(
            claude_agent_id,
            &format!("lexical-{label}"),
            &format!("Lexical {label}"),
            &format!("/tmp/{label}.jsonl"),
            base_ts,
            vec![prompt, response],
        );
    }

    // Two conversations for the second agent: authors present, no timestamps.
    insert_conversation(
        aider_agent_id,
        "lexical-aider-history",
        "Aider Chat: coding_agent_session_search",
        "/tmp/.aider.chat.history.md",
        1_764_619_673_394,
        vec![
            (
                0,
                MessageRole::System,
                Some("system".to_string()),
                None,
                "# aider chat started at 2025-12-01 20:07:47".to_string(),
            ),
            (
                1,
                MessageRole::User,
                Some("user".to_string()),
                None,
                "/tmp/workspace/.venv/bin/aider --no-git --message hello world".to_string(),
            ),
        ],
    );
    insert_conversation(
        aider_agent_id,
        "lexical-aider-fixture",
        "Aider Chat: aider",
        "/tmp/tests/fixtures/aider/.aider.chat.history.md",
        1_764_621_401_399,
        vec![
            (
                0,
                MessageRole::User,
                Some("user".to_string()),
                None,
                "/add src/main.rs".to_string(),
            ),
            (
                1,
                MessageRole::Agent,
                Some("assistant".to_string()),
                None,
                "Added src/main.rs to the chat.

#### /add src/main.rs"
                    .to_string(),
            ),
            (
                2,
                MessageRole::User,
                Some("user".to_string()),
                None,
                "Please refactor.".to_string(),
            ),
            (
                3,
                MessageRole::Agent,
                Some("assistant".to_string()),
                None,
                "Sure, here is the code.".to_string(),
            ),
        ],
    );

    // Stream the full inclusive range spanned by everything inserted above.
    let lower_bound = first_conversation_id.unwrap();
    let upper_bound = last_conversation_id.unwrap();
    let mut streamed = Vec::new();
    storage
        .stream_messages_for_lexical_rebuild_between_conversation_ids(
            lower_bound,
            upper_bound,
            |row| {
                let observed = (
                    row.conversation_id,
                    row.idx,
                    row.role,
                    row.author,
                    row.created_at,
                    row.content,
                );
                streamed.push(observed);
                Ok(())
            },
        )
        .unwrap();

    assert_eq!(streamed, expected);
}
22984
/// Runs `EXPLAIN QUERY PLAN` over a ranged conversation listing and a per-conversation message
/// fetch, asserting that neither plan mentions a `TEMP B-TREE` and that the message fetch names
/// one of the expected `(conversation_id, idx)` indexes.
#[test]
fn lexical_rebuild_stream_queries_use_rowid_and_per_conversation_probes() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Seed two conversations; each carries a user message followed by an agent reply.
    let seeds = [
        ("conv-1", 1_700_000_000_000_i64),
        ("conv-2", 1_700_000_001_000_i64),
    ];
    for (external_id, base_ts) in seeds {
        let user_turn = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(base_ts + 10),
            content: format!("{external_id}-first"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let agent_turn = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(base_ts + 20),
            content: format!("{external_id}-second"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conversation = Conversation {
            id: None,
            agent_slug: "claude_code".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some("Lexical rebuild".into()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![user_turn, agent_turn],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // The smallest and largest conversation ids bound the ranged listing below.
    let single_id = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let first_id = single_id("SELECT id FROM conversations ORDER BY id LIMIT 1");
    let last_id = single_id("SELECT id FROM conversations ORDER BY id DESC LIMIT 1");

    // Column 3 of each EXPLAIN QUERY PLAN row is collected as the plan detail text.
    let conversation_plan_details: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN QUERY PLAN SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
            fparams![first_id, last_id],
            |row| row.get_typed(3),
        )
        .unwrap();
    assert!(
        conversation_plan_details
            .iter()
            .all(|detail| !detail.contains("TEMP B-TREE")),
        "expected streamed lexical rebuild conversation listing to avoid sorter temp b-trees, got {conversation_plan_details:?}"
    );

    let message_plan_details: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN QUERY PLAN SELECT id, idx, role, author, created_at, content FROM messages INDEXED BY sqlite_autoindex_messages_1 WHERE conversation_id = ?1 ORDER BY idx",
            fparams![first_id],
            |row| row.get_typed(3),
        )
        .unwrap();
    let accepted_indexes = ["sqlite_autoindex_messages_1", "idx_messages_conv_idx"];
    assert!(
        message_plan_details.iter().any(|detail| accepted_indexes
            .iter()
            .any(|index_name| detail.contains(*index_name))),
        "expected per-conversation lexical rebuild fetch to use the conversation_id/idx index, got {message_plan_details:?}"
    );
    assert!(
        message_plan_details
            .iter()
            .all(|detail| !detail.contains("TEMP B-TREE")),
        "expected per-conversation lexical rebuild fetch to avoid sorter temp b-trees, got {message_plan_details:?}"
    );
}
23102
/// Between two zero-filled candidates, discovery is expected to list the 128-byte backup ahead
/// of the 32-byte `agent_search.corrupt.small` sibling.
#[test]
fn discover_historical_database_bundles_prefers_larger_archives_first() {
    let root = TempDir::new().unwrap();
    let canonical_db = root.path().join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // Smaller candidate, placed next to the canonical database.
    let small_sibling = root.path().join("agent_search.corrupt.small");
    fs::write(&small_sibling, vec![0_u8; 32]).unwrap();

    // Larger candidate, placed under `backups/`.
    let backups_dir = root.path().join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let large_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
    fs::write(&large_backup, vec![0_u8; 128]).unwrap();

    let mut ordered_paths: Vec<PathBuf> = Vec::new();
    for bundle in discover_historical_database_bundles(&canonical_db) {
        ordered_paths.push(bundle.root_path);
    }

    assert_eq!(ordered_paths, vec![large_backup, small_sibling]);
}
23123
/// A small backup that holds real `conversations`/`messages` tables must be listed ahead of a
/// larger zero-filled file, and only the former is flagged `supports_direct_readonly`.
#[test]
fn discover_historical_database_bundles_prefers_queryable_direct_bundles_first() {
    let root = TempDir::new().unwrap();
    let canonical_db = root.path().join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // 4 KiB of zeroes: bigger than the backup below, but not a database.
    let larger_corrupt = root.path().join("agent_search.corrupt.20260324_212907");
    fs::write(&larger_corrupt, vec![0_u8; 4096]).unwrap();

    let backups_dir = root.path().join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let smaller_healthy = backups_dir.join("agent_search.db.20260322T020200.bak");
    {
        // The connection is scoped so it is closed before discovery runs.
        let seed_conn =
            FrankenConnection::open(smaller_healthy.to_string_lossy().into_owned()).unwrap();
        seed_conn
            .execute_batch(
                "CREATE TABLE conversations (id INTEGER PRIMARY KEY, source_path TEXT);
                 CREATE TABLE messages (
                     id INTEGER PRIMARY KEY,
                     conversation_id INTEGER NOT NULL,
                     idx INTEGER NOT NULL,
                     content TEXT
                 );
                 INSERT INTO conversations(id, source_path) VALUES (1, '/tmp/history.jsonl');
                 INSERT INTO messages(id, conversation_id, idx, content)
                 VALUES (1, 1, 0, 'seed');",
            )
            .unwrap();
    }

    let bundles = discover_historical_database_bundles(&canonical_db);
    let mut ordered_paths: Vec<PathBuf> = Vec::new();
    ordered_paths.extend(bundles.iter().map(|bundle| bundle.root_path.clone()));

    assert_eq!(ordered_paths, vec![smaller_healthy, larger_corrupt]);
    assert!(bundles[0].supports_direct_readonly);
    assert!(!bundles[1].supports_direct_readonly);
}
23162
/// A quarantined file that is not a SQLite database is still discovered, but salvage counts it
/// as considered without importing anything, and the canonical store stays empty.
#[test]
fn salvage_historical_databases_skips_unreadable_quarantined_bundles() {
    let root = TempDir::new().unwrap();
    let canonical_db = root.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let quarantined = root.path().join("agent_search.corrupt.20260324_212907");
    fs::write(&quarantined, b"not a sqlite database").unwrap();

    // Discovery must report exactly the quarantined file.
    let mut discovered: Vec<PathBuf> = Vec::new();
    for bundle in discover_historical_database_bundles(&canonical_db) {
        discovered.push(bundle.root_path);
    }
    assert_eq!(discovered, vec![quarantined]);

    let salvage = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(salvage.bundles_considered, 1);
    assert_eq!(salvage.bundles_imported, 0);
    assert_eq!(salvage.conversations_imported, 0);
    assert_eq!(salvage.messages_imported, 0);

    let listed = storage.list_conversations(10, 0).unwrap();
    assert!(listed.is_empty());
}
23185
/// `agent_search.db` files under `repair-lab/<name>/` and `snapshots/<name>/` must be
/// discovered, while `agent_search.rebuild-test.db` in the same repair-lab directory must not.
#[test]
fn discover_historical_database_bundles_includes_repair_lab_and_snapshots_named_roots() {
    let root = TempDir::new().unwrap();
    let canonical_db = root.path().join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // repair-lab/live-copy holds one expected root plus a decoy that must be ignored.
    let repair_lab_dir = root.path().join("repair-lab").join("live-copy");
    fs::create_dir_all(&repair_lab_dir).unwrap();
    let repair_lab_db = repair_lab_dir.join("agent_search.db");
    fs::write(&repair_lab_db, vec![0_u8; 96]).unwrap();
    let decoy = repair_lab_dir.join("agent_search.rebuild-test.db");
    fs::write(decoy, vec![0_u8; 192]).unwrap();

    let snapshots_dir = root.path().join("snapshots").join("20260324T013201Z");
    fs::create_dir_all(&snapshots_dir).unwrap();
    let snapshot_db = snapshots_dir.join("agent_search.db");
    fs::write(&snapshot_db, vec![0_u8; 64]).unwrap();

    let mut ordered_paths: Vec<PathBuf> = Vec::new();
    for bundle in discover_historical_database_bundles(&canonical_db) {
        ordered_paths.push(bundle.root_path);
    }

    assert!(ordered_paths.contains(&repair_lab_db));
    assert!(ordered_paths.contains(&snapshot_db));
    assert!(
        ordered_paths
            .iter()
            .all(|path| path.file_name().and_then(|name| name.to_str())
                != Some("agent_search.rebuild-test.db"))
    );
}
23220
/// Seeds a repair-lab replay bundle and a backup with the same conversation, rewinds the replay
/// bundle's recorded schema version to 13 (injecting a duplicate legacy `fts_messages` schema row
/// on non-Windows targets), and asserts that discovery lists the untouched backup first and
/// reports the probed schema details of both bundles.
#[test]
fn discover_historical_database_bundles_prefers_healthy_backup_over_replay_priority() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let root = TempDir::new().unwrap();
    let canonical_db = root.path().join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // Both bundles are seeded with the same agent and single-message conversation.
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("replay-conv".into()),
        title: Some("Replay bundle".into()),
        source_path: PathBuf::from("/tmp/replay.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_050),
            content: "replay message".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    // Opens a store at `db_file`, inserts the fixture, and closes the store on return.
    let seed_bundle = |db_file: &Path| {
        let bundle_storage = SqliteStorage::open(db_file).unwrap();
        let agent_id = bundle_storage.ensure_agent(&agent).unwrap();
        bundle_storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    };

    let replay_dir = root
        .path()
        .join("repair-lab")
        .join("replay-20260324T070101Z");
    fs::create_dir_all(&replay_dir).unwrap();
    let replay_db = replay_dir.join("agent_search.db");
    seed_bundle(replay_db.as_path());

    // Rewind the replay bundle's schema bookkeeping; writable_schema stays ON until the
    // sqlite_master edit below has been made.
    let legacy_conn = rusqlite_test_fixture_conn(&replay_db);
    legacy_conn
        .execute_batch(
            "UPDATE meta SET value = '13' WHERE key = 'schema_version';
             DELETE FROM _schema_migrations WHERE version = 14;
             PRAGMA writable_schema = ON;",
        )
        .unwrap();
    legacy_conn
        .execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
    #[cfg(not(windows))]
    {
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        legacy_conn
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [duplicate_legacy_fts_sql],
            )
            .unwrap();
    }
    legacy_conn
        .execute_batch("PRAGMA writable_schema = OFF;")
        .unwrap();
    drop(legacy_conn);

    let backups_dir = root.path().join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let clean_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
    seed_bundle(clean_backup.as_path());

    let bundles = discover_historical_database_bundles(&canonical_db);
    let mut ordered_paths: Vec<PathBuf> = Vec::new();
    ordered_paths.extend(bundles.iter().map(|bundle| bundle.root_path.clone()));

    assert_eq!(ordered_paths[0], clean_backup);
    assert_eq!(ordered_paths[1], replay_db);

    // Probe results for the untouched backup.
    assert_eq!(
        bundles[0].probe.schema_version,
        Some(CURRENT_SCHEMA_VERSION)
    );
    assert_eq!(bundles[0].probe.fts_schema_rows, Some(0));
    assert!(!bundles[0].probe.fts_queryable);

    // Probe results for the rewound replay bundle; the duplicate schema row is only injected
    // on non-Windows targets, so the expected count differs per platform.
    assert_eq!(bundles[1].probe.schema_version, Some(13));
    let expected_fts_schema_rows = Some(if cfg!(windows) { 0 } else { 1 });
    assert_eq!(bundles[1].probe.fts_schema_rows, expected_fts_schema_rows);
}
23342
/// Rebuilds the FTS table for a one-message database, inserts a second message row directly,
/// and expects `ensure_fts_consistency_via_rusqlite` to report an incremental catch-up of
/// exactly that row, after which `fts_messages` contains rowid 2.
#[test]
fn ensure_fts_consistency_via_rusqlite_catches_up_missing_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let scratch = TempDir::new().unwrap();
    let db_path = scratch.path().join("fts-catchup.db");

    // Seed one conversation with a single message; the store is closed at the end of the scope.
    {
        let storage = SqliteStorage::open(&db_path).unwrap();
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: "codex".into(),
                name: "Codex".into(),
                version: Some("0.2.3".into()),
                kind: AgentKind::Cli,
            })
            .unwrap();
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("fts-catchup".into()),
            title: Some("FTS catchup".into()),
            source_path: PathBuf::from("/tmp/fts-catchup.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: Some("user".into()),
                created_at: Some(1_700_000_000_050),
                content: "initial message".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    rebuild_fts_via_rusqlite(&db_path).unwrap();

    let open_franken =
        || FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();

    // Add message id 2 directly to `messages` after the rebuild has already run.
    {
        let writer = open_franken();
        let conversation_id: i64 = writer
            .query_row_map("SELECT id FROM conversations LIMIT 1", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        writer
            .execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
                 VALUES(2, ?1, 1, 'assistant', 'assistant', 1700000000060, 'authentication catchup', NULL, NULL)",
                fparams![conversation_id],
            )
            .unwrap();
    }

    let repair = ensure_fts_consistency_via_rusqlite(&db_path).unwrap();
    let expected_repair = FtsConsistencyRepair::IncrementalCatchUp {
        inserted_rows: 1,
        total_rows: 2,
    };
    assert_eq!(repair, expected_repair);

    let reader = open_franken();
    let auth_rows: i64 = reader
        .query_row_map(
            "SELECT COUNT(*) FROM fts_messages WHERE rowid = 2",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(auth_rows, 1);
}
23422
/// Injects a second `fts_messages` row into `sqlite_master`, then checks that
/// `rebuild_fts_via_rusqlite` reports one inserted row and leaves a single FTS schema row with
/// one indexed message.
#[test]
fn rebuild_fts_via_rusqlite_cleans_duplicate_legacy_schema_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let scratch = TempDir::new().unwrap();
    let db_path = scratch.path().join("fts-duplicate-rebuild.db");

    // Seed a single-message conversation; the store is closed at the end of the scope.
    {
        let storage = SqliteStorage::open(&db_path).unwrap();
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: "codex".into(),
                name: "Codex".into(),
                version: Some("0.2.3".into()),
                kind: AgentKind::Cli,
            })
            .unwrap();
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/ws")),
            external_id: Some("retro".into()),
            title: Some("retro".into()),
            source_path: PathBuf::from("/tmp/retro.jsonl"),
            started_at: Some(42),
            ended_at: Some(42),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: None,
                created_at: Some(42),
                content: "retro investigation".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    // Write a duplicate schema row straight into sqlite_master and confirm it is present.
    {
        let fixture_conn = rusqlite_test_fixture_conn(&db_path);
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        fixture_conn
            .execute_batch("PRAGMA writable_schema = ON;")
            .unwrap();
        fixture_conn
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [duplicate_legacy_fts_sql],
            )
            .unwrap();
        fixture_conn
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
        let duplicate_rows: i64 = fixture_conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(duplicate_rows, 2);
    }

    let inserted = rebuild_fts_via_rusqlite(&db_path).unwrap();
    assert_eq!(inserted, 1);

    let verify_conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let schema_rows = franken_fts_schema_rows(&verify_conn).unwrap();
    assert_eq!(
        schema_rows, 1,
        "DROP TABLE should leave one clean FTS schema"
    );
    let match_count: i64 = verify_conn
        .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(match_count, 1);
}
23504
/// Ensuring an agent on a freshly opened store yields a positive id.
#[test]
fn ensure_agent_creates_new() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let assigned_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "test_agent".into(),
            name: "Test Agent".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    assert!(assigned_id > 0);
}
23526
/// Ensuring the same agent twice returns the same id both times.
#[test]
fn ensure_agent_returns_existing_id() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };

    let first_id = store.ensure_agent(&codex).unwrap();
    let second_id = store.ensure_agent(&codex).unwrap();

    assert_eq!(first_id, second_id);
}
23545
/// Re-ensuring an agent with identical fields must leave the row's `updated_at` untouched.
#[test]
fn ensure_agent_unchanged_preserves_updated_at() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("1.0".into()),
        kind: AgentKind::Cli,
    };

    // Reads the stored `updated_at` for the agent's slug.
    let read_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM agents WHERE slug = ?1",
                fparams![agent.slug.as_str()],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    storage.ensure_agent(&agent).unwrap();
    let initial_updated_at = read_updated_at();

    // Pause between the two calls so any rewrite of the timestamp could be observed.
    std::thread::sleep(std::time::Duration::from_millis(5));

    storage.ensure_agent(&agent).unwrap();
    let fetched_updated_at = read_updated_at();

    assert_eq!(fetched_updated_at, initial_updated_at);
}
23583
/// Re-ensuring an agent under the same slug with a new name and version keeps the id and
/// persists the new values.
#[test]
fn ensure_agent_changed_metadata_updates_cached_slug() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let mut agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("1.0".into()),
        kind: AgentKind::Cli,
    };
    let original_id = storage.ensure_agent(&agent).unwrap();

    // Same slug, different metadata.
    agent.name = "Codex CLI".into();
    agent.version = Some("1.1".into());
    let updated_id = storage.ensure_agent(&agent).unwrap();

    let fetched: (String, Option<String>) = storage
        .conn
        .query_row_map(
            "SELECT name, version FROM agents WHERE slug = ?1",
            fparams![agent.slug.as_str()],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected: (String, Option<String>) = ("Codex CLI".into(), Some("1.1".into()));

    assert_eq!(original_id, updated_id);
    assert_eq!(fetched, expected);
}
23615
/// An ensured agent appears in `list_agents` under its slug.
#[test]
fn list_agents_returns_inserted() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    store
        .ensure_agent(&Agent {
            id: None,
            slug: "new_agent".into(),
            name: "New Agent".into(),
            version: None,
            kind: AgentKind::VsCode,
        })
        .unwrap();

    let listed = store.list_agents().unwrap();
    let found = listed.iter().any(|entry| entry.slug == "new_agent");
    assert!(found);
}
23634
/// Ensuring a workspace on a freshly opened store yields a positive id.
#[test]
fn ensure_workspace_creates_new() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let workspace_path = Path::new("/home/user/project");
    let assigned_id = store
        .ensure_workspace(workspace_path, Some("My Project"))
        .unwrap();

    assert!(assigned_id > 0);
}
23650
/// Ensuring the same workspace path twice returns the same id both times.
#[test]
fn ensure_workspace_returns_existing() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let workspace_path = Path::new("/home/user/myproject");
    let first_id = store.ensure_workspace(workspace_path, None).unwrap();
    let second_id = store.ensure_workspace(workspace_path, None).unwrap();

    assert_eq!(first_id, second_id);
}
23662
/// Re-ensuring a workspace path with a different display name keeps the id and stores the
/// new display name.
#[test]
fn ensure_workspace_changed_display_name_updates_cached_path() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let path = Path::new("/home/user/myproject");
    let id_before = storage.ensure_workspace(path, Some("Before")).unwrap();
    let id_after = storage.ensure_workspace(path, Some("After")).unwrap();

    let stored_name: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT display_name FROM workspaces WHERE path = ?1",
            fparams![path.to_string_lossy().as_ref()],
            |row| row.get_typed(0),
        )
        .unwrap();

    assert_eq!(id_before, id_after);
    assert_eq!(stored_name.as_deref(), Some("After"));
}
23685
/// An ensured workspace appears in `list_workspaces` under its path.
#[test]
fn list_workspaces_returns_inserted() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let workspace_path = Path::new("/test/workspace");
    store
        .ensure_workspace(workspace_path, Some("Test WS"))
        .unwrap();

    let listed = store.list_workspaces().unwrap();
    let found = listed
        .iter()
        .any(|entry| entry.path.to_str() == Some("/test/workspace"));
    assert!(found);
}
23703
/// Upserting a source that does not exist yet makes it retrievable through `get_source` with
/// the host label it was written with.
#[test]
fn upsert_source_creates_new() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let source = Source {
        id: "test-laptop".into(),
        kind: SourceKind::Ssh,
        host_label: Some("test.local".into()),
        machine_id: Some("test-machine-id".into()),
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };

    storage.upsert_source(&source).unwrap();

    // One `expect` replaces the former `is_some()` assertion followed by `unwrap()`, so a
    // missing row fails with a message that names the violated expectation.
    let fetched = storage
        .get_source("test-laptop")
        .unwrap()
        .expect("upserted source should be retrievable by id");
    assert_eq!(fetched.host_label, Some("test.local".into()));
}
23730
/// Upserting a second `Source` under an existing id overwrites the stored host label and
/// platform.
#[test]
fn upsert_source_updates_existing() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let source1 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Original Label".into()),
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&source1).unwrap();

    let source2 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Updated Label".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: Some(SqliteStorage::now_millis()),
    };
    storage.upsert_source(&source2).unwrap();

    let fetched = storage.get_source("my-source").unwrap().unwrap();
    assert_eq!(fetched.host_label, Some("Updated Label".into()));
    // Compare against the exact value written by the second upsert; a bare `is_some()` check
    // would also pass if some other platform string had been stored.
    assert_eq!(fetched.platform, Some("linux".into()));
}
23765
/// Upserting an identical source twice must leave every compared field of the stored row,
/// including both timestamps, unchanged.
#[test]
fn upsert_source_unchanged_preserves_updated_at() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let stable = Source {
        id: "stable-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("builder.local".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: Some(serde_json::json!({"role": "bench"})),
        created_at: None,
        updated_at: None,
    };

    store.upsert_source(&stable).unwrap();
    let before = store.get_source("stable-source").unwrap().unwrap();

    // Pause between the two calls so any rewrite of the timestamps could be observed.
    std::thread::sleep(std::time::Duration::from_millis(5));

    store.upsert_source(&stable).unwrap();
    let after = store.get_source("stable-source").unwrap().unwrap();

    assert_eq!(after.created_at, before.created_at);
    assert_eq!(after.updated_at, before.updated_at);
    assert_eq!(after.host_label, before.host_label);
    assert_eq!(after.platform, before.platform);
    assert_eq!(after.config_json, before.config_json);
}
23796
/// After a remote source row created by `ensure_source_for_conversation` is deleted, calling
/// `ensure_source_for_conversation` again for the same conversation must recreate it with the
/// conversation's `origin_host` as its host label.
#[test]
fn ensure_source_for_conversation_recreates_remote_source_after_delete() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws/cache-recreate")),
        external_id: Some("cache-recreate".into()),
        title: Some("Cache Recreate".into()),
        source_path: PathBuf::from("/log/cache-recreate.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: Some(16),
        metadata_json: serde_json::json!({}),
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("tester".into()),
            created_at: Some(1_700_000_000_000),
            content: "cache recreate".into(),
            extra_json: serde_json::json!({}),
            snippets: Vec::new(),
        }],
        source_id: "cache-remote-source".into(),
        origin_host: Some("builder-cache".into()),
    };

    // First call creates the source row.
    storage
        .ensure_source_for_conversation(&conversation)
        .unwrap();
    assert!(storage.get_source("cache-remote-source").unwrap().is_some());

    // Remove it again and confirm it is gone.
    let deleted = storage.delete_source("cache-remote-source", false).unwrap();
    assert!(deleted);
    assert!(storage.get_source("cache-remote-source").unwrap().is_none());

    // Second call must bring the row back. One `expect` replaces the former `is_some()`
    // assertion followed by `unwrap()`, so a missing row fails with a descriptive message.
    storage
        .ensure_source_for_conversation(&conversation)
        .unwrap();
    let recreated = storage
        .get_source("cache-remote-source")
        .unwrap()
        .expect("remote source should be recreated after deletion");
    assert_eq!(recreated.host_label.as_deref(), Some("builder-cache"));
}
23847
/// Deleting an upserted source reports success and makes `get_source` return `None`.
#[test]
fn delete_source_removes_entry() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    store
        .upsert_source(&Source {
            id: "to-delete".into(),
            kind: SourceKind::Local,
            host_label: None,
            machine_id: None,
            platform: None,
            config_json: None,
            created_at: Some(SqliteStorage::now_millis()),
            updated_at: None,
        })
        .unwrap();

    let was_deleted = store.delete_source("to-delete", false).unwrap();
    assert!(was_deleted);

    let remaining = store.get_source("to-delete").unwrap();
    assert!(remaining.is_none());
}
23872
/// Attempting to delete the `LOCAL_SOURCE_ID` source must return an error.
#[test]
fn delete_source_cannot_delete_local() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let attempt = store.delete_source(LOCAL_SOURCE_ID, false);

    assert!(attempt.is_err());
}
23882
/// A freshly opened store already lists a source whose id is `LOCAL_SOURCE_ID`.
#[test]
fn list_sources_includes_local() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let store = SqliteStorage::open(&database_file).unwrap();

    let listed = store.list_sources().unwrap();
    let has_local = listed.iter().any(|entry| entry.id == LOCAL_SOURCE_ID);

    assert!(has_local);
}
23892
/// Inserting a conversation whose `source_id` is whitespace-only must not create a source row
/// under that blank id; the conversation is stored under `LOCAL_SOURCE_ID` with no origin host.
#[test]
fn insert_conversation_tree_blank_local_source_normalizes_to_local_id() {
    let scratch = TempDir::new().unwrap();
    let database_file = scratch.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-local-source".into()),
        title: Some("Blank local source".into()),
        source_path: scratch.path().join("blank-local.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        // Whitespace-only id: the value under test.
        source_id: " ".into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // No row may exist under the blank id.
    let blank_row = storage.get_source(" ").unwrap();
    assert!(blank_row.is_none());

    let local_row = storage
        .get_source(LOCAL_SOURCE_ID)
        .unwrap()
        .expect("local source row should exist");
    assert_eq!(local_row.kind, SourceKind::Local);
    assert_eq!(local_row.host_label, None);

    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    assert_eq!(listed[0].source_id, LOCAL_SOURCE_ID);
    assert_eq!(listed[0].origin_host, None);
}
23951
23952 #[test]
23953 fn repeated_local_inserts_do_not_touch_bootstrap_source_row() {
23954 let dir = TempDir::new().unwrap();
23955 let db_path = dir.path().join("test.db");
23956 let storage = SqliteStorage::open(&db_path).unwrap();
23957
23958 let agent_id = storage
23959 .ensure_agent(&Agent {
23960 id: None,
23961 slug: "codex".into(),
23962 name: "Codex".into(),
23963 version: None,
23964 kind: AgentKind::Cli,
23965 })
23966 .unwrap();
23967
23968 let bootstrap_updated_at: i64 = storage
23969 .conn
23970 .query_row_map(
23971 "SELECT updated_at FROM sources WHERE id = ?1",
23972 fparams![LOCAL_SOURCE_ID],
23973 |row| row.get_typed(0),
23974 )
23975 .unwrap();
23976
23977 let make_conversation = |external_id: &str, suffix: &str| Conversation {
23978 id: None,
23979 agent_slug: "codex".into(),
23980 workspace: None,
23981 external_id: Some(external_id.into()),
23982 title: Some(format!("Local source {suffix}")),
23983 source_path: dir.path().join(format!("local-{suffix}.jsonl")),
23984 started_at: Some(1_700_000_000_000),
23985 ended_at: Some(1_700_000_000_001),
23986 approx_tokens: None,
23987 metadata_json: serde_json::Value::Null,
23988 messages: vec![Message {
23989 id: None,
23990 idx: 0,
23991 role: MessageRole::User,
23992 author: None,
23993 created_at: Some(1_700_000_000_000),
23994 content: format!("hello-{suffix}"),
23995 extra_json: serde_json::Value::Null,
23996 snippets: Vec::new(),
23997 }],
23998 source_id: LOCAL_SOURCE_ID.into(),
23999 origin_host: None,
24000 };
24001
24002 std::thread::sleep(std::time::Duration::from_millis(5));
24003 storage
24004 .insert_conversation_tree(agent_id, None, &make_conversation("local-source-1", "one"))
24005 .unwrap();
24006 let after_first_insert: i64 = storage
24007 .conn
24008 .query_row_map(
24009 "SELECT updated_at FROM sources WHERE id = ?1",
24010 fparams![LOCAL_SOURCE_ID],
24011 |row| row.get_typed(0),
24012 )
24013 .unwrap();
24014
24015 std::thread::sleep(std::time::Duration::from_millis(5));
24016 storage
24017 .insert_conversation_tree(agent_id, None, &make_conversation("local-source-2", "two"))
24018 .unwrap();
24019 let after_second_insert: i64 = storage
24020 .conn
24021 .query_row_map(
24022 "SELECT updated_at FROM sources WHERE id = ?1",
24023 fparams![LOCAL_SOURCE_ID],
24024 |row| row.get_typed(0),
24025 )
24026 .unwrap();
24027
24028 assert_eq!(after_first_insert, bootstrap_updated_at);
24029 assert_eq!(after_second_insert, bootstrap_updated_at);
24030 }
24031
24032 #[test]
24033 fn insert_conversation_tree_blank_remote_source_normalizes_to_origin_host() {
24034 let dir = TempDir::new().unwrap();
24035 let db_path = dir.path().join("test.db");
24036 let storage = SqliteStorage::open(&db_path).unwrap();
24037
24038 let agent_id = storage
24039 .ensure_agent(&Agent {
24040 id: None,
24041 slug: "codex".into(),
24042 name: "Codex".into(),
24043 version: None,
24044 kind: AgentKind::Cli,
24045 })
24046 .unwrap();
24047
24048 let conversation = Conversation {
24049 id: None,
24050 agent_slug: "codex".into(),
24051 workspace: None,
24052 external_id: Some("blank-remote-source".into()),
24053 title: Some("Blank remote source".into()),
24054 source_path: dir.path().join("blank-remote.jsonl"),
24055 started_at: Some(1_700_000_000_000),
24056 ended_at: Some(1_700_000_000_001),
24057 approx_tokens: None,
24058 metadata_json: serde_json::Value::Null,
24059 messages: vec![Message {
24060 id: None,
24061 idx: 0,
24062 role: MessageRole::User,
24063 author: None,
24064 created_at: Some(1_700_000_000_000),
24065 content: "hello".into(),
24066 extra_json: serde_json::Value::Null,
24067 snippets: Vec::new(),
24068 }],
24069 source_id: " ".into(),
24070 origin_host: Some("user@work-laptop".into()),
24071 };
24072
24073 storage
24074 .insert_conversation_tree(agent_id, None, &conversation)
24075 .unwrap();
24076
24077 assert!(storage.get_source(" ").unwrap().is_none());
24078 let source = storage
24079 .get_source("user@work-laptop")
24080 .unwrap()
24081 .expect("normalized remote source row should exist");
24082 assert_eq!(source.kind, SourceKind::Ssh);
24083 assert_eq!(source.host_label.as_deref(), Some("user@work-laptop"));
24084
24085 let conversations = storage.list_conversations(10, 0).unwrap();
24086 assert_eq!(conversations.len(), 1);
24087 assert_eq!(conversations[0].source_id, "user@work-laptop");
24088 assert_eq!(
24089 conversations[0].origin_host.as_deref(),
24090 Some("user@work-laptop")
24091 );
24092 }
24093
24094 #[test]
24095 fn insert_conversations_batched_normalizes_host_only_remote_source_id() {
24096 let dir = TempDir::new().unwrap();
24097 let db_path = dir.path().join("test.db");
24098 let storage = SqliteStorage::open(&db_path).unwrap();
24099
24100 let agent_id = storage
24101 .ensure_agent(&Agent {
24102 id: None,
24103 slug: "codex".into(),
24104 name: "Codex".into(),
24105 version: None,
24106 kind: AgentKind::Cli,
24107 })
24108 .unwrap();
24109
24110 let conversation = Conversation {
24111 id: None,
24112 agent_slug: "codex".into(),
24113 workspace: None,
24114 external_id: Some("batched-blank-remote-source".into()),
24115 title: Some("Batched blank remote source".into()),
24116 source_path: dir.path().join("batched-blank-remote.jsonl"),
24117 started_at: Some(1_700_000_000_000),
24118 ended_at: Some(1_700_000_000_001),
24119 approx_tokens: None,
24120 metadata_json: serde_json::Value::Null,
24121 messages: vec![Message {
24122 id: None,
24123 idx: 0,
24124 role: MessageRole::User,
24125 author: None,
24126 created_at: Some(1_700_000_000_000),
24127 content: "hello".into(),
24128 extra_json: serde_json::Value::Null,
24129 snippets: Vec::new(),
24130 }],
24131 source_id: " ".into(),
24132 origin_host: Some("user@batch-host".into()),
24133 };
24134
24135 storage
24136 .insert_conversations_batched(&[(agent_id, None, &conversation)])
24137 .unwrap();
24138
24139 assert!(storage.get_source(" ").unwrap().is_none());
24140 let source = storage
24141 .get_source("user@batch-host")
24142 .unwrap()
24143 .expect("normalized batched remote source row should exist");
24144 assert_eq!(source.kind, SourceKind::Ssh);
24145 assert_eq!(source.host_label.as_deref(), Some("user@batch-host"));
24146
24147 let conversations = storage.list_conversations(10, 0).unwrap();
24148 assert_eq!(conversations.len(), 1);
24149 assert_eq!(conversations[0].source_id, "user@batch-host");
24150 assert_eq!(
24151 conversations[0].origin_host.as_deref(),
24152 Some("user@batch-host")
24153 );
24154 }
24155
24156 #[test]
24157 fn get_source_ids_excludes_local() {
24158 let dir = TempDir::new().unwrap();
24159 let db_path = dir.path().join("test.db");
24160 let storage = SqliteStorage::open(&db_path).unwrap();
24161
24162 let source = Source {
24164 id: "remote-1".into(),
24165 kind: SourceKind::Ssh,
24166 host_label: Some("server".into()),
24167 machine_id: None,
24168 platform: None,
24169 config_json: None,
24170 created_at: Some(SqliteStorage::now_millis()),
24171 updated_at: None,
24172 };
24173 storage.upsert_source(&source).unwrap();
24174
24175 let ids = storage.get_source_ids().unwrap();
24176 assert!(!ids.contains(&LOCAL_SOURCE_ID.to_string()));
24177 assert!(ids.contains(&"remote-1".to_string()));
24178 }
24179
24180 #[test]
24185 fn get_last_scan_ts_returns_none_initially() {
24186 let dir = TempDir::new().unwrap();
24187 let db_path = dir.path().join("test.db");
24188 let storage = SqliteStorage::open(&db_path).unwrap();
24189
24190 let ts = storage.get_last_scan_ts().unwrap();
24191 assert!(ts.is_none());
24192 }
24193
24194 #[test]
24195 fn set_and_get_last_scan_ts() {
24196 let dir = TempDir::new().unwrap();
24197 let db_path = dir.path().join("test.db");
24198 let storage = SqliteStorage::open(&db_path).unwrap();
24199
24200 let expected_ts = 1700000000000_i64;
24201 storage.set_last_scan_ts(expected_ts).unwrap();
24202
24203 let actual_ts = storage.get_last_scan_ts().unwrap();
24204 assert_eq!(actual_ts, Some(expected_ts));
24205 }
24206
24207 #[test]
24208 fn connector_last_scan_ts_round_trip_normalizes_name() -> anyhow::Result<()> {
24209 let dir = TempDir::new()?;
24210 let db_path = dir.path().join("test.db");
24211 let storage = SqliteStorage::open(&db_path)?;
24212
24213 assert_eq!(storage.get_connector_last_scan_ts(" Codex ")?, None);
24214
24215 let expected_ts = 1_700_000_123_456_i64;
24216 storage.set_connector_last_scan_ts(" Codex ", expected_ts)?;
24217
24218 assert_eq!(
24219 storage.get_connector_last_scan_ts("codex")?,
24220 Some(expected_ts)
24221 );
24222 assert_eq!(
24223 storage.get_connector_last_scan_ts("CODEX")?,
24224 Some(expected_ts)
24225 );
24226 assert_eq!(storage.get_connector_last_scan_ts("claude-code")?, None);
24227 Ok(())
24228 }
24229
24230 #[test]
24231 fn connector_has_conversations_tracks_archived_agent_slug() -> anyhow::Result<()> {
24232 let dir = TempDir::new()?;
24233 let db_path = dir.path().join("test.db");
24234 let storage = SqliteStorage::open(&db_path)?;
24235 let agent_id = storage.ensure_agent(&Agent {
24236 id: None,
24237 slug: "codex".into(),
24238 name: "Codex".into(),
24239 version: None,
24240 kind: AgentKind::Cli,
24241 })?;
24242
24243 assert!(!storage.connector_has_conversations("codex")?);
24244
24245 let conversation = Conversation {
24246 id: None,
24247 agent_slug: "codex".into(),
24248 workspace: None,
24249 external_id: Some("connector-watermark-fixture".into()),
24250 title: Some("Connector watermark fixture".into()),
24251 source_path: PathBuf::from("/tmp/connector-watermark-fixture.jsonl"),
24252 started_at: Some(1_700_000_000_000),
24253 ended_at: Some(1_700_000_000_001),
24254 approx_tokens: None,
24255 metadata_json: serde_json::Value::Null,
24256 messages: vec![Message {
24257 id: None,
24258 idx: 0,
24259 role: MessageRole::User,
24260 author: None,
24261 created_at: Some(1_700_000_000_000),
24262 content: "per-connector watermark regression".into(),
24263 extra_json: serde_json::Value::Null,
24264 snippets: Vec::new(),
24265 }],
24266 source_id: LOCAL_SOURCE_ID.into(),
24267 origin_host: None,
24268 };
24269 storage.insert_conversation_tree(agent_id, None, &conversation)?;
24270
24271 assert!(storage.connector_has_conversations(" Codex ")?);
24272 assert!(!storage.connector_has_conversations("claude-code")?);
24273 assert!(!storage.connector_has_conversations(" ")?);
24274 Ok(())
24275 }
24276
24277 #[test]
24278 fn connector_scan_states_loads_watermarks_and_agent_presence() -> anyhow::Result<()> {
24279 let dir = TempDir::new()?;
24280 let db_path = dir.path().join("test.db");
24281 let storage = SqliteStorage::open(&db_path)?;
24282 let agent_id = storage.ensure_agent(&Agent {
24283 id: None,
24284 slug: "claude_code".into(),
24285 name: "Claude Code".into(),
24286 version: None,
24287 kind: AgentKind::Cli,
24288 })?;
24289 storage.set_connector_last_scan_ts(" Codex ", 1_700_000_123_456)?;
24290
24291 let conversation = Conversation {
24292 id: None,
24293 agent_slug: "claude_code".into(),
24294 workspace: None,
24295 external_id: Some("connector-scan-states-fixture".into()),
24296 title: Some("Connector scan states fixture".into()),
24297 source_path: PathBuf::from("/tmp/connector-scan-states-fixture.jsonl"),
24298 started_at: Some(1_700_000_000_000),
24299 ended_at: Some(1_700_000_000_001),
24300 approx_tokens: None,
24301 metadata_json: serde_json::Value::Null,
24302 messages: vec![Message {
24303 id: None,
24304 idx: 0,
24305 role: MessageRole::User,
24306 author: None,
24307 created_at: Some(1_700_000_000_000),
24308 content: "bulk connector scan state regression".into(),
24309 extra_json: serde_json::Value::Null,
24310 snippets: Vec::new(),
24311 }],
24312 source_id: LOCAL_SOURCE_ID.into(),
24313 origin_host: None,
24314 };
24315 storage.insert_conversation_tree(agent_id, None, &conversation)?;
24316
24317 let states = storage.connector_scan_states(&["codex", "claude", "gemini"])?;
24318 assert_eq!(
24319 states.get("codex").copied(),
24320 Some((Some(1_700_000_123_456), false)),
24321 "bulk state should preserve connector-specific watermarks"
24322 );
24323 assert_eq!(
24324 states.get("claude").copied(),
24325 Some((None, true)),
24326 "bulk state should honor known connector slug aliases"
24327 );
24328 assert_eq!(
24329 states.get("gemini").copied(),
24330 Some((None, false)),
24331 "bulk state should identify newly enabled connectors with no archived rows"
24332 );
24333 Ok(())
24334 }
24335
24336 #[test]
24337 fn connector_has_conversations_checks_known_agent_slug_aliases() -> anyhow::Result<()> {
24338 let dir = TempDir::new()?;
24339 let db_path = dir.path().join("test.db");
24340 let storage = SqliteStorage::open(&db_path)?;
24341 let agent_id = storage.ensure_agent(&Agent {
24342 id: None,
24343 slug: "claude_code".into(),
24344 name: "Claude Code".into(),
24345 version: None,
24346 kind: AgentKind::Cli,
24347 })?;
24348
24349 let conversation = Conversation {
24350 id: None,
24351 agent_slug: "claude_code".into(),
24352 workspace: None,
24353 external_id: Some("connector-watermark-claude-fixture".into()),
24354 title: Some("Claude connector watermark fixture".into()),
24355 source_path: PathBuf::from("/tmp/connector-watermark-claude-fixture.jsonl"),
24356 started_at: Some(1_700_000_000_000),
24357 ended_at: Some(1_700_000_000_001),
24358 approx_tokens: None,
24359 metadata_json: serde_json::Value::Null,
24360 messages: vec![Message {
24361 id: None,
24362 idx: 0,
24363 role: MessageRole::User,
24364 author: None,
24365 created_at: Some(1_700_000_000_000),
24366 content: "claude connector alias regression".into(),
24367 extra_json: serde_json::Value::Null,
24368 snippets: Vec::new(),
24369 }],
24370 source_id: LOCAL_SOURCE_ID.into(),
24371 origin_host: None,
24372 };
24373 storage.insert_conversation_tree(agent_id, None, &conversation)?;
24374
24375 assert!(
24376 storage.connector_has_conversations("claude")?,
24377 "the claude connector factory name must recognize legacy claude_code rows"
24378 );
24379 assert!(storage.connector_has_conversations("claude-code")?);
24380 assert!(storage.connector_has_conversations("claude_code")?);
24381 assert!(!storage.connector_has_conversations("codex")?);
24382 Ok(())
24383 }
24384
24385 #[test]
24390 fn now_millis_returns_reasonable_value() {
24391 let ts = SqliteStorage::now_millis();
24392 assert!(ts > 1577836800000);
24394 assert!(ts < 4102444800000);
24396 }
24397
24398 #[test]
24403 fn msgpack_roundtrip_basic_object() {
24404 let value = serde_json::json!({
24405 "key": "value",
24406 "number": 42,
24407 "nested": { "inner": true }
24408 });
24409
24410 let bytes = serialize_json_to_msgpack(&value).expect("should serialize");
24411 let recovered = deserialize_msgpack_to_json(&bytes);
24412
24413 assert_eq!(value, recovered);
24414 }
24415
24416 #[test]
24417 fn msgpack_returns_none_for_null() {
24418 let value = serde_json::Value::Null;
24419 assert!(serialize_json_to_msgpack(&value).is_none());
24420 }
24421
24422 #[test]
24423 fn message_insert_stores_null_extra_json_as_sql_null() {
24424 let dir = TempDir::new().unwrap();
24425 let db_path = dir.path().join("test.db");
24426 let storage = SqliteStorage::open(&db_path).unwrap();
24427 let agent_id = storage
24428 .ensure_agent(&Agent {
24429 id: None,
24430 slug: "codex".into(),
24431 name: "Codex".into(),
24432 version: None,
24433 kind: AgentKind::Cli,
24434 })
24435 .unwrap();
24436 let conversation = Conversation {
24437 id: None,
24438 agent_slug: "codex".into(),
24439 workspace: None,
24440 external_id: Some("null-extra-json".into()),
24441 title: Some("Null extra_json".into()),
24442 source_path: PathBuf::from("/tmp/null-extra-json.jsonl"),
24443 started_at: Some(1_700_000_000_000),
24444 ended_at: Some(1_700_000_000_001),
24445 approx_tokens: None,
24446 metadata_json: serde_json::Value::Null,
24447 messages: vec![Message {
24448 id: None,
24449 idx: 0,
24450 role: MessageRole::User,
24451 author: None,
24452 created_at: Some(1_700_000_000_000),
24453 content: "null metadata message".into(),
24454 extra_json: serde_json::Value::Null,
24455 snippets: Vec::new(),
24456 }],
24457 source_id: LOCAL_SOURCE_ID.into(),
24458 origin_host: None,
24459 };
24460
24461 let conversation_id = storage
24462 .insert_conversation_tree(agent_id, None, &conversation)
24463 .unwrap()
24464 .conversation_id;
24465
24466 let (extra_json, extra_bin): (Option<String>, Option<Vec<u8>>) = storage
24467 .conn
24468 .query_row_map(
24469 "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
24470 fparams![conversation_id],
24471 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
24472 )
24473 .unwrap();
24474 assert!(extra_json.is_none());
24475 assert!(extra_bin.is_none());
24476
24477 let stored = storage.fetch_messages(conversation_id).unwrap();
24478 assert!(stored[0].extra_json.is_null());
24479 }
24480
24481 #[test]
24482 fn message_insert_stores_nonempty_extra_json_as_msgpack_only() {
24483 let dir = TempDir::new().unwrap();
24484 let db_path = dir.path().join("test.db");
24485 let storage = SqliteStorage::open(&db_path).unwrap();
24486 let agent_id = storage
24487 .ensure_agent(&Agent {
24488 id: None,
24489 slug: "codex".into(),
24490 name: "Codex".into(),
24491 version: None,
24492 kind: AgentKind::Cli,
24493 })
24494 .unwrap();
24495 let extra_json = serde_json::json!({ "idx": 7, "kind": "profile" });
24496 let conversation = Conversation {
24497 id: None,
24498 agent_slug: "codex".into(),
24499 workspace: None,
24500 external_id: Some("msgpack-extra-json".into()),
24501 title: Some("MessagePack extra_json".into()),
24502 source_path: PathBuf::from("/tmp/msgpack-extra-json.jsonl"),
24503 started_at: Some(1_700_000_000_000),
24504 ended_at: Some(1_700_000_000_001),
24505 approx_tokens: None,
24506 metadata_json: serde_json::Value::Null,
24507 messages: vec![Message {
24508 id: None,
24509 idx: 0,
24510 role: MessageRole::User,
24511 author: None,
24512 created_at: Some(1_700_000_000_000),
24513 content: "msgpack metadata message".into(),
24514 extra_json: extra_json.clone(),
24515 snippets: Vec::new(),
24516 }],
24517 source_id: LOCAL_SOURCE_ID.into(),
24518 origin_host: None,
24519 };
24520
24521 let conversation_id = storage
24522 .insert_conversation_tree(agent_id, None, &conversation)
24523 .unwrap()
24524 .conversation_id;
24525
24526 let (extra_json_text, extra_bin): (Option<String>, Option<Vec<u8>>) = storage
24527 .conn
24528 .query_row_map(
24529 "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
24530 fparams![conversation_id],
24531 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
24532 )
24533 .unwrap();
24534 assert!(extra_json_text.is_none());
24535 assert!(extra_bin.is_some());
24536
24537 let stored = storage.fetch_messages(conversation_id).unwrap();
24538 assert_eq!(stored[0].extra_json, extra_json);
24539 }
24540
24541 #[test]
24542 fn conversation_insert_preserves_null_metadata_json_as_json_null() {
24543 let dir = TempDir::new().unwrap();
24544 let db_path = dir.path().join("test.db");
24545 let storage = SqliteStorage::open(&db_path).unwrap();
24546 let agent_id = storage
24547 .ensure_agent(&Agent {
24548 id: None,
24549 slug: "codex".into(),
24550 name: "Codex".into(),
24551 version: None,
24552 kind: AgentKind::Cli,
24553 })
24554 .unwrap();
24555 let conversation = Conversation {
24556 id: None,
24557 agent_slug: "codex".into(),
24558 workspace: None,
24559 external_id: Some("null-conversation-metadata".into()),
24560 title: Some("Null conversation metadata".into()),
24561 source_path: PathBuf::from("/tmp/null-conversation-metadata.jsonl"),
24562 started_at: Some(1_700_000_000_000),
24563 ended_at: Some(1_700_000_000_001),
24564 approx_tokens: None,
24565 metadata_json: serde_json::Value::Null,
24566 messages: vec![Message {
24567 id: None,
24568 idx: 0,
24569 role: MessageRole::User,
24570 author: None,
24571 created_at: Some(1_700_000_000_000),
24572 content: "null conversation metadata message".into(),
24573 extra_json: serde_json::Value::Null,
24574 snippets: Vec::new(),
24575 }],
24576 source_id: LOCAL_SOURCE_ID.into(),
24577 origin_host: None,
24578 };
24579
24580 storage
24581 .insert_conversation_tree(agent_id, None, &conversation)
24582 .unwrap();
24583
24584 let (metadata_json, metadata_bin): (Option<String>, Option<Vec<u8>>) = storage
24585 .conn
24586 .query_row_map(
24587 "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
24588 fparams!["null-conversation-metadata"],
24589 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
24590 )
24591 .unwrap();
24592 assert_eq!(metadata_json.as_deref(), Some("null"));
24593 assert!(metadata_bin.is_none());
24594
24595 let listed = storage.list_conversations(10, 0).unwrap();
24596 assert!(listed[0].metadata_json.is_null());
24597 }
24598
24599 #[test]
24600 fn conversation_insert_stores_nonempty_metadata_as_msgpack_only() {
24601 let dir = TempDir::new().unwrap();
24602 let db_path = dir.path().join("test.db");
24603 let storage = SqliteStorage::open(&db_path).unwrap();
24604 let agent_id = storage
24605 .ensure_agent(&Agent {
24606 id: None,
24607 slug: "codex".into(),
24608 name: "Codex".into(),
24609 version: None,
24610 kind: AgentKind::Cli,
24611 })
24612 .unwrap();
24613 let metadata_json = serde_json::json!({ "bench": true, "source": "profile" });
24614 let conversation = Conversation {
24615 id: None,
24616 agent_slug: "codex".into(),
24617 workspace: None,
24618 external_id: Some("msgpack-conversation-metadata".into()),
24619 title: Some("MessagePack conversation metadata".into()),
24620 source_path: PathBuf::from("/tmp/msgpack-conversation-metadata.jsonl"),
24621 started_at: Some(1_700_000_000_000),
24622 ended_at: Some(1_700_000_000_001),
24623 approx_tokens: None,
24624 metadata_json: metadata_json.clone(),
24625 messages: vec![Message {
24626 id: None,
24627 idx: 0,
24628 role: MessageRole::User,
24629 author: None,
24630 created_at: Some(1_700_000_000_000),
24631 content: "msgpack conversation metadata message".into(),
24632 extra_json: serde_json::Value::Null,
24633 snippets: Vec::new(),
24634 }],
24635 source_id: LOCAL_SOURCE_ID.into(),
24636 origin_host: None,
24637 };
24638
24639 storage
24640 .insert_conversation_tree(agent_id, None, &conversation)
24641 .unwrap();
24642
24643 let (metadata_text, metadata_bin): (Option<String>, Option<Vec<u8>>) = storage
24644 .conn
24645 .query_row_map(
24646 "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
24647 fparams!["msgpack-conversation-metadata"],
24648 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
24649 )
24650 .unwrap();
24651 assert!(metadata_text.is_none());
24652 assert!(metadata_bin.is_some());
24653
24654 let listed = storage.list_conversations(10, 0).unwrap();
24655 assert_eq!(listed[0].metadata_json, metadata_json);
24656 }
24657
24658 #[test]
24659 fn msgpack_returns_none_for_empty_object() {
24660 let value = serde_json::json!({});
24661 assert!(serialize_json_to_msgpack(&value).is_none());
24662 }
24663
24664 #[test]
24665 fn parse_historical_json_column_preserves_large_payloads_as_raw_json() {
24666 let raw = format!("{{\"blob\":\"{}\"}}", "x".repeat(1_000_000));
24667
24668 let value = parse_historical_json_column(Some(raw.clone()));
24669
24670 assert_eq!(historical_raw_json(&value), Some(raw.as_str()));
24671 assert_eq!(json_value_size_hint(&value), raw.len());
24672 }
24673
24674 #[test]
24675 fn parse_historical_json_column_preserves_small_payloads_as_raw_json() {
24676 let raw = String::from("{\"ok\":true,\"n\":1}");
24677
24678 let value = parse_historical_json_column(Some(raw.clone()));
24679
24680 assert_eq!(historical_raw_json(&value), Some(raw.as_str()));
24681 }
24682
24683 #[test]
24684 fn msgpack_serializes_non_empty_array() {
24685 let value = serde_json::json!([1, 2, 3]);
24686 let bytes = serialize_json_to_msgpack(&value).expect("should serialize array");
24687 let recovered = deserialize_msgpack_to_json(&bytes);
24688 assert_eq!(value, recovered);
24689 }
24690
24691 #[test]
24692 fn msgpack_smaller_than_json() {
24693 let value = serde_json::json!({
24694 "field_name_one": "some_value",
24695 "field_name_two": 123456,
24696 "field_name_three": [1, 2, 3, 4, 5],
24697 "field_name_four": { "nested": true }
24698 });
24699
24700 let json_bytes = serde_json::to_vec(&value).unwrap();
24701 let msgpack_bytes = serialize_json_to_msgpack(&value).unwrap();
24702
24703 assert!(
24705 msgpack_bytes.len() < json_bytes.len(),
24706 "MessagePack ({} bytes) should be smaller than JSON ({} bytes)",
24707 msgpack_bytes.len(),
24708 json_bytes.len()
24709 );
24710 }
24711
24712 #[test]
24713 fn migration_v7_adds_binary_columns() {
24714 let dir = TempDir::new().unwrap();
24715 let db_path = dir.path().join("test.db");
24716 let storage = SqliteStorage::open(&db_path).unwrap();
24717
24718 let has_metadata_bin = storage
24720 .raw()
24721 .query("PRAGMA table_info(conversations)")
24722 .unwrap()
24723 .iter()
24724 .any(|row| row.get_typed::<String>(1).unwrap() == "metadata_bin");
24725 assert!(
24726 has_metadata_bin,
24727 "conversations should have metadata_bin column"
24728 );
24729
24730 let has_extra_bin = storage
24732 .raw()
24733 .query("PRAGMA table_info(messages)")
24734 .unwrap()
24735 .iter()
24736 .any(|row| row.get_typed::<String>(1).unwrap() == "extra_bin");
24737 assert!(has_extra_bin, "messages should have extra_bin column");
24738 }
24739
24740 #[test]
24741 fn insert_conversation_tree_rehydrates_append_tail_state_cache_after_manual_clear() {
24742 let dir = TempDir::new().unwrap();
24743 let db_path = dir.path().join("append-tail-state-cache.db");
24744 let storage = SqliteStorage::open(&db_path).unwrap();
24745 let agent_id = storage
24746 .ensure_agent(&Agent {
24747 id: None,
24748 slug: "codex".into(),
24749 name: "Codex".into(),
24750 version: None,
24751 kind: AgentKind::Cli,
24752 })
24753 .unwrap();
24754 let workspace = PathBuf::from("/ws/profiled-append-remote");
24755 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
24756
24757 let initial = make_profiled_append_remote_merge_conversation(11, 5);
24758 let insert_outcome = storage
24759 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
24760 .unwrap();
24761 let conversation_id = insert_outcome.conversation_id;
24762
24763 let initial_tail: (Option<i64>, Option<i64>, Option<i64>) = storage
24764 .raw()
24765 .query_row_map(
24766 "SELECT ended_at, last_message_idx, last_message_created_at
24767 FROM conversation_tail_state
24768 WHERE conversation_id = ?1",
24769 fparams![conversation_id],
24770 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
24771 )
24772 .unwrap();
24773 assert_eq!(initial_tail, (Some(111_005), Some(4), Some(111_004)));
24774
24775 storage
24776 .raw()
24777 .execute_compat(
24778 "UPDATE conversations SET ended_at = ?1 WHERE id = ?2",
24779 fparams![111_999_i64, conversation_id],
24780 )
24781 .unwrap();
24782 storage
24783 .raw()
24784 .execute_compat(
24785 "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
24786 fparams![conversation_id],
24787 )
24788 .unwrap();
24789
24790 let appended = make_profiled_append_remote_merge_conversation(11, 10);
24791 let append_outcome = storage
24792 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
24793 .unwrap();
24794 assert_eq!(append_outcome.inserted_indices, vec![5, 6, 7, 8, 9]);
24795
24796 let final_tail: (Option<i64>, Option<i64>, Option<i64>) = storage
24797 .raw()
24798 .query_row_map(
24799 "SELECT ended_at, last_message_idx, last_message_created_at
24800 FROM conversation_tail_state
24801 WHERE conversation_id = ?1",
24802 fparams![conversation_id],
24803 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
24804 )
24805 .unwrap();
24806 assert_eq!(final_tail, (Some(111_999), Some(9), Some(111_009)));
24807 }
24808
24809 #[test]
24810 fn msgpack_deserialize_empty_returns_default() {
24811 let recovered = deserialize_msgpack_to_json(&[]);
24812 assert_eq!(recovered, serde_json::Value::Object(serde_json::Map::new()));
24813 }
24814
24815 #[test]
24816 fn msgpack_deserialize_garbage_returns_default() {
24817 let recovered = deserialize_msgpack_to_json(&[0x85]);
24820 assert_eq!(recovered, serde_json::Value::Object(serde_json::Map::new()));
24821 }
24822
24823 #[test]
24824 fn stats_aggregator_collects_and_expands() {
24825 let mut agg = StatsAggregator::new();
24826 assert!(agg.is_empty());
24827
24828 agg.record("claude", "local", 100, 5, 500);
24831 agg.record("codex", "local", 100, 3, 300);
24833 agg.record("claude", "local", 101, 2, 200);
24835
24836 assert!(!agg.is_empty());
24837 assert_eq!(agg.raw_entry_count(), 3);
24838
24839 let entries = agg.expand();
24840 assert_eq!(entries.len(), 10);
24868
24869 let day100_all = entries
24871 .iter()
24872 .find(|(d, a, s, _)| *d == 100 && a == "all" && s == "all")
24873 .unwrap();
24874 assert_eq!(day100_all.3.session_count_delta, 2);
24875 assert_eq!(day100_all.3.message_count_delta, 8);
24876 assert_eq!(day100_all.3.total_chars_delta, 800);
24877 }
24878
24879 #[test]
24884 fn lazy_franken_db_not_open_before_get() {
24885 let dir = TempDir::new().unwrap();
24886 let db_path = dir.path().join("lazy_test.db");
24887
24888 let _storage = SqliteStorage::open(&db_path).unwrap();
24890
24891 let lazy = LazyFrankenDb::new(db_path);
24892 assert!(
24893 !lazy.is_open(),
24894 "LazyFrankenDb must not open on construction"
24895 );
24896 }
24897
24898 #[test]
24899 fn lazy_franken_db_opens_on_first_get() {
24900 let dir = TempDir::new().unwrap();
24901 let db_path = dir.path().join("lazy_test.db");
24902
24903 let _storage = SqliteStorage::open(&db_path).unwrap();
24905 drop(_storage);
24906
24907 let lazy = LazyFrankenDb::new(db_path);
24908 assert!(!lazy.is_open());
24909
24910 let conn = lazy.get("test").expect("should open successfully");
24911 let count: i64 = conn
24912 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
24913 r.get_typed(0)
24914 })
24915 .unwrap();
24916 assert_eq!(count, 0);
24917 drop(conn);
24918
24919 assert!(lazy.is_open(), "LazyFrankenDb must be open after get()");
24920 }
24921
24922 #[test]
24923 fn lazy_franken_db_reuses_connection() {
24924 let dir = TempDir::new().unwrap();
24925 let db_path = dir.path().join("lazy_test.db");
24926 let _storage = SqliteStorage::open(&db_path).unwrap();
24927 drop(_storage);
24928
24929 let lazy = LazyFrankenDb::new(db_path);
24930
24931 {
24933 let conn = lazy.get("first").unwrap();
24934 conn.execute_batch("CREATE TABLE IF NOT EXISTS test_tbl (id INTEGER)")
24935 .unwrap();
24936 }
24937
24938 {
24940 let conn = lazy.get("second").unwrap();
24941 let count: i64 = conn
24942 .query_row_map("SELECT COUNT(*) FROM test_tbl", fparams![], |r| {
24943 r.get_typed(0)
24944 })
24945 .unwrap();
24946 assert_eq!(count, 0);
24947 }
24948 }
24949
24950 #[test]
24951 fn lazy_franken_db_not_found_error() {
24952 let dir = TempDir::new().unwrap();
24953 let db_path = dir.path().join("nonexistent.db");
24954
24955 let lazy = LazyFrankenDb::new(db_path);
24956 let result = lazy.get("test");
24957 assert!(result.is_err());
24958 assert!(
24959 matches!(result.unwrap_err(), LazyDbError::NotFound(_)),
24960 "should return NotFound for missing DB"
24961 );
24962 }
24963
24964 #[test]
24965 fn lazy_franken_db_path_accessor() {
24966 let path = PathBuf::from("/tmp/test_lazy.db");
24967 let lazy = LazyFrankenDb::new(path.clone());
24968 assert_eq!(lazy.path(), path.as_path());
24969 }
24970
24971 #[test]
24976 fn sql_like_match_basic_patterns() {
24977 assert!(sql_like_match("claude-opus-4-20250101", "claude-opus-4%"));
24978 assert!(sql_like_match("claude-opus-4", "claude-opus-4%"));
24979 assert!(!sql_like_match("claude-sonnet-4", "claude-opus-4%"));
24980
24981 assert!(sql_like_match("gemini-2.0-flash-001", "gemini-2%flash%"));
24983 assert!(sql_like_match("gemini-2-flash", "gemini-2%flash%"));
24984 assert!(!sql_like_match("gemini-2-pro", "gemini-2%flash%"));
24985
24986 assert!(sql_like_match("hello", "hello"));
24988 assert!(!sql_like_match("hello!", "hello"));
24989
24990 assert!(sql_like_match("gpt-4o", "gpt-4_"));
24992 assert!(!sql_like_match("gpt-4oo", "gpt-4_"));
24993
24994 assert!(sql_like_match("Claude-Opus-4", "claude-opus-4%"));
24996 }
24997
24998 #[test]
24999 fn date_str_to_day_id_converts_correctly() {
25000 assert_eq!(date_str_to_day_id("2025-10-01").unwrap(), 2100);
25002 assert_eq!(date_str_to_day_id("2024-04-01").unwrap(), 1552);
25004 assert!(date_str_to_day_id("invalid").is_err());
25005 }
25006
25007 #[test]
25008 fn pricing_table_lookup_selects_matching_entry() {
25009 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
25010 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
25011 let table = PricingTable {
25012 entries: vec![
25013 PricingEntry {
25014 model_pattern: "claude-opus-4%".into(),
25015 provider: "anthropic".into(),
25016 input_cost_per_mtok: 15.0,
25017 output_cost_per_mtok: 75.0,
25018 cache_read_cost_per_mtok: Some(1.5),
25019 cache_creation_cost_per_mtok: Some(18.75),
25020 effective_day_id: effective_day,
25021 },
25022 PricingEntry {
25023 model_pattern: "claude-sonnet-4%".into(),
25024 provider: "anthropic".into(),
25025 input_cost_per_mtok: 3.0,
25026 output_cost_per_mtok: 15.0,
25027 cache_read_cost_per_mtok: Some(0.3),
25028 cache_creation_cost_per_mtok: Some(3.75),
25029 effective_day_id: effective_day,
25030 },
25031 ],
25032 };
25033
25034 let result = table.lookup("claude-opus-4-20260101", lookup_day);
25035 assert!(result.is_some());
25036 assert_eq!(result.unwrap().input_cost_per_mtok, 15.0);
25037
25038 let result = table.lookup("claude-sonnet-4-latest", lookup_day);
25039 assert!(result.is_some());
25040 assert_eq!(result.unwrap().input_cost_per_mtok, 3.0);
25041
25042 assert!(table.lookup("unknown-model", lookup_day).is_none());
25043 }
25044
25045 #[test]
25046 fn pricing_table_lookup_respects_effective_date() {
25047 let effective_day_1 = date_str_to_day_id("2025-10-01").unwrap();
25048 let effective_day_2 = date_str_to_day_id("2026-01-01").unwrap();
25049 let table = PricingTable {
25050 entries: vec![
25051 PricingEntry {
25052 model_pattern: "claude-opus-4%".into(),
25053 provider: "anthropic".into(),
25054 input_cost_per_mtok: 15.0,
25055 output_cost_per_mtok: 75.0,
25056 cache_read_cost_per_mtok: None,
25057 cache_creation_cost_per_mtok: None,
25058 effective_day_id: effective_day_1,
25059 },
25060 PricingEntry {
25061 model_pattern: "claude-opus-4%".into(),
25062 provider: "anthropic".into(),
25063 input_cost_per_mtok: 12.0,
25064 output_cost_per_mtok: 60.0,
25065 cache_read_cost_per_mtok: None,
25066 cache_creation_cost_per_mtok: None,
25067 effective_day_id: effective_day_2,
25068 },
25069 ],
25070 };
25071
25072 let result = table.lookup("claude-opus-4", date_str_to_day_id("2025-11-01").unwrap());
25074 assert!(result.is_some());
25075 assert_eq!(result.unwrap().input_cost_per_mtok, 15.0);
25076
25077 let result = table.lookup("claude-opus-4", date_str_to_day_id("2026-02-01").unwrap());
25079 assert!(result.is_some());
25080 assert_eq!(result.unwrap().input_cost_per_mtok, 12.0);
25081
25082 assert!(
25084 table
25085 .lookup("claude-opus-4", date_str_to_day_id("2024-01-01").unwrap())
25086 .is_none()
25087 );
25088 }
25089
25090 #[test]
25091 fn pricing_table_lookup_specificity_tiebreak() {
25092 let effective_day = date_str_to_day_id("2025-01-01").unwrap();
25093 let lookup_day = date_str_to_day_id("2026-01-01").unwrap();
25094 let table = PricingTable {
25095 entries: vec![
25096 PricingEntry {
25097 model_pattern: "gpt-4%".into(),
25098 provider: "openai".into(),
25099 input_cost_per_mtok: 10.0,
25100 output_cost_per_mtok: 30.0,
25101 cache_read_cost_per_mtok: None,
25102 cache_creation_cost_per_mtok: None,
25103 effective_day_id: effective_day,
25104 },
25105 PricingEntry {
25106 model_pattern: "gpt-4-turbo%".into(),
25107 provider: "openai".into(),
25108 input_cost_per_mtok: 5.0,
25109 output_cost_per_mtok: 15.0,
25110 cache_read_cost_per_mtok: None,
25111 cache_creation_cost_per_mtok: None,
25112 effective_day_id: effective_day,
25113 },
25114 ],
25115 };
25116
25117 let result = table.lookup("gpt-4-turbo-2025", lookup_day);
25119 assert!(result.is_some());
25120 assert_eq!(result.unwrap().input_cost_per_mtok, 5.0);
25121
25122 let result = table.lookup("gpt-4o", lookup_day);
25124 assert!(result.is_some());
25125 assert_eq!(result.unwrap().input_cost_per_mtok, 10.0);
25126 }
25127
25128 #[test]
25129 fn pricing_table_compute_cost_basic() {
25130 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
25131 let table = PricingTable {
25132 entries: vec![PricingEntry {
25133 model_pattern: "claude-opus-4%".into(),
25134 provider: "anthropic".into(),
25135 input_cost_per_mtok: 15.0,
25136 output_cost_per_mtok: 75.0,
25137 cache_read_cost_per_mtok: Some(1.5),
25138 cache_creation_cost_per_mtok: Some(18.75),
25139 effective_day_id: effective_day,
25140 }],
25141 };
25142
25143 let cost = table.compute_cost(
25144 Some("claude-opus-4-latest"),
25145 date_str_to_day_id("2026-02-06").unwrap(),
25146 Some(1000),
25147 Some(500),
25148 None,
25149 None,
25150 );
25151 assert!(cost.is_some());
25152 assert!((cost.unwrap() - 0.0525).abs() < 1e-10);
25154 }
25155
25156 #[test]
25157 fn pricing_table_compute_cost_with_cache() {
25158 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
25159 let table = PricingTable {
25160 entries: vec![PricingEntry {
25161 model_pattern: "claude-opus-4%".into(),
25162 provider: "anthropic".into(),
25163 input_cost_per_mtok: 15.0,
25164 output_cost_per_mtok: 75.0,
25165 cache_read_cost_per_mtok: Some(1.5),
25166 cache_creation_cost_per_mtok: Some(18.75),
25167 effective_day_id: effective_day,
25168 }],
25169 };
25170
25171 let cost = table.compute_cost(
25172 Some("claude-opus-4-latest"),
25173 date_str_to_day_id("2026-02-06").unwrap(),
25174 Some(1_000_000),
25175 Some(100_000),
25176 Some(500_000),
25177 Some(200_000),
25178 );
25179 assert!(cost.is_some());
25180 assert!((cost.unwrap() - 16.5).abs() < 1e-10);
25186 }
25187
25188 #[test]
25189 fn pricing_table_compute_cost_returns_none_for_unknown_model() {
25190 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
25191 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
25192 let table = PricingTable {
25193 entries: vec![PricingEntry {
25194 model_pattern: "claude-opus-4%".into(),
25195 provider: "anthropic".into(),
25196 input_cost_per_mtok: 15.0,
25197 output_cost_per_mtok: 75.0,
25198 cache_read_cost_per_mtok: None,
25199 cache_creation_cost_per_mtok: None,
25200 effective_day_id: effective_day,
25201 }],
25202 };
25203
25204 assert!(
25205 table
25206 .compute_cost(
25207 Some("unknown-model"),
25208 lookup_day,
25209 Some(1000),
25210 Some(500),
25211 None,
25212 None
25213 )
25214 .is_none()
25215 );
25216 assert!(
25217 table
25218 .compute_cost(None, lookup_day, Some(1000), Some(500), None, None)
25219 .is_none()
25220 );
25221 assert!(
25222 table
25223 .compute_cost(Some("claude-opus-4"), lookup_day, None, None, None, None)
25224 .is_none()
25225 );
25226 }
25227
25228 #[test]
25229 fn pricing_table_load_from_db() {
25230 let dir = TempDir::new().unwrap();
25231 let db_path = dir.path().join("test.db");
25232 let storage = SqliteStorage::open(&db_path).unwrap();
25233
25234 let table = PricingTable::load(&storage.conn).unwrap();
25235 assert!(!table.is_empty());
25236
25237 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
25238
25239 let opus = table.lookup("claude-opus-4-latest", lookup_day);
25240 assert!(opus.is_some());
25241 assert_eq!(opus.unwrap().input_cost_per_mtok, 15.0);
25242
25243 let flash = table.lookup("gemini-2.0-flash-001", lookup_day);
25244 assert!(flash.is_some());
25245 assert_eq!(flash.unwrap().input_cost_per_mtok, 0.075);
25246 }
25247
25248 #[test]
25249 fn pricing_table_load_rejects_invalid_effective_date() {
25250 let dir = TempDir::new().unwrap();
25251 let db_path = dir.path().join("test.db");
25252 let storage = SqliteStorage::open(&db_path).unwrap();
25253
25254 storage
25255 .conn
25256 .execute_compat(
25257 "INSERT INTO model_pricing (
25258 model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
25259 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
25260 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
25261 fparams![
25262 "broken-model%",
25263 "test",
25264 1.0_f64,
25265 2.0_f64,
25266 Option::<f64>::None,
25267 Option::<f64>::None,
25268 "not-a-date"
25269 ],
25270 )
25271 .unwrap();
25272
25273 let err = PricingTable::load(&storage.conn).unwrap_err();
25274 assert!(err.to_string().contains("invalid effective_date"));
25275 }
25276
25277 #[test]
25278 fn pricing_diagnostics_tracks_coverage() {
25279 let mut diag = PricingDiagnostics::default();
25280 diag.record_priced();
25281 diag.record_priced();
25282 diag.record_unpriced(Some("custom-model-v1"));
25283 diag.record_unpriced(Some("custom-model-v1"));
25284 diag.record_unpriced(None);
25285
25286 assert_eq!(diag.priced_count, 2);
25287 assert_eq!(diag.unpriced_count, 3);
25288 assert_eq!(diag.unknown_models.len(), 2);
25289 assert_eq!(diag.unknown_models["custom-model-v1"], 2);
25290 assert_eq!(diag.unknown_models["(none)"], 1);
25291 }
25292
25293 fn franken_storage_in_memory() -> FrankenStorage {
25303 let conn = FrankenConnection::open(":memory:").unwrap();
25304 let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));
25305 storage.run_migrations().unwrap();
25306 storage.apply_config().unwrap();
25307 storage
25308 }
25309
25310 #[test]
25311 fn franken_migrations_create_all_tables() {
25312 let storage = franken_storage_in_memory();
25313
25314 let version = storage.schema_version().unwrap();
25316 assert_eq!(
25317 version, CURRENT_SCHEMA_VERSION,
25318 "fresh FrankenStorage should be at current schema version"
25319 );
25320
25321 let rows = storage
25323 .raw()
25324 .query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
25325 .unwrap();
25326 let table_names: Vec<String> = rows
25327 .iter()
25328 .filter_map(|r| r.get_typed::<String>(0).ok())
25329 .collect();
25330
25331 for required in [
25332 "meta",
25333 "agents",
25334 "workspaces",
25335 "conversations",
25336 "messages",
25337 "snippets",
25338 "tags",
25339 "conversation_tags",
25340 ] {
25341 assert!(
25342 table_names.contains(&required.to_string()),
25343 "missing table: {required}"
25344 );
25345 }
25346
25347 assert!(
25349 table_names.contains(&"sources".to_string()),
25350 "missing sources table"
25351 );
25352
25353 assert!(
25355 table_names.contains(&"daily_stats".to_string()),
25356 "missing daily_stats table"
25357 );
25358
25359 assert!(
25361 table_names.contains(&"embedding_jobs".to_string()),
25362 "missing embedding_jobs table"
25363 );
25364
25365 for analytics_table in ["message_metrics", "usage_hourly", "usage_daily"] {
25367 assert!(
25368 table_names.contains(&analytics_table.to_string()),
25369 "missing table: {analytics_table}"
25370 );
25371 }
25372 assert!(
25373 table_names.contains(&"conversation_tail_state".to_string()),
25374 "missing conversation_tail_state table"
25375 );
25376 assert!(
25377 table_names.contains(&"conversation_external_lookup".to_string()),
25378 "missing conversation_external_lookup table"
25379 );
25380 assert!(
25381 table_names.contains(&"conversation_external_tail_lookup".to_string()),
25382 "missing conversation_external_tail_lookup table"
25383 );
25384
25385 let rows = storage
25388 .raw()
25389 .query("SELECT COUNT(*) FROM _schema_migrations;")
25390 .unwrap();
25391 let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
25392 assert_eq!(
25393 count,
25394 (13..=CURRENT_SCHEMA_VERSION).count() as i64,
25395 "_schema_migrations should record the V13 base schema and post-V13 migrations"
25396 );
25397
25398 let rows = storage
25400 .raw()
25401 .query("SELECT version FROM _schema_migrations ORDER BY version;")
25402 .unwrap();
25403 let versions: Vec<i64> = rows
25404 .iter()
25405 .map(|row| row.get_typed(0))
25406 .collect::<std::result::Result<_, _>>()
25407 .unwrap();
25408 assert_eq!(
25409 versions,
25410 (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
25411 "_schema_migrations should contain v13 through current"
25412 );
25413 }
25414
25415 #[test]
25416 fn franken_migrations_idempotent() {
25417 let storage = franken_storage_in_memory();
25418 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
25419
25420 storage.run_migrations().unwrap();
25422 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
25423 }
25424
25425 #[test]
25426 fn migration_v20_backfills_conversation_external_tail_lookup() {
25427 let storage = franken_storage_in_memory();
25428 let agent_id = storage
25429 .ensure_agent(&Agent {
25430 id: None,
25431 slug: "codex".into(),
25432 name: "Codex".into(),
25433 version: None,
25434 kind: AgentKind::Cli,
25435 })
25436 .unwrap();
25437 let workspace_id = storage
25438 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
25439 .unwrap();
25440 let mut conv = make_profiled_storage_remote_conversation(1919, 2);
25441 conv.source_id = "profiled-storage-remote-source-東京".into();
25442 conv.external_id = Some("profiled-storage-remote-☃-1919".into());
25443 let outcome = storage
25444 .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
25445 .unwrap();
25446 let external_id = conv.external_id.as_deref().unwrap();
25447 let lookup_key = conversation_external_lookup_key(&conv.source_id, agent_id, external_id);
25448
25449 storage
25450 .raw()
25451 .execute("DELETE FROM conversation_external_tail_lookup")
25452 .unwrap();
25453 storage
25454 .raw()
25455 .execute("DELETE FROM _schema_migrations WHERE version = 20")
25456 .unwrap();
25457 storage
25458 .raw()
25459 .execute_compat(
25460 "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
25461 fparams!["19"],
25462 )
25463 .unwrap();
25464
25465 storage.run_migrations().unwrap();
25466
25467 let backfilled: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
25468 .raw()
25469 .query_row_map(
25470 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
25471 FROM conversation_external_tail_lookup
25472 WHERE lookup_key = ?1",
25473 fparams![lookup_key.as_str()],
25474 |row| {
25475 Ok((
25476 row.get_typed(0)?,
25477 row.get_typed(1)?,
25478 row.get_typed(2)?,
25479 row.get_typed(3)?,
25480 ))
25481 },
25482 )
25483 .unwrap();
25484 assert_eq!(
25485 backfilled,
25486 (
25487 outcome.conversation_id,
25488 conv.ended_at,
25489 Some(1),
25490 conv.messages[1].created_at
25491 )
25492 );
25493 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
25494 }
25495
25496 #[test]
25497 fn migration_v15_creates_lazy_tail_state_cache() {
25498 let conn = FrankenConnection::open(":memory:").unwrap();
25499 conn.execute_batch(
25500 "CREATE TABLE conversations (
25501 id INTEGER PRIMARY KEY,
25502 ended_at INTEGER
25503 );
25504 CREATE TABLE messages (
25505 id INTEGER PRIMARY KEY,
25506 conversation_id INTEGER NOT NULL,
25507 idx INTEGER NOT NULL,
25508 created_at INTEGER
25509 );
25510 INSERT INTO conversations(id, ended_at) VALUES
25511 (1, 1710000000300),
25512 (2, NULL);
25513 INSERT INTO messages(id, conversation_id, idx, created_at) VALUES
25514 (10, 1, 0, 1710000000100),
25515 (11, 1, 1, 1710000000200),
25516 (12, 2, 0, 1710000000400);",
25517 )
25518 .unwrap();
25519
25520 conn.execute(
25521 "CREATE TABLE _schema_migrations (
25522 version INTEGER PRIMARY KEY,
25523 name TEXT NOT NULL,
25524 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
25525 );",
25526 )
25527 .unwrap();
25528
25529 assert!(
25530 apply_conversation_tail_state_cache_migration(&conn).unwrap(),
25531 "v15 migration should apply once"
25532 );
25533 assert!(
25534 !apply_conversation_tail_state_cache_migration(&conn).unwrap(),
25535 "v15 migration should be idempotent once recorded"
25536 );
25537
25538 let columns = conn.query("PRAGMA table_info(conversations);").unwrap();
25539 let column_names: HashSet<String> = columns
25540 .iter()
25541 .map(|row| row.get_typed(1))
25542 .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
25543 .unwrap();
25544 assert!(column_names.contains("last_message_idx"));
25545 assert!(column_names.contains("last_message_created_at"));
25546
25547 let tail_rows: i64 = conn
25548 .query("SELECT COUNT(*) FROM conversation_tail_state;")
25549 .unwrap()
25550 .first()
25551 .unwrap()
25552 .get_typed(0)
25553 .unwrap();
25554 assert_eq!(
25555 tail_rows, 0,
25556 "v15 should create the cache without an open-time message scan"
25557 );
25558
25559 let applied: i64 = conn
25560 .query("SELECT COUNT(*) FROM _schema_migrations WHERE version = 15;")
25561 .unwrap()
25562 .first()
25563 .unwrap()
25564 .get_typed(0)
25565 .unwrap();
25566 assert_eq!(applied, 1);
25567 }
25568
25569 #[test]
25570 fn schema_repair_adds_missing_conversations_token_columns() {
25571 let conn = FrankenConnection::open(":memory:").unwrap();
25572 conn.execute_batch(
25573 "CREATE TABLE conversations (
25574 id INTEGER PRIMARY KEY,
25575 agent_id INTEGER NOT NULL,
25576 source_path TEXT NOT NULL
25577 );",
25578 )
25579 .unwrap();
25580 let storage = FrankenStorage::new(conn, std::path::PathBuf::from(":memory:"));
25581
25582 storage.repair_missing_conversation_token_columns().unwrap();
25583 storage.repair_missing_conversation_token_columns().unwrap();
25584
25585 let columns = franken_table_column_names(&storage.conn, "conversations").unwrap();
25586 for &(column_name, _) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
25587 assert!(
25588 columns.contains(column_name),
25589 "schema repair should add conversations.{column_name}"
25590 );
25591 }
25592 }
25593
25594 #[test]
25595 fn franken_meta_schema_version_in_sync() {
25596 let storage = franken_storage_in_memory();
25597
25598 let rows = storage
25600 .raw()
25601 .query("SELECT value FROM meta WHERE key = 'schema_version';")
25602 .unwrap();
25603 let meta_version: String = rows.first().unwrap().get_typed(0).unwrap();
25604 assert_eq!(
25605 meta_version,
25606 CURRENT_SCHEMA_VERSION.to_string(),
25607 "meta.schema_version should match CURRENT_SCHEMA_VERSION"
25608 );
25609 }
25610
25611 #[test]
25612 fn franken_transition_from_meta_version() {
25613 let dir = TempDir::new().unwrap();
25614 let db_path = dir.path().join("test_transition.db");
25615
25616 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25619 conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
25620 .unwrap();
25621 conn.execute("INSERT INTO meta(key, value) VALUES('schema_version', '10');")
25622 .unwrap();
25623 conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
25625 .unwrap();
25626 drop(conn);
25627
25628 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25630 transition_from_meta_version(&conn).unwrap();
25631
25632 let rows = conn
25636 .query("SELECT version FROM _schema_migrations ORDER BY version;")
25637 .unwrap();
25638 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
25639 assert_eq!(
25640 versions,
25641 (1..=13).collect::<Vec<i64>>(),
25642 "transition should bridge legacy V10 databases through the combined V13 base marker"
25643 );
25644 }
25645
25646 #[test]
25647 fn franken_transition_from_current_meta_backfills_current_schema_marker() {
25648 let dir = TempDir::new().unwrap();
25649 let db_path = dir.path().join("test_current_transition.db");
25650
25651 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25652 conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
25653 .unwrap();
25654 conn.execute_compat(
25655 "INSERT INTO meta(key, value) VALUES('schema_version', ?1);",
25656 &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
25657 )
25658 .unwrap();
25659 conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
25660 .unwrap();
25661 drop(conn);
25662
25663 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25664 transition_from_meta_version(&conn).unwrap();
25665
25666 let rows = conn
25667 .query("SELECT version FROM _schema_migrations ORDER BY version;")
25668 .unwrap();
25669 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
25670 assert_eq!(
25671 versions,
25672 (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
25673 "current meta schema marker should backfill every known migration"
25674 );
25675 }
25676
25677 #[test]
25678 fn franken_transition_skips_when_already_done() {
25679 let dir = TempDir::new().unwrap();
25680 let db_path = dir.path().join("test_transition_skip.db");
25681
25682 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25684 conn.execute(
25685 "CREATE TABLE _schema_migrations (version INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL DEFAULT 'now');",
25686 ).unwrap();
25687 conn.execute("INSERT INTO _schema_migrations (version, name) VALUES (1, 'test');")
25688 .unwrap();
25689
25690 transition_from_meta_version(&conn).unwrap();
25692
25693 let rows = conn
25695 .query("SELECT COUNT(*) FROM _schema_migrations;")
25696 .unwrap();
25697 let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
25698 assert_eq!(
25699 count, 1,
25700 "transition should not re-run on already-transitioned DB"
25701 );
25702 }
25703
25704 #[test]
25705 fn franken_transition_fresh_db_is_noop() {
25706 let dir = TempDir::new().unwrap();
25707 let db_path = dir.path().join("test_fresh_noop.db");
25708
25709 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25711 transition_from_meta_version(&conn).unwrap();
25712
25713 let res = conn.query("SELECT * FROM \"_schema_migrations\";");
25715 assert!(
25716 res.is_err(),
25717 "transition should not create _schema_migrations on fresh DB"
25718 );
25719 }
25720
25721 #[test]
25722 fn franken_transition_with_fts_virtual_table_succeeds() {
25723 let dir = TempDir::new().unwrap();
25724 let db_path = dir.path().join("test_transition_with_fts.db");
25725
25726 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
25727 conn.execute_batch(
25728 "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
25729 INSERT INTO meta(key, value) VALUES('schema_version', '13');
25730 CREATE TABLE conversations (id INTEGER PRIMARY KEY);
25731 CREATE VIRTUAL TABLE fts_messages USING fts5(
25732 content,
25733 title,
25734 agent,
25735 workspace,
25736 source_path,
25737 created_at,
25738 content='',
25739 tokenize='porter unicode61'
25740 );",
25741 )
25742 .unwrap();
25743 drop(conn);
25744
25745 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
25746 transition_from_meta_version(&conn).unwrap();
25747
25748 let rows = conn
25749 .query("SELECT version FROM _schema_migrations ORDER BY version;")
25750 .unwrap();
25751 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
25752 assert_eq!(versions, (1..=13).collect::<Vec<i64>>());
25753 }
25754
25755 #[test]
25756 fn franken_storage_open_legacy_v13_with_fts_virtual_table_succeeds() {
25757 let dir = TempDir::new().unwrap();
25758 let db_path = dir.path().join("test_open_legacy_v13_with_fts.db");
25759
25760 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
25761 conn.execute_batch(
25762 "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
25763 INSERT INTO meta(key, value) VALUES('schema_version', '13');
25764 CREATE TABLE agents (
25765 id INTEGER PRIMARY KEY,
25766 slug TEXT NOT NULL
25767 );
25768 CREATE TABLE workspaces (
25769 id INTEGER PRIMARY KEY,
25770 path TEXT NOT NULL
25771 );
25772 CREATE TABLE sources (
25773 id TEXT PRIMARY KEY,
25774 kind TEXT NOT NULL,
25775 host_label TEXT,
25776 machine_id TEXT,
25777 platform TEXT,
25778 config_json TEXT,
25779 created_at INTEGER NOT NULL,
25780 updated_at INTEGER NOT NULL
25781 );
25782 CREATE TABLE conversations (
25783 id INTEGER PRIMARY KEY,
25784 agent_id INTEGER NOT NULL,
25785 workspace_id INTEGER,
25786 source_id TEXT NOT NULL DEFAULT 'local',
25787 external_id TEXT,
25788 title TEXT,
25789 source_path TEXT NOT NULL,
25790 started_at INTEGER,
25791 ended_at INTEGER
25792 );
25793 CREATE TABLE messages (
25794 id INTEGER PRIMARY KEY,
25795 conversation_id INTEGER NOT NULL,
25796 idx INTEGER NOT NULL,
25797 role TEXT NOT NULL,
25798 author TEXT,
25799 created_at INTEGER,
25800 content TEXT NOT NULL,
25801 extra_json TEXT,
25802 extra_bin BLOB
25803 );
25804 INSERT INTO agents(id, slug) VALUES (1, 'codex');
25805 INSERT INTO workspaces(id, path) VALUES (1, '/data/projects/coding_agent_session_search');
25806 INSERT INTO sources(id, kind, host_label, created_at, updated_at)
25807 VALUES ('local', 'local', NULL, 1710000000000, 1710000000000);
25808 INSERT INTO conversations(
25809 id,
25810 agent_id,
25811 workspace_id,
25812 source_id,
25813 external_id,
25814 title,
25815 source_path,
25816 started_at
25817 )
25818 VALUES (
25819 1,
25820 1,
25821 1,
25822 'local',
25823 'legacy-session',
25824 'legacy session',
25825 '/tmp/legacy.jsonl',
25826 1710000000000
25827 );
25828 INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content)
25829 VALUES (1, 1, 0, 'user', 'tester', 1710000000000, 'legacy content');
25830 CREATE VIRTUAL TABLE fts_messages USING fts5(
25831 content,
25832 title,
25833 agent,
25834 workspace,
25835 source_path,
25836 created_at,
25837 message_id,
25838 content='',
25839 tokenize='porter unicode61'
25840 );",
25841 )
25842 .unwrap();
25843 drop(conn);
25844
25845 let storage = FrankenStorage::open(&db_path).unwrap();
25846 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
25847
25848 let rows = storage
25849 .raw()
25850 .query("SELECT version FROM _schema_migrations ORDER BY version;")
25851 .unwrap();
25852 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
25853 assert_eq!(versions, (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>());
25854 }
25855
25856 #[test]
25857 fn franken_storage_open_repairs_duplicate_fts_messages_schema_rows() {
25858 let dir = TempDir::new().unwrap();
25859 let db_path = dir.path().join("test_open_repairs_duplicate_fts_schema.db");
25860
25861 let storage = FrankenStorage::open(&db_path).unwrap();
25862 let agent = Agent {
25863 id: None,
25864 slug: "codex".into(),
25865 name: "Codex".into(),
25866 version: None,
25867 kind: AgentKind::Cli,
25868 };
25869 let agent_id = storage.ensure_agent(&agent).unwrap();
25870 let conversation = Conversation {
25871 id: None,
25872 agent_slug: "codex".into(),
25873 workspace: Some(PathBuf::from("/tmp/workspace")),
25874 external_id: Some("dup-fts-schema".into()),
25875 title: Some("Duplicate FTS schema".into()),
25876 source_path: PathBuf::from("/tmp/dup-fts-schema.jsonl"),
25877 started_at: Some(1_700_000_000_000),
25878 ended_at: Some(1_700_000_000_100),
25879 approx_tokens: Some(42),
25880 metadata_json: serde_json::Value::Null,
25881 messages: vec![Message {
25882 id: None,
25883 idx: 0,
25884 role: MessageRole::User,
25885 author: Some("user".into()),
25886 created_at: Some(1_700_000_000_050),
25887 content: "message that should remain queryable".into(),
25888 extra_json: serde_json::Value::Null,
25889 snippets: Vec::new(),
25890 }],
25891 source_id: LOCAL_SOURCE_ID.into(),
25892 origin_host: None,
25893 };
25894 storage
25895 .insert_conversation_tree(agent_id, None, &conversation)
25896 .unwrap();
25897 drop(storage);
25898 materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();
25899
25900 let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
25901 let conn = rusqlite_test_fixture_conn(&db_path);
25902 conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
25903 conn.execute(
25904 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
25905 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
25906 [duplicate_legacy_fts_sql],
25907 )
25908 .unwrap();
25909 conn.execute(
25910 "DELETE FROM meta WHERE key = ?1",
25911 [FTS_FRANKEN_REBUILD_META_KEY],
25912 )
25913 .unwrap();
25914 conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
25917
25918 let duplicate_rows: i64 = conn
25919 .query_row(
25920 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
25921 [],
25922 |row| row.get(0),
25923 )
25924 .unwrap();
25925 assert_eq!(duplicate_rows, 2);
25926 drop(conn);
25927
25928 let reopened = FrankenStorage::open(&db_path).unwrap();
25929 assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
25930 let generation_rows: Vec<String> = reopened
25931 .raw()
25932 .query_map_collect(
25933 "SELECT value FROM meta WHERE key = ?1",
25934 fparams![FTS_FRANKEN_REBUILD_META_KEY],
25935 |row| row.get_typed(0),
25936 )
25937 .unwrap();
25938 assert_eq!(
25939 generation_rows.len(),
25940 0,
25941 "canonical open should not eagerly rewrite FTS repair metadata"
25942 );
25943 reopened.ensure_search_fallback_fts_consistency().unwrap();
25944 let repaired = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
25945 assert_eq!(franken_fts_schema_rows(&repaired).unwrap(), 1);
25946
25947 let total_messages: i64 = reopened
25948 .raw()
25949 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25950 row.get_typed(0)
25951 })
25952 .unwrap();
25953 let total_fts_rows: i64 = reopened
25954 .raw()
25955 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
25956 row.get_typed(0)
25957 })
25958 .unwrap();
25959 assert_eq!(total_fts_rows, total_messages);
25960 }
25961
25962 #[test]
25963 fn fts_messages_integrity_reports_missing_shadow_tables() {
25964 let dir = TempDir::new().unwrap();
25965 let healthy_db_path = dir.path().join("healthy_fts.db");
25966
25967 {
25968 let storage = FrankenStorage::open(&healthy_db_path).unwrap();
25969 storage.ensure_search_fallback_fts_consistency().unwrap();
25970 storage
25971 .validate_fts_messages_integrity()
25972 .expect("freshly materialized fts_messages should pass integrity validation");
25973 }
25974
25975 let corrupt_db_path = dir.path().join("test_corrupt_fts_missing_shadows.db");
25976 {
25977 let conn = rusqlite_test_fixture_conn(&corrupt_db_path);
25978 conn.execute("CREATE TABLE schema_anchor(id INTEGER PRIMARY KEY)", [])
25979 .unwrap();
25980 let orphaned_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
25981 conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
25982 conn.execute(
25983 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
25984 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
25985 [orphaned_fts_sql],
25986 )
25987 .unwrap();
25988 conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
25989 }
25990
25991 let open_err = FrankenConnection::open(corrupt_db_path.to_string_lossy().to_string())
25992 .expect_err("orphaned fts_messages schema should fail during connection open");
25993 let integrity = fts_messages_integrity_error_from_message(open_err.to_string())
25994 .expect("open-time FTS corruption should map to the typed FTS integrity kind");
25995 assert_eq!(integrity.missing_shadow_tables(), &["fts_messages_content"]);
25996 let rendered = integrity.to_string();
25997 assert!(
25998 rendered.contains("fts_messages")
25999 && rendered.contains("required FTS5 shadow tables")
26000 && rendered.contains("fts_messages_content"),
26001 "error should be an operator-facing FTS corruption diagnosis: {rendered}"
26002 );
26003 }
26004
/// After a fresh open plus `ensure_search_fallback_fts_consistency`, the
/// database must hold exactly one `fts_messages` schema row, and the table
/// must still be queryable once the storage is reopened.
#[test]
fn franken_storage_open_fresh_db_keeps_single_franken_fts_schema_row() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("fresh-franken-storage-open.db");

    let storage = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    storage
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency after fresh open");
    // Release the storage handle before inspecting the file through a second connection.
    drop(storage);

    let c_reader = FrankenConnection::open(db_path.to_string_lossy().into_owned())
        .expect("open DB via frankensqlite for sqlite_master inspection");
    assert_eq!(
        franken_fts_schema_rows(&c_reader).unwrap(),
        1,
        "exactly one fts_messages schema row should exist after ensure_search_fallback_fts_consistency"
    );
    drop(c_reader);

    let storage = FrankenStorage::open(&db_path).unwrap();
    // `expect` keeps the underlying query error in the panic output, which
    // `assert!(result.is_ok(), ..)` would silently discard.
    storage
        .raw()
        .query("SELECT COUNT(*) FROM fts_messages")
        .expect("fts_messages must be queryable through frankensqlite after open");
}
26042
/// Drops the nine analytics tables through a raw connection while stamping
/// `schema_version` as current, then checks that reopening through
/// `FrankenStorage::open` brings all nine tables back.
#[test]
fn franken_storage_open_repairs_missing_analytics_tables_when_version_markers_lie() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test_repair_missing_analytics.db");

    // Bootstrap a database at the current schema version, then close it.
    let fresh = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(fresh.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(fresh);

    let doomed_tables = [
        "usage_models_daily",
        "usage_daily",
        "usage_hourly",
        "message_metrics",
        "token_daily_stats",
        "token_usage",
        "model_pricing",
        "embedding_jobs",
        "daily_stats",
    ];
    let raw_conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    for table in doomed_tables.iter() {
        let drop_sql = format!("DROP TABLE IF EXISTS {table}");
        raw_conn.execute(&drop_sql).unwrap();
    }
    // Keep the version marker at "current" even though the tables are gone.
    let version_marker = ParamValue::from(CURRENT_SCHEMA_VERSION.to_string());
    raw_conn
        .execute_compat(
            "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
            &[version_marker],
        )
        .unwrap();
    drop(raw_conn);

    let repaired = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(repaired.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let restored_tables: i64 = repaired
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master
             WHERE type='table'
               AND name IN (
                   'daily_stats',
                   'embedding_jobs',
                   'token_usage',
                   'token_daily_stats',
                   'model_pricing',
                   'message_metrics',
                   'usage_hourly',
                   'usage_daily',
                   'usage_models_daily'
               )",
            &[],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        restored_tables, 9,
        "open() should recreate the missing analytics tables even when schema_version already says current"
    );
}
26104
/// Treats every probed table as missing and checks that the repair batches
/// returned for that set mention each one of them.
#[test]
fn current_schema_repair_batches_cover_every_required_probe() {
    let mut probed_tables: Vec<&'static str> = Vec::new();
    for (name, _) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES.iter() {
        probed_tables.push(*name);
    }

    let repair_plan = current_schema_repair_batches_for_missing_tables(&probed_tables).unwrap();

    let mut repaired: HashSet<&'static str> = HashSet::new();
    for batch in repair_plan.iter() {
        repaired.extend(batch.tables.iter().copied());
    }

    for table_name in probed_tables {
        assert!(
            repaired.contains(table_name),
            "missing repair coverage for {table_name}"
        );
    }
}
26125
/// Scans the SQL of every repair batch for statements that would recreate the
/// core tables, recreate the FTS table, or drop a table.
#[test]
fn current_schema_repair_batches_do_not_replay_core_schema_bootstrap() {
    // (forbidden SQL fragment, complaint appended after the batch name)
    const FORBIDDEN_FRAGMENTS: [(&str, &str); 8] = [
        ("CREATE TABLE IF NOT EXISTS meta", "should not recreate meta"),
        (
            "CREATE TABLE IF NOT EXISTS agents",
            "should not recreate agents",
        ),
        (
            "CREATE TABLE IF NOT EXISTS workspaces",
            "should not recreate workspaces",
        ),
        (
            "CREATE TABLE IF NOT EXISTS conversations",
            "should not recreate conversations",
        ),
        (
            "CREATE TABLE IF NOT EXISTS messages",
            "should not recreate messages",
        ),
        (
            "CREATE TABLE IF NOT EXISTS snippets",
            "should not recreate snippets",
        ),
        (
            "CREATE VIRTUAL TABLE fts_messages",
            "should not recreate FTS tables",
        ),
        ("DROP TABLE", "should never drop tables"),
    ];

    for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
        for (fragment, complaint) in FORBIDDEN_FRAGMENTS.iter() {
            assert!(
                !batch.sql.contains(*fragment),
                "repair batch {} {}",
                batch.name,
                complaint
            );
        }
    }
}
26173
/// Runs the pre-tail-cache migrations, the tail-state cache migration, and
/// the post-tail-cache migrations against an in-memory database, then checks
/// the applied versions and the recorded maximum version.
#[test]
fn build_cass_migrations_applies_combined_v13() {
    let conn = FrankenConnection::open(":memory:").unwrap();

    let base_result = build_cass_migrations_before_tail_cache()
        .run(&conn)
        .unwrap();
    let tail_cache_applied = apply_conversation_tail_state_cache_migration(&conn).unwrap();
    assert!(tail_cache_applied);
    let post_result = build_cass_migrations_after_tail_cache().run(&conn).unwrap();

    assert!(base_result.was_fresh);

    // Version 15 is pushed by hand between the two runner results, right
    // after the tail-state cache migration reported success.
    let mut applied = base_result.applied;
    applied.push(15);
    applied.extend(post_result.applied);
    let expected_versions: Vec<i64> = (13..=CURRENT_SCHEMA_VERSION).collect();
    assert_eq!(
        applied, expected_versions,
        "should apply combined V13 plus additive post-V13 migrations"
    );

    let version_rows = conn
        .query("SELECT MAX(version) FROM _schema_migrations;")
        .unwrap();
    let current: i64 = version_rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(current, CURRENT_SCHEMA_VERSION);
}
26201
/// Inserts one two-message conversation (the agent reply carries token usage
/// in `extra_json`) through `insert_conversations_batched` and checks that
/// each of the four rollup tables received at least one row.
#[test]
fn franken_insert_conversations_batched_populates_analytics_rollups() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use frankensqlite::compat::{ConnectionExt, RowExt};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("franken-index.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let ts_ms = 1_770_551_400_000_i64;

    let user_turn = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(ts_ms),
        content: "Please make a plan.".into(),
        extra_json: serde_json::Value::Null,
        snippets: vec![],
    };
    // The agent reply is the only message that carries a usage payload.
    let agent_turn = Message {
        id: None,
        idx: 1,
        role: MessageRole::Agent,
        author: None,
        created_at: Some(ts_ms + 30_000),
        content: "## Plan\n\n1. Reproduce\n2. Patch\n3. Verify".into(),
        extra_json: serde_json::json!({
            "message": {
                "model": "claude-opus-4-6",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50,
                    "cache_read_input_tokens": 25,
                    "cache_creation_input_tokens": 10,
                    "service_tier": "standard"
                }
            }
        }),
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("franken-batch-upsert".into()),
        title: Some("Franken batch upsert".into()),
        source_path: PathBuf::from("/tmp/franken.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![user_turn, agent_turn],
        source_id: "local".into(),
        origin_host: None,
    };

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);

    let conn = storage.raw();
    // Counts the rows currently stored in `table`.
    let count_rows = |table: &str| -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {table}");
        conn.query_row_map(sql.as_str(), fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    // Run all four counts before asserting on any of them.
    let daily_stats_rows = count_rows("daily_stats");
    let token_daily_rows = count_rows("token_daily_stats");
    let usage_daily_rows = count_rows("usage_daily");
    let model_daily_rows = count_rows("usage_models_daily");

    assert!(daily_stats_rows > 0, "daily_stats should be populated");
    assert!(
        token_daily_rows > 0,
        "token_daily_stats should be populated"
    );
    assert!(usage_daily_rows > 0, "usage_daily should be populated");
    assert!(
        model_daily_rows > 0,
        "usage_models_daily should be populated"
    );
}
26315
/// A manager built with an explicit config reports the configured reader
/// count and writer limit.
#[test]
fn connection_manager_creates_readers() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("cm.db");

    // Open and immediately close a storage handle on the path before the
    // manager is created.
    drop(FrankenStorage::open(&db_path).unwrap());

    let mgr = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 3,
            max_writers: 2,
        },
    )
    .unwrap();

    assert_eq!(mgr.reader_count(), 3);
    assert_eq!(mgr.max_writers(), 2);
}
26337
/// A config with zero readers and zero writers must be clamped to one of
/// each, and acquiring a writer from another thread must then complete
/// instead of blocking forever.
#[test]
fn connection_manager_clamps_zero_writer_limit_to_prevent_deadlock() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    let fs = FrankenStorage::open(&db_path).unwrap();
    drop(fs);

    let mgr = std::sync::Arc::new(
        FrankenConnectionManager::new(
            &db_path,
            ConnectionManagerConfig {
                reader_count: 0,
                max_writers: 0,
            },
        )
        .unwrap(),
    );
    assert_eq!(mgr.reader_count(), 1);
    assert_eq!(mgr.max_writers(), 1);

    let (tx, rx) = std::sync::mpsc::channel();
    let mgr_for_thread = std::sync::Arc::clone(&mgr);
    let worker = std::thread::spawn(move || {
        // Send the error's debug rendering rather than a bare bool so a failed
        // acquisition can be diagnosed from the test output.
        let outcome = mgr_for_thread
            .writer()
            .map(|mut guard| {
                guard.mark_committed();
            })
            .map_err(|err| format!("{err:?}"));
        tx.send(outcome).expect("writer result send");
    });

    // Match every channel outcome explicitly: unwrapping the `recv_timeout`
    // result would panic with a bare `Timeout` on exactly the failure this
    // test exists to catch, and the deadlock diagnosis would never be printed.
    match rx.recv_timeout(Duration::from_secs(10)) {
        Ok(Ok(())) => {}
        Ok(Err(err)) => panic!(
            "writer acquisition failed when configured with zero writer slots: {err}"
        ),
        Err(std::sync::mpsc::RecvTimeoutError::Timeout) => panic!(
            "writer acquisition should not block forever when configured with zero writer slots"
        ),
        Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
            panic!("writer thread exited without reporting a result")
        }
    }

    // The worker has already reported success, so joining cannot hang; it
    // makes sure the thread's manager handle is released before the temp
    // directory is removed.
    worker
        .join()
        .expect("writer thread should exit cleanly after reporting its result");
}
26373
/// Taking one reader from the manager advances `reader_idx` by exactly one.
#[test]
fn connection_manager_reader_round_robin() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("cm.db");

    drop(FrankenStorage::open(&db_path).unwrap());

    let mgr = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 2,
            max_writers: 1,
        },
    )
    .unwrap();

    // Snapshot of the manager's reader cursor.
    let current_idx = || mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);

    let idx_before = current_idx();
    let _r1 = mgr.reader();
    let idx_after = current_idx();

    assert_eq!(idx_after, idx_before + 1, "reader index should advance");
}
26394
/// Writes a row through a writer guard and reads it back through a reader
/// obtained from the same manager.
#[test]
fn connection_manager_writer_reads_and_writes() {
    use frankensqlite::compat::RowExt;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("cm.db");

    drop(FrankenStorage::open(&db_path).unwrap());

    let mgr =
        FrankenConnectionManager::new(&db_path, ConnectionManagerConfig::default()).unwrap();

    {
        let mut guard = mgr.writer().unwrap();
        for statement in [
            "CREATE TABLE IF NOT EXISTS cm_test (id INTEGER PRIMARY KEY, val TEXT)",
            "INSERT INTO cm_test (val) VALUES ('hello')",
        ] {
            guard.storage().raw().execute(statement).unwrap();
        }
        guard.mark_committed();
    }

    let reader_guard = mgr.reader();
    let rows = reader_guard.query("SELECT val FROM cm_test").unwrap();
    assert_eq!(rows.len(), 1);
    let stored: String = rows[0].get_typed(0).unwrap();
    assert_eq!(stored, "hello");
}
26429
/// With a single writer slot, a second writer can be acquired once the first
/// guard has been dropped.
#[test]
fn connection_manager_writer_guard_drops_releases_slot() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("cm.db");

    drop(FrankenStorage::open(&db_path).unwrap());

    let mgr = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 1,
            max_writers: 1,
        },
    )
    .unwrap();

    let mut first = mgr.writer().unwrap();
    first.mark_committed();
    // Dropping the guard explicitly before asking for the next writer.
    drop(first);

    let mut second = mgr.writer().unwrap();
    second.mark_committed();
}
26454
/// Writes a row through `concurrent_writer` and reads it back through a
/// reader obtained from the same manager.
#[test]
fn connection_manager_concurrent_writer_works() {
    use frankensqlite::compat::RowExt;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("cm.db");

    drop(FrankenStorage::open(&db_path).unwrap());

    let mgr = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 1,
            max_writers: 2,
        },
    )
    .unwrap();

    {
        let mut guard = mgr.concurrent_writer().unwrap();
        for statement in [
            "CREATE TABLE IF NOT EXISTS cm_conc (id INTEGER PRIMARY KEY, val TEXT)",
            "INSERT INTO cm_conc (val) VALUES ('concurrent')",
        ] {
            guard.storage().raw().execute(statement).unwrap();
        }
        guard.mark_committed();
    }

    let reader_guard = mgr.reader();
    let rows = reader_guard.query("SELECT val FROM cm_conc").unwrap();
    assert_eq!(rows.len(), 1);
    let stored: String = rows[0].get_typed(0).unwrap();
    assert_eq!(stored, "concurrent");
}
26491
/// The default config asks for four readers and at least one writer.
#[test]
fn connection_manager_default_config() {
    let ConnectionManagerConfig {
        reader_count,
        max_writers,
    } = ConnectionManagerConfig::default();

    assert_eq!(reader_count, 4);
    assert!(max_writers > 0);
}
26498
/// Seeds one conversation each for `openclaw` and `codex`, purges `openclaw`,
/// rebuilds the derived tables, and checks that only the `codex` data is
/// left in the agent list, the totals, the FTS table, and the daily rollups.
#[test]
fn purge_agent_archive_data_removes_only_target_agent_and_rebuilds_derived_tables() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Inserts a two-message conversation for `agent_slug`; `marker` is woven
    /// into the external id, title, source path, and message text.
    fn seed_conversation(storage: &FrankenStorage, agent_slug: &str, marker: &str) {
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: agent_slug.into(),
                name: agent_slug.into(),
                version: None,
                kind: AgentKind::Cli,
            })
            .unwrap();

        let user_turn = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_010),
            content: format!("{agent_slug} {marker} user"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let agent_turn = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_020),
            content: format!("{agent_slug} {marker} assistant"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };

        let conversation = Conversation {
            id: None,
            agent_slug: agent_slug.into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("{agent_slug}-{marker}")),
            title: Some(format!("{agent_slug} {marker}")),
            source_path: PathBuf::from(format!("/tmp/{agent_slug}-{marker}.jsonl")),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![user_turn, agent_turn],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    seed_conversation(&storage, "openclaw", "purge-target");
    seed_conversation(&storage, "codex", "keep-target");

    let purge = storage.purge_agent_archive_data("openclaw").unwrap();
    assert_eq!(purge.conversations_deleted, 1);
    assert_eq!(purge.messages_deleted, 2);

    storage.rebuild_fts().unwrap();
    storage.rebuild_analytics().unwrap();
    storage.rebuild_daily_stats().unwrap();
    storage.rebuild_token_daily_stats().unwrap();

    let agents = storage.list_agents().unwrap();
    assert_eq!(agents.len(), 1);
    assert_eq!(agents[0].slug, "codex");
    assert_eq!(storage.total_conversation_count().unwrap(), 1);
    assert_eq!(storage.total_message_count().unwrap(), 2);

    // Runs a parameterless query and returns its first column as an integer.
    let scalar = |sql: &str| -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    let fts_rows = scalar("SELECT COUNT(*) FROM fts_messages");
    assert_eq!(fts_rows, 2);

    let total_daily_sessions = scalar(
        "SELECT COALESCE(SUM(session_count), 0)
         FROM daily_stats
         WHERE agent_slug = 'all' AND source_id = 'all'",
    );
    assert_eq!(total_daily_sessions, 1);

    let openclaw_token_rows =
        scalar("SELECT COUNT(*) FROM token_daily_stats WHERE agent_slug = 'openclaw'");
    assert_eq!(openclaw_token_rows, 0);
}
26606
/// Plants orphaned `messages`, `message_metrics`, `token_usage`, and
/// `snippets` rows next to one valid message while foreign keys are off,
/// then checks that `cleanup_orphan_fk_rows` removes only the orphans and
/// that a second run reports nothing.
#[test]
fn cleanup_orphan_fk_rows_removes_orphans_and_is_noop_on_clean_db() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("orphan_fk_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    // Counts the rows currently stored in `table`.
    let count_rows = |table: &str| -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {table}");
        storage
            .raw()
            .query_row_map(sql.as_str(), fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();

    // One agent, one conversation, and one message that reference each other.
    for seed_sql in [
        "INSERT INTO agents(id, slug, name, kind, created_at, updated_at) \
         VALUES(1, 'test-agent', 'Test Agent', 'cli', 0, 0)",
        "INSERT INTO conversations(id, agent_id, source_id, source_path, started_at) \
         VALUES(1, 1, 'local', '/tmp/real.jsonl', 0)",
        "INSERT INTO messages(id, conversation_id, idx, role, content) \
         VALUES(1, 1, 0, 'user', 'real message')",
    ] {
        storage
            .raw()
            .execute_compat(seed_sql, fparams![])
            .unwrap();
    }

    // Messages whose conversation ids (99999 and 0) were never inserted above.
    let orphan_messages = [(101_i64, 99_999_i64, 0_i64), (102, 0, 0), (103, 0, 1)];
    for (mid, cid, idx) in orphan_messages {
        storage
            .raw()
            .execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, content) \
                 VALUES(?1, ?2, ?3, 'user', 'orphan message')",
                fparams![mid, cid, idx],
            )
            .unwrap();
    }

    // Metric and token rows for the real message (1) and two of the orphans.
    for message_id in [1_i64, 101_i64, 102_i64] {
        storage
            .raw()
            .execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 13, 2)",
                fparams![message_id],
            )
            .unwrap();
        storage
            .raw()
            .execute_compat(
                "INSERT INTO token_usage(
                    message_id, conversation_id, agent_id, timestamp_ms, day_id,
                    role, content_chars
                 ) VALUES(?1, 1, 1, 0, 0, 'user', 13)",
                fparams![message_id],
            )
            .unwrap();
    }

    // A snippet pointing at a message id that was never inserted.
    storage
        .raw()
        .execute_compat(
            "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text) \
             VALUES(99999, '/tmp/orphan-snippet.rs', 1, 2, 'rust', 'fn main() {}')",
            fparams![],
        )
        .unwrap();

    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    // Fixture sanity checks before the cleanup runs.
    assert_eq!(count_rows("messages"), 4);
    assert_eq!(count_rows("snippets"), 1);
    assert_eq!(count_rows("message_metrics"), 3);
    assert_eq!(count_rows("token_usage"), 3);

    let report = storage.cleanup_orphan_fk_rows().unwrap();

    assert_eq!(count_rows("messages"), 1, "real message must be preserved");
    assert_eq!(count_rows("snippets"), 0);
    assert_eq!(
        count_rows("message_metrics"),
        1,
        "real message metric must be preserved"
    );
    assert_eq!(
        count_rows("token_usage"),
        1,
        "real token row must be preserved"
    );

    assert_eq!(report.total, 4, "report total: {:?}", report);
    let messages_count = report
        .per_table
        .iter()
        .filter(|(t, _)| *t == "messages")
        .map(|(_, c)| *c)
        .next();
    assert_eq!(messages_count, Some(3));
    let snippets_count = report
        .per_table
        .iter()
        .filter(|(t, _)| *t == "snippets")
        .map(|(_, c)| *c)
        .next();
    assert_eq!(snippets_count, Some(1));

    // A second pass over the cleaned database must report nothing.
    let second = storage.cleanup_orphan_fk_rows().unwrap();
    assert_eq!(second.total, 0);
    assert!(second.per_table.is_empty());
}
26790
/// Inserts `ORPHAN_FK_ID_CHUNK_SIZE + 3` orphaned messages, each with a
/// metric row, and checks that a single cleanup call removes all of them.
#[test]
fn cleanup_orphan_fk_rows_handles_more_than_one_delete_chunk() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("orphan_fk_chunked_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();
    let orphan_count = ORPHAN_FK_ID_CHUNK_SIZE + 3;

    // Counts the rows currently stored in `table`.
    let count_rows = |table: &str| -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {table}");
        storage
            .raw()
            .query_row_map(sql.as_str(), fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
    {
        let mut tx = storage.raw().transaction().unwrap();
        for offset in (0..orphan_count).map(|idx| i64::try_from(idx).unwrap()) {
            // Conversation ids in the 20_000 range are never inserted.
            let message_id = 10_000_i64 + offset;
            let conversation_id = 20_000_i64 + offset;
            tx.execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, content) \
                 VALUES(?1, ?2, 0, 'user', 'orphan message')",
                fparams![message_id, conversation_id],
            )
            .unwrap();
            tx.execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 14, 2)",
                fparams![message_id],
            )
            .unwrap();
        }
        tx.commit().unwrap();
    }
    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    let report = storage.cleanup_orphan_fk_rows().unwrap();

    let expected_removed = i64::try_from(orphan_count).unwrap();
    assert_eq!(report.total, expected_removed);
    let messages_count = report
        .per_table
        .iter()
        .filter(|(table, _)| *table == "messages")
        .map(|(_, count)| *count)
        .next();
    assert_eq!(messages_count, Some(expected_removed));

    assert_eq!(count_rows("messages"), 0);
    assert_eq!(count_rows("message_metrics"), 0);
}
26847
/// Inserts `2 * ORPHAN_FK_ID_CHUNK_SIZE + 5` metric rows whose message ids
/// were never inserted and checks that cleanup removes them all while
/// reporting a single aggregated `message_metrics` entry.
#[test]
fn cleanup_orphan_fk_rows_pages_direct_child_orphans() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("direct_orphan_fk_paged_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();
    let orphan_count = (ORPHAN_FK_ID_CHUNK_SIZE * 2) + 5;

    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
    {
        let mut tx = storage.raw().transaction().unwrap();
        for offset in (0..orphan_count).map(|idx| i64::try_from(idx).unwrap()) {
            let message_id = 50_000_i64 + offset;
            tx.execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 21, 3)",
                fparams![message_id],
            )
            .unwrap();
        }
        tx.commit().unwrap();
    }
    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    let report = storage.cleanup_orphan_fk_rows().unwrap();

    let expected_removed = i64::try_from(orphan_count).unwrap();
    assert_eq!(report.total, expected_removed);

    // Gather every report entry for the table once; the sum and the entry
    // count are both derived from it.
    let metric_entries: Vec<i64> = report
        .per_table
        .iter()
        .filter(|(table, _)| *table == "message_metrics")
        .map(|(_, count)| *count)
        .collect();
    let metrics_count = metric_entries.iter().sum::<i64>();
    assert_eq!(metrics_count, expected_removed);
    assert_eq!(
        metric_entries.len(),
        1,
        "paged cleanup should aggregate report entries by table: {report:?}"
    );

    let metrics_after: i64 = storage
        .raw()
        .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(metrics_after, 0);
}
26900}