1use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole, Snippet};
4use crate::sources::provenance::{LOCAL_SOURCE_ID, Source, SourceKind};
5use anyhow::{Context, Result, anyhow, bail};
6use frankensqlite::{
7 Connection as FrankenConnection, Row as FrankenRow, SqliteValue,
8 compat::{
9 ConnectionExt as FrankenConnectionExt, OpenFlags as FrankenOpenFlags,
10 OptionalExtension as FrankenOptionalExtension, ParamValue, RowExt as FrankenRowExt,
11 Transaction as FrankenTransaction, TransactionExt as FrankenTransactionExt,
12 open_with_flags as open_franken_with_flags, param_slice_to_values, params_from_iter,
13 },
14 migrate::MigrationRunner,
15};
16use serde::{Deserialize, Serialize};
17use smallvec::SmallVec;
18use std::borrow::Cow;
19use std::collections::{HashMap, HashSet};
20use std::fs;
21use std::io::{BufRead, BufReader, Write};
22use std::process::{Command, Stdio};
23use std::sync::{
24 Arc,
25 atomic::{AtomicBool, AtomicI8, AtomicI64, AtomicU64, AtomicUsize, Ordering},
26};
27
/// Builds a `&[ParamValue]` slice from zero or more expressions.
///
/// With no arguments the macro expands to an empty slice. Otherwise each
/// expression is converted with `ParamValue::from`; a trailing comma is
/// accepted.
macro_rules! fparams {
    // No parameters: an empty, explicitly typed slice.
    () => {
        &[] as &[ParamValue]
    };
    // One or more comma-separated expressions.
    ($($val:expr),+ $(,)?) => {
        &[$(ParamValue::from($val)),+] as &[ParamValue]
    };
}
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
39use thiserror::Error;
40use tracing::info;
41
/// Timeout that `LazyFrankenDb::get` passes to
/// `acquire_doctor_mutation_db_open_guard` before it opens the database.
const DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Maximum number of bytes (64 KiB) read from the doctor lock file when it is
/// scanned for a `pid=` entry.
const DOCTOR_MUTATION_LOCK_MAX_METADATA_READ: u64 = 64 * 1024;
44
/// Errors returned by `LazyFrankenDb::get` and `LazyFrankenDb::get_with_timeout`.
#[derive(Debug, Error)]
pub enum LazyDbError {
    /// The database path did not exist when the lazy open was attempted.
    #[error("Database not found at {0}")]
    NotFound(PathBuf),
    /// Acquiring the doctor lock or opening the connection failed; `source`
    /// carries the underlying (or synthesized `Internal`) error.
    #[error("Failed to open FrankenSQLite database at {path}: {source}")]
    FrankenOpenFailed {
        path: PathBuf,
        source: frankensqlite::FrankenError,
    },
}
63
/// Tuple wrapper around a `FrankenConnection` that is declared `Send`.
///
/// Field `.1` is the index-writer checkpoint page count and field `.2` the
/// index-writer busy timeout in milliseconds (see
/// `new_with_index_writer_state`); `new` initializes both to their `UNSET_*`
/// sentinels.
pub struct SendFrankenConnection(FrankenConnection, i64, u64);

// SAFETY: NOTE(review): the invariant that makes it sound to move a
// `FrankenConnection` to another thread is not visible in this part of the
// file. Presumably every use is serialized (e.g. behind a mutex) — confirm
// against the `frankensqlite` threading contract.
unsafe impl Send for SendFrankenConnection {}
79
80impl SendFrankenConnection {
81 pub(crate) fn new(conn: FrankenConnection) -> Self {
82 Self(
83 conn,
84 UNSET_INDEX_WRITER_CHECKPOINT_PAGES,
85 UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS,
86 )
87 }
88
89 pub(crate) fn new_with_index_writer_state(
90 conn: FrankenConnection,
91 checkpoint_pages: i64,
92 busy_timeout_ms: u64,
93 ) -> Self {
94 Self(conn, checkpoint_pages, busy_timeout_ms)
95 }
96
97 pub(crate) fn into_parts(self) -> (FrankenConnection, i64, u64) {
98 (self.0, self.1, self.2)
99 }
100}
101
102impl std::ops::Deref for SendFrankenConnection {
103 type Target = FrankenConnection;
104 fn deref(&self) -> &FrankenConnection {
105 &self.0
106 }
107}
108
/// A database handle whose connection is opened on first use.
///
/// `conn` starts as `None` and is filled in by `get` / `get_with_timeout`;
/// the mutex serializes both the lazy open and all later access.
pub struct LazyFrankenDb {
    /// Filesystem location of the database.
    path: PathBuf,
    /// The lazily opened connection, `None` until the first successful open.
    conn: parking_lot::Mutex<Option<SendFrankenConnection>>,
}

/// Lock guard returned by `LazyFrankenDb::get`; dereferences to the open
/// `FrankenConnection` and keeps the mutex held for as long as it lives.
pub struct LazyFrankenDbGuard<'a>(parking_lot::MutexGuard<'a, Option<SendFrankenConnection>>);
121
122impl std::fmt::Debug for LazyFrankenDbGuard<'_> {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.debug_tuple("LazyFrankenDbGuard")
125 .field(&self.0.is_some())
126 .finish()
127 }
128}
129
130impl std::ops::Deref for LazyFrankenDbGuard<'_> {
131 type Target = FrankenConnection;
132 fn deref(&self) -> &FrankenConnection {
133 self.0
134 .as_ref()
135 .expect("LazyFrankenDb connection must be initialized before access")
136 }
137}
138
139impl LazyFrankenDb {
140 pub fn new(path: PathBuf) -> Self {
142 Self {
143 path,
144 conn: parking_lot::Mutex::new(None),
145 }
146 }
147
148 pub fn from_overrides(data_dir: &Option<PathBuf>, db_override: Option<PathBuf>) -> Self {
152 let data_dir = data_dir.clone().unwrap_or_else(crate::default_data_dir);
153 let path = db_override.unwrap_or_else(|| data_dir.join("agent_search.db"));
154 Self::new(path)
155 }
156
157 pub fn get(&self, reason: &str) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
162 let mut guard = self.conn.lock();
163 if guard.is_none() {
164 if !self.path.exists() {
165 return Err(LazyDbError::NotFound(self.path.clone()));
166 }
167 let start = Instant::now();
168 let _doctor_guard = acquire_doctor_mutation_db_open_guard(
169 &self.path,
170 DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT,
171 )
172 .map_err(|err| LazyDbError::FrankenOpenFailed {
173 path: self.path.clone(),
174 source: frankensqlite::FrankenError::Internal(err.to_string()),
175 })?;
176 let conn =
177 FrankenConnection::open(self.path.to_string_lossy().into_owned()).map_err(|e| {
178 LazyDbError::FrankenOpenFailed {
179 path: self.path.clone(),
180 source: e,
181 }
182 })?;
183 let elapsed_ms = start.elapsed().as_millis();
184 info!(
185 path = %self.path.display(),
186 elapsed_ms = elapsed_ms,
187 reason = reason,
188 "lazily opened FrankenSQLite database"
189 );
190 *guard = Some(SendFrankenConnection::new(conn));
191 }
192 Ok(LazyFrankenDbGuard(guard))
193 }
194
195 pub fn get_with_timeout(
201 &self,
202 reason: &str,
203 timeout: Duration,
204 ) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
205 let mut guard = self.conn.lock();
206 if guard.is_none() {
207 if !self.path.exists() {
208 return Err(LazyDbError::NotFound(self.path.clone()));
209 }
210 let start = Instant::now();
211 let path_owned = self.path.to_string_lossy().into_owned();
212 let path_for_guard = self.path.clone();
213 let (tx, rx) = std::sync::mpsc::channel();
214 std::thread::spawn(move || {
215 let _doctor_guard =
216 match acquire_doctor_mutation_db_open_guard(&path_for_guard, timeout) {
217 Ok(guard) => guard,
218 Err(err) => {
219 let _ = tx
220 .send(Err(frankensqlite::FrankenError::Internal(err.to_string())));
221 return;
222 }
223 };
224 let _ =
225 tx.send(FrankenConnection::open(path_owned).map(SendFrankenConnection::new));
226 });
227 let conn = rx
228 .recv_timeout(timeout)
229 .map_err(|_| LazyDbError::FrankenOpenFailed {
230 path: self.path.clone(),
231 source: frankensqlite::FrankenError::Internal(format!(
232 "database open timed out after {}s (possible corruption or lock contention)",
233 timeout.as_secs()
234 )),
235 })?
236 .map_err(|e| LazyDbError::FrankenOpenFailed {
237 path: self.path.clone(),
238 source: e,
239 })?;
240 let elapsed_ms = start.elapsed().as_millis();
241 info!(
242 path = %self.path.display(),
243 elapsed_ms = elapsed_ms,
244 reason = reason,
245 "lazily opened FrankenSQLite database (with timeout)"
246 );
247 *guard = Some(conn);
248 }
249 Ok(LazyFrankenDbGuard(guard))
250 }
251
252 pub fn path(&self) -> &Path {
254 &self.path
255 }
256
257 pub fn is_open(&self) -> bool {
259 self.conn.lock().is_some()
260 }
261}
262
/// State word for `next_franken_retry_jitter_ms`; each draw advances it by a
/// fixed increment.
static FRANKEN_RETRY_JITTER_STATE: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
/// Count of live `DoctorMutationDbOpenBypassGuard`s; while non-zero,
/// `acquire_doctor_mutation_db_open_guard` skips the doctor lock.
static DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH: AtomicUsize = AtomicUsize::new(0);
/// Gate for the message-lookup counters below; the `record_message_lookup_*`
/// helpers are no-ops while this is `false`.
static MESSAGE_LOOKUP_TRACE_ENABLED: AtomicBool = AtomicBool::new(false);
/// Incremented by `record_message_lookup_exact_idx_probe`.
static MESSAGE_LOOKUP_EXACT_IDX_PROBES: AtomicU64 = AtomicU64::new(0);
/// Incremented by `record_message_lookup_bounded_queries` (by its `query_count`).
static MESSAGE_LOOKUP_BOUNDED_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Incremented once per `record_message_lookup_full_scan_query` call.
static MESSAGE_LOOKUP_FULL_SCAN_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Total rows reported by the bounded and full-scan recorders.
static MESSAGE_LOOKUP_ROWS_MATERIALIZED: AtomicU64 = AtomicU64::new(0);
270
/// Point-in-time copy of the global message-lookup trace counters, as
/// produced by `message_lookup_trace_snapshot`.
#[derive(Debug, Clone, Copy, Default, Serialize)]
pub(crate) struct MessageLookupTraceCounters {
    /// Value of `MESSAGE_LOOKUP_EXACT_IDX_PROBES`.
    pub exact_idx_probes: u64,
    /// Value of `MESSAGE_LOOKUP_BOUNDED_QUERIES`.
    pub bounded_lookup_queries: u64,
    /// Value of `MESSAGE_LOOKUP_FULL_SCAN_QUERIES`.
    pub full_scan_queries: u64,
    /// Value of `MESSAGE_LOOKUP_ROWS_MATERIALIZED`.
    pub rows_materialized: u64,
}
278
279impl MessageLookupTraceCounters {
280 pub(crate) fn saturating_sub(self, before: Self) -> Self {
281 Self {
282 exact_idx_probes: self
283 .exact_idx_probes
284 .saturating_sub(before.exact_idx_probes),
285 bounded_lookup_queries: self
286 .bounded_lookup_queries
287 .saturating_sub(before.bounded_lookup_queries),
288 full_scan_queries: self
289 .full_scan_queries
290 .saturating_sub(before.full_scan_queries),
291 rows_materialized: self
292 .rows_materialized
293 .saturating_sub(before.rows_materialized),
294 }
295 }
296
297 pub(crate) fn lookups_against_global(self) -> u64 {
298 self.exact_idx_probes.saturating_add(self.rows_materialized)
299 }
300}
301
302pub(crate) fn set_message_lookup_trace_enabled(enabled: bool) -> bool {
303 MESSAGE_LOOKUP_TRACE_ENABLED.swap(enabled, Ordering::Relaxed)
304}
305
306pub(crate) fn message_lookup_trace_snapshot() -> MessageLookupTraceCounters {
307 MessageLookupTraceCounters {
308 exact_idx_probes: MESSAGE_LOOKUP_EXACT_IDX_PROBES.load(Ordering::Relaxed),
309 bounded_lookup_queries: MESSAGE_LOOKUP_BOUNDED_QUERIES.load(Ordering::Relaxed),
310 full_scan_queries: MESSAGE_LOOKUP_FULL_SCAN_QUERIES.load(Ordering::Relaxed),
311 rows_materialized: MESSAGE_LOOKUP_ROWS_MATERIALIZED.load(Ordering::Relaxed),
312 }
313}
314
315fn record_message_lookup_exact_idx_probe() {
316 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
317 MESSAGE_LOOKUP_EXACT_IDX_PROBES.fetch_add(1, Ordering::Relaxed);
318 }
319}
320
321fn record_message_lookup_bounded_queries(query_count: u64, rows: usize) {
322 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
323 MESSAGE_LOOKUP_BOUNDED_QUERIES.fetch_add(query_count, Ordering::Relaxed);
324 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
325 }
326}
327
328fn record_message_lookup_full_scan_query(rows: usize) {
329 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
330 MESSAGE_LOOKUP_FULL_SCAN_QUERIES.fetch_add(1, Ordering::Relaxed);
331 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
332 }
333}
334
335pub(crate) struct DoctorMutationDbOpenBypassGuard;
336
337impl Drop for DoctorMutationDbOpenBypassGuard {
338 fn drop(&mut self) {
339 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_sub(1, Ordering::SeqCst);
340 }
341}
342
343pub(crate) fn enter_doctor_mutation_db_open_bypass() -> DoctorMutationDbOpenBypassGuard {
344 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_add(1, Ordering::SeqCst);
345 DoctorMutationDbOpenBypassGuard
346}
347
348fn doctor_mutation_db_open_bypass_active() -> bool {
349 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.load(Ordering::SeqCst) > 0
350}
351
352fn next_franken_retry_jitter_ms(max_inclusive: u64) -> u64 {
353 let mut value = FRANKEN_RETRY_JITTER_STATE.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
354 value ^= value >> 30;
355 value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9);
356 value ^= value >> 27;
357 value = value.wrapping_mul(0x94d0_49bb_1331_11eb);
358 value ^= value >> 31;
359 value % max_inclusive.saturating_add(1)
360}
361
362pub(crate) fn sleep_with_franken_retry_backoff(
365 backoff: &mut Duration,
366 remaining: Duration,
367 max_backoff: Duration,
368) {
369 let capped = (*backoff).min(remaining);
370 let extra_budget = remaining.saturating_sub(capped).min(capped);
371 let extra_ms = extra_budget.as_millis().min(u128::from(u64::MAX)) as u64;
372 let sleep_for = if extra_ms == 0 {
373 capped
374 } else {
375 capped
376 .saturating_add(Duration::from_millis(next_franken_retry_jitter_ms(
377 extra_ms,
378 )))
379 .min(remaining)
380 };
381 std::thread::sleep(sleep_for);
382 *backoff = backoff.saturating_mul(2).min(max_backoff);
383}
384
385struct DoctorMutationDbOpenGuard(Option<fs::File>);
386
387impl Drop for DoctorMutationDbOpenGuard {
388 fn drop(&mut self) {
389 if let Some(file) = self.0.as_ref() {
390 let _ = fs2::FileExt::unlock(file);
391 }
392 }
393}
394
/// Maps a database path to its doctor repair lock path.
///
/// Only files named exactly `agent_search.db` are covered; for those the
/// result is `<parent>/doctor/locks/doctor-repair.lock`. Any other file name,
/// or a path without a parent, yields `None`.
fn doctor_mutation_lock_path_for_db_open(db_path: &Path) -> Option<PathBuf> {
    let is_agent_db = matches!(
        db_path.file_name().and_then(|name| name.to_str()),
        Some("agent_search.db")
    );
    if !is_agent_db {
        return None;
    }

    let mut lock_path = db_path.parent()?.to_path_buf();
    lock_path.extend(["doctor", "locks", "doctor-repair.lock"]);
    Some(lock_path)
}
408
/// Reports whether any `key=value` line in `raw` has key `pid` (after
/// trimming) and a value that parses as this process's id.
///
/// Lines without `=` and values that are not a valid `u32` are ignored.
fn doctor_lock_metadata_pid_is_current_process(raw: &str) -> bool {
    let own_pid = std::process::id();
    for line in raw.lines() {
        let Some((key, value)) = line.split_once('=') else {
            continue;
        };
        if key.trim() != "pid" {
            continue;
        }
        if matches!(value.trim().parse::<u32>(), Ok(pid) if pid == own_pid) {
            return true;
        }
    }
    false
}
421
422fn doctor_lock_file_pid_is_current_process(file: &fs::File) -> bool {
423 use std::io::Read as _;
424
425 let Ok(mut file) = file.try_clone() else {
426 return false;
427 };
428 let mut raw = String::new();
429 let _ = std::io::Read::take(&mut file, DOCTOR_MUTATION_LOCK_MAX_METADATA_READ)
430 .read_to_string(&mut raw);
431 doctor_lock_metadata_pid_is_current_process(&raw)
432}
433
434fn acquire_doctor_mutation_db_open_guard(
435 db_path: &Path,
436 timeout: Duration,
437) -> Result<DoctorMutationDbOpenGuard> {
438 let Some(lock_path) = doctor_mutation_lock_path_for_db_open(db_path) else {
439 return Ok(DoctorMutationDbOpenGuard(None));
440 };
441 if doctor_mutation_db_open_bypass_active() {
442 return Ok(DoctorMutationDbOpenGuard(None));
443 }
444
445 if let Some(parent) = lock_path.parent() {
446 fs::create_dir_all(parent).with_context(|| {
447 format!(
448 "creating doctor mutation lock directory {} before opening {}",
449 parent.display(),
450 db_path.display()
451 )
452 })?;
453 }
454
455 let deadline = Instant::now() + timeout;
456 let mut backoff = Duration::from_millis(4);
457 loop {
458 let file = fs::OpenOptions::new()
459 .create(true)
460 .truncate(false)
461 .read(true)
462 .write(true)
463 .open(&lock_path)
464 .with_context(|| {
465 format!(
466 "opening doctor mutation lock {} before opening {}",
467 lock_path.display(),
468 db_path.display()
469 )
470 })?;
471
472 if doctor_lock_file_pid_is_current_process(&file) {
473 return Ok(DoctorMutationDbOpenGuard(None));
474 }
475
476 match fs2::FileExt::try_lock_shared(&file) {
477 Ok(()) => return Ok(DoctorMutationDbOpenGuard(Some(file))),
478 Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
479 let now = Instant::now();
480 if now >= deadline {
481 return Err(anyhow!(
482 "doctor mutation lock {} is active while opening {}; refusing to open during repair after waiting {}ms",
483 lock_path.display(),
484 db_path.display(),
485 timeout.as_millis()
486 ));
487 }
488 let remaining = deadline.saturating_duration_since(now);
489 sleep_with_franken_retry_backoff(
490 &mut backoff,
491 remaining,
492 Duration::from_millis(128),
493 );
494 }
495 Err(err) => {
496 return Err(anyhow!(
497 "failed to acquire shared doctor mutation lock {} before opening {}: {}",
498 lock_path.display(),
499 db_path.display(),
500 err
501 ));
502 }
503 }
504 }
505}
506
507pub(crate) fn open_franken_storage_with_timeout(
508 path: &Path,
509 timeout: Duration,
510) -> Result<FrankenStorage> {
511 if !path.exists() {
512 return Err(anyhow!("Database not found at {}", path.display()));
513 }
514
515 let deadline = Instant::now() + timeout;
516 let mut backoff = Duration::from_millis(4);
517 loop {
518 match FrankenStorage::open(path) {
519 Ok(storage) => return Ok(storage),
520 Err(err) if retryable_franken_anyhow(&err) => {
521 let now = Instant::now();
522 if now >= deadline {
523 return Err(err);
524 }
525 let remaining = deadline.saturating_duration_since(now);
526 sleep_with_franken_retry_backoff(
527 &mut backoff,
528 remaining,
529 Duration::from_millis(128),
530 );
531 }
532 Err(err) => return Err(err),
533 }
534 }
535}
536
/// Opens the database at `path` only if its recorded schema version is current.
///
/// Returns `Ok(None)` when `path` does not exist, or when the
/// `schema_version` row of `meta` cannot be read or parsed, or differs from
/// `CURRENT_SCHEMA_VERSION`; in the mismatch case the connection is closed
/// before returning. Otherwise the storage is transitioned, repaired and
/// configured, and returned as `Some`.
///
/// # Errors
/// Propagates failures from opening the raw connection and from
/// `transition_from_meta_version`, `repair_missing_current_schema_objects`
/// and `apply_config`.
pub(crate) fn open_current_schema_storage_with_timeout(
    path: &Path,
    timeout: Duration,
) -> Result<Option<FrankenStorage>> {
    if !path.exists() {
        return Ok(None);
    }

    let mut storage = FrankenStorage::new(
        open_franken_raw_connection_with_timeout(path, timeout)?,
        path.to_path_buf(),
    );
    storage.apply_open_stage_busy_timeout();

    // Every failure along this chain (query error, no row, wrong type,
    // non-numeric text) collapses to `None`, i.e. "not the current schema".
    let version = storage
        .raw()
        .query("SELECT value FROM meta WHERE key = 'schema_version';")
        .ok()
        .and_then(|rows| rows.first().cloned())
        .and_then(|row| row.get_typed::<String>(0).ok())
        .and_then(|raw| raw.parse::<i64>().ok());

    if version != Some(CURRENT_SCHEMA_VERSION) {
        // Prefer a close that skips the checkpoint; fall back to a
        // best-effort close if that fails.
        if let Err(close_err) = storage.close_without_checkpoint_in_place() {
            tracing::debug!(
                error = %close_err,
                db_path = %path.display(),
                "open_current_schema_storage_with_timeout: close_without_checkpoint_in_place failed; falling back to best-effort close"
            );
            storage.close_best_effort_in_place();
        }
        return Ok(None);
    }

    transition_from_meta_version(&storage.conn)?;
    storage.repair_missing_current_schema_objects()?;
    storage.apply_config()?;
    Ok(Some(storage))
}
576
577pub(crate) fn open_franken_readonly_storage_with_timeout(
578 path: &Path,
579 timeout: Duration,
580) -> Result<FrankenStorage> {
581 if !path.exists() {
582 return Err(anyhow!("Database not found at {}", path.display()));
583 }
584
585 let deadline = Instant::now() + timeout;
586 let mut backoff = Duration::from_millis(4);
587 loop {
588 match FrankenStorage::open_readonly(path) {
589 Ok(storage) => return Ok(storage),
590 Err(err) if retryable_franken_anyhow(&err) => {
591 let now = Instant::now();
592 if now >= deadline {
593 return Err(err);
594 }
595 let remaining = deadline.saturating_duration_since(now);
596 sleep_with_franken_retry_backoff(
597 &mut backoff,
598 remaining,
599 Duration::from_millis(128),
600 );
601 }
602 Err(err) => return Err(err),
603 }
604 }
605}
606
607pub(crate) fn open_franken_raw_connection_with_timeout(
608 path: &Path,
609 timeout: Duration,
610) -> Result<FrankenConnection> {
611 if !path.exists() {
612 return Err(anyhow!("Database not found at {}", path.display()));
613 }
614
615 let path_str = path.to_string_lossy().to_string();
616 let deadline = Instant::now() + timeout;
617 let mut backoff = Duration::from_millis(4);
618 loop {
619 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
620 match FrankenConnection::open(&path_str)
621 .with_context(|| format!("opening raw frankensqlite db at {}", path.display()))
622 {
623 Ok(conn) => return Ok(conn),
624 Err(err) if retryable_franken_anyhow(&err) => {
625 let now = Instant::now();
626 if now >= deadline {
627 return Err(err);
628 }
629 let remaining = deadline.saturating_duration_since(now);
630 sleep_with_franken_retry_backoff(
631 &mut backoff,
632 remaining,
633 Duration::from_millis(128),
634 );
635 }
636 Err(err) => return Err(err),
637 }
638 }
639}
640
641pub(crate) fn open_franken_raw_readonly_connection_with_timeout(
642 path: &Path,
643 timeout: Duration,
644) -> Result<FrankenConnection> {
645 if !path.exists() {
646 return Err(anyhow!("Database not found at {}", path.display()));
647 }
648
649 let path_str = path.to_string_lossy().to_string();
650 let deadline = Instant::now() + timeout;
651 let mut backoff = Duration::from_millis(4);
652 loop {
653 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
654 match open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
655 .with_context(|| {
656 format!(
657 "opening raw frankensqlite db readonly at {}",
658 path.display()
659 )
660 }) {
661 Ok(conn) => return Ok(conn),
662 Err(err) if retryable_franken_anyhow(&err) => {
663 let now = Instant::now();
664 if now >= deadline {
665 return Err(err);
666 }
667 let remaining = deadline.saturating_duration_since(now);
668 sleep_with_franken_retry_backoff(
669 &mut backoff,
670 remaining,
671 Duration::from_millis(128),
672 );
673 }
674 Err(err) => return Err(err),
675 }
676 }
677}
678
679pub(crate) fn retryable_franken_error(err: &frankensqlite::FrankenError) -> bool {
680 matches!(
681 err,
682 frankensqlite::FrankenError::Busy
683 | frankensqlite::FrankenError::BusyRecovery
684 | frankensqlite::FrankenError::BusySnapshot { .. }
685 | frankensqlite::FrankenError::DatabaseLocked { .. }
686 | frankensqlite::FrankenError::LockFailed { .. }
687 | frankensqlite::FrankenError::WriteConflict { .. }
688 | frankensqlite::FrankenError::SerializationFailure { .. }
689 ) || retryable_storage_error_message(&err.to_string())
690}
691
/// Case-insensitively checks `message` for any of the substrings that mark a
/// transient storage failure (busy, locked/locking, contention, temporarily
/// unavailable, would block).
pub(crate) fn retryable_storage_error_message(message: &str) -> bool {
    const RETRYABLE_MARKERS: [&str; 6] = [
        "busy",
        "locked",
        "locking",
        "contention",
        "temporarily unavailable",
        "would block",
    ];

    let lowered = message.to_ascii_lowercase();
    RETRYABLE_MARKERS
        .iter()
        .any(|&marker| lowered.contains(marker))
}
701
702pub(crate) fn retryable_franken_anyhow(err: &anyhow::Error) -> bool {
703 err.chain().any(|cause| {
704 cause
705 .downcast_ref::<frankensqlite::FrankenError>()
706 .is_some_and(retryable_franken_error)
707 || retryable_storage_error_message(&cause.to_string())
708 })
709}
710
711impl Drop for LazyFrankenDb {
712 fn drop(&mut self) {
713 let Some(mut conn) = self.conn.get_mut().take() else {
714 return;
715 };
716 conn.0.close_best_effort_in_place();
717 }
718}
719
/// Sizing knobs for `FrankenConnectionManager`.
#[derive(Debug, Clone)]
pub struct ConnectionManagerConfig {
    /// Number of reader connections to open.
    pub reader_count: usize,
    /// Maximum number of writers that may be checked out at once.
    pub max_writers: usize,
}

impl Default for ConnectionManagerConfig {
    /// Four readers, and one writer slot per available CPU (four if the
    /// parallelism cannot be determined).
    fn default() -> Self {
        let max_writers = match std::thread::available_parallelism() {
            Ok(parallelism) => parallelism.get(),
            Err(_) => 4,
        };
        Self {
            reader_count: 4,
            max_writers,
        }
    }
}
747
/// Owns a fixed set of reader connections and hands out a bounded number of
/// writer connections for one database file.
pub struct FrankenConnectionManager {
    /// Database file all connections are opened against.
    db_path: PathBuf,
    /// Reader connections, each behind its own mutex; `reader` picks one
    /// round-robin.
    readers: Vec<parking_lot::Mutex<SendFrankenConnection>>,
    /// Monotonic counter used to choose the next reader.
    reader_idx: std::sync::atomic::AtomicUsize,
    /// Bounded channel pre-filled with one token per allowed writer; a writer
    /// takes a token on checkout and `WriterGuard`'s drop sends it back.
    writer_tokens: (
        crossbeam_channel::Sender<()>,
        crossbeam_channel::Receiver<()>,
    ),
    /// Effective configuration (both values clamped to at least 1 in `new`).
    config: ConnectionManagerConfig,
}

// SAFETY: NOTE(review): the justification for sharing the manager across
// threads is not visible in this part of the file. Reader connections are only
// reachable through their mutexes here, but confirm the remaining fields and
// the `frankensqlite` threading contract before relying on these impls.
unsafe impl Send for FrankenConnectionManager {}
unsafe impl Sync for FrankenConnectionManager {}
776
777impl FrankenConnectionManager {
778 pub fn new(db_path: impl Into<PathBuf>, config: ConnectionManagerConfig) -> Result<Self> {
783 let db_path = db_path.into();
784 let path_str = db_path.to_string_lossy().to_string();
785
786 let reader_count = config.reader_count.max(1);
787 let mut readers = Vec::with_capacity(reader_count);
788 for _ in 0..reader_count {
789 let conn = FrankenConnection::open(&path_str)
790 .with_context(|| format!("opening reader connection at {}", db_path.display()))?;
791 let _ = conn.execute("PRAGMA busy_timeout = 5000;"); let _ = conn.execute("PRAGMA cache_size = -16384;"); readers.push(parking_lot::Mutex::new(SendFrankenConnection::new(conn)));
795 }
796
797 let max_writers = config.max_writers.max(1);
798
799 let (tx, rx) = crossbeam_channel::bounded(max_writers);
803 for _ in 0..max_writers {
804 tx.send(())
805 .map_err(|_| anyhow!("writer token channel closed during initialization"))?;
806 }
807
808 Ok(Self {
809 db_path,
810 readers,
811 reader_idx: std::sync::atomic::AtomicUsize::new(0),
812 writer_tokens: (tx, rx),
813 config: ConnectionManagerConfig {
814 reader_count,
815 max_writers,
816 },
817 })
818 }
819
820 pub fn reader(&self) -> parking_lot::MutexGuard<'_, SendFrankenConnection> {
825 let idx = self
826 .reader_idx
827 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
828 self.readers[idx % self.readers.len()].lock()
829 }
830
831 pub fn writer(&self) -> Result<WriterGuard<'_>> {
837 self.writer_tokens
838 .1
839 .recv()
840 .map_err(|_| anyhow!("writer token channel closed"))?;
841 let path_str = self.db_path.to_string_lossy().to_string();
842 let conn = match FrankenConnection::open(&path_str) {
843 Ok(c) => c,
844 Err(e) => {
845 let _ = self.writer_tokens.0.send(());
846 return Err(anyhow::Error::from(e).context(format!(
847 "opening writer connection at {}",
848 self.db_path.display()
849 )));
850 }
851 };
852 let storage = FrankenStorage::new(conn, self.db_path.clone());
853 if let Err(e) = storage.apply_config() {
854 let _ = self.writer_tokens.0.send(());
855 return Err(e);
856 }
857 Ok(WriterGuard {
858 storage,
859 mgr: self,
860 committed: false,
861 })
862 }
863
864 pub fn concurrent_writer(&self) -> Result<WriterGuard<'_>> {
869 self.writer_tokens
870 .1
871 .recv()
872 .map_err(|_| anyhow!("writer token channel closed"))?;
873 let path_str = self.db_path.to_string_lossy().to_string();
874 let conn = match FrankenConnection::open(&path_str) {
875 Ok(c) => c,
876 Err(e) => {
877 let _ = self.writer_tokens.0.send(());
878 return Err(anyhow::Error::from(e).context(format!(
879 "opening concurrent writer at {}",
880 self.db_path.display()
881 )));
882 }
883 };
884 let storage = FrankenStorage::new(conn, self.db_path.clone());
885 if let Err(e) = storage.apply_config() {
886 let _ = self.writer_tokens.0.send(());
887 return Err(e);
888 }
889 let _ = storage.raw().execute("PRAGMA cache_size = -4096;");
891 Ok(WriterGuard {
892 storage,
893 mgr: self,
894 committed: false,
895 })
896 }
897
898 pub fn db_path(&self) -> &Path {
900 &self.db_path
901 }
902
903 pub fn reader_count(&self) -> usize {
905 self.readers.len()
906 }
907
908 pub fn max_writers(&self) -> usize {
910 self.config.max_writers
911 }
912}
913
914impl Drop for FrankenConnectionManager {
915 fn drop(&mut self) {
916 for reader in &mut self.readers {
917 reader.get_mut().0.close_best_effort_in_place();
918 }
919 }
920}
921
922pub struct WriterGuard<'a> {
927 storage: FrankenStorage,
928 mgr: &'a FrankenConnectionManager,
929 committed: bool,
930}
931
932impl<'a> WriterGuard<'a> {
933 pub fn storage(&self) -> &FrankenStorage {
935 &self.storage
936 }
937
938 pub fn mark_committed(&mut self) {
943 self.committed = true;
944 }
945}
946
947impl Drop for WriterGuard<'_> {
948 fn drop(&mut self) {
949 if !self.committed {
950 let _ = self.storage.raw().execute("ROLLBACK;");
952 }
953 self.storage.close_best_effort_in_place();
954 let _ = self.mgr.writer_tokens.0.send(());
956 }
957}
958
959fn serialize_json_to_msgpack(value: &serde_json::Value) -> Option<Vec<u8>> {
968 if value.is_null() || value.as_object().is_some_and(|o| o.is_empty()) {
969 return None;
970 }
971 rmp_serde::to_vec(value).ok()
972}
973
974fn deserialize_msgpack_to_json(bytes: &[u8]) -> serde_json::Value {
977 if bytes.is_empty() {
978 return serde_json::Value::Object(serde_json::Map::new());
979 }
980 rmp_serde::from_slice(bytes).unwrap_or_else(|e| {
981 tracing::debug!(
982 error = %e,
983 bytes_len = bytes.len(),
984 "Failed to deserialize metadata - returning empty object"
985 );
986 serde_json::Value::Object(serde_json::Map::new())
987 })
988}
989
990fn franken_read_metadata_compat(
992 row: &FrankenRow,
993 json_idx: usize,
994 bin_idx: usize,
995) -> serde_json::Value {
996 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
998 && !bytes.is_empty()
999 {
1000 return deserialize_msgpack_to_json(&bytes);
1001 }
1002
1003 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1005 return serde_json::from_str(&json_str)
1006 .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
1007 }
1008
1009 serde_json::Value::Object(serde_json::Map::new())
1010}
1011
1012fn franken_read_message_extra_compat(
1013 row: &FrankenRow,
1014 json_idx: usize,
1015 bin_idx: usize,
1016) -> serde_json::Value {
1017 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1018 && !bytes.is_empty()
1019 {
1020 return deserialize_msgpack_to_json(&bytes);
1021 }
1022
1023 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1024 return serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
1025 }
1026
1027 serde_json::Value::Null
1028}
1029
/// Errors surfaced by migration and backup routines such as `create_backup`.
#[derive(Debug, Error)]
pub enum MigrationError {
    /// The database must be rebuilt; `backup_path` optionally points at a
    /// backup. NOTE(review): who creates that backup is not visible here.
    #[error("Rebuild required: {reason}")]
    RebuildRequired {
        reason: String,
        backup_path: Option<std::path::PathBuf>,
    },

    /// An error reported by the database engine.
    #[error("Database error: {0}")]
    Database(#[from] frankensqlite::FrankenError),

    /// A filesystem or other I/O error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Any other failure, carried as a plain message (used when converting
    /// from `anyhow::Error`).
    #[error("{0}")]
    Other(String),
}
1056
1057impl From<anyhow::Error> for MigrationError {
1058 fn from(e: anyhow::Error) -> Self {
1059 MigrationError::Other(e.to_string())
1060 }
1061}
1062
// NOTE(review): presumably the number of backups kept when old ones are
// pruned; the code that uses it is not visible in this part of the file.
const MAX_BACKUPS: usize = 3;
/// Pragma executed by `vacuum_into_backup_stage` before `VACUUM INTO`, setting
/// a 30 000 ms busy timeout on the read-only backup connection.
const BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA: &str = "PRAGMA busy_timeout = 30000;";
1066
/// File names that `is_user_data_file` recognizes as user data.
const USER_DATA_FILES: &[&str] = &["bookmarks.db", "tui_state.json", "sources.toml", ".env"];

/// Returns `true` when the final component of `path` is one of
/// `USER_DATA_FILES`. Paths without a final component, or whose name is not
/// valid UTF-8, are never user data files.
pub fn is_user_data_file(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(file_name) => USER_DATA_FILES
            .iter()
            .any(|candidate| *candidate == file_name),
        None => false,
    }
}
1077
/// SQL that creates the `fts_messages` FTS5 virtual table if it is missing.
///
/// The table indexes `content`, `title`, `agent`, `workspace` and
/// `source_path`, carries `created_at` as an unindexed column, is declared
/// with `content=''`, and uses the `porter` tokenizer.
pub const FTS5_REGISTER_SQL: &str = "\
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(\
    content, title, agent, workspace, source_path, \
    created_at UNINDEXED, \
    content='', tokenize='porter'\
    )";
1090
// NOTE(review): the constants below look like `meta` table keys and
// generation numbers for FTS-rebuild and daily-stats health tracking; the code
// that reads and writes them is not visible in this part of the file.
const FTS_FRANKEN_REBUILD_META_KEY: &str = "fts_frankensqlite_rebuild_generation";
const FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY: &str = "fts_frankensqlite_archive_fingerprint";
const FTS_FRANKEN_REBUILD_GENERATION: i64 = 1;
const DAILY_STATS_HEALTH_META_KEY: &str = "daily_stats_archive_fingerprint";
const DAILY_STATS_HEALTH_GENERATION_META_KEY: &str = "daily_stats_health_generation";
const DAILY_STATS_HEALTH_GENERATION: i64 = 1;

/// SQL that issues the FTS5 `'delete-all'` special command against
/// `fts_messages`.
pub const FTS5_DELETE_ALL_SQL: &str =
    "INSERT INTO fts_messages(fts_messages) VALUES('delete-all');";
1103
/// Test helper: opens the database at `db_path` and rebuilds its FTS data,
/// discarding the rebuild's return value.
#[cfg(test)]
pub(crate) fn materialize_fresh_fts_schema_via_rusqlite(db_path: &Path) -> Result<()> {
    let open_context = || {
        format!(
            "opening frankensqlite db at {} for FTS materialization",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(open_context)?;
    storage.rebuild_fts_via_frankensqlite()?;
    Ok(())
}
1119
/// Test helper: opens the database at `db_path`, rebuilds its FTS data,
/// records the rebuild generation, and returns the rebuild's count.
#[cfg(test)]
pub(crate) fn rebuild_fts_via_rusqlite(db_path: &Path) -> Result<usize> {
    let open_context = || {
        format!(
            "opening frankensqlite db at {} for FTS rebuild",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(open_context)?;
    let inserted = storage.rebuild_fts_via_frankensqlite()?;
    storage
        .record_fts_franken_rebuild_generation()
        .map(|_| inserted)
}
1132
1133pub(crate) fn ensure_fts_consistency_via_rusqlite(db_path: &Path) -> Result<FtsConsistencyRepair> {
1134 let storage = FrankenStorage::open(db_path).with_context(|| {
1138 format!(
1139 "opening frankensqlite db at {} for FTS consistency check",
1140 db_path.display()
1141 )
1142 })?;
1143 storage.ensure_search_fallback_fts_consistency()
1144}
1145
1146pub fn create_backup(db_path: &Path) -> Result<Option<std::path::PathBuf>, MigrationError> {
1150 if !bundle_path_exists(db_path)? {
1151 return Ok(None);
1152 }
1153
1154 if !copyable_bundle_file_exists(db_path)? {
1155 return Ok(None);
1156 }
1157 let _ = copyable_bundle_sidecar_sources(db_path)?;
1158
1159 let backup_path = unique_backup_path(db_path);
1160 let vacuum_stage_path = vacuum_stage_backup_path(&backup_path);
1161
1162 match vacuum_into_backup_stage(db_path, &vacuum_stage_path) {
1165 Ok(()) => {
1166 fs::rename(&vacuum_stage_path, &backup_path)?;
1167 }
1168 Err(err) if backup_vacuum_error_requires_consistent_retry(&err) => {
1169 tracing::warn!(
1170 db_path = %db_path.display(),
1171 error = %err,
1172 "create_backup: VACUUM INTO hit transient contention; refusing raw WAL bundle copy"
1173 );
1174 return Err(MigrationError::Database(err));
1175 }
1176 Err(err) => {
1177 tracing::warn!(
1178 db_path = %db_path.display(),
1179 error = %err,
1180 "create_backup: VACUUM INTO failed; falling back to raw evidence copy"
1181 );
1182 }
1183 }
1184
1185 if backup_path.exists() {
1186 sync_file_if_exists(&backup_path)?;
1187 if let Some(parent) = backup_path.parent() {
1188 sync_parent_directory(parent)?;
1189 }
1190 return Ok(Some(backup_path));
1191 }
1192
1193 copy_database_bundle(db_path, &backup_path)?;
1198
1199 Ok(Some(backup_path))
1200}
1201
/// Runs `VACUUM INTO` from a read-only connection on `db_path`, writing the
/// result to `stage_path`.
///
/// The connection is always closed before returning, whether or not the
/// VACUUM succeeded; the VACUUM result is what gets returned.
///
/// # Errors
/// Returns the error from opening the connection, from the busy-timeout
/// pragma, or from the `VACUUM INTO` statement.
fn vacuum_into_backup_stage(
    db_path: &Path,
    stage_path: &Path,
) -> std::result::Result<(), frankensqlite::FrankenError> {
    let mut conn = open_franken_with_flags(
        &db_path.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )?;
    // Run the fallible steps in a closure so the close below happens on both
    // the success and the failure path.
    let result = (|| {
        conn.execute(BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA)?;
        let path_str = stage_path.to_string_lossy();
        conn.execute_compat("VACUUM INTO ?", fparams![path_str.as_ref()])?;
        Ok(())
    })();
    // A failed close is logged and downgraded to a best-effort close; it does
    // not replace the VACUUM result.
    if let Err(close_err) = conn.close_in_place() {
        tracing::warn!(
            error = %close_err,
            db_path = %db_path.display(),
            "create_backup: close_in_place failed after VACUUM INTO; falling back to best-effort close"
        );
        conn.close_best_effort_in_place();
    }
    result
}
1226
/// Decides whether a failed backup VACUUM should abort the backup instead of
/// falling back to a raw copy. Currently this is exactly the set of errors
/// that `retryable_franken_error` classifies as retryable.
fn backup_vacuum_error_requires_consistent_retry(err: &frankensqlite::FrankenError) -> bool {
    retryable_franken_error(err)
}
1230
/// Records which files of a database bundle were moved by
/// `move_database_bundle`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DatabaseBundleMoveResult {
    /// The main database file was renamed.
    pub database: bool,
    /// The `-wal` sidecar was renamed.
    pub wal: bool,
    /// The `-shm` sidecar was renamed.
    pub shm: bool,
}

impl DatabaseBundleMoveResult {
    /// Returns `true` when at least one file of the bundle was moved.
    pub fn moved_any(&self) -> bool {
        [self.database, self.wal, self.shm].contains(&true)
    }
}
1243
/// Builds the path of a database sidecar file by appending `suffix`
/// (for example `"-wal"` or `"-shm"`) to the full text of `path`.
///
/// The suffix is appended at the OS-string level, so paths that are not valid
/// UTF-8 keep their exact bytes. A lossy string conversion would substitute
/// U+FFFD for invalid sequences and yield a sidecar path that no longer sits
/// next to the database file.
fn database_sidecar_path(path: &Path, suffix: &str) -> PathBuf {
    let mut sidecar = path.as_os_str().to_os_string();
    sidecar.push(suffix);
    PathBuf::from(sidecar)
}
1247
1248pub(crate) fn move_database_bundle(
1255 source_root: &Path,
1256 destination_root: &Path,
1257) -> std::io::Result<DatabaseBundleMoveResult> {
1258 let mut moved = DatabaseBundleMoveResult::default();
1259 if let Some(parent) = destination_root.parent() {
1260 fs::create_dir_all(parent)?;
1261 sync_parent_directory(parent)?;
1262 }
1263
1264 if bundle_path_exists(source_root)? {
1265 fs::rename(source_root, destination_root)?;
1266 moved.database = true;
1267 }
1268
1269 let wal_source = database_sidecar_path(source_root, "-wal");
1270 if bundle_path_exists(&wal_source)? {
1271 fs::rename(&wal_source, database_sidecar_path(destination_root, "-wal"))?;
1272 moved.wal = true;
1273 }
1274
1275 let shm_source = database_sidecar_path(source_root, "-shm");
1276 if bundle_path_exists(&shm_source)? {
1277 fs::rename(&shm_source, database_sidecar_path(destination_root, "-shm"))?;
1278 moved.shm = true;
1279 }
1280
1281 if moved.moved_any() {
1282 if let Some(parent) = source_root.parent() {
1283 sync_parent_directory(parent)?;
1284 }
1285 if let Some(parent) = destination_root.parent() {
1286 sync_parent_directory(parent)?;
1287 }
1288 }
1289
1290 Ok(moved)
1291}
1292
/// Reports whether anything exists at `path`, without following symlinks.
///
/// A `NotFound` error maps to `Ok(false)`; every other metadata error is
/// returned to the caller.
fn bundle_path_exists(path: &Path) -> std::io::Result<bool> {
    match fs::symlink_metadata(path) {
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
        lookup => lookup.map(|_| true),
    }
}
1300
1301fn copy_database_bundle(source_root: &Path, destination_root: &Path) -> Result<()> {
1302 if let Some(parent) = destination_root.parent() {
1303 fs::create_dir_all(parent).with_context(|| {
1304 format!(
1305 "creating destination directory for database bundle copy: {}",
1306 parent.display()
1307 )
1308 })?;
1309 sync_parent_directory(parent)
1310 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1311 }
1312
1313 if !copyable_bundle_file_exists(source_root)? {
1314 bail!(
1315 "database bundle root is missing before copy: {}",
1316 source_root.display()
1317 );
1318 }
1319
1320 let sidecars = copyable_bundle_sidecar_sources(source_root)?;
1321
1322 fs::copy(source_root, destination_root).with_context(|| {
1323 format!(
1324 "copying database bundle {} -> {}",
1325 source_root.display(),
1326 destination_root.display()
1327 )
1328 })?;
1329 sync_file_if_exists(destination_root).with_context(|| {
1330 format!(
1331 "syncing copied database bundle {}",
1332 destination_root.display()
1333 )
1334 })?;
1335
1336 for (source_sidecar, suffix) in sidecars {
1337 let destination_sidecar = database_sidecar_path(destination_root, suffix);
1338 fs::copy(&source_sidecar, &destination_sidecar).with_context(|| {
1339 format!(
1340 "copying database bundle sidecar {} -> {}",
1341 source_sidecar.display(),
1342 destination_sidecar.display()
1343 )
1344 })?;
1345 sync_file_if_exists(&destination_sidecar).with_context(|| {
1346 format!(
1347 "syncing copied database bundle sidecar {}",
1348 destination_sidecar.display()
1349 )
1350 })?;
1351 }
1352
1353 if let Some(parent) = destination_root.parent() {
1354 sync_parent_directory(parent)
1355 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1356 }
1357
1358 Ok(())
1359}
1360
1361fn copyable_bundle_sidecar_sources(source_root: &Path) -> Result<Vec<(PathBuf, &'static str)>> {
1362 let mut sidecars = Vec::new();
1363 for suffix in ["-wal", "-shm"] {
1364 let source_sidecar = database_sidecar_path(source_root, suffix);
1365 if copyable_bundle_file_exists(&source_sidecar)? {
1366 sidecars.push((source_sidecar, suffix));
1367 }
1368 }
1369 Ok(sidecars)
1370}
1371
1372fn copyable_bundle_file_exists(path: &Path) -> Result<bool> {
1373 match fs::symlink_metadata(path) {
1374 Ok(metadata) => {
1375 let file_type = metadata.file_type();
1376 if file_type.is_symlink() {
1377 bail!(
1378 "refusing to copy database bundle symlink: {}",
1379 path.display()
1380 );
1381 }
1382 if !file_type.is_file() {
1383 bail!(
1384 "refusing to copy non-file database bundle path: {}",
1385 path.display()
1386 );
1387 }
1388 Ok(true)
1389 }
1390 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1391 Err(err) => Err(err).with_context(|| {
1392 format!(
1393 "checking database bundle path before copy: {}",
1394 path.display()
1395 )
1396 }),
1397 }
1398}
1399
1400pub(crate) fn remove_database_files(path: &Path) -> std::io::Result<()> {
1402 let mut removed_any = false;
1403
1404 match fs::remove_file(path) {
1405 Ok(()) => removed_any = true,
1406 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1407 Err(err) => return Err(err),
1408 }
1409
1410 for suffix in ["-wal", "-shm"] {
1412 match fs::remove_file(database_sidecar_path(path, suffix)) {
1413 Ok(()) => removed_any = true,
1414 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1415 Err(err) => return Err(err),
1416 }
1417 }
1418
1419 if removed_any && let Some(parent) = path.parent() {
1420 sync_parent_directory(parent)?;
1421 }
1422
1423 Ok(())
1424}
1425
/// Opens the directory at `path` and calls `sync_all` on its handle.
///
/// # Errors
/// Returns the error from opening the directory or from the sync call.
#[cfg(not(windows))]
fn sync_parent_directory(path: &Path) -> std::io::Result<()> {
    let directory = fs::File::open(path)?;
    directory.sync_all()
}
1430
/// Windows build of the directory-sync helper: performs no work and always
/// returns `Ok(())`.
// NOTE(review): presumably a no-op because a directory cannot be opened and
// synced through `File` on Windows the way the non-Windows variant does —
// confirm.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
1435
/// Calls `sync_all` on the file at `path`, or does nothing when `path` does
/// not exist.
///
/// # Errors
/// Returns the error from opening the file or from the sync call.
// NOTE(review): the file is opened read-only before syncing; some platforms
// may require a writable handle for a flush to succeed — confirm on Windows.
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    if !path.exists() {
        return Ok(());
    }
    let file = fs::File::open(path)?;
    file.sync_all()
}
1442
1443pub fn cleanup_old_backups(db_path: &Path, keep_count: usize) -> Result<(), std::io::Error> {
1445 let parent = match db_path.parent() {
1446 Some(p) => p,
1447 None => return Ok(()),
1448 };
1449
1450 let db_name = db_path.file_name().and_then(|n| n.to_str()).unwrap_or("db");
1451
1452 let prefix = format!("{}.backup.", db_name);
1453
1454 let mut backups: Vec<(std::path::PathBuf, SystemTime)> = Vec::new();
1456
1457 if let Ok(entries) = fs::read_dir(parent) {
1458 for entry in entries.flatten() {
1459 let path = entry.path();
1460 if let Some(name) = path.file_name().and_then(|n| n.to_str())
1461 && is_backup_root_name(name, &prefix)
1462 && let Ok(meta) = fs::metadata(&path)
1463 && meta.is_file()
1464 && let Ok(mtime) = meta.modified()
1465 {
1466 backups.push((path, mtime));
1467 }
1468 }
1469 }
1470
1471 backups.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1473
1474 for (path, _) in backups.into_iter().skip(keep_count) {
1476 let _ = fs::remove_file(&path);
1477
1478 let _ = fs::remove_file(database_sidecar_path(&path, "-wal"));
1480 let _ = fs::remove_file(database_sidecar_path(&path, "-shm"));
1481 }
1482
1483 Ok(())
1484}
1485
/// A historical copy of the database (backup, corrupt copy, snapshot, ...)
/// found by `discover_historical_database_bundles`, with the facts used to
/// rank it.
#[derive(Debug, Clone)]
pub(crate) struct HistoricalDatabaseBundle {
    /// Path of the bundle's main database file.
    root_path: PathBuf,
    /// Combined size of the main file and its `-wal` / `-shm` sidecars.
    total_bytes: u64,
    /// Modification time of the main file in milliseconds since the Unix
    /// epoch (`0` when it could not be read).
    modified_at_ms: i64,
    /// Whether the bundle opened read-only and its core tables were queryable.
    supports_direct_readonly: bool,
    /// Schema / FTS / message-id facts gathered from the bundle.
    probe: HistoricalBundleProbe,
}
1494
/// Facts read from a historical bundle by `probe_historical_bundle`.
///
/// The `Default` value is used when the bundle cannot be opened directly.
#[derive(Debug, Clone, Copy, Default)]
struct HistoricalBundleProbe {
    /// Parsed `schema_version` entry of the `meta` table, if readable.
    schema_version: Option<i64>,
    /// Number of `sqlite_master` rows named `fts_messages`; `None` when the
    /// count query failed.
    fts_schema_rows: Option<i64>,
    /// Whether a `LIMIT 1` read from `fts_messages` succeeded.
    fts_queryable: bool,
    /// Largest `messages.id`, or `0` when the query failed or the table is empty.
    max_message_id: i64,
}
1502
/// Test-only health summary produced by
/// `probe_database_health_via_frankensqlite`.
#[cfg(test)]
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SqliteDatabaseHealthProbe {
    /// Parsed `schema_version` entry of the `meta` table, if present.
    pub schema_version: Option<i64>,
    /// Whether `PRAGMA quick_check(1)` reported `ok`.
    pub quick_check_ok: bool,
    /// Number of `sqlite_master` rows named `fts_messages`.
    pub fts_schema_rows: i64,
    /// Whether exactly one FTS schema row exists and a `LIMIT 1` read succeeded.
    pub fts_queryable: bool,
    /// Row count of `messages` (`0` when the quick check failed).
    pub message_count: i64,
    /// Largest `messages.id` (`0` when the quick check failed).
    pub max_message_id: i64,
}
1514
/// Outcome of an FTS consistency check/repair.
// NOTE(review): the producer of this enum is outside this view; the variant
// descriptions below are inferred from the variant and field names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FtsConsistencyRepair {
    /// Presumably: the index was already consistent; `rows` is its row count.
    AlreadyHealthy {
        rows: usize,
    },
    /// Presumably: only missing rows were added to the existing index.
    IncrementalCatchUp {
        inserted_rows: usize,
        total_rows: usize,
    },
    /// Presumably: the index was rebuilt from scratch.
    Rebuilt {
        inserted_rows: usize,
    },
}
1528
/// Running totals for a historical salvage pass.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HistoricalSalvageOutcome {
    /// Number of bundles examined.
    pub bundles_considered: usize,
    /// Number of bundles that contributed data.
    pub bundles_imported: usize,
    /// Number of conversations imported.
    pub conversations_imported: usize,
    /// Number of messages imported.
    pub messages_imported: usize,
}

impl HistoricalSalvageOutcome {
    /// Adds every counter of `other` onto `self`.
    ///
    /// Counters saturate at `usize::MAX` instead of overflowing. Row counts
    /// elsewhere in this file are clamped to `usize::MAX` when they do not fit,
    /// so a plain `+=` could panic (debug builds) or wrap (release builds).
    pub(crate) fn accumulate(&mut self, other: Self) {
        self.bundles_considered = self
            .bundles_considered
            .saturating_add(other.bundles_considered);
        self.bundles_imported = self.bundles_imported.saturating_add(other.bundles_imported);
        self.conversations_imported = self
            .conversations_imported
            .saturating_add(other.conversations_imported);
        self.messages_imported = self
            .messages_imported
            .saturating_add(other.messages_imported);
    }
}
1545
/// A read connection to a historical bundle, plus how it was obtained.
#[derive(Debug)]
struct HistoricalReadConnection {
    /// Open connection to the bundle or to its recovered copy.
    conn: FrankenConnection,
    /// Label of the open strategy: `"direct-readonly"` or `"sqlite3-recover"`.
    method: &'static str,
    /// Temporary directory holding the recovered database when the
    /// `sqlite3 .recover` path was used; stored here so it stays alive as long
    /// as the connection. `None` for direct opens.
    _tempdir: Option<tempfile::TempDir>,
}
1552
/// Schema executed on the fresh temporary database that receives the
/// `INSERT` statements replayed from `sqlite3 .recover` output.
///
/// It declares only the six core tables (`sources`, `agents`, `workspaces`,
/// `conversations`, `messages`, `snippets`) with a primary key each and no
/// other constraints or indexes.
const HISTORICAL_RECOVERY_CORE_SCHEMA: &str = r"
CREATE TABLE sources (
    id TEXT PRIMARY KEY,
    kind TEXT,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE agents (
    id INTEGER PRIMARY KEY,
    slug TEXT,
    name TEXT,
    version TEXT,
    kind TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT,
    display_name TEXT
);
CREATE TABLE conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER,
    workspace_id INTEGER,
    source_id TEXT,
    external_id TEXT,
    title TEXT,
    source_path TEXT,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
CREATE TABLE messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER,
    idx INTEGER,
    role TEXT,
    author TEXT,
    created_at INTEGER,
    content TEXT,
    extra_json TEXT,
    extra_bin BLOB
);
CREATE TABLE snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);
";
/// Version written as `salvage_version` into the `meta` ledger entry recorded
/// for each imported historical bundle.
const HISTORICAL_SALVAGE_LEDGER_VERSION: u32 = 2;
/// Version tag for salvage progress records.
// NOTE(review): presumably stored in `HistoricalBundleProgress::progress_version`
// — the use site is outside this view; confirm.
const HISTORICAL_SALVAGE_PROGRESS_VERSION: u32 = 1;
/// Five minutes, expressed in milliseconds.
// NOTE(review): by its name this bounds how far conversation start times may
// differ when merging by source path — the use site is outside this view; confirm.
const SOURCE_PATH_MERGE_START_TOLERANCE_MS: i64 = 5 * 60 * 1000;
1630
/// Serializable progress record for the import of one historical bundle.
// NOTE(review): nothing in this view reads or writes this struct; presumably it
// is persisted so an interrupted salvage can resume — confirm against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct HistoricalBundleProgress {
    /// Format version of this record.
    progress_version: u32,
    /// Path of the bundle the progress refers to.
    path: String,
    /// Bundle size in bytes.
    bytes: u64,
    /// Bundle modification time in milliseconds.
    modified_at_ms: i64,
    /// Label of the method used to read the bundle.
    method: String,
    /// Source row id of the last fully processed row.
    last_completed_source_row_id: i64,
    /// Conversations imported so far.
    conversations_imported: usize,
    /// Messages imported so far.
    messages_imported: usize,
    /// Time of the last update, in milliseconds.
    updated_at_ms: i64,
}
1643
/// One conversation queued for a batched historical import.
// NOTE(review): usage is outside this view; the id fields presumably refer to
// rows of the database being imported into — confirm.
#[derive(Debug, Clone)]
struct HistoricalBatchEntry {
    /// Row id of the conversation in the source bundle.
    source_row_id: i64,
    /// Agent the conversation belongs to.
    agent_id: i64,
    /// Workspace the conversation belongs to, if any.
    workspace_id: Option<i64>,
    /// The conversation payload itself.
    conversation: Conversation,
}
1651
/// Counters describing what a batched historical import inserted.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct HistoricalBatchImportTotals {
    /// Number of source rows (conversations) inserted.
    inserted_source_rows: usize,
    /// Number of messages inserted.
    inserted_messages: usize,
}
1657
1658fn historical_bundle_root_paths(db_path: &Path) -> Vec<PathBuf> {
1659 let mut roots = Vec::new();
1660 let Some(parent) = db_path.parent() else {
1661 return roots;
1662 };
1663 let db_name = db_path
1664 .file_name()
1665 .and_then(|n| n.to_str())
1666 .unwrap_or("agent_search.db");
1667 let db_stem = db_path
1668 .file_stem()
1669 .and_then(|n| n.to_str())
1670 .unwrap_or("agent_search");
1671
1672 let mut push_root = |path: PathBuf| {
1673 if path == db_path {
1674 return;
1675 }
1676 if !roots.iter().any(|existing| existing == &path) {
1677 roots.push(path);
1678 }
1679 };
1680
1681 if let Ok(entries) = fs::read_dir(parent) {
1682 for entry in entries.flatten() {
1683 let path = entry.path();
1684 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1685 continue;
1686 };
1687 if has_db_sidecar_suffix(name) {
1688 continue;
1689 }
1690 if name.starts_with(&format!("{db_name}.backup."))
1691 || name.starts_with(&format!("{db_stem}.corrupt."))
1692 {
1693 push_root(path);
1694 }
1695 }
1696 }
1697
1698 let backups_dir = parent.join("backups");
1699 if let Ok(entries) = fs::read_dir(backups_dir) {
1700 for entry in entries.flatten() {
1701 let path = entry.path();
1702 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1703 continue;
1704 };
1705 if has_db_sidecar_suffix(name) {
1706 continue;
1707 }
1708 if name.starts_with(&format!("{db_name}.")) && name.ends_with(".bak") {
1709 push_root(path);
1710 }
1711 }
1712 }
1713
1714 push_named_database_children(&mut roots, db_path, &parent.join("repair-lab"), db_name);
1715 push_named_database_children(&mut roots, db_path, &parent.join("snapshots"), db_name);
1716
1717 roots
1718}
1719
/// For every entry of `dir`, appends `<entry>/<db_name>` to `roots` when that
/// path exists, is not `canonical_db_path`, and is not already listed.
///
/// An unreadable `dir` leaves `roots` untouched.
fn push_named_database_children(
    roots: &mut Vec<PathBuf>,
    canonical_db_path: &Path,
    dir: &Path,
    db_name: &str,
) {
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };

    let candidates = entries.flatten().map(|entry| entry.path().join(db_name));
    for candidate in candidates {
        let is_canonical = candidate == canonical_db_path;
        if is_canonical || !candidate.exists() || roots.contains(&candidate) {
            continue;
        }
        roots.push(candidate);
    }
}
1738
/// Returns the modification time of `path` in milliseconds since the Unix
/// epoch, or `0` when the metadata or timestamp is unavailable or precedes
/// the epoch.
fn file_mtime_ms(path: &Path) -> i64 {
    let modified = match fs::metadata(path).and_then(|meta| meta.modified()) {
        Ok(timestamp) => timestamp,
        Err(_) => return 0,
    };
    match modified.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as i64,
        Err(_) => 0,
    }
}
1747
1748fn bundle_total_bytes(root_path: &Path) -> u64 {
1749 let mut total = fs::metadata(root_path).map(|meta| meta.len()).unwrap_or(0);
1750 for suffix in ["-wal", "-shm"] {
1751 let sidecar = database_sidecar_path(root_path, suffix);
1752 total = total.saturating_add(fs::metadata(sidecar).map(|meta| meta.len()).unwrap_or(0));
1753 }
1754 total
1755}
1756
1757pub(crate) fn discover_historical_database_bundles(
1758 db_path: &Path,
1759) -> Vec<HistoricalDatabaseBundle> {
1760 let mut bundles: Vec<_> = historical_bundle_root_paths(db_path)
1761 .into_iter()
1762 .filter(|root| root.exists())
1763 .map(|root_path| {
1764 let modified_at_ms = file_mtime_ms(&root_path);
1765 let total_bytes = bundle_total_bytes(&root_path);
1766 let supports_direct_readonly = historical_bundle_supports_direct_readonly(&root_path);
1767 let probe = probe_historical_bundle(&root_path, supports_direct_readonly);
1768 HistoricalDatabaseBundle {
1769 modified_at_ms,
1770 total_bytes,
1771 supports_direct_readonly,
1772 root_path,
1773 probe,
1774 }
1775 })
1776 .filter(|bundle| bundle.total_bytes > 0)
1777 .collect();
1778
1779 fn bundle_priority(path: &Path) -> i32 {
1780 let path_str = path.to_string_lossy();
1781 if path_str.contains("/repair-lab/replay-") {
1782 return 5;
1783 }
1784 if path_str.contains("/repair-lab/") {
1785 return 4;
1786 }
1787 if path_str.contains("/snapshots/") {
1788 return 3;
1789 }
1790 if path_str.contains(".corrupt.") || path_str.contains("failed-baseline-seed") {
1791 return 0;
1792 }
1793 1
1794 }
1795
1796 fn bundle_health_rank(bundle: &HistoricalDatabaseBundle) -> i32 {
1797 let fts_clean = match bundle.probe.fts_schema_rows {
1820 Some(1) => bundle.probe.fts_queryable,
1821 Some(0) => bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION),
1822 _ => false,
1823 };
1824
1825 let clean_schema14_fts =
1826 bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION) && fts_clean;
1827 if clean_schema14_fts {
1828 return 5;
1829 }
1830
1831 if fts_clean {
1832 return 4;
1833 }
1834
1835 if bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION)
1836 && bundle.supports_direct_readonly
1837 {
1838 return 3;
1839 }
1840
1841 if bundle.supports_direct_readonly {
1842 return 2;
1843 }
1844
1845 1
1846 }
1847
1848 bundles.sort_by(|left, right| {
1849 bundle_health_rank(right)
1850 .cmp(&bundle_health_rank(left))
1851 .then_with(|| right.probe.max_message_id.cmp(&left.probe.max_message_id))
1852 .then_with(|| bundle_priority(&right.root_path).cmp(&bundle_priority(&left.root_path)))
1853 .then_with(|| {
1854 right
1855 .supports_direct_readonly
1856 .cmp(&left.supports_direct_readonly)
1857 })
1858 .then_with(|| right.total_bytes.cmp(&left.total_bytes))
1859 .then_with(|| right.modified_at_ms.cmp(&left.modified_at_ms))
1860 .then_with(|| right.root_path.cmp(&left.root_path))
1861 });
1862 bundles
1863}
1864
1865fn probe_historical_bundle(
1866 root_path: &Path,
1867 supports_direct_readonly: bool,
1868) -> HistoricalBundleProbe {
1869 if !supports_direct_readonly {
1870 return HistoricalBundleProbe::default();
1871 }
1872
1873 let Ok(conn) = open_historical_bundle_readonly(root_path) else {
1874 return HistoricalBundleProbe::default();
1875 };
1876
1877 let schema_version = read_meta_schema_version(&conn).ok().flatten();
1878 let fts_schema_rows: Option<i64> = conn
1879 .query_row_map(
1880 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
1881 fparams![],
1882 |row| row.get_typed(0),
1883 )
1884 .ok();
1885 let fts_queryable =
1886 historical_bundle_fts_queryable_via_frankensqlite(root_path, fts_schema_rows);
1887 let max_message_id: i64 = conn
1888 .query_row_map(
1889 "SELECT COALESCE(MAX(id), 0) FROM messages",
1890 fparams![],
1891 |row| row.get_typed(0),
1892 )
1893 .unwrap_or(0);
1894
1895 HistoricalBundleProbe {
1896 schema_version,
1897 fts_schema_rows,
1898 fts_queryable,
1899 max_message_id,
1900 }
1901}
1902
1903fn historical_bundle_fts_queryable_via_frankensqlite(
1904 root_path: &Path,
1905 fts_schema_rows: Option<i64>,
1906) -> bool {
1907 matches!(fts_schema_rows, Some(1))
1908 && FrankenStorage::open_readonly(root_path)
1909 .map(|storage| {
1910 storage
1911 .raw()
1912 .query("SELECT rowid FROM fts_messages LIMIT 1")
1913 .is_ok()
1914 })
1915 .unwrap_or(false)
1916}
1917
1918fn historical_bundle_supports_direct_readonly(root_path: &Path) -> bool {
1919 open_historical_bundle_readonly(root_path)
1920 .and_then(|conn| historical_bundle_has_queryable_core_tables(&conn))
1921 .is_ok()
1922}
1923
1924fn historical_table_exists(conn: &FrankenConnection, table: &str) -> Result<bool> {
1925 let found: Option<i64> = conn
1926 .query_row_map(
1927 "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1 LIMIT 1",
1928 fparams![table],
1929 |row| row.get_typed(0),
1930 )
1931 .optional()
1932 .with_context(|| format!("checking for historical table {table}"))?;
1933 Ok(found.is_some())
1934}
1935
1936fn probe_historical_table_reads(conn: &FrankenConnection, table: &str) -> Result<()> {
1937 if !historical_table_exists(conn, table)? {
1938 return Err(anyhow!(
1939 "historical database missing required table {table}"
1940 ));
1941 }
1942
1943 let sql = format!("SELECT rowid FROM {table} LIMIT 1");
1944 let _: Option<i64> = conn
1945 .query_row_map(&sql, fparams![], |row| row.get_typed(0))
1946 .optional()
1947 .with_context(|| format!("probing rows from historical table {table}"))?;
1948 Ok(())
1949}
1950
1951fn historical_bundle_has_queryable_core_tables(conn: &FrankenConnection) -> Result<()> {
1952 probe_historical_table_reads(conn, "conversations")?;
1953 probe_historical_table_reads(conn, "messages")?;
1954 Ok(())
1955}
1956
1957fn open_historical_bundle_readonly(root_path: &Path) -> Result<FrankenConnection> {
1958 let path_str = root_path.to_string_lossy();
1959 let flags = FrankenOpenFlags::SQLITE_OPEN_READ_ONLY;
1960 let conn = open_franken_with_flags(&path_str, flags)
1961 .with_context(|| format!("opening historical database {}", root_path.display()))?;
1962 Ok(conn)
1963}
1964
/// Reports whether `line` is an `INSERT INTO` / `INSERT OR IGNORE INTO`
/// statement that targets one of the core recovery tables.
///
/// The table name must directly follow the keyword and be wrapped in matching
/// single or double quotes; the statement must start at the first character
/// of the line.
fn is_recoverable_insert_line(line: &str) -> bool {
    const RECOVERABLE_TABLES: [&str; 6] = [
        "sources",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
    ];

    // Peel off the statement keyword; the two spellings cannot both match.
    let target = match line
        .strip_prefix("INSERT OR IGNORE INTO ")
        .or_else(|| line.strip_prefix("INSERT INTO "))
    {
        Some(rest) => rest,
        None => return false,
    };

    ['\'', '"'].iter().any(|&quote| {
        RECOVERABLE_TABLES.iter().any(|table| {
            target
                .strip_prefix(quote)
                .and_then(|quoted| quoted.strip_prefix(*table))
                .map_or(false, |tail| tail.starts_with(quote))
        })
    })
}
1982
1983fn recover_historical_bundle_via_sqlite3(
1984 bundle: &HistoricalDatabaseBundle,
1985) -> Result<HistoricalReadConnection> {
1986 let tempdir = tempfile::TempDir::new().context("creating temporary salvage directory")?;
1987 let recovered_db = tempdir.path().join("historical-recovered.db");
1988 let temp_conn = FrankenConnection::open(recovered_db.to_string_lossy().as_ref())
1989 .with_context(|| format!("creating recovered database {}", recovered_db.display()))?;
1990 temp_conn
1991 .execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA)
1992 .with_context(|| format!("initializing recovered schema {}", recovered_db.display()))?;
1993 drop(temp_conn);
1994
1995 let bundle_uri = format!("file:{}?immutable=1", bundle.root_path.to_string_lossy());
1996 let mut recover = Command::new("sqlite3")
1997 .arg(&bundle_uri)
1998 .arg(".recover")
1999 .stdout(Stdio::piped())
2000 .spawn()
2001 .with_context(|| {
2002 format!(
2003 "launching sqlite3 .recover for historical bundle {}",
2004 bundle.root_path.display()
2005 )
2006 })?;
2007 let recover_stdout = recover
2008 .stdout
2009 .take()
2010 .context("capturing sqlite3 .recover stdout")?;
2011
2012 let mut importer = Command::new("sqlite3")
2013 .arg(&recovered_db)
2014 .stdin(Stdio::piped())
2015 .spawn()
2016 .with_context(|| {
2017 format!(
2018 "launching sqlite3 importer for recovered bundle {}",
2019 recovered_db.display()
2020 )
2021 })?;
2022
2023 {
2024 let importer_stdin = importer
2025 .stdin
2026 .as_mut()
2027 .context("opening sqlite3 importer stdin")?;
2028 importer_stdin
2029 .write_all(b"BEGIN;\n")
2030 .context("starting recovery import transaction")?;
2031
2032 let reader = BufReader::new(recover_stdout);
2033 for line in reader.lines() {
2034 let line = line.context("reading sqlite3 .recover output")?;
2035 if is_recoverable_insert_line(&line) {
2036 importer_stdin
2037 .write_all(line.as_bytes())
2038 .context("writing recovered INSERT")?;
2039 importer_stdin
2040 .write_all(b"\n")
2041 .context("writing recovered INSERT newline")?;
2042 }
2043 }
2044
2045 importer_stdin
2046 .write_all(b"COMMIT;\n")
2047 .context("committing recovery import transaction")?;
2048 }
2049
2050 let recover_status = recover
2051 .wait()
2052 .context("waiting for sqlite3 .recover process")?;
2053 if !recover_status.success() {
2054 anyhow::bail!(
2055 "sqlite3 .recover exited with status {} for {}",
2056 recover_status,
2057 bundle.root_path.display()
2058 );
2059 }
2060
2061 let importer_status = importer
2062 .wait()
2063 .context("waiting for sqlite3 recovery importer")?;
2064 if !importer_status.success() {
2065 anyhow::bail!(
2066 "sqlite3 recovery importer exited with status {} for {}",
2067 importer_status,
2068 recovered_db.display()
2069 );
2070 }
2071
2072 let conn = open_historical_bundle_readonly(&recovered_db)?;
2073 historical_bundle_has_queryable_core_tables(&conn)?;
2074 Ok(HistoricalReadConnection {
2075 conn,
2076 method: "sqlite3-recover",
2077 _tempdir: Some(tempdir),
2078 })
2079}
2080
2081fn open_historical_bundle_for_salvage(
2082 bundle: &HistoricalDatabaseBundle,
2083) -> Result<HistoricalReadConnection> {
2084 match open_historical_bundle_readonly(&bundle.root_path) {
2085 Ok(conn) => {
2086 if historical_bundle_has_queryable_core_tables(&conn).is_ok() {
2087 return Ok(HistoricalReadConnection {
2088 conn,
2089 method: "direct-readonly",
2090 _tempdir: None,
2091 });
2092 }
2093 }
2094 Err(err) => {
2095 tracing::warn!(
2096 path = %bundle.root_path.display(),
2097 error = %err,
2098 "historical bundle direct open failed; falling back to sqlite3 .recover"
2099 );
2100 }
2101 }
2102
2103 recover_historical_bundle_via_sqlite3(bundle)
2104}
2105
2106fn historical_bundle_counts(conn: &FrankenConnection) -> Result<(usize, usize)> {
2107 let conversations: i64 =
2108 conn.query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
2109 row.get_typed(0)
2110 })?;
2111 let messages: i64 = conn.query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
2112 row.get_typed(0)
2113 })?;
2114 Ok((
2115 usize::try_from(conversations.max(0)).unwrap_or(usize::MAX),
2116 usize::try_from(messages.max(0)).unwrap_or(usize::MAX),
2117 ))
2118}
2119
/// Removes runtime bookkeeping rows from the `meta` table of a seeded
/// database: every `historical_bundle_salvaged:*` ledger entry plus the
/// `last_scan_ts`, `last_indexed_at`, and `last_embedded_message_id` keys.
///
/// # Errors
/// Fails when the `DELETE` statement fails.
fn clear_seeded_runtime_meta(conn: &FrankenConnection) -> Result<()> {
    conn.execute(
        "DELETE FROM meta
         WHERE key LIKE 'historical_bundle_salvaged:%'
            OR key IN ('last_scan_ts', 'last_indexed_at', 'last_embedded_message_id')",
    )?;
    Ok(())
}
2128
2129fn record_historical_bundle_import(
2130 conn: &FrankenConnection,
2131 bundle: &HistoricalDatabaseBundle,
2132 method: &str,
2133 conversations_imported: usize,
2134 messages_imported: usize,
2135) -> Result<()> {
2136 let key = FrankenStorage::historical_bundle_meta_key(bundle);
2137 let value = serde_json::json!({
2138 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
2139 "path": bundle.root_path.display().to_string(),
2140 "bytes": bundle.total_bytes,
2141 "modified_at_ms": bundle.modified_at_ms,
2142 "method": method,
2143 "conversations_imported": conversations_imported,
2144 "messages_imported": messages_imported,
2145 "recorded_at_ms": FrankenStorage::now_millis(),
2146 });
2147 let value_str = serde_json::to_string(&value)?;
2148 conn.execute_compat(
2149 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
2150 fparams![key, value_str],
2151 )?;
2152 Ok(())
2153}
2154
2155fn finalize_seeded_canonical_bundle_via_rusqlite(
2156 canonical_db_path: &Path,
2157 bundle: &HistoricalDatabaseBundle,
2158 conversations_imported: usize,
2159 messages_imported: usize,
2160) -> Result<()> {
2161 let _fts_repair =
2162 ensure_fts_consistency_via_rusqlite(canonical_db_path).with_context(|| {
2163 format!(
2164 "repairing staged canonical FTS consistency before finalization: {}",
2165 canonical_db_path.display()
2166 )
2167 })?;
2168
2169 let path_str = canonical_db_path.to_string_lossy();
2170 let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
2171 format!(
2172 "opening seeded canonical database for post-seed finalization: {}",
2173 canonical_db_path.display()
2174 )
2175 })?;
2176 conn.execute("PRAGMA busy_timeout = 30000;")
2177 .with_context(|| {
2178 format!(
2179 "configuring busy timeout for seeded canonical database {}",
2180 canonical_db_path.display()
2181 )
2182 })?;
2183 let schema_version = read_meta_schema_version(&conn)?;
2184
2185 if let Some(version) = schema_version
2186 && version < CURRENT_SCHEMA_VERSION
2187 && version != 13
2188 {
2189 anyhow::bail!(
2190 "seeded canonical bundle schema_version {version} is too old for baseline import and cannot be finalized automatically"
2191 );
2192 }
2193
2194 clear_seeded_runtime_meta(&conn)?;
2195
2196 conn.execute_compat(
2197 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1)",
2198 fparams![CURRENT_SCHEMA_VERSION.to_string()],
2199 )?;
2200
2201 conn.execute_compat(
2202 "INSERT OR IGNORE INTO _schema_migrations(version, name) VALUES(?1, 'fts_contentless')",
2203 fparams![CURRENT_SCHEMA_VERSION],
2204 )?;
2205 record_historical_bundle_import(
2206 &conn,
2207 bundle,
2208 "baseline-bulk-sql-copy",
2209 conversations_imported,
2210 messages_imported,
2211 )?;
2212 Ok(())
2213}
2214
2215fn read_meta_schema_version(conn: &FrankenConnection) -> Result<Option<i64>> {
2216 let version: Option<String> = conn
2217 .query_row_map(
2218 "SELECT value FROM meta WHERE key = 'schema_version'",
2219 fparams![],
2220 |row| row.get_typed(0),
2221 )
2222 .optional()?;
2223 Ok(version.and_then(|raw| raw.parse::<i64>().ok()))
2224}
2225
/// Test helper: counts the `sqlite_master` rows named `fts_messages`.
///
/// # Errors
/// Fails when the count query fails.
#[cfg(test)]
fn franken_fts_schema_rows(conn: &FrankenConnection) -> Result<i64> {
    const COUNT_SQL: &str = "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'";

    let counted = conn.query_row_map(COUNT_SQL, fparams![], |row| row.get_typed(0));
    counted.context("counting sqlite_master rows for fts_messages via frankensqlite")
}
2235
/// Test helper: reports whether a `LIMIT 1` read from `fts_messages` succeeds
/// on `conn`.
#[cfg(test)]
fn franken_fts_limit_probe(conn: &FrankenConnection) -> bool {
    conn.query("SELECT rowid FROM fts_messages LIMIT 1").is_ok()
}
2240
/// Test helper: opens the database at `db_path` and summarises its health.
///
/// Collects the schema version, the `PRAGMA quick_check(1)` verdict, and the
/// FTS schema/queryability state. Message statistics are read only when the
/// quick check passed; otherwise they are reported as `0`.
///
/// # Errors
/// Fails when the database cannot be opened or a required query fails.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn probe_database_health_via_frankensqlite(
    db_path: &Path,
) -> Result<SqliteDatabaseHealthProbe> {
    let conn = FrankenConnection::open(db_path.to_string_lossy().as_ref()).with_context(|| {
        format!(
            "opening frankensqlite db at {} for database health probe",
            db_path.display()
        )
    })?;
    conn.execute_batch("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for database health probe at {}",
                db_path.display()
            )
        })?;

    let schema_version = read_meta_schema_version(&conn)?;

    let quick_check_status: String = conn
        .query_row_map("PRAGMA quick_check(1)", fparams![], |row| row.get_typed(0))
        .with_context(|| format!("running PRAGMA quick_check(1) for {}", db_path.display()))?;
    let quick_check_ok = quick_check_status.trim().eq_ignore_ascii_case("ok");

    let fts_schema_rows = franken_fts_schema_rows(&conn)?;
    let fts_queryable = fts_schema_rows == 1 && franken_fts_limit_probe(&conn);

    // Message statistics are only read from a database that passed the check.
    let (message_count, max_message_id): (i64, i64) = if quick_check_ok {
        let count = conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .context("counting messages during frankensqlite database health probe")?;
        let max_id = conn
            .query_row_map(
                "SELECT COALESCE(MAX(id), 0) FROM messages",
                fparams![],
                |row| row.get_typed(0),
            )
            .context("reading max message id during frankensqlite database health probe")?;
        (count, max_id)
    } else {
        (0, 0)
    };

    Ok(SqliteDatabaseHealthProbe {
        schema_version,
        quick_check_ok,
        fts_schema_rows,
        fts_queryable,
        message_count,
        max_message_id,
    })
}
2302
/// A historical bundle copied into a temporary directory next to the
/// canonical database, ready to be promoted.
struct StagedHistoricalSeed {
    /// Temporary directory created inside the canonical database's parent
    /// directory; it holds the staged copy and, during promotion, the backup
    /// of the previous canonical bundle.
    tempdir: tempfile::TempDir,
    /// Path of the staged database file inside `tempdir`.
    db_path: PathBuf,
}
2307
2308fn stage_historical_bundle_for_seed(
2309 canonical_db_path: &Path,
2310 bundle: &HistoricalDatabaseBundle,
2311) -> Result<StagedHistoricalSeed> {
2312 let canonical_parent = canonical_db_path.parent().unwrap_or_else(|| Path::new("."));
2313 fs::create_dir_all(canonical_parent).with_context(|| {
2314 format!(
2315 "creating canonical database directory before bulk historical seed import: {}",
2316 canonical_parent.display()
2317 )
2318 })?;
2319 let tempdir = tempfile::TempDir::new_in(canonical_parent)
2320 .context("creating temporary baseline seed directory")?;
2321 let staged_seed_db = tempdir.path().join("baseline-seed-output.db");
2322 copy_database_bundle(&bundle.root_path, &staged_seed_db)?;
2323
2324 Ok(StagedHistoricalSeed {
2325 tempdir,
2326 db_path: staged_seed_db,
2327 })
2328}
2329
2330fn promote_staged_historical_seed(
2331 canonical_db_path: &Path,
2332 staged_seed: &StagedHistoricalSeed,
2333) -> Result<()> {
2334 let canonical_backup = staged_seed
2335 .tempdir
2336 .path()
2337 .join("pre-seed-canonical-backup.db");
2338 let had_canonical = canonical_db_path.exists()
2339 || database_sidecar_path(canonical_db_path, "-wal").exists()
2340 || database_sidecar_path(canonical_db_path, "-shm").exists();
2341
2342 if had_canonical {
2343 move_database_bundle(canonical_db_path, &canonical_backup).with_context(|| {
2344 format!(
2345 "backing up canonical database before promoting staged historical seed import: {}",
2346 canonical_db_path.display()
2347 )
2348 })?;
2349 }
2350
2351 if let Err(err) =
2352 move_database_bundle(&staged_seed.db_path, canonical_db_path).with_context(|| {
2353 format!(
2354 "promoting staged historical seed database bundle {} into canonical path {}",
2355 staged_seed.db_path.display(),
2356 canonical_db_path.display()
2357 )
2358 })
2359 {
2360 if had_canonical {
2361 let _ = move_database_bundle(&canonical_backup, canonical_db_path);
2362 }
2363 return Err(err);
2364 }
2365
2366 Ok(())
2367}
2368
2369pub(crate) fn seed_canonical_from_best_historical_bundle(
2370 canonical_db_path: &Path,
2371) -> Result<Option<HistoricalSalvageOutcome>> {
2372 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
2373 let mut last_seed_error: Option<anyhow::Error> = None;
2374 for bundle in ordered_bundles
2375 .into_iter()
2376 .filter(|bundle| bundle.supports_direct_readonly)
2377 {
2378 if let Some(version) = bundle.probe.schema_version
2379 && version < 13
2380 {
2381 let err = anyhow!(
2382 "historical bundle {} schema_version {version} is too old for baseline import",
2383 bundle.root_path.display()
2384 );
2385 tracing::warn!(
2386 path = %bundle.root_path.display(),
2387 schema_version = version,
2388 "historical bundle is too old for baseline seed import"
2389 );
2390 last_seed_error = Some(err);
2391 continue;
2392 }
2393
2394 let source = open_historical_bundle_for_salvage(&bundle).with_context(|| {
2395 format!(
2396 "opening historical seed bundle {} for baseline import",
2397 bundle.root_path.display()
2398 )
2399 })?;
2400 let (conversations_imported, messages_imported) = historical_bundle_counts(&source.conn)?;
2401
2402 let staged_seed = match stage_historical_bundle_for_seed(canonical_db_path, &bundle) {
2403 Ok(staged_seed) => staged_seed,
2404 Err(err) => {
2405 tracing::warn!(
2406 path = %bundle.root_path.display(),
2407 error = %err,
2408 "bulk baseline seed staging from historical bundle failed; trying next candidate"
2409 );
2410 last_seed_error = Some(err);
2411 continue;
2412 }
2413 };
2414
2415 if let Err(err) = finalize_seeded_canonical_bundle_via_rusqlite(
2416 &staged_seed.db_path,
2417 &bundle,
2418 conversations_imported,
2419 messages_imported,
2420 ) {
2421 tracing::warn!(
2422 path = %bundle.root_path.display(),
2423 error = %err,
2424 "finalizing staged historical seed import failed; trying next candidate"
2425 );
2426 last_seed_error = Some(err);
2427 continue;
2428 }
2429
2430 if let Err(err) = promote_staged_historical_seed(canonical_db_path, &staged_seed) {
2431 tracing::warn!(
2432 path = %bundle.root_path.display(),
2433 error = %err,
2434 "promoting staged historical seed import failed; trying next candidate"
2435 );
2436 last_seed_error = Some(err);
2437 continue;
2438 }
2439
2440 tracing::info!(
2441 path = %bundle.root_path.display(),
2442 conversations_imported,
2443 messages_imported,
2444 "seeded empty canonical database from largest healthy historical bundle"
2445 );
2446
2447 return Ok(Some(HistoricalSalvageOutcome {
2448 bundles_considered: 0,
2449 bundles_imported: 1,
2450 conversations_imported,
2451 messages_imported,
2452 }));
2453 }
2454 if let Some(err) = last_seed_error {
2455 return Err(err);
2456 }
2457 Ok(None)
2458}
2459
2460fn parse_json_column(value: Option<String>) -> serde_json::Value {
2461 value
2462 .and_then(|raw| serde_json::from_str(&raw).ok())
2463 .unwrap_or(serde_json::Value::Null)
2464}
2465
/// Object key under which `wrap_historical_raw_json` stores unparsed JSON
/// text; `historical_raw_json` recognizes a single-entry object with this key.
const HISTORICAL_RAW_JSON_SENTINEL_KEY: &str = "__cass_historical_raw_json__";
2467
2468fn wrap_historical_raw_json(raw: String) -> serde_json::Value {
2469 serde_json::json!({ HISTORICAL_RAW_JSON_SENTINEL_KEY: raw })
2470}
2471
2472fn historical_raw_json(value: &serde_json::Value) -> Option<&str> {
2473 match value {
2474 serde_json::Value::Object(map) if map.len() == 1 => map
2475 .get(HISTORICAL_RAW_JSON_SENTINEL_KEY)
2476 .and_then(serde_json::Value::as_str),
2477 _ => None,
2478 }
2479}
2480
2481fn parse_historical_json_column(value: Option<String>) -> serde_json::Value {
2482 match value {
2483 Some(raw) if raw.trim().is_empty() => serde_json::Value::Null,
2484 Some(raw) => wrap_historical_raw_json(raw),
2485 None => serde_json::Value::Null,
2486 }
2487}
2488
/// Reports whether the `CASS_DEBUG_HISTORICAL_SALVAGE` environment variable
/// is set; its value is not inspected.
fn historical_salvage_debug_enabled() -> bool {
    matches!(
        std::env::var_os("CASS_DEBUG_HISTORICAL_SALVAGE"),
        Some(_)
    )
}
2492
/// Per-batch limits for historical imports, as produced by
/// `historical_import_batch_limits`.
#[derive(Debug, Clone, Copy)]
struct HistoricalImportBatchLimits {
    /// Limit on conversations per batch.
    conversations: usize,
    /// Limit on messages per batch.
    messages: usize,
    /// Limit on payload size per batch.
    // NOTE(review): the size hints in this file use `len()`, i.e. bytes rather
    // than characters — confirm how the consumer measures this.
    payload_chars: usize,
}
2499
2500fn env_positive_usize(key: &str) -> Option<usize> {
2501 dotenvy::var(key)
2502 .ok()
2503 .and_then(|value| value.parse::<usize>().ok())
2504 .filter(|value| *value > 0)
2505}
2506
2507fn historical_import_batch_limits() -> HistoricalImportBatchLimits {
2508 let cpu_count = std::thread::available_parallelism()
2509 .map(std::num::NonZeroUsize::get)
2510 .unwrap_or(1);
2511
2512 let default_limits = if cpu_count >= 32 {
2513 HistoricalImportBatchLimits {
2514 conversations: 128,
2515 messages: 16_384,
2516 payload_chars: 12_000_000,
2517 }
2518 } else {
2519 HistoricalImportBatchLimits {
2520 conversations: 32,
2521 messages: 4_096,
2522 payload_chars: 3_000_000,
2523 }
2524 };
2525
2526 HistoricalImportBatchLimits {
2527 conversations: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS")
2528 .unwrap_or(default_limits.conversations),
2529 messages: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_MESSAGES")
2530 .unwrap_or(default_limits.messages),
2531 payload_chars: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CHARS")
2532 .unwrap_or(default_limits.payload_chars),
2533 }
2534}
2535
2536fn json_value_size_hint(value: &serde_json::Value) -> usize {
2537 if let Some(raw) = historical_raw_json(value) {
2538 return raw.len();
2539 }
2540 match value {
2541 serde_json::Value::Null => 0,
2542 other => serde_json::to_string(other)
2543 .map(|raw| raw.len())
2544 .unwrap_or(0),
2545 }
2546}
2547
2548fn message_payload_size_hint(message: &Message) -> usize {
2549 message
2550 .content
2551 .len()
2552 .saturating_add(json_value_size_hint(&message.extra_json))
2553}
2554
/// Returns `true` when `name` starts with `prefix` and does not end in the
/// `-wal` or `-shm` sidecar suffix.
fn is_backup_root_name(name: &str, prefix: &str) -> bool {
    if !name.starts_with(prefix) {
        return false;
    }
    let is_wal_or_shm = name.ends_with("-wal") || name.ends_with("-shm");
    !is_wal_or_shm
}
2558
/// Returns `true` when `name` ends with one of the database sidecar suffixes:
/// `-wal`, `-shm`, `-lock-shared`, `-lock-reserved` or `-lock-pending`.
fn has_db_sidecar_suffix(name: &str) -> bool {
    for suffix in [
        "-wal",
        "-shm",
        "-lock-shared",
        "-lock-reserved",
        "-lock-pending",
    ] {
        if name.ends_with(suffix) {
            return true;
        }
    }
    false
}
2575
/// Schema version this build expects; `SCHEMA_VERSION` is an alias of it.
pub const CURRENT_SCHEMA_VERSION: i64 = 20;
/// Oldest stored schema version that `check_schema_compatibility` reports as
/// `NeedsMigration`; older positive versions are reported as `NeedsRebuild`.
const MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION: i64 = 13;
2579
/// Outcome of `check_schema_compatibility` for an on-disk database.
#[derive(Debug, Clone)]
pub enum SchemaCheck {
    /// The stored schema version equals the current schema version.
    Compatible,
    /// The database has no tables at all, or its stored version lies in the
    /// in-place migration range below the current version.
    NeedsMigration,
    /// The database cannot be used or migrated as-is; the string carries the
    /// human-readable reason.
    NeedsRebuild(String),
}
2590
2591fn schema_check_error_requires_rebuild(err: &frankensqlite::FrankenError) -> bool {
2592 matches!(
2596 err,
2597 frankensqlite::FrankenError::DatabaseCorrupt { .. }
2598 | frankensqlite::FrankenError::WalCorrupt { .. }
2599 | frankensqlite::FrankenError::NotADatabase { .. }
2600 | frankensqlite::FrankenError::ShortRead { .. }
2601 )
2602}
2603
/// Builds a sibling path of `path` named
/// `<file_name>.backup.<pid>.<nanos-since-epoch>.<nonce>`.
///
/// The nonce comes from a process-wide counter, so repeated calls within one
/// process produce distinct names. `"db"` stands in for a missing or
/// non-UTF-8 file name, and `0` for a clock that reads before the Unix epoch.
fn unique_backup_path(path: &Path) -> PathBuf {
    static NEXT_NONCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let nonce = NEXT_NONCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let base_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db",
    };
    let pid = std::process::id();

    path.with_file_name(format!("{base_name}.backup.{pid}.{nanos}.{nonce}"))
}
2621
/// Derives the sibling path `.<file_name>.vacuum-in-progress` for
/// `backup_path`, using `"db.backup"` when the file name is missing or not
/// valid UTF-8.
fn vacuum_stage_backup_path(backup_path: &Path) -> PathBuf {
    let base_name = match backup_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db.backup",
    };
    let staged_name = format!(".{base_name}.vacuum-in-progress");
    backup_path.with_file_name(staged_name)
}
2629
2630fn check_schema_compatibility(
2634 path: &Path,
2635) -> std::result::Result<SchemaCheck, frankensqlite::FrankenError> {
2636 let mut conn = open_franken_with_flags(
2637 &path.to_string_lossy(),
2638 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
2639 )?;
2640
2641 let result = (|| {
2642 let meta_exists: i32 = conn.query_row_map(
2644 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='meta'",
2645 fparams![],
2646 |row| row.get_typed(0),
2647 )?;
2648
2649 if meta_exists == 0 {
2650 let table_count: i32 = conn.query_row_map(
2653 "SELECT COUNT(*) FROM sqlite_master WHERE type='table'",
2654 fparams![],
2655 |row| row.get_typed(0),
2656 )?;
2657
2658 if table_count == 0 {
2659 return Ok(SchemaCheck::NeedsMigration);
2661 }
2662
2663 return Ok(SchemaCheck::NeedsRebuild(
2665 "Database missing schema version metadata".to_string(),
2666 ));
2667 }
2668
2669 let version: Option<i64> = conn
2671 .query_row_map(
2672 "SELECT value FROM meta WHERE key = 'schema_version'",
2673 fparams![],
2674 |row| Ok(row.get_typed::<String>(0)?.parse().ok()),
2675 )
2676 .ok()
2677 .flatten();
2678
2679 match version {
2680 Some(v) if v == SCHEMA_VERSION => Ok(SchemaCheck::Compatible),
2681 Some(v) if (MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION).contains(&v) => {
2682 Ok(SchemaCheck::NeedsMigration)
2683 }
2684 Some(v) if v > 0 && v < MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION => {
2685 Ok(SchemaCheck::NeedsRebuild(format!(
2686 "Schema version {} is too old for in-place migration; supported upgrade path starts at version {}",
2687 v, MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION
2688 )))
2689 }
2690 Some(v) => {
2691 Ok(SchemaCheck::NeedsRebuild(format!(
2693 "Schema version {} is newer than supported version {}",
2694 v, SCHEMA_VERSION
2695 )))
2696 }
2697 None => Ok(SchemaCheck::NeedsRebuild(
2698 "Schema version not found or invalid".to_string(),
2699 )),
2700 }
2701 })();
2702
2703 if let Err(close_err) = conn.close_in_place() {
2704 tracing::warn!(
2705 error = %close_err,
2706 db_path = %path.display(),
2707 "check_schema_compatibility: close_in_place failed; falling back to best-effort close"
2708 );
2709 conn.close_best_effort_in_place();
2710 }
2711
2712 result
2713}
2714
/// Module-internal alias of `CURRENT_SCHEMA_VERSION`.
const SCHEMA_VERSION: i64 = CURRENT_SCHEMA_VERSION;
2716
/// V1 schema SQL (test builds only): creates the `meta`, `agents`,
/// `workspaces`, `conversations`, `messages`, `snippets`, `tags` and
/// `conversation_tags` tables plus two lookup indexes.
#[cfg(test)]
const MIGRATION_V1: &str = r"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
 key TEXT PRIMARY KEY,
 value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
 id INTEGER PRIMARY KEY,
 slug TEXT NOT NULL UNIQUE,
 name TEXT NOT NULL,
 version TEXT,
 kind TEXT NOT NULL,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
 id INTEGER PRIMARY KEY,
 path TEXT NOT NULL UNIQUE,
 display_name TEXT
);

CREATE TABLE IF NOT EXISTS conversations (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 UNIQUE(agent_id, external_id)
);

CREATE TABLE IF NOT EXISTS messages (
 id INTEGER PRIMARY KEY,
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 idx INTEGER NOT NULL,
 role TEXT NOT NULL,
 author TEXT,
 created_at INTEGER,
 content TEXT NOT NULL,
 extra_json TEXT,
 UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
 id INTEGER PRIMARY KEY,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 file_path TEXT,
 start_line INTEGER,
 end_line INTEGER,
 language TEXT,
 snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
 id INTEGER PRIMARY KEY,
 name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
 PRIMARY KEY (conversation_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_conversations_agent_started
 ON conversations(agent_id, started_at DESC);

CREATE INDEX IF NOT EXISTS idx_messages_conv_idx
 ON messages(conversation_id, idx);

";
2796
/// V2 schema SQL (test builds only): creates the `fts_messages` FTS5 virtual
/// table with the porter tokenizer and backfills it from existing messages
/// joined with their conversation, agent and (optional) workspace.
#[cfg(test)]
const MIGRATION_V2: &str = r"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(
 content,
 title,
 agent,
 workspace,
 source_path,
 created_at UNINDEXED,
 message_id UNINDEXED,
 tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
 m.content,
 c.title,
 a.slug,
 w.path,
 c.source_path,
 m.created_at,
 m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
2823
/// V3 schema SQL (test builds only): drops `fts_messages`, recreates it with
/// the same columns and tokenizer as V2, and repopulates it from the message
/// tables.
// NOTE(review): `allow(dead_code)` suggests no test currently references this
// constant — confirm before removing either the attribute or the constant.
#[cfg(test)]
#[allow(dead_code)]
const MIGRATION_V3: &str = r"
DROP TABLE IF EXISTS fts_messages;
CREATE VIRTUAL TABLE fts_messages USING fts5(
 content,
 title,
 agent,
 workspace,
 source_path,
 created_at UNINDEXED,
 message_id UNINDEXED,
 tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
 m.content,
 c.title,
 a.slug,
 w.path,
 c.source_path,
 m.created_at,
 m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
2852
/// V4 schema SQL (test builds only): creates the `sources` table and inserts
/// the default `'local'` source row if it is not already present.
#[cfg(test)]
const MIGRATION_V4: &str = r"
-- Sources table for tracking where conversations come from
CREATE TABLE IF NOT EXISTS sources (
 id TEXT PRIMARY KEY, -- source_id (e.g., 'local', 'work-laptop')
 kind TEXT NOT NULL, -- 'local', 'ssh', etc.
 host_label TEXT, -- display label
 machine_id TEXT, -- optional stable machine id
 platform TEXT, -- 'macos', 'linux', 'windows'
 config_json TEXT, -- JSON blob for extra config (SSH params, path rewrites)
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

-- Bootstrap: Insert the default 'local' source
INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
2871
/// V5 schema SQL (test builds only): rebuilds `conversations` with the
/// `source_id` and `origin_host` provenance columns and a
/// `(source_id, agent_id, external_id)` uniqueness constraint, copying all
/// existing rows with `source_id = 'local'`, then recreates the indexes.
#[cfg(test)]
const MIGRATION_V5: &str = r"
-- Add provenance columns to conversations table
-- SQLite cannot alter unique constraints, so we need to recreate the table

-- Create new table with provenance columns and updated unique constraint
CREATE TABLE conversations_new (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 origin_host TEXT,
 UNIQUE(source_id, agent_id, external_id)
);

-- Copy data from old table (all existing conversations get source_id='local')
INSERT INTO conversations_new (id, agent_id, workspace_id, source_id, external_id, title,
 source_path, started_at, ended_at, approx_tokens, metadata_json, origin_host)
SELECT id, agent_id, workspace_id, 'local', external_id, title,
 source_path, started_at, ended_at, approx_tokens, metadata_json, NULL
FROM conversations;

-- Drop old table and rename new
DROP TABLE conversations;
ALTER TABLE conversations_new RENAME TO conversations;

-- Recreate indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
";
2909
/// V6 schema SQL (test builds only): adds an index on
/// `conversations(source_path)`.
#[cfg(test)]
const MIGRATION_V6: &str = r"
-- Optimize lookup by source_path (used by TUI detail view)
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
";
2915
/// V7 schema SQL (test builds only): adds the `metadata_bin` BLOB column to
/// `conversations` and the `extra_bin` BLOB column to `messages`.
#[cfg(test)]
const MIGRATION_V7: &str = r"
-- Add binary columns for MessagePack serialization (Opt 3.1)
-- Binary format is 50-70% smaller than JSON and faster to parse
ALTER TABLE conversations ADD COLUMN metadata_bin BLOB;
ALTER TABLE messages ADD COLUMN extra_bin BLOB;
";
2923
/// V8 schema SQL (test builds only): creates the `daily_stats` rollup table
/// keyed by `(day_id, agent_slug, source_id)` and two supporting indexes.
#[cfg(test)]
const MIGRATION_V8: &str = r"
-- Opt 3.2: Daily stats materialized table for O(1) time-range histograms
-- Provides fast aggregated queries for stats/dashboard without full table scans

CREATE TABLE IF NOT EXISTS daily_stats (
 day_id INTEGER NOT NULL, -- Days since 2020-01-01 (Unix epoch + offset)
 agent_slug TEXT NOT NULL, -- 'all' for totals, or specific agent slug
 source_id TEXT NOT NULL DEFAULT 'all', -- 'all' for totals, or specific source
 session_count INTEGER NOT NULL DEFAULT 0,
 message_count INTEGER NOT NULL DEFAULT 0,
 total_chars INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
2943
/// V9 schema SQL (test builds only): creates the `embedding_jobs` table and a
/// partial unique index that allows only one pending or running job per
/// `(db_path, model_id)`.
#[cfg(test)]
const MIGRATION_V9: &str = r"
-- Background embedding jobs tracking table
CREATE TABLE IF NOT EXISTS embedding_jobs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 db_path TEXT NOT NULL,
 model_id TEXT NOT NULL,
 status TEXT NOT NULL DEFAULT 'pending',
 total_docs INTEGER NOT NULL DEFAULT 0,
 completed_docs INTEGER NOT NULL DEFAULT 0,
 error_message TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now')),
 started_at TEXT,
 completed_at TEXT
);

-- Only one pending or running job per (db_path, model_id) at a time.
-- Multiple completed/failed/cancelled jobs are allowed for history.
CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
2966
/// V10 schema SQL (test builds only): adds token analytics — the
/// `token_usage` ledger, the `token_daily_stats` rollup table, the
/// `model_pricing` lookup table with seed rows, and per-conversation token
/// summary columns on `conversations`.
#[cfg(test)]
const MIGRATION_V10: &str = r"
-- Token analytics: per-message token usage ledger
CREATE TABLE IF NOT EXISTS token_usage (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 conversation_id INTEGER NOT NULL,
 agent_id INTEGER NOT NULL,
 workspace_id INTEGER,
 source_id TEXT NOT NULL DEFAULT 'local',

 -- Timing
 timestamp_ms INTEGER NOT NULL,
 day_id INTEGER NOT NULL,

 -- Model identification
 model_name TEXT,
 model_family TEXT,
 model_tier TEXT,
 service_tier TEXT,
 provider TEXT,

 -- Token counts (nullable — not all agents provide all fields)
 input_tokens INTEGER,
 output_tokens INTEGER,
 cache_read_tokens INTEGER,
 cache_creation_tokens INTEGER,
 thinking_tokens INTEGER,
 total_tokens INTEGER,

 -- Cost estimation
 estimated_cost_usd REAL,

 -- Message context
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,

 -- Data quality
 data_source TEXT NOT NULL DEFAULT 'api',

 UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

-- Token analytics: pre-aggregated daily rollups
CREATE TABLE IF NOT EXISTS token_daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 model_family TEXT NOT NULL DEFAULT 'all',

 api_call_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_message_count INTEGER NOT NULL DEFAULT 0,

 total_input_tokens INTEGER NOT NULL DEFAULT 0,
 total_output_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
 grand_total_tokens INTEGER NOT NULL DEFAULT 0,

 total_content_chars INTEGER NOT NULL DEFAULT 0,
 total_tool_calls INTEGER NOT NULL DEFAULT 0,

 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,

 session_count INTEGER NOT NULL DEFAULT 0,

 last_updated INTEGER NOT NULL,

 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

-- Model pricing lookup table
CREATE TABLE IF NOT EXISTS model_pricing (
 model_pattern TEXT NOT NULL,
 provider TEXT NOT NULL,
 input_cost_per_mtok REAL NOT NULL,
 output_cost_per_mtok REAL NOT NULL,
 cache_read_cost_per_mtok REAL,
 cache_creation_cost_per_mtok REAL,
 effective_date TEXT NOT NULL,
 PRIMARY KEY (model_pattern, effective_date)
);

-- Seed with current pricing (as of 2026-02)
INSERT OR IGNORE INTO model_pricing VALUES
 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Extend conversations table with token summary columns
ALTER TABLE conversations ADD COLUMN total_input_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_output_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_read_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_creation_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN grand_total_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN estimated_cost_usd REAL;
ALTER TABLE conversations ADD COLUMN primary_model TEXT;
ALTER TABLE conversations ADD COLUMN api_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN tool_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN user_message_count INTEGER;
ALTER TABLE conversations ADD COLUMN assistant_message_count INTEGER;
";
3090
/// V14 schema SQL: drops the existing `fts_messages` table. The SQL comments
/// below explain why it is not recreated as part of this migration.
const MIGRATION_V14: &str = r"
-- Switch FTS5 from internal-content to contentless mode (CASS #163).
-- Drop the old V13 internal-content fts_messages first so that
-- sqlite_schema does not contain two conflicting CREATE VIRTUAL TABLE
-- entries, which makes the database completely unreadable.
-- The current contentless table is recreated lazily after open() only when the
-- frankensqlite FTS consistency check finds it missing or malformed.
DROP TABLE IF EXISTS fts_messages;
";
3100
/// SQL that creates the `conversation_tail_state` table if it is missing.
/// `MIGRATION_V18` contains the same table definition plus a backfill.
const MIGRATION_V15_TAIL_STATE_TABLE: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
 -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);
";
3111
/// V16 schema SQL: drops the `idx_messages_conv_idx` index, which duplicates
/// the index implied by `UNIQUE(conversation_id, idx)`.
const MIGRATION_V16: &str = r"
-- UNIQUE(conversation_id, idx) already creates sqlite_autoindex_messages_1,
-- which covers the same lookup/order key as idx_messages_conv_idx. Keeping both
-- doubles message insert index maintenance on the hot indexing path.
DROP INDEX IF EXISTS idx_messages_conv_idx;
";
3118
/// V17 schema SQL: drops the `idx_messages_created` index.
const MIGRATION_V17: &str = r"
-- Drop the global messages(created_at) secondary index from the ingest hot
-- path. Search/time filters are served by the derived search layer and
-- conversation/analytics indexes, while this index is maintained on every
-- message insert.
DROP INDEX IF EXISTS idx_messages_created;
";
3126
/// V18 schema SQL: creates `conversation_tail_state` if missing and backfills
/// it from every `conversations` row that has at least one non-NULL tail
/// column (`ended_at`, `last_message_idx`, `last_message_created_at`).
const MIGRATION_V18: &str = r"
-- Move append-tail state out of the wide, indexed conversations row. The hot
-- append path updates this cache for every appended conversation; keeping it in
-- a tiny rowid table avoids rewriting the large conversation record.
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
 -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_tail_state (
 conversation_id, ended_at, last_message_idx, last_message_created_at
)
SELECT id, ended_at, last_message_idx, last_message_created_at
FROM conversations
WHERE ended_at IS NOT NULL
 OR last_message_idx IS NOT NULL
 OR last_message_created_at IS NOT NULL;
";
3149
/// V19 schema SQL: creates `conversation_external_lookup` and backfills one
/// row per conversation that has an `external_id`.
///
/// The lookup key is built as
/// `<length(source_id)>:<source_id>:<agent_id>:<length(external_id)>:<external_id>`.
// NOTE(review): SQLite `length()` on TEXT counts characters; confirm that any
// Rust code building the same key measures length the same way.
const MIGRATION_V19: &str = r"
-- Materialize external conversation provenance into one compact lookup key.
-- This keeps the hot append/new-conversation probe on a single primary-key
-- lookup instead of a composite conversations-table predicate.
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
 CAST(agent_id AS TEXT) || ':' ||
 CAST(length(external_id) AS TEXT) || ':' || external_id,
 id
FROM conversations
WHERE external_id IS NOT NULL;
";
3168
/// V20 schema SQL: creates `conversation_external_tail_lookup` and backfills
/// it for every conversation with an `external_id`, using the same lookup-key
/// format as `MIGRATION_V19` and taking the tail columns from
/// `conversation_tail_state` (NULL when no tail row exists).
const MIGRATION_V20: &str = r"
-- Fuse external conversation lookup with append-tail state. Append-heavy
-- workloads can resolve both the conversation id and tail plan from one
-- primary-key probe.
CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
 lookup_key,
 conversation_id,
 ended_at,
 last_message_idx,
 last_message_created_at
)
SELECT
 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
 CAST(c.agent_id AS TEXT) || ':' ||
 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
 c.id,
 ts.ended_at,
 ts.last_message_idx,
 ts.last_message_created_at
FROM conversations c
LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
WHERE c.external_id IS NOT NULL;
";
3200
/// One embedding job record. The field names match the columns of the
/// `embedding_jobs` table.
#[derive(Debug, Clone)]
pub struct EmbeddingJobRow {
    /// Row id of the job.
    pub id: i64,
    /// Database path the job applies to.
    pub db_path: String,
    /// Identifier of the embedding model.
    pub model_id: String,
    /// Job status text (the table's partial index names `'pending'` and
    /// `'running'` as the active states).
    pub status: String,
    /// Total number of documents in the job.
    pub total_docs: i64,
    /// Number of documents completed so far.
    pub completed_docs: i64,
    /// Error message, if one was recorded.
    pub error_message: Option<String>,
    /// Creation timestamp, stored as text.
    pub created_at: String,
    /// Start timestamp as text, if set.
    pub started_at: Option<String>,
    /// Completion timestamp as text, if set.
    pub completed_at: Option<String>,
}
3215
/// Conversation-level fields carried for a lexical rebuild.
// NOTE(review): the producing query is outside this view; the per-field notes
// below are inferred from the field names and types — confirm against it.
#[derive(Debug, Clone)]
pub struct LexicalRebuildConversationRow {
    /// Conversation row id, when known.
    pub id: Option<i64>,
    /// Slug of the agent that owns the conversation.
    pub agent_slug: String,
    /// Workspace path, if the conversation has one.
    pub workspace: Option<PathBuf>,
    /// External identifier, if any.
    pub external_id: Option<String>,
    /// Conversation title, if any.
    pub title: Option<String>,
    /// Path of the source the conversation came from.
    pub source_path: PathBuf,
    /// Start timestamp, if recorded.
    pub started_at: Option<i64>,
    /// End timestamp, if recorded.
    pub ended_at: Option<i64>,
    /// Identifier of the originating source.
    pub source_id: String,
    /// Origin host, if recorded.
    pub origin_host: Option<String>,
}
3235
/// Size footprint of one conversation for lexical rebuild planning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LexicalRebuildConversationFootprintRow {
    /// Id of the conversation the footprint describes.
    pub conversation_id: i64,
    /// Number of messages in the conversation.
    pub message_count: usize,
    /// Message payload size in bytes. When built by
    /// `lexical_rebuild_conversation_footprint_from_count` this is an estimate
    /// derived from `message_count`.
    pub message_bytes: usize,
}
3244
/// Per-message byte estimate (4 KiB) used to derive `message_bytes` from a
/// message count.
pub(crate) const LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE: usize = 4 * 1024;
/// Largest number of conversations that may lack tail metadata while coverage
/// still counts as sufficient.
const LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT: usize = 64;
3247
3248fn lexical_rebuild_tail_metadata_coverage_is_sufficient(
3249 total_conversations: usize,
3250 covered_conversations: usize,
3251) -> bool {
3252 total_conversations == 0
3253 || total_conversations.saturating_sub(covered_conversations.min(total_conversations))
3254 <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT
3255}
3256
/// Converts a zero-based last message index into a message count
/// (`index + 1`).
///
/// Returns `None` when the index is absent or negative, or when the count
/// does not fit in `usize`.
fn lexical_rebuild_message_count_from_tail_idx(last_message_idx: Option<i64>) -> Option<usize> {
    let raw_idx = last_message_idx?;
    let idx = u64::try_from(raw_idx).ok()?;
    idx.checked_add(1)
        .and_then(|count| usize::try_from(count).ok())
}
3262
3263fn lexical_rebuild_conversation_footprint_from_count(
3264 conversation_id: i64,
3265 message_count: usize,
3266) -> LexicalRebuildConversationFootprintRow {
3267 LexicalRebuildConversationFootprintRow {
3268 conversation_id,
3269 message_count,
3270 message_bytes: message_count
3271 .saturating_mul(LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE),
3272 }
3273}
3274
/// Message-level fields carried for a lexical rebuild. The field names match
/// columns of the `messages` table.
#[derive(Debug, Clone)]
pub struct LexicalRebuildMessageRow {
    /// Id of the conversation the message belongs to.
    pub conversation_id: i64,
    /// Message row id.
    pub id: i64,
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Message role text.
    pub role: String,
    /// Author, if recorded.
    pub author: Option<String>,
    /// Creation timestamp, if recorded.
    pub created_at: Option<i64>,
    /// Message content.
    pub content: String,
}
3286
/// A message row without its conversation id, as stored in
/// `LexicalRebuildGroupedMessageRows`.
// NOTE(review): presumably rows are grouped per conversation by the caller,
// which would explain the missing conversation id — confirm at the use site.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexicalRebuildGroupedMessageRow {
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Whether the message's role is the tool role.
    pub is_tool_role: bool,
    /// Creation timestamp, if recorded.
    pub created_at: Option<i64>,
    /// Message content.
    pub content: String,
}
3297
/// Collection of grouped message rows; up to 32 rows are stored inline before
/// the `SmallVec` spills to the heap.
pub type LexicalRebuildGroupedMessageRows = SmallVec<[LexicalRebuildGroupedMessageRow; 32]>;
3299
/// Alias that makes `FrankenStorage` available under the name `SqliteStorage`.
pub type SqliteStorage = FrankenStorage;
3302
/// Storage handle backed by a frankensqlite connection.
// NOTE(review): the methods that use these fields are outside this view; the
// per-field notes are inferred from names and types — confirm against them.
pub struct FrankenStorage {
    /// Primary database connection.
    conn: FrankenConnection,
    /// Path of the database file this storage was opened on.
    db_path: PathBuf,
    /// Flag recording that the ephemeral-writer preflight has been verified.
    ephemeral_writer_preflight_verified: AtomicBool,
    /// Checkpoint page setting for the index writer; presumably
    /// `UNSET_INDEX_WRITER_CHECKPOINT_PAGES` until configured.
    index_writer_checkpoint_pages: AtomicI64,
    /// Busy timeout in milliseconds for the index writer; presumably
    /// `UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS` until configured.
    index_writer_busy_timeout_ms: AtomicU64,
    /// Slot for a reusable ephemeral writer connection.
    cached_ephemeral_writer: parking_lot::Mutex<CachedEphemeralWriter>,
    /// Agent keys mapped to an `i64`, presumably the agent row id.
    ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
    /// Workspace keys mapped to an `i64`, presumably the workspace row id.
    ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
    /// Set of conversation source keys already ensured.
    ensured_conversation_sources: Arc<parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>>,
    /// Set of daily-stats keys already ensured.
    ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
    /// Cached tri-state for `fts_messages` presence; presumably holds one of
    /// the `FTS_MESSAGES_PRESENT_*` constants.
    fts_messages_present_cache: AtomicI8,
}
3317
/// Default WAL auto-checkpoint threshold, in pages.
const DEFAULT_WAL_AUTOCHECKPOINT_PAGES: i64 = 4096;
/// Sentinel meaning "no index-writer checkpoint page value has been set".
const UNSET_INDEX_WRITER_CHECKPOINT_PAGES: i64 = i64::MIN;
/// Sentinel meaning "no index-writer busy timeout has been set".
const UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS: u64 = 0;
/// `fts_messages` presence state: not determined.
const FTS_MESSAGES_PRESENT_UNKNOWN: i8 = 0;
/// `fts_messages` presence state: table absent.
const FTS_MESSAGES_PRESENT_ABSENT: i8 = 1;
/// `fts_messages` presence state: table present.
const FTS_MESSAGES_PRESENT_PRESENT: i8 = 2;
3327
/// State of the slot that holds a reusable ephemeral writer connection.
// NOTE(review): the transitions between these states are outside this view;
// variant notes are inferred from the names — confirm at the use sites.
enum CachedEphemeralWriter {
    /// No writer has been stored in the slot.
    Uninitialized,
    /// A writer connection is stored in the slot.
    Cached(Box<SendFrankenConnection>),
    /// The writer is currently taken out of the slot.
    InUse,
}
3333
3334#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3335struct EnsuredAgentKey {
3336 slug: String,
3337 name: String,
3338 version: Option<String>,
3339 kind: String,
3340}
3341
3342impl EnsuredAgentKey {
3343 fn from_agent(agent: &Agent) -> Self {
3344 Self {
3345 slug: agent.slug.clone(),
3346 name: agent.name.clone(),
3347 version: agent.version.clone(),
3348 kind: agent_kind_str(agent.kind.clone()),
3349 }
3350 }
3351}
3352
/// Hashable identity of a workspace (path plus optional display name) as used
/// by the "already ensured" workspace cache.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredWorkspaceKey {
    path: String,
    display_name: Option<String>,
}

impl EnsuredWorkspaceKey {
    /// Builds a key from an owned path and a borrowed, optional display name.
    fn new(path: String, display_name: Option<&str>) -> Self {
        let display_name = display_name.map(|label| label.to_string());
        Self { path, display_name }
    }
}
3367
3368#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3369struct EnsuredConversationSourceKey {
3370 id: String,
3371 kind: SourceKind,
3372 host_label: Option<String>,
3373}
3374
3375impl EnsuredConversationSourceKey {
3376 fn from_source(source: &Source) -> Self {
3377 Self {
3378 id: source.id.clone(),
3379 kind: source.kind,
3380 host_label: source.host_label.clone(),
3381 }
3382 }
3383}
3384
/// Hashable identity of a daily-stats row (day, agent slug, source id) as used
/// by the "already ensured" daily-stats cache.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredDailyStatsKey {
    day_id: i64,
    agent_slug: String,
    source_id: String,
}

impl EnsuredDailyStatsKey {
    /// Builds a key, taking owned copies of the borrowed slug and source id.
    fn new(day_id: i64, agent_slug: &str, source_id: &str) -> Self {
        let agent_slug = String::from(agent_slug);
        let source_id = String::from(source_id);
        Self {
            day_id,
            agent_slug,
            source_id,
        }
    }
}
3401
/// PRAGMA spellings tried, in this order, by `disable_autocommit_retain`;
/// the first one that executes successfully wins.
const AUTOCOMMIT_RETAIN_OFF_PRAGMAS: [&str; 2] = [
    "PRAGMA fsqlite.autocommit_retain = OFF;",
    "PRAGMA autocommit_retain = OFF;",
];
3406
3407fn disable_autocommit_retain<E>(
3408 mut execute: impl FnMut(&'static str) -> std::result::Result<(), E>,
3409) -> Result<&'static str>
3410where
3411 E: std::fmt::Display,
3412{
3413 let mut failures = Vec::new();
3414 for pragma in AUTOCOMMIT_RETAIN_OFF_PRAGMAS {
3415 match execute(pragma) {
3416 Ok(()) => return Ok(pragma),
3417 Err(err) => {
3418 let error = err.to_string();
3419 tracing::debug!(
3420 %pragma,
3421 error = %error,
3422 "autocommit_retain PRAGMA variant not supported"
3423 );
3424 failures.push(format!("{pragma}: {error}"));
3425 }
3426 }
3427 }
3428
3429 Err(anyhow!(
3430 "failed to disable autocommit_retain on frankensqlite connection; \
3431 refusing to keep a long-lived MVCC connection that may accumulate \
3432 unbounded write snapshots. Upgrade frankensqlite to a version that \
3433 supports one of these PRAGMAs or use a short-lived connection path. \
3434 attempts: {}",
3435 failures.join("; ")
3436 ))
3437}
3438
3439impl FrankenStorage {
3440 fn new(conn: FrankenConnection, db_path: PathBuf) -> Self {
3441 Self::new_with_shared_caches(
3442 conn,
3443 db_path,
3444 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3445 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3446 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3447 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3448 )
3449 }
3450
3451 fn new_with_shared_caches(
3452 conn: FrankenConnection,
3453 db_path: PathBuf,
3454 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3455 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3456 ensured_conversation_sources: Arc<
3457 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3458 >,
3459 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3460 ) -> Self {
3461 Self {
3462 conn,
3463 db_path,
3464 ephemeral_writer_preflight_verified: AtomicBool::new(false),
3465 index_writer_checkpoint_pages: AtomicI64::new(UNSET_INDEX_WRITER_CHECKPOINT_PAGES),
3466 index_writer_busy_timeout_ms: AtomicU64::new(UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS),
3467 cached_ephemeral_writer: parking_lot::Mutex::new(CachedEphemeralWriter::Uninitialized),
3468 ensured_agents,
3469 ensured_workspaces,
3470 ensured_conversation_sources,
3471 ensured_daily_stats_keys,
3472 fts_messages_present_cache: AtomicI8::new(FTS_MESSAGES_PRESENT_UNKNOWN),
3473 }
3474 }
3475
3476 fn apply_open_stage_busy_timeout(&self) {
3477 if let Err(err) = self.conn.execute("PRAGMA busy_timeout = 5000;") {
3478 tracing::debug!(
3479 error = %err,
3480 "failed to apply open-stage busy_timeout before migrations"
3481 );
3482 }
3483 }
3484
3485 pub fn open(path: &Path) -> Result<Self> {
3491 if let Some(parent) = path.parent() {
3492 fs::create_dir_all(parent)
3493 .with_context(|| format!("creating db directory {}", parent.display()))?;
3494 }
3495
3496 let path_str = path.to_string_lossy().to_string();
3497 let _doctor_guard =
3498 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3499 let conn = FrankenConnection::open(&path_str)
3500 .with_context(|| format!("opening frankensqlite db at {}", path.display()))?;
3501 let storage = Self::new(conn, path.to_path_buf());
3502 storage.apply_open_stage_busy_timeout();
3503 storage.run_migrations()?;
3504 storage.repair_missing_current_schema_objects()?;
3505 storage.apply_config()?;
3506 Ok(storage)
3507 }
3508
3509 pub fn open_writer(path: &Path) -> Result<Self> {
3515 Self::open_writer_with_shared_caches(
3516 path,
3517 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3518 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3519 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3520 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3521 )
3522 }
3523
3524 fn open_writer_with_shared_caches(
3525 path: &Path,
3526 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3527 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3528 ensured_conversation_sources: Arc<
3529 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3530 >,
3531 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3532 ) -> Result<Self> {
3533 let path_str = path.to_string_lossy().to_string();
3534 let _doctor_guard =
3535 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3536 let conn = FrankenConnection::open(&path_str)
3537 .with_context(|| format!("opening frankensqlite writer at {}", path.display()))?;
3538 let storage = Self::new_with_shared_caches(
3539 conn,
3540 path.to_path_buf(),
3541 ensured_agents,
3542 ensured_workspaces,
3543 ensured_conversation_sources,
3544 ensured_daily_stats_keys,
3545 );
3546 storage.apply_config()?;
3547 Ok(storage)
3548 }
3549
3550 pub(crate) fn acquire_cached_ephemeral_writer(&self) -> Result<(Self, bool)> {
3551 let mut cached = self.cached_ephemeral_writer.lock();
3552 match std::mem::replace(&mut *cached, CachedEphemeralWriter::InUse) {
3553 CachedEphemeralWriter::Cached(conn) => {
3554 let (conn, checkpoint_pages, busy_timeout_ms) = (*conn).into_parts();
3555 let writer = Self::new_with_shared_caches(
3556 conn,
3557 self.db_path.clone(),
3558 Arc::clone(&self.ensured_agents),
3559 Arc::clone(&self.ensured_workspaces),
3560 Arc::clone(&self.ensured_conversation_sources),
3561 Arc::clone(&self.ensured_daily_stats_keys),
3562 );
3563 writer
3564 .index_writer_checkpoint_pages
3565 .store(checkpoint_pages, Ordering::Relaxed);
3566 writer
3567 .index_writer_busy_timeout_ms
3568 .store(busy_timeout_ms, Ordering::Relaxed);
3569 Ok((writer, true))
3570 }
3571 CachedEphemeralWriter::Uninitialized => {
3572 drop(cached);
3573 match Self::open_writer_with_shared_caches(
3574 &self.db_path,
3575 Arc::clone(&self.ensured_agents),
3576 Arc::clone(&self.ensured_workspaces),
3577 Arc::clone(&self.ensured_conversation_sources),
3578 Arc::clone(&self.ensured_daily_stats_keys),
3579 ) {
3580 Ok(writer) => Ok((writer, true)),
3581 Err(err) => {
3582 let mut cached = self.cached_ephemeral_writer.lock();
3583 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3584 *cached = CachedEphemeralWriter::Uninitialized;
3585 }
3586 Err(err)
3587 }
3588 }
3589 }
3590 CachedEphemeralWriter::InUse => {
3591 *cached = CachedEphemeralWriter::InUse;
3592 drop(cached);
3593 Ok((
3594 Self::open_writer_with_shared_caches(
3595 &self.db_path,
3596 Arc::clone(&self.ensured_agents),
3597 Arc::clone(&self.ensured_workspaces),
3598 Arc::clone(&self.ensured_conversation_sources),
3599 Arc::clone(&self.ensured_daily_stats_keys),
3600 )?,
3601 false,
3602 ))
3603 }
3604 }
3605 }
3606
3607 pub(crate) fn release_cached_ephemeral_writer(&self, writer: Self) {
3608 let checkpoint_pages = writer.index_writer_checkpoint_pages.load(Ordering::Relaxed);
3609 let busy_timeout_ms = writer.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
3610 let conn = writer.into_raw();
3611 let mut cached = self.cached_ephemeral_writer.lock();
3612 debug_assert!(
3613 matches!(&*cached, CachedEphemeralWriter::InUse),
3614 "cached ephemeral writer state should be in-use when releasing"
3615 );
3616 *cached = CachedEphemeralWriter::Cached(Box::new(
3617 SendFrankenConnection::new_with_index_writer_state(
3618 conn,
3619 checkpoint_pages,
3620 busy_timeout_ms,
3621 ),
3622 ));
3623 }
3624
3625 pub(crate) fn discard_cached_ephemeral_writer(&self, mut writer: Self) {
3626 writer.close_best_effort_in_place();
3627 let mut cached = self.cached_ephemeral_writer.lock();
3628 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3629 *cached = CachedEphemeralWriter::Uninitialized;
3630 }
3631 }
3632
3633 fn cached_agent_id(&self, key: &EnsuredAgentKey) -> Option<i64> {
3634 self.ensured_agents.lock().get(key).copied()
3635 }
3636
3637 fn mark_agent_ensured(&self, key: EnsuredAgentKey, id: i64) {
3638 self.ensured_agents.lock().insert(key, id);
3639 }
3640
3641 fn cached_workspace_id(&self, key: &EnsuredWorkspaceKey) -> Option<i64> {
3642 self.ensured_workspaces.lock().get(key).copied()
3643 }
3644
3645 fn mark_workspace_ensured(&self, key: EnsuredWorkspaceKey, id: i64) {
3646 self.ensured_workspaces.lock().insert(key, id);
3647 }
3648
3649 fn conversation_source_already_ensured(&self, key: &EnsuredConversationSourceKey) -> bool {
3650 self.ensured_conversation_sources.lock().contains(key)
3651 }
3652
3653 fn mark_conversation_source_ensured(&self, key: EnsuredConversationSourceKey) {
3654 self.ensured_conversation_sources.lock().insert(key);
3655 }
3656
3657 fn daily_stats_key_already_ensured(&self, key: &EnsuredDailyStatsKey) -> bool {
3658 self.ensured_daily_stats_keys.lock().contains(key)
3659 }
3660
3661 fn daily_stats_keys_already_ensured(&self, keys: &[EnsuredDailyStatsKey; 4]) -> bool {
3662 let ensured = self.ensured_daily_stats_keys.lock();
3663 keys.iter().all(|key| ensured.contains(key))
3664 }
3665
3666 fn mark_daily_stats_key_ensured(&self, key: EnsuredDailyStatsKey) {
3667 self.ensured_daily_stats_keys.lock().insert(key);
3668 }
3669
3670 fn fts_messages_present_cached(&self, tx: &FrankenTransaction<'_>) -> bool {
3671 match self.fts_messages_present_cache.load(Ordering::Acquire) {
3672 FTS_MESSAGES_PRESENT_PRESENT => return true,
3673 FTS_MESSAGES_PRESENT_ABSENT => return false,
3674 _ => {}
3675 }
3676
3677 let present = tx
3678 .query_row_map(
3679 "SELECT COUNT(*) FROM sqlite_master
3680 WHERE name = 'fts_messages'
3681 AND rootpage > 0",
3682 fparams![],
3683 |row| row.get_typed::<i64>(0),
3684 )
3685 .map(|count| count > 0)
3686 .unwrap_or_else(|err| {
3687 tracing::debug!(
3688 error = %err,
3689 "failed to probe fts_messages presence; skipping db-resident FTS maintenance"
3690 );
3691 false
3692 });
3693 self.set_fts_messages_present_cache(present);
3694 present
3695 }
3696
3697 fn set_fts_messages_present_cache(&self, present: bool) {
3698 self.fts_messages_present_cache.store(
3699 if present {
3700 FTS_MESSAGES_PRESENT_PRESENT
3701 } else {
3702 FTS_MESSAGES_PRESENT_ABSENT
3703 },
3704 Ordering::Release,
3705 );
3706 }
3707
3708 fn invalidate_fts_messages_present_cache(&self) {
3709 self.fts_messages_present_cache
3710 .store(FTS_MESSAGES_PRESENT_UNKNOWN, Ordering::Release);
3711 }
3712
3713 fn invalidate_conversation_source_cache(&self, source_id: &str) {
3714 self.ensured_conversation_sources
3715 .lock()
3716 .retain(|key| key.id != source_id);
3717 }
3718
3719 fn close_cached_ephemeral_writer_best_effort_in_place(&mut self) {
3720 let cached = self.cached_ephemeral_writer.get_mut();
3721 if let CachedEphemeralWriter::Cached(conn) =
3722 std::mem::replace(cached, CachedEphemeralWriter::Uninitialized)
3723 {
3724 let mut conn = conn;
3725 conn.0.close_best_effort_in_place();
3726 }
3727 }
3728
3729 fn close_cached_ephemeral_writer_without_checkpoint_in_place(&mut self) -> Result<()> {
3730 let cached = self.cached_ephemeral_writer.get_mut();
3731 match std::mem::replace(cached, CachedEphemeralWriter::Uninitialized) {
3732 CachedEphemeralWriter::Cached(mut conn) => conn
3733 .0
3734 .close_without_checkpoint_in_place()
3735 .with_context(|| "closing cached frankensqlite writer without final checkpoint"),
3736 CachedEphemeralWriter::Uninitialized | CachedEphemeralWriter::InUse => Ok(()),
3737 }
3738 }
3739
3740 pub fn open_readonly(path: &Path) -> Result<Self> {
3742 Self::open_readonly_with_doctor_lock_timeout(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)
3743 }
3744
3745 pub fn open_readonly_with_doctor_lock_timeout(path: &Path, timeout: Duration) -> Result<Self> {
3750 let path_str = path.to_string_lossy().to_string();
3751 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
3752 let conn = open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
3753 .with_context(|| format!("opening frankensqlite db readonly at {}", path.display()))?;
3754 let storage = Self::new(conn, path.to_path_buf());
3755 storage.apply_readonly_config()?;
3756 Ok(storage)
3757 }
3758
3759 pub fn close(self) -> Result<()> {
3760 let mut this = self;
3761 this.close_cached_ephemeral_writer_best_effort_in_place();
3762 this.conn
3763 .close()
3764 .with_context(|| "closing frankensqlite connection")
3765 }
3766
3767 pub fn close_without_checkpoint(self) -> Result<()> {
3768 let mut this = self;
3769 this.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3770 this.conn
3771 .close_without_checkpoint()
3772 .with_context(|| "closing frankensqlite connection without final checkpoint")
3773 }
3774
3775 pub fn close_best_effort_in_place(&mut self) {
3776 self.close_cached_ephemeral_writer_best_effort_in_place();
3777 self.conn.close_best_effort_in_place();
3778 }
3779
3780 pub fn close_without_checkpoint_in_place(&mut self) -> Result<()> {
3781 self.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3782 self.conn
3783 .close_without_checkpoint_in_place()
3784 .with_context(|| "closing frankensqlite connection without final checkpoint")
3785 }
3786
3787 pub fn raw(&self) -> &FrankenConnection {
3789 &self.conn
3790 }
3791
3792 pub fn into_raw(self) -> FrankenConnection {
3795 let mut this = self;
3796 this.close_cached_ephemeral_writer_best_effort_in_place();
3797 this.conn
3798 }
3799
3800 pub fn apply_config(&self) -> Result<()> {
3807 self.conn
3811 .execute("PRAGMA journal_mode = WAL;")
3812 .with_context(|| "setting journal_mode")?;
3813 self.conn
3814 .execute("PRAGMA synchronous = NORMAL;")
3815 .with_context(|| "setting synchronous")?;
3816
3817 self.conn
3819 .execute("PRAGMA cache_size = -65536;")
3820 .with_context(|| "setting cache_size")?;
3821
3822 self.conn
3824 .execute("PRAGMA foreign_keys = ON;")
3825 .with_context(|| "setting foreign_keys")?;
3826
3827 self.conn
3829 .execute("PRAGMA busy_timeout = 5000;")
3830 .with_context(|| "setting busy_timeout")?;
3831
3832 let checkpoint_pragma =
3840 format!("PRAGMA wal_autocheckpoint = {DEFAULT_WAL_AUTOCHECKPOINT_PAGES};");
3841 let _ = self.conn.execute(&checkpoint_pragma);
3842 self.index_writer_checkpoint_pages
3843 .store(DEFAULT_WAL_AUTOCHECKPOINT_PAGES, Ordering::Relaxed);
3844 let _ = self.conn.execute("PRAGMA fsqlite.concurrent_mode = ON;");
3847 let _ = self.conn.execute("PRAGMA concurrent_mode = ON;");
3848 let autocommit_pragma =
3859 disable_autocommit_retain(|pragma| self.conn.execute(pragma).map(|_| ()))?;
3860 tracing::debug!(
3861 pragma = autocommit_pragma,
3862 "disabled frankensqlite autocommit_retain for storage connection"
3863 );
3864
3865 Ok(())
3866 }
3867
3868 fn apply_readonly_config(&self) -> Result<()> {
3869 self.conn
3870 .execute("PRAGMA query_only = 1;")
3871 .with_context(|| "setting query_only")?;
3872 self.conn
3873 .execute("PRAGMA busy_timeout = 5000;")
3874 .with_context(|| "setting busy_timeout")?;
3875 self.conn
3876 .execute("PRAGMA cache_size = -65536;")
3877 .with_context(|| "setting cache_size")?;
3878 self.conn
3879 .execute("PRAGMA foreign_keys = ON;")
3880 .with_context(|| "setting foreign_keys")?;
3881 Ok(())
3882 }
3883
3884 pub fn run_migrations(&self) -> Result<()> {
3902 transition_from_meta_version(&self.conn)?;
3903
3904 let base_result = build_cass_migrations_before_tail_cache()
3905 .run(&self.conn)
3906 .with_context(|| "running base schema migrations")?;
3907
3908 let mut applied = base_result.applied;
3909 if apply_conversation_tail_state_cache_migration(&self.conn)
3910 .with_context(|| "running conversation tail-state cache migration")?
3911 {
3912 applied.push(15);
3913 }
3914
3915 let post_result = build_cass_migrations_after_tail_cache()
3916 .run(&self.conn)
3917 .with_context(|| "running post-tail-cache schema migrations")?;
3918 applied.extend(post_result.applied);
3919
3920 let current = self.schema_version()?;
3921 if !applied.is_empty() {
3922 info!(
3923 applied = ?applied,
3924 current,
3925 was_fresh = base_result.was_fresh,
3926 "frankensqlite schema migrations applied"
3927 );
3928 }
3929
3930 self.sync_meta_schema_version(current)?;
3932
3933 Ok(())
3934 }
3935
3936 fn repair_missing_current_schema_objects(&self) -> Result<()> {
3941 let mut missing_tables = Vec::new();
3942 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
3943 if let Err(err) = self.conn.query(probe_sql) {
3944 if error_indicates_missing_table(&err) {
3945 missing_tables.push(table_name);
3946 continue;
3947 }
3948 return Err(err).with_context(|| {
3949 format!("probing required schema table {table_name} for completeness")
3950 });
3951 }
3952 }
3953
3954 if !missing_tables.is_empty() {
3955 info!(
3956 missing_tables = ?missing_tables,
3957 "repairing missing current-schema tables on an already-versioned cass database"
3958 );
3959
3960 for batch in current_schema_repair_batches_for_missing_tables(&missing_tables)? {
3961 self.conn
3962 .execute_batch(batch.sql)
3963 .with_context(|| format!("repairing current-schema batch {}", batch.name))?;
3964 }
3965
3966 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
3967 if !missing_tables.contains(&table_name) {
3968 continue;
3969 }
3970 self.conn
3971 .query(probe_sql)
3972 .with_context(|| format!("verifying repaired schema table {table_name}"))?;
3973 }
3974 }
3975 self.repair_missing_conversation_token_columns()?;
3976 Ok(())
3977 }
3978
3979 fn repair_missing_conversation_token_columns(&self) -> Result<()> {
3980 let columns = franken_table_column_names(&self.conn, "conversations")
3981 .with_context(|| "inspecting conversations columns for token-summary repair")?;
3982 let mut missing_columns = Vec::new();
3983 for &(column_name, column_type) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
3984 if columns.contains(column_name) {
3985 continue;
3986 }
3987 let sql = format!("ALTER TABLE conversations ADD COLUMN {column_name} {column_type};");
3988 self.conn.execute(&sql).with_context(|| {
3989 format!("adding missing conversations.{column_name} token-summary column")
3990 })?;
3991 missing_columns.push(column_name);
3992 }
3993 if !missing_columns.is_empty() {
3994 tracing::warn!(
3995 target: "cass::schema_repair",
3996 db_path = %self.db_path.display(),
3997 missing_columns = ?missing_columns,
3998 "cass#222: repaired missing conversations token-summary columns"
3999 );
4000 }
4001 Ok(())
4002 }
4003
4004 pub(crate) fn cleanup_orphan_fk_rows(&self) -> Result<OrphanFkCleanupReport> {
4023 let mut report = OrphanFkCleanupReport::default();
4024 let orphan_message_ids = match collect_orphan_message_ids(&self.conn) {
4025 Ok(ids) => ids,
4026 Err(err) if error_indicates_missing_table(&err) => {
4027 tracing::debug!(
4028 target: "cass::fk_repair",
4029 child_table = "messages",
4030 error = %err,
4031 "skipping orphan-message probe (table or column unavailable)"
4032 );
4033 Vec::new()
4034 }
4035 Err(err) => return Err(err),
4036 };
4037 if !orphan_message_ids.is_empty() {
4038 report.record("messages", orphan_message_ids.len() as i64);
4039 }
4040
4041 if !orphan_message_ids.is_empty() {
4042 delete_orphan_message_ids_bisecting_oom(&self.conn, &orphan_message_ids)
4043 .context("deleting orphan message rows and dependent children")?;
4044 }
4045
4046 for entry in ORPHAN_DIRECT_CHILD_TABLES {
4047 loop {
4048 let ids = match collect_direct_orphan_id_page(&self.conn, entry) {
4049 Ok(ids) => ids,
4050 Err(err)
4051 if error_indicates_missing_table(&err)
4052 || error_indicates_missing_column(&err) =>
4053 {
4054 tracing::debug!(
4058 target: "cass::fk_repair",
4059 child_table = entry.child_table,
4060 error = %err,
4061 "skipping orphan probe (table or column unavailable)"
4062 );
4063 break;
4064 }
4065 Err(err) => {
4066 return Err(err).with_context(|| {
4067 format!("probing orphan rows in {}", entry.child_table)
4068 });
4069 }
4070 };
4071 if ids.is_empty() {
4072 break;
4073 }
4074
4075 let deleted = delete_direct_orphan_ids_bisecting_oom(&self.conn, entry, &ids)
4076 .with_context(|| format!("deleting orphan rows from {}", entry.child_table))?;
4077 if deleted == 0 {
4078 break;
4079 }
4080 report.record(
4081 entry.child_table,
4082 i64::try_from(deleted).unwrap_or(i64::MAX),
4083 );
4084 }
4085 }
4086
4087 if report.total == 0 {
4088 return Ok(report);
4089 }
4090
4091 tracing::warn!(
4096 target: "cass::fk_repair",
4097 db_path = %self.db_path.display(),
4098 total_orphans = report.total,
4099 per_table = ?report.per_table,
4100 "cass#202: removed orphan rows left behind by interrupted index transactions"
4101 );
4102
4103 Ok(report)
4104 }
4105
4106 pub fn schema_version(&self) -> Result<i64> {
4108 let rows = self
4109 .conn
4110 .query("SELECT MAX(version) FROM _schema_migrations;")
4111 .with_context(|| "reading schema version from _schema_migrations")?;
4112
4113 if let Some(row) = rows.first()
4114 && let Ok(v) = row.get_typed::<Option<i64>>(0)
4115 {
4116 return Ok(v.unwrap_or(0));
4117 }
4118 Ok(0)
4119 }
4120
4121 fn sync_meta_schema_version(&self, version: i64) -> Result<()> {
4123 if self.conn.query("SELECT key FROM meta LIMIT 1;").is_err() {
4126 return Ok(());
4127 }
4128
4129 if let Ok(rows) = self
4131 .conn
4132 .query("SELECT value FROM meta WHERE key = 'schema_version';")
4133 && let Some(row) = rows.first()
4134 && let Ok(val) = row.get_typed::<String>(0)
4135 && val == version.to_string()
4136 {
4137 return Ok(()); }
4139
4140 self.conn
4141 .execute_compat(
4142 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1);",
4143 &[ParamValue::from(version.to_string())],
4144 )
4145 .with_context(|| "syncing meta schema_version")?;
4146
4147 Ok(())
4148 }
4149
4150 pub fn database_path(&self) -> Result<PathBuf> {
4152 Ok(self.db_path.clone())
4153 }
4154
4155 pub(crate) fn ephemeral_writer_preflight_verified(&self) -> bool {
4156 self.ephemeral_writer_preflight_verified
4157 .load(Ordering::Relaxed)
4158 }
4159
4160 pub(crate) fn mark_ephemeral_writer_preflight_verified(&self) {
4161 self.ephemeral_writer_preflight_verified
4162 .store(true, Ordering::Relaxed);
4163 }
4164
4165 pub(crate) fn index_writer_checkpoint_pages(&self) -> Option<i64> {
4166 let pages = self.index_writer_checkpoint_pages.load(Ordering::Relaxed);
4167 (pages != UNSET_INDEX_WRITER_CHECKPOINT_PAGES).then_some(pages)
4168 }
4169
4170 pub(crate) fn mark_index_writer_checkpoint_pages(&self, pages: i64) {
4171 self.index_writer_checkpoint_pages
4172 .store(pages, Ordering::Relaxed);
4173 }
4174
4175 pub(crate) fn index_writer_busy_timeout_ms(&self) -> Option<u64> {
4176 let timeout_ms = self.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
4177 (timeout_ms != UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS).then_some(timeout_ms)
4178 }
4179
4180 pub(crate) fn mark_index_writer_busy_timeout_ms(&self, timeout_ms: u64) {
4181 self.index_writer_busy_timeout_ms
4182 .store(timeout_ms, Ordering::Relaxed);
4183 }
4184
4185 pub fn open_or_rebuild(path: &Path) -> std::result::Result<Self, MigrationError> {
4187 if let Some(parent) = path.parent() {
4188 fs::create_dir_all(parent)?;
4189 }
4190
4191 if path.exists() {
4192 let check_result = check_schema_compatibility(path);
4193 match check_result {
4194 Ok(SchemaCheck::Compatible) | Ok(SchemaCheck::NeedsMigration) => {
4195 }
4197 Ok(SchemaCheck::NeedsRebuild(reason)) => {
4198 let backup_path = create_backup(path)?;
4199 cleanup_old_backups(path, MAX_BACKUPS)?;
4200 remove_database_files(path)?;
4201 return Err(MigrationError::RebuildRequired {
4202 reason,
4203 backup_path,
4204 });
4205 }
4206 Err(err) if schema_check_error_requires_rebuild(&err) => {
4207 let backup_path = create_backup(path)?;
4208 cleanup_old_backups(path, MAX_BACKUPS)?;
4209 remove_database_files(path)?;
4210 return Err(MigrationError::RebuildRequired {
4211 reason: format!("Database appears corrupted: {err}"),
4212 backup_path,
4213 });
4214 }
4215 Err(err) => return Err(MigrationError::Database(err)),
4216 }
4217 }
4218
4219 let storage = Self::open(path).map_err(|e| MigrationError::Other(e.to_string()))?;
4220 Ok(storage)
4221 }
4222}
4223
4224fn build_cass_migrations_before_tail_cache() -> MigrationRunner {
4240 MigrationRunner::new()
4241 .add(13, "full_schema_v13", MIGRATION_FRESH_SCHEMA)
4242 .add(14, "fts_contentless", MIGRATION_V14)
4243}
4244
4245fn build_cass_migrations_after_tail_cache() -> MigrationRunner {
4246 MigrationRunner::new()
4247 .add(16, "drop_redundant_message_conv_idx", MIGRATION_V16)
4248 .add(17, "drop_message_created_idx", MIGRATION_V17)
4249 .add(18, "conversation_tail_state_hot_table", MIGRATION_V18)
4250 .add(19, "conversation_external_lookup", MIGRATION_V19)
4251 .add(20, "conversation_external_tail_lookup", MIGRATION_V20)
4252}
4253
4254fn schema_migration_is_applied(conn: &FrankenConnection, version: i64) -> Result<bool> {
4255 let rows = conn
4256 .query_with_params(
4257 "SELECT 1 FROM _schema_migrations WHERE version = ?1 LIMIT 1;",
4258 &[SqliteValue::from(version)],
4259 )
4260 .with_context(|| format!("checking schema migration version {version}"))?;
4261 Ok(!rows.is_empty())
4262}
4263
4264fn apply_conversation_tail_state_cache_migration(conn: &FrankenConnection) -> Result<bool> {
4265 conn.execute("BEGIN IMMEDIATE;")
4266 .with_context(|| "starting v15 conversation tail-state migration transaction")?;
4267
4268 let result = (|| -> Result<bool> {
4269 if schema_migration_is_applied(conn, 15)? {
4270 conn.execute("COMMIT;")
4271 .with_context(|| "committing already-applied v15 migration transaction")?;
4272 return Ok(false);
4273 }
4274
4275 let started = Instant::now();
4276 let conversation_columns = franken_table_column_names(conn, "conversations")
4277 .with_context(|| "inspecting conversations columns before v15 migration")?;
4278 if !conversation_columns.contains("last_message_idx") {
4279 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_idx INTEGER;")
4280 .with_context(|| "adding v15 conversations.last_message_idx column")?;
4281 }
4282 if !conversation_columns.contains("last_message_created_at") {
4283 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_created_at INTEGER;")
4284 .with_context(|| "adding v15 conversations.last_message_created_at column")?;
4285 }
4286 conn.execute_batch(MIGRATION_V15_TAIL_STATE_TABLE)
4287 .with_context(|| "applying v15 conversation tail-state table schema")?;
4288 conn.execute_compat(
4289 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
4290 fparams![15_i64, "conversation_tail_state_cache"],
4291 )
4292 .with_context(|| "recording v15 conversation tail-state migration")?;
4293 conn.execute("COMMIT;")
4294 .with_context(|| "committing v15 conversation tail-state migration")?;
4295 info!(
4296 elapsed_ms = started.elapsed().as_millis(),
4297 "applied v15 conversation tail-state cache migration"
4298 );
4299 Ok(true)
4300 })();
4301
4302 if result.is_err() {
4303 let _ = conn.execute("ROLLBACK;");
4304 }
4305
4306 result
4307}
4308
4309fn franken_table_column_names(
4310 conn: &FrankenConnection,
4311 table_name: &str,
4312) -> Result<HashSet<String>> {
4313 if !table_name
4314 .chars()
4315 .all(|c| c.is_ascii_alphanumeric() || c == '_')
4316 {
4317 return Err(anyhow!(
4318 "unsafe table name for PRAGMA table_info: {table_name}"
4319 ));
4320 }
4321
4322 conn.query_map_collect(
4323 &format!("PRAGMA table_info({table_name})"),
4324 fparams![],
4325 |row: &FrankenRow| row.get_typed::<String>(1),
4326 )
4327 .with_context(|| format!("reading PRAGMA table_info({table_name})"))
4328 .map(|columns| columns.into_iter().collect())
4329}
4330
4331const MIGRATION_FRESH_SCHEMA: &str = r"
4341-- Core tables (V1)
4342CREATE TABLE IF NOT EXISTS meta (
4343 key TEXT PRIMARY KEY,
4344 value TEXT NOT NULL
4345);
4346
4347CREATE TABLE IF NOT EXISTS agents (
4348 id INTEGER PRIMARY KEY,
4349 slug TEXT NOT NULL UNIQUE,
4350 name TEXT NOT NULL,
4351 version TEXT,
4352 kind TEXT NOT NULL,
4353 created_at INTEGER NOT NULL,
4354 updated_at INTEGER NOT NULL
4355);
4356
4357CREATE TABLE IF NOT EXISTS workspaces (
4358 id INTEGER PRIMARY KEY,
4359 path TEXT NOT NULL UNIQUE,
4360 display_name TEXT
4361);
4362
4363-- Sources (V4)
4364CREATE TABLE IF NOT EXISTS sources (
4365 id TEXT PRIMARY KEY,
4366 kind TEXT NOT NULL,
4367 host_label TEXT,
4368 machine_id TEXT,
4369 platform TEXT,
4370 config_json TEXT,
4371 created_at INTEGER NOT NULL,
4372 updated_at INTEGER NOT NULL
4373);
4374
4375INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
4376VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
4377
4378-- Conversations: V1 base + V5 provenance + V7 metadata_bin + V10 token summary
4379CREATE TABLE IF NOT EXISTS conversations (
4380 id INTEGER PRIMARY KEY,
4381 agent_id INTEGER NOT NULL REFERENCES agents(id),
4382 workspace_id INTEGER REFERENCES workspaces(id),
4383 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
4384 external_id TEXT,
4385 title TEXT,
4386 source_path TEXT NOT NULL,
4387 started_at INTEGER,
4388 ended_at INTEGER,
4389 approx_tokens INTEGER,
4390 metadata_json TEXT,
4391 origin_host TEXT,
4392 metadata_bin BLOB,
4393 total_input_tokens INTEGER,
4394 total_output_tokens INTEGER,
4395 total_cache_read_tokens INTEGER,
4396 total_cache_creation_tokens INTEGER,
4397 grand_total_tokens INTEGER,
4398 estimated_cost_usd REAL,
4399 primary_model TEXT,
4400 api_call_count INTEGER,
4401 tool_call_count INTEGER,
4402 user_message_count INTEGER,
4403 assistant_message_count INTEGER,
4404 -- V15 columns are included in the fresh schema so fresh DB creation does
4405 -- not need ALTER TABLE on conversations. That ALTER path can duplicate
4406 -- provenance autoindex state in frankensqlite when the named unique
4407 -- provenance index already exists.
4408 last_message_idx INTEGER,
4409 last_message_created_at INTEGER
4410);
4411
4412-- Named unique index avoids autoindex issues if table is ever recreated
4413CREATE UNIQUE INDEX IF NOT EXISTS idx_conversations_provenance
4414 ON conversations(source_id, agent_id, external_id);
4415
4416-- Messages: V1 base + V7 extra_bin
4417CREATE TABLE IF NOT EXISTS messages (
4418 id INTEGER PRIMARY KEY,
4419 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
4420 idx INTEGER NOT NULL,
4421 role TEXT NOT NULL,
4422 author TEXT,
4423 created_at INTEGER,
4424 content TEXT NOT NULL,
4425 extra_json TEXT,
4426 extra_bin BLOB,
4427 UNIQUE(conversation_id, idx)
4428);
4429
4430CREATE TABLE IF NOT EXISTS snippets (
4431 id INTEGER PRIMARY KEY,
4432 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4433 file_path TEXT,
4434 start_line INTEGER,
4435 end_line INTEGER,
4436 language TEXT,
4437 snippet_text TEXT
4438);
4439
4440CREATE TABLE IF NOT EXISTS tags (
4441 id INTEGER PRIMARY KEY,
4442 name TEXT NOT NULL UNIQUE
4443);
4444
4445CREATE TABLE IF NOT EXISTS conversation_tags (
4446 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
4447 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
4448 PRIMARY KEY (conversation_id, tag_id)
4449);
4450
4451-- Daily stats (V8)
4452CREATE TABLE IF NOT EXISTS daily_stats (
4453 day_id INTEGER NOT NULL,
4454 agent_slug TEXT NOT NULL,
4455 source_id TEXT NOT NULL DEFAULT 'all',
4456 session_count INTEGER NOT NULL DEFAULT 0,
4457 message_count INTEGER NOT NULL DEFAULT 0,
4458 total_chars INTEGER NOT NULL DEFAULT 0,
4459 last_updated INTEGER NOT NULL,
4460 PRIMARY KEY (day_id, agent_slug, source_id)
4461);
4462
4463-- Embedding jobs (V9)
4464CREATE TABLE IF NOT EXISTS embedding_jobs (
4465 id INTEGER PRIMARY KEY AUTOINCREMENT,
4466 db_path TEXT NOT NULL,
4467 model_id TEXT NOT NULL,
4468 status TEXT NOT NULL DEFAULT 'pending',
4469 total_docs INTEGER NOT NULL DEFAULT 0,
4470 completed_docs INTEGER NOT NULL DEFAULT 0,
4471 error_message TEXT,
4472 created_at TEXT NOT NULL DEFAULT (datetime('now')),
4473 started_at TEXT,
4474 completed_at TEXT
4475);
4476
4477CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
4478ON embedding_jobs(db_path, model_id)
4479WHERE status IN ('pending', 'running');
4480
4481-- Token usage ledger (V10)
4482CREATE TABLE IF NOT EXISTS token_usage (
4483 id INTEGER PRIMARY KEY AUTOINCREMENT,
4484 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4485 conversation_id INTEGER NOT NULL,
4486 agent_id INTEGER NOT NULL,
4487 workspace_id INTEGER,
4488 source_id TEXT NOT NULL DEFAULT 'local',
4489 timestamp_ms INTEGER NOT NULL,
4490 day_id INTEGER NOT NULL,
4491 model_name TEXT,
4492 model_family TEXT,
4493 model_tier TEXT,
4494 service_tier TEXT,
4495 provider TEXT,
4496 input_tokens INTEGER,
4497 output_tokens INTEGER,
4498 cache_read_tokens INTEGER,
4499 cache_creation_tokens INTEGER,
4500 thinking_tokens INTEGER,
4501 total_tokens INTEGER,
4502 estimated_cost_usd REAL,
4503 role TEXT NOT NULL,
4504 content_chars INTEGER NOT NULL,
4505 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4506 tool_call_count INTEGER NOT NULL DEFAULT 0,
4507 data_source TEXT NOT NULL DEFAULT 'api',
4508 UNIQUE(message_id)
4509);
4510
4511-- Token daily stats (V10)
4512CREATE TABLE IF NOT EXISTS token_daily_stats (
4513 day_id INTEGER NOT NULL,
4514 agent_slug TEXT NOT NULL,
4515 source_id TEXT NOT NULL DEFAULT 'all',
4516 model_family TEXT NOT NULL DEFAULT 'all',
4517 api_call_count INTEGER NOT NULL DEFAULT 0,
4518 user_message_count INTEGER NOT NULL DEFAULT 0,
4519 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4520 tool_message_count INTEGER NOT NULL DEFAULT 0,
4521 total_input_tokens INTEGER NOT NULL DEFAULT 0,
4522 total_output_tokens INTEGER NOT NULL DEFAULT 0,
4523 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
4524 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
4525 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
4526 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
4527 total_content_chars INTEGER NOT NULL DEFAULT 0,
4528 total_tool_calls INTEGER NOT NULL DEFAULT 0,
4529 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
4530 session_count INTEGER NOT NULL DEFAULT 0,
4531 last_updated INTEGER NOT NULL,
4532 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
4533);
4534
4535-- Model pricing (V10)
4536CREATE TABLE IF NOT EXISTS model_pricing (
4537 model_pattern TEXT NOT NULL,
4538 provider TEXT NOT NULL,
4539 input_cost_per_mtok REAL NOT NULL,
4540 output_cost_per_mtok REAL NOT NULL,
4541 cache_read_cost_per_mtok REAL,
4542 cache_creation_cost_per_mtok REAL,
4543 effective_date TEXT NOT NULL,
4544 PRIMARY KEY (model_pattern, effective_date)
4545);
4546
4547INSERT OR IGNORE INTO model_pricing VALUES
4548 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
4549 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
4550 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
4551 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
4552 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
4553 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4554 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4555 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
4556 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
4557 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
4558
4559-- Message metrics: V11 base + V12 model dimensions
4560CREATE TABLE IF NOT EXISTS message_metrics (
4561 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
4562 created_at_ms INTEGER NOT NULL,
4563 hour_id INTEGER NOT NULL,
4564 day_id INTEGER NOT NULL,
4565 agent_slug TEXT NOT NULL,
4566 workspace_id INTEGER NOT NULL DEFAULT 0,
4567 source_id TEXT NOT NULL DEFAULT 'local',
4568 role TEXT NOT NULL,
4569 content_chars INTEGER NOT NULL,
4570 content_tokens_est INTEGER NOT NULL,
4571 api_input_tokens INTEGER,
4572 api_output_tokens INTEGER,
4573 api_cache_read_tokens INTEGER,
4574 api_cache_creation_tokens INTEGER,
4575 api_thinking_tokens INTEGER,
4576 api_service_tier TEXT,
4577 api_data_source TEXT NOT NULL DEFAULT 'estimated',
4578 tool_call_count INTEGER NOT NULL DEFAULT 0,
4579 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4580 has_plan INTEGER NOT NULL DEFAULT 0,
4581 model_name TEXT,
4582 model_family TEXT NOT NULL DEFAULT 'unknown',
4583 model_tier TEXT NOT NULL DEFAULT 'unknown',
4584 provider TEXT NOT NULL DEFAULT 'unknown'
4585);
4586
4587-- Hourly rollups: V11 base + V13 plan columns
4588CREATE TABLE IF NOT EXISTS usage_hourly (
4589 hour_id INTEGER NOT NULL,
4590 agent_slug TEXT NOT NULL,
4591 workspace_id INTEGER NOT NULL DEFAULT 0,
4592 source_id TEXT NOT NULL DEFAULT 'local',
4593 message_count INTEGER NOT NULL DEFAULT 0,
4594 user_message_count INTEGER NOT NULL DEFAULT 0,
4595 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4596 tool_call_count INTEGER NOT NULL DEFAULT 0,
4597 plan_message_count INTEGER NOT NULL DEFAULT 0,
4598 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4599 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4600 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4601 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4602 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4603 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4604 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4605 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4606 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4607 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4608 last_updated INTEGER NOT NULL DEFAULT 0,
4609 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4610 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4611 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
4612);
4613
4614-- Daily rollups: V11 base + V13 plan columns
4615CREATE TABLE IF NOT EXISTS usage_daily (
4616 day_id INTEGER NOT NULL,
4617 agent_slug TEXT NOT NULL,
4618 workspace_id INTEGER NOT NULL DEFAULT 0,
4619 source_id TEXT NOT NULL DEFAULT 'local',
4620 message_count INTEGER NOT NULL DEFAULT 0,
4621 user_message_count INTEGER NOT NULL DEFAULT 0,
4622 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4623 tool_call_count INTEGER NOT NULL DEFAULT 0,
4624 plan_message_count INTEGER NOT NULL DEFAULT 0,
4625 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4626 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4627 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4628 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4629 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4630 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4631 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4632 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4633 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4634 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4635 last_updated INTEGER NOT NULL DEFAULT 0,
4636 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4637 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4638 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
4639);
4640
4641-- Model daily rollups (V12)
4642CREATE TABLE IF NOT EXISTS usage_models_daily (
4643 day_id INTEGER NOT NULL,
4644 agent_slug TEXT NOT NULL,
4645 workspace_id INTEGER NOT NULL DEFAULT 0,
4646 source_id TEXT NOT NULL DEFAULT 'local',
4647 model_family TEXT NOT NULL DEFAULT 'unknown',
4648 model_tier TEXT NOT NULL DEFAULT 'unknown',
4649 message_count INTEGER NOT NULL DEFAULT 0,
4650 user_message_count INTEGER NOT NULL DEFAULT 0,
4651 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4652 tool_call_count INTEGER NOT NULL DEFAULT 0,
4653 plan_message_count INTEGER NOT NULL DEFAULT 0,
4654 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4655 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4656 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4657 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4658 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4659 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4660 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4661 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4662 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4663 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4664 last_updated INTEGER NOT NULL DEFAULT 0,
4665 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
4666);
4667
4668-- All indexes
4669CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
4670CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
4671CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
4672CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
4673CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
4674CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
4675CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
4676CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
4677CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
4678CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
4679CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
4680CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
4681CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
4682CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
4683CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
4684CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
4685CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
4686CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
4687CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
4688CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
4689CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
4690CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
4691CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
4692CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
4693CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
4694CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
4695CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
4696CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
4697CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
4698CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
4699";
4700
/// A named bundle of repair SQL together with the tables it is registered for.
///
/// Batches are selected by table name: a batch applies when at least one of
/// its `tables` is reported missing.
#[derive(Copy, Clone)]
struct SchemaRepairBatch {
    /// Short label identifying the batch.
    name: &'static str,
    /// Names of the tables this batch is registered for.
    tables: &'static [&'static str],
    /// SQL script associated with the batch.
    sql: &'static str,
}
4707
/// Repair SQL for the `sources` table.
///
/// Creates the table if it does not exist and seeds the `'local'` source row
/// with `INSERT OR IGNORE`, so an existing row with that id is left untouched.
/// Timestamps are written as `strftime('%s','now')*1000`.
4708const CURRENT_SCHEMA_REPAIR_SOURCES_SQL: &str = r"
4709CREATE TABLE IF NOT EXISTS sources (
4710 id TEXT PRIMARY KEY,
4711 kind TEXT NOT NULL,
4712 host_label TEXT,
4713 machine_id TEXT,
4714 platform TEXT,
4715 config_json TEXT,
4716 created_at INTEGER NOT NULL,
4717 updated_at INTEGER NOT NULL
4718);
4719
4720INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
4721VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
4722";
4723
/// Repair SQL for the `daily_stats` table.
///
/// Creates the table (keyed by `day_id`, `agent_slug`, `source_id`) and its
/// two secondary indexes if they do not exist. No rows are inserted.
4724const CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL: &str = r"
4725CREATE TABLE IF NOT EXISTS daily_stats (
4726 day_id INTEGER NOT NULL,
4727 agent_slug TEXT NOT NULL,
4728 source_id TEXT NOT NULL DEFAULT 'all',
4729 session_count INTEGER NOT NULL DEFAULT 0,
4730 message_count INTEGER NOT NULL DEFAULT 0,
4731 total_chars INTEGER NOT NULL DEFAULT 0,
4732 last_updated INTEGER NOT NULL,
4733 PRIMARY KEY (day_id, agent_slug, source_id)
4734);
4735
4736CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
4737CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
4738";
4739
/// Repair SQL for the `conversation_external_lookup` table.
///
/// Creates the table if needed, then inserts or replaces one row per
/// conversation whose `external_id` is not NULL. The lookup key is built as
/// `<length(source_id)>:<source_id>:<agent_id>:<length(external_id)>:<external_id>`.
///
/// NOTE(review): SQL `length()` on text counts characters, not bytes; whatever
/// builds the same key outside this script must use the same measure — confirm.
4740const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL: &str = r"
4741CREATE TABLE IF NOT EXISTS conversation_external_lookup (
4742 lookup_key TEXT PRIMARY KEY,
4743 conversation_id INTEGER NOT NULL
4744);
4745
4746INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
4747SELECT
4748 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
4749 CAST(agent_id AS TEXT) || ':' ||
4750 CAST(length(external_id) AS TEXT) || ':' || external_id,
4751 id
4752FROM conversations
4753WHERE external_id IS NOT NULL;
4754";
4755
/// Repair SQL for `conversation_tail_state` and
/// `conversation_external_tail_lookup`.
///
/// Creates both tables if needed, then inserts or replaces one tail-lookup row
/// per conversation whose `external_id` is not NULL. The key uses the same
/// `<length>:<source_id>:<agent_id>:<length>:<external_id>` layout as the
/// statement's SELECT shows, and the tail columns come from
/// `conversation_tail_state` through a LEFT JOIN, so they are NULL for any
/// conversation without a tail-state row.
///
/// NOTE(review): because this uses `INSERT OR REPLACE`, running it while
/// `conversation_tail_state` is empty overwrites existing tail-lookup rows with
/// NULL tail columns — confirm readers treat NULL as "unknown".
4756const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL: &str = r"
4757CREATE TABLE IF NOT EXISTS conversation_tail_state (
4758 conversation_id INTEGER PRIMARY KEY,
4759 ended_at INTEGER,
4760 last_message_idx INTEGER,
4761 last_message_created_at INTEGER
4762);
4763
4764CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
4765 lookup_key TEXT PRIMARY KEY,
4766 conversation_id INTEGER NOT NULL,
4767 ended_at INTEGER,
4768 last_message_idx INTEGER,
4769 last_message_created_at INTEGER
4770);
4771
4772INSERT OR REPLACE INTO conversation_external_tail_lookup (
4773 lookup_key,
4774 conversation_id,
4775 ended_at,
4776 last_message_idx,
4777 last_message_created_at
4778)
4779SELECT
4780 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
4781 CAST(c.agent_id AS TEXT) || ':' ||
4782 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
4783 c.id,
4784 ts.ended_at,
4785 ts.last_message_idx,
4786 ts.last_message_created_at
4787FROM conversations c
4788LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
4789WHERE c.external_id IS NOT NULL;
4790";
4791
/// Repair SQL for the `embedding_jobs` table.
///
/// Creates the table if needed, plus a partial unique index on
/// `(db_path, model_id)` restricted to rows whose `status` is `'pending'` or
/// `'running'`, which allows at most one such row per `(db_path, model_id)`.
4792const CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL: &str = r"
4793CREATE TABLE IF NOT EXISTS embedding_jobs (
4794 id INTEGER PRIMARY KEY AUTOINCREMENT,
4795 db_path TEXT NOT NULL,
4796 model_id TEXT NOT NULL,
4797 status TEXT NOT NULL DEFAULT 'pending',
4798 total_docs INTEGER NOT NULL DEFAULT 0,
4799 completed_docs INTEGER NOT NULL DEFAULT 0,
4800 error_message TEXT,
4801 created_at TEXT NOT NULL DEFAULT (datetime('now')),
4802 started_at TEXT,
4803 completed_at TEXT
4804);
4805
4806CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
4807ON embedding_jobs(db_path, model_id)
4808WHERE status IN ('pending', 'running');
4809";
4810
/// Repair SQL for the token analytics tables.
///
/// Creates `token_usage`, `token_daily_stats` and `model_pricing` (with their
/// indexes) if they do not exist, then seeds `model_pricing` with a fixed set
/// of rows using `INSERT OR IGNORE`, so rows already present under the same
/// `(model_pattern, effective_date)` key are not overwritten.
4811const CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL: &str = r"
4812CREATE TABLE IF NOT EXISTS token_usage (
4813 id INTEGER PRIMARY KEY AUTOINCREMENT,
4814 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4815 conversation_id INTEGER NOT NULL,
4816 agent_id INTEGER NOT NULL,
4817 workspace_id INTEGER,
4818 source_id TEXT NOT NULL DEFAULT 'local',
4819 timestamp_ms INTEGER NOT NULL,
4820 day_id INTEGER NOT NULL,
4821 model_name TEXT,
4822 model_family TEXT,
4823 model_tier TEXT,
4824 service_tier TEXT,
4825 provider TEXT,
4826 input_tokens INTEGER,
4827 output_tokens INTEGER,
4828 cache_read_tokens INTEGER,
4829 cache_creation_tokens INTEGER,
4830 thinking_tokens INTEGER,
4831 total_tokens INTEGER,
4832 estimated_cost_usd REAL,
4833 role TEXT NOT NULL,
4834 content_chars INTEGER NOT NULL,
4835 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4836 tool_call_count INTEGER NOT NULL DEFAULT 0,
4837 data_source TEXT NOT NULL DEFAULT 'api',
4838 UNIQUE(message_id)
4839);
4840
4841CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
4842CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
4843CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
4844CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
4845CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
4846
4847CREATE TABLE IF NOT EXISTS token_daily_stats (
4848 day_id INTEGER NOT NULL,
4849 agent_slug TEXT NOT NULL,
4850 source_id TEXT NOT NULL DEFAULT 'all',
4851 model_family TEXT NOT NULL DEFAULT 'all',
4852 api_call_count INTEGER NOT NULL DEFAULT 0,
4853 user_message_count INTEGER NOT NULL DEFAULT 0,
4854 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4855 tool_message_count INTEGER NOT NULL DEFAULT 0,
4856 total_input_tokens INTEGER NOT NULL DEFAULT 0,
4857 total_output_tokens INTEGER NOT NULL DEFAULT 0,
4858 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
4859 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
4860 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
4861 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
4862 total_content_chars INTEGER NOT NULL DEFAULT 0,
4863 total_tool_calls INTEGER NOT NULL DEFAULT 0,
4864 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
4865 session_count INTEGER NOT NULL DEFAULT 0,
4866 last_updated INTEGER NOT NULL,
4867 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
4868);
4869
4870CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
4871CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
4872
4873CREATE TABLE IF NOT EXISTS model_pricing (
4874 model_pattern TEXT NOT NULL,
4875 provider TEXT NOT NULL,
4876 input_cost_per_mtok REAL NOT NULL,
4877 output_cost_per_mtok REAL NOT NULL,
4878 cache_read_cost_per_mtok REAL,
4879 cache_creation_cost_per_mtok REAL,
4880 effective_date TEXT NOT NULL,
4881 PRIMARY KEY (model_pattern, effective_date)
4882);
4883
4884INSERT OR IGNORE INTO model_pricing VALUES
4885 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
4886 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
4887 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
4888 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
4889 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
4890 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4891 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4892 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
4893 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
4894 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
4895";
4896
/// Repair SQL for the per-message metrics table and its rollup tables.
///
/// Creates `message_metrics`, `usage_hourly`, `usage_daily` and
/// `usage_models_daily` if they do not exist, followed by their secondary
/// indexes. No rows are inserted; the script only restores empty structures.
4897const CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL: &str = r"
4898CREATE TABLE IF NOT EXISTS message_metrics (
4899 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
4900 created_at_ms INTEGER NOT NULL,
4901 hour_id INTEGER NOT NULL,
4902 day_id INTEGER NOT NULL,
4903 agent_slug TEXT NOT NULL,
4904 workspace_id INTEGER NOT NULL DEFAULT 0,
4905 source_id TEXT NOT NULL DEFAULT 'local',
4906 role TEXT NOT NULL,
4907 content_chars INTEGER NOT NULL,
4908 content_tokens_est INTEGER NOT NULL,
4909 api_input_tokens INTEGER,
4910 api_output_tokens INTEGER,
4911 api_cache_read_tokens INTEGER,
4912 api_cache_creation_tokens INTEGER,
4913 api_thinking_tokens INTEGER,
4914 api_service_tier TEXT,
4915 api_data_source TEXT NOT NULL DEFAULT 'estimated',
4916 tool_call_count INTEGER NOT NULL DEFAULT 0,
4917 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4918 has_plan INTEGER NOT NULL DEFAULT 0,
4919 model_name TEXT,
4920 model_family TEXT NOT NULL DEFAULT 'unknown',
4921 model_tier TEXT NOT NULL DEFAULT 'unknown',
4922 provider TEXT NOT NULL DEFAULT 'unknown'
4923);
4924
4925CREATE TABLE IF NOT EXISTS usage_hourly (
4926 hour_id INTEGER NOT NULL,
4927 agent_slug TEXT NOT NULL,
4928 workspace_id INTEGER NOT NULL DEFAULT 0,
4929 source_id TEXT NOT NULL DEFAULT 'local',
4930 message_count INTEGER NOT NULL DEFAULT 0,
4931 user_message_count INTEGER NOT NULL DEFAULT 0,
4932 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4933 tool_call_count INTEGER NOT NULL DEFAULT 0,
4934 plan_message_count INTEGER NOT NULL DEFAULT 0,
4935 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4936 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4937 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4938 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4939 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4940 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4941 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4942 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4943 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4944 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4945 last_updated INTEGER NOT NULL DEFAULT 0,
4946 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4947 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4948 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
4949);
4950
4951CREATE TABLE IF NOT EXISTS usage_daily (
4952 day_id INTEGER NOT NULL,
4953 agent_slug TEXT NOT NULL,
4954 workspace_id INTEGER NOT NULL DEFAULT 0,
4955 source_id TEXT NOT NULL DEFAULT 'local',
4956 message_count INTEGER NOT NULL DEFAULT 0,
4957 user_message_count INTEGER NOT NULL DEFAULT 0,
4958 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4959 tool_call_count INTEGER NOT NULL DEFAULT 0,
4960 plan_message_count INTEGER NOT NULL DEFAULT 0,
4961 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4962 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4963 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4964 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4965 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4966 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4967 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4968 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4969 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4970 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4971 last_updated INTEGER NOT NULL DEFAULT 0,
4972 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4973 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4974 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
4975);
4976
4977CREATE TABLE IF NOT EXISTS usage_models_daily (
4978 day_id INTEGER NOT NULL,
4979 agent_slug TEXT NOT NULL,
4980 workspace_id INTEGER NOT NULL DEFAULT 0,
4981 source_id TEXT NOT NULL DEFAULT 'local',
4982 model_family TEXT NOT NULL DEFAULT 'unknown',
4983 model_tier TEXT NOT NULL DEFAULT 'unknown',
4984 message_count INTEGER NOT NULL DEFAULT 0,
4985 user_message_count INTEGER NOT NULL DEFAULT 0,
4986 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4987 tool_call_count INTEGER NOT NULL DEFAULT 0,
4988 plan_message_count INTEGER NOT NULL DEFAULT 0,
4989 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4990 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4991 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4992 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4993 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4994 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4995 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4996 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4997 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4998 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4999 last_updated INTEGER NOT NULL DEFAULT 0,
5000 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
5001);
5002
5003CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
5004CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
5005CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
5006CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
5007CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
5008CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
5009CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
5010CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
5011CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
5012CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
5013CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
5014CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
5015CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
5016CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
5017CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
5018CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
5019CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
5020CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
5021";
5022
/// Registry of repair batches, one entry per repair SQL script.
///
/// Each entry names the tables its script is registered for. Batch selection
/// walks this slice front to back, so the order here is the order in which
/// selected batches are returned. A batch listing several tables is selected
/// as a whole when any one of them is missing.
5023const CURRENT_SCHEMA_REPAIR_BATCHES: &[SchemaRepairBatch] = &[
5024 SchemaRepairBatch {
5025 name: "sources",
5026 tables: &["sources"],
5027 sql: CURRENT_SCHEMA_REPAIR_SOURCES_SQL,
5028 },
5029 SchemaRepairBatch {
5030 name: "daily_stats",
5031 tables: &["daily_stats"],
5032 sql: CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL,
5033 },
5034 SchemaRepairBatch {
5035 name: "conversation_external_lookup",
5036 tables: &["conversation_external_lookup"],
5037 sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL,
5038 },
5039 SchemaRepairBatch {
5040 name: "conversation_external_tail_lookup",
5041 tables: &[
5042 "conversation_tail_state",
5043 "conversation_external_tail_lookup",
5044 ],
5045 sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL,
5046 },
5047 SchemaRepairBatch {
5048 name: "embedding_jobs",
5049 tables: &["embedding_jobs"],
5050 sql: CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL,
5051 },
5052 SchemaRepairBatch {
5053 name: "token_analytics",
5054 tables: &["token_usage", "token_daily_stats", "model_pricing"],
5055 sql: CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL,
5056 },
5057 SchemaRepairBatch {
5058 name: "message_rollups",
5059 tables: &[
5060 "message_metrics",
5061 "usage_hourly",
5062 "usage_daily",
5063 "usage_models_daily",
5064 ],
5065 sql: CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL,
5066 },
5067];
5068
5069fn current_schema_repair_batches_for_missing_tables(
5070 missing_tables: &[&'static str],
5071) -> Result<Vec<&'static SchemaRepairBatch>> {
5072 let missing_set: HashSet<&'static str> = missing_tables.iter().copied().collect();
5073 let mut selected_batches = Vec::new();
5074 let mut covered_tables = HashSet::new();
5075
5076 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
5077 if !batch
5078 .tables
5079 .iter()
5080 .any(|table_name| missing_set.contains(table_name))
5081 {
5082 continue;
5083 }
5084 selected_batches.push(batch);
5085 covered_tables.extend(batch.tables.iter().copied());
5086 }
5087
5088 for &table_name in missing_tables {
5089 if !covered_tables.contains(table_name) {
5090 return Err(anyhow!(
5091 "no current-schema repair batch registered for missing table {table_name}"
5092 ));
5093 }
5094 }
5095
5096 Ok(selected_batches)
5097}
5098
/// `(version, name)` pairs for schema versions 1 through 20.
///
/// Entries are listed in ascending version order. The meta-to-migrations
/// transition backfill stops at the first entry whose version exceeds the
/// recorded one, so this ordering must be preserved when adding entries.
5099const MIGRATION_NAMES: [(i64, &str); 20] = [
5101 (1, "core_tables"),
5102 (2, "fts_messages"),
5103 (3, "fts_messages_rebuild"),
5104 (4, "sources"),
5105 (5, "provenance_columns"),
5106 (6, "source_path_index"),
5107 (7, "msgpack_columns"),
5108 (8, "daily_stats"),
5109 (9, "embedding_jobs"),
5110 (10, "token_analytics"),
5111 (11, "message_metrics"),
5112 (12, "model_dimensions"),
5113 (13, "plan_token_rollups"),
5114 (14, "fts_contentless"),
5115 (15, "conversation_tail_state_cache"),
5116 (16, "drop_redundant_message_conv_idx"),
5117 (17, "drop_message_created_idx"),
5118 (18, "conversation_tail_state_hot_table"),
5119 (19, "conversation_external_lookup"),
5120 (20, "conversation_external_tail_lookup"),
5121];
5122
5123fn transition_from_meta_version(conn: &FrankenConnection) -> Result<()> {
5139 if conn
5143 .query("SELECT version FROM \"_schema_migrations\";")
5144 .is_ok()
5145 {
5146 return Ok(());
5147 }
5148
5149 if conn.query("SELECT key FROM meta;").is_err() {
5151 return Ok(());
5153 }
5154
5155 let rows = conn
5157 .query("SELECT value FROM meta WHERE key = 'schema_version';")
5158 .with_context(|| "reading schema_version from meta")?;
5159
5160 let current_version: i64 = rows
5161 .first()
5162 .and_then(|row| row.get_typed::<String>(0).ok())
5163 .and_then(|s| s.parse().ok())
5164 .unwrap_or(0);
5165
5166 if current_version == 0 {
5167 if conn.query("SELECT id FROM conversations LIMIT 1;").is_err() {
5169 return Ok(());
5171 }
5172
5173 info!("meta.schema_version=0 but tables exist; skipping transition (corrupted state)");
5176 return Ok(());
5177 }
5178
5179 info!(
5181 current_version,
5182 "transitioning schema tracking from meta table to _schema_migrations"
5183 );
5184
5185 conn.execute(
5186 "CREATE TABLE IF NOT EXISTS _schema_migrations (\
5187 version INTEGER PRIMARY KEY, \
5188 name TEXT NOT NULL, \
5189 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\
5190 );",
5191 )
5192 .with_context(|| "creating _schema_migrations table for transition")?;
5193
5194 for &(version, name) in &MIGRATION_NAMES {
5195 if version > current_version {
5196 break;
5197 }
5198 conn.execute_compat(
5199 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
5200 &[ParamValue::from(version), ParamValue::from(name)],
5201 )
5202 .with_context(|| format!("backfilling _schema_migrations version {version}"))?;
5203 }
5204
5205 info!(
5206 current_version,
5207 "schema version transition complete: backfilled entries for versions 1..={current_version}"
5208 );
5209
5210 Ok(())
5211}
5212
/// `(table name, probe query)` pairs for tables the current schema requires.
///
/// Each probe selects a column from its table with `LIMIT 1`. The probe for
/// `conversation_external_tail_lookup` also selects `last_message_idx`, so it
/// fails if that column is absent as well.
///
/// NOTE(review): the consumer is outside this view; presumably a failing probe
/// marks the table as missing — confirm against the caller.
5213const REQUIRED_CURRENT_SCHEMA_TABLE_PROBES: &[(&str, &str)] = &[
5214 ("sources", "SELECT id FROM sources LIMIT 1;"),
5215 ("daily_stats", "SELECT day_id FROM daily_stats LIMIT 1;"),
5216 (
5217 "conversation_external_lookup",
5218 "SELECT lookup_key FROM conversation_external_lookup LIMIT 1;",
5219 ),
5220 (
5221 "conversation_tail_state",
5222 "SELECT conversation_id FROM conversation_tail_state LIMIT 1;",
5223 ),
5224 (
5225 "conversation_external_tail_lookup",
5226 "SELECT lookup_key, last_message_idx FROM conversation_external_tail_lookup LIMIT 1;",
5227 ),
5228 ("embedding_jobs", "SELECT id FROM embedding_jobs LIMIT 1;"),
5229 ("token_usage", "SELECT id FROM token_usage LIMIT 1;"),
5230 (
5231 "token_daily_stats",
5232 "SELECT day_id FROM token_daily_stats LIMIT 1;",
5233 ),
5234 (
5235 "model_pricing",
5236 "SELECT model_pattern FROM model_pricing LIMIT 1;",
5237 ),
5238 (
5239 "message_metrics",
5240 "SELECT message_id FROM message_metrics LIMIT 1;",
5241 ),
5242 ("usage_hourly", "SELECT hour_id FROM usage_hourly LIMIT 1;"),
5243 ("usage_daily", "SELECT day_id FROM usage_daily LIMIT 1;"),
5244 (
5245 "usage_models_daily",
5246 "SELECT day_id FROM usage_models_daily LIMIT 1;",
5247 ),
5248];
5249
/// Token/cost summary columns as `(column_name, sql_type)` pairs.
// NOTE(review): the name suggests these are columns required on the
// `conversations` table; the code that checks or adds them is outside this
// chunk — confirm.
const REQUIRED_CONVERSATION_TOKEN_COLUMNS: &[(&str, &str)] = &[
    ("total_input_tokens", "INTEGER"),
    ("total_output_tokens", "INTEGER"),
    ("total_cache_read_tokens", "INTEGER"),
    ("total_cache_creation_tokens", "INTEGER"),
    ("grand_total_tokens", "INTEGER"),
    ("estimated_cost_usd", "REAL"),
    ("primary_model", "TEXT"),
    ("api_call_count", "INTEGER"),
    ("tool_call_count", "INTEGER"),
    ("user_message_count", "INTEGER"),
    ("assistant_message_count", "INTEGER"),
];
5263
/// Returns `true` when the error's rendered text contains `no such table`.
///
/// The comparison is ASCII case-insensitive: the rendered message is
/// lower-cased before the substring search.
fn error_indicates_missing_table(err: &impl std::fmt::Display) -> bool {
    let mut rendered = err.to_string();
    rendered.make_ascii_lowercase();
    rendered.contains("no such table")
}
5269
/// Returns `true` when the error's rendered text contains `no such column`.
///
/// The comparison is ASCII case-insensitive: the rendered message is
/// lower-cased before the substring search.
fn error_indicates_missing_column(err: &impl std::fmt::Display) -> bool {
    let mut rendered = err.to_string();
    rendered.make_ascii_lowercase();
    rendered.contains("no such column")
}
5275
/// Batch size for orphan foreign-key cleanup: used as the `chunks` length when
/// deleting collected ids and as the `LIMIT` when paging direct-orphan ids.
const ORPHAN_FK_ID_CHUNK_SIZE: usize = 256;
5277
5278fn collect_orphan_message_ids(conn: &FrankenConnection) -> Result<Vec<i64>> {
5279 let min_conversation_id = conn
5280 .query_map_collect(
5281 "SELECT conversation_id
5282 FROM messages
5283 ORDER BY conversation_id ASC
5284 LIMIT 1",
5285 fparams![],
5286 |row| row.get_typed(0),
5287 )
5288 .context("finding minimum message conversation id for orphan FK cleanup")?
5289 .into_iter()
5290 .next();
5291 let Some(min_conversation_id) = min_conversation_id else {
5292 return Ok(Vec::new());
5293 };
5294 let max_conversation_id: i64 = conn
5295 .query_row_map(
5296 "SELECT conversation_id
5297 FROM messages
5298 ORDER BY conversation_id DESC
5299 LIMIT 1",
5300 fparams![],
5301 |row| row.get_typed(0),
5302 )
5303 .context("finding maximum message conversation id for orphan FK cleanup")?;
5304
5305 let parent_conversation_ids: Vec<i64> = conn
5306 .query_map_collect(
5307 "SELECT id
5308 FROM conversations
5309 WHERE id BETWEEN ?1 AND ?2
5310 ORDER BY id",
5311 fparams![min_conversation_id, max_conversation_id],
5312 |row| row.get_typed(0),
5313 )
5314 .context("listing parent conversation ids for orphan FK cleanup")?;
5315
5316 let mut message_ids = Vec::new();
5317 let mut gap_start = min_conversation_id;
5318 for parent_id in parent_conversation_ids {
5319 if parent_id < gap_start {
5320 continue;
5321 }
5322 if parent_id > max_conversation_id {
5323 break;
5324 }
5325 if gap_start < parent_id {
5326 collect_message_ids_for_conversation_gap(
5327 conn,
5328 gap_start,
5329 parent_id.saturating_sub(1),
5330 &mut message_ids,
5331 )?;
5332 }
5333 if parent_id == i64::MAX {
5334 return Ok(message_ids);
5335 }
5336 gap_start = parent_id + 1;
5337 }
5338 if gap_start <= max_conversation_id {
5339 collect_message_ids_for_conversation_gap(
5340 conn,
5341 gap_start,
5342 max_conversation_id,
5343 &mut message_ids,
5344 )?;
5345 }
5346
5347 Ok(message_ids)
5348}
5349
5350fn collect_message_ids_for_conversation_gap(
5351 conn: &FrankenConnection,
5352 gap_start: i64,
5353 gap_end: i64,
5354 message_ids: &mut Vec<i64>,
5355) -> Result<()> {
5356 let (sql, params) = if gap_start == gap_end {
5357 (
5358 "SELECT id FROM messages WHERE conversation_id = ?1",
5359 vec![SqliteValue::from(gap_start)],
5360 )
5361 } else {
5362 (
5363 "SELECT id FROM messages WHERE conversation_id BETWEEN ?1 AND ?2",
5364 vec![SqliteValue::from(gap_start), SqliteValue::from(gap_end)],
5365 )
5366 };
5367 let rows = conn.query_with_params(sql, ¶ms).with_context(|| {
5368 format!("listing orphan message ids for conversation-id gap {gap_start}..={gap_end}")
5369 })?;
5370 message_ids.reserve(rows.len());
5371 for row in rows {
5372 message_ids.push(row.get_typed(0)?);
5373 }
5374 Ok(())
5375}
5376
5377fn delete_rows_by_i64_chunks(
5378 tx: &FrankenTransaction<'_>,
5379 delete_sql: &'static str,
5380 ids: &[i64],
5381) -> Result<usize> {
5382 let mut deleted = 0;
5383 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5384 for id in chunk {
5385 deleted += tx.execute_with_params(delete_sql, &[SqliteValue::from(*id)])?;
5386 }
5387 }
5388 Ok(deleted)
5389}
5390
5391fn delete_orphan_message_ids_bisecting_oom(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5392 let mut deleted = 0usize;
5393 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5394 deleted = deleted.saturating_add(delete_orphan_message_id_chunk(conn, chunk)?);
5395 }
5396 Ok(deleted)
5397}
5398
5399fn delete_orphan_message_id_chunk(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5400 if ids.is_empty() {
5401 return Ok(0);
5402 }
5403
5404 match delete_orphan_message_id_chunk_once(conn, ids) {
5405 Ok(deleted) => Ok(deleted),
5406 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5407 let split_at = ids.len() / 2;
5408 tracing::warn!(
5409 target: "cass::fk_repair",
5410 rows = ids.len(),
5411 left = split_at,
5412 right = ids.len().saturating_sub(split_at),
5413 error = %err,
5414 "orphan-message cleanup ran out of memory; retrying as smaller batches"
5415 );
5416 let left = delete_orphan_message_id_chunk(conn, &ids[..split_at])?;
5417 let right = delete_orphan_message_id_chunk(conn, &ids[split_at..])?;
5418 Ok(left.saturating_add(right))
5419 }
5420 Err(err) => Err(err),
5421 }
5422}
5423
5424fn delete_orphan_message_id_chunk_once(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5425 let mut tx = conn.transaction()?;
5426 let mut deleted = 0usize;
5427 for entry in ORPHAN_MESSAGE_DEPENDENT_TABLES {
5428 match delete_rows_by_i64_chunks(&tx, entry.delete_sql, ids) {
5429 Ok(count) => {
5430 deleted = deleted.saturating_add(count);
5431 }
5432 Err(err) if error_indicates_missing_table(&err) => {
5433 tracing::debug!(
5434 target: "cass::fk_repair",
5435 child_table = entry.child_table,
5436 error = %err,
5437 "skipping orphan-message dependent cleanup (table unavailable)"
5438 );
5439 }
5440 Err(err) => {
5441 return Err(err).with_context(|| {
5442 format!(
5443 "deleting rows from {} that depend on orphan messages",
5444 entry.child_table
5445 )
5446 });
5447 }
5448 }
5449 }
5450 deleted = deleted.saturating_add(
5451 delete_rows_by_i64_chunks(&tx, "DELETE FROM messages WHERE id = ?1", ids)
5452 .context("deleting orphan rows from messages")?,
5453 );
5454 tx.commit()?;
5455 Ok(deleted)
5456}
5457
5458fn collect_direct_orphan_id_page(
5459 conn: &FrankenConnection,
5460 entry: &'static OrphanFkTable,
5461) -> Result<Vec<i64>> {
5462 Ok(conn.query_map_collect(
5463 entry.orphan_id_page_sql,
5464 fparams![i64::try_from(ORPHAN_FK_ID_CHUNK_SIZE).unwrap_or(i64::MAX)],
5465 |row| row.get_typed(0),
5466 )?)
5467}
5468
5469fn delete_direct_orphan_ids_bisecting_oom(
5470 conn: &FrankenConnection,
5471 entry: &'static OrphanFkTable,
5472 ids: &[i64],
5473) -> Result<usize> {
5474 let mut deleted = 0usize;
5475 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5476 deleted = deleted.saturating_add(delete_direct_orphan_id_chunk(conn, entry, chunk)?);
5477 }
5478 Ok(deleted)
5479}
5480
5481fn delete_direct_orphan_id_chunk(
5482 conn: &FrankenConnection,
5483 entry: &'static OrphanFkTable,
5484 ids: &[i64],
5485) -> Result<usize> {
5486 if ids.is_empty() {
5487 return Ok(0);
5488 }
5489
5490 match delete_direct_orphan_id_chunk_once(conn, entry, ids) {
5491 Ok(deleted) => Ok(deleted),
5492 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5493 let split_at = ids.len() / 2;
5494 tracing::warn!(
5495 target: "cass::fk_repair",
5496 child_table = entry.child_table,
5497 rows = ids.len(),
5498 left = split_at,
5499 right = ids.len().saturating_sub(split_at),
5500 error = %err,
5501 "direct orphan cleanup ran out of memory; retrying as smaller batches"
5502 );
5503 let left = delete_direct_orphan_id_chunk(conn, entry, &ids[..split_at])?;
5504 let right = delete_direct_orphan_id_chunk(conn, entry, &ids[split_at..])?;
5505 Ok(left.saturating_add(right))
5506 }
5507 Err(err) => Err(err),
5508 }
5509}
5510
/// Deletes one batch of direct orphans for `entry` in its own transaction.
///
/// Opens a transaction, issues a single bulk delete built from
/// `entry.delete_many_sql_prefix`, commits, and returns the reported row count.
/// An error from the delete returns before `commit` is called.
fn delete_direct_orphan_id_chunk_once(
    conn: &FrankenConnection,
    entry: &'static OrphanFkTable,
    ids: &[i64],
) -> Result<usize> {
    let mut tx = conn.transaction()?;
    let deleted = delete_rows_by_i64_chunk_bulk(&tx, entry.delete_many_sql_prefix, ids)?;
    tx.commit()?;
    Ok(deleted)
}
5521
5522fn delete_rows_by_i64_chunk_bulk(
5523 tx: &FrankenTransaction<'_>,
5524 delete_many_sql_prefix: &'static str,
5525 ids: &[i64],
5526) -> Result<usize> {
5527 if ids.is_empty() {
5528 return Ok(0);
5529 }
5530
5531 let placeholders = (1..=ids.len())
5532 .map(|idx| format!("?{idx}"))
5533 .collect::<Vec<_>>()
5534 .join(", ");
5535 let sql = format!("{delete_many_sql_prefix} ({placeholders})");
5536 let params = ids
5537 .iter()
5538 .map(|id| SqliteValue::from(*id))
5539 .collect::<Vec<_>>();
5540 Ok(tx.execute_with_params(&sql, ¶ms)?)
5541}
5542
/// SQL needed to find and delete orphaned rows in one child table.
struct OrphanFkTable {
    /// Name of the child table; used in log output.
    child_table: &'static str,
    /// Query returning one page of orphaned foreign-key values; `?1` is bound
    /// to the page size.
    orphan_id_page_sql: &'static str,
    /// `DELETE ... IN` prefix to which a parenthesised placeholder list is
    /// appended.
    delete_many_sql_prefix: &'static str,
}
5553
/// Child tables whose orphaned rows are located by a `NOT IN` check against
/// the parent table: three keyed by `message_id` against `messages`, and
/// `conversation_tags` keyed by `conversation_id` against `conversations`.
const ORPHAN_DIRECT_CHILD_TABLES: &[OrphanFkTable] = &[
    OrphanFkTable {
        child_table: "message_metrics",
        orphan_id_page_sql: "SELECT message_id FROM message_metrics \
                             WHERE message_id NOT IN (SELECT id FROM messages) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "token_usage",
        orphan_id_page_sql: "SELECT message_id FROM token_usage \
                             WHERE message_id NOT IN (SELECT id FROM messages) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "snippets",
        orphan_id_page_sql: "SELECT message_id FROM snippets \
                             WHERE message_id NOT IN (SELECT id FROM messages) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "conversation_tags",
        orphan_id_page_sql: "SELECT conversation_id FROM conversation_tags \
                             WHERE conversation_id NOT IN (SELECT id FROM conversations) \
                             ORDER BY conversation_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM conversation_tags WHERE conversation_id IN",
    },
];
5588
/// A table holding rows keyed by `message_id` that are deleted before an
/// orphan message itself is removed.
struct OrphanMessageDependentTable {
    /// Name of the dependent table; used in log and error context.
    child_table: &'static str,
    /// Single-id delete statement; `?1` is bound to the message id.
    delete_sql: &'static str,
}
5593
/// Dependent tables cleared, in this order, for each orphan message id before
/// the `messages` row is deleted.
const ORPHAN_MESSAGE_DEPENDENT_TABLES: &[OrphanMessageDependentTable] = &[
    OrphanMessageDependentTable {
        child_table: "message_metrics",
        delete_sql: "DELETE FROM message_metrics WHERE message_id = ?1",
    },
    OrphanMessageDependentTable {
        child_table: "token_usage",
        delete_sql: "DELETE FROM token_usage WHERE message_id = ?1",
    },
    OrphanMessageDependentTable {
        child_table: "snippets",
        delete_sql: "DELETE FROM snippets WHERE message_id = ?1",
    },
];
5608
/// Running tally of rows removed by orphan foreign-key cleanup.
#[derive(Debug, Default, Clone)]
pub(crate) struct OrphanFkCleanupReport {
    /// Saturating sum of every count passed to `record`.
    pub total: i64,
    /// Per-table counts, in the order each table was first recorded.
    pub per_table: Vec<(&'static str, i64)>,
}

impl OrphanFkCleanupReport {
    /// Adds `count` to the entry for `child_table` (creating it on first use)
    /// and to the overall total. Both additions saturate.
    fn record(&mut self, child_table: &'static str, count: i64) {
        self.total = self.total.saturating_add(count);

        let slot = self
            .per_table
            .iter()
            .position(|(table, _)| *table == child_table);
        match slot {
            Some(index) => {
                let tally = &mut self.per_table[index].1;
                *tally = tally.saturating_add(count);
            }
            None => self.per_table.push((child_table, count)),
        }
    }
}
5639
/// Result of inserting a conversation and its messages.
// NOTE(review): field meanings below are inferred from their names; the code
// that builds this struct is outside this chunk — confirm.
pub struct InsertOutcome {
    /// Row id of the conversation the messages belong to.
    pub conversation_id: i64,
    /// Presumably `true` when a new conversation row was created rather than
    /// an existing one reused.
    pub conversation_inserted: bool,
    /// Presumably the `idx` values of the messages that were actually inserted.
    pub inserted_indices: Vec<i64>,
}
5645
/// Test-only timing breakdown for the message-insert stage.
///
/// The five duration fields are reported by
/// `InsertConversationTreePerfProfile::log_summary` as the `msg_*_ms` values.
// NOTE(review): the code that fills these counters is outside this chunk; the
// call/row counters are presumably per-statement tallies — confirm.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct MessageInsertSubstageProfile {
    single_row_calls: usize,
    batch_calls: usize,
    batch_rows: usize,
    payload_duration: Duration,
    sql_build_duration: Duration,
    param_build_duration: Duration,
    execute_duration: Duration,
    rowid_duration: Duration,
}
5658
/// Test-only accumulator of per-stage timings for conversation-tree inserts.
///
/// `log_summary` prints every stage in milliseconds, plus the part of
/// `total_duration` not covered by any tracked stage.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct InsertConversationTreePerfProfile {
    invocations: usize,
    messages: usize,
    inserted_messages: usize,
    total_duration: Duration,
    source_duration: Duration,
    tx_open_duration: Duration,
    existing_lookup_duration: Duration,
    existing_idx_lookup_duration: Duration,
    existing_replay_lookup_duration: Duration,
    dedupe_filter_duration: Duration,
    conversation_row_duration: Duration,
    message_insert_duration: Duration,
    message_insert_breakdown: MessageInsertSubstageProfile,
    snippet_insert_duration: Duration,
    fts_entry_duration: Duration,
    fts_flush_duration: Duration,
    analytics_duration: Duration,
    commit_duration: Duration,
}

#[cfg(test)]
impl InsertConversationTreePerfProfile {
    /// Converts a duration to fractional milliseconds.
    fn millis(duration: Duration) -> f64 {
        duration.as_secs_f64() * 1000.0
    }

    /// Writes a single `CASS_INSERT_TREE_STAGE_PROFILE` line to stderr.
    ///
    /// Averages divide by `invocations`, treating zero invocations as one so
    /// the division is always defined.
    fn log_summary(&self, label: &str) {
        let calls = self.invocations.max(1) as f64;
        let ms = Self::millis;
        let per_call = |duration: Duration| Self::millis(duration) / calls;
        let breakdown = &self.message_insert_breakdown;

        // Stages that count towards the accounted share of `total_duration`.
        let tracked_stages = [
            self.source_duration,
            self.tx_open_duration,
            self.existing_lookup_duration,
            self.existing_idx_lookup_duration,
            self.existing_replay_lookup_duration,
            self.dedupe_filter_duration,
            self.conversation_row_duration,
            self.message_insert_duration,
            self.snippet_insert_duration,
            self.fts_entry_duration,
            self.fts_flush_duration,
            self.analytics_duration,
            self.commit_duration,
        ];
        let accounted = tracked_stages
            .iter()
            .fold(Duration::ZERO, |sum, stage| sum + *stage);
        let residual = self.total_duration.saturating_sub(accounted);

        eprintln!(
            concat!(
                "CASS_INSERT_TREE_STAGE_PROFILE ",
                "label={} calls={} messages={} inserted_messages={} ",
                "total_ms={:.3} source_ms={:.3} tx_open_ms={:.3} existing_lookup_ms={:.3} ",
                "existing_idx_lookup_ms={:.3} existing_replay_lookup_ms={:.3} dedupe_filter_ms={:.3} ",
                "conversation_row_ms={:.3} message_insert_ms={:.3} snippet_insert_ms={:.3} ",
                "fts_entry_ms={:.3} fts_flush_ms={:.3} analytics_ms={:.3} commit_ms={:.3} ",
                "msg_payload_ms={:.3} msg_sql_ms={:.3} msg_param_ms={:.3} msg_execute_ms={:.3} msg_rowid_ms={:.3} ",
                "residual_ms={:.3} avg_total_ms={:.3} avg_message_insert_ms={:.3} ",
                "avg_msg_execute_ms={:.3} avg_msg_payload_ms={:.3} avg_snippet_insert_ms={:.3} avg_fts_entry_ms={:.3} avg_commit_ms={:.3}"
            ),
            label,
            self.invocations,
            self.messages,
            self.inserted_messages,
            ms(self.total_duration),
            ms(self.source_duration),
            ms(self.tx_open_duration),
            ms(self.existing_lookup_duration),
            ms(self.existing_idx_lookup_duration),
            ms(self.existing_replay_lookup_duration),
            ms(self.dedupe_filter_duration),
            ms(self.conversation_row_duration),
            ms(self.message_insert_duration),
            ms(self.snippet_insert_duration),
            ms(self.fts_entry_duration),
            ms(self.fts_flush_duration),
            ms(self.analytics_duration),
            ms(self.commit_duration),
            ms(breakdown.payload_duration),
            ms(breakdown.sql_build_duration),
            ms(breakdown.param_build_duration),
            ms(breakdown.execute_duration),
            ms(breakdown.rowid_duration),
            ms(residual),
            per_call(self.total_duration),
            per_call(self.message_insert_duration),
            per_call(breakdown.execute_duration),
            per_call(breakdown.payload_duration),
            per_call(self.snippet_insert_duration),
            per_call(self.fts_entry_duration),
            per_call(self.commit_duration),
        );
    }
}
5750
/// Identity used to look up a conversation, either by an external id or by its
/// source path.
// NOTE(review): the name suggests this keys conversations that are pending
// insertion within a batch; the map that uses it is outside this chunk —
// confirm.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PendingConversationKey {
    /// Conversation identified by source, agent, and external id.
    External {
        source_id: String,
        agent_id: i64,
        external_id: String,
    },
    /// Conversation identified by source, agent, source path, and an optional
    /// start timestamp.
    SourcePath {
        source_id: String,
        agent_id: i64,
        source_path: String,
        started_at: Option<i64>,
    },
}
5765
/// Builds the lookup key string for an externally identified conversation.
///
/// The layout is
/// `<source_id chars>:<source_id>:<agent_id>:<external_id chars>:<external_id>`,
/// where each length prefix is the number of Unicode scalar values (not bytes)
/// in the string that follows it.
fn conversation_external_lookup_key(source_id: &str, agent_id: i64, external_id: &str) -> String {
    let source_len = source_id.chars().count();
    let external_len = external_id.chars().count();
    format!("{source_len}:{source_id}:{agent_id}:{external_len}:{external_id}")
}
5773
5774fn conversation_external_lookup_key_for_conv(agent_id: i64, conv: &Conversation) -> Option<String> {
5775 conv.external_id
5776 .as_deref()
5777 .map(|external_id| conversation_external_lookup_key(&conv.source_id, agent_id, external_id))
5778}
5779
/// Position-sensitive identity of a message, used to compare an incoming
/// message with the one already stored at the same `idx`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageMergeFingerprint {
    /// Message index within its conversation.
    idx: i64,
    /// Creation timestamp, when known.
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
5788
/// Position-independent identity of a message: the same fields as
/// `MessageMergeFingerprint` without `idx`, so the same message re-sent at a
/// different index still matches.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageReplayFingerprint {
    /// Creation timestamp, when known.
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
5796
/// Overlap measurements between an incoming conversation and an existing one,
/// as produced by `conversation_merge_evidence`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ConversationMergeEvidence {
    /// Number of merge fingerprints (including `idx`) present on both sides.
    exact_overlap: usize,
    /// Number of replay fingerprints (ignoring `idx`) present on both sides.
    replay_overlap: usize,
    /// Size of the smaller of the two replay-fingerprint sets.
    smaller_replay_set: usize,
    /// Whether the two start timestamps fall within the merge tolerance.
    started_close: bool,
    // NOTE(review): presumably the value of `start_distance_ms` for the two
    // start timestamps; the assignment is outside this chunk — confirm.
    start_distance_ms: i64,
}
5805
/// Messages from an incoming conversation that still need to be added to an
/// existing stored conversation.
struct ExistingConversationNewMessages<'a> {
    /// Borrowed incoming messages selected for insertion, in input order.
    messages: Vec<&'a Message>,
    /// Sum of `content.len()` (bytes) over `messages`.
    new_chars: i64,
    /// Number of incoming messages whose `idx` is already stored with a
    /// different fingerprint.
    idx_collision_count: usize,
    /// `idx` of the first such collision, if any.
    first_collision_idx: Option<i64>,
}
5812
/// Cached description of the last message of a stored conversation.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailState {
    /// Highest stored message index.
    last_message_idx: i64,
    /// Latest stored message creation timestamp.
    last_message_created_at: i64,
    /// Stored conversation end timestamp, when present.
    ended_at: Option<i64>,
}
5819
/// An existing conversation's row id together with its tail state, when one
/// could be determined.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationWithTail {
    /// Row id of the conversation.
    id: i64,
    /// Tail state, or `None` when none could be determined.
    tail_state: Option<ExistingConversationTailState>,
}
5825
5826fn conversation_effective_started_at(conv: &Conversation) -> Option<i64> {
5827 conv.started_at
5828 .or_else(|| conv.messages.iter().filter_map(|msg| msg.created_at).min())
5829}
5830
5831fn conversation_tail_state(conv: &Conversation) -> (Option<i64>, Option<i64>) {
5832 (
5833 conv.messages.iter().map(|msg| msg.idx).max(),
5834 conv.messages.iter().filter_map(|msg| msg.created_at).max(),
5835 )
5836}
5837
5838fn borrowed_messages_tail_state(messages: &[&Message]) -> (Option<i64>, Option<i64>) {
5839 (
5840 messages.iter().map(|msg| msg.idx).max(),
5841 messages.iter().filter_map(|msg| msg.created_at).max(),
5842 )
5843}
5844
5845fn role_from_str(role: &str) -> MessageRole {
5846 match role {
5847 "user" => MessageRole::User,
5848 "agent" | "assistant" => MessageRole::Agent,
5849 "tool" => MessageRole::Tool,
5850 "system" => MessageRole::System,
5851 other => MessageRole::Other(other.to_string()),
5852 }
5853}
5854
5855fn message_merge_fingerprint(msg: &Message) -> MessageMergeFingerprint {
5856 MessageMergeFingerprint {
5857 idx: msg.idx,
5858 created_at: msg.created_at,
5859 role: msg.role.clone(),
5860 author: msg.author.clone(),
5861 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
5862 }
5863}
5864
5865fn message_replay_fingerprint(msg: &Message) -> MessageReplayFingerprint {
5866 MessageReplayFingerprint {
5867 created_at: msg.created_at,
5868 role: msg.role.clone(),
5869 author: msg.author.clone(),
5870 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
5871 }
5872}
5873
5874fn conversation_message_fingerprints(conv: &Conversation) -> HashSet<MessageMergeFingerprint> {
5875 conv.messages
5876 .iter()
5877 .map(message_merge_fingerprint)
5878 .collect()
5879}
5880
5881fn conversation_message_replay_fingerprints(
5882 conv: &Conversation,
5883) -> HashSet<MessageReplayFingerprint> {
5884 conv.messages
5885 .iter()
5886 .map(message_replay_fingerprint)
5887 .collect()
5888}
5889
5890fn replay_fingerprint_from_merge(
5891 fingerprint: &MessageMergeFingerprint,
5892) -> MessageReplayFingerprint {
5893 MessageReplayFingerprint {
5894 created_at: fingerprint.created_at,
5895 role: fingerprint.role.clone(),
5896 author: fingerprint.author.clone(),
5897 content_hash: fingerprint.content_hash,
5898 }
5899}
5900
5901fn replay_fingerprints_from_merge_set(
5902 fingerprints: &HashSet<MessageMergeFingerprint>,
5903) -> HashSet<MessageReplayFingerprint> {
5904 fingerprints
5905 .iter()
5906 .map(replay_fingerprint_from_merge)
5907 .collect()
5908}
5909
5910fn collect_new_messages_for_existing_conversation<'a>(
5911 conversation_id: i64,
5912 conv: &'a Conversation,
5913 existing_messages: &mut HashMap<i64, MessageMergeFingerprint>,
5914 existing_replay_fingerprints: &mut HashSet<MessageReplayFingerprint>,
5915 replay_skip_log: &'static str,
5916) -> ExistingConversationNewMessages<'a> {
5917 let mut idx_collision_count = 0usize;
5918 let mut first_collision_idx: Option<i64> = None;
5919 let mut new_chars: i64 = 0;
5920 let mut messages = Vec::new();
5921
5922 for msg in &conv.messages {
5923 let incoming_fingerprint = message_merge_fingerprint(msg);
5924 if let Some(existing_fingerprint) = existing_messages.get(&msg.idx) {
5925 if existing_fingerprint != &incoming_fingerprint {
5926 idx_collision_count = idx_collision_count.saturating_add(1);
5927 first_collision_idx.get_or_insert(msg.idx);
5928 }
5929 continue;
5930 }
5931
5932 let incoming_replay = replay_fingerprint_from_merge(&incoming_fingerprint);
5933 if existing_replay_fingerprints.contains(&incoming_replay) {
5934 tracing::debug!(
5935 conversation_id,
5936 idx = msg.idx,
5937 source_path = %conv.source_path.display(),
5938 "{replay_skip_log}"
5939 );
5940 continue;
5941 }
5942
5943 existing_messages.insert(msg.idx, incoming_fingerprint);
5944 existing_replay_fingerprints.insert(incoming_replay);
5945 new_chars += msg.content.len() as i64;
5946 messages.push(msg);
5947 }
5948
5949 ExistingConversationNewMessages {
5950 messages,
5951 new_chars,
5952 idx_collision_count,
5953 first_collision_idx,
5954 }
5955}
5956
/// Resolves the tail state (last message idx, last message timestamp,
/// `ended_at`) of an existing conversation, trying three sources in order.
///
/// 1. The `conversation_tail_state` row, when it has both an idx and a
///    timestamp.
/// 2. The tail columns on the `conversations` row; on success the values are
///    also written to `conversation_tail_state`.
/// 3. `MAX(idx)` / `MAX(created_at)` over the conversation's messages; on
///    success the values are stored through
///    `franken_update_conversation_tail_state`.
///
/// Returns `Ok(None)` when none of the sources yields both an idx and a
/// timestamp.
fn franken_existing_conversation_append_tail_state(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
) -> Result<Option<ExistingConversationTailState>> {
    // Tier 1: dedicated tail-state table (the row may be absent).
    let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
        .query_row_map(
            "SELECT last_message_idx, last_message_created_at, ended_at
             FROM conversation_tail_state
             WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
        )
        .optional()?;
    if let Some(cached) = cached {
        let (_, _, cached_ended_at) = cached;
        if let Some(tail_state) =
            existing_conversation_tail_state_from_cached(cached.0, cached.1, cached_ended_at)
        {
            return Ok(Some(tail_state));
        }
    }

    // Tier 2: tail columns on the conversation row itself. This query is not
    // optional, so a missing conversation row surfaces as an error.
    let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
        "SELECT last_message_idx, last_message_created_at, ended_at
         FROM conversations
         WHERE id = ?1",
        fparams![conversation_id],
        |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
    )?;
    let (_, _, cached_ended_at) = legacy_cached;
    if let Some(tail_state) = existing_conversation_tail_state_from_cached(
        legacy_cached.0,
        legacy_cached.1,
        cached_ended_at,
    ) {
        // Copy the values into the tail-state table for later lookups.
        franken_insert_conversation_tail_state(
            tx,
            conversation_id,
            cached_ended_at,
            Some(tail_state.last_message_idx),
            Some(tail_state.last_message_created_at),
        )?;
        return Ok(Some(tail_state));
    }

    // Tier 3: derive the tail from the stored messages.
    let (max_idx, max_created_at): (Option<i64>, Option<i64>) = tx.query_row_map(
        "SELECT MAX(idx), MAX(created_at)
         FROM messages
         WHERE conversation_id = ?1",
        fparams![conversation_id],
        |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
    )?;
    if let Some((last_message_idx, last_message_created_at)) = max_idx.zip(max_created_at) {
        franken_update_conversation_tail_state(
            tx,
            conversation_id,
            None,
            Some(last_message_idx),
            Some(last_message_created_at),
        )?;
        // `ended_at` still comes from the `conversations` row read in tier 2.
        return Ok(Some(ExistingConversationTailState {
            last_message_idx,
            last_message_created_at,
            ended_at: cached_ended_at,
        }));
    }
    Ok(None)
}
6025
6026fn existing_conversation_tail_state_from_cached(
6027 last_message_idx: Option<i64>,
6028 last_message_created_at: Option<i64>,
6029 ended_at: Option<i64>,
6030) -> Option<ExistingConversationTailState> {
6031 let (last_message_idx, last_message_created_at) =
6032 last_message_idx.zip(last_message_created_at)?;
6033 Some(ExistingConversationTailState {
6034 last_message_idx,
6035 last_message_created_at,
6036 ended_at,
6037 })
6038}
6039
6040fn franken_find_existing_conversation_with_tail_by_key(
6041 tx: &FrankenTransaction<'_>,
6042 key: &PendingConversationKey,
6043 conv: Option<&Conversation>,
6044) -> Result<Option<ExistingConversationWithTail>> {
6045 if let PendingConversationKey::External {
6046 source_id,
6047 agent_id,
6048 external_id,
6049 } = key
6050 {
6051 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
6052 if let Some(existing) = franken_find_external_conversation_tail_lookup(tx, &lookup_key)? {
6053 return Ok(Some(existing));
6054 }
6055 return Ok(None);
6056 }
6057
6058 let Some(id) = franken_find_existing_conversation_by_key(tx, key, conv)? else {
6059 return Ok(None);
6060 };
6061 let tail_state = franken_existing_conversation_append_tail_state(tx, id)?;
6062 Ok(Some(ExistingConversationWithTail { id, tail_state }))
6063}
6064
6065fn franken_insert_conversation_tail_state(
6066 tx: &FrankenTransaction<'_>,
6067 conversation_id: i64,
6068 ended_at: Option<i64>,
6069 last_message_idx: Option<i64>,
6070 last_message_created_at: Option<i64>,
6071) -> Result<()> {
6072 if ended_at.is_none() && last_message_idx.is_none() && last_message_created_at.is_none() {
6073 return Ok(());
6074 }
6075 tx.execute_compat(
6076 "INSERT OR REPLACE INTO conversation_tail_state (
6077 conversation_id, ended_at, last_message_idx, last_message_created_at
6078 ) VALUES (?1, ?2, ?3, ?4)",
6079 fparams![
6080 conversation_id,
6081 ended_at,
6082 last_message_idx,
6083 last_message_created_at
6084 ],
6085 )?;
6086 Ok(())
6087}
6088
/// Raises the tail columns on the `conversations` row to the given candidates.
///
/// Each column is updated independently: a `NULL` candidate leaves the column
/// untouched, and a non-`NULL` candidate replaces the stored value only when
/// the stored value is `NULL` or smaller. Does nothing when all three
/// candidates are `None`.
fn franken_update_conversation_tail_columns(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    ended_at_candidate: Option<i64>,
    last_message_idx_candidate: Option<i64>,
    last_message_created_at_candidate: Option<i64>,
) -> Result<()> {
    if ended_at_candidate.is_none()
        && last_message_idx_candidate.is_none()
        && last_message_created_at_candidate.is_none()
    {
        return Ok(());
    }

    // ?1..?3 are the candidates, ?4 is the conversation id.
    tx.execute_compat(
        "UPDATE conversations
         SET ended_at = CASE
                 WHEN ?1 IS NULL THEN ended_at
                 WHEN ended_at IS NULL OR ended_at < ?1 THEN ?1
                 ELSE ended_at
             END,
             last_message_idx = CASE
                 WHEN ?2 IS NULL THEN last_message_idx
                 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
                 ELSE last_message_idx
             END,
             last_message_created_at = CASE
                 WHEN ?3 IS NULL THEN last_message_created_at
                 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
                 ELSE last_message_created_at
             END
         WHERE id = ?4",
        fparams![
            ended_at_candidate,
            last_message_idx_candidate,
            last_message_created_at_candidate,
            conversation_id
        ],
    )?;
    Ok(())
}
6130
6131fn franken_tail_state_insert_ended_at(
6132 tx: &FrankenTransaction<'_>,
6133 conversation_id: i64,
6134 candidate: Option<i64>,
6135) -> Result<Option<i64>> {
6136 let canonical: Option<i64> = tx
6137 .query_row_map(
6138 "SELECT ended_at FROM conversations WHERE id = ?1",
6139 fparams![conversation_id],
6140 |row| row.get_typed(0),
6141 )
6142 .optional()?
6143 .flatten();
6144 Ok(canonical.max(candidate))
6145}
6146
6147fn franken_update_conversation_tail_state(
6148 tx: &FrankenTransaction<'_>,
6149 conversation_id: i64,
6150 ended_at_candidate: Option<i64>,
6151 last_message_idx_candidate: Option<i64>,
6152 last_message_created_at_candidate: Option<i64>,
6153) -> Result<()> {
6154 if ended_at_candidate.is_none()
6155 && last_message_idx_candidate.is_none()
6156 && last_message_created_at_candidate.is_none()
6157 {
6158 return Ok(());
6159 }
6160
6161 let changed = tx.execute_compat(
6162 "UPDATE conversation_tail_state
6163 SET ended_at = CASE
6164 WHEN ?1 IS NULL THEN ended_at
6165 ELSE MAX(IFNULL(ended_at, 0), ?1)
6166 END,
6167 last_message_idx = CASE
6168 WHEN ?2 IS NULL THEN last_message_idx
6169 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6170 ELSE last_message_idx
6171 END,
6172 last_message_created_at = CASE
6173 WHEN ?3 IS NULL THEN last_message_created_at
6174 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6175 ELSE last_message_created_at
6176 END
6177 WHERE conversation_id = ?4",
6178 fparams![
6179 ended_at_candidate,
6180 last_message_idx_candidate,
6181 last_message_created_at_candidate,
6182 conversation_id
6183 ],
6184 )?;
6185 if changed == 0 {
6186 let insert_ended_at =
6187 franken_tail_state_insert_ended_at(tx, conversation_id, ended_at_candidate)?;
6188 franken_insert_conversation_tail_state(
6189 tx,
6190 conversation_id,
6191 insert_ended_at,
6192 last_message_idx_candidate,
6193 last_message_created_at_candidate,
6194 )?;
6195 }
6196 franken_update_conversation_tail_columns(
6197 tx,
6198 conversation_id,
6199 ended_at_candidate,
6200 last_message_idx_candidate,
6201 last_message_created_at_candidate,
6202 )?;
6203 Ok(())
6204}
6205
/// Records the tail state of a conversation after new messages were appended.
///
/// The `conversation_tail_state` row is overwritten with the given values. If
/// no row exists, one is inserted, with `ended_at` taken as the larger of the
/// given value and the one on the `conversations` row. The tail columns on
/// `conversations` are then raised with the same values through
/// `franken_update_conversation_tail_columns`.
// NOTE(review): the UPDATE here assigns unconditionally while the
// `conversations` columns only ever increase; this presumably relies on the
// caller passing values that are not smaller than the stored ones — confirm.
fn franken_set_conversation_tail_state_after_append(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    ended_at: i64,
    last_message_idx: i64,
    last_message_created_at: i64,
) -> Result<()> {
    let changed = tx.execute_compat(
        "UPDATE conversation_tail_state
         SET ended_at = ?1,
             last_message_idx = ?2,
             last_message_created_at = ?3
         WHERE conversation_id = ?4",
        fparams![
            ended_at,
            last_message_idx,
            last_message_created_at,
            conversation_id
        ],
    )?;
    if changed == 0 {
        // No tail-state row yet: create it.
        let insert_ended_at =
            franken_tail_state_insert_ended_at(tx, conversation_id, Some(ended_at))?;
        franken_insert_conversation_tail_state(
            tx,
            conversation_id,
            insert_ended_at,
            Some(last_message_idx),
            Some(last_message_created_at),
        )?;
    }
    franken_update_conversation_tail_columns(
        tx,
        conversation_id,
        Some(ended_at),
        Some(last_message_idx),
        Some(last_message_created_at),
    )?;
    Ok(())
}
6246
6247fn collect_append_only_tail_messages<'a>(
6248 conv: &'a Conversation,
6249 existing_max_idx: i64,
6250 existing_max_created_at: i64,
6251) -> Option<ExistingConversationNewMessages<'a>> {
6252 if conv.messages.is_empty() {
6253 return Some(ExistingConversationNewMessages {
6254 messages: Vec::new(),
6255 new_chars: 0,
6256 idx_collision_count: 0,
6257 first_collision_idx: None,
6258 });
6259 }
6260
6261 let mut split_idx = None;
6262 let mut prev_idx = None;
6263 for (pos, msg) in conv.messages.iter().enumerate() {
6264 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6265 return None;
6266 }
6267 prev_idx = Some(msg.idx);
6268 if split_idx.is_none() && msg.idx > existing_max_idx {
6269 split_idx = Some(pos);
6270 }
6271 }
6272 let split_idx = split_idx?;
6273
6274 let mut seen_tail_idx = HashSet::new();
6275 let mut seen_tail_replay = HashSet::new();
6276 let mut new_chars = 0i64;
6277 let mut messages = Vec::new();
6278 for msg in &conv.messages[split_idx..] {
6279 let created_at = msg.created_at?;
6280 if created_at <= existing_max_created_at {
6281 return None;
6282 }
6283
6284 if !seen_tail_idx.insert(msg.idx) {
6285 return None;
6286 }
6287
6288 let replay_fingerprint = message_replay_fingerprint(msg);
6289 if !seen_tail_replay.insert(replay_fingerprint) {
6290 return None;
6291 }
6292
6293 new_chars += msg.content.len() as i64;
6294 messages.push(msg);
6295 }
6296
6297 Some(ExistingConversationNewMessages {
6298 messages,
6299 new_chars,
6300 idx_collision_count: 0,
6301 first_collision_idx: None,
6302 })
6303}
6304
/// Absolute distance between two optional timestamps, clamped to `i64::MAX`.
///
/// Yields `i64::MAX` when either side is `None` or when the true distance
/// does not fit in an `i64`.
fn start_distance_ms(left: Option<i64>, right: Option<i64>) -> i64 {
    let (Some(left), Some(right)) = (left, right) else {
        return i64::MAX;
    };
    // `abs_diff` returns the exact unsigned distance, so it cannot overflow.
    i64::try_from(left.abs_diff(right)).unwrap_or(i64::MAX)
}
6314
6315fn conversation_merge_evidence(
6316 incoming_exact: &HashSet<MessageMergeFingerprint>,
6317 incoming_replay: &HashSet<MessageReplayFingerprint>,
6318 existing_exact: &HashSet<MessageMergeFingerprint>,
6319 existing_replay: &HashSet<MessageReplayFingerprint>,
6320 incoming_started_at: Option<i64>,
6321 existing_started_at: Option<i64>,
6322) -> Option<ConversationMergeEvidence> {
6323 let exact_overlap = incoming_exact.intersection(existing_exact).count();
6324 let replay_overlap = incoming_replay.intersection(existing_replay).count();
6325 if exact_overlap == 0 && replay_overlap == 0 {
6326 return None;
6327 }
6328
6329 let smaller_replay_set = incoming_replay.len().min(existing_replay.len());
6330 let started_close = timestamps_within_tolerance(
6331 incoming_started_at,
6332 existing_started_at,
6333 SOURCE_PATH_MERGE_START_TOLERANCE_MS,
6334 );
6335 let full_replay_subset_match = smaller_replay_set >= 2 && replay_overlap == smaller_replay_set;
6336
6337 let merge_allowed = if started_close {
6338 exact_overlap >= 1 || replay_overlap >= 2
6339 } else {
6340 exact_overlap >= 2 || full_replay_subset_match
6341 };
6342
6343 merge_allowed.then_some(ConversationMergeEvidence {
6344 exact_overlap,
6345 replay_overlap,
6346 smaller_replay_set,
6347 started_close,
6348 start_distance_ms: start_distance_ms(incoming_started_at, existing_started_at),
6349 })
6350}
6351
/// Reports whether two timestamps are both present and at most `tolerance_ms` apart.
///
/// A missing timestamp on either side yields `false`. A negative tolerance
/// can never be satisfied, so it yields `false` as well.
fn timestamps_within_tolerance(left: Option<i64>, right: Option<i64>, tolerance_ms: i64) -> bool {
    let (Some(left), Some(right)) = (left, right) else {
        return false;
    };
    // Compare in i128 so the unsigned gap and a signed tolerance share a domain.
    let gap = i128::from(left.abs_diff(right));
    gap <= i128::from(tolerance_ms)
}
6360
6361fn conversation_merge_key(agent_id: i64, conv: &Conversation) -> PendingConversationKey {
6362 if let Some(external_id) = conv.external_id.clone() {
6363 PendingConversationKey::External {
6364 source_id: conv.source_id.clone(),
6365 agent_id,
6366 external_id,
6367 }
6368 } else {
6369 PendingConversationKey::SourcePath {
6370 source_id: conv.source_id.clone(),
6371 agent_id,
6372 source_path: path_to_string(&conv.source_path),
6373 started_at: conversation_effective_started_at(conv),
6374 }
6375 }
6376}
6377
/// Flat, owned record describing a single message for embedding work.
///
/// NOTE(review): the field notes below are inferred from names and types
/// only; confirm them against the query that fills this struct.
pub struct MessageForEmbedding {
    /// Identifier of the message (presumably `messages.id`; verify against caller).
    pub message_id: i64,
    /// Optional creation timestamp (presumably milliseconds; TODO confirm).
    pub created_at: Option<i64>,
    /// Identifier of the agent associated with the message.
    pub agent_id: i64,
    /// Identifier of the associated workspace, when there is one.
    pub workspace_id: Option<i64>,
    /// 32-bit hash of the source id (hash function not visible here).
    pub source_id_hash: u32,
    /// Role label as an owned string.
    pub role: String,
    /// Message text.
    pub content: String,
}
6388
6389impl FrankenStorage {
6394 pub fn ensure_agent(&self, agent: &Agent) -> Result<i64> {
6396 let cache_key = EnsuredAgentKey::from_agent(agent);
6397 if let Some(id) = self.cached_agent_id(&cache_key) {
6398 return Ok(id);
6399 }
6400
6401 let now = Self::now_millis();
6402 self.conn.execute_compat(
6403 "INSERT INTO agents(slug, name, version, kind, created_at, updated_at)
6404 VALUES(?1, ?2, ?3, ?4, ?5, ?6)
6405 ON CONFLICT(slug) DO UPDATE SET
6406 name = excluded.name,
6407 version = excluded.version,
6408 kind = excluded.kind,
6409 updated_at = excluded.updated_at
6410 WHERE NOT (
6411 agents.name IS excluded.name
6412 AND agents.version IS excluded.version
6413 AND agents.kind IS excluded.kind
6414 )",
6415 fparams![
6416 agent.slug.as_str(),
6417 agent.name.as_str(),
6418 agent.version.as_deref(),
6419 cache_key.kind.as_str(),
6420 now,
6421 now
6422 ],
6423 )?;
6424
6425 let id = self
6426 .conn
6427 .query_row_map(
6428 "SELECT id FROM agents WHERE slug = ?1 LIMIT 1",
6429 fparams![agent.slug.as_str()],
6430 |row| row.get_typed(0),
6431 )
6432 .with_context(|| format!("fetching agent id for {}", agent.slug))?;
6433 self.mark_agent_ensured(cache_key, id);
6434 Ok(id)
6435 }
6436
6437 pub fn ensure_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
6439 let path_str = path.to_string_lossy().to_string();
6440 let cache_key = EnsuredWorkspaceKey::new(path_str.clone(), display_name);
6441 if let Some(id) = self.cached_workspace_id(&cache_key) {
6442 return Ok(id);
6443 }
6444
6445 if let Some(display_name) = display_name {
6446 self.conn.execute_compat(
6447 "INSERT INTO workspaces(path, display_name)
6448 VALUES(?1, ?2)
6449 ON CONFLICT(path) DO UPDATE SET
6450 display_name = excluded.display_name
6451 WHERE NOT (workspaces.display_name IS excluded.display_name)",
6452 fparams![path_str.as_str(), display_name],
6453 )?;
6454 } else {
6455 self.conn.execute_compat(
6456 "INSERT OR IGNORE INTO workspaces(path, display_name) VALUES(?1, NULL)",
6457 fparams![path_str.as_str()],
6458 )?;
6459 }
6460
6461 let id = self
6462 .conn
6463 .query_row_map(
6464 "SELECT id FROM workspaces WHERE path = ?1 LIMIT 1",
6465 fparams![path_str.as_str()],
6466 |row| row.get_typed(0),
6467 )
6468 .with_context(|| format!("fetching workspace id for {path_str}"))?;
6469 self.mark_workspace_ensured(cache_key, id);
6470 Ok(id)
6471 }
6472
6473 pub fn now_millis() -> i64 {
6475 SystemTime::now()
6476 .duration_since(UNIX_EPOCH)
6477 .map(|d| i64::try_from(d.as_millis()).unwrap_or(i64::MAX))
6478 .unwrap_or(0)
6479 }
6480
6481 pub fn day_id_from_millis(timestamp_ms: i64) -> i64 {
6483 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6484 let secs = timestamp_ms.div_euclid(1000);
6485 (secs - EPOCH_2020_SECS).div_euclid(86400)
6486 }
6487
6488 pub fn hour_id_from_millis(timestamp_ms: i64) -> i64 {
6490 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6491 let secs = timestamp_ms.div_euclid(1000);
6492 (secs - EPOCH_2020_SECS).div_euclid(3600)
6493 }
6494
6495 pub fn millis_from_day_id(day_id: i64) -> i64 {
6497 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6498 (EPOCH_2020_SECS + day_id * 86400) * 1000
6499 }
6500
6501 pub fn millis_from_hour_id(hour_id: i64) -> i64 {
6503 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6504 (EPOCH_2020_SECS + hour_id * 3600) * 1000
6505 }
6506
6507 pub fn get_last_scan_ts(&self) -> Result<Option<i64>> {
6509 let result: Result<String, _> = self.conn.query_row_map(
6510 "SELECT value FROM meta WHERE key = 'last_scan_ts'",
6511 fparams![],
6512 |row| row.get_typed(0),
6513 );
6514 match result.optional() {
6515 Ok(Some(s)) => Ok(s.parse().ok()),
6516 Ok(None) => Ok(None),
6517 Err(e) => Err(e.into()),
6518 }
6519 }
6520
6521 pub fn set_last_scan_ts(&self, ts: i64) -> Result<()> {
6523 self.conn.execute_compat(
6524 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_scan_ts', ?1)",
6525 fparams![ts.to_string()],
6526 )?;
6527 Ok(())
6528 }
6529
6530 pub fn get_last_indexed_at(&self) -> Result<Option<i64>> {
6532 let result: Result<String, _> = self.conn.query_row_map(
6533 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
6534 fparams![],
6535 |row| row.get_typed(0),
6536 );
6537 match result.optional() {
6538 Ok(Some(s)) => Ok(s.parse().ok()),
6539 Ok(None) => Ok(None),
6540 Err(e) => Err(e.into()),
6541 }
6542 }
6543
6544 pub fn set_last_indexed_at(&self, ts: i64) -> Result<()> {
6546 self.conn.execute_compat(
6547 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_indexed_at', ?1)",
6548 fparams![ts.to_string()],
6549 )?;
6550 Ok(())
6551 }
6552
6553 pub fn list_agents(&self) -> Result<Vec<Agent>> {
6555 self.conn
6556 .query_map_collect(
6557 "SELECT id, slug, name, version, kind FROM agents ORDER BY slug",
6558 fparams![],
6559 |row| {
6560 let kind: String = row.get_typed(4)?;
6561 Ok(Agent {
6562 id: Some(row.get_typed(0)?),
6563 slug: row.get_typed(1)?,
6564 name: row.get_typed(2)?,
6565 version: row.get_typed(3)?,
6566 kind: match kind.as_str() {
6567 "cli" => AgentKind::Cli,
6568 "vscode" => AgentKind::VsCode,
6569 _ => AgentKind::Hybrid,
6570 },
6571 })
6572 },
6573 )
6574 .with_context(|| "listing agents")
6575 }
6576
6577 pub fn total_conversation_count(&self) -> Result<usize> {
6579 let count: i64 =
6580 self.conn
6581 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6582 row.get_typed(0)
6583 })?;
6584 Ok(count.max(0) as usize)
6585 }
6586
6587 pub fn total_message_count(&self) -> Result<usize> {
6589 let count: i64 =
6590 self.conn
6591 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
6592 row.get_typed(0)
6593 })?;
6594 Ok(count.max(0) as usize)
6595 }
6596
6597 pub fn purge_agent_archive_data(&self, agent_slug: &str) -> Result<AgentArchivePurgeResult> {
6602 let normalized = agent_slug.trim().to_ascii_lowercase();
6603 if normalized.is_empty() {
6604 return Err(anyhow!("agent slug cannot be empty"));
6605 }
6606
6607 let Some(agent_id) = self
6608 .conn
6609 .query_row_map(
6610 "SELECT id FROM agents WHERE slug = ?1",
6611 fparams![normalized.as_str()],
6612 |row| row.get_typed::<i64>(0),
6613 )
6614 .optional()?
6615 else {
6616 return Ok(AgentArchivePurgeResult::default());
6617 };
6618
6619 let conversations_deleted: i64 = self.conn.query_row_map(
6620 "SELECT COUNT(*) FROM conversations WHERE agent_id = ?1",
6621 fparams![agent_id],
6622 |row| row.get_typed(0),
6623 )?;
6624 if conversations_deleted == 0 {
6625 return Ok(AgentArchivePurgeResult::default());
6626 }
6627
6628 let messages_deleted: i64 = self.conn.query_row_map(
6629 "SELECT COUNT(*)
6630 FROM messages
6631 WHERE conversation_id IN (
6632 SELECT id FROM conversations WHERE agent_id = ?1
6633 )",
6634 fparams![agent_id],
6635 |row| row.get_typed(0),
6636 )?;
6637
6638 let mut tx = self.conn.transaction()?;
6639 tx.execute_compat(
6640 "DELETE FROM conversation_external_lookup
6641 WHERE conversation_id IN (
6642 SELECT id FROM conversations WHERE agent_id = ?1
6643 )",
6644 fparams![agent_id],
6645 )?;
6646 tx.execute_compat(
6647 "DELETE FROM conversation_external_tail_lookup
6648 WHERE conversation_id IN (
6649 SELECT id FROM conversations WHERE agent_id = ?1
6650 )",
6651 fparams![agent_id],
6652 )?;
6653 tx.execute_compat(
6654 "DELETE FROM conversations WHERE agent_id = ?1",
6655 fparams![agent_id],
6656 )?;
6657 tx.execute_compat(
6658 "DELETE FROM agents
6659 WHERE id = ?1
6660 AND NOT EXISTS (
6661 SELECT 1 FROM conversations WHERE agent_id = ?1
6662 )",
6663 fparams![agent_id],
6664 )?;
6665 tx.commit()?;
6666
6667 Ok(AgentArchivePurgeResult {
6668 conversations_deleted: conversations_deleted.max(0) as usize,
6669 messages_deleted: messages_deleted.max(0) as usize,
6670 })
6671 }
6672
6673 pub fn list_workspaces(&self) -> Result<Vec<crate::model::types::Workspace>> {
6675 self.conn
6676 .query_map_collect(
6677 "SELECT id, path, display_name FROM workspaces ORDER BY path",
6678 fparams![],
6679 |row| {
6680 let path_str: String = row.get_typed(1)?;
6681 Ok(crate::model::types::Workspace {
6682 id: Some(row.get_typed(0)?),
6683 path: Path::new(&path_str).to_path_buf(),
6684 display_name: row.get_typed(2)?,
6685 })
6686 },
6687 )
6688 .with_context(|| "listing workspaces")
6689 }
6690
6691 pub fn list_conversations(&self, limit: i64, offset: i64) -> Result<Vec<Conversation>> {
6693 self.conn
6700 .query_map_collect(
6701 r"SELECT c.id,
6702 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
6703 (SELECT w.path FROM workspaces w WHERE w.id = c.workspace_id),
6704 c.external_id, c.title, c.source_path,
6705 c.started_at,
6706 COALESCE(
6707 (SELECT ts.ended_at
6708 FROM conversation_tail_state ts
6709 WHERE ts.conversation_id = c.id),
6710 c.ended_at
6711 ),
6712 c.approx_tokens, c.metadata_json,
6713 c.source_id, c.origin_host, c.metadata_bin
6714 FROM conversations c
6715 ORDER BY CASE WHEN c.started_at IS NULL THEN 1 ELSE 0 END, c.started_at DESC, c.id DESC
6716 LIMIT ?1 OFFSET ?2",
6717 fparams![limit, offset],
6718 |row| {
6719 let workspace_path: Option<String> = row.get_typed(2)?;
6720 let source_path: String = row.get_typed(5)?;
6721 let raw_source_id: Option<String> = row.get_typed(10)?;
6722 let raw_origin_host: Option<String> = row.get_typed(11)?;
6723 let (source_id, _, origin_host) = normalized_storage_source_parts(
6724 raw_source_id.as_deref(),
6725 None,
6726 raw_origin_host.as_deref(),
6727 );
6728 Ok(Conversation {
6729 id: Some(row.get_typed(0)?),
6730 agent_slug: row.get_typed(1)?,
6731 workspace: workspace_path.map(|p| Path::new(&p).to_path_buf()),
6732 external_id: row.get_typed(3)?,
6733 title: row.get_typed(4)?,
6734 source_path: Path::new(&source_path).to_path_buf(),
6735 started_at: row.get_typed(6)?,
6736 ended_at: row.get_typed(7)?,
6737 approx_tokens: row.get_typed(8)?,
6738 metadata_json: franken_read_metadata_compat(row, 9, 12),
6739 messages: Vec::new(),
6740 source_id,
6741 origin_host,
6742 })
6743 },
6744 )
6745 .with_context(|| "listing conversations")
6746 }
6747
6748 pub fn build_lexical_rebuild_lookups(
6752 &self,
6753 ) -> Result<(HashMap<i64, String>, HashMap<i64, PathBuf>)> {
6754 let agents: HashMap<i64, String> = self
6755 .conn
6756 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
6757 Ok((row.get_typed::<i64>(0)?, row.get_typed::<String>(1)?))
6758 })
6759 .with_context(|| "loading agent lookup for lexical rebuild")?
6760 .into_iter()
6761 .collect();
6762 let workspaces: HashMap<i64, PathBuf> = self
6763 .conn
6764 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
6765 let path_str: String = row.get_typed(1)?;
6766 Ok((row.get_typed::<i64>(0)?, PathBuf::from(path_str)))
6767 })
6768 .with_context(|| "loading workspace lookup for lexical rebuild")?
6769 .into_iter()
6770 .collect();
6771 Ok((agents, workspaces))
6772 }
6773
6774 pub fn list_conversation_footprints_for_lexical_rebuild(
6787 &self,
6788 ) -> Result<Vec<LexicalRebuildConversationFootprintRow>> {
6789 let tail_state_rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
6790 "SELECT conversation_id, last_message_idx
6791 FROM conversation_tail_state
6792 ORDER BY conversation_id ASC",
6793 fparams![],
6794 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
6795 ) {
6796 Ok(rows) => rows,
6797 Err(err) if error_indicates_missing_table(&err) => Vec::new(),
6798 Err(err) => {
6799 return Err(err).with_context(|| "listing lexical rebuild tail-state estimates");
6800 }
6801 };
6802 let tail_state_by_conversation: HashMap<i64, Option<i64>> =
6803 tail_state_rows.into_iter().collect();
6804
6805 let rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
6806 "SELECT id, last_message_idx
6807 FROM conversations
6808 ORDER BY id ASC",
6809 fparams![],
6810 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
6811 ) {
6812 Ok(rows) => rows,
6813 Err(err) if error_indicates_missing_column(&err) => self
6814 .conn
6815 .query_map_collect(
6816 "SELECT id
6817 FROM conversations
6818 ORDER BY id ASC",
6819 fparams![],
6820 |row| Ok((row.get_typed::<i64>(0)?, None)),
6821 )
6822 .with_context(|| {
6823 "listing lexical rebuild conversation ids after missing tail column fallback"
6824 })?,
6825 Err(err) => {
6826 return Err(err)
6827 .with_context(|| "listing lexical rebuild conversation footprint estimates");
6828 }
6829 };
6830
6831 let mut footprints = Vec::with_capacity(rows.len());
6832 let mut missing_tail_positions = HashMap::new();
6833 for (conversation_id, conversation_last_message_idx) in rows {
6834 let last_message_idx = tail_state_by_conversation
6835 .get(&conversation_id)
6836 .copied()
6837 .flatten()
6838 .or(conversation_last_message_idx);
6839 let Some(message_count) = lexical_rebuild_message_count_from_tail_idx(last_message_idx)
6840 else {
6841 missing_tail_positions.insert(conversation_id, footprints.len());
6842 footprints.push(LexicalRebuildConversationFootprintRow {
6843 conversation_id,
6844 message_count: 0,
6845 message_bytes: 0,
6846 });
6847 continue;
6848 };
6849 footprints.push(lexical_rebuild_conversation_footprint_from_count(
6850 conversation_id,
6851 message_count,
6852 ));
6853 }
6854
6855 let every_footprint_was_missing_tail = missing_tail_positions.len() == footprints.len();
6856 if !missing_tail_positions.is_empty() {
6857 self.fill_missing_lexical_rebuild_footprint_tails(
6858 &mut footprints,
6859 &missing_tail_positions,
6860 )?;
6861 }
6862 if !every_footprint_was_missing_tail {
6863 self.raise_lexical_rebuild_footprints_to_exact_message_counts(&mut footprints)?;
6864 }
6865
6866 Ok(footprints)
6867 }
6868
6869 pub fn lexical_rebuild_has_tail_footprint_metadata(&self) -> Result<bool> {
6870 let total_conversations: i64 = self
6871 .conn
6872 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6873 row.get_typed(0)
6874 })
6875 .with_context(|| "counting conversations for lexical rebuild tail metadata coverage")?;
6876 let total_conversations = usize::try_from(total_conversations.max(0)).unwrap_or(usize::MAX);
6877 if total_conversations == 0 {
6878 return Ok(true);
6879 }
6880
6881 let conversation_columns = franken_table_column_names(&self.conn, "conversations")?;
6882 let conversations_have_tail_column = conversation_columns.contains("last_message_idx");
6883 let tail_state_has_tail_column =
6884 match franken_table_column_names(&self.conn, "conversation_tail_state") {
6885 Ok(columns) => columns.contains("last_message_idx"),
6886 Err(err) if error_indicates_missing_table(&err) => false,
6887 Err(err) => {
6888 return Err(err)
6889 .with_context(|| "reading lexical rebuild tail-state metadata columns");
6890 }
6891 };
6892 if !conversations_have_tail_column && !tail_state_has_tail_column {
6893 return Ok(false);
6894 }
6895
6896 let covered_sql = match (conversations_have_tail_column, tail_state_has_tail_column) {
6897 (true, true) => {
6898 "SELECT COUNT(*)
6899 FROM conversations c
6900 LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
6901 WHERE c.last_message_idx IS NOT NULL
6902 OR ts.last_message_idx IS NOT NULL"
6903 }
6904 (true, false) => {
6905 "SELECT COUNT(*)
6906 FROM conversations
6907 WHERE last_message_idx IS NOT NULL"
6908 }
6909 (false, true) => {
6910 "SELECT COUNT(*)
6911 FROM conversations c
6912 WHERE EXISTS (
6913 SELECT 1
6914 FROM conversation_tail_state ts
6915 WHERE ts.conversation_id = c.id
6916 AND ts.last_message_idx IS NOT NULL
6917 )"
6918 }
6919 (false, false) => unreachable!("checked before covered_sql selection"),
6920 };
6921 let covered_conversations: i64 = self
6922 .conn
6923 .query_row_map(covered_sql, fparams![], |row| row.get_typed(0))
6924 .with_context(
6925 || "counting conversations covered by lexical rebuild tail footprint metadata",
6926 )?;
6927 let covered_conversations =
6928 usize::try_from(covered_conversations.max(0)).unwrap_or(usize::MAX);
6929
6930 Ok(lexical_rebuild_tail_metadata_coverage_is_sufficient(
6931 total_conversations,
6932 covered_conversations,
6933 ))
6934 }
6935
6936 fn raise_lexical_rebuild_footprints_to_exact_message_counts(
6937 &self,
6938 footprints: &mut [LexicalRebuildConversationFootprintRow],
6939 ) -> Result<()> {
6940 if footprints.is_empty() {
6941 return Ok(());
6942 }
6943
6944 let positions_by_conversation: HashMap<i64, usize> = footprints
6945 .iter()
6946 .enumerate()
6947 .map(|(position, footprint)| (footprint.conversation_id, position))
6948 .collect();
6949 self.conn
6950 .query_with_params_for_each(
6951 "SELECT conversation_id, COUNT(*) AS message_count
6952 FROM messages
6953 GROUP BY conversation_id
6954 ORDER BY conversation_id ASC",
6955 &[] as &[SqliteValue],
6956 |row| {
6957 let conversation_id: i64 = row.get_typed(0)?;
6958 let exact_count: i64 = row.get_typed(1)?;
6959 let Some(position) = positions_by_conversation.get(&conversation_id) else {
6960 return Ok(());
6961 };
6962 let exact_count = usize::try_from(exact_count.max(0)).unwrap_or(usize::MAX);
6963 let footprint = &mut footprints[*position];
6964 if exact_count > footprint.message_count {
6965 footprint.message_count = exact_count;
6966 footprint.message_bytes =
6967 footprint.message_bytes.max(exact_count.saturating_mul(
6968 LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
6969 ));
6970 }
6971 Ok(())
6972 },
6973 )
6974 .with_context(|| "raising lexical rebuild footprints to exact message counts")?;
6975 Ok(())
6976 }
6977
6978 fn fill_missing_lexical_rebuild_footprint_tails(
6979 &self,
6980 footprints: &mut [LexicalRebuildConversationFootprintRow],
6981 missing_tail_positions: &HashMap<i64, usize>,
6982 ) -> Result<()> {
6983 if missing_tail_positions.len() <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT {
6984 for (conversation_id, position) in missing_tail_positions {
6985 let last_message_idx: Option<i64> = self
6986 .conn
6987 .query_row_map(
6988 "SELECT MAX(idx) FROM messages WHERE conversation_id = ?1",
6989 fparams![*conversation_id],
6990 |row| row.get_typed(0),
6991 )
6992 .with_context(|| {
6993 format!(
6994 "looking up missing lexical rebuild tail estimate for conversation {conversation_id}"
6995 )
6996 })?;
6997 if let Some(message_count) =
6998 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
6999 {
7000 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7001 *conversation_id,
7002 message_count,
7003 );
7004 }
7005 }
7006 return Ok(());
7007 }
7008
7009 self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7010 footprints,
7011 missing_tail_positions,
7012 "SELECT conversation_id, MAX(idx) AS last_message_idx
7013 FROM messages INDEXED BY idx_messages_conv_idx
7014 GROUP BY conversation_id
7015 ORDER BY conversation_id ASC",
7016 )
7017 .or_else(|err| {
7018 if err
7019 .to_string()
7020 .contains("no such index: idx_messages_conv_idx")
7021 {
7022 return self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7023 footprints,
7024 missing_tail_positions,
7025 "SELECT conversation_id, MAX(idx) AS last_message_idx
7026 FROM messages
7027 GROUP BY conversation_id
7028 ORDER BY conversation_id ASC",
7029 );
7030 }
7031 Err(err)
7032 })
7033 .with_context(|| "grouping missing lexical rebuild tail estimates from messages")?;
7034
7035 Ok(())
7036 }
7037
7038 fn fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7039 &self,
7040 footprints: &mut [LexicalRebuildConversationFootprintRow],
7041 missing_tail_positions: &HashMap<i64, usize>,
7042 sql: &str,
7043 ) -> Result<()> {
7044 self.conn
7045 .query_with_params_for_each(sql, &[] as &[SqliteValue], |row| {
7046 let conversation_id: i64 = row.get_typed(0)?;
7047 let last_message_idx: Option<i64> = row.get_typed(1)?;
7048 let Some(position) = missing_tail_positions.get(&conversation_id) else {
7049 return Ok(());
7050 };
7051 if let Some(message_count) =
7052 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7053 {
7054 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7055 conversation_id,
7056 message_count,
7057 );
7058 }
7059 Ok(())
7060 })
7061 .with_context(|| "grouping lexical rebuild missing tail estimates")
7062 }
7063
7064 pub fn list_conversation_ids_for_lexical_rebuild(&self) -> Result<Vec<i64>> {
7066 self.conn
7067 .query_map_collect(
7068 "SELECT id FROM conversations ORDER BY id ASC",
7069 fparams![],
7070 |row| row.get_typed(0),
7071 )
7072 .with_context(|| "listing conversation ids for lexical rebuild")
7073 }
7074 pub fn list_conversations_for_lexical_rebuild_by_offset(
7079 &self,
7080 limit: i64,
7081 offset: i64,
7082 agent_slugs: &HashMap<i64, String>,
7083 workspace_paths: &HashMap<i64, PathBuf>,
7084 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7085 self.conn
7088 .query_map_collect(
7089 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7090 started_at,
7091 COALESCE(
7092 (SELECT ts.ended_at
7093 FROM conversation_tail_state ts
7094 WHERE ts.conversation_id = conversations.id),
7095 ended_at
7096 ),
7097 source_id, origin_host
7098 FROM conversations
7099 ORDER BY id ASC
7100 LIMIT ?1 OFFSET ?2",
7101 fparams![limit, offset],
7102 |row| {
7103 let agent_id: Option<i64> = row.get_typed(1)?;
7104 let workspace_id: Option<i64> = row.get_typed(2)?;
7105 let source_path: String = row.get_typed(5)?;
7106 let raw_source_id: Option<String> = row.get_typed(8)?;
7107 let raw_origin_host: Option<String> = row.get_typed(9)?;
7108 let (source_id, _, origin_host) = normalized_storage_source_parts(
7109 raw_source_id.as_deref(),
7110 None,
7111 raw_origin_host.as_deref(),
7112 );
7113 Ok(LexicalRebuildConversationRow {
7114 id: Some(row.get_typed(0)?),
7115 agent_slug: agent_id
7116 .and_then(|aid| agent_slugs.get(&aid).cloned())
7117 .unwrap_or_else(|| "unknown".to_string()),
7118 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7119 external_id: row.get_typed(3)?,
7120 title: row.get_typed(4)?,
7121 source_path: Path::new(&source_path).to_path_buf(),
7122 started_at: row.get_typed(6)?,
7123 ended_at: row.get_typed(7)?,
7124 source_id,
7125 origin_host,
7126 })
7127 },
7128 )
7129 .with_context(|| "listing conversations for lexical rebuild")
7130 }
7131
7132 pub fn list_conversations_for_lexical_rebuild_after_id(
7137 &self,
7138 limit: i64,
7139 after_conversation_id: i64,
7140 agent_slugs: &HashMap<i64, String>,
7141 workspace_paths: &HashMap<i64, PathBuf>,
7142 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7143 self.conn
7144 .query_map_collect(
7145 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7146 started_at,
7147 COALESCE(
7148 (SELECT ts.ended_at
7149 FROM conversation_tail_state ts
7150 WHERE ts.conversation_id = conversations.id),
7151 ended_at
7152 ),
7153 source_id, origin_host
7154 FROM conversations
7155 WHERE id > ?2
7156 ORDER BY id ASC
7157 LIMIT ?1",
7158 fparams![limit, after_conversation_id],
7159 |row| {
7160 let agent_id: Option<i64> = row.get_typed(1)?;
7161 let workspace_id: Option<i64> = row.get_typed(2)?;
7162 let source_path: String = row.get_typed(5)?;
7163 let raw_source_id: Option<String> = row.get_typed(8)?;
7164 let raw_origin_host: Option<String> = row.get_typed(9)?;
7165 let (source_id, _, origin_host) = normalized_storage_source_parts(
7166 raw_source_id.as_deref(),
7167 None,
7168 raw_origin_host.as_deref(),
7169 );
7170 Ok(LexicalRebuildConversationRow {
7171 id: Some(row.get_typed(0)?),
7172 agent_slug: agent_id
7173 .and_then(|aid| agent_slugs.get(&aid).cloned())
7174 .unwrap_or_else(|| "unknown".to_string()),
7175 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7176 external_id: row.get_typed(3)?,
7177 title: row.get_typed(4)?,
7178 source_path: Path::new(&source_path).to_path_buf(),
7179 started_at: row.get_typed(6)?,
7180 ended_at: row.get_typed(7)?,
7181 source_id,
7182 origin_host,
7183 })
7184 },
7185 )
7186 .with_context(|| {
7187 format!(
7188 "listing conversations for lexical rebuild after id {after_conversation_id}"
7189 )
7190 })
7191 }
7192
7193 pub fn list_conversations_for_lexical_rebuild_after_id_through_id(
7199 &self,
7200 limit: i64,
7201 after_conversation_id: i64,
7202 through_conversation_id: i64,
7203 agent_slugs: &HashMap<i64, String>,
7204 workspace_paths: &HashMap<i64, PathBuf>,
7205 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7206 if through_conversation_id <= after_conversation_id {
7207 return Ok(Vec::new());
7208 }
7209 self.conn
7210 .query_map_collect(
7211 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7212 started_at,
7213 COALESCE(
7214 (SELECT ts.ended_at
7215 FROM conversation_tail_state ts
7216 WHERE ts.conversation_id = conversations.id),
7217 ended_at
7218 ),
7219 source_id, origin_host
7220 FROM conversations
7221 WHERE id > ?2 AND id <= ?3
7222 ORDER BY id ASC
7223 LIMIT ?1",
7224 fparams![limit, after_conversation_id, through_conversation_id],
7225 |row| {
7226 let agent_id: Option<i64> = row.get_typed(1)?;
7227 let workspace_id: Option<i64> = row.get_typed(2)?;
7228 let source_path: String = row.get_typed(5)?;
7229 let raw_source_id: Option<String> = row.get_typed(8)?;
7230 let raw_origin_host: Option<String> = row.get_typed(9)?;
7231 let (source_id, _, origin_host) = normalized_storage_source_parts(
7232 raw_source_id.as_deref(),
7233 None,
7234 raw_origin_host.as_deref(),
7235 );
7236 Ok(LexicalRebuildConversationRow {
7237 id: Some(row.get_typed(0)?),
7238 agent_slug: agent_id
7239 .and_then(|aid| agent_slugs.get(&aid).cloned())
7240 .unwrap_or_else(|| "unknown".to_string()),
7241 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7242 external_id: row.get_typed(3)?,
7243 title: row.get_typed(4)?,
7244 source_path: Path::new(&source_path).to_path_buf(),
7245 started_at: row.get_typed(6)?,
7246 ended_at: row.get_typed(7)?,
7247 source_id,
7248 origin_host,
7249 })
7250 },
7251 )
7252 .with_context(|| {
7253 format!(
7254 "listing conversations for lexical rebuild after id {after_conversation_id} through id {through_conversation_id}"
7255 )
7256 })
7257 }
7258
7259 pub fn fetch_messages(&self, conversation_id: i64) -> Result<Vec<Message>> {
7261 let hinted_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7262 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7263 WHERE conversation_id = ?1 ORDER BY idx";
7264 let fallback_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7265 FROM messages \
7266 WHERE conversation_id = ?1 ORDER BY idx";
7267
7268 self.conn
7269 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7270 let role: String = row.get_typed(2)?;
7271 Ok(Message {
7272 id: Some(row.get_typed(0)?),
7273 idx: row.get_typed(1)?,
7274 role: match role.as_str() {
7275 "user" => MessageRole::User,
7276 "agent" | "assistant" => MessageRole::Agent,
7277 "tool" => MessageRole::Tool,
7278 "system" => MessageRole::System,
7279 other => MessageRole::Other(other.to_string()),
7280 },
7281 author: row.get_typed(3)?,
7282 created_at: row.get_typed(4)?,
7283 content: row.get_typed(5)?,
7284 extra_json: franken_read_message_extra_compat(row, 6, 7),
7285 snippets: Vec::new(),
7286 })
7287 })
7288 .or_else(|err| {
7289 if err
7290 .to_string()
7291 .contains("no such index: sqlite_autoindex_messages_1")
7292 {
7293 return self.conn.query_map_collect(
7294 fallback_sql,
7295 fparams![conversation_id],
7296 |row| {
7297 let role: String = row.get_typed(2)?;
7298 Ok(Message {
7299 id: Some(row.get_typed(0)?),
7300 idx: row.get_typed(1)?,
7301 role: match role.as_str() {
7302 "user" => MessageRole::User,
7303 "agent" | "assistant" => MessageRole::Agent,
7304 "tool" => MessageRole::Tool,
7305 "system" => MessageRole::System,
7306 other => MessageRole::Other(other.to_string()),
7307 },
7308 author: row.get_typed(3)?,
7309 created_at: row.get_typed(4)?,
7310 content: row.get_typed(5)?,
7311 extra_json: franken_read_message_extra_compat(row, 6, 7),
7312 snippets: Vec::new(),
7313 })
7314 },
7315 );
7316 }
7317 Err(err)
7318 })
7319 .with_context(|| format!("fetching messages for conversation {conversation_id}"))
7320 }
7321
7322 pub fn fetch_messages_for_lexical_rebuild(&self, conversation_id: i64) -> Result<Vec<Message>> {
7328 let hinted_sql = "SELECT id, idx, role, author, created_at, content \
7329 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7330 WHERE conversation_id = ?1 ORDER BY idx";
7331 let fallback_sql = "SELECT id, idx, role, author, created_at, content \
7332 FROM messages \
7333 WHERE conversation_id = ?1 ORDER BY idx";
7334
7335 self.conn
7336 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7337 let role: String = row.get_typed(2)?;
7338 Ok(Message {
7339 id: Some(row.get_typed(0)?),
7340 idx: row.get_typed(1)?,
7341 role: match role.as_str() {
7342 "user" => MessageRole::User,
7343 "agent" | "assistant" => MessageRole::Agent,
7344 "tool" => MessageRole::Tool,
7345 "system" => MessageRole::System,
7346 other => MessageRole::Other(other.to_string()),
7347 },
7348 author: row.get_typed(3)?,
7349 created_at: row.get_typed(4)?,
7350 content: row.get_typed(5)?,
7351 extra_json: serde_json::Value::Null,
7352 snippets: Vec::new(),
7353 })
7354 })
7355 .or_else(|err| {
7356 if err
7357 .to_string()
7358 .contains("no such index: sqlite_autoindex_messages_1")
7359 {
7360 return self.conn.query_map_collect(
7361 fallback_sql,
7362 fparams![conversation_id],
7363 |row| {
7364 let role: String = row.get_typed(2)?;
7365 Ok(Message {
7366 id: Some(row.get_typed(0)?),
7367 idx: row.get_typed(1)?,
7368 role: match role.as_str() {
7369 "user" => MessageRole::User,
7370 "agent" | "assistant" => MessageRole::Agent,
7371 "tool" => MessageRole::Tool,
7372 "system" => MessageRole::System,
7373 other => MessageRole::Other(other.to_string()),
7374 },
7375 author: row.get_typed(3)?,
7376 created_at: row.get_typed(4)?,
7377 content: row.get_typed(5)?,
7378 extra_json: serde_json::Value::Null,
7379 snippets: Vec::new(),
7380 })
7381 },
7382 );
7383 }
7384 Err(err)
7385 })
7386 .with_context(|| {
7387 format!("fetching messages for lexical rebuild of conversation {conversation_id}")
7388 })
7389 }
7390
7391 pub fn fetch_messages_for_lexical_rebuild_batch(
7396 &self,
7397 conversation_ids: &[i64],
7398 max_messages: Option<usize>,
7399 max_content_bytes: Option<usize>,
7400 ) -> Result<HashMap<i64, Vec<Message>>> {
7401 if conversation_ids.is_empty() {
7402 return Ok(HashMap::new());
7403 }
7404
7405 let mut grouped: HashMap<i64, Vec<Message>> =
7406 HashMap::with_capacity(conversation_ids.len());
7407 let mut fetched_conversation_ids = HashSet::with_capacity(conversation_ids.len());
7408 let mut total_messages = 0usize;
7409 let mut total_content_bytes = 0usize;
7410
7411 for conversation_id in conversation_ids {
7416 if !fetched_conversation_ids.insert(*conversation_id) {
7417 continue;
7418 }
7419
7420 let messages = self
7421 .fetch_messages_for_lexical_rebuild(*conversation_id)
7422 .with_context(|| {
7423 format!("fetching lexical rebuild messages for conversation {conversation_id}")
7424 })?;
7425 total_messages = total_messages.saturating_add(messages.len());
7426 if let Some(limit) = max_messages
7427 && total_messages > limit
7428 {
7429 return Err(anyhow!(
7430 "lexical rebuild batch fetch exceeded message guardrail: messages={total_messages} limit={limit} conversations={}",
7431 conversation_ids.len()
7432 ));
7433 }
7434
7435 let message_bytes = messages
7436 .iter()
7437 .map(|message| message.content.len())
7438 .sum::<usize>();
7439 total_content_bytes = total_content_bytes.saturating_add(message_bytes);
7440 if let Some(limit) = max_content_bytes
7441 && total_content_bytes > limit
7442 {
7443 return Err(anyhow!(
7444 "lexical rebuild batch fetch exceeded content-byte guardrail: bytes={total_content_bytes} limit={limit} conversations={}",
7445 conversation_ids.len()
7446 ));
7447 }
7448
7449 if !messages.is_empty() {
7450 grouped.insert(*conversation_id, messages);
7451 }
7452 }
7453
7454 Ok(grouped)
7455 }
7456
7457 pub fn stream_messages_for_lexical_rebuild_between_conversation_ids<F>(
7460 &self,
7461 start_conversation_id: i64,
7462 end_conversation_id: i64,
7463 mut f: F,
7464 ) -> Result<()>
7465 where
7466 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7467 {
7468 if end_conversation_id < start_conversation_id {
7469 return Ok(());
7470 }
7471
7472 let conversation_ids: Vec<i64> = self
7473 .conn
7474 .query_map_collect(
7475 "SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
7476 fparams![start_conversation_id, end_conversation_id],
7477 |row| row.get_typed(0),
7478 )
7479 .with_context(|| "listing conversation ids for streamed lexical rebuild")?;
7480
7481 for conversation_id in conversation_ids {
7482 let messages = self
7483 .fetch_messages_for_lexical_rebuild(conversation_id)
7484 .with_context(|| {
7485 format!("streaming lexical rebuild messages for conversation {conversation_id}")
7486 })?;
7487
7488 for message in messages {
7489 let message_id = message.id.ok_or_else(|| {
7490 anyhow!(
7491 "lexical rebuild message missing id for conversation {conversation_id} idx {}",
7492 message.idx
7493 )
7494 })?;
7495 f(LexicalRebuildMessageRow {
7496 conversation_id,
7497 id: message_id,
7498 idx: message.idx,
7499 role: role_str(&message.role),
7500 author: message.author,
7501 created_at: message.created_at,
7502 content: message.content,
7503 })?;
7504 }
7505 }
7506
7507 Ok(())
7508 }
7509
7510 pub fn stream_grouped_messages_for_lexical_rebuild_between_conversation_ids<F>(
7514 &self,
7515 start_conversation_id: i64,
7516 end_conversation_id: i64,
7517 mut f: F,
7518 ) -> Result<()>
7519 where
7520 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7521 {
7522 if end_conversation_id < start_conversation_id {
7523 return Ok(());
7524 }
7525
7526 let mut current_conversation_id: Option<i64> = None;
7527 let mut current_messages: LexicalRebuildGroupedMessageRows = SmallVec::new();
7528 let mut current_last_message_id = 0i64;
7529 let mut flush_current = |current_conversation_id: &mut Option<i64>,
7530 current_messages: &mut LexicalRebuildGroupedMessageRows,
7531 current_last_message_id: &mut i64|
7532 -> Result<()> {
7533 let Some(conversation_id) = current_conversation_id.take() else {
7534 return Ok(());
7535 };
7536 let messages = std::mem::take(current_messages);
7537 let last_message_id = std::mem::take(current_last_message_id);
7538 f(conversation_id, messages, last_message_id)
7539 };
7540
7541 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7542 start_conversation_id,
7543 end_conversation_id,
7544 |row| {
7545 if current_conversation_id != Some(row.conversation_id) {
7546 flush_current(
7547 &mut current_conversation_id,
7548 &mut current_messages,
7549 &mut current_last_message_id,
7550 )?;
7551 current_conversation_id = Some(row.conversation_id);
7552 }
7553 current_last_message_id = row.id;
7554 current_messages.push(LexicalRebuildGroupedMessageRow {
7555 idx: row.idx,
7556 is_tool_role: row.role == "tool",
7557 created_at: row.created_at,
7558 content: row.content,
7559 });
7560 Ok(())
7561 },
7562 )
7563 .with_context(|| "streaming grouped lexical rebuild messages")?;
7564
7565 flush_current(
7566 &mut current_conversation_id,
7567 &mut current_messages,
7568 &mut current_last_message_id,
7569 )
7570 .with_context(|| "flushing grouped lexical rebuild messages")
7571 }
7572
7573 pub fn stream_grouped_messages_for_lexical_rebuild_from_conversation_id<F>(
7576 &self,
7577 start_conversation_id: i64,
7578 f: F,
7579 ) -> Result<()>
7580 where
7581 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7582 {
7583 self.stream_grouped_messages_for_lexical_rebuild_between_conversation_ids(
7584 start_conversation_id,
7585 i64::MAX,
7586 f,
7587 )
7588 }
7589
7590 pub fn stream_messages_for_lexical_rebuild_from_conversation_id<F>(
7593 &self,
7594 start_conversation_id: i64,
7595 f: F,
7596 ) -> Result<()>
7597 where
7598 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7599 {
7600 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7601 start_conversation_id,
7602 i64::MAX,
7603 f,
7604 )
7605 }
7606
7607 pub fn get_source(&self, id: &str) -> Result<Option<Source>> {
7609 let result = self.conn.query_row_map(
7610 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources WHERE id = ?1",
7611 fparams![id],
7612 |row| {
7613 let kind_str: String = row.get_typed(1)?;
7614 let config_json_str: Option<String> = row.get_typed(5)?;
7615 Ok(Source {
7616 id: row.get_typed(0)?,
7617 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7618 host_label: row.get_typed(2)?,
7619 machine_id: row.get_typed(3)?,
7620 platform: row.get_typed(4)?,
7621 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7622 created_at: row.get_typed(6)?,
7623 updated_at: row.get_typed(7)?,
7624 })
7625 },
7626 );
7627 Ok(result.optional()?)
7628 }
7629
7630 pub fn list_sources(&self) -> Result<Vec<Source>> {
7632 self.conn
7633 .query_map_collect(
7634 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources ORDER BY id",
7635 fparams![],
7636 |row| {
7637 let kind_str: String = row.get_typed(1)?;
7638 let config_json_str: Option<String> = row.get_typed(5)?;
7639 Ok(Source {
7640 id: row.get_typed(0)?,
7641 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7642 host_label: row.get_typed(2)?,
7643 machine_id: row.get_typed(3)?,
7644 platform: row.get_typed(4)?,
7645 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7646 created_at: row.get_typed(6)?,
7647 updated_at: row.get_typed(7)?,
7648 })
7649 },
7650 )
7651 .with_context(|| "listing sources")
7652 }
7653
7654 pub fn get_source_ids(&self) -> Result<Vec<String>> {
7656 self.conn
7657 .query_map_collect(
7658 "SELECT id FROM sources WHERE id != 'local' ORDER BY id",
7659 fparams![],
7660 |row| row.get_typed(0),
7661 )
7662 .with_context(|| "listing source ids")
7663 }
7664
7665 pub fn upsert_source(&self, source: &Source) -> Result<()> {
7667 self.invalidate_conversation_source_cache(source.id.as_str());
7668 let now = Self::now_millis();
7669 let kind_str = source.kind.to_string();
7670 let config_json_str = source
7671 .config_json
7672 .as_ref()
7673 .map(serde_json::to_string)
7674 .transpose()?;
7675
7676 self.conn.execute_compat(
7680 "INSERT INTO sources(id, kind, host_label, machine_id, platform, config_json, created_at, updated_at)
7681 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
7682 ON CONFLICT(id) DO UPDATE SET
7683 kind = excluded.kind,
7684 host_label = excluded.host_label,
7685 machine_id = excluded.machine_id,
7686 platform = excluded.platform,
7687 config_json = excluded.config_json,
7688 updated_at = excluded.updated_at
7689 WHERE NOT (
7690 sources.kind IS excluded.kind
7691 AND sources.host_label IS excluded.host_label
7692 AND sources.machine_id IS excluded.machine_id
7693 AND sources.platform IS excluded.platform
7694 AND sources.config_json IS excluded.config_json
7695 )",
7696 fparams![
7697 source.id.as_str(),
7698 kind_str.as_str(),
7699 source.host_label.as_deref(),
7700 source.machine_id.as_deref(),
7701 source.platform.as_deref(),
7702 config_json_str.as_deref(),
7703 source.created_at.unwrap_or(now),
7704 now
7705 ],
7706 )?;
7707 Ok(())
7708 }
7709
7710 fn historical_bundle_key_hash(
7711 version: u32,
7712 bundle: &HistoricalDatabaseBundle,
7713 include_bundle_stats: bool,
7714 ) -> String {
7715 let signature = if include_bundle_stats {
7716 format!(
7717 "{}:{}:{}:{}",
7718 version,
7719 bundle.root_path.display(),
7720 bundle.total_bytes,
7721 bundle.modified_at_ms
7722 )
7723 } else {
7724 format!("{}:{}", version, bundle.root_path.display())
7725 };
7726 blake3::hash(signature.as_bytes()).to_hex().to_string()
7727 }
7728
7729 fn historical_bundle_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7730 format!(
7731 "historical_bundle_salvaged:{}",
7732 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_LEDGER_VERSION, bundle, false)
7733 )
7734 }
7735
7736 fn historical_bundle_legacy_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7737 let signature = format!(
7738 "{}:{}:{}:{}",
7739 HISTORICAL_SALVAGE_LEDGER_VERSION,
7740 bundle.root_path.display(),
7741 bundle.total_bytes,
7742 bundle.modified_at_ms
7743 );
7744 format!(
7745 "historical_bundle_salvaged:{}",
7746 blake3::hash(signature.as_bytes()).to_hex()
7747 )
7748 }
7749
7750 fn historical_bundle_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
7751 format!(
7752 "historical_bundle_progress:{}",
7753 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_PROGRESS_VERSION, bundle, false)
7754 )
7755 }
7756
7757 fn historical_bundle_legacy_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
7758 let signature = format!(
7759 "{}:{}:{}:{}",
7760 HISTORICAL_SALVAGE_PROGRESS_VERSION,
7761 bundle.root_path.display(),
7762 bundle.total_bytes,
7763 bundle.modified_at_ms
7764 );
7765 format!(
7766 "historical_bundle_progress:{}",
7767 blake3::hash(signature.as_bytes()).to_hex()
7768 )
7769 }
7770
7771 fn historical_bundle_already_imported(
7772 &self,
7773 bundle: &HistoricalDatabaseBundle,
7774 ) -> Result<bool> {
7775 for key in [
7776 Self::historical_bundle_meta_key(bundle),
7777 Self::historical_bundle_legacy_meta_key(bundle),
7778 ] {
7779 let existing: Option<String> = self
7780 .conn
7781 .query_row_map(
7782 "SELECT value FROM meta WHERE key = ?1",
7783 fparams![key.as_str()],
7784 |row| row.get_typed(0),
7785 )
7786 .optional()?;
7787 if existing.is_some() {
7788 return Ok(true);
7789 }
7790 }
7791 Ok(false)
7792 }
7793
7794 pub(crate) fn has_pending_historical_bundles(&self, canonical_db_path: &Path) -> Result<bool> {
7795 for bundle in discover_historical_database_bundles(canonical_db_path) {
7796 if !self.historical_bundle_already_imported(&bundle)? {
7797 return Ok(true);
7798 }
7799 }
7800 Ok(false)
7801 }
7802
7803 fn load_historical_bundle_progress(
7804 &self,
7805 bundle: &HistoricalDatabaseBundle,
7806 ) -> Result<Option<HistoricalBundleProgress>> {
7807 for key in [
7808 Self::historical_bundle_progress_key(bundle),
7809 Self::historical_bundle_legacy_progress_key(bundle),
7810 ] {
7811 let raw: Option<String> = self
7812 .conn
7813 .query_row_map(
7814 "SELECT value FROM meta WHERE key = ?1",
7815 fparams![key.as_str()],
7816 |row| row.get_typed(0),
7817 )
7818 .optional()?;
7819 let Some(raw) = raw else {
7820 continue;
7821 };
7822 let parsed: HistoricalBundleProgress =
7823 serde_json::from_str(&raw).with_context(|| {
7824 format!(
7825 "parsing historical salvage progress checkpoint for {}",
7826 bundle.root_path.display()
7827 )
7828 })?;
7829 if parsed.progress_version == HISTORICAL_SALVAGE_PROGRESS_VERSION {
7830 return Ok(Some(parsed));
7831 }
7832 }
7833 Ok(None)
7834 }
7835
7836 fn record_historical_bundle_progress(
7837 &self,
7838 bundle: &HistoricalDatabaseBundle,
7839 method: &str,
7840 last_completed_source_row_id: i64,
7841 conversations_imported: usize,
7842 messages_imported: usize,
7843 ) -> Result<()> {
7844 let key = Self::historical_bundle_progress_key(bundle);
7845 let value = HistoricalBundleProgress {
7846 progress_version: HISTORICAL_SALVAGE_PROGRESS_VERSION,
7847 path: bundle.root_path.display().to_string(),
7848 bytes: bundle.total_bytes,
7849 modified_at_ms: bundle.modified_at_ms,
7850 method: method.to_string(),
7851 last_completed_source_row_id,
7852 conversations_imported,
7853 messages_imported,
7854 updated_at_ms: Self::now_millis(),
7855 };
7856 let value_str = serde_json::to_string(&value)?;
7857 self.conn.execute_compat(
7858 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7859 fparams![key.as_str(), value_str.as_str()],
7860 )?;
7861 Ok(())
7862 }
7863
7864 fn clear_historical_bundle_progress(&self, bundle: &HistoricalDatabaseBundle) -> Result<()> {
7865 for key in [
7866 Self::historical_bundle_progress_key(bundle),
7867 Self::historical_bundle_legacy_progress_key(bundle),
7868 ] {
7869 self.conn
7870 .execute_compat("DELETE FROM meta WHERE key = ?1", fparams![key.as_str()])?;
7871 }
7872 Ok(())
7873 }
7874
7875 fn record_historical_bundle_import(
7876 &self,
7877 bundle: &HistoricalDatabaseBundle,
7878 method: &str,
7879 conversations_imported: usize,
7880 messages_imported: usize,
7881 ) -> Result<()> {
7882 let key = Self::historical_bundle_meta_key(bundle);
7883 let value = serde_json::json!({
7884 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
7885 "path": bundle.root_path.display().to_string(),
7886 "bytes": bundle.total_bytes,
7887 "modified_at_ms": bundle.modified_at_ms,
7888 "method": method,
7889 "conversations_imported": conversations_imported,
7890 "messages_imported": messages_imported,
7891 "recorded_at_ms": Self::now_millis(),
7892 });
7893 let value_str = serde_json::to_string(&value)?;
7894 self.conn.execute_compat(
7895 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7896 fparams![key.as_str(), value_str.as_str()],
7897 )?;
7898 Ok(())
7899 }
7900
7901 fn historical_import_error_is_split_retryable(err: &anyhow::Error) -> bool {
7902 const RETRYABLE_PATTERNS: &[&str] = &[
7903 "out of memory",
7904 "string or blob too big",
7905 "too many sql variables",
7906 ];
7907 err.chain().any(|cause| {
7908 let rendered = cause.to_string().to_ascii_lowercase();
7909 RETRYABLE_PATTERNS
7910 .iter()
7911 .any(|pattern| rendered.contains(pattern))
7912 })
7913 }
7914
7915 fn split_historical_batch_entry_messages(
7916 entry: &HistoricalBatchEntry,
7917 ) -> Option<(HistoricalBatchEntry, HistoricalBatchEntry)> {
7918 if entry.conversation.messages.len() < 2 {
7919 return None;
7920 }
7921 let split_at = entry.conversation.messages.len() / 2;
7922 if split_at == 0 || split_at >= entry.conversation.messages.len() {
7923 return None;
7924 }
7925
7926 let mut left = entry.clone();
7927 left.conversation.messages = entry.conversation.messages[..split_at].to_vec();
7928
7929 let mut right = entry.clone();
7930 right.conversation.messages = entry.conversation.messages[split_at..].to_vec();
7931
7932 Some((left, right))
7933 }
7934
7935 fn import_historical_batch_with_retry<F>(
7936 entries: &[HistoricalBatchEntry],
7937 insert_batch: &mut F,
7938 ) -> Result<HistoricalBatchImportTotals>
7939 where
7940 F: FnMut(&[HistoricalBatchEntry]) -> Result<HistoricalBatchImportTotals>,
7941 {
7942 match insert_batch(entries) {
7943 Ok(totals) => Ok(totals),
7944 Err(err) if Self::historical_import_error_is_split_retryable(&err) => {
7945 if entries.len() > 1 {
7946 let mid = entries.len() / 2;
7947 tracing::warn!(
7948 batch_entries = entries.len(),
7949 split_left = mid,
7950 split_right = entries.len() - mid,
7951 error = %err,
7952 "historical salvage batch failed; retrying in smaller sub-batches"
7953 );
7954 let left =
7955 Self::import_historical_batch_with_retry(&entries[..mid], insert_batch)?;
7956 let right =
7957 Self::import_historical_batch_with_retry(&entries[mid..], insert_batch)?;
7958 return Ok(HistoricalBatchImportTotals {
7959 inserted_source_rows: left.inserted_source_rows
7960 + right.inserted_source_rows,
7961 inserted_messages: left.inserted_messages + right.inserted_messages,
7962 });
7963 }
7964
7965 if let Some(entry) = entries.first()
7966 && let Some((left, right)) = Self::split_historical_batch_entry_messages(entry)
7967 {
7968 tracing::warn!(
7969 source_row_id = entry.source_row_id,
7970 message_count = entry.conversation.messages.len(),
7971 error = %err,
7972 "historical salvage conversation failed; retrying in smaller message slices"
7973 );
7974 let left_totals = Self::import_historical_batch_with_retry(
7975 std::slice::from_ref(&left),
7976 insert_batch,
7977 )?;
7978 let right_totals = Self::import_historical_batch_with_retry(
7979 std::slice::from_ref(&right),
7980 insert_batch,
7981 )?;
7982 return Ok(HistoricalBatchImportTotals {
7983 inserted_source_rows: usize::from(
7984 left_totals.inserted_source_rows > 0
7985 || right_totals.inserted_source_rows > 0,
7986 ),
7987 inserted_messages: left_totals
7988 .inserted_messages
7989 .saturating_add(right_totals.inserted_messages),
7990 });
7991 }
7992
7993 Err(err)
7994 }
7995 Err(err) => Err(err),
7996 }
7997 }
7998
7999 fn import_historical_sources(&self, source_conn: &FrankenConnection) -> Result<()> {
8000 let sources: Vec<Source> = match source_conn.query_map_collect(
8001 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at
8002 FROM sources",
8003 fparams![],
8004 |row| {
8005 let raw_source_id: String = row.get_typed(0)?;
8006 let kind_str: String = row.get_typed(1)?;
8007 let raw_host_label: Option<String> = row.get_typed(2)?;
8008 let config_json_raw: Option<String> = row.get_typed(5)?;
8009 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
8010 Some(raw_source_id.as_str()),
8011 Some(kind_str.as_str()),
8012 raw_host_label.as_deref(),
8013 );
8014 Ok(Source {
8015 id: source_id,
8016 kind: source_kind,
8017 host_label,
8018 machine_id: row.get_typed(3)?,
8019 platform: row.get_typed(4)?,
8020 config_json: config_json_raw.and_then(|raw| serde_json::from_str(&raw).ok()),
8021 created_at: row.get_typed(6)?,
8022 updated_at: row.get_typed(7)?,
8023 })
8024 },
8025 ) {
8026 Ok(rows) => rows,
8027 Err(err) => {
8028 tracing::warn!(error = %err, "historical sources table unavailable; skipping source import");
8029 return Ok(());
8030 }
8031 };
8032
8033 for source in sources {
8034 self.upsert_source(&source)?;
8035 }
8036 Ok(())
8037 }
8038
/// Imports conversations and their messages from a historical database into this storage.
///
/// Conversations are read from `source_conn` in ascending `id` order, converted into
/// [`Conversation`] values and inserted in batches bounded by
/// `historical_import_batch_limits()`. After each flushed batch a progress checkpoint is
/// recorded for `bundle`. When a checkpoint with a positive row id is loaded at the start,
/// only conversations with a greater `id` are read.
///
/// Conversations without messages are skipped. Sources, agents and workspaces referenced by
/// an imported conversation are created on demand.
///
/// Returns `(imported_conversations, imported_messages)`; both counters start from the
/// values stored in the loaded checkpoint, if any.
///
/// # Errors
/// Propagates failures from querying `source_conn`, from upserting sources, agents and
/// workspaces, from batch insertion (after split retries), and from recording progress.
fn import_historical_conversations(
    &self,
    bundle: &HistoricalDatabaseBundle,
    salvage_method: &str,
    source_conn: &FrankenConnection,
) -> Result<(usize, usize)> {
    let batch_limits = historical_import_batch_limits();
    let cache_enabled = IndexingCache::is_enabled();
    let mut indexing_cache = IndexingCache::new();
    // Ids of sources that already exist in this storage; extended as placeholders are added.
    let mut known_sources: HashSet<String> = self
        .list_sources()?
        .into_iter()
        .map(|source| source.id)
        .collect();
    let resume_progress = self.load_historical_bundle_progress(bundle)?;
    // A checkpoint only narrows the query when its row id is positive.
    let resume_after_row_id = resume_progress
        .as_ref()
        .map(|progress| progress.last_completed_source_row_id)
        .filter(|row_id| *row_id > 0);

    tracing::info!(
        target: "cass::historical_salvage",
        batch_conversations = batch_limits.conversations,
        batch_messages = batch_limits.messages,
        batch_payload_chars = batch_limits.payload_chars,
        cache_enabled,
        resume_after_row_id,
        "configured historical salvage batch limits"
    );

    if let Some(progress) = &resume_progress {
        tracing::info!(
            target: "cass::historical_salvage",
            path = %bundle.root_path.display(),
            resume_after_row_id = progress.last_completed_source_row_id,
            prior_conversations_imported = progress.conversations_imported,
            prior_messages_imported = progress.messages_imported,
            "resuming historical salvage bundle from durable checkpoint"
        );
    }

    // The two statements differ only in the `WHERE c.id > ?1` resume filter.
    let conv_sql = if resume_after_row_id.is_some() {
        "SELECT
                c.id,
                COALESCE(a.slug, 'unknown'),
                w.path,
                c.external_id,
                c.title,
                c.source_path,
                c.started_at,
                c.ended_at,
                c.approx_tokens,
                c.metadata_json,
                c.source_id,
                c.origin_host
             FROM conversations c
             LEFT JOIN agents a ON c.agent_id = a.id
             LEFT JOIN workspaces w ON c.workspace_id = w.id
             WHERE c.id > ?1
             ORDER BY c.id"
    } else {
        "SELECT
                c.id,
                COALESCE(a.slug, 'unknown'),
                w.path,
                c.external_id,
                c.title,
                c.source_path,
                c.started_at,
                c.ended_at,
                c.approx_tokens,
                c.metadata_json,
                c.source_id,
                c.origin_host
             FROM conversations c
             LEFT JOIN agents a ON c.agent_id = a.id
             LEFT JOIN workspaces w ON c.workspace_id = w.id
             ORDER BY c.id"
    };
    let conv_params: &[ParamValue] =
        if let Some(last_completed_source_row_id) = resume_after_row_id {
            &[ParamValue::from(last_completed_source_row_id)]
        } else {
            &[]
        };

    // All matching conversation rows are collected before any message is read.
    // Tuple fields follow the SELECT column order above.
    #[allow(clippy::type_complexity)]
    let conv_rows: Vec<(
        i64,
        String,
        Option<String>,
        Option<String>,
        Option<String>,
        String,
        Option<i64>,
        Option<i64>,
        Option<i64>,
        Option<String>,
        Option<String>,
        Option<String>,
    )> = source_conn
        .query_map_collect(conv_sql, conv_params, |row| {
            Ok((
                row.get_typed::<i64>(0)?,
                row.get_typed::<String>(1)?,
                row.get_typed::<Option<String>>(2)?,
                row.get_typed::<Option<String>>(3)?,
                row.get_typed::<Option<String>>(4)?,
                row.get_typed::<String>(5)?,
                row.get_typed::<Option<i64>>(6)?,
                row.get_typed::<Option<i64>>(7)?,
                row.get_typed::<Option<i64>>(8)?,
                row.get_typed::<Option<String>>(9)?,
                row.get_typed::<Option<String>>(10)?,
                row.get_typed::<Option<String>>(11)?,
            ))
        })
        .context("querying historical conversations")?;

    let msg_sql = "SELECT idx, role, author, created_at, content, extra_json
             FROM messages
             WHERE conversation_id = ?1
             ORDER BY idx";

    // Counters continue from the checkpoint so the returned totals cover earlier runs too.
    let mut imported_conversations = resume_progress
        .as_ref()
        .map(|progress| progress.conversations_imported)
        .unwrap_or(0);
    let mut imported_messages = resume_progress
        .as_ref()
        .map(|progress| progress.messages_imported)
        .unwrap_or(0);
    // State of the batch that is currently being accumulated.
    let mut pending_batch: Vec<HistoricalBatchEntry> = Vec::new();
    let mut pending_batch_messages = 0usize;
    let mut pending_batch_chars = 0usize;
    let mut pending_batch_first_row_id: Option<i64> = None;
    let mut pending_batch_last_row_id: Option<i64> = None;

    // Inserts the pending batch, updates the counters, records a checkpoint at the batch's
    // last source row id, and resets the pending state. An empty batch is a no-op.
    let flush_batch = |storage: &FrankenStorage,
                       batch: &mut Vec<HistoricalBatchEntry>,
                       pending_messages: &mut usize,
                       pending_chars: &mut usize,
                       first_row_id: &mut Option<i64>,
                       last_row_id: &mut Option<i64>,
                       imported_conversations: &mut usize,
                       imported_messages: &mut usize|
     -> Result<()> {
        if batch.is_empty() {
            return Ok(());
        }

        let batch_first_row_id = *first_row_id;
        let batch_last_row_id = *last_row_id;
        if historical_salvage_debug_enabled() {
            eprintln!(
                "[historical-salvage] flushing batch rows {:?}..{:?} conversations={} messages={} payload_chars={}",
                batch_first_row_id,
                batch_last_row_id,
                batch.len(),
                *pending_messages,
                *pending_chars
            );
        }
        tracing::info!(
            target: "cass::historical_salvage",
            batch_conversations = batch.len(),
            batch_messages = *pending_messages,
            batch_payload_chars = *pending_chars,
            first_source_row_id = batch_first_row_id,
            last_source_row_id = batch_last_row_id,
            "flushing historical salvage batch"
        );

        // Inserts one slice of entries and tallies the outcomes that inserted messages.
        let mut insert_batch =
            |entries: &[HistoricalBatchEntry]| -> Result<HistoricalBatchImportTotals> {
                let borrowed_batch: Vec<(i64, Option<i64>, &Conversation)> = entries
                    .iter()
                    .map(|entry| (entry.agent_id, entry.workspace_id, &entry.conversation))
                    .collect();
                let outcomes = storage
                    .insert_conversations_batched(&borrowed_batch)
                    .with_context(|| {
                        let first_source_row_id =
                            entries.first().map(|entry| entry.source_row_id);
                        let last_source_row_id =
                            entries.last().map(|entry| entry.source_row_id);
                        format!(
                            "inserting historical salvage batch source rows {:?}..{:?}",
                            first_source_row_id, last_source_row_id
                        )
                    })?;
                let mut totals = HistoricalBatchImportTotals::default();
                for outcome in outcomes {
                    // Outcomes with no inserted message indices do not count as imported.
                    if !outcome.inserted_indices.is_empty() {
                        totals.inserted_source_rows += 1;
                        totals.inserted_messages += outcome.inserted_indices.len();
                    }
                }
                Ok(totals)
            };
        let totals =
            Self::import_historical_batch_with_retry(batch.as_slice(), &mut insert_batch)?;
        *imported_conversations =
            (*imported_conversations).saturating_add(totals.inserted_source_rows);
        *imported_messages = (*imported_messages).saturating_add(totals.inserted_messages);
        if let Some(last_completed_row_id) = batch_last_row_id {
            storage.record_historical_bundle_progress(
                bundle,
                salvage_method,
                last_completed_row_id,
                *imported_conversations,
                *imported_messages,
            )?;
        }
        tracing::info!(
            target: "cass::historical_salvage",
            batch_conversations = batch.len(),
            batch_messages = *pending_messages,
            imported_conversations = *imported_conversations,
            imported_messages = *imported_messages,
            first_source_row_id = batch_first_row_id,
            last_source_row_id = batch_last_row_id,
            "historical salvage batch committed"
        );
        if historical_salvage_debug_enabled() {
            eprintln!(
                "[historical-salvage] committed batch rows {:?}..{:?} imported_conversations={} imported_messages={}",
                batch_first_row_id,
                batch_last_row_id,
                *imported_conversations,
                *imported_messages
            );
        }
        batch.clear();
        *pending_messages = 0;
        *pending_chars = 0;
        *first_row_id = None;
        *last_row_id = None;
        Ok(())
    };

    for (
        conversation_row_id,
        agent_slug,
        workspace_path,
        external_id,
        title,
        source_path,
        started_at,
        ended_at,
        approx_tokens,
        metadata_json_raw,
        raw_source_id,
        raw_origin_host,
    ) in conv_rows
    {
        let source_id = crate::search::tantivy::normalized_index_source_id(
            raw_source_id.as_deref(),
            None,
            raw_origin_host.as_deref(),
        );
        let origin_host =
            crate::search::tantivy::normalized_index_origin_host(raw_origin_host.as_deref());

        // Imported messages carry no id; only the source row's own columns are copied.
        let messages: Vec<Message> = source_conn
            .query_map_collect(msg_sql, fparams![conversation_row_id], |msg_row| {
                let role: String = msg_row.get_typed(1)?;
                Ok(Message {
                    id: None,
                    idx: msg_row.get_typed(0)?,
                    role: match role.as_str() {
                        "user" => MessageRole::User,
                        "agent" | "assistant" => MessageRole::Agent,
                        "tool" => MessageRole::Tool,
                        "system" => MessageRole::System,
                        other => MessageRole::Other(other.to_string()),
                    },
                    author: msg_row.get_typed(2)?,
                    created_at: msg_row.get_typed(3)?,
                    content: msg_row.get_typed(4)?,
                    extra_json: parse_historical_json_column(msg_row.get_typed(5)?),
                    snippets: Vec::new(),
                })
            })
            .context("collecting historical message rows")?;

        // Conversations without messages are not imported.
        if messages.is_empty() {
            continue;
        }

        let conversation_message_count = messages.len();
        let conversation_chars = messages
            .iter()
            .map(message_payload_size_hint)
            .sum::<usize>();

        let conversation = Conversation {
            id: None,
            agent_slug: agent_slug.clone(),
            workspace: workspace_path.map(PathBuf::from),
            external_id,
            title,
            source_path: PathBuf::from(source_path),
            started_at,
            ended_at,
            approx_tokens,
            metadata_json: parse_json_column(metadata_json_raw),
            messages,
            source_id,
            origin_host,
        };

        // Create a placeholder source row the first time an unknown source id shows up.
        if !known_sources.contains(&conversation.source_id) {
            let placeholder = if conversation.source_id == LOCAL_SOURCE_ID {
                Source::local()
            } else {
                Source {
                    id: conversation.source_id.clone(),
                    kind: SourceKind::Ssh,
                    host_label: conversation.origin_host.clone(),
                    machine_id: None,
                    platform: None,
                    config_json: None,
                    created_at: None,
                    updated_at: None,
                }
            };
            self.upsert_source(&placeholder)?;
            known_sources.insert(conversation.source_id.clone());
        }

        // The agent is described by its slug alone; the slug doubles as the name.
        let agent = Agent {
            id: None,
            slug: agent_slug.clone(),
            name: agent_slug,
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = if cache_enabled {
            indexing_cache.get_or_insert_agent(self, &agent)?
        } else {
            self.ensure_agent(&agent)?
        };
        let workspace_id = if let Some(workspace) = &conversation.workspace {
            if cache_enabled {
                Some(indexing_cache.get_or_insert_workspace(self, workspace, None)?)
            } else {
                Some(self.ensure_workspace(workspace, None)?)
            }
        } else {
            None
        };

        // Flush first when adding this conversation would push a non-empty batch over a limit.
        let exceeds_pending_limits = !pending_batch.is_empty()
            && (pending_batch.len() >= batch_limits.conversations
                || pending_batch_messages.saturating_add(conversation_message_count)
                    > batch_limits.messages
                || pending_batch_chars.saturating_add(conversation_chars)
                    > batch_limits.payload_chars);
        if exceeds_pending_limits {
            flush_batch(
                self,
                &mut pending_batch,
                &mut pending_batch_messages,
                &mut pending_batch_chars,
                &mut pending_batch_first_row_id,
                &mut pending_batch_last_row_id,
                &mut imported_conversations,
                &mut imported_messages,
            )?;
        }

        if pending_batch_first_row_id.is_none() {
            pending_batch_first_row_id = Some(conversation_row_id);
        }
        pending_batch_last_row_id = Some(conversation_row_id);
        pending_batch_messages =
            pending_batch_messages.saturating_add(conversation_message_count);
        pending_batch_chars = pending_batch_chars.saturating_add(conversation_chars);
        pending_batch.push(HistoricalBatchEntry {
            source_row_id: conversation_row_id,
            agent_id,
            workspace_id,
            conversation,
        });

        // Flush right away once the batch has reached any limit.
        if pending_batch.len() >= batch_limits.conversations
            || pending_batch_messages >= batch_limits.messages
            || pending_batch_chars >= batch_limits.payload_chars
        {
            flush_batch(
                self,
                &mut pending_batch,
                &mut pending_batch_messages,
                &mut pending_batch_chars,
                &mut pending_batch_first_row_id,
                &mut pending_batch_last_row_id,
                &mut imported_conversations,
                &mut imported_messages,
            )?;
        }
    }

    // Flush whatever is left after the last conversation row.
    flush_batch(
        self,
        &mut pending_batch,
        &mut pending_batch_messages,
        &mut pending_batch_chars,
        &mut pending_batch_first_row_id,
        &mut pending_batch_last_row_id,
        &mut imported_conversations,
        &mut imported_messages,
    )?;

    if cache_enabled {
        let (hits, misses, hit_rate) = indexing_cache.stats();
        tracing::info!(
            target: "cass::historical_salvage",
            hits,
            misses,
            hit_rate = format!("{:.1}%", hit_rate * 100.0),
            agents = indexing_cache.agent_count(),
            workspaces = indexing_cache.workspace_count(),
            sources = known_sources.len(),
            "historical salvage cache stats"
        );
    }

    Ok((imported_conversations, imported_messages))
}
8474
8475 pub fn salvage_historical_databases(
8476 &self,
8477 canonical_db_path: &Path,
8478 ) -> Result<HistoricalSalvageOutcome> {
8479 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
8480 let mut outcome = HistoricalSalvageOutcome {
8481 bundles_considered: ordered_bundles.len(),
8482 ..HistoricalSalvageOutcome::default()
8483 };
8484
8485 for bundle in ordered_bundles {
8486 if self.historical_bundle_already_imported(&bundle)? {
8487 self.clear_historical_bundle_progress(&bundle)?;
8488 continue;
8489 }
8490
8491 let source = match open_historical_bundle_for_salvage(&bundle).with_context(|| {
8492 format!(
8493 "opening historical bundle {} for salvage",
8494 bundle.root_path.display()
8495 )
8496 }) {
8497 Ok(source) => source,
8498 Err(err) => {
8499 tracing::warn!(
8500 path = %bundle.root_path.display(),
8501 error = %err,
8502 "skipping unreadable historical cass database bundle during salvage"
8503 );
8504 self.clear_historical_bundle_progress(&bundle)?;
8505 continue;
8506 }
8507 };
8508
8509 if let Some(progress) = self.load_historical_bundle_progress(&bundle)? {
8517 let backup_max_conversation_id: i64 = source
8518 .conn
8519 .query_row_map(
8520 "SELECT COALESCE(MAX(id), 0) FROM conversations",
8521 fparams![],
8522 |row| row.get_typed(0),
8523 )
8524 .unwrap_or(0);
8525 if backup_max_conversation_id > 0
8526 && progress.last_completed_source_row_id >= backup_max_conversation_id
8527 {
8528 self.record_historical_bundle_import(
8529 &bundle,
8530 source.method,
8531 progress.conversations_imported,
8532 progress.messages_imported,
8533 )?;
8534 self.clear_historical_bundle_progress(&bundle)?;
8535 tracing::info!(
8536 path = %bundle.root_path.display(),
8537 last_completed_source_row_id = progress.last_completed_source_row_id,
8538 backup_max_conversation_id,
8539 conversations_imported = progress.conversations_imported,
8540 messages_imported = progress.messages_imported,
8541 "historical bundle already fully imported per checkpoint; marking salvaged and skipping O(n) re-scan"
8542 );
8543 continue;
8544 }
8545 }
8546
8547 self.import_historical_sources(&source.conn)?;
8548 let (imported_conversations, imported_messages) =
8549 self.import_historical_conversations(&bundle, source.method, &source.conn)?;
8550 self.record_historical_bundle_import(
8551 &bundle,
8552 source.method,
8553 imported_conversations,
8554 imported_messages,
8555 )?;
8556 self.clear_historical_bundle_progress(&bundle)?;
8557
8558 outcome.bundles_imported += 1;
8559 outcome.conversations_imported += imported_conversations;
8560 outcome.messages_imported += imported_messages;
8561
8562 tracing::info!(
8563 path = %bundle.root_path.display(),
8564 bytes = bundle.total_bytes,
8565 method = source.method,
8566 imported_conversations,
8567 imported_messages,
8568 "salvaged historical cass database bundle"
8569 );
8570 }
8571
8572 Ok(outcome)
8573 }
8574
8575 pub fn delete_source(&self, id: &str, _cascade: bool) -> Result<bool> {
8577 if id == LOCAL_SOURCE_ID {
8578 anyhow::bail!("cannot delete the local source");
8579 }
8580 let count = self
8581 .conn
8582 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams![id])?;
8583 if count > 0 {
8584 self.invalidate_conversation_source_cache(id);
8585 }
8586 Ok(count > 0)
8587 }
8588
/// Stores `conv` for `agent_id`, either as a new conversation row or by
/// merging its messages into a conversation that already exists for the same
/// merge key.
///
/// Everything after source registration runs in one transaction opened on
/// `self.conn`; each successful path commits it before returning. There are
/// three paths:
/// 1. the initial lookup finds an existing conversation: the append is
///    delegated to `franken_append_messages_with_tail_in_tx`;
/// 2. the lookup misses but the insert helper reports
///    `ConversationInsertStatus::Existing`: only messages not already stored
///    are added and the outcome has `conversation_inserted: false`;
/// 3. a new row is inserted: messages are de-duplicated within `conv` itself
///    and the outcome has `conversation_inserted: true`.
///
/// `workspace_id` is only forwarded to the conversation-row insert helper.
///
/// # Errors
/// Propagates any error from source registration, the lookups, the inserts,
/// FTS flushing, the daily-stats update, or the commit.
8589 pub fn insert_conversation_tree(
8591 &self,
8592 agent_id: i64,
8593 workspace_id: Option<i64>,
8594 conv: &Conversation,
8595 ) -> Result<InsertOutcome> {
// From here on `conv` shadows the argument with its storage-normalized form.
8596 let normalized_conv = normalized_conversation_for_storage(conv);
8597 let conv = normalized_conv.as_ref();
8598 self.ensure_source_for_conversation(conv)?;
// Both switches are sampled once so the whole call uses a consistent setting.
8599 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
8600 let defer_analytics_updates = defer_analytics_updates_enabled();
8601 let conversation_key = conversation_merge_key(agent_id, conv);
8602 let mut tx = self.conn.transaction()?;
8603 let existing = franken_find_existing_conversation_with_tail_by_key(
8604 &tx,
8605 &conversation_key,
8606 Some(conv),
8607 )?;
// Path 1: known conversation. The shared append helper does the work; this
// function owns the commit.
8608 if let Some(existing) = existing {
8609 let outcome = self.franken_append_messages_with_tail_in_tx(
8610 &tx,
8611 agent_id,
8612 existing.id,
8613 conv,
8614 existing.tail_state,
8615 defer_lexical_updates,
8616 defer_analytics_updates,
8617 )?;
8618 tx.commit()?;
8619 return Ok(outcome);
8620 }
8621
// The lookup missed, so try to insert the conversation row. The helper can
// still resolve to an already-existing row, which is handled as a merge.
8622 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
8623 &tx,
8624 agent_id,
8625 workspace_id,
8626 conv,
8627 &conversation_key,
8628 )? {
8629 ConversationInsertStatus::Inserted(conv_id) => conv_id,
// Path 2: merge into the row the insert helper resolved to.
8630 ConversationInsertStatus::Existing(existing_id) => {
8631 let ExistingMessageLookup {
8632 by_idx: mut existing_messages,
8633 replay: mut existing_replay_fingerprints,
8634 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
8635 let ExistingConversationNewMessages {
8636 messages: new_messages,
8637 new_chars,
8638 idx_collision_count,
8639 first_collision_idx,
8640 } = collect_new_messages_for_existing_conversation(
8641 existing_id,
8642 conv,
8643 &mut existing_messages,
8644 &mut existing_replay_fingerprints,
8645 "skipping replay-equivalent recovered message with shifted idx",
8646 );
// Tail state is captured before `new_messages` is consumed by the loop below.
8647 let (inserted_last_idx, inserted_last_created_at) =
8648 borrowed_messages_tail_state(&new_messages);
8649 let mut inserted_indices = Vec::new();
8650 let mut fts_entries = Vec::new();
8651 let mut fts_pending_chars = 0usize;
// Passed to `flush_pending_fts_entries` as a running total; never read here.
8652 let mut _fts_inserted_total = 0usize;
8653 let inserted_message_ids =
8654 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
// NOTE(review): the zip assumes the helper returns one id per message, in the
// same order as `new_messages` — confirm against the helper.
8655 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8656 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8657 if !defer_lexical_updates {
8658 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8659 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
// Flush as soon as either the entry-count or the content-length budget is hit.
8660 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8661 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8662 {
8663 flush_pending_fts_entries(
8664 self,
8665 &tx,
8666 &mut fts_entries,
8667 &mut fts_pending_chars,
8668 &mut _fts_inserted_total,
8669 )?;
8670 }
8671 }
8672 inserted_indices.push(msg.idx);
8673 }
8674
8675 if idx_collision_count > 0 {
8676 tracing::warn!(
8677 conversation_id = existing_id,
8678 collision_count = idx_collision_count,
8679 first_idx = first_collision_idx,
8680 source_path = %conv.source_path.display(),
8681 "message idx collisions encountered while merging recovered conversation; retaining canonical message variants"
8682 );
8683 }
8684
// Final flush for whatever is still buffered below the batch thresholds.
8685 if !defer_lexical_updates {
8686 flush_pending_fts_entries(
8687 self,
8688 &tx,
8689 &mut fts_entries,
8690 &mut fts_pending_chars,
8691 &mut _fts_inserted_total,
8692 )?;
8693 }
8694
// Latest `created_at` across all incoming messages, not only the inserted ones.
8695 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
8696 franken_update_conversation_tail_state(
8697 &tx,
8698 existing_id,
8699 conv_last_ts,
8700 inserted_last_idx,
8701 inserted_last_created_at,
8702 )?;
8703 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
8704 {
8705 franken_update_external_conversation_tail_lookup_key(
8706 &tx,
8707 &lookup_key,
8708 conv_last_ts,
8709 inserted_last_idx,
8710 inserted_last_created_at,
8711 )?;
8712 }
8713
// Stats are only touched when something was actually inserted; the session
// count stays unchanged because no new conversation was created.
8714 if !defer_analytics_updates && !inserted_indices.is_empty() {
8715 franken_update_daily_stats_in_tx(
8716 self,
8717 &tx,
8718 &conv.agent_slug,
8719 &conv.source_id,
8720 conversation_effective_started_at(conv),
8721 StatsDelta {
8722 session_count_delta: 0,
8723 message_count_delta: inserted_indices.len() as i64,
8724 total_chars_delta: new_chars,
8725 },
8726 )?;
8727 }
8728
8729 tx.commit()?;
8730 return Ok(InsertOutcome {
8731 conversation_id: existing_id,
8732 conversation_inserted: false,
8733 inserted_indices,
8734 });
8735 }
8736 };
// Path 3: a brand-new conversation row with id `conv_id`.
8737 let mut fts_entries = Vec::new();
8738 let mut fts_pending_chars = 0usize;
// Passed to `flush_pending_fts_entries` as a running total; never read here.
8739 let mut _fts_inserted_total = 0usize;
8740 let mut total_chars: i64 = 0;
8741 let mut inserted_indices = Vec::new();
// idx -> merge fingerprint of the message accepted for that idx.
8742 let mut pending_messages = HashMap::new();
// Replay fingerprints of every message accepted so far.
8743 let mut pending_replay_fingerprints = HashSet::new();
8744 let mut idx_collision_count = 0usize;
8745 let mut first_collision_idx: Option<i64> = None;
8746 let mut new_messages = Vec::new();
// De-duplicate within the incoming conversation before touching the database.
8747 for msg in &conv.messages {
8748 let incoming_fingerprint = message_merge_fingerprint(msg);
// The first message seen for an idx wins. A later one with the same idx is
// always dropped; it is counted as a collision only if its fingerprint differs.
8749 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
8750 if existing_fingerprint != &incoming_fingerprint {
8751 idx_collision_count = idx_collision_count.saturating_add(1);
8752 first_collision_idx.get_or_insert(msg.idx);
8753 }
8754 continue;
8755 }
// New idx, but the replay fingerprint matches a message accepted earlier
// (necessarily under a different idx): skip it as a duplicate.
8756 let incoming_replay = message_replay_fingerprint(msg);
8757 if pending_replay_fingerprints.contains(&incoming_replay) {
8758 tracing::debug!(
8759 conversation_id = conv_id,
8760 idx = msg.idx,
8761 source_path = %conv.source_path.display(),
8762 "skipping replay-equivalent duplicate message within new conversation insert"
8763 );
8764 continue;
8765 }
8766 pending_messages.insert(msg.idx, incoming_fingerprint);
8767 pending_replay_fingerprints.insert(incoming_replay);
8768 new_messages.push(msg);
8769 }
8770 let inserted_message_ids = franken_batch_insert_new_messages(&tx, conv_id, &new_messages)?;
// NOTE(review): the zip assumes the helper returns one id per message, in the
// same order as `new_messages` — confirm against the helper.
8771 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8772 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8773 if !defer_lexical_updates {
8774 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8775 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
// Flush as soon as either the entry-count or the content-length budget is hit.
8776 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8777 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8778 {
8779 flush_pending_fts_entries(
8780 self,
8781 &tx,
8782 &mut fts_entries,
8783 &mut fts_pending_chars,
8784 &mut _fts_inserted_total,
8785 )?;
8786 }
8787 }
8788 total_chars += msg.content.len() as i64;
8789 inserted_indices.push(msg.idx);
8790 }
8791 if idx_collision_count > 0 {
8792 tracing::warn!(
8793 conversation_id = conv_id,
8794 collision_count = idx_collision_count,
8795 first_idx = first_collision_idx,
8796 source_path = %conv.source_path.display(),
8797 "message idx collisions encountered while inserting a new conversation; retaining the first canonical variant per idx"
8798 );
8799 }
// Final flush for whatever is still buffered below the batch thresholds.
8800 if !defer_lexical_updates {
8801 flush_pending_fts_entries(
8802 self,
8803 &tx,
8804 &mut fts_entries,
8805 &mut fts_pending_chars,
8806 &mut _fts_inserted_total,
8807 )?;
8808 }
8809
// Unlike the merge paths this runs even with zero messages, because the new
// conversation itself counts as one session.
8810 if !defer_analytics_updates {
8811 franken_update_daily_stats_in_tx(
8812 self,
8813 &tx,
8814 &conv.agent_slug,
8815 &conv.source_id,
8816 conversation_effective_started_at(conv),
8817 StatsDelta {
8818 session_count_delta: 1,
8819 message_count_delta: inserted_indices.len() as i64,
8820 total_chars_delta: total_chars,
8821 },
8822 )?;
8823 }
8824
8825 tx.commit()?;
8826 Ok(InsertOutcome {
8827 conversation_id: conv_id,
8828 conversation_inserted: true,
8829 inserted_indices,
8830 })
8831 }
8832
/// Test-only variant of the new-conversation insert path that adds the time
/// spent in each phase to the matching `*_duration` field of `profile`.
///
/// It only supports conversations that do not exist yet: an error is returned
/// if the initial lookup finds one, or if the insert helper reports
/// `ConversationInsertStatus::Existing`. Both error returns happen before the
/// transaction is committed.
///
/// On success the counters `invocations`, `messages` (all messages of the
/// normalized conversation) and `inserted_messages` (messages actually
/// inserted) are updated, and the outcome has `conversation_inserted: true`.
8833 #[cfg(test)]
8834 fn insert_conversation_tree_with_profile(
8835 &self,
8836 agent_id: i64,
8837 workspace_id: Option<i64>,
8838 conv: &Conversation,
8839 profile: &mut InsertConversationTreePerfProfile,
8840 ) -> Result<InsertOutcome> {
// `total_start` brackets the whole call, including normalization.
8841 let total_start = Instant::now();
8842 let normalized_conv = normalized_conversation_for_storage(conv);
8843 let conv = normalized_conv.as_ref();
8844
8845 let source_start = Instant::now();
8846 self.ensure_source_for_conversation(conv)?;
8847 profile.source_duration += source_start.elapsed();
8848
8849 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
8850 let defer_analytics_updates = defer_analytics_updates_enabled();
8851 let conversation_key = conversation_merge_key(agent_id, conv);
8852
8853 let tx_open_start = Instant::now();
8854 let mut tx = self.conn.transaction()?;
8855 profile.tx_open_duration += tx_open_start.elapsed();
8856
// Uses the id-only lookup; tail state is not needed on the new-conversation path.
8857 let existing_lookup_start = Instant::now();
8858 let existing =
8859 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
8860 profile.existing_lookup_duration += existing_lookup_start.elapsed();
8861 if let Some(existing_id) = existing {
8862 return Err(anyhow!(
8863 "profile helper expects new conversation path, found existing id {existing_id}"
8864 ));
8865 }
8866
8867 let conversation_row_start = Instant::now();
8868 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
8869 &tx,
8870 agent_id,
8871 workspace_id,
8872 conv,
8873 &conversation_key,
8874 )? {
8875 ConversationInsertStatus::Inserted(conv_id) => conv_id,
8876 ConversationInsertStatus::Existing(existing_id) => {
8877 return Err(anyhow!(
8878 "profile helper expected inserted conversation row, reused existing id {existing_id}"
8879 ));
8880 }
8881 };
8882 profile.conversation_row_duration += conversation_row_start.elapsed();
8883
8884 let mut fts_entries = Vec::new();
8885 let mut fts_pending_chars = 0usize;
8886 let mut fts_inserted_total = 0usize;
8887 let mut total_chars: i64 = 0;
8888 let mut inserted_indices = Vec::new();
// idx -> merge fingerprint of the message accepted for that idx.
8889 let mut pending_messages = HashMap::new();
// Replay fingerprints of every message accepted so far.
8890 let mut pending_replay_fingerprints = HashSet::new();
8891 let mut idx_collision_count = 0usize;
8892 let mut first_collision_idx: Option<i64> = None;
8893 let mut new_messages = Vec::new();
8894
// In-memory de-duplication; this loop is not attributed to any profile bucket
// other than `total_duration`.
8895 for msg in &conv.messages {
8896 let incoming_fingerprint = message_merge_fingerprint(msg);
// The first message seen for an idx wins; a later one is dropped and counted
// as a collision only when its fingerprint differs.
8897 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
8898 if existing_fingerprint != &incoming_fingerprint {
8899 idx_collision_count = idx_collision_count.saturating_add(1);
8900 first_collision_idx.get_or_insert(msg.idx);
8901 }
8902 continue;
8903 }
8904
// New idx whose replay fingerprint was already accepted under another idx.
8905 let incoming_replay = message_replay_fingerprint(msg);
8906 if pending_replay_fingerprints.contains(&incoming_replay) {
8907 tracing::debug!(
8908 conversation_id = conv_id,
8909 idx = msg.idx,
8910 source_path = %conv.source_path.display(),
8911 "skipping replay-equivalent duplicate message within profiled new conversation insert"
8912 );
8913 continue;
8914 }
8915
8916 pending_messages.insert(msg.idx, incoming_fingerprint);
8917 pending_replay_fingerprints.insert(incoming_replay);
8918 new_messages.push(msg);
8919 }
8920
// The helper additionally fills `profile.message_insert_breakdown`.
8921 let message_insert_start = Instant::now();
8922 let inserted_message_ids = franken_batch_insert_new_messages_with_profile(
8923 &tx,
8924 conv_id,
8925 &new_messages,
8926 &mut profile.message_insert_breakdown,
8927 )?;
8928 profile.message_insert_duration += message_insert_start.elapsed();
8929
// NOTE(review): the zip assumes one returned id per message, in the same
// order as `new_messages` — confirm against the helper.
8930 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8931 let snippet_insert_start = Instant::now();
8932 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8933 profile.snippet_insert_duration += snippet_insert_start.elapsed();
8934
8935 if !defer_lexical_updates {
// Building the entry and flushing a batch are timed in separate buckets.
8936 let fts_entry_start = Instant::now();
8937 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8938 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
8939 profile.fts_entry_duration += fts_entry_start.elapsed();
8940 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8941 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8942 {
8943 let fts_flush_start = Instant::now();
8944 flush_pending_fts_entries(
8945 self,
8946 &tx,
8947 &mut fts_entries,
8948 &mut fts_pending_chars,
8949 &mut fts_inserted_total,
8950 )?;
8951 profile.fts_flush_duration += fts_flush_start.elapsed();
8952 }
8953 }
8954
8955 total_chars += msg.content.len() as i64;
8956 inserted_indices.push(msg.idx);
8957 }
8958
8959 if idx_collision_count > 0 {
8960 tracing::warn!(
8961 conversation_id = conv_id,
8962 collision_count = idx_collision_count,
8963 first_idx = first_collision_idx,
8964 source_path = %conv.source_path.display(),
8965 "message idx collisions encountered while profiling a new conversation insert; retaining the first canonical variant per idx"
8966 );
8967 }
8968
// Final flush for entries still buffered below the batch thresholds.
8969 if !defer_lexical_updates {
8970 let fts_flush_start = Instant::now();
8971 flush_pending_fts_entries(
8972 self,
8973 &tx,
8974 &mut fts_entries,
8975 &mut fts_pending_chars,
8976 &mut fts_inserted_total,
8977 )?;
8978 profile.fts_flush_duration += fts_flush_start.elapsed();
8979 }
8980
// A new conversation always contributes one session to the daily stats.
8981 if !defer_analytics_updates {
8982 let analytics_start = Instant::now();
8983 franken_update_daily_stats_in_tx(
8984 self,
8985 &tx,
8986 &conv.agent_slug,
8987 &conv.source_id,
8988 conversation_effective_started_at(conv),
8989 StatsDelta {
8990 session_count_delta: 1,
8991 message_count_delta: inserted_indices.len() as i64,
8992 total_chars_delta: total_chars,
8993 },
8994 )?;
8995 profile.analytics_duration += analytics_start.elapsed();
8996 }
8997
8998 let commit_start = Instant::now();
8999 tx.commit()?;
9000 profile.commit_duration += commit_start.elapsed();
// Counters are only bumped after a successful commit.
9001 profile.invocations += 1;
9002 profile.messages += conv.messages.len();
9003 profile.inserted_messages += inserted_indices.len();
9004 profile.total_duration += total_start.elapsed();
9005
9006 Ok(InsertOutcome {
9007 conversation_id: conv_id,
9008 conversation_inserted: true,
9009 inserted_indices,
9010 })
9011 }
9012
/// Test-only variant of the append-to-existing path that adds the time spent
/// in each phase to the matching `*_duration` field of `profile`.
///
/// The conversation must already exist for the merge key of `conv`; otherwise
/// an error is returned before anything is written. `_workspace_id` is not
/// used.
///
/// New messages are chosen in one of two ways:
/// - if the existing conversation has tail state and
///   `collect_append_only_tail_messages` yields a plan, that plan is used;
/// - otherwise the existing messages are looked up and filtered through
///   `collect_new_messages_for_existing_conversation`.
///
/// On success the transaction is committed, the profile counters are updated,
/// and the outcome has `conversation_inserted: false`.
9013 #[cfg(test)]
9014 fn append_existing_conversation_with_profile(
9015 &self,
9016 agent_id: i64,
9017 _workspace_id: Option<i64>,
9018 conv: &Conversation,
9019 profile: &mut InsertConversationTreePerfProfile,
9020 ) -> Result<InsertOutcome> {
// `total_start` brackets the whole call, including normalization.
9021 let total_start = Instant::now();
9022 let normalized_conv = normalized_conversation_for_storage(conv);
9023 let conv = normalized_conv.as_ref();
9024
9025 let source_start = Instant::now();
9026 self.ensure_source_for_conversation(conv)?;
9027 profile.source_duration += source_start.elapsed();
9028
9029 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
9030 let defer_analytics_updates = defer_analytics_updates_enabled();
9031 let conversation_key = conversation_merge_key(agent_id, conv);
9032
9033 let tx_open_start = Instant::now();
9034 let mut tx = self.conn.transaction()?;
9035 profile.tx_open_duration += tx_open_start.elapsed();
9036
9037 let existing_lookup_start = Instant::now();
9038 let existing = franken_find_existing_conversation_with_tail_by_key(
9039 &tx,
9040 &conversation_key,
9041 Some(conv),
9042 )?;
9043 profile.existing_lookup_duration += existing_lookup_start.elapsed();
// This helper only profiles appends, so a missing conversation is an error.
9044 let existing = existing.ok_or_else(|| {
9045 anyhow!("append profile helper expects existing conversation for {conversation_key:?}")
9046 })?;
9047 let existing_id = existing.id;
9048
// `existing_idx_lookup_duration` covers building the append-only tail plan.
9049 let existing_idx_lookup_start = Instant::now();
9050 let append_tail_state = existing.tail_state;
9051 let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
9052 let existing_plan = append_tail_state.as_ref().and_then(|state| {
9053 collect_append_only_tail_messages(
9054 conv,
9055 state.last_message_idx,
9056 state.last_message_created_at,
9057 )
9058 });
// Remembered now because `existing_plan` is moved out just below.
9059 let used_append_tail_plan = existing_plan.is_some();
9060 profile.existing_idx_lookup_duration += existing_idx_lookup_start.elapsed();
9061
// `dedupe_filter_duration` covers picking the plan or, when there is none,
// the database lookup plus filtering.
9062 let dedupe_filter_start = Instant::now();
9063 let ExistingConversationNewMessages {
9064 messages: new_messages,
9065 new_chars,
9066 idx_collision_count,
9067 first_collision_idx,
9068 } = if let Some(existing_plan) = existing_plan {
9069 existing_plan
9070 } else {
9071 let ExistingMessageLookup {
9072 by_idx: mut existing_messages,
9073 replay: mut existing_replay_fingerprints,
9074 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
9075 collect_new_messages_for_existing_conversation(
9076 existing_id,
9077 conv,
9078 &mut existing_messages,
9079 &mut existing_replay_fingerprints,
9080 "skipping replay-equivalent profiled append message with shifted idx",
9081 )
9082 };
9083 profile.dedupe_filter_duration += dedupe_filter_start.elapsed();
9084
9085 let mut inserted_indices = Vec::new();
9086 let mut fts_entries = Vec::new();
9087 let mut fts_pending_chars = 0usize;
9088 let mut fts_inserted_total = 0usize;
// Tail state is captured before `new_messages` is consumed by the loop below.
9089 let (inserted_last_idx, inserted_last_created_at) =
9090 borrowed_messages_tail_state(&new_messages);
9091
// The helper additionally fills `profile.message_insert_breakdown`.
9092 let message_insert_start = Instant::now();
9093 let inserted_message_ids = franken_append_insert_new_messages_with_profile(
9094 &tx,
9095 existing_id,
9096 &new_messages,
9097 &mut profile.message_insert_breakdown,
9098 )?;
9099 profile.message_insert_duration += message_insert_start.elapsed();
9100
// NOTE(review): the zip assumes one returned id per message, in the same
// order as `new_messages` — confirm against the helper.
9101 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9102 let snippet_insert_start = Instant::now();
9103 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9104 profile.snippet_insert_duration += snippet_insert_start.elapsed();
9105
9106 if !defer_lexical_updates {
// Building the entry and flushing a batch are timed in separate buckets.
9107 let fts_entry_start = Instant::now();
9108 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9109 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9110 profile.fts_entry_duration += fts_entry_start.elapsed();
9111 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9112 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9113 {
9114 let fts_flush_start = Instant::now();
9115 flush_pending_fts_entries(
9116 self,
9117 &tx,
9118 &mut fts_entries,
9119 &mut fts_pending_chars,
9120 &mut fts_inserted_total,
9121 )?;
9122 profile.fts_flush_duration += fts_flush_start.elapsed();
9123 }
9124 }
9125
9126 inserted_indices.push(msg.idx);
9127 }
9128
9129 if idx_collision_count > 0 {
9130 tracing::warn!(
9131 conversation_id = existing_id,
9132 collision_count = idx_collision_count,
9133 first_idx = first_collision_idx,
9134 source_path = %conv.source_path.display(),
9135 "message idx collisions encountered while profiling append merge; retaining canonical message variants"
9136 );
9137 }
9138
// Final flush for entries still buffered below the batch thresholds.
9139 if !defer_lexical_updates {
9140 let fts_flush_start = Instant::now();
9141 flush_pending_fts_entries(
9142 self,
9143 &tx,
9144 &mut fts_entries,
9145 &mut fts_pending_chars,
9146 &mut fts_inserted_total,
9147 )?;
9148 profile.fts_flush_duration += fts_flush_start.elapsed();
9149 }
9150
// `conversation_row_duration` covers all tail-state bookkeeping below.
9151 let conversation_row_start = Instant::now();
9152 let mut exact_append_tail_set = false;
9153 if used_append_tail_plan {
// With a tail plan, the conversation row is only touched when the inserted
// batch produced both a last idx and a last timestamp.
9154 if let (Some(last_message_idx), Some(last_message_created_at)) =
9155 (inserted_last_idx, inserted_last_created_at)
9156 {
// The direct "set" variant is used when the stored `ended_at` is absent or
// not later than the newest inserted message; otherwise fall back to the
// general update helper.
9157 if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
9158 franken_set_conversation_tail_state_after_append(
9159 &tx,
9160 existing_id,
9161 last_message_created_at,
9162 last_message_idx,
9163 last_message_created_at,
9164 )?;
9165 exact_append_tail_set = true;
9166 } else {
9167 franken_update_conversation_tail_state(
9168 &tx,
9169 existing_id,
9170 Some(last_message_created_at),
9171 inserted_last_idx,
9172 inserted_last_created_at,
9173 )?;
9174 }
9175 }
9176 } else {
// Without a tail plan, use the latest `created_at` across all incoming
// messages, not only the inserted ones.
9177 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
9178 franken_update_conversation_tail_state(
9179 &tx,
9180 existing_id,
9181 conv_last_ts,
9182 inserted_last_idx,
9183 inserted_last_created_at,
9184 )?;
9185 }
// Called on every path, including the one where the row above was untouched.
9186 franken_update_external_conversation_tail_after_append(
9187 &tx,
9188 agent_id,
9189 conv,
9190 used_append_tail_plan,
9191 exact_append_tail_set,
9192 inserted_last_idx,
9193 inserted_last_created_at,
9194 )?;
9195 profile.conversation_row_duration += conversation_row_start.elapsed();
9196
// Stats are skipped when nothing was inserted; the session count is
// unchanged because the conversation already existed.
9197 if !defer_analytics_updates && !inserted_indices.is_empty() {
9198 let analytics_start = Instant::now();
9199 franken_update_daily_stats_in_tx(
9200 self,
9201 &tx,
9202 &conv.agent_slug,
9203 &conv.source_id,
9204 conversation_effective_started_at(conv),
9205 StatsDelta {
9206 session_count_delta: 0,
9207 message_count_delta: inserted_indices.len() as i64,
9208 total_chars_delta: new_chars,
9209 },
9210 )?;
9211 profile.analytics_duration += analytics_start.elapsed();
9212 }
9213
9214 let commit_start = Instant::now();
9215 tx.commit()?;
9216 profile.commit_duration += commit_start.elapsed();
// Counters are only bumped after a successful commit.
9217 profile.invocations += 1;
9218 profile.messages += conv.messages.len();
9219 profile.inserted_messages += inserted_indices.len();
9220 profile.total_duration += total_start.elapsed();
9221
9222 Ok(InsertOutcome {
9223 conversation_id: existing_id,
9224 conversation_inserted: false,
9225 inserted_indices,
9226 })
9227 }
9228
/// Appends the messages of `conv` that are not yet stored to the existing
/// conversation `conversation_id`, using the caller's transaction `tx`.
///
/// This function does not commit `tx`; `insert_conversation_tree` commits
/// after calling it.
///
/// New messages are chosen in one of two ways:
/// - if `append_tail_state` is present and
///   `collect_append_only_tail_messages` yields a plan, that plan is used;
/// - otherwise the existing messages are looked up and filtered through
///   `collect_new_messages_for_existing_conversation`.
///
/// `defer_lexical_updates` skips all FTS work; `defer_analytics_updates`
/// skips the daily-stats update. The returned outcome always has
/// `conversation_inserted: false` and lists the idx of every inserted message.
///
/// # Errors
/// Propagates any error from the lookup, the inserts, FTS flushing, the
/// tail-state updates, or the daily-stats update.
9229 #[allow(clippy::too_many_arguments)]
9231 fn franken_append_messages_with_tail_in_tx(
9232 &self,
9233 tx: &FrankenTransaction<'_>,
9234 agent_id: i64,
9235 conversation_id: i64,
9236 conv: &Conversation,
9237 append_tail_state: Option<ExistingConversationTailState>,
9238 defer_lexical_updates: bool,
9239 defer_analytics_updates: bool,
9240 ) -> Result<InsertOutcome> {
9241 let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
9242 let append_plan = append_tail_state.as_ref().and_then(|state| {
9243 collect_append_only_tail_messages(
9244 conv,
9245 state.last_message_idx,
9246 state.last_message_created_at,
9247 )
9248 });
// Remembered now because `append_plan` is moved out just below.
9249 let used_append_tail_plan = append_plan.is_some();
9250 let ExistingConversationNewMessages {
9251 messages: new_messages,
9252 new_chars,
9253 idx_collision_count,
9254 first_collision_idx,
9255 } = if let Some(append_plan) = append_plan {
9256 append_plan
9257 } else {
// No usable tail plan: compare against the messages already stored.
9258 let ExistingMessageLookup {
9259 by_idx: mut existing_messages,
9260 replay: mut existing_replay_fingerprints,
9261 } = franken_existing_message_lookup(tx, conversation_id, &conv.messages)?;
9262 collect_new_messages_for_existing_conversation(
9263 conversation_id,
9264 conv,
9265 &mut existing_messages,
9266 &mut existing_replay_fingerprints,
9267 "skipping replay-equivalent recovered message with shifted idx",
9268 )
9269 };
9270
9271 let mut inserted_indices = Vec::new();
9272 let mut fts_entries = Vec::new();
9273 let mut fts_pending_chars = 0usize;
// Passed to `flush_pending_fts_entries` as a running total; never read here.
9274 let mut _fts_inserted_total = 0usize;
// Tail state is captured before `new_messages` is consumed by the loop below.
9275 let (inserted_last_idx, inserted_last_created_at) =
9276 borrowed_messages_tail_state(&new_messages);
9277 let inserted_message_ids =
9278 franken_append_insert_new_messages(tx, conversation_id, &new_messages)?;
// NOTE(review): the zip assumes one returned id per message, in the same
// order as `new_messages` — confirm against the helper.
9279 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9280 franken_insert_snippets(tx, msg_id, &msg.snippets)?;
9281 if !defer_lexical_updates {
9282 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9283 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
// Flush as soon as either the entry-count or the content-length budget is hit.
9284 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9285 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9286 {
9287 flush_pending_fts_entries(
9288 self,
9289 tx,
9290 &mut fts_entries,
9291 &mut fts_pending_chars,
9292 &mut _fts_inserted_total,
9293 )?;
9294 }
9295 }
9296 inserted_indices.push(msg.idx);
9297 }
9298
9299 if idx_collision_count > 0 {
9300 tracing::warn!(
9301 conversation_id,
9302 collision_count = idx_collision_count,
9303 first_idx = first_collision_idx,
9304 source_path = %conv.source_path.display(),
9305 "message idx collisions encountered while appending to an existing conversation; retaining canonical message variants"
9306 );
9307 }
9308
// Final flush for entries still buffered below the batch thresholds.
9309 if !defer_lexical_updates {
9310 flush_pending_fts_entries(
9311 self,
9312 tx,
9313 &mut fts_entries,
9314 &mut fts_pending_chars,
9315 &mut _fts_inserted_total,
9316 )?;
9317 }
9318
9319 let mut exact_append_tail_set = false;
9320 if used_append_tail_plan {
// With a tail plan, the conversation row is only touched when the inserted
// batch produced both a last idx and a last timestamp.
9321 if let (Some(last_message_idx), Some(last_message_created_at)) =
9322 (inserted_last_idx, inserted_last_created_at)
9323 {
// The direct "set" variant is used when the stored `ended_at` is absent or
// not later than the newest inserted message; otherwise fall back to the
// general update helper.
9324 if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
9325 franken_set_conversation_tail_state_after_append(
9326 tx,
9327 conversation_id,
9328 last_message_created_at,
9329 last_message_idx,
9330 last_message_created_at,
9331 )?;
9332 exact_append_tail_set = true;
9333 } else {
9334 franken_update_conversation_tail_state(
9335 tx,
9336 conversation_id,
9337 Some(last_message_created_at),
9338 inserted_last_idx,
9339 inserted_last_created_at,
9340 )?;
9341 }
9342 }
9343 } else {
// Without a tail plan, use the latest `created_at` across all incoming
// messages, not only the inserted ones.
9344 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
9345 franken_update_conversation_tail_state(
9346 tx,
9347 conversation_id,
9348 conv_last_ts,
9349 inserted_last_idx,
9350 inserted_last_created_at,
9351 )?;
9352 }
// Called on every path, including the one where the row above was untouched.
9353 franken_update_external_conversation_tail_after_append(
9354 tx,
9355 agent_id,
9356 conv,
9357 used_append_tail_plan,
9358 exact_append_tail_set,
9359 inserted_last_idx,
9360 inserted_last_created_at,
9361 )?;
9362
// Stats are skipped when nothing was inserted; the session count is
// unchanged because the conversation already existed.
9363 if !defer_analytics_updates && !inserted_indices.is_empty() {
9364 let message_count = inserted_indices.len() as i64;
9365 franken_update_daily_stats_in_tx(
9366 self,
9367 tx,
9368 &conv.agent_slug,
9369 &conv.source_id,
9370 conversation_effective_started_at(conv),
9371 StatsDelta {
9372 session_count_delta: 0,
9373 message_count_delta: message_count,
9374 total_chars_delta: new_chars,
9375 },
9376 )?;
9377 }
9378
9379 Ok(InsertOutcome {
9380 conversation_id,
9381 conversation_inserted: false,
9382 inserted_indices,
9383 })
9384 }
9385
9386 pub fn rebuild_fts(&self) -> Result<()> {
9388 self.rebuild_fts_via_frankensqlite().map(|_| ())
9389 }
9390
9391 pub(crate) fn ensure_search_fallback_fts_consistency(&self) -> Result<FtsConsistencyRepair> {
9396 self.ensure_fts_consistency_via_frankensqlite()
9397 }
9398
9399 pub(crate) fn fallback_fts_is_known_healthy_for_archive_fingerprint(
9400 &self,
9401 archive_fingerprint: &str,
9402 ) -> Result<bool> {
9403 Ok(
9404 self.read_fts_franken_rebuild_generation()? == Some(FTS_FRANKEN_REBUILD_GENERATION)
9405 && self
9406 .read_fts_franken_rebuild_archive_fingerprint()?
9407 .as_deref()
9408 == Some(archive_fingerprint),
9409 )
9410 }
9411
9412 pub(crate) fn record_search_fallback_fts_archive_fingerprint(
9413 &self,
9414 archive_fingerprint: &str,
9415 ) -> Result<()> {
9416 self.conn
9417 .execute_compat(
9418 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9419 fparams![
9420 FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY,
9421 archive_fingerprint.to_string()
9422 ],
9423 )
9424 .with_context(|| "recording frankensqlite FTS archive fingerprint")?;
9425 Ok(())
9426 }
9427
9428 pub(crate) fn daily_stats_is_known_healthy_for_archive_fingerprint(
9429 &self,
9430 archive_fingerprint: &str,
9431 ) -> Result<bool> {
9432 Ok(
9433 self.read_daily_stats_health_generation()? == Some(DAILY_STATS_HEALTH_GENERATION)
9434 && self.read_daily_stats_archive_fingerprint()?.as_deref()
9435 == Some(archive_fingerprint),
9436 )
9437 }
9438
9439 pub(crate) fn record_daily_stats_archive_fingerprint(
9440 &self,
9441 archive_fingerprint: &str,
9442 ) -> Result<()> {
9443 self.conn
9444 .execute_compat(
9445 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9446 fparams![
9447 DAILY_STATS_HEALTH_GENERATION_META_KEY,
9448 DAILY_STATS_HEALTH_GENERATION.to_string()
9449 ],
9450 )
9451 .with_context(|| "recording daily_stats health generation")?;
9452 self.conn
9453 .execute_compat(
9454 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9455 fparams![DAILY_STATS_HEALTH_META_KEY, archive_fingerprint.to_string()],
9456 )
9457 .with_context(|| "recording daily_stats archive fingerprint")?;
9458 Ok(())
9459 }
9460
9461 fn read_fts_franken_rebuild_generation(&self) -> Result<Option<i64>> {
9462 let value: Option<String> = self
9463 .conn
9464 .query_row_map(
9465 "SELECT value FROM meta WHERE key = ?1",
9466 fparams![FTS_FRANKEN_REBUILD_META_KEY],
9467 |row| row.get_typed(0),
9468 )
9469 .optional()?;
9470 Ok(value.and_then(|v| v.parse::<i64>().ok()))
9471 }
9472
9473 fn read_fts_franken_rebuild_archive_fingerprint(&self) -> Result<Option<String>> {
9474 Ok(self
9475 .conn
9476 .query_row_map(
9477 "SELECT value FROM meta WHERE key = ?1",
9478 fparams![FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY],
9479 |row| row.get_typed(0),
9480 )
9481 .optional()?)
9482 }
9483
9484 fn read_daily_stats_health_generation(&self) -> Result<Option<i64>> {
9485 let value: Option<String> = self
9486 .conn
9487 .query_row_map(
9488 "SELECT value FROM meta WHERE key = ?1",
9489 fparams![DAILY_STATS_HEALTH_GENERATION_META_KEY],
9490 |row| row.get_typed(0),
9491 )
9492 .optional()?;
9493 Ok(value.and_then(|value| value.parse::<i64>().ok()))
9494 }
9495
9496 fn read_daily_stats_archive_fingerprint(&self) -> Result<Option<String>> {
9497 Ok(self
9498 .conn
9499 .query_row_map(
9500 "SELECT value FROM meta WHERE key = ?1",
9501 fparams![DAILY_STATS_HEALTH_META_KEY],
9502 |row| row.get_typed(0),
9503 )
9504 .optional()?)
9505 }
9506
9507 fn record_fts_franken_rebuild_generation(&self) -> Result<()> {
9508 self.conn
9509 .execute_compat(
9510 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9511 fparams![
9512 FTS_FRANKEN_REBUILD_META_KEY,
9513 FTS_FRANKEN_REBUILD_GENERATION.to_string()
9514 ],
9515 )
9516 .with_context(|| "recording frankensqlite FTS rebuild generation")?;
9517 Ok(())
9518 }
9519
9520 fn ensure_fts_consistency_via_frankensqlite(&self) -> Result<FtsConsistencyRepair> {
9521 if self.read_fts_franken_rebuild_generation()? != Some(FTS_FRANKEN_REBUILD_GENERATION) {
9522 let fts_already_healthy = (|| -> Result<bool> {
9527 let fts_exists: i64 = self.conn.query_row_map(
9528 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9529 fparams![],
9530 |row| row.get_typed(0),
9531 )?;
9532 if fts_exists != 1 {
9533 return Ok(false);
9534 }
9535 let total: i64 = self.conn.query_row_map(
9536 "SELECT COUNT(*) FROM messages",
9537 fparams![],
9538 |row| row.get_typed(0),
9539 )?;
9540 if total == 0 {
9541 return Ok(false);
9542 }
9543 let indexed: i64 = self.conn.query_row_map(
9544 "SELECT COUNT(*) FROM fts_messages",
9545 fparams![],
9546 |row| row.get_typed(0),
9547 )?;
9548 Ok(indexed > 0 && indexed * 100 >= total * 90)
9550 })()
9551 .unwrap_or(false);
9552
9553 if fts_already_healthy {
9554 tracing::info!(
9555 target: "cass::fts_rebuild",
9556 "FTS already populated and consistent; setting generation marker without rebuild"
9557 );
9558 self.record_fts_franken_rebuild_generation()?;
9559 self.set_fts_messages_present_cache(true);
9560 } else {
9561 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9562 self.record_fts_franken_rebuild_generation()?;
9563 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9564 }
9565 }
9566
9567 let inspection = (|| -> Result<(i64, bool)> {
9568 let fts_schema_rows = self.conn.query_row_map(
9569 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9570 fparams![],
9571 |row| row.get_typed::<i64>(0),
9572 )?;
9573 let fts_queryable = fts_schema_rows == 1
9574 && self
9575 .conn
9576 .query("SELECT rowid FROM fts_messages LIMIT 1")
9577 .is_ok();
9578 Ok((fts_schema_rows, fts_queryable))
9579 })();
9580
9581 let (fts_schema_rows, fts_queryable) = match inspection {
9582 Ok(result) => result,
9583 Err(err) => {
9584 tracing::warn!(
9585 error = %err,
9586 "frankensqlite FTS consistency probe failed; rebuilding authoritative FTS"
9587 );
9588 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9589 self.record_fts_franken_rebuild_generation()?;
9590 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9591 }
9592 };
9593
9594 if fts_schema_rows != 1 || !fts_queryable {
9595 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9596 self.record_fts_franken_rebuild_generation()?;
9597 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9598 }
9599
9600 let total_messages =
9601 self.conn
9602 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
9603 row.get_typed::<i64>(0)
9604 })?;
9605 let indexed_messages =
9606 self.conn
9607 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9608 row.get_typed::<i64>(0)
9609 })?;
9610
9611 if indexed_messages == total_messages {
9612 self.set_fts_messages_present_cache(true);
9613 return Ok(FtsConsistencyRepair::AlreadyHealthy {
9614 rows: usize::try_from(total_messages.max(0)).unwrap_or(usize::MAX),
9615 });
9616 }
9617
9618 if indexed_messages > total_messages {
9619 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9620 self.record_fts_franken_rebuild_generation()?;
9621 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9622 }
9623
9624 let inserted_rows = self
9625 .stream_fts_rows_via_frankensqlite(true)
9626 .with_context(|| "incrementally repairing missing FTS rows via frankensqlite")?;
9627 let repaired_rows =
9628 self.conn
9629 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9630 row.get_typed::<i64>(0)
9631 })?;
9632 if repaired_rows == total_messages {
9633 self.set_fts_messages_present_cache(true);
9634 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9635 inserted_rows,
9636 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9637 });
9638 }
9639
9640 if inserted_rows == 0 {
9648 tracing::debug!(
9649 target: "cass::fts_rebuild",
9650 indexed_messages = repaired_rows,
9651 total_messages,
9652 un_indexable_gap = total_messages.saturating_sub(repaired_rows),
9653 "FTS catch-up inserted 0 rows; remaining gap is un-indexable (likely orphaned messages with dangling conversation_id)"
9654 );
9655 self.set_fts_messages_present_cache(true);
9656 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9657 inserted_rows: 0,
9658 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9659 });
9660 }
9661
9662 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9665 self.record_fts_franken_rebuild_generation()?;
9666 Ok(FtsConsistencyRepair::Rebuilt { inserted_rows })
9667 }
9668
9669 pub(crate) fn rebuild_fts_via_frankensqlite(&self) -> Result<usize> {
9670 self.invalidate_fts_messages_present_cache();
9671 self.conn
9672 .execute("DROP TABLE IF EXISTS fts_messages;")
9673 .with_context(|| "dropping derived fts_messages before frankensqlite rebuild")?;
9674 self.conn
9675 .execute_compat(FTS5_REGISTER_SQL, fparams![])
9676 .with_context(|| "creating derived fts_messages via frankensqlite rebuild")?;
9677 self.set_fts_messages_present_cache(true);
9678
9679 self.stream_fts_rows_via_frankensqlite(false)
9680 }
9681
9682 fn stream_fts_rows_via_frankensqlite(&self, missing_only: bool) -> Result<usize> {
9683 let batch_size = fts_rebuild_batch_size().max(1);
9684 let batch_limit = i64::try_from(batch_size).unwrap_or(i64::MAX);
9685 let mut total_inserted: usize = 0;
9686 let mut total_skipped_orphans: usize = 0;
9687 let mut total_skipped_existing: usize = 0;
9688 let mut last_rowid: i64 = 0;
9689 let conversation_by_id = self.load_fts_conversation_projection_map()?;
9690 let agent_slug_by_id = self.load_fts_agent_slug_map()?;
9691 let workspace_path_by_id = self.load_fts_workspace_path_map()?;
9692 let existing_fts_rowids = if missing_only {
9693 Some(self.load_fts_message_rowid_set()?)
9694 } else {
9695 None
9696 };
9697 let mut entries = Vec::new();
9698 let mut pending_chars = 0usize;
9699
9700 loop {
9701 let rows = self.fetch_fts_rebuild_message_rows(last_rowid, batch_limit)?;
9702 let fetched_count = rows.len();
9703 if fetched_count == 0 {
9704 break;
9705 }
9706
9707 let inserted_before_batch = total_inserted;
9708 let skipped_before_batch = total_skipped_orphans;
9709 let existing_before_batch = total_skipped_existing;
9710
9711 for row in rows {
9712 last_rowid = row.rowid;
9713 if existing_fts_rowids
9714 .as_ref()
9715 .is_some_and(|rowids| rowids.contains(&row.message_id))
9716 {
9717 total_skipped_existing = total_skipped_existing.saturating_add(1);
9718 continue;
9719 }
9720 let Some(conversation) = conversation_by_id.get(&row.conversation_id) else {
9721 total_skipped_orphans = total_skipped_orphans.saturating_add(1);
9722 continue;
9723 };
9724 let agent = conversation
9725 .agent_id
9726 .and_then(|agent_id| agent_slug_by_id.get(&agent_id))
9727 .filter(|slug| !slug.is_empty())
9728 .cloned()
9729 .unwrap_or_else(|| "unknown".to_string());
9730 let workspace = conversation
9731 .workspace_id
9732 .and_then(|workspace_id| workspace_path_by_id.get(&workspace_id))
9733 .cloned()
9734 .unwrap_or_default();
9735 pending_chars = pending_chars.saturating_add(row.content.len());
9736 entries.push(FtsEntry {
9737 content: row.content,
9738 title: conversation.title.clone(),
9739 agent,
9740 workspace,
9741 source_path: conversation.source_path.clone(),
9742 created_at: row.created_at,
9743 message_id: row.message_id,
9744 });
9745 if entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9746 || pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9747 {
9748 total_inserted = total_inserted.saturating_add(
9749 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
9750 );
9751 entries.clear();
9752 pending_chars = 0;
9753 }
9754 }
9755
9756 if !entries.is_empty() {
9757 total_inserted = total_inserted.saturating_add(
9758 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
9759 );
9760 entries.clear();
9761 pending_chars = 0;
9762 }
9763
9764 tracing::debug!(
9765 target: "cass::fts_rebuild",
9766 batch_rows = fetched_count,
9767 batch_inserted = total_inserted.saturating_sub(inserted_before_batch),
9768 batch_skipped_orphans = total_skipped_orphans.saturating_sub(skipped_before_batch),
9769 batch_skipped_existing = total_skipped_existing.saturating_sub(existing_before_batch),
9770 total_inserted,
9771 total_skipped_orphans,
9772 total_skipped_existing,
9773 last_rowid,
9774 missing_only,
9775 "FTS streaming maintenance batch complete"
9776 );
9777
9778 if fetched_count < batch_size {
9779 break;
9780 }
9781 }
9782
9783 Ok(total_inserted)
9784 }
9785
9786 fn fetch_fts_rebuild_message_rows(
9787 &self,
9788 last_rowid: i64,
9789 batch_limit: i64,
9790 ) -> Result<Vec<FtsRebuildMessageRow>> {
9791 self.conn
9792 .query_map_collect(
9793 "SELECT m.rowid, m.id, m.conversation_id, m.content, m.created_at
9794 FROM messages m
9795 WHERE m.rowid > ?1
9796 ORDER BY m.rowid
9797 LIMIT ?2",
9798 fparams![last_rowid, batch_limit],
9799 |row| {
9800 Ok(FtsRebuildMessageRow {
9801 rowid: row.get_typed(0)?,
9802 message_id: row.get_typed(1)?,
9803 conversation_id: row.get_typed(2)?,
9804 content: row.get_typed::<Option<String>>(3)?.unwrap_or_default(),
9805 created_at: row.get_typed(4)?,
9806 })
9807 },
9808 )
9809 .with_context(|| format!("fetching FTS maintenance messages after rowid {last_rowid}"))
9810 }
9811
9812 fn load_fts_message_rowid_set(&self) -> Result<HashSet<i64>> {
9813 let rows: Vec<i64> = self
9814 .conn
9815 .query_map_collect("SELECT rowid FROM fts_messages", fparams![], |row| {
9816 row.get_typed(0)
9817 })
9818 .with_context(|| "loading existing FTS message rowids")?;
9819 Ok(rows.into_iter().collect())
9820 }
9821
9822 fn load_fts_conversation_projection_map(
9823 &self,
9824 ) -> Result<HashMap<i64, FtsConversationProjection>> {
9825 let rows: Vec<(i64, FtsConversationProjection)> = self
9826 .conn
9827 .query_map_collect(
9828 "SELECT id, title, agent_id, workspace_id, source_path
9829 FROM conversations",
9830 fparams![],
9831 |row| {
9832 Ok((
9833 row.get_typed(0)?,
9834 FtsConversationProjection {
9835 title: row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
9836 agent_id: row.get_typed(2)?,
9837 workspace_id: row.get_typed(3)?,
9838 source_path: row.get_typed::<Option<String>>(4)?.unwrap_or_default(),
9839 },
9840 ))
9841 },
9842 )
9843 .with_context(|| "loading FTS conversation projection map")?;
9844 Ok(rows.into_iter().collect())
9845 }
9846
9847 fn load_fts_agent_slug_map(&self) -> Result<HashMap<i64, String>> {
9848 let rows: Vec<(i64, String)> = self
9849 .conn
9850 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
9851 Ok((
9852 row.get_typed(0)?,
9853 row.get_typed::<Option<String>>(1)?
9854 .unwrap_or_else(|| "unknown".to_string()),
9855 ))
9856 })
9857 .with_context(|| "loading FTS agent slug map")?;
9858 Ok(rows.into_iter().collect())
9859 }
9860
9861 fn load_fts_workspace_path_map(&self) -> Result<HashMap<i64, String>> {
9862 let rows: Vec<(i64, String)> = self
9863 .conn
9864 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
9865 Ok((
9866 row.get_typed(0)?,
9867 row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
9868 ))
9869 })
9870 .with_context(|| "loading FTS workspace path map")?;
9871 Ok(rows.into_iter().collect())
9872 }
9873
9874 pub fn fetch_messages_for_embedding(&self) -> Result<Vec<MessageForEmbedding>> {
9876 self.conn
9881 .query_map_collect(
9882 "SELECT m.id, m.created_at, COALESCE(c.agent_id, 0), c.workspace_id, c.source_id, m.role, m.content
9883 FROM messages m
9884 JOIN conversations c ON m.conversation_id = c.id
9885 ORDER BY m.id",
9886 fparams![],
9887 |row| {
9888 let source_id: String = row.get_typed::<Option<String>>(4)?
9889 .unwrap_or_else(|| "local".to_string());
9890 Ok(MessageForEmbedding {
9891 message_id: row.get_typed(0)?,
9892 created_at: row.get_typed(1)?,
9893 agent_id: row.get_typed(2)?,
9894 workspace_id: row.get_typed(3)?,
9895 source_id_hash: crc32fast::hash(source_id.as_bytes()),
9896 role: row.get_typed(5)?,
9897 content: row.get_typed(6)?,
9898 })
9899 },
9900 )
9901 .with_context(|| "fetching messages for embedding")
9902 }
9903
9904 pub fn get_last_embedded_message_id(&self) -> Result<Option<i64>> {
9906 let result: Result<String, _> = self.conn.query_row_map(
9907 "SELECT value FROM meta WHERE key = 'last_embedded_message_id'",
9908 fparams![],
9909 |row| row.get_typed(0),
9910 );
9911 match result.optional() {
9912 Ok(Some(s)) => Ok(s.parse().ok()),
9913 Ok(None) => Ok(None),
9914 Err(e) => Err(e.into()),
9915 }
9916 }
9917
9918 pub fn set_last_embedded_message_id(&self, id: i64) -> Result<()> {
9920 self.conn.execute_compat(
9921 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_embedded_message_id', ?1)",
9922 fparams![id.to_string()],
9923 )?;
9924 Ok(())
9925 }
9926
9927 pub fn get_embedding_jobs(&self, db_path: &str) -> Result<Vec<EmbeddingJobRow>> {
9929 self.conn
9930 .query_map_collect(
9931 "SELECT id, db_path, model_id, status, total_docs, completed_docs, error_message, created_at, started_at, completed_at
9932 FROM embedding_jobs WHERE db_path = ?1 ORDER BY id DESC",
9933 fparams![db_path],
9934 |row| {
9935 Ok(EmbeddingJobRow {
9936 id: row.get_typed(0)?,
9937 db_path: row.get_typed(1)?,
9938 model_id: row.get_typed(2)?,
9939 status: row.get_typed(3)?,
9940 total_docs: row.get_typed(4)?,
9941 completed_docs: row.get_typed(5)?,
9942 error_message: row.get_typed(6)?,
9943 created_at: row.get_typed(7)?,
9944 started_at: row.get_typed(8)?,
9945 completed_at: row.get_typed(9)?,
9946 })
9947 },
9948 )
9949 .with_context(|| format!("fetching embedding jobs for {db_path}"))
9950 }
9951
9952 pub fn upsert_embedding_job(
9954 &self,
9955 db_path: &str,
9956 model_id: &str,
9957 total_docs: i64,
9958 ) -> Result<i64> {
9959 let updated = self.conn.execute_compat(
9960 "UPDATE embedding_jobs
9961 SET total_docs = ?3
9962 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
9963 fparams![db_path, model_id, total_docs],
9964 )?;
9965 if updated == 0 {
9966 let insert_result = self.conn.execute_compat(
9967 "INSERT INTO embedding_jobs(db_path, model_id, total_docs) VALUES(?1,?2,?3)",
9968 fparams![db_path, model_id, total_docs],
9969 );
9970 if let Err(err) = insert_result {
9971 if !matches!(err, frankensqlite::FrankenError::UniqueViolation { .. }) {
9972 return Err(err.into());
9973 }
9974 self.conn.execute_compat(
9975 "UPDATE embedding_jobs
9976 SET total_docs = ?3
9977 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
9978 fparams![db_path, model_id, total_docs],
9979 )?;
9980 }
9981 }
9982 self.conn
9983 .query_row_map(
9984 "SELECT id FROM embedding_jobs
9985 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')
9986 ORDER BY id DESC
9987 LIMIT 1",
9988 fparams![db_path, model_id],
9989 |row| row.get_typed(0),
9990 )
9991 .with_context(|| "resolving embedding job id after upsert")
9992 }
9993
9994 pub fn start_embedding_job(&self, job_id: i64) -> Result<()> {
9996 self.conn.execute_compat(
9997 "UPDATE embedding_jobs SET status = 'running', started_at = datetime('now') WHERE id = ?1",
9998 fparams![job_id],
9999 )?;
10000 Ok(())
10001 }
10002
10003 pub fn complete_embedding_job(&self, job_id: i64) -> Result<()> {
10005 self.conn.execute_compat(
10006 "UPDATE embedding_jobs SET status = 'completed', completed_at = datetime('now') WHERE id = ?1",
10007 fparams![job_id],
10008 )?;
10009 Ok(())
10010 }
10011
10012 pub fn fail_embedding_job(&self, job_id: i64, error: &str) -> Result<()> {
10014 self.conn.execute_compat(
10015 "UPDATE embedding_jobs SET status = 'failed', error_message = ?2, completed_at = datetime('now') WHERE id = ?1",
10016 fparams![job_id, error],
10017 )?;
10018 Ok(())
10019 }
10020
10021 pub fn cancel_embedding_jobs(&self, db_path: &str, model_id: Option<&str>) -> Result<usize> {
10023 if let Some(mid) = model_id {
10024 Ok(self.conn.execute_compat(
10025 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10026 fparams![db_path, mid],
10027 )?)
10028 } else {
10029 Ok(self.conn.execute_compat(
10030 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND status IN ('pending', 'running')",
10031 fparams![db_path],
10032 )?)
10033 }
10034 }
10035
10036 pub fn update_job_progress(&self, job_id: i64, completed_docs: i64) -> Result<()> {
10038 self.conn.execute_compat(
10039 "UPDATE embedding_jobs SET completed_docs = ?2 WHERE id = ?1",
10040 fparams![job_id, completed_docs],
10041 )?;
10042 Ok(())
10043 }
10044
10045 pub fn count_sessions_in_range(
10054 &self,
10055 start_ts_ms: Option<i64>,
10056 end_ts_ms: Option<i64>,
10057 agent_slug: Option<&str>,
10058 source_id: Option<&str>,
10059 ) -> Result<(i64, bool)> {
10060 let agent = agent_slug.unwrap_or("all");
10061 let source = source_id.unwrap_or("all");
10062
10063 let stats_count: i64 = self
10065 .conn
10066 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10067 row.get_typed(0)
10068 })
10069 .unwrap_or(0);
10070
10071 if stats_count == 0 {
10072 return self.count_sessions_direct(start_ts_ms, end_ts_ms, agent_slug, source_id);
10073 }
10074
10075 let start_day = start_ts_ms.map(Self::day_id_from_millis);
10077 let end_day = end_ts_ms.map(Self::day_id_from_millis);
10078
10079 let count: i64 = match (start_day, end_day) {
10080 (Some(start), Some(end)) => self.conn.query_row_map(
10081 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10082 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4",
10083 fparams![start, end, agent, source],
10084 |row| row.get_typed(0),
10085 )?,
10086 (Some(start), None) => self.conn.query_row_map(
10087 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10088 WHERE day_id >= ?1 AND agent_slug = ?2 AND source_id = ?3",
10089 fparams![start, agent, source],
10090 |row| row.get_typed(0),
10091 )?,
10092 (None, Some(end)) => self.conn.query_row_map(
10093 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10094 WHERE day_id <= ?1 AND agent_slug = ?2 AND source_id = ?3",
10095 fparams![end, agent, source],
10096 |row| row.get_typed(0),
10097 )?,
10098 (None, None) => self.conn.query_row_map(
10099 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10100 WHERE agent_slug = ?1 AND source_id = ?2",
10101 fparams![agent, source],
10102 |row| row.get_typed(0),
10103 )?,
10104 };
10105
10106 Ok((count, true))
10107 }
10108
10109 fn count_sessions_direct(
10111 &self,
10112 start_ts_ms: Option<i64>,
10113 end_ts_ms: Option<i64>,
10114 agent_slug: Option<&str>,
10115 source_id: Option<&str>,
10116 ) -> Result<(i64, bool)> {
10117 let mut sql = "SELECT COUNT(*) FROM conversations c WHERE 1=1".to_string();
10124 let mut param_values: Vec<ParamValue> = Vec::new();
10125 let mut idx = 1;
10126
10127 if let Some(start) = start_ts_ms {
10128 sql.push_str(&format!(" AND c.started_at >= ?{idx}"));
10129 param_values.push(ParamValue::from(start));
10130 idx += 1;
10131 }
10132 if let Some(end) = end_ts_ms {
10133 sql.push_str(&format!(" AND c.started_at <= ?{idx}"));
10134 param_values.push(ParamValue::from(end));
10135 idx += 1;
10136 }
10137 if let Some(agent) = agent_slug
10138 && agent != "all"
10139 {
10140 sql.push_str(&format!(
10141 " AND EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug = ?{idx})"
10142 ));
10143 param_values.push(ParamValue::from(agent));
10144 idx += 1;
10145 }
10146 if let Some(source) = source_id
10147 && source != "all"
10148 {
10149 sql.push_str(&format!(" AND c.source_id = ?{idx}"));
10150 param_values.push(ParamValue::from(source));
10151 let _ = idx; }
10153
10154 let count: i64 = self
10155 .conn
10156 .query_row_map(&sql, ¶m_values, |row| row.get_typed(0))?;
10157 Ok((count, false))
10158 }
10159
10160 pub fn get_daily_histogram(
10162 &self,
10163 start_ts_ms: i64,
10164 end_ts_ms: i64,
10165 agent_slug: Option<&str>,
10166 source_id: Option<&str>,
10167 ) -> Result<Vec<DailyCount>> {
10168 let start_day = Self::day_id_from_millis(start_ts_ms);
10169 let end_day = Self::day_id_from_millis(end_ts_ms);
10170 let agent = agent_slug.unwrap_or("all");
10171 let source = source_id.unwrap_or("all");
10172
10173 let rows = self.conn.query_map_collect(
10174 "SELECT day_id, session_count, message_count, total_chars
10175 FROM daily_stats
10176 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4
10177 ORDER BY day_id",
10178 fparams![start_day, end_day, agent, source],
10179 |row| {
10180 Ok(DailyCount {
10181 day_id: row.get_typed(0)?,
10182 sessions: row.get_typed(1)?,
10183 messages: row.get_typed(2)?,
10184 chars: row.get_typed(3)?,
10185 })
10186 },
10187 )?;
10188
10189 Ok(rows)
10190 }
10191
10192 pub fn daily_stats_health(&self) -> Result<DailyStatsHealth> {
10194 let row_count: i64 =
10195 self.conn
10196 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10197 row.get_typed(0)
10198 })?;
10199
10200 let oldest_update: Option<i64> = self.conn.query_row_map(
10201 "SELECT MIN(last_updated) FROM daily_stats",
10202 fparams![],
10203 |row| row.get_typed(0),
10204 )?;
10205
10206 let conversation_count: i64 =
10207 self.conn
10208 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
10209 row.get_typed(0)
10210 })?;
10211
10212 let materialized_total: i64 = self.conn.query_row_map(
10213 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10214 WHERE agent_slug = 'all' AND source_id = 'all'",
10215 fparams![],
10216 |row| row.get_typed(0),
10217 )?;
10218
10219 Ok(DailyStatsHealth {
10220 populated: row_count > 0,
10221 row_count,
10222 oldest_update_ms: oldest_update,
10223 conversation_count,
10224 materialized_total,
10225 drift: (conversation_count - materialized_total).abs(),
10226 })
10227 }
10228
10229 pub fn insert_conversations_batched(
10233 &self,
10234 conversations: &[(i64, Option<i64>, &Conversation)],
10235 ) -> Result<Vec<InsertOutcome>> {
10236 if conversations.is_empty() {
10237 return Ok(Vec::new());
10238 }
10239
10240 self.ensure_sources_for_batch(conversations)?;
10241
10242 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
10243 let defer_analytics_updates = defer_analytics_updates_enabled();
10244
10245 let pricing_table = PricingTable::franken_load(&self.conn).unwrap_or_else(|e| {
10246 tracing::warn!(target: "cass::analytics::pricing", error = %e, "failed to load pricing table");
10247 PricingTable { entries: Vec::new() }
10248 });
10249 let mut pricing_diag = PricingDiagnostics::default();
10250
10251 let mut tx = self.conn.transaction()?;
10252
10253 ensure_agents_in_tx(&tx, conversations)?;
10260 ensure_workspaces_in_tx(&tx, conversations)?;
10261 ensure_sources_in_tx(&tx, conversations)?;
10262
10263 let mut outcomes = Vec::with_capacity(conversations.len());
10264 let mut fts_entries = Vec::new();
10265 let mut fts_pending_chars = 0usize;
10266 let mut fts_inserted_total = 0usize;
10267 let mut fts_count_total = 0usize;
10268 let mut stats = StatsAggregator::new();
10269 let mut token_stats = TokenStatsAggregator::new();
10270 let mut token_entries: Vec<TokenUsageEntry> = Vec::new();
10271 let mut metrics_entries: Vec<MessageMetricsEntry> = Vec::new();
10272 let mut rollup_agg = AnalyticsRollupAggregator::new();
10273 let mut conv_ids_to_summarize: Vec<i64> = Vec::new();
10274 let mut pending_conversation_ids: HashMap<PendingConversationKey, i64> = HashMap::new();
10275 let mut pending_message_fingerprints: HashMap<i64, HashMap<i64, MessageMergeFingerprint>> =
10276 HashMap::new();
10277 let mut pending_message_replay_fingerprints: HashMap<
10278 i64,
10279 HashSet<MessageReplayFingerprint>,
10280 > = HashMap::new();
10281
10282 for &(agent_id, workspace_id, raw_conv) in conversations {
10283 let normalized_conv = normalized_conversation_for_storage(raw_conv);
10284 let conv = normalized_conv.as_ref();
10285 let mut total_chars: i64 = 0;
10286 let mut inserted_indices = Vec::with_capacity(conv.messages.len());
10287 let mut inserted_messages: Vec<(i64, &Message)> =
10288 Vec::with_capacity(conv.messages.len());
10289 let mut session_count_delta = 1_i64;
10290 let conversation_key = conversation_merge_key(agent_id, conv);
10291
10292 let existing_conv_id = if let Some(existing_id) =
10293 pending_conversation_ids.get(&conversation_key)
10294 {
10295 Some(*existing_id)
10296 } else {
10297 let existing_id =
10298 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
10299 if let Some(existing_id) = existing_id {
10300 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10301 }
10302 existing_id
10303 };
10304
10305 let conv_id = if let Some(existing_id) = existing_conv_id {
10306 session_count_delta = 0;
10307 let ExistingMessageLookup {
10308 by_idx: mut existing_messages,
10309 replay: mut existing_replay_fingerprints,
10310 } = franken_existing_message_lookup_with_pending(
10311 &tx,
10312 existing_id,
10313 &conv.messages,
10314 &mut pending_message_fingerprints,
10315 &mut pending_message_replay_fingerprints,
10316 )?;
10317 let ExistingConversationNewMessages {
10318 messages: new_messages,
10319 new_chars,
10320 idx_collision_count,
10321 first_collision_idx,
10322 } = collect_new_messages_for_existing_conversation(
10323 existing_id,
10324 conv,
10325 &mut existing_messages,
10326 &mut existing_replay_fingerprints,
10327 "skipping replay-equivalent recovered message with shifted idx during batched merge",
10328 );
10329 let (inserted_last_idx, inserted_last_created_at) =
10330 borrowed_messages_tail_state(&new_messages);
10331 let inserted_message_ids =
10332 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10333 total_chars += new_chars;
10334 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10335 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10336 if !defer_lexical_updates {
10337 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10338 fts_count_total += 1;
10339 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
10340 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10341 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10342 {
10343 flush_pending_fts_entries(
10344 self,
10345 &tx,
10346 &mut fts_entries,
10347 &mut fts_pending_chars,
10348 &mut fts_inserted_total,
10349 )?;
10350 }
10351 }
10352 inserted_indices.push(msg.idx);
10353 inserted_messages.push((msg_id, msg));
10354 }
10355
10356 if idx_collision_count > 0 {
10357 tracing::warn!(
10358 conversation_id = existing_id,
10359 collision_count = idx_collision_count,
10360 first_idx = first_collision_idx,
10361 source_path = %conv.source_path.display(),
10362 "message idx collisions encountered during batched conversation merge; retaining canonical message variants"
10363 );
10364 }
10365
10366 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10367 franken_update_conversation_tail_state(
10368 &tx,
10369 existing_id,
10370 conv_last_ts,
10371 inserted_last_idx,
10372 inserted_last_created_at,
10373 )?;
10374 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
10375 {
10376 franken_update_external_conversation_tail_lookup_key(
10377 &tx,
10378 &lookup_key,
10379 conv_last_ts,
10380 inserted_last_idx,
10381 inserted_last_created_at,
10382 )?;
10383 }
10384
10385 pending_message_fingerprints.insert(existing_id, existing_messages);
10386 pending_message_replay_fingerprints
10387 .insert(existing_id, existing_replay_fingerprints);
10388
10389 existing_id
10390 } else {
10391 match franken_insert_conversation_or_get_existing(
10392 &tx,
10393 agent_id,
10394 workspace_id,
10395 conv,
10396 )? {
10397 ConversationInsertStatus::Inserted(new_conv_id) => {
10398 pending_conversation_ids.insert(conversation_key.clone(), new_conv_id);
10399 let pending_messages =
10400 pending_message_fingerprints.entry(new_conv_id).or_default();
10401 let pending_replay_fingerprints = pending_message_replay_fingerprints
10402 .entry(new_conv_id)
10403 .or_default();
10404 let mut new_messages = Vec::new();
10405 for msg in &conv.messages {
10406 let incoming_replay = message_replay_fingerprint(msg);
10407 if pending_messages.contains_key(&msg.idx)
10408 || pending_replay_fingerprints.contains(&incoming_replay)
10409 {
10410 continue;
10411 }
10412 pending_messages.insert(msg.idx, message_merge_fingerprint(msg));
10413 pending_replay_fingerprints.insert(incoming_replay);
10414 new_messages.push(msg);
10415 }
10416 let inserted_message_ids =
10417 franken_batch_insert_new_messages(&tx, new_conv_id, &new_messages)?;
10418 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10419 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10420 if !defer_lexical_updates {
10421 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10422 fts_count_total += 1;
10423 fts_pending_chars =
10424 fts_pending_chars.saturating_add(msg.content.len());
10425 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10426 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10427 {
10428 flush_pending_fts_entries(
10429 self,
10430 &tx,
10431 &mut fts_entries,
10432 &mut fts_pending_chars,
10433 &mut fts_inserted_total,
10434 )?;
10435 }
10436 }
10437 total_chars += msg.content.len() as i64;
10438 inserted_indices.push(msg.idx);
10439 inserted_messages.push((msg_id, msg));
10440 }
10441 new_conv_id
10442 }
10443 ConversationInsertStatus::Existing(existing_id) => {
10444 session_count_delta = 0;
10445 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10446 let ExistingMessageLookup {
10447 by_idx: mut existing_messages,
10448 replay: mut existing_replay_fingerprints,
10449 } = franken_existing_message_lookup_with_pending(
10450 &tx,
10451 existing_id,
10452 &conv.messages,
10453 &mut pending_message_fingerprints,
10454 &mut pending_message_replay_fingerprints,
10455 )?;
10456 let ExistingConversationNewMessages {
10457 messages: new_messages,
10458 new_chars,
10459 idx_collision_count,
10460 first_collision_idx,
10461 } = collect_new_messages_for_existing_conversation(
10462 existing_id,
10463 conv,
10464 &mut existing_messages,
10465 &mut existing_replay_fingerprints,
10466 "skipping replay-equivalent recovered message with shifted idx after duplicate conversation recovery",
10467 );
10468 let (inserted_last_idx, inserted_last_created_at) =
10469 borrowed_messages_tail_state(&new_messages);
10470 let inserted_message_ids =
10471 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10472 total_chars += new_chars;
10473 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10474 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10475 if !defer_lexical_updates {
10476 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10477 fts_count_total += 1;
10478 fts_pending_chars =
10479 fts_pending_chars.saturating_add(msg.content.len());
10480 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10481 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10482 {
10483 flush_pending_fts_entries(
10484 self,
10485 &tx,
10486 &mut fts_entries,
10487 &mut fts_pending_chars,
10488 &mut fts_inserted_total,
10489 )?;
10490 }
10491 }
10492 inserted_indices.push(msg.idx);
10493 inserted_messages.push((msg_id, msg));
10494 }
10495
10496 if idx_collision_count > 0 {
10497 tracing::warn!(
10498 conversation_id = existing_id,
10499 collision_count = idx_collision_count,
10500 first_idx = first_collision_idx,
10501 source_path = %conv.source_path.display(),
10502 "message idx collisions encountered after duplicate conversation recovery; retaining canonical message variants"
10503 );
10504 }
10505
10506 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10507 franken_update_conversation_tail_state(
10508 &tx,
10509 existing_id,
10510 conv_last_ts,
10511 inserted_last_idx,
10512 inserted_last_created_at,
10513 )?;
10514 if let Some(lookup_key) =
10515 conversation_external_lookup_key_for_conv(agent_id, conv)
10516 {
10517 franken_update_external_conversation_tail_lookup_key(
10518 &tx,
10519 &lookup_key,
10520 conv_last_ts,
10521 inserted_last_idx,
10522 inserted_last_created_at,
10523 )?;
10524 }
10525
10526 pending_message_fingerprints.insert(existing_id, existing_messages);
10527 pending_message_replay_fingerprints
10528 .insert(existing_id, existing_replay_fingerprints);
10529
10530 existing_id
10531 }
10532 }
10533 };
10534
10535 if !defer_analytics_updates {
10536 let delta = StatsDelta {
10537 session_count_delta,
10538 message_count_delta: inserted_messages.len() as i64,
10539 total_chars_delta: total_chars,
10540 };
10541
10542 let effective_started_at = conversation_effective_started_at(conv);
10543 let day_id = effective_started_at
10544 .map(FrankenStorage::day_id_from_millis)
10545 .unwrap_or(0);
10546 stats.record_delta(
10547 &conv.agent_slug,
10548 &conv.source_id,
10549 day_id,
10550 delta.session_count_delta,
10551 delta.message_count_delta,
10552 delta.total_chars_delta,
10553 );
10554
10555 let conv_day_id = day_id;
10556 let mut session_model_family = String::from("unknown");
10557 let mut has_any_tokens = false;
10558
10559 for &(message_id, msg) in &inserted_messages {
10560 let role_s = role_str(&msg.role);
10561 let usage = if historical_raw_json(&msg.extra_json).is_some() {
10562 crate::connectors::extract_tokens_for_agent(
10563 &conv.agent_slug,
10564 &serde_json::Value::Null,
10565 &msg.content,
10566 &role_s,
10567 )
10568 } else {
10569 crate::connectors::extract_tokens_for_agent(
10570 &conv.agent_slug,
10571 &msg.extra_json,
10572 &msg.content,
10573 &role_s,
10574 )
10575 };
10576
10577 let msg_ts = msg
10578 .created_at
10579 .or(conversation_effective_started_at(conv))
10580 .unwrap_or(0);
10581 let msg_day_id = if msg_ts > 0 {
10582 FrankenStorage::day_id_from_millis(msg_ts)
10583 } else {
10584 conv_day_id
10585 };
10586
10587 let model_info = usage
10588 .model_name
10589 .as_deref()
10590 .map(crate::connectors::normalize_model);
10591
10592 let model_family = model_info
10593 .as_ref()
10594 .map(|i| i.family.clone())
10595 .unwrap_or_else(|| "unknown".into());
10596 let model_tier = model_info
10597 .as_ref()
10598 .map(|i| i.tier.clone())
10599 .unwrap_or_else(|| "unknown".into());
10600 let provider = usage
10601 .provider
10602 .clone()
10603 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
10604 .unwrap_or_else(|| "unknown".into());
10605
10606 if model_family != "unknown" {
10607 session_model_family = model_family.clone();
10608 }
10609
10610 let estimated_cost = pricing_table.compute_cost(
10611 usage.model_name.as_deref(),
10612 msg_day_id,
10613 usage.input_tokens,
10614 usage.output_tokens,
10615 usage.cache_read_tokens,
10616 usage.cache_creation_tokens,
10617 );
10618 if estimated_cost.is_some() {
10619 pricing_diag.record_priced();
10620 } else if usage.has_token_data() {
10621 pricing_diag.record_unpriced(usage.model_name.as_deref());
10622 }
10623
10624 token_stats.record(
10625 &conv.agent_slug,
10626 &conv.source_id,
10627 msg_day_id,
10628 &model_family,
10629 &role_s,
10630 &usage,
10631 msg.content.len() as i64,
10632 estimated_cost.unwrap_or(0.0),
10633 );
10634
10635 if usage.has_token_data() {
10636 has_any_tokens = true;
10637 }
10638
10639 let content_chars = msg.content.len() as i64;
10640 let content_tokens_est = content_chars / 4;
10641 let msg_hour_id = FrankenStorage::hour_id_from_millis(msg_ts);
10642 let has_plan = has_plan_for_role(&role_s, &msg.content);
10643
10644 token_entries.push(TokenUsageEntry {
10645 message_id,
10646 conversation_id: conv_id,
10647 agent_id,
10648 workspace_id,
10649 source_id: conv.source_id.clone(),
10650 timestamp_ms: msg_ts,
10651 day_id: msg_day_id,
10652 model_name: usage.model_name.clone(),
10653 model_family: Some(model_family.clone()),
10654 model_tier: Some(model_tier.clone()),
10655 service_tier: usage.service_tier.clone(),
10656 provider: Some(provider.clone()),
10657 input_tokens: usage.input_tokens,
10658 output_tokens: usage.output_tokens,
10659 cache_read_tokens: usage.cache_read_tokens,
10660 cache_creation_tokens: usage.cache_creation_tokens,
10661 thinking_tokens: usage.thinking_tokens,
10662 total_tokens: usage.total_tokens(),
10663 estimated_cost_usd: estimated_cost,
10664 role: role_s.to_string(),
10665 content_chars,
10666 has_tool_calls: usage.has_tool_calls,
10667 tool_call_count: usage.tool_call_count,
10668 data_source: usage.data_source.as_str().to_string(),
10669 });
10670
10671 let mm = MessageMetricsEntry {
10672 message_id,
10673 created_at_ms: msg_ts,
10674 hour_id: msg_hour_id,
10675 day_id: msg_day_id,
10676 agent_slug: conv.agent_slug.clone(),
10677 workspace_id: workspace_id.unwrap_or(0),
10678 source_id: conv.source_id.clone(),
10679 role: role_s.to_string(),
10680 content_chars,
10681 content_tokens_est,
10682 model_name: usage.model_name.clone(),
10683 model_family: model_family.clone(),
10684 model_tier: model_tier.clone(),
10685 provider,
10686 api_input_tokens: usage.input_tokens,
10687 api_output_tokens: usage.output_tokens,
10688 api_cache_read_tokens: usage.cache_read_tokens,
10689 api_cache_creation_tokens: usage.cache_creation_tokens,
10690 api_thinking_tokens: usage.thinking_tokens,
10691 api_service_tier: usage.service_tier.clone(),
10692 api_data_source: usage.data_source.as_str().to_string(),
10693 tool_call_count: usage.tool_call_count as i64,
10694 has_tool_calls: usage.has_tool_calls,
10695 has_plan,
10696 };
10697 rollup_agg.record(&mm);
10698 metrics_entries.push(mm);
10699 }
10700
10701 if session_count_delta > 0 {
10702 token_stats.record_session(
10703 &conv.agent_slug,
10704 &conv.source_id,
10705 conv_day_id,
10706 &session_model_family,
10707 );
10708 }
10709
10710 if has_any_tokens {
10711 conv_ids_to_summarize.push(conv_id);
10712 }
10713 }
10714
10715 outcomes.push(InsertOutcome {
10716 conversation_id: conv_id,
10717 conversation_inserted: session_count_delta > 0,
10718 inserted_indices,
10719 });
10720 }
10721
10722 if !defer_lexical_updates {
10724 flush_pending_fts_entries(
10725 self,
10726 &tx,
10727 &mut fts_entries,
10728 &mut fts_pending_chars,
10729 &mut fts_inserted_total,
10730 )?;
10731 }
10732 if !defer_lexical_updates && fts_count_total > 0 {
10733 tracing::debug!(
10734 target: "cass::perf::fts5",
10735 total = fts_count_total,
10736 inserted = fts_inserted_total,
10737 conversations = conversations.len(),
10738 "franken_batch_fts_insert_complete"
10739 );
10740 }
10741
10742 if !defer_analytics_updates && !stats.is_empty() {
10744 let entries = stats.expand();
10745 let affected = franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
10746 tracing::debug!(
10747 target: "cass::perf::daily_stats",
10748 raw = stats.raw_entry_count(),
10749 expanded = entries.len(),
10750 affected = affected,
10751 "franken_batched_stats_update_complete"
10752 );
10753 }
10754
10755 if !defer_analytics_updates && !token_entries.is_empty() {
10757 let token_count = token_entries.len();
10758 let inserted = franken_insert_token_usage_batched_in_tx(&tx, &token_entries)?;
10759 tracing::debug!(
10760 target: "cass::perf::token_usage",
10761 total = token_count,
10762 inserted = inserted,
10763 "franken_batch_token_usage_insert_complete"
10764 );
10765 }
10766
10767 if !defer_analytics_updates && !token_stats.is_empty() {
10769 let entries = token_stats.expand();
10770 let affected = franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
10771 tracing::debug!(
10772 target: "cass::perf::token_daily_stats",
10773 raw = token_stats.raw_entry_count(),
10774 expanded = entries.len(),
10775 affected = affected,
10776 "franken_batched_token_stats_update_complete"
10777 );
10778 }
10779
10780 if !defer_analytics_updates && !metrics_entries.is_empty() {
10782 let mm_count = metrics_entries.len();
10783 let inserted = franken_insert_message_metrics_batched_in_tx(&tx, &metrics_entries)?;
10784 tracing::debug!(
10785 target: "cass::perf::message_metrics",
10786 total = mm_count,
10787 inserted = inserted,
10788 "franken_batch_message_metrics_insert_complete"
10789 );
10790 }
10791
10792 if !defer_analytics_updates && !rollup_agg.is_empty() {
10794 let (hourly, daily, models_daily) =
10795 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
10796 tracing::debug!(
10797 target: "cass::perf::usage_rollups",
10798 hourly_buckets = rollup_agg.hourly_entry_count(),
10799 daily_buckets = rollup_agg.daily_entry_count(),
10800 models_daily_buckets = rollup_agg.models_daily_entry_count(),
10801 hourly_affected = hourly,
10802 daily_affected = daily,
10803 models_daily_affected = models_daily,
10804 "franken_batched_usage_rollups_complete"
10805 );
10806 }
10807
10808 if !defer_analytics_updates {
10810 for conv_id in &conv_ids_to_summarize {
10811 franken_update_conversation_token_summaries_in_tx(&tx, *conv_id)?;
10812 }
10813 }
10814
10815 tx.commit()?;
10816
10817 pricing_diag.log_summary();
10818
10819 Ok(outcomes)
10820 }
10821}
10822
10823fn normalized_storage_source_parts(
10824 source_id: Option<&str>,
10825 origin_kind: Option<&str>,
10826 origin_host: Option<&str>,
10827) -> (String, SourceKind, Option<String>) {
10828 let host_label = crate::search::tantivy::normalized_index_origin_host(origin_host);
10829 let source_id = crate::search::tantivy::normalized_index_source_id(
10830 source_id,
10831 origin_kind,
10832 host_label.as_deref(),
10833 );
10834
10835 if source_id == LOCAL_SOURCE_ID {
10836 (source_id, SourceKind::Local, None)
10837 } else {
10838 (source_id, SourceKind::Ssh, host_label)
10839 }
10840}
10841
10842fn normalized_source_for_conversation(conv: &Conversation) -> Source {
10843 let (id, kind, host_label) = normalized_storage_source_parts(
10844 Some(conv.source_id.as_str()),
10845 None,
10846 conv.origin_host.as_deref(),
10847 );
10848 Source {
10849 id,
10850 kind,
10851 host_label,
10852 machine_id: None,
10853 platform: None,
10854 config_json: None,
10855 created_at: None,
10856 updated_at: None,
10857 }
10858}
10859
10860fn is_bootstrap_local_source(source: &Source) -> bool {
10861 source.id == LOCAL_SOURCE_ID
10862 && matches!(source.kind, SourceKind::Local)
10863 && source.host_label.is_none()
10864 && source.machine_id.is_none()
10865 && source.platform.is_none()
10866 && source.config_json.is_none()
10867 && source.created_at.is_none()
10868 && source.updated_at.is_none()
10869}
10870
10871fn normalized_conversation_for_storage<'a>(conv: &'a Conversation) -> Cow<'a, Conversation> {
10872 let normalized_source = normalized_source_for_conversation(conv);
10873 if normalized_source.id == conv.source_id && normalized_source.host_label == conv.origin_host {
10874 Cow::Borrowed(conv)
10875 } else {
10876 let mut normalized = conv.clone();
10877 normalized.source_id = normalized_source.id;
10878 normalized.origin_host = normalized_source.host_label;
10879 Cow::Owned(normalized)
10880 }
10881}
10882
10883impl FrankenStorage {
10884 fn ensure_source_for_conversation(&self, conv: &Conversation) -> Result<()> {
10885 let source = normalized_source_for_conversation(conv);
10886 if is_bootstrap_local_source(&source) {
10887 return Ok(());
10890 }
10891 let cache_key = EnsuredConversationSourceKey::from_source(&source);
10892 if self.conversation_source_already_ensured(&cache_key) {
10893 return Ok(());
10894 }
10895 self.upsert_source(&source)?;
10896 self.mark_conversation_source_ensured(cache_key);
10897 Ok(())
10898 }
10899
10900 fn ensure_sources_for_batch(
10901 &self,
10902 conversations: &[(i64, Option<i64>, &Conversation)],
10903 ) -> Result<()> {
10904 let mut seen = HashSet::with_capacity(conversations.len());
10905 for &(_, _, conv) in conversations {
10906 let source = normalized_source_for_conversation(conv);
10907 if seen.insert(source.id.clone()) {
10908 if is_bootstrap_local_source(&source) {
10909 continue;
10910 }
10911 self.upsert_source(&source)?;
10912 self.mark_conversation_source_ensured(EnsuredConversationSourceKey::from_source(
10913 &source,
10914 ));
10915 }
10916 }
10917 Ok(())
10918 }
10919}
10920
10921fn franken_last_rowid(tx: &FrankenTransaction<'_>) -> Result<i64> {
10927 tx.last_insert_rowid()
10928 .ok()
10929 .filter(|&id| id > 0)
10930 .with_context(|| "last_insert_rowid() returned NULL or 0 after INSERT")
10931}
10932
10933fn ensure_agents_in_tx(
10939 tx: &FrankenTransaction<'_>,
10940 conversations: &[(i64, Option<i64>, &Conversation)],
10941) -> Result<()> {
10942 let mut seen = HashSet::new();
10943 let now = FrankenStorage::now_millis();
10944 for &(agent_id, _, conv) in conversations {
10945 if !seen.insert(agent_id) {
10946 continue;
10947 }
10948 let exists: i64 = tx.query_row_map(
10949 "SELECT COUNT(*) FROM agents WHERE id = ?1",
10950 fparams![agent_id],
10951 |row| row.get_typed(0),
10952 )?;
10953 if exists == 0 {
10954 tracing::debug!(
10955 target: "cass::fk_guard",
10956 agent_id,
10957 slug = %conv.agent_slug,
10958 "inserting agent row inside transaction to satisfy FK constraint"
10959 );
10960 tx.execute_compat(
10964 "INSERT OR IGNORE INTO agents(id, slug, name, kind, created_at, updated_at)
10965 VALUES(?1, ?2, ?3, 'cli', ?4, ?5)",
10966 fparams![
10967 agent_id,
10968 conv.agent_slug.as_str(),
10969 conv.agent_slug.as_str(),
10970 now,
10971 now
10972 ],
10973 )?;
10974 }
10975 }
10976 Ok(())
10977}
10978
10979fn ensure_workspaces_in_tx(
10982 tx: &FrankenTransaction<'_>,
10983 conversations: &[(i64, Option<i64>, &Conversation)],
10984) -> Result<()> {
10985 let mut seen = HashSet::new();
10986 for &(_, workspace_id, conv) in conversations {
10987 let ws_id = match workspace_id {
10988 Some(id) => id,
10989 None => continue,
10990 };
10991 if !seen.insert(ws_id) {
10992 continue;
10993 }
10994 let exists: i64 = tx.query_row_map(
10995 "SELECT COUNT(*) FROM workspaces WHERE id = ?1",
10996 fparams![ws_id],
10997 |row| row.get_typed(0),
10998 )?;
10999 if exists == 0 {
11000 let path_str = conv
11001 .workspace
11002 .as_ref()
11003 .map(|p| p.to_string_lossy().to_string())
11004 .unwrap_or_default();
11005 tracing::debug!(
11006 target: "cass::fk_guard",
11007 workspace_id = ws_id,
11008 path = %path_str,
11009 "inserting workspace row inside transaction to satisfy FK constraint"
11010 );
11011 tx.execute_compat(
11012 "INSERT OR IGNORE INTO workspaces(id, path) VALUES(?1, ?2)",
11013 fparams![ws_id, path_str.as_str()],
11014 )?;
11015 }
11016 }
11017 Ok(())
11018}
11019
11020fn ensure_sources_in_tx(
11024 tx: &FrankenTransaction<'_>,
11025 conversations: &[(i64, Option<i64>, &Conversation)],
11026) -> Result<()> {
11027 let mut seen = HashSet::new();
11028 for &(_, _, conv) in conversations {
11029 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
11030 Some(conv.source_id.as_str()),
11031 None,
11032 conv.origin_host.as_deref(),
11033 );
11034 if !seen.insert(source_id.clone()) {
11035 continue;
11036 }
11037 let exists: i64 = tx.query_row_map(
11038 "SELECT COUNT(*) FROM sources WHERE id = ?1",
11039 fparams![source_id.as_str()],
11040 |row| row.get_typed(0),
11041 )?;
11042 if exists == 0 {
11043 let kind_str = source_kind.to_string();
11044 let now = FrankenStorage::now_millis();
11045 tracing::debug!(
11046 target: "cass::fk_guard",
11047 source_id = %source_id,
11048 kind = kind_str.as_str(),
11049 "inserting source row inside transaction to satisfy FK constraint"
11050 );
11051 tx.execute_compat(
11052 "INSERT OR IGNORE INTO sources(id, kind, host_label, created_at, updated_at)
11053 VALUES(?1, ?2, ?3, ?4, ?5)",
11054 fparams![
11055 source_id.as_str(),
11056 kind_str.as_str(),
11057 host_label.as_deref(),
11058 now,
11059 now
11060 ],
11061 )?;
11062 }
11063 }
11064 Ok(())
11065}
11066
11067fn env_flag_enabled(name: &str) -> bool {
11068 dotenvy::var(name)
11069 .map(|v| !(v == "0" || v.eq_ignore_ascii_case("false")))
11070 .unwrap_or(false)
11071}
11072
11073fn defer_storage_lexical_updates_enabled() -> bool {
11074 env_flag_enabled("CASS_DEFER_LEXICAL_UPDATES")
11075}
11076
11077fn defer_analytics_updates_enabled() -> bool {
11078 env_flag_enabled("CASS_DEFER_ANALYTICS_UPDATES")
11079}
11080
/// Outcome of trying to store a conversation: either a new row was created or an
/// already-stored conversation was matched and should be reused.
enum ConversationInsertStatus {
    /// A new conversation row was inserted; carries the new conversation id.
    Inserted(i64),
    /// An existing conversation was found; carries that conversation's id.
    Existing(i64),
}
11085
11086fn franken_find_external_conversation_tail_lookup(
11087 tx: &FrankenTransaction<'_>,
11088 lookup_key: &str,
11089) -> Result<Option<ExistingConversationWithTail>> {
11090 let params = [SqliteValue::from(lookup_key)];
11091 let row = tx
11092 .query_row_with_params(
11093 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
11094 FROM conversation_external_tail_lookup
11095 WHERE lookup_key = ?1",
11096 ¶ms,
11097 )
11098 .optional()?;
11099 let Some(row) = row else {
11100 return Ok(None);
11101 };
11102 let id = row.get_typed(0)?;
11103 let ended_at = row.get_typed(1)?;
11104 let last_message_idx = row.get_typed(2)?;
11105 let last_message_created_at = row.get_typed(3)?;
11106 Ok(Some(ExistingConversationWithTail {
11107 id,
11108 tail_state: existing_conversation_tail_state_from_cached(
11109 last_message_idx,
11110 last_message_created_at,
11111 ended_at,
11112 ),
11113 }))
11114}
11115
11116fn franken_find_external_conversation_lookup(
11117 tx: &FrankenTransaction<'_>,
11118 lookup_key: &str,
11119) -> Result<Option<i64>> {
11120 Ok(franken_find_external_conversation_tail_lookup(tx, lookup_key)?.map(|existing| existing.id))
11121}
11122
11123fn franken_insert_external_conversation_tail_lookup_key(
11124 tx: &FrankenTransaction<'_>,
11125 lookup_key: &str,
11126 conversation_id: i64,
11127 ended_at: Option<i64>,
11128 last_message_idx: Option<i64>,
11129 last_message_created_at: Option<i64>,
11130) -> Result<()> {
11131 let params = [
11132 SqliteValue::from(lookup_key),
11133 SqliteValue::from(conversation_id),
11134 SqliteValue::from(ended_at),
11135 SqliteValue::from(last_message_idx),
11136 SqliteValue::from(last_message_created_at),
11137 ];
11138 tx.execute_with_params(
11139 "INSERT OR REPLACE INTO conversation_external_tail_lookup(
11140 lookup_key, conversation_id, ended_at, last_message_idx, last_message_created_at
11141 ) VALUES(?1, ?2, ?3, ?4, ?5)",
11142 ¶ms,
11143 )?;
11144 Ok(())
11145}
11146
11147fn franken_insert_external_conversation_tail_lookup(
11148 tx: &FrankenTransaction<'_>,
11149 source_id: &str,
11150 agent_id: i64,
11151 external_id: &str,
11152 existing: ExistingConversationWithTail,
11153) -> Result<()> {
11154 let lookup_key = conversation_external_lookup_key(source_id, agent_id, external_id);
11155 let ended_at = existing.tail_state.and_then(|state| state.ended_at);
11156 let last_message_idx = existing.tail_state.map(|state| state.last_message_idx);
11157 let last_message_created_at = existing
11158 .tail_state
11159 .map(|state| state.last_message_created_at);
11160 franken_insert_external_conversation_tail_lookup_key(
11161 tx,
11162 &lookup_key,
11163 existing.id,
11164 ended_at,
11165 last_message_idx,
11166 last_message_created_at,
11167 )
11168}
11169
11170fn franken_update_external_conversation_tail_lookup_key(
11171 tx: &FrankenTransaction<'_>,
11172 lookup_key: &str,
11173 ended_at_candidate: Option<i64>,
11174 last_message_idx_candidate: Option<i64>,
11175 last_message_created_at_candidate: Option<i64>,
11176) -> Result<()> {
11177 if ended_at_candidate.is_none()
11178 && last_message_idx_candidate.is_none()
11179 && last_message_created_at_candidate.is_none()
11180 {
11181 return Ok(());
11182 }
11183 tx.execute_compat(
11184 "UPDATE conversation_external_tail_lookup
11185 SET ended_at = CASE
11186 WHEN ?1 IS NULL THEN ended_at
11187 ELSE MAX(IFNULL(ended_at, 0), ?1)
11188 END,
11189 last_message_idx = CASE
11190 WHEN ?2 IS NULL THEN last_message_idx
11191 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
11192 ELSE last_message_idx
11193 END,
11194 last_message_created_at = CASE
11195 WHEN ?3 IS NULL THEN last_message_created_at
11196 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
11197 ELSE last_message_created_at
11198 END
11199 WHERE lookup_key = ?4",
11200 fparams![
11201 ended_at_candidate,
11202 last_message_idx_candidate,
11203 last_message_created_at_candidate,
11204 lookup_key
11205 ],
11206 )?;
11207 Ok(())
11208}
11209
11210fn franken_set_external_conversation_tail_lookup_after_append(
11211 tx: &FrankenTransaction<'_>,
11212 lookup_key: &str,
11213 ended_at: i64,
11214 last_message_idx: i64,
11215 last_message_created_at: i64,
11216) -> Result<()> {
11217 tx.execute_compat(
11218 "UPDATE conversation_external_tail_lookup
11219 SET ended_at = ?1,
11220 last_message_idx = ?2,
11221 last_message_created_at = ?3
11222 WHERE lookup_key = ?4",
11223 fparams![
11224 ended_at,
11225 last_message_idx,
11226 last_message_created_at,
11227 lookup_key
11228 ],
11229 )?;
11230 Ok(())
11231}
11232
11233fn franken_update_external_conversation_tail_after_append(
11234 tx: &FrankenTransaction<'_>,
11235 agent_id: i64,
11236 conv: &Conversation,
11237 used_append_tail_plan: bool,
11238 exact_append_set: bool,
11239 inserted_last_idx: Option<i64>,
11240 inserted_last_created_at: Option<i64>,
11241) -> Result<()> {
11242 let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv) else {
11243 return Ok(());
11244 };
11245
11246 if exact_append_set
11247 && let (Some(last_message_idx), Some(last_message_created_at)) =
11248 (inserted_last_idx, inserted_last_created_at)
11249 {
11250 return franken_set_external_conversation_tail_lookup_after_append(
11251 tx,
11252 &lookup_key,
11253 last_message_created_at,
11254 last_message_idx,
11255 last_message_created_at,
11256 );
11257 }
11258
11259 let ended_at_candidate = if used_append_tail_plan {
11260 inserted_last_created_at
11261 } else {
11262 conv.messages.iter().filter_map(|m| m.created_at).max()
11263 };
11264 franken_update_external_conversation_tail_lookup_key(
11265 tx,
11266 &lookup_key,
11267 ended_at_candidate,
11268 inserted_last_idx,
11269 inserted_last_created_at,
11270 )
11271}
11272
11273fn franken_find_existing_conversation_by_key(
11274 tx: &FrankenTransaction<'_>,
11275 key: &PendingConversationKey,
11276 conv: Option<&Conversation>,
11277) -> Result<Option<i64>> {
11278 franken_find_existing_conversation_by_key_impl(tx, key, conv, false)
11279}
11280
11281fn franken_find_existing_conversation_by_key_after_conflict(
11282 tx: &FrankenTransaction<'_>,
11283 key: &PendingConversationKey,
11284 conv: Option<&Conversation>,
11285) -> Result<Option<i64>> {
11286 franken_find_existing_conversation_by_key_impl(tx, key, conv, true)
11287}
11288
/// Shared implementation behind `franken_find_existing_conversation_by_key` and
/// `franken_find_existing_conversation_by_key_after_conflict`.
///
/// Returns the id of the stored conversation that `key` refers to, or `None` when no
/// match is found.
///
/// * `External` keys are resolved through the `conversation_external_tail_lookup` table.
///   Only when `allow_legacy_external_scan` is set does a miss fall back to scanning
///   `conversations` directly; a hit from that scan is written back to the lookup table.
/// * `SourcePath` keys first try an exact match on source, agent, path and effective
///   start time. If that misses and `conv` is provided with at least one message
///   fingerprint, every conversation sharing source, agent and path is scored by message
///   overlap and the best-scoring candidate is returned.
///
/// # Errors
/// Propagates any query, decoding or lookup-table write failure.
fn franken_find_existing_conversation_by_key_impl(
    tx: &FrankenTransaction<'_>,
    key: &PendingConversationKey,
    conv: Option<&Conversation>,
    allow_legacy_external_scan: bool,
) -> Result<Option<i64>> {
    match key {
        PendingConversationKey::External {
            source_id,
            agent_id,
            external_id,
        } => {
            // Fast path: the dedicated lookup table keyed by the derived lookup key.
            let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
            if let Some(existing_id) = franken_find_external_conversation_lookup(tx, &lookup_key)? {
                return Ok(Some(existing_id));
            }
            if !allow_legacy_external_scan {
                return Ok(None);
            }

            // Fallback: look for a conversation row that has no lookup-table entry.
            let existing_id = tx
                .query_row_map(
                    "SELECT id
                     FROM conversations
                     WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
                    fparams![source_id.as_str(), *agent_id, external_id.as_str()],
                    |row| row.get_typed(0),
                )
                .optional()?;
            if let Some(existing_id) = existing_id {
                // Record the row in the lookup table so the next lookup hits the fast path.
                let tail_state = franken_existing_conversation_append_tail_state(tx, existing_id)?;
                franken_insert_external_conversation_tail_lookup_key(
                    tx,
                    &lookup_key,
                    existing_id,
                    tail_state.and_then(|state| state.ended_at),
                    tail_state.map(|state| state.last_message_idx),
                    tail_state.map(|state| state.last_message_created_at),
                )?;
                Ok(Some(existing_id))
            } else {
                Ok(None)
            }
        }
        PendingConversationKey::SourcePath {
            source_id,
            agent_id,
            source_path,
            started_at,
        } => {
            // Exact match: same source/agent/path and the same effective start time, where
            // the effective start is `started_at` or, failing that, the earliest message
            // timestamp. A NULL effective start only matches a NULL `?4`. The lowest id wins.
            let exact_match = tx
                .query_row_map(
                    "SELECT c.id
                     FROM conversations c
                     WHERE c.source_id = ?1
                       AND c.agent_id = ?2
                       AND c.source_path = ?3
                       AND ((
                            COALESCE(
                                c.started_at,
                                (SELECT MIN(created_at)
                                 FROM messages
                                 WHERE conversation_id = c.id
                                   AND created_at IS NOT NULL)
                            ) IS NULL
                            AND ?4 IS NULL
                       ) OR COALESCE(
                            c.started_at,
                            (SELECT MIN(created_at)
                             FROM messages
                             WHERE conversation_id = c.id
                               AND created_at IS NOT NULL)
                       ) = ?4)
                     ORDER BY c.id
                     LIMIT 1",
                    fparams![
                        source_id.as_str(),
                        *agent_id,
                        source_path.as_str(),
                        *started_at
                    ],
                    |row| row.get_typed(0),
                )
                .optional()?;
            if exact_match.is_some() {
                return Ok(exact_match);
            }

            // Fingerprint-based matching needs the incoming conversation and at least one
            // message fingerprint to compare against.
            let Some(conv) = conv else {
                return Ok(None);
            };
            let incoming_fingerprints = conversation_message_fingerprints(conv);
            if incoming_fingerprints.is_empty() {
                return Ok(None);
            }
            let incoming_replay_fingerprints = conversation_message_replay_fingerprints(conv);

            // Every conversation sharing source/agent/path is a candidate, in id order.
            let candidates: Vec<(i64, Option<i64>)> = tx.query_map_collect(
                "SELECT
                     c.id,
                     COALESCE(
                         c.started_at,
                         (SELECT MIN(created_at)
                          FROM messages
                          WHERE conversation_id = c.id
                            AND created_at IS NOT NULL)
                     ) AS effective_started_at
                 FROM conversations c
                 WHERE c.source_id = ?1
                   AND c.agent_id = ?2
                   AND c.source_path = ?3
                 ORDER BY c.id",
                fparams![source_id.as_str(), *agent_id, source_path.as_str()],
                |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
            )?;

            let mut best_candidate: Option<(i64, ConversationMergeEvidence)> = None;
            for (candidate_id, candidate_started_at) in candidates {
                let existing_fingerprints =
                    franken_existing_message_fingerprints(tx, candidate_id)?;
                let existing_replay_fingerprints =
                    replay_fingerprints_from_merge_set(&existing_fingerprints);
                // Candidates for which no merge evidence is produced are skipped.
                let Some(evidence) = conversation_merge_evidence(
                    &incoming_fingerprints,
                    &incoming_replay_fingerprints,
                    &existing_fingerprints,
                    &existing_replay_fingerprints,
                    *started_at,
                    candidate_started_at,
                ) else {
                    continue;
                };

                // Ranking key, compared lexicographically; `Reverse` makes a smaller start
                // distance rank higher.
                let candidate_key = (
                    evidence.exact_overlap,
                    evidence.replay_overlap,
                    evidence.started_close,
                    evidence.smaller_replay_set,
                    std::cmp::Reverse(evidence.start_distance_ms),
                );
                // Replacement requires a strictly greater key, so on a tie the candidate
                // seen first (lower id) is kept.
                let should_replace = best_candidate
                    .as_ref()
                    .map(|(_, best_evidence)| {
                        candidate_key
                            > (
                                best_evidence.exact_overlap,
                                best_evidence.replay_overlap,
                                best_evidence.started_close,
                                best_evidence.smaller_replay_set,
                                std::cmp::Reverse(best_evidence.start_distance_ms),
                            )
                    })
                    .unwrap_or(true);

                if should_replace {
                    best_candidate = Some((candidate_id, evidence));
                }
            }

            Ok(best_candidate.map(|(candidate_id, _)| candidate_id))
        }
    }
}
11452
11453fn franken_insert_conversation_or_get_existing(
11454 tx: &FrankenTransaction<'_>,
11455 agent_id: i64,
11456 workspace_id: Option<i64>,
11457 conv: &Conversation,
11458) -> Result<ConversationInsertStatus> {
11459 let conversation_key = conversation_merge_key(agent_id, conv);
11460 if let Some(existing_id) =
11461 franken_find_existing_conversation_by_key(tx, &conversation_key, Some(conv))?
11462 {
11463 return Ok(ConversationInsertStatus::Existing(existing_id));
11464 }
11465
11466 franken_insert_conversation_or_get_existing_after_miss(
11467 tx,
11468 agent_id,
11469 workspace_id,
11470 conv,
11471 &conversation_key,
11472 )
11473}
11474
11475fn franken_insert_conversation_or_get_existing_after_miss(
11476 tx: &FrankenTransaction<'_>,
11477 agent_id: i64,
11478 workspace_id: Option<i64>,
11479 conv: &Conversation,
11480 conversation_key: &PendingConversationKey,
11481) -> Result<ConversationInsertStatus> {
11482 match franken_insert_conversation(tx, agent_id, workspace_id, conv) {
11483 Ok(Some(conv_id)) => Ok(ConversationInsertStatus::Inserted(conv_id)),
11484 Ok(None) => {
11485 let existing_id =
11488 franken_find_existing_conversation_by_key_after_conflict(
11489 tx,
11490 conversation_key,
11491 Some(conv),
11492 )?
11493 .with_context(|| {
11494 format!(
11495 "conversation INSERT produced a duplicate conflict but existing row was not found for source_id={} agent_id={} external_id={:?} source_path={}",
11496 conv.source_id,
11497 agent_id,
11498 conv.external_id,
11499 conv.source_path.display()
11500 )
11501 })?;
11502 tracing::warn!(
11503 source_id = %conv.source_id,
11504 agent_id,
11505 external_id = ?conv.external_id,
11506 existing_id,
11507 source_path = %conv.source_path.display(),
11508 "conversation INSERT: duplicate gracefully recovered, reusing existing row"
11509 );
11510 Ok(ConversationInsertStatus::Existing(existing_id))
11511 }
11512 Err(error) => {
11513 tracing::error!(
11514 source_id = %conv.source_id,
11515 agent_id,
11516 external_id = ?conv.external_id,
11517 error = %error,
11518 source_path = %conv.source_path.display(),
11519 "franken_insert_conversation failed"
11520 );
11521 Err(error)
11522 }
11523 }
11524}
11525
11526fn franken_insert_conversation(
11532 tx: &FrankenTransaction<'_>,
11533 agent_id: i64,
11534 workspace_id: Option<i64>,
11535 conv: &Conversation,
11536) -> Result<Option<i64>> {
11537 let (metadata_json_str, metadata_bin) = franken_metadata_insert_payload(&conv.metadata_json)?;
11538 let (last_message_idx, last_message_created_at) = conversation_tail_state(conv);
11539 let metadata_bin_bytes = metadata_bin.as_deref();
11540
11541 match tx.execute_compat(
11542 "INSERT INTO conversations(
11543 agent_id, workspace_id, source_id, external_id, title, source_path,
11544 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin,
11545 last_message_idx, last_message_created_at
11546 ) VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14)",
11547 fparams![
11548 agent_id,
11549 workspace_id,
11550 conv.source_id.as_str(),
11551 conv.external_id.as_deref(),
11552 conv.title.as_deref(),
11553 path_to_string(&conv.source_path),
11554 conv.started_at,
11555 conv.ended_at,
11556 conv.approx_tokens,
11557 metadata_json_str.as_deref(),
11558 conv.origin_host.as_deref(),
11559 metadata_bin_bytes,
11560 last_message_idx,
11561 last_message_created_at
11562 ],
11563 ) {
11564 Ok(_) => {
11565 let conv_id = franken_last_rowid(tx)?;
11566 franken_insert_conversation_tail_state(
11567 tx,
11568 conv_id,
11569 conv.ended_at,
11570 last_message_idx,
11571 last_message_created_at,
11572 )?;
11573 if let Some(external_id) = conv.external_id.as_deref() {
11574 franken_insert_external_conversation_tail_lookup(
11575 tx,
11576 conv.source_id.as_str(),
11577 agent_id,
11578 external_id,
11579 ExistingConversationWithTail {
11580 id: conv_id,
11581 tail_state: existing_conversation_tail_state_from_cached(
11582 last_message_idx,
11583 last_message_created_at,
11584 conv.ended_at,
11585 ),
11586 },
11587 )?;
11588 }
11589 Ok(Some(conv_id))
11590 }
11591 Err(frankensqlite::FrankenError::UniqueViolation { .. }) => {
11592 tracing::debug!(
11593 source_id = %conv.source_id,
11594 agent_id,
11595 external_id = ?conv.external_id,
11596 source_path = %conv.source_path.display(),
11597 "conversation INSERT: duplicate provenance conflict"
11598 );
11599 Ok(None)
11600 }
11601 Err(error) => Err(error.into()),
11602 }
11603}
11604
11605type MetadataInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11606
11607fn franken_metadata_insert_payload(value: &serde_json::Value) -> Result<MetadataInsertPayload<'_>> {
11608 if let Some(raw) = historical_raw_json(value) {
11609 Ok((Some(Cow::Borrowed(raw)), None))
11610 } else if value.is_null() {
11611 Ok((Some(Cow::Borrowed("null")), None))
11612 } else if value.as_object().is_some_and(|object| object.is_empty()) {
11613 Ok((None, None))
11614 } else if let Some(metadata_bin) = serialize_json_to_msgpack(value) {
11615 Ok((None, Some(metadata_bin)))
11616 } else {
11617 Ok((Some(Cow::Owned(serde_json::to_string(value)?)), None))
11618 }
11619}
11620
11621fn franken_insert_new_message(
11622 tx: &FrankenTransaction<'_>,
11623 conversation_id: i64,
11624 msg: &Message,
11625) -> Result<i64> {
11626 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11627 let extra_bin_bytes = extra_bin.as_deref();
11628
11629 tx.execute_compat(
11630 "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
11631 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
11632 fparams![
11633 conversation_id,
11634 msg.idx,
11635 role_as_str(&msg.role),
11636 msg.author.as_deref(),
11637 msg.created_at,
11638 msg.content.as_str(),
11639 extra_json_str.as_deref(),
11640 extra_bin_bytes
11641 ],
11642 )?;
11643 franken_last_rowid(tx)
11644}
11645
11646type MessageInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11647
11648fn franken_message_insert_payload(msg: &Message) -> Result<MessageInsertPayload<'_>> {
11649 if let Some(raw) = historical_raw_json(&msg.extra_json) {
11650 Ok((Some(Cow::Borrowed(raw)), None))
11651 } else if msg.extra_json.is_null() {
11652 Ok((None, None))
11653 } else {
11654 let extra_bin = serialize_json_to_msgpack(&msg.extra_json);
11655 if extra_bin.is_some() {
11656 Ok((None, extra_bin))
11657 } else {
11658 Ok((
11659 Some(Cow::Owned(serde_json::to_string(&msg.extra_json)?)),
11660 None,
11661 ))
11662 }
11663 }
11664}
11665
/// Rows per multi-row INSERT used by `franken_batch_insert_new_messages`.
const MESSAGE_INSERT_BATCH_SIZE: usize = 100;

/// Rows per multi-row INSERT used by `franken_append_insert_new_messages`.
const APPEND_MESSAGE_INSERT_BATCH_SIZE: usize = 50;

/// Returns the cached multi-row `INSERT INTO messages` statement for
/// `row_count` rows.
///
/// The table is built once, on first use, for every row count from 1 up to
/// the larger of the two batch-size constants. Index 0 holds an empty
/// string. Each row uses eight numbered placeholders, continuing the
/// numbering of the previous row.
///
/// # Panics
/// Panics if `row_count` exceeds the largest cached batch size.
fn message_insert_batch_sql(row_count: usize) -> &'static str {
    const COLUMNS_PER_ROW: usize = 8;
    const INSERT_PREFIX: &str = "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin) VALUES ";
    static SQL_BY_ROW_COUNT: std::sync::OnceLock<Vec<String>> = std::sync::OnceLock::new();

    let table = SQL_BY_ROW_COUNT.get_or_init(|| {
        let largest = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
        let mut table = Vec::with_capacity(largest + 1);
        // Slot 0 is a placeholder so that `table[n]` is the n-row statement.
        table.push(String::new());

        // Grow the tuple list by one row per iteration; each statement is the
        // shared prefix followed by the tuples accumulated so far.
        let mut tuples = String::new();
        for row in 0..largest {
            if row > 0 {
                tuples.push(',');
            }
            let first_placeholder = row * COLUMNS_PER_ROW + 1;
            tuples.push('(');
            for column in 0..COLUMNS_PER_ROW {
                if column > 0 {
                    tuples.push(',');
                }
                tuples.push('?');
                tuples.push_str(&(first_placeholder + column).to_string());
            }
            tuples.push(')');
            table.push(format!("{INSERT_PREFIX}{tuples}"));
        }
        table
    });

    match table.get(row_count) {
        Some(sql) => sql.as_str(),
        None => panic!("message insert batch size must be covered by the cached SQL table"),
    }
}
11716
11717fn franken_batch_insert_new_messages(
11718 tx: &FrankenTransaction<'_>,
11719 conversation_id: i64,
11720 messages: &[&Message],
11721) -> Result<Vec<i64>> {
11722 franken_batch_insert_new_messages_with_batch_size(
11723 tx,
11724 conversation_id,
11725 messages,
11726 MESSAGE_INSERT_BATCH_SIZE,
11727 )
11728}
11729
11730fn franken_append_insert_new_messages(
11731 tx: &FrankenTransaction<'_>,
11732 conversation_id: i64,
11733 messages: &[&Message],
11734) -> Result<Vec<i64>> {
11735 franken_batch_insert_new_messages_with_batch_size(
11736 tx,
11737 conversation_id,
11738 messages,
11739 APPEND_MESSAGE_INSERT_BATCH_SIZE,
11740 )
11741}
11742
11743fn franken_batch_insert_new_messages_with_batch_size(
11744 tx: &FrankenTransaction<'_>,
11745 conversation_id: i64,
11746 messages: &[&Message],
11747 batch_size: usize,
11748) -> Result<Vec<i64>> {
11749 let batch_size = batch_size.max(1);
11750 let mut inserted_ids = Vec::with_capacity(messages.len());
11751 for chunk in messages.chunks(batch_size) {
11752 if chunk.len() == 1 {
11753 inserted_ids.push(franken_insert_new_message(tx, conversation_id, chunk[0])?);
11754 continue;
11755 }
11756 let sql = message_insert_batch_sql(chunk.len());
11757
11758 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
11759 for msg in chunk {
11760 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11761 param_values.push(SqliteValue::from(conversation_id));
11762 param_values.push(SqliteValue::from(msg.idx));
11763 param_values.push(SqliteValue::from(role_as_str(&msg.role)));
11764 param_values.push(SqliteValue::from(msg.author.as_deref()));
11765 param_values.push(SqliteValue::from(msg.created_at));
11766 param_values.push(SqliteValue::from(msg.content.as_str()));
11767 param_values.push(SqliteValue::from(extra_json_str.as_deref()));
11768 param_values.push(SqliteValue::from(extra_bin.as_deref()));
11769 }
11770
11771 tx.execute_with_params(sql, ¶m_values)?;
11772
11773 let last_id = franken_last_rowid(tx)?;
11774 let first_id = last_id
11775 .checked_sub((chunk.len() - 1) as i64)
11776 .with_context(|| {
11777 format!(
11778 "inferring rowid range for {}-row message batch ending at {last_id}",
11779 chunk.len()
11780 )
11781 })?;
11782 inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
11783 }
11784
11785 Ok(inserted_ids)
11786}
11787
/// Test-only variant of the single-row message insert that records
/// per-stage timings and counters in `profile`.
///
/// Counts one single-row call and one row, then times payload construction,
/// statement execution, and the rowid read separately. Returns the rowid of
/// the inserted message.
#[cfg(test)]
fn franken_insert_new_message_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    msg: &Message,
    profile: &mut MessageInsertSubstageProfile,
) -> Result<i64> {
    // Counters are bumped before any fallible work, so a failed insert is
    // still counted.
    profile.single_row_calls += 1;
    profile.batch_rows += 1;

    // Stage 1: build the extra_json / extra_bin payload.
    let payload_start = Instant::now();
    let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
    profile.payload_duration += payload_start.elapsed();
    let extra_bin_bytes = extra_bin.as_deref();

    // Stage 2: execute the INSERT.
    let execute_start = Instant::now();
    tx.execute_compat(
        "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
        fparams![
            conversation_id,
            msg.idx,
            role_as_str(&msg.role),
            msg.author.as_deref(),
            msg.created_at,
            msg.content.as_str(),
            extra_json_str.as_deref(),
            extra_bin_bytes
        ],
    )?;
    profile.execute_duration += execute_start.elapsed();

    // Stage 3: read back the rowid of the new row.
    let rowid_start = Instant::now();
    let rowid = franken_last_rowid(tx)?;
    profile.rowid_duration += rowid_start.elapsed();
    Ok(rowid)
}
11825
/// Test-only profiled batch insert using `MESSAGE_INSERT_BATCH_SIZE` rows
/// per statement.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
11841
/// Test-only profiled batch insert using `APPEND_MESSAGE_INSERT_BATCH_SIZE`
/// rows per statement.
#[cfg(test)]
fn franken_append_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = APPEND_MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
11857
/// Test-only profiled counterpart of the batched message insert.
///
/// Splits `messages` into chunks of at most `batch_size` rows (minimum 1).
/// Single-row chunks go through `franken_insert_new_message_with_profile`.
/// Larger chunks use one multi-row INSERT, with the time spent on SQL
/// lookup, payload construction, parameter building, execution, and rowid
/// inference accumulated into `profile`. Returns the rowids in input order.
///
/// # Panics
/// `message_insert_batch_sql` panics if a chunk is larger than its cached
/// SQL table covers.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile_batch_size(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
    batch_size: usize,
) -> Result<Vec<i64>> {
    // `chunks(0)` would panic, so treat 0 as 1.
    let batch_size = batch_size.max(1);
    let mut inserted_ids = Vec::with_capacity(messages.len());
    for chunk in messages.chunks(batch_size) {
        if chunk.len() == 1 {
            // The single-row helper updates its own counters and timers.
            inserted_ids.push(franken_insert_new_message_with_profile(
                tx,
                conversation_id,
                chunk[0],
                profile,
            )?);
            continue;
        }

        profile.batch_calls += 1;
        profile.batch_rows += chunk.len();

        // The SQL text comes from a cache; this timer covers the lookup.
        let sql_build_start = Instant::now();
        let sql = message_insert_batch_sql(chunk.len());
        profile.sql_build_duration += sql_build_start.elapsed();

        // Eight bound values per row, in the column order of the statement.
        let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
        for msg in chunk {
            let payload_start = Instant::now();
            let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
            profile.payload_duration += payload_start.elapsed();

            let param_build_start = Instant::now();
            param_values.push(SqliteValue::from(conversation_id));
            param_values.push(SqliteValue::from(msg.idx));
            param_values.push(SqliteValue::from(role_as_str(&msg.role)));
            param_values.push(SqliteValue::from(msg.author.as_deref()));
            param_values.push(SqliteValue::from(msg.created_at));
            param_values.push(SqliteValue::from(msg.content.as_str()));
            param_values.push(SqliteValue::from(extra_json_str.as_deref()));
            param_values.push(SqliteValue::from(extra_bin.as_deref()));
            profile.param_build_duration += param_build_start.elapsed();
        }

        let execute_start = Instant::now();
        tx.execute_with_params(sql, &param_values)?;
        profile.execute_duration += execute_start.elapsed();

        // Only the last rowid is read back; earlier ids are inferred by
        // counting down from it.
        // NOTE(review): assumes the batch received consecutive rowids — confirm.
        let rowid_start = Instant::now();
        let last_id = franken_last_rowid(tx)?;
        let first_id = last_id
            .checked_sub((chunk.len() - 1) as i64)
            .with_context(|| {
                format!(
                    "inferring rowid range for {}-row message batch ending at {last_id}",
                    chunk.len()
                )
            })?;
        inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
        profile.rowid_duration += rowid_start.elapsed();
    }

    Ok(inserted_ids)
}
11924
11925fn franken_insert_snippets(
11927 tx: &FrankenTransaction<'_>,
11928 message_id: i64,
11929 snippets: &[Snippet],
11930) -> Result<()> {
11931 for snip in snippets {
11932 let file_path_str = snip.file_path.as_ref().map(path_to_string);
11933 tx.execute_compat(
11934 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text)
11935 VALUES(?1,?2,?3,?4,?5,?6)",
11936 fparams![
11937 message_id,
11938 file_path_str.as_deref(),
11939 snip.start_line,
11940 snip.end_line,
11941 snip.language.as_deref(),
11942 snip.snippet_text.as_deref()
11943 ],
11944 )?;
11945 }
11946 Ok(())
11947}
11948
11949fn franken_existing_message_fingerprints(
11950 tx: &FrankenTransaction<'_>,
11951 conversation_id: i64,
11952) -> Result<HashSet<MessageMergeFingerprint>> {
11953 let rows = tx.query_params(
11954 "SELECT idx, role, author, created_at, content
11955 FROM messages
11956 WHERE conversation_id = ?1",
11957 fparams![conversation_id],
11958 )?;
11959 let mut fingerprints = HashSet::with_capacity(rows.len());
11960 for row in rows {
11961 let role: String = row.get_typed(1)?;
11962 let content: String = row.get_typed(4)?;
11963 fingerprints.insert(MessageMergeFingerprint {
11964 idx: row.get_typed(0)?,
11965 created_at: row.get_typed(3)?,
11966 role: role_from_str(&role),
11967 author: row.get_typed(2)?,
11968 content_hash: *blake3::hash(content.as_bytes()).as_bytes(),
11969 });
11970 }
11971 Ok(fingerprints)
11972}
11973
/// Fingerprints of messages already stored for one conversation, as
/// gathered by `franken_existing_message_lookup`.
struct ExistingMessageLookup {
    /// Merge fingerprint of the stored message at each message `idx`.
    by_idx: HashMap<i64, MessageMergeFingerprint>,
    /// Replay fingerprints of stored messages; unlike `by_idx` these carry
    /// no `idx`.
    replay: HashSet<MessageReplayFingerprint>,
}
11978
/// Collects fingerprints of the messages already stored for
/// `conversation_id` that are relevant to `incoming_messages`.
///
/// Strategy, in order:
/// 1. Empty input returns an empty lookup without querying.
/// 2. Each incoming message is probed by its exact `idx`. If every probe
///    finds a stored row whose merge fingerprint equals the incoming
///    message's, only those rows are returned.
/// 3. Otherwise, if any incoming message has no `created_at`, every stored
///    row of the conversation is loaded. If all have one, two bounded
///    queries are run (by `idx` range and by `created_at` range) and their
///    rows combined.
///
/// In the fallback path `by_idx` only receives rows whose `idx` lies within
/// the incoming `idx` range. `replay` receives every row on a full scan, and
/// otherwise only rows whose `created_at` lies within the incoming bounds.
fn franken_existing_message_lookup(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    incoming_messages: &[Message],
) -> Result<ExistingMessageLookup> {
    if incoming_messages.is_empty() {
        return Ok(ExistingMessageLookup {
            by_idx: HashMap::new(),
            replay: HashSet::new(),
        });
    }

    // Inclusive idx range spanned by the incoming messages.
    let min_idx = incoming_messages
        .iter()
        .map(|msg| msg.idx)
        .min()
        .unwrap_or(0);
    let max_idx = incoming_messages
        .iter()
        .map(|msg| msg.idx)
        .max()
        .unwrap_or(min_idx);
    // A message without a timestamp cannot be covered by a created_at range
    // query, so its presence forces loading the whole conversation.
    let requires_full_scan = incoming_messages.iter().any(|msg| msg.created_at.is_none());
    // Inclusive (min, max) of the timestamps that are present, if any.
    let created_bounds = incoming_messages
        .iter()
        .filter_map(|msg| msg.created_at)
        .fold(None, |bounds: Option<(i64, i64)>, created_at| {
            Some(match bounds {
                Some((min_created_at, max_created_at)) => (
                    min_created_at.min(created_at),
                    max_created_at.max(created_at),
                ),
                None => (created_at, created_at),
            })
        });

    // Fast path: one point lookup per incoming message. It is abandoned at
    // the first missing row or fingerprint mismatch.
    let mut indexed_by_idx = HashMap::with_capacity(incoming_messages.len());
    let mut indexed_replay = HashSet::with_capacity(incoming_messages.len());
    let mut exact_idx_match = true;
    for msg in incoming_messages {
        record_message_lookup_exact_idx_probe();
        // NOTE(review): `sqlite_autoindex_messages_1` is presumably the
        // automatic index over (conversation_id, idx) — confirm against the
        // schema.
        let Some((role, author, created_at, content)) = tx
            .query_row_map(
                "SELECT role, author, created_at, content
                 FROM messages INDEXED BY sqlite_autoindex_messages_1
                 WHERE conversation_id = ?1 AND idx = ?2
                 LIMIT 1",
                fparams![conversation_id, msg.idx],
                |row| {
                    Ok((
                        row.get_typed::<String>(0)?,
                        row.get_typed::<Option<String>>(1)?,
                        row.get_typed::<Option<i64>>(2)?,
                        row.get_typed::<String>(3)?,
                    ))
                },
            )
            .optional()?
        else {
            exact_idx_match = false;
            break;
        };
        let role = role_from_str(&role);
        let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
        let fingerprint = MessageMergeFingerprint {
            idx: msg.idx,
            created_at,
            role: role.clone(),
            author: author.clone(),
            content_hash,
        };
        if fingerprint != message_merge_fingerprint(msg) {
            exact_idx_match = false;
            break;
        }
        indexed_by_idx.insert(msg.idx, fingerprint);
        indexed_replay.insert(MessageReplayFingerprint {
            created_at,
            role,
            author,
            content_hash,
        });
    }

    if exact_idx_match {
        return Ok(ExistingMessageLookup {
            by_idx: indexed_by_idx,
            replay: indexed_replay,
        });
    }

    // Fallback: load candidate rows. `replay_full_scan` records whether the
    // rows cover the whole conversation.
    let (rows, replay_full_scan) = if requires_full_scan {
        let rows = tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )?;
        record_message_lookup_full_scan_query(rows.len());
        (rows, true)
    } else if let Some((min_created_at, max_created_at)) = created_bounds {
        // Two bounded queries; the same row may be returned by both, and the
        // map/set inserts below absorb such duplicates.
        let mut rows = tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1
               AND idx >= ?2
               AND idx <= ?3",
            fparams![conversation_id, min_idx, max_idx],
        )?;
        rows.extend(tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1
               AND created_at IS NOT NULL
               AND created_at >= ?2
               AND created_at <= ?3",
            fparams![conversation_id, min_created_at, max_created_at],
        )?);
        record_message_lookup_bounded_queries(2, rows.len());
        (rows, false)
    } else {
        // No timestamps at all: behaves like the full-scan branch above.
        let rows = tx.query_params(
            "SELECT idx, role, author, created_at, content
             FROM messages INDEXED BY sqlite_autoindex_messages_1
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )?;
        record_message_lookup_full_scan_query(rows.len());
        (rows, true)
    };

    let mut by_idx = HashMap::with_capacity(rows.len());
    let mut replay = HashSet::with_capacity(rows.len());
    for row in rows {
        let idx: i64 = row.get_typed(0)?;
        let role: String = row.get_typed(1)?;
        let author: Option<String> = row.get_typed(2)?;
        let created_at: Option<i64> = row.get_typed(3)?;
        let content: String = row.get_typed(4)?;
        let role = role_from_str(&role);
        let content_hash = *blake3::hash(content.as_bytes()).as_bytes();

        // Only rows inside the incoming idx range are indexed by idx.
        if idx >= min_idx && idx <= max_idx {
            by_idx.insert(
                idx,
                MessageMergeFingerprint {
                    idx,
                    created_at,
                    role: role.clone(),
                    author: author.clone(),
                    content_hash,
                },
            );
        }

        // With bounded queries, rows outside the incoming timestamp bounds
        // are left out of the replay set.
        let replay_matches = if replay_full_scan {
            true
        } else if let Some((min_created_at, max_created_at)) = created_bounds {
            created_at.is_some_and(|ts| ts >= min_created_at && ts <= max_created_at)
        } else {
            true
        };
        if replay_matches {
            replay.insert(MessageReplayFingerprint {
                created_at,
                role,
                author,
                content_hash,
            });
        }
    }

    Ok(ExistingMessageLookup { by_idx, replay })
}
12153
12154fn franken_existing_message_lookup_with_pending(
12155 tx: &FrankenTransaction<'_>,
12156 conversation_id: i64,
12157 incoming_messages: &[Message],
12158 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12159 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12160) -> Result<ExistingMessageLookup> {
12161 if let (Some(by_idx), Some(replay)) = (
12162 pending_message_fingerprints.get(&conversation_id),
12163 pending_message_replay_fingerprints.get(&conversation_id),
12164 ) {
12165 if incoming_messages.iter().all(|msg| {
12166 by_idx.contains_key(&msg.idx) || replay.contains(&message_replay_fingerprint(msg))
12167 }) {
12168 return Ok(ExistingMessageLookup {
12169 by_idx: by_idx.clone(),
12170 replay: replay.clone(),
12171 });
12172 }
12173
12174 let fresh = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12175 let mut merged_by_idx = by_idx.clone();
12176 let mut merged_replay = replay.clone();
12177 merged_by_idx.extend(fresh.by_idx);
12178 merged_replay.extend(fresh.replay);
12179 pending_message_fingerprints.insert(conversation_id, merged_by_idx.clone());
12180 pending_message_replay_fingerprints.insert(conversation_id, merged_replay.clone());
12181 return Ok(ExistingMessageLookup {
12182 by_idx: merged_by_idx,
12183 replay: merged_replay,
12184 });
12185 }
12186
12187 let lookup = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12188 pending_message_fingerprints.insert(conversation_id, lookup.by_idx.clone());
12189 pending_message_replay_fingerprints.insert(conversation_id, lookup.replay.clone());
12190 Ok(lookup)
12191}
12192
12193fn franken_batch_insert_fts(tx: &FrankenTransaction<'_>, entries: &[FtsEntry]) -> Result<usize> {
12195 if entries.is_empty() {
12196 return Ok(0);
12197 }
12198
12199 let mut inserted = 0;
12200
12201 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12202 let placeholders: String = chunk
12203 .iter()
12204 .enumerate()
12205 .map(|(i, _)| {
12206 let base = i * 7 + 1; format!(
12208 "(?{},?{},?{},?{},?{},?{},?{})",
12209 base,
12210 base + 1,
12211 base + 2,
12212 base + 3,
12213 base + 4,
12214 base + 5,
12215 base + 6
12216 )
12217 })
12218 .collect::<Vec<_>>()
12219 .join(",");
12220
12221 let sql = format!(
12222 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12223 );
12224
12225 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12226 for entry in chunk {
12227 param_values.push(SqliteValue::from(entry.message_id));
12228 param_values.push(SqliteValue::from(entry.content.as_str()));
12229 param_values.push(SqliteValue::from(entry.title.as_str()));
12230 param_values.push(SqliteValue::from(entry.agent.as_str()));
12231 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12232 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12233 param_values.push(SqliteValue::from(entry.created_at));
12234 }
12235
12236 match tx.execute_with_params(&sql, ¶m_values) {
12237 Ok(_) => {
12238 inserted += chunk.len();
12239 }
12240 Err(err) => {
12241 tracing::warn!(
12242 error = %err,
12243 chunk_docs = chunk.len(),
12244 "frankensqlite FTS batch insert failed; skipping db-resident FTS maintenance because Tantivy is authoritative"
12245 );
12246 return Ok(inserted);
12247 }
12248 }
12249 }
12250
12251 Ok(inserted)
12252}
12253
12254fn franken_batch_insert_fts_on_connection(
12255 conn: &FrankenConnection,
12256 entries: &[FtsEntry],
12257) -> Result<usize> {
12258 if entries.is_empty() {
12259 return Ok(0);
12260 }
12261
12262 let mut inserted = 0;
12263
12264 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12265 let placeholders: String = chunk
12266 .iter()
12267 .enumerate()
12268 .map(|(i, _)| {
12269 let base = i * 7 + 1;
12270 format!(
12271 "(?{},?{},?{},?{},?{},?{},?{})",
12272 base,
12273 base + 1,
12274 base + 2,
12275 base + 3,
12276 base + 4,
12277 base + 5,
12278 base + 6
12279 )
12280 })
12281 .collect::<Vec<_>>()
12282 .join(",");
12283
12284 let sql = format!(
12285 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12286 );
12287
12288 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12289 for entry in chunk {
12290 param_values.push(SqliteValue::from(entry.message_id));
12291 param_values.push(SqliteValue::from(entry.content.as_str()));
12292 param_values.push(SqliteValue::from(entry.title.as_str()));
12293 param_values.push(SqliteValue::from(entry.agent.as_str()));
12294 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12295 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12296 param_values.push(SqliteValue::from(entry.created_at));
12297 }
12298
12299 conn.execute_with_params(&sql, ¶m_values)
12300 .with_context(|| {
12301 format!(
12302 "inserting {} rows into fts_messages during streaming FTS maintenance",
12303 chunk.len()
12304 )
12305 })?;
12306 inserted += chunk.len();
12307 }
12308
12309 Ok(inserted)
12310}
12311
12312fn franken_update_daily_stats_in_tx(
12314 storage: &FrankenStorage,
12315 tx: &FrankenTransaction<'_>,
12316 agent_slug: &str,
12317 source_id: &str,
12318 started_at: Option<i64>,
12319 delta: StatsDelta,
12320) -> Result<()> {
12321 let day_id = started_at
12322 .map(FrankenStorage::day_id_from_millis)
12323 .unwrap_or(0);
12324 let now = FrankenStorage::now_millis();
12325
12326 let targets = [
12327 DailyStatsTarget {
12328 day_id,
12329 agent_slug,
12330 source_id,
12331 },
12332 DailyStatsTarget {
12333 day_id,
12334 agent_slug: "all",
12335 source_id,
12336 },
12337 DailyStatsTarget {
12338 day_id,
12339 agent_slug,
12340 source_id: "all",
12341 },
12342 DailyStatsTarget {
12343 day_id,
12344 agent_slug: "all",
12345 source_id: "all",
12346 },
12347 ];
12348
12349 if agent_slug != "all"
12350 && source_id != "all"
12351 && franken_update_ensured_daily_stats_targets_in_tx(storage, tx, &targets, now, delta)?
12352 {
12353 return Ok(());
12354 }
12355
12356 for target in targets {
12357 franken_apply_daily_stats_delta_in_tx(storage, tx, target, now, delta)?;
12358 }
12359
12360 Ok(())
12361}
12362
/// Identifies one `daily_stats` row by day, agent slug, and source id.
#[derive(Clone, Copy)]
struct DailyStatsTarget<'a> {
    /// Day bucket of the row.
    day_id: i64,
    /// Agent slug; the literal "all" names the across-agents rollup row.
    agent_slug: &'a str,
    /// Source id; the literal "all" names the across-sources rollup row.
    source_id: &'a str,
}
12369
12370fn franken_update_ensured_daily_stats_targets_in_tx(
12371 storage: &FrankenStorage,
12372 tx: &FrankenTransaction<'_>,
12373 targets: &[DailyStatsTarget<'_>; 4],
12374 now: i64,
12375 delta: StatsDelta,
12376) -> Result<bool> {
12377 let cache_keys = targets.map(|target| {
12378 EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id)
12379 });
12380 if !storage.daily_stats_keys_already_ensured(&cache_keys) {
12381 return Ok(false);
12382 }
12383
12384 let primary = targets[0];
12385 let rows_changed = tx.execute_compat(
12386 "UPDATE daily_stats
12387 SET session_count = session_count + ?4,
12388 message_count = message_count + ?5,
12389 total_chars = total_chars + ?6,
12390 last_updated = ?7
12391 WHERE day_id = ?1
12392 AND ((agent_slug = ?2 AND source_id = ?3)
12393 OR (agent_slug = 'all' AND source_id = ?3)
12394 OR (agent_slug = ?2 AND source_id = 'all')
12395 OR (agent_slug = 'all' AND source_id = 'all'))",
12396 fparams![
12397 primary.day_id,
12398 primary.agent_slug,
12399 primary.source_id,
12400 delta.session_count_delta,
12401 delta.message_count_delta,
12402 delta.total_chars_delta,
12403 now
12404 ],
12405 )?;
12406 if rows_changed == targets.len() {
12407 return Ok(true);
12408 }
12409
12410 for (target, cache_key) in targets.iter().copied().zip(cache_keys) {
12411 let exists = tx
12412 .query_row_map(
12413 "SELECT 1 FROM daily_stats
12414 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3
12415 LIMIT 1",
12416 fparams![target.day_id, target.agent_slug, target.source_id],
12417 |row| row.get_typed::<i64>(0),
12418 )
12419 .optional()?
12420 .is_some();
12421 if exists {
12422 continue;
12423 }
12424
12425 tx.execute_compat(
12426 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12427 VALUES(?1,?2,?3,?4,?5,?6,?7)",
12428 fparams![
12429 target.day_id,
12430 target.agent_slug,
12431 target.source_id,
12432 delta.session_count_delta,
12433 delta.message_count_delta,
12434 delta.total_chars_delta,
12435 now
12436 ],
12437 )?;
12438 storage.mark_daily_stats_key_ensured(cache_key);
12439 }
12440
12441 Ok(true)
12442}
12443
12444fn franken_apply_daily_stats_delta_in_tx(
12445 storage: &FrankenStorage,
12446 tx: &FrankenTransaction<'_>,
12447 target: DailyStatsTarget<'_>,
12448 now: i64,
12449 delta: StatsDelta,
12450) -> Result<()> {
12451 let cache_key = EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id);
12452 if storage.daily_stats_key_already_ensured(&cache_key) {
12453 let rows_changed = tx.execute_compat(
12454 "UPDATE daily_stats
12455 SET session_count = session_count + ?4,
12456 message_count = message_count + ?5,
12457 total_chars = total_chars + ?6,
12458 last_updated = ?7
12459 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3",
12460 fparams![
12461 target.day_id,
12462 target.agent_slug,
12463 target.source_id,
12464 delta.session_count_delta,
12465 delta.message_count_delta,
12466 delta.total_chars_delta,
12467 now
12468 ],
12469 )?;
12470 if rows_changed > 0 {
12471 return Ok(());
12472 }
12473 }
12474
12475 tx.execute_compat(
12476 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12477 VALUES(?1,?2,?3,?4,?5,?6,?7)
12478 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12479 session_count = session_count + excluded.session_count,
12480 message_count = message_count + excluded.message_count,
12481 total_chars = total_chars + excluded.total_chars,
12482 last_updated = excluded.last_updated",
12483 fparams![
12484 target.day_id,
12485 target.agent_slug,
12486 target.source_id,
12487 delta.session_count_delta,
12488 delta.message_count_delta,
12489 delta.total_chars_delta,
12490 now
12491 ],
12492 )?;
12493 storage.mark_daily_stats_key_ensured(cache_key);
12494 Ok(())
12495}
12496
12497fn franken_update_daily_stats_batched_in_tx(
12503 tx: &FrankenTransaction<'_>,
12504 entries: &[(i64, String, String, StatsDelta)],
12505) -> Result<usize> {
12506 if entries.is_empty() {
12507 return Ok(0);
12508 }
12509
12510 let now = FrankenStorage::now_millis();
12511 let mut total_affected = 0;
12512
12513 for (day_id, agent, source, delta) in entries {
12518 total_affected += tx.execute_compat(
12519 "INSERT INTO daily_stats (day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12520 VALUES(?1,?2,?3,?4,?5,?6,?7)
12521 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12522 session_count = session_count + excluded.session_count,
12523 message_count = message_count + excluded.message_count,
12524 total_chars = total_chars + excluded.total_chars,
12525 last_updated = excluded.last_updated",
12526 fparams![
12527 *day_id,
12528 agent.as_str(),
12529 source.as_str(),
12530 delta.session_count_delta,
12531 delta.message_count_delta,
12532 delta.total_chars_delta,
12533 now
12534 ],
12535 )?;
12536 }
12537
12538 Ok(total_affected)
12539}
12540
12541fn franken_insert_token_usage_batched_in_tx(
12547 tx: &FrankenTransaction<'_>,
12548 entries: &[TokenUsageEntry],
12549) -> Result<usize> {
12550 if entries.is_empty() {
12551 return Ok(0);
12552 }
12553
12554 let mut total_inserted = 0;
12555
12556 for e in entries {
12557 let params_vec: Vec<ParamValue> = vec![
12558 ParamValue::from(e.message_id),
12559 ParamValue::from(e.conversation_id),
12560 ParamValue::from(e.agent_id),
12561 ParamValue::from(e.workspace_id),
12562 ParamValue::from(e.source_id.clone()),
12563 ParamValue::from(e.timestamp_ms),
12564 ParamValue::from(e.day_id),
12565 ParamValue::from(e.model_name.clone()),
12566 ParamValue::from(e.model_family.clone()),
12567 ParamValue::from(e.model_tier.clone()),
12568 ParamValue::from(e.service_tier.clone()),
12569 ParamValue::from(e.provider.clone()),
12570 ParamValue::from(e.input_tokens),
12571 ParamValue::from(e.output_tokens),
12572 ParamValue::from(e.cache_read_tokens),
12573 ParamValue::from(e.cache_creation_tokens),
12574 ParamValue::from(e.thinking_tokens),
12575 ParamValue::from(e.total_tokens),
12576 ParamValue::from(e.estimated_cost_usd),
12577 ParamValue::from(e.role.clone()),
12578 ParamValue::from(e.content_chars),
12579 ParamValue::from(e.has_tool_calls as i64),
12580 ParamValue::from(e.tool_call_count as i64),
12581 ParamValue::from(e.data_source.clone()),
12582 ];
12583
12584 let values = param_slice_to_values(¶ms_vec);
12585 total_inserted += tx.execute_with_params(
12586 "INSERT OR IGNORE INTO token_usage (
12587 message_id, conversation_id, agent_id, workspace_id, source_id,
12588 timestamp_ms, day_id,
12589 model_name, model_family, model_tier, service_tier, provider,
12590 input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
12591 thinking_tokens, total_tokens, estimated_cost_usd,
12592 role, content_chars, has_tool_calls, tool_call_count, data_source
12593 )
12594 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12595 &values,
12596 )?;
12597 }
12598
12599 Ok(total_inserted)
12600}
12601
12602fn franken_update_token_daily_stats_batched_in_tx(
12604 tx: &FrankenTransaction<'_>,
12605 entries: &[(i64, String, String, String, TokenStatsDelta)],
12606) -> Result<usize> {
12607 if entries.is_empty() {
12608 return Ok(0);
12609 }
12610
12611 let now = FrankenStorage::now_millis();
12612 let mut total_affected = 0;
12613
12614 for (day_id, agent, source, model, delta) in entries {
12615 total_affected += tx.execute_compat(
12616 "INSERT INTO token_daily_stats (
12617 day_id, agent_slug, source_id, model_family,
12618 api_call_count, user_message_count, assistant_message_count, tool_message_count,
12619 total_input_tokens, total_output_tokens, total_cache_read_tokens,
12620 total_cache_creation_tokens, total_thinking_tokens, grand_total_tokens,
12621 total_content_chars, total_tool_calls, estimated_cost_usd, session_count,
12622 last_updated
12623 )
12624 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19)
12625 ON CONFLICT(day_id, agent_slug, source_id, model_family) DO UPDATE SET
12626 api_call_count = api_call_count + excluded.api_call_count,
12627 user_message_count = user_message_count + excluded.user_message_count,
12628 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12629 tool_message_count = tool_message_count + excluded.tool_message_count,
12630 total_input_tokens = total_input_tokens + excluded.total_input_tokens,
12631 total_output_tokens = total_output_tokens + excluded.total_output_tokens,
12632 total_cache_read_tokens = total_cache_read_tokens + excluded.total_cache_read_tokens,
12633 total_cache_creation_tokens = total_cache_creation_tokens + excluded.total_cache_creation_tokens,
12634 total_thinking_tokens = total_thinking_tokens + excluded.total_thinking_tokens,
12635 grand_total_tokens = grand_total_tokens + excluded.grand_total_tokens,
12636 total_content_chars = total_content_chars + excluded.total_content_chars,
12637 total_tool_calls = total_tool_calls + excluded.total_tool_calls,
12638 estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd,
12639 session_count = session_count + excluded.session_count,
12640 last_updated = excluded.last_updated",
12641 fparams![
12642 *day_id,
12643 agent.as_str(),
12644 source.as_str(),
12645 model.as_str(),
12646 delta.api_call_count,
12647 delta.user_message_count,
12648 delta.assistant_message_count,
12649 delta.tool_message_count,
12650 delta.total_input_tokens,
12651 delta.total_output_tokens,
12652 delta.total_cache_read_tokens,
12653 delta.total_cache_creation_tokens,
12654 delta.total_thinking_tokens,
12655 delta.grand_total_tokens,
12656 delta.total_content_chars,
12657 delta.total_tool_calls,
12658 delta.estimated_cost_usd,
12659 delta.session_count,
12660 now
12661 ],
12662 )?;
12663 }
12664
12665 Ok(total_affected)
12666}
12667
12668fn franken_insert_message_metrics_batched_in_tx(
12674 tx: &FrankenTransaction<'_>,
12675 entries: &[MessageMetricsEntry],
12676) -> Result<usize> {
12677 if entries.is_empty() {
12678 return Ok(0);
12679 }
12680
12681 let mut total_inserted = 0;
12682
12683 for e in entries {
12684 let params_vec: Vec<ParamValue> = vec![
12685 ParamValue::from(e.message_id),
12686 ParamValue::from(e.created_at_ms),
12687 ParamValue::from(e.hour_id),
12688 ParamValue::from(e.day_id),
12689 ParamValue::from(e.agent_slug.clone()),
12690 ParamValue::from(e.workspace_id),
12691 ParamValue::from(e.source_id.clone()),
12692 ParamValue::from(e.role.clone()),
12693 ParamValue::from(e.content_chars),
12694 ParamValue::from(e.content_tokens_est),
12695 ParamValue::from(e.model_name.clone()),
12696 ParamValue::from(e.model_family.clone()),
12697 ParamValue::from(e.model_tier.clone()),
12698 ParamValue::from(e.provider.clone()),
12699 ParamValue::from(e.api_input_tokens),
12700 ParamValue::from(e.api_output_tokens),
12701 ParamValue::from(e.api_cache_read_tokens),
12702 ParamValue::from(e.api_cache_creation_tokens),
12703 ParamValue::from(e.api_thinking_tokens),
12704 ParamValue::from(e.api_service_tier.clone()),
12705 ParamValue::from(e.api_data_source.clone()),
12706 ParamValue::from(e.tool_call_count),
12707 ParamValue::from(e.has_tool_calls as i64),
12708 ParamValue::from(e.has_plan as i64),
12709 ];
12710
12711 let values = param_slice_to_values(¶ms_vec);
12712 total_inserted += tx.execute_with_params(
12713 "INSERT OR IGNORE INTO message_metrics (
12714 message_id, created_at_ms, hour_id, day_id,
12715 agent_slug, workspace_id, source_id, role,
12716 content_chars, content_tokens_est,
12717 model_name, model_family, model_tier, provider,
12718 api_input_tokens, api_output_tokens, api_cache_read_tokens,
12719 api_cache_creation_tokens, api_thinking_tokens,
12720 api_service_tier, api_data_source,
12721 tool_call_count, has_tool_calls, has_plan
12722 )
12723 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12724 &values,
12725 )?;
12726 }
12727
12728 Ok(total_inserted)
12729}
12730
12731fn franken_flush_rollup_table(
12733 tx: &FrankenTransaction<'_>,
12734 table: &str,
12735 bucket_col: &str,
12736 deltas: &HashMap<(i64, String, i64, String), UsageRollupDelta>,
12737 now: i64,
12738) -> Result<usize> {
12739 if deltas.is_empty() {
12740 return Ok(0);
12741 }
12742
12743 let mut total_affected = 0;
12744
12745 for ((bucket_id, agent, workspace_id, source), d) in deltas {
12746 let sql = format!(
12747 "INSERT INTO {table} (
12748 {bucket_col}, agent_slug, workspace_id, source_id,
12749 message_count, user_message_count, assistant_message_count,
12750 tool_call_count, plan_message_count, plan_content_tokens_est_total,
12751 plan_api_tokens_total, api_coverage_message_count,
12752 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
12753 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
12754 api_cache_read_tokens_total, api_cache_creation_tokens_total,
12755 api_thinking_tokens_total, last_updated
12756 )
12757 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
12758 ON CONFLICT({bucket_col}, agent_slug, workspace_id, source_id) DO UPDATE SET
12759 message_count = message_count + excluded.message_count,
12760 user_message_count = user_message_count + excluded.user_message_count,
12761 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12762 tool_call_count = tool_call_count + excluded.tool_call_count,
12763 plan_message_count = plan_message_count + excluded.plan_message_count,
12764 plan_content_tokens_est_total = plan_content_tokens_est_total + excluded.plan_content_tokens_est_total,
12765 plan_api_tokens_total = plan_api_tokens_total + excluded.plan_api_tokens_total,
12766 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
12767 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
12768 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
12769 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
12770 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
12771 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
12772 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
12773 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
12774 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
12775 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
12776 last_updated = excluded.last_updated"
12777 );
12778
12779 total_affected += tx.execute_compat(
12780 &sql,
12781 fparams![
12782 *bucket_id,
12783 agent.as_str(),
12784 *workspace_id,
12785 source.as_str(),
12786 d.message_count,
12787 d.user_message_count,
12788 d.assistant_message_count,
12789 d.tool_call_count,
12790 d.plan_message_count,
12791 d.plan_content_tokens_est_total,
12792 d.plan_api_tokens_total,
12793 d.api_coverage_message_count,
12794 d.content_tokens_est_total,
12795 d.content_tokens_est_user,
12796 d.content_tokens_est_assistant,
12797 d.api_tokens_total,
12798 d.api_input_tokens_total,
12799 d.api_output_tokens_total,
12800 d.api_cache_read_tokens_total,
12801 d.api_cache_creation_tokens_total,
12802 d.api_thinking_tokens_total,
12803 now
12804 ],
12805 )?;
12806 }
12807
12808 Ok(total_affected)
12809}
12810
12811fn franken_flush_model_daily_rollup_table(
12813 tx: &FrankenTransaction<'_>,
12814 deltas: &HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
12815 now: i64,
12816) -> Result<usize> {
12817 if deltas.is_empty() {
12818 return Ok(0);
12819 }
12820
12821 let mut total_affected = 0;
12822
12823 for ((day_id, agent, workspace_id, source, model_family, model_tier), d) in deltas {
12824 total_affected += tx.execute_compat(
12825 "INSERT INTO usage_models_daily (
12826 day_id, agent_slug, workspace_id, source_id, model_family, model_tier,
12827 message_count, user_message_count, assistant_message_count,
12828 tool_call_count, plan_message_count, api_coverage_message_count,
12829 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
12830 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
12831 api_cache_read_tokens_total, api_cache_creation_tokens_total,
12832 api_thinking_tokens_total, last_updated
12833 )
12834 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
12835 ON CONFLICT(day_id, agent_slug, workspace_id, source_id, model_family, model_tier) DO UPDATE SET
12836 message_count = message_count + excluded.message_count,
12837 user_message_count = user_message_count + excluded.user_message_count,
12838 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12839 tool_call_count = tool_call_count + excluded.tool_call_count,
12840 plan_message_count = plan_message_count + excluded.plan_message_count,
12841 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
12842 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
12843 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
12844 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
12845 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
12846 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
12847 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
12848 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
12849 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
12850 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
12851 last_updated = excluded.last_updated",
12852 fparams![
12853 *day_id,
12854 agent.as_str(),
12855 *workspace_id,
12856 source.as_str(),
12857 model_family.as_str(),
12858 model_tier.as_str(),
12859 d.message_count,
12860 d.user_message_count,
12861 d.assistant_message_count,
12862 d.tool_call_count,
12863 d.plan_message_count,
12864 d.api_coverage_message_count,
12865 d.content_tokens_est_total,
12866 d.content_tokens_est_user,
12867 d.content_tokens_est_assistant,
12868 d.api_tokens_total,
12869 d.api_input_tokens_total,
12870 d.api_output_tokens_total,
12871 d.api_cache_read_tokens_total,
12872 d.api_cache_creation_tokens_total,
12873 d.api_thinking_tokens_total,
12874 now
12875 ],
12876 )?;
12877 }
12878
12879 Ok(total_affected)
12880}
12881
12882fn franken_flush_analytics_rollups_in_tx(
12884 tx: &FrankenTransaction<'_>,
12885 agg: &AnalyticsRollupAggregator,
12886) -> Result<(usize, usize, usize)> {
12887 let now = FrankenStorage::now_millis();
12888
12889 let hourly_affected =
12890 franken_flush_rollup_table(tx, "usage_hourly", "hour_id", &agg.hourly, now)?;
12891 let daily_affected = franken_flush_rollup_table(tx, "usage_daily", "day_id", &agg.daily, now)?;
12892 let models_daily_affected = franken_flush_model_daily_rollup_table(tx, &agg.models_daily, now)?;
12893
12894 Ok((hourly_affected, daily_affected, models_daily_affected))
12895}
12896
12897fn franken_update_conversation_token_summaries_in_tx(
12899 tx: &FrankenTransaction<'_>,
12900 conversation_id: i64,
12901) -> Result<()> {
12902 tx.execute_compat(
12903 "UPDATE conversations SET
12904 total_input_tokens = (SELECT SUM(input_tokens) FROM token_usage WHERE conversation_id = ?1),
12905 total_output_tokens = (SELECT SUM(output_tokens) FROM token_usage WHERE conversation_id = ?1),
12906 total_cache_read_tokens = (SELECT SUM(cache_read_tokens) FROM token_usage WHERE conversation_id = ?1),
12907 total_cache_creation_tokens = (SELECT SUM(cache_creation_tokens) FROM token_usage WHERE conversation_id = ?1),
12908 grand_total_tokens = (SELECT SUM(total_tokens) FROM token_usage WHERE conversation_id = ?1),
12909 estimated_cost_usd = (SELECT SUM(estimated_cost_usd) FROM token_usage WHERE conversation_id = ?1),
12910 primary_model = (SELECT model_name FROM token_usage WHERE conversation_id = ?1
12911 AND model_name IS NOT NULL
12912 GROUP BY model_name ORDER BY COUNT(*) DESC LIMIT 1),
12913 api_call_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12914 AND data_source = 'api'),
12915 tool_call_count = (SELECT SUM(tool_call_count) FROM token_usage WHERE conversation_id = ?1),
12916 user_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12917 AND role = 'user'),
12918 assistant_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12919 AND role IN ('assistant', 'agent'))
12920 WHERE id = ?1",
12921 fparams![conversation_id],
12922 )?;
12923 Ok(())
12924}
12925
12926impl FrankenStorage {
12927 pub fn rebuild_token_daily_stats(&self) -> Result<usize> {
12929 const CONVERSATION_BATCH_SIZE: usize = 1_000;
12930 const TOKEN_USAGE_BATCH_SIZE: usize = 10_000;
12931
12932 let total_usage_rows: i64 =
12933 self.conn
12934 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
12935 row.get_typed(0)
12936 })?;
12937 tracing::info!(
12938 target: "cass::analytics",
12939 total_usage_rows,
12940 "token_daily_stats_rebuild_start"
12941 );
12942
12943 let mut tx = self.conn.transaction()?;
12944 tx.execute("DELETE FROM token_daily_stats")?;
12945
12946 let mut last_conversation_id = 0_i64;
12947 let mut rows_created = 0_usize;
12948
12949 loop {
12950 let conversation_rows = tx.query_map_collect(
12951 "SELECT c.id, c.started_at, c.source_id,
12952 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown')
12953 FROM conversations c
12954 WHERE c.id > ?1
12955 ORDER BY c.id
12956 LIMIT ?2",
12957 fparams![last_conversation_id, CONVERSATION_BATCH_SIZE as i64],
12958 |row| {
12959 Ok((
12960 row.get_typed::<i64>(0)?,
12961 row.get_typed::<Option<i64>>(1)?,
12962 row.get_typed::<String>(2)?,
12963 row.get_typed::<String>(3)?,
12964 ))
12965 },
12966 )?;
12967 if conversation_rows.is_empty() {
12968 break;
12969 }
12970
12971 let mut aggregate = TokenStatsAggregator::new();
12972
12973 for (conversation_id, started_at, source_id, agent_slug) in conversation_rows {
12974 last_conversation_id = conversation_id;
12975 let conversation_day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
12976 let mut last_token_usage_id = 0_i64;
12977 let mut session_model_family = String::from("unknown");
12978
12979 loop {
12980 let usage_rows = tx.query_map_collect(
12981 "SELECT id, day_id, role,
12982 COALESCE(model_family, 'unknown'),
12983 input_tokens, output_tokens, cache_read_tokens,
12984 cache_creation_tokens, thinking_tokens,
12985 has_tool_calls, tool_call_count,
12986 content_chars, estimated_cost_usd
12987 FROM token_usage
12988 WHERE conversation_id = ?1
12989 AND id > ?2
12990 ORDER BY id
12991 LIMIT ?3",
12992 fparams![
12993 conversation_id,
12994 last_token_usage_id,
12995 TOKEN_USAGE_BATCH_SIZE as i64
12996 ],
12997 |row| {
12998 Ok((
12999 row.get_typed::<i64>(0)?,
13000 row.get_typed::<i64>(1)?,
13001 row.get_typed::<String>(2)?,
13002 row.get_typed::<String>(3)?,
13003 row.get_typed::<Option<i64>>(4)?,
13004 row.get_typed::<Option<i64>>(5)?,
13005 row.get_typed::<Option<i64>>(6)?,
13006 row.get_typed::<Option<i64>>(7)?,
13007 row.get_typed::<Option<i64>>(8)?,
13008 row.get_typed::<i64>(9)?,
13009 row.get_typed::<i64>(10)?,
13010 row.get_typed::<i64>(11)?,
13011 row.get_typed::<Option<f64>>(12)?,
13012 ))
13013 },
13014 )?;
13015 if usage_rows.is_empty() {
13016 break;
13017 }
13018
13019 for (
13020 token_usage_id,
13021 day_id,
13022 role,
13023 model_family,
13024 input_tokens,
13025 output_tokens,
13026 cache_read_tokens,
13027 cache_creation_tokens,
13028 thinking_tokens,
13029 has_tool_calls,
13030 tool_call_count,
13031 content_chars,
13032 estimated_cost_usd,
13033 ) in usage_rows
13034 {
13035 last_token_usage_id = token_usage_id;
13036 if model_family != "unknown" {
13037 session_model_family = model_family.clone();
13038 }
13039 let usage = crate::connectors::ExtractedTokenUsage {
13040 model_name: None,
13041 provider: None,
13042 input_tokens,
13043 output_tokens,
13044 cache_read_tokens,
13045 cache_creation_tokens,
13046 thinking_tokens,
13047 service_tier: None,
13048 has_tool_calls: has_tool_calls != 0,
13049 tool_call_count: u32::try_from(tool_call_count.max(0)).unwrap_or(0),
13050 data_source: franken_agent_detection::TokenDataSource::Api,
13051 };
13052 aggregate.record(
13053 &agent_slug,
13054 &source_id,
13055 day_id,
13056 &model_family,
13057 &role,
13058 &usage,
13059 content_chars,
13060 estimated_cost_usd.unwrap_or(0.0),
13061 );
13062 }
13063 }
13064
13065 aggregate.record_session(
13066 &agent_slug,
13067 &source_id,
13068 conversation_day_id,
13069 &session_model_family,
13070 );
13071 }
13072
13073 let entries = aggregate.expand();
13074 rows_created = rows_created.saturating_add(entries.len());
13075 franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
13076 }
13077
13078 tx.commit()?;
13079
13080 tracing::info!(
13081 target: "cass::analytics",
13082 rows_created,
13083 "token_daily_stats_rebuild_complete"
13084 );
13085
13086 Ok(rows_created)
13087 }
13088
13089 pub fn rebuild_analytics(&self) -> Result<AnalyticsRebuildResult> {
13092 let start = Instant::now();
13093
13094 let total_messages: i64 =
13095 self.conn
13096 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13097 row.get_typed(0)
13098 })?;
13099 tracing::info!(
13100 target: "cass::analytics",
13101 total_messages,
13102 "analytics_rebuild_start"
13103 );
13104
13105 let mut tx = self.conn.transaction()?;
13106
13107 tx.execute("DELETE FROM message_metrics")?;
13108 tx.execute("DELETE FROM usage_hourly")?;
13109 tx.execute("DELETE FROM usage_daily")?;
13110 tx.execute("DELETE FROM usage_models_daily")?;
13111
13112 const CHUNK_SIZE: i64 = 10_000;
13113 let mut offset: i64 = 0;
13114 let mut total_inserted: usize = 0;
13115 let mut usage_hourly_rows: usize = 0;
13116 let mut usage_daily_rows: usize = 0;
13117 let mut usage_models_daily_rows: usize = 0;
13118
13119 loop {
13120 #[allow(clippy::type_complexity)]
13121 let rows: Vec<(
13122 i64,
13123 String,
13124 String,
13125 Option<serde_json::Value>,
13126 Option<i64>,
13127 Option<i64>,
13128 String,
13129 Option<i64>,
13130 String,
13131 )> = tx.query_map_collect(
13132 "SELECT m.id, m.idx, m.role, m.content, m.extra_json, m.extra_bin,
13138 m.created_at,
13139 c.id AS conv_id, c.started_at AS conv_started_at,
13140 c.source_id, c.workspace_id,
13141 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown') AS agent_slug
13142 FROM messages m
13143 JOIN conversations c ON m.conversation_id = c.id
13144 ORDER BY m.id
13145 LIMIT ?1 OFFSET ?2",
13146 fparams![CHUNK_SIZE, offset],
13147 |row| {
13148 let msg_id: i64 = row.get_typed(0)?;
13149 let role: String = row.get_typed(2)?;
13150 let content: String = row.get_typed(3)?;
13151 let extra_json = row
13152 .get_typed::<Option<String>>(4)?
13153 .and_then(|s| serde_json::from_str(&s).ok())
13154 .or_else(|| {
13155 row.get_typed::<Option<Vec<u8>>>(5)
13156 .ok()
13157 .flatten()
13158 .and_then(|b| rmp_serde::from_slice(&b).ok())
13159 });
13160 let msg_ts: Option<i64> = row.get_typed(6)?;
13161 let conv_started_at: Option<i64> = row.get_typed(8)?;
13162 let source_id: String = row.get_typed(9)?;
13163 let workspace_id: Option<i64> = row.get_typed(10)?;
13164 let agent_slug: String = row.get_typed(11)?;
13165 let effective_ts = msg_ts.or(conv_started_at).unwrap_or(0);
13166
13167 Ok((
13168 msg_id,
13169 role,
13170 content,
13171 extra_json,
13172 Some(effective_ts),
13173 workspace_id,
13174 source_id,
13175 conv_started_at,
13176 agent_slug,
13177 ))
13178 },
13179 )?;
13180
13181 if rows.is_empty() {
13182 break;
13183 }
13184
13185 let chunk_len = rows.len();
13186 let mut entries = Vec::with_capacity(chunk_len);
13187 let mut rollup_agg = AnalyticsRollupAggregator::new();
13188
13189 for (
13190 msg_id,
13191 role,
13192 content,
13193 extra_json,
13194 effective_ts,
13195 workspace_id,
13196 source_id,
13197 _conv_started_at,
13198 agent_slug,
13199 ) in &rows
13200 {
13201 let ts = effective_ts.unwrap_or(0);
13202 let day_id = Self::day_id_from_millis(ts);
13203 let hour_id = Self::hour_id_from_millis(ts);
13204 let content_chars = content.len() as i64;
13205 let content_tokens_est = content_chars / 4;
13206 let extra = extra_json
13207 .as_ref()
13208 .cloned()
13209 .unwrap_or(serde_json::Value::Null);
13210 let usage =
13211 crate::connectors::extract_tokens_for_agent(agent_slug, &extra, content, role);
13212 let model_info = usage
13213 .model_name
13214 .as_deref()
13215 .map(crate::connectors::normalize_model);
13216 let model_family = model_info
13217 .as_ref()
13218 .map(|i| i.family.clone())
13219 .unwrap_or_else(|| "unknown".into());
13220 let model_tier = model_info
13221 .as_ref()
13222 .map(|i| i.tier.clone())
13223 .unwrap_or_else(|| "unknown".into());
13224 let provider = usage
13225 .provider
13226 .clone()
13227 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
13228 .unwrap_or_else(|| "unknown".into());
13229
13230 let entry = MessageMetricsEntry {
13231 message_id: *msg_id,
13232 created_at_ms: ts,
13233 hour_id,
13234 day_id,
13235 agent_slug: agent_slug.clone(),
13236 workspace_id: workspace_id.unwrap_or(0),
13237 source_id: source_id.clone(),
13238 role: role.clone(),
13239 content_chars,
13240 content_tokens_est,
13241 model_name: usage.model_name.clone(),
13242 model_family,
13243 model_tier,
13244 provider,
13245 api_input_tokens: usage.input_tokens,
13246 api_output_tokens: usage.output_tokens,
13247 api_cache_read_tokens: usage.cache_read_tokens,
13248 api_cache_creation_tokens: usage.cache_creation_tokens,
13249 api_thinking_tokens: usage.thinking_tokens,
13250 api_service_tier: usage.service_tier,
13251 api_data_source: usage.data_source.as_str().to_string(),
13252 tool_call_count: usage.tool_call_count as i64,
13253 has_tool_calls: usage.has_tool_calls,
13254 has_plan: has_plan_for_role(role, content),
13255 };
13256 rollup_agg.record(&entry);
13257 entries.push(entry);
13258 }
13259
13260 total_inserted += franken_insert_message_metrics_batched_in_tx(&tx, &entries)?;
13261 let (hourly, daily, models_daily) =
13262 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
13263 usage_hourly_rows += hourly;
13264 usage_daily_rows += daily;
13265 usage_models_daily_rows += models_daily;
13266 offset += chunk_len as i64;
13267
13268 tracing::debug!(
13269 target: "cass::analytics",
13270 offset,
13271 chunk = chunk_len,
13272 inserted = entries.len(),
13273 total = total_inserted,
13274 "analytics_rebuild_chunk"
13275 );
13276
13277 if (chunk_len as i64) < CHUNK_SIZE {
13278 break;
13279 }
13280 }
13281
13282 tx.commit()?;
13283
13284 let elapsed = start.elapsed();
13285 let elapsed_ms = elapsed.as_millis() as u64;
13286 let msgs_per_sec = if elapsed_ms > 0 {
13287 (total_inserted as f64) / (elapsed_ms as f64 / 1000.0)
13288 } else {
13289 0.0
13290 };
13291
13292 tracing::info!(
13293 target: "cass::analytics",
13294 message_metrics_rows = total_inserted,
13295 usage_hourly_rows,
13296 usage_daily_rows,
13297 usage_models_daily_rows,
13298 elapsed_ms,
13299 messages_per_sec = format!("{:.0}", msgs_per_sec),
13300 "analytics_rebuild_complete"
13301 );
13302
13303 Ok(AnalyticsRebuildResult {
13304 message_metrics_rows: total_inserted,
13305 usage_hourly_rows,
13306 usage_daily_rows,
13307 usage_models_daily_rows,
13308 elapsed_ms,
13309 messages_per_sec: msgs_per_sec,
13310 })
13311 }
13312
    /// Rebuilds the `daily_stats` table from `conversations` and `messages`.
    ///
    /// The table is cleared inside a transaction and refilled in two passes per
    /// conversation batch: one session-count delta per conversation, then
    /// message-count and content-length deltas for that conversation's
    /// messages, paged by `idx`. Content lengths come from `message_metrics`
    /// when that table has exactly as many rows as `messages` (and there is at
    /// least one message); otherwise they are computed from `messages.content`.
    ///
    /// Both scan queries retry with a halved batch size when they fail with an
    /// out-of-memory error, down to a batch size of 1.
    ///
    /// Returns the final `daily_stats` row count and the summed
    /// `session_count` of the rows whose `agent_slug` and `source_id` are both
    /// `'all'`.
    ///
    /// # Errors
    ///
    /// Propagates any query, upsert or commit failure other than the retried
    /// out-of-memory case.
    pub fn rebuild_daily_stats(&self) -> Result<DailyStatsRebuildResult> {
        const DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE: usize = 1_000;
        const DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE: usize = 10_000;

        // Both batch sizes are mutable because the OOM retry paths shrink them.
        // NOTE(review): presumably `rebuild_batch_size_env` returns the named
        // environment variable's value when set and the default otherwise —
        // confirm against the helper.
        let mut conversation_batch_size = rebuild_batch_size_env(
            "CASS_DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE",
            DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE,
        );
        let mut message_batch_size = rebuild_batch_size_env(
            "CASS_DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE",
            DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE,
        );

        let total_messages: i64 =
            self.conn
                .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                    row.get_typed(0)
                })?;
        let message_metrics_rows: i64 =
            self.conn
                .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
                    row.get_typed(0)
                })?;
        // Only trust message_metrics when it has one row per message.
        let use_message_metrics = total_messages > 0 && total_messages == message_metrics_rows;

        tracing::info!(
            target: "cass::perf::daily_stats",
            total_messages,
            message_metrics_rows,
            use_message_metrics,
            "daily_stats rebuild selected message source"
        );

        let mut tx = self.conn.transaction()?;
        tx.execute("DELETE FROM daily_stats")?;

        // Keyset cursor over conversations.id, plus progress counters that are
        // only used for logging.
        let mut last_conversation_id = 0_i64;
        let mut conversation_batch_count = 0_usize;
        let mut conversations_processed = 0_usize;
        let mut messages_processed = 0_usize;
        let mut message_batch_count = 0_usize;
        let mut raw_entries_flushed = 0_usize;
        let mut expanded_entries_flushed = 0_usize;
        // Both variants return (idx, content length) for one conversation,
        // paged by `idx > ?2`.
        let message_scan_sql = if use_message_metrics {
            "SELECT m.idx, mm.content_chars
             FROM messages m
             JOIN message_metrics mm ON mm.message_id = m.id
             WHERE m.conversation_id = ?1
               AND m.idx > ?2
             ORDER BY m.conversation_id, m.idx
             LIMIT ?3"
        } else {
            "SELECT m.idx, COALESCE(LENGTH(CAST(m.content AS BLOB)), 0)
             FROM messages m
             WHERE m.conversation_id = ?1
               AND m.idx > ?2
             ORDER BY m.conversation_id, m.idx
             LIMIT ?3"
        };

        loop {
            // The scan reads through `self.conn` while the writes below go
            // through `tx`.
            // NOTE(review): the reason for reading outside `tx` is not visible
            // in this block — confirm before changing it.
            let conversation_rows = match self.conn.query_with_params(
                "SELECT c.id, c.started_at,
                        COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
                        c.source_id
                 FROM conversations c
                 WHERE c.id > ?1
                 ORDER BY c.id
                 LIMIT ?2",
                &params_from_iter([
                    ParamValue::from(last_conversation_id),
                    ParamValue::from(conversation_batch_size as i64),
                ]),
            ) {
                Ok(rows) => rows,
                // Out of memory: halve the batch and retry from the same cursor.
                Err(err) if is_out_of_memory_error(&err) && conversation_batch_size > 1 => {
                    let previous_batch_size = conversation_batch_size;
                    conversation_batch_size = (conversation_batch_size / 2).max(1);
                    tracing::warn!(
                        previous_batch_size,
                        conversation_batch_size,
                        last_conversation_id,
                        "daily_stats conversation scan ran out of memory; retrying with smaller batch"
                    );
                    continue;
                }
                Err(err) => return Err(err.into()),
            };
            if conversation_rows.is_empty() {
                break;
            }

            // Pass 1: one session delta per conversation in this batch.
            let mut aggregate = StatsAggregator::new();
            let mut conversation_batch_meta: Vec<(i64, i64, String, String)> =
                Vec::with_capacity(conversation_rows.len());
            for row in &conversation_rows {
                let conversation_id: i64 = row.get_typed(0)?;
                let started_at: Option<i64> = row.get_typed(1)?;
                let agent_slug: String = row.get_typed(2)?;
                let source_id: String = row.get_typed(3)?;
                last_conversation_id = conversation_id;
                // A conversation without a start time is attributed to day 0.
                let day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
                aggregate.record_delta(&agent_slug, &source_id, day_id, 1, 0, 0);
                conversation_batch_meta.push((conversation_id, day_id, agent_slug, source_id));
                conversations_processed += 1;
            }

            conversation_batch_count += 1;
            raw_entries_flushed += aggregate.raw_entry_count();
            let entries = aggregate.expand();
            expanded_entries_flushed += entries.len();
            if !entries.is_empty() {
                franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
            }
            if conversation_batch_count.is_multiple_of(25) {
                tracing::info!(
                    target: "cass::perf::daily_stats",
                    conversations_processed,
                    batches = conversation_batch_count,
                    batch_size = conversation_batch_size,
                    last_conversation_id,
                    "daily_stats rebuild conversation scan progress"
                );
            }
            // Defensive only: one entry is pushed per row of a non-empty batch,
            // so this cannot be empty here.
            if conversation_batch_meta.is_empty() {
                continue;
            }

            // Pass 2: message deltas, attributed to the conversation's day.
            for (conversation_id, day_id, agent_slug, source_id) in conversation_batch_meta {
                // Start below 0 so a message with idx 0 is included.
                let mut cursor_message_idx = -1_i64;
                loop {
                    let message_rows = match self.conn.query_with_params(
                        message_scan_sql,
                        &params_from_iter([
                            ParamValue::from(conversation_id),
                            ParamValue::from(cursor_message_idx),
                            ParamValue::from(message_batch_size as i64),
                        ]),
                    ) {
                        Ok(rows) => rows,
                        // Out of memory: halve the batch and retry from the
                        // same cursor. The smaller size is kept for all later
                        // conversations too.
                        Err(err) if is_out_of_memory_error(&err) && message_batch_size > 1 => {
                            let previous_batch_size = message_batch_size;
                            message_batch_size = (message_batch_size / 2).max(1);
                            tracing::warn!(
                                previous_batch_size,
                                message_batch_size,
                                conversation_id,
                                cursor_message_idx,
                                "daily_stats message scan ran out of memory; retrying with smaller batch"
                            );
                            continue;
                        }
                        Err(err) => return Err(err.into()),
                    };
                    if message_rows.is_empty() {
                        break;
                    }

                    let mut aggregate = StatsAggregator::new();
                    for row in &message_rows {
                        let message_idx: i64 = row.get_typed(0)?;
                        let content_len: i64 = row.get_typed(1)?;
                        cursor_message_idx = message_idx;
                        aggregate.record_delta(&agent_slug, &source_id, day_id, 0, 1, content_len);
                        messages_processed += 1;
                    }

                    message_batch_count += 1;
                    raw_entries_flushed += aggregate.raw_entry_count();
                    let entries = aggregate.expand();
                    expanded_entries_flushed += entries.len();
                    if !entries.is_empty() {
                        franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
                    }
                    if message_batch_count.is_multiple_of(50) {
                        tracing::info!(
                            target: "cass::perf::daily_stats",
                            messages_processed,
                            batches = message_batch_count,
                            batch_size = message_batch_size,
                            source = if use_message_metrics {
                                "message_metrics"
                            } else {
                                "messages"
                            },
                            conversation_id,
                            cursor_message_idx,
                            "daily_stats rebuild message scan progress"
                        );
                    }
                }
            }
        }

        // Read the results through the transaction so the uncommitted upserts
        // are included.
        let rows_created: i64 =
            tx.query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
                row.get_typed(0)
            })?;
        let total_sessions: i64 = tx.query_row_map(
            "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| row.get_typed(0),
        )?;

        tx.commit()?;

        tracing::info!(
            target: "cass::perf::daily_stats",
            rows_created,
            total_sessions,
            conversations_processed,
            conversation_batches = conversation_batch_count,
            conversation_batch_size,
            message_batches = message_batch_count,
            message_batch_size,
            messages_processed,
            use_message_metrics,
            raw_entries_flushed,
            expanded_entries_flushed,
            "Daily stats rebuilt from conversations"
        );

        Ok(DailyStatsRebuildResult {
            rows_created,
            total_sessions,
        })
    }
13546}
13547
/// In-memory cache of agent and workspace row ids used during indexing.
///
/// Lookups go through [`IndexingCache::get_or_insert_agent`] and
/// [`IndexingCache::get_or_insert_workspace`], which only call into storage
/// on a miss. Hit and miss counters are exposed via [`IndexingCache::stats`].
#[derive(Debug, Default)]
pub struct IndexingCache {
    /// Agent id keyed by agent slug.
    agent_ids: HashMap<String, i64>,
    /// Workspace id keyed by workspace path.
    workspace_ids: HashMap<PathBuf, i64>,
    /// Number of lookups answered from the cache.
    hits: u64,
    /// Number of lookups that had to call into storage.
    misses: u64,
}
13581
/// Storage hooks that [`IndexingCache`] calls when a lookup misses the cache.
pub trait IndexingCacheStorage {
    /// Returns the row id for `agent`.
    ///
    /// NOTE(review): the name suggests the row is created when absent —
    /// confirm against the implementor.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64>;
    /// Returns the row id for the workspace at `path`, passing along an
    /// optional `display_name`.
    ///
    /// NOTE(review): presumably creates the row when absent — confirm against
    /// the implementor.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64>;
}
13586
/// Routes the cache's storage hooks to the inherent `FrankenStorage` methods.
impl IndexingCacheStorage for FrankenStorage {
    /// Delegates to `FrankenStorage::ensure_agent`.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64> {
        self.ensure_agent(agent)
    }

    /// Delegates to `FrankenStorage::ensure_workspace`.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
        self.ensure_workspace(path, display_name)
    }
}
13596
13597impl IndexingCache {
13600 pub fn new() -> Self {
13602 Self {
13603 agent_ids: HashMap::new(),
13604 workspace_ids: HashMap::new(),
13605 hits: 0,
13606 misses: 0,
13607 }
13608 }
13609
13610 pub fn is_enabled() -> bool {
13613 dotenvy::var("CASS_SQLITE_CACHE")
13614 .map(|v| v != "0" && v.to_lowercase() != "false")
13615 .unwrap_or(true)
13616 }
13617
13618 pub fn get_or_insert_agent<S>(&mut self, storage: &S, agent: &Agent) -> Result<i64>
13623 where
13624 S: IndexingCacheStorage + ?Sized,
13625 {
13626 if let Some(&cached) = self.agent_ids.get(&agent.slug) {
13627 self.hits += 1;
13628 return Ok(cached);
13629 }
13630
13631 self.misses += 1;
13632 let id = storage.ensure_indexing_agent(agent)?;
13633 self.agent_ids.insert(agent.slug.clone(), id);
13634 Ok(id)
13635 }
13636
13637 pub fn get_or_insert_workspace(
13642 &mut self,
13643 storage: &(impl IndexingCacheStorage + ?Sized),
13644 path: &Path,
13645 display_name: Option<&str>,
13646 ) -> Result<i64> {
13647 if let Some(&cached) = self.workspace_ids.get(path) {
13648 self.hits += 1;
13649 return Ok(cached);
13650 }
13651
13652 self.misses += 1;
13653 let id = storage.ensure_indexing_workspace(path, display_name)?;
13654 self.workspace_ids.insert(path.to_path_buf(), id);
13655 Ok(id)
13656 }
13657
13658 pub fn stats(&self) -> (u64, u64, f64) {
13660 let total = self.hits + self.misses;
13661 let hit_rate = if total > 0 {
13662 self.hits as f64 / total as f64
13663 } else {
13664 0.0
13665 };
13666 (self.hits, self.misses, hit_rate)
13667 }
13668
13669 pub fn clear(&mut self) {
13671 self.agent_ids.clear();
13672 self.workspace_ids.clear();
13673 self.hits = 0;
13674 self.misses = 0;
13675 }
13676
13677 pub fn agent_count(&self) -> usize {
13679 self.agent_ids.len()
13680 }
13681
13682 pub fn workspace_count(&self) -> usize {
13684 self.workspace_ids.len()
13685 }
13686}
13687
/// Signed increments for one daily-stats bucket, accumulated by
/// [`StatsAggregator`].
#[derive(Clone, Copy, Debug, Default)]
pub struct StatsDelta {
    /// Change in the number of sessions.
    pub session_count_delta: i64,
    /// Change in the number of messages.
    pub message_count_delta: i64,
    /// Change in the total character count.
    pub total_chars_delta: i64,
}
13702
/// Accumulates [`StatsDelta`] values in memory, keyed by day, agent and source.
///
/// [`StatsAggregator::expand`] later fans each raw entry out to the `"all"`
/// roll-up buckets.
#[derive(Debug, Default)]
pub struct StatsAggregator {
    /// Raw deltas keyed by `(day_id, agent_slug, source_id)`.
    deltas: HashMap<(i64, String, String), StatsDelta>,
}
13724
13725impl StatsAggregator {
13726 pub fn new() -> Self {
13728 Self {
13729 deltas: HashMap::new(),
13730 }
13731 }
13732
13733 pub fn record(
13744 &mut self,
13745 agent_slug: &str,
13746 source_id: &str,
13747 day_id: i64,
13748 message_count: i64,
13749 total_chars: i64,
13750 ) {
13751 self.record_delta(agent_slug, source_id, day_id, 1, message_count, total_chars);
13752 }
13753
13754 pub fn record_delta(
13757 &mut self,
13758 agent_slug: &str,
13759 source_id: &str,
13760 day_id: i64,
13761 session_count_delta: i64,
13762 message_count_delta: i64,
13763 total_chars_delta: i64,
13764 ) {
13765 if session_count_delta == 0 && message_count_delta == 0 && total_chars_delta == 0 {
13766 return;
13767 }
13768 let key = (day_id, agent_slug.to_owned(), source_id.to_owned());
13769 let delta = self.deltas.entry(key).or_default();
13770 delta.session_count_delta += session_count_delta;
13771 delta.message_count_delta += message_count_delta;
13772 delta.total_chars_delta += total_chars_delta;
13773 }
13774
13775 pub fn expand(&self) -> Vec<(i64, String, String, StatsDelta)> {
13783 let mut expanded: HashMap<(i64, String, String), StatsDelta> = HashMap::new();
13784
13785 for ((day_id, agent, source), delta) in &self.deltas {
13786 let permutations = [
13787 (agent.as_str(), source.as_str()),
13788 ("all", source.as_str()),
13789 (agent.as_str(), "all"),
13790 ("all", "all"),
13791 ];
13792
13793 for idx in 0..permutations.len() {
13795 let (a, s) = permutations[idx];
13796 if permutations[..idx].contains(&(a, s)) {
13797 continue;
13798 }
13799 let key = (*day_id, a.to_owned(), s.to_owned());
13800 let entry = expanded.entry(key).or_default();
13801 entry.session_count_delta += delta.session_count_delta;
13802 entry.message_count_delta += delta.message_count_delta;
13803 entry.total_chars_delta += delta.total_chars_delta;
13804 }
13805 }
13806
13807 let mut out: Vec<(i64, String, String, StatsDelta)> = expanded
13808 .into_iter()
13809 .map(|((d, a, s), delta)| (d, a, s, delta))
13810 .collect();
13811 out.sort_by(|(d1, a1, s1, _), (d2, a2, s2, _)| {
13812 d1.cmp(d2).then_with(|| a1.cmp(a2)).then_with(|| s1.cmp(s2))
13813 });
13814 out
13815 }
13816
13817 pub fn is_empty(&self) -> bool {
13819 self.deltas.is_empty()
13820 }
13821
13822 pub fn raw_entry_count(&self) -> usize {
13824 self.deltas.len()
13825 }
13826}
13827
/// Accumulated token-usage totals for one bucket of [`TokenStatsAggregator`].
///
/// Every field is a running sum; see `TokenStatsAggregator::record` and
/// `TokenStatsAggregator::record_session` for how each one is incremented.
#[derive(Clone, Debug, Default)]
pub struct TokenStatsDelta {
    /// Incremented once per `record` call.
    pub api_call_count: i64,
    /// Messages recorded with role `"user"`.
    pub user_message_count: i64,
    /// Messages recorded with role `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Messages recorded with role `"tool"`.
    pub tool_message_count: i64,
    pub total_input_tokens: i64,
    pub total_output_tokens: i64,
    pub total_cache_read_tokens: i64,
    pub total_cache_creation_tokens: i64,
    pub total_thinking_tokens: i64,
    /// Sum of each usage record's `total_tokens()` (missing values count as 0).
    pub grand_total_tokens: i64,
    pub total_content_chars: i64,
    pub total_tool_calls: i64,
    pub estimated_cost_usd: f64,
    /// Incremented once per `record_session` call.
    pub session_count: i64,
}
13853
/// Accumulates [`TokenStatsDelta`] values in memory, keyed by day, agent,
/// source and model family.
#[derive(Debug, Default)]
pub struct TokenStatsAggregator {
    /// Raw deltas keyed by `(day_id, agent_slug, source_id, model_family)`.
    deltas: HashMap<(i64, String, String, String), TokenStatsDelta>,
}
13864
13865impl TokenStatsAggregator {
13866 pub fn new() -> Self {
13867 Self {
13868 deltas: HashMap::new(),
13869 }
13870 }
13871
13872 #[allow(clippy::too_many_arguments)]
13874 pub fn record(
13875 &mut self,
13876 agent_slug: &str,
13877 source_id: &str,
13878 day_id: i64,
13879 model_family: &str,
13880 role: &str,
13881 usage: &crate::connectors::ExtractedTokenUsage,
13882 content_chars: i64,
13883 estimated_cost_usd: f64,
13884 ) {
13885 let key = (
13886 day_id,
13887 agent_slug.to_owned(),
13888 source_id.to_owned(),
13889 model_family.to_owned(),
13890 );
13891 let delta = self.deltas.entry(key).or_default();
13892
13893 delta.api_call_count += 1;
13894 match role {
13895 "user" => delta.user_message_count += 1,
13896 "assistant" | "agent" => delta.assistant_message_count += 1,
13897 "tool" => delta.tool_message_count += 1,
13898 _ => {}
13899 }
13900
13901 delta.total_input_tokens += usage.input_tokens.unwrap_or(0);
13902 delta.total_output_tokens += usage.output_tokens.unwrap_or(0);
13903 delta.total_cache_read_tokens += usage.cache_read_tokens.unwrap_or(0);
13904 delta.total_cache_creation_tokens += usage.cache_creation_tokens.unwrap_or(0);
13905 delta.total_thinking_tokens += usage.thinking_tokens.unwrap_or(0);
13906 delta.grand_total_tokens += usage.total_tokens().unwrap_or(0);
13907 delta.total_content_chars += content_chars;
13908 delta.total_tool_calls += usage.tool_call_count as i64;
13909 delta.estimated_cost_usd += estimated_cost_usd;
13910 }
13911
13912 pub fn record_session(
13914 &mut self,
13915 agent_slug: &str,
13916 source_id: &str,
13917 day_id: i64,
13918 model_family: &str,
13919 ) {
13920 let key = (
13921 day_id,
13922 agent_slug.to_owned(),
13923 source_id.to_owned(),
13924 model_family.to_owned(),
13925 );
13926 self.deltas.entry(key).or_default().session_count += 1;
13927 }
13928
13929 pub fn expand(&self) -> Vec<(i64, String, String, String, TokenStatsDelta)> {
13936 let mut expanded: HashMap<(i64, String, String, String), TokenStatsDelta> = HashMap::new();
13937
13938 for ((day_id, agent, source, model), delta) in &self.deltas {
13939 let permutations = [
13940 (agent.as_str(), source.as_str(), model.as_str()),
13941 ("all", source.as_str(), model.as_str()),
13942 (agent.as_str(), "all", model.as_str()),
13943 (agent.as_str(), source.as_str(), "all"),
13944 ("all", "all", "all"),
13945 ];
13946
13947 for idx in 0..permutations.len() {
13948 let (a, s, m) = permutations[idx];
13949 if permutations[..idx].contains(&(a, s, m)) {
13951 continue;
13952 }
13953 let key = (*day_id, a.to_owned(), s.to_owned(), m.to_owned());
13954 let entry = expanded.entry(key).or_default();
13955 entry.api_call_count += delta.api_call_count;
13956 entry.user_message_count += delta.user_message_count;
13957 entry.assistant_message_count += delta.assistant_message_count;
13958 entry.tool_message_count += delta.tool_message_count;
13959 entry.total_input_tokens += delta.total_input_tokens;
13960 entry.total_output_tokens += delta.total_output_tokens;
13961 entry.total_cache_read_tokens += delta.total_cache_read_tokens;
13962 entry.total_cache_creation_tokens += delta.total_cache_creation_tokens;
13963 entry.total_thinking_tokens += delta.total_thinking_tokens;
13964 entry.grand_total_tokens += delta.grand_total_tokens;
13965 entry.total_content_chars += delta.total_content_chars;
13966 entry.total_tool_calls += delta.total_tool_calls;
13967 entry.estimated_cost_usd += delta.estimated_cost_usd;
13968 entry.session_count += delta.session_count;
13969 }
13970 }
13971
13972 let mut out: Vec<(i64, String, String, String, TokenStatsDelta)> = expanded
13973 .into_iter()
13974 .map(|((d, a, s, m), delta)| (d, a, s, m, delta))
13975 .collect();
13976 out.sort_by(|(d1, a1, s1, m1, _), (d2, a2, s2, m2, _)| {
13977 d1.cmp(d2)
13978 .then_with(|| a1.cmp(a2))
13979 .then_with(|| s1.cmp(s2))
13980 .then_with(|| m1.cmp(m2))
13981 });
13982 out
13983 }
13984
13985 pub fn is_empty(&self) -> bool {
13986 self.deltas.is_empty()
13987 }
13988
13989 pub fn raw_entry_count(&self) -> usize {
13990 self.deltas.len()
13991 }
13992}
13993
/// Accumulated usage counters for one rollup bucket.
///
/// Populated by `AnalyticsRollupAggregator::record`; every field is a running
/// sum over the messages that fell into the bucket.
#[derive(Clone, Debug, Default)]
pub struct UsageRollupDelta {
    pub message_count: i64,
    /// Messages whose role is `"user"`.
    pub user_message_count: i64,
    /// Messages whose role is `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    pub tool_call_count: i64,
    /// Messages flagged `has_plan`.
    pub plan_message_count: i64,
    /// Estimated content tokens of plan messages.
    pub plan_content_tokens_est_total: i64,
    /// API token total of plan messages whose data source is `"api"`.
    pub plan_api_tokens_total: i64,
    /// Messages whose data source is `"api"`.
    pub api_coverage_message_count: i64,
    pub content_tokens_est_total: i64,
    pub content_tokens_est_user: i64,
    pub content_tokens_est_assistant: i64,
    // The api_* totals below only include messages whose data source is "api".
    pub api_tokens_total: i64,
    pub api_input_tokens_total: i64,
    pub api_output_tokens_total: i64,
    pub api_cache_read_tokens_total: i64,
    pub api_cache_creation_tokens_total: i64,
    pub api_thinking_tokens_total: i64,
}
14021
/// Per-message metrics record; the input to `AnalyticsRollupAggregator::record`.
///
/// NOTE(review): presumably mirrors one row of a message-metrics table —
/// confirm against the code that inserts these entries.
#[derive(Debug, Clone)]
pub struct MessageMetricsEntry {
    pub message_id: i64,
    pub created_at_ms: i64,
    /// Bucket id used for the hourly rollup.
    pub hour_id: i64,
    /// Bucket id used for the daily and per-model daily rollups.
    pub day_id: i64,
    pub agent_slug: String,
    pub workspace_id: i64,
    pub source_id: String,
    /// Compared against `"user"`, `"assistant"` and `"agent"` when rolling up.
    pub role: String,
    pub content_chars: i64,
    pub content_tokens_est: i64,
    pub model_name: Option<String>,
    pub model_family: String,
    pub model_tier: String,
    pub provider: String,
    pub api_input_tokens: Option<i64>,
    pub api_output_tokens: Option<i64>,
    pub api_cache_read_tokens: Option<i64>,
    pub api_cache_creation_tokens: Option<i64>,
    pub api_thinking_tokens: Option<i64>,
    pub api_service_tier: Option<String>,
    /// The rollup treats the `api_*` token fields as authoritative only when
    /// this equals `"api"`.
    pub api_data_source: String,
    pub tool_call_count: i64,
    pub has_tool_calls: bool,
    pub has_plan: bool,
}
14050
/// Accumulates [`UsageRollupDelta`] values for three rollups at once.
#[derive(Debug, Default)]
pub struct AnalyticsRollupAggregator {
    /// Keyed by `(hour_id, agent_slug, workspace_id, source_id)`.
    hourly: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    /// Keyed by `(day_id, agent_slug, workspace_id, source_id)`.
    daily: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    /// Keyed by `(day_id, agent_slug, workspace_id, source_id, model_family, model_tier)`.
    models_daily: HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
}
14061
14062impl AnalyticsRollupAggregator {
14063 pub fn new() -> Self {
14064 Self::default()
14065 }
14066
14067 pub fn record(&mut self, entry: &MessageMetricsEntry) {
14069 let content_est = entry.content_tokens_est;
14070 let api_total = entry.api_input_tokens.unwrap_or(0)
14071 + entry.api_output_tokens.unwrap_or(0)
14072 + entry.api_cache_read_tokens.unwrap_or(0)
14073 + entry.api_cache_creation_tokens.unwrap_or(0)
14074 + entry.api_thinking_tokens.unwrap_or(0);
14075 let is_api = entry.api_data_source == "api";
14076 let is_user = entry.role == "user";
14077 let is_assistant = entry.role == "assistant" || entry.role == "agent";
14078
14079 for (map, bucket_id) in [
14081 (&mut self.hourly, entry.hour_id),
14082 (&mut self.daily, entry.day_id),
14083 ] {
14084 let key = (
14085 bucket_id,
14086 entry.agent_slug.clone(),
14087 entry.workspace_id,
14088 entry.source_id.clone(),
14089 );
14090 let d = map.entry(key).or_default();
14091 d.message_count += 1;
14092 if is_user {
14093 d.user_message_count += 1;
14094 d.content_tokens_est_user += content_est;
14095 }
14096 if is_assistant {
14097 d.assistant_message_count += 1;
14098 d.content_tokens_est_assistant += content_est;
14099 }
14100 d.tool_call_count += entry.tool_call_count;
14101 if entry.has_plan {
14102 d.plan_message_count += 1;
14103 d.plan_content_tokens_est_total += content_est;
14104 if is_api {
14105 d.plan_api_tokens_total += api_total;
14106 }
14107 }
14108 if is_api {
14109 d.api_coverage_message_count += 1;
14110 d.api_tokens_total += api_total;
14111 d.api_input_tokens_total += entry.api_input_tokens.unwrap_or(0);
14112 d.api_output_tokens_total += entry.api_output_tokens.unwrap_or(0);
14113 d.api_cache_read_tokens_total += entry.api_cache_read_tokens.unwrap_or(0);
14114 d.api_cache_creation_tokens_total += entry.api_cache_creation_tokens.unwrap_or(0);
14115 d.api_thinking_tokens_total += entry.api_thinking_tokens.unwrap_or(0);
14116 }
14117 d.content_tokens_est_total += content_est;
14118 }
14119
14120 let model_key = (
14121 entry.day_id,
14122 entry.agent_slug.clone(),
14123 entry.workspace_id,
14124 entry.source_id.clone(),
14125 entry.model_family.clone(),
14126 entry.model_tier.clone(),
14127 );
14128 let d = self.models_daily.entry(model_key).or_default();
14129 d.message_count += 1;
14130 if is_user {
14131 d.user_message_count += 1;
14132 d.content_tokens_est_user += content_est;
14133 }
14134 if is_assistant {
14135 d.assistant_message_count += 1;
14136 d.content_tokens_est_assistant += content_est;
14137 }
14138 d.tool_call_count += entry.tool_call_count;
14139 if entry.has_plan {
14140 d.plan_message_count += 1;
14141 d.plan_content_tokens_est_total += content_est;
14142 if is_api {
14143 d.plan_api_tokens_total += api_total;
14144 }
14145 }
14146 if is_api {
14147 d.api_coverage_message_count += 1;
14148 d.api_tokens_total += api_total;
14149 d.api_input_tokens_total += entry.api_input_tokens.unwrap_or(0);
14150 d.api_output_tokens_total += entry.api_output_tokens.unwrap_or(0);
14151 d.api_cache_read_tokens_total += entry.api_cache_read_tokens.unwrap_or(0);
14152 d.api_cache_creation_tokens_total += entry.api_cache_creation_tokens.unwrap_or(0);
14153 d.api_thinking_tokens_total += entry.api_thinking_tokens.unwrap_or(0);
14154 }
14155 d.content_tokens_est_total += content_est;
14156 }
14157
14158 pub fn is_empty(&self) -> bool {
14159 self.hourly.is_empty() && self.daily.is_empty() && self.models_daily.is_empty()
14160 }
14161
14162 pub fn hourly_entry_count(&self) -> usize {
14163 self.hourly.len()
14164 }
14165
14166 pub fn daily_entry_count(&self) -> usize {
14167 self.daily.len()
14168 }
14169
14170 pub fn models_daily_entry_count(&self) -> usize {
14171 self.models_daily.len()
14172 }
14173}
14174
14175fn has_plan_for_role(role: &str, content: &str) -> bool {
14179 let role = role.trim();
14180 (role.eq_ignore_ascii_case("assistant") || role.eq_ignore_ascii_case("agent"))
14181 && has_plan_heuristic(content)
14182}
14183
14184fn has_plan_heuristic(content: &str) -> bool {
14191 if content.len() < 24 {
14192 return false;
14193 }
14194
14195 let lower = content.to_lowercase();
14196
14197 let looks_like_tool_blob = lower.contains("```")
14199 || lower.contains("\"tool\"")
14200 || lower.contains("stdout:")
14201 || lower.contains("stderr:")
14202 || lower.contains("exit code:");
14203
14204 let mut lines: Vec<&str> = Vec::with_capacity(60);
14205 let mut in_fenced_code = false;
14206 for raw in lower.lines() {
14207 let line = raw.trim();
14208 if line.starts_with("```") {
14209 in_fenced_code = !in_fenced_code;
14210 continue;
14211 }
14212 if in_fenced_code || line.is_empty() {
14213 continue;
14214 }
14215 lines.push(line);
14216 if lines.len() >= 60 {
14217 break;
14218 }
14219 }
14220
14221 let header_pos = lines.iter().position(|line| {
14222 line.starts_with("## plan")
14223 || line.starts_with("# plan")
14224 || line.starts_with("plan:")
14225 || line.starts_with("implementation plan")
14226 || line.starts_with("next steps:")
14227 || line.starts_with("action plan:")
14228 });
14229 let preview_top = lines.iter().take(8).copied().collect::<Vec<_>>().join("\n");
14230 let header_near_top = header_pos.is_some_and(|idx| idx <= 6) || preview_top.contains("plan:");
14231
14232 if !header_near_top {
14233 return false;
14234 }
14235 if looks_like_tool_blob && header_pos.is_none() {
14236 return false;
14237 }
14238
14239 let numbered_steps = lines
14240 .iter()
14241 .filter(|line| is_numbered_step_line(line))
14242 .count();
14243 let bullet_steps = lines
14244 .iter()
14245 .filter(|line| {
14246 line.starts_with("- ")
14247 || line.starts_with("* ")
14248 || line.starts_with("+ ")
14249 || line.starts_with("- [ ] ")
14250 || line.starts_with("- [x] ")
14251 })
14252 .count();
14253
14254 numbered_steps >= 2 || (numbered_steps >= 1 && bullet_steps >= 1) || bullet_steps >= 3
14255}
14256
/// Returns `true` for lines such as `1. foo` or `12) bar`.
///
/// After leading whitespace the line must start with one to three ASCII
/// digits followed by `". "` or `") "`.
fn is_numbered_step_line(line: &str) -> bool {
    let body = line.trim_start();
    let after_digits = body.trim_start_matches(|c: char| c.is_ascii_digit());
    // ASCII digits are one byte each, so the byte difference is the digit count.
    let digit_count = body.len() - after_digits.len();
    (1..=3).contains(&digit_count)
        && (after_digits.starts_with(". ") || after_digits.starts_with(") "))
}
14266
/// Per-message token usage record.
///
/// NOTE(review): not constructed or consumed in this section; presumably one
/// row of a token-usage ledger — confirm against the insert path.
#[derive(Debug, Clone)]
pub struct TokenUsageEntry {
    pub message_id: i64,
    pub conversation_id: i64,
    pub agent_id: i64,
    pub workspace_id: Option<i64>,
    pub source_id: String,
    pub timestamp_ms: i64,
    pub day_id: i64,
    pub model_name: Option<String>,
    pub model_family: Option<String>,
    pub model_tier: Option<String>,
    pub service_tier: Option<String>,
    pub provider: Option<String>,
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_creation_tokens: Option<i64>,
    pub thinking_tokens: Option<i64>,
    pub total_tokens: Option<i64>,
    pub estimated_cost_usd: Option<f64>,
    pub role: String,
    pub content_chars: i64,
    pub has_tool_calls: bool,
    pub tool_call_count: u32,
    pub data_source: String,
}
14295
/// One row of the `model_pricing` table, as loaded by `PricingTable::franken_load`.
#[derive(Debug, Clone)]
pub struct PricingEntry {
    /// SQL `LIKE`-style pattern (`%` and `_` wildcards) matched against model names.
    pub model_pattern: String,
    pub provider: String,
    /// Price per million input tokens.
    pub input_cost_per_mtok: f64,
    /// Price per million output tokens.
    pub output_cost_per_mtok: f64,
    /// Price per million cache-read tokens, when the row defines one.
    pub cache_read_cost_per_mtok: Option<f64>,
    /// Price per million cache-creation tokens, when the row defines one.
    pub cache_creation_cost_per_mtok: Option<f64>,
    /// `effective_date` converted to days since 2020-01-01.
    pub effective_day_id: i64,
}
14312
/// Counters describing how many cost computations found a price.
#[derive(Debug, Clone, Default)]
pub struct PricingDiagnostics {
    /// Number of entries for which a price was found.
    pub priced_count: u64,
    /// Number of entries for which no price was found.
    pub unpriced_count: u64,
    /// Unpriced occurrences per model name; a missing name is stored as `"(none)"`.
    pub unknown_models: HashMap<String, u64>,
}
14321
14322impl PricingDiagnostics {
14323 fn record_priced(&mut self) {
14324 self.priced_count += 1;
14325 }
14326
14327 fn record_unpriced(&mut self, model_name: Option<&str>) {
14328 self.unpriced_count += 1;
14329 let key = model_name.unwrap_or("(none)").to_string();
14330 *self.unknown_models.entry(key).or_insert(0) += 1;
14331 }
14332
14333 pub fn log_summary(&self) {
14335 let total = self.priced_count + self.unpriced_count;
14336 if total == 0 {
14337 return;
14338 }
14339 let pct = (self.priced_count as f64 / total as f64) * 100.0;
14340 tracing::info!(
14341 target: "cass::analytics::pricing",
14342 priced = self.priced_count,
14343 unpriced = self.unpriced_count,
14344 total = total,
14345 coverage_pct = format!("{pct:.1}%"),
14346 "pricing coverage"
14347 );
14348 if !self.unknown_models.is_empty() {
14349 let mut sorted: Vec<_> = self.unknown_models.iter().collect();
14350 sorted.sort_by(|a, b| b.1.cmp(a.1));
14351 for (model, count) in sorted.iter().take(5) {
14352 tracing::debug!(
14353 target: "cass::analytics::pricing",
14354 model = model.as_str(),
14355 count = count,
14356 "unknown model (no pricing)"
14357 );
14358 }
14359 }
14360 }
14361}
14362
/// In-memory copy of the `model_pricing` table used to price token usage.
#[derive(Debug, Clone)]
pub struct PricingTable {
    /// Pricing rows, in the order they were loaded (`effective_date` descending).
    entries: Vec<PricingEntry>,
}
14368
14369impl PricingTable {
14370 pub fn load(conn: &FrankenConnection) -> Result<Self> {
14372 Self::franken_load(conn)
14373 }
14374
14375 pub fn franken_load(conn: &FrankenConnection) -> Result<Self> {
14377 let rows = conn.query(
14378 "SELECT model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
14379 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
14380 FROM model_pricing
14381 ORDER BY effective_date DESC",
14382 )?;
14383 let mut entries = Vec::with_capacity(rows.len());
14384 for row in &rows {
14385 let effective_date: String = row.get_typed(6)?;
14386 let effective_day_id = date_str_to_day_id(&effective_date)?;
14387 entries.push(PricingEntry {
14388 model_pattern: row.get_typed(0)?,
14389 provider: row.get_typed(1)?,
14390 input_cost_per_mtok: row.get_typed(2)?,
14391 output_cost_per_mtok: row.get_typed(3)?,
14392 cache_read_cost_per_mtok: row.get_typed(4)?,
14393 cache_creation_cost_per_mtok: row.get_typed(5)?,
14394 effective_day_id,
14395 });
14396 }
14397 Ok(Self { entries })
14398 }
14399
14400 pub fn lookup(&self, model_name: &str, message_day_id: i64) -> Option<&PricingEntry> {
14408 let mut best: Option<&PricingEntry> = None;
14409
14410 for entry in &self.entries {
14411 if entry.effective_day_id > message_day_id {
14412 continue;
14413 }
14414 if !sql_like_match(model_name, &entry.model_pattern) {
14415 continue;
14416 }
14417
14418 match best {
14419 None => best = Some(entry),
14420 Some(current) => {
14421 if entry.effective_day_id > current.effective_day_id
14422 || (entry.effective_day_id == current.effective_day_id
14423 && entry.model_pattern.len() > current.model_pattern.len())
14424 {
14425 best = Some(entry);
14426 }
14427 }
14428 }
14429 }
14430
14431 best
14432 }
14433
14434 pub fn compute_cost(
14438 &self,
14439 model_name: Option<&str>,
14440 message_day_id: i64,
14441 input_tokens: Option<i64>,
14442 output_tokens: Option<i64>,
14443 cache_read_tokens: Option<i64>,
14444 cache_creation_tokens: Option<i64>,
14445 ) -> Option<f64> {
14446 let model = model_name?;
14447 let pricing = self.lookup(model, message_day_id)?;
14448
14449 if input_tokens.is_none() && output_tokens.is_none() {
14450 return None;
14451 }
14452
14453 let mut cost = 0.0;
14454 let cache_read = cache_read_tokens.unwrap_or(0);
14455 let cache_creation = cache_creation_tokens.unwrap_or(0);
14456 let non_cache_input = input_tokens
14459 .unwrap_or(0)
14460 .saturating_sub(cache_read)
14461 .saturating_sub(cache_creation)
14462 .max(0);
14463 cost += non_cache_input as f64 * pricing.input_cost_per_mtok / 1_000_000.0;
14464 cost += output_tokens.unwrap_or(0) as f64 * pricing.output_cost_per_mtok / 1_000_000.0;
14465
14466 if let Some(cache_price) = pricing.cache_read_cost_per_mtok {
14467 cost += cache_read as f64 * cache_price / 1_000_000.0;
14468 }
14469 if let Some(cache_price) = pricing.cache_creation_cost_per_mtok {
14470 cost += cache_creation as f64 * cache_price / 1_000_000.0;
14471 }
14472
14473 Some(cost)
14474 }
14475
14476 pub fn is_empty(&self) -> bool {
14478 self.entries.is_empty()
14479 }
14480}
14481
14482fn date_str_to_day_id(s: &str) -> Result<i64> {
14485 use chrono::NaiveDate;
14486 const EPOCH_2020: NaiveDate = match NaiveDate::from_ymd_opt(2020, 1, 1) {
14487 Some(d) => d,
14488 None => unreachable!(),
14489 };
14490 NaiveDate::parse_from_str(s, "%Y-%m-%d")
14491 .map(|d| (d - EPOCH_2020).num_days())
14492 .with_context(|| format!("invalid effective_date '{s}'"))
14493}
14494
14495fn sql_like_match(value: &str, pattern: &str) -> bool {
14497 sql_like_match_bytes(
14498 value.to_ascii_lowercase().as_bytes(),
14499 pattern.to_ascii_lowercase().as_bytes(),
14500 )
14501}
14502
/// Returns the byte length of a UTF-8 sequence from its first byte `b`.
///
/// The classification uses thresholds only: bytes below `0x80` are 1, below
/// `0xE0` are 2, below `0xF0` are 3, and everything else is 4. Continuation
/// bytes (`0x80..=0xBF`) therefore also report 2.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0x80..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
14515
14516fn sql_like_match_bytes(val: &[u8], pat: &[u8]) -> bool {
14517 if pat.is_empty() {
14518 return val.is_empty();
14519 }
14520 match pat[0] {
14521 b'%' => {
14522 let mut p = 1;
14523 while p < pat.len() && pat[p] == b'%' {
14524 p += 1;
14525 }
14526 let rest = &pat[p..];
14527 let mut i = 0;
14529 while i <= val.len() {
14530 if sql_like_match_bytes(&val[i..], rest) {
14531 return true;
14532 }
14533 if i < val.len() {
14534 i += utf8_char_len(val[i]);
14535 } else {
14536 break;
14537 }
14538 }
14539 false
14540 }
14541 b'_' => {
14542 if val.is_empty() {
14544 return false;
14545 }
14546 let char_len = utf8_char_len(val[0]);
14547 val.len() >= char_len && sql_like_match_bytes(&val[char_len..], &pat[1..])
14548 }
14549 c => !val.is_empty() && val[0] == c && sql_like_match_bytes(&val[1..], &pat[1..]),
14550 }
14551}
14552
14553fn rebuild_batch_size_env(var: &str, default: usize) -> usize {
14554 dotenvy::var(var)
14555 .ok()
14556 .and_then(|raw| raw.parse::<usize>().ok())
14557 .filter(|value| *value > 0)
14558 .unwrap_or(default)
14559}
14560
14561fn is_out_of_memory_error<E: OutOfMemoryProbe + ?Sized>(err: &E) -> bool {
14571 err.is_out_of_memory()
14572}
14573
/// Implemented by error types that can tell whether they represent an
/// out-of-memory failure.
trait OutOfMemoryProbe {
    /// Returns `true` when this error represents an out-of-memory condition.
    fn is_out_of_memory(&self) -> bool;
}
14577
14578impl OutOfMemoryProbe for anyhow::Error {
14579 fn is_out_of_memory(&self) -> bool {
14580 self.chain().any(|cause| {
14581 if cause
14582 .downcast_ref::<frankensqlite::FrankenError>()
14583 .is_some_and(|err| matches!(err, frankensqlite::FrankenError::OutOfMemory))
14584 {
14585 return true;
14586 }
14587 is_exact_out_of_memory_message(&cause.to_string())
14588 })
14589 }
14590}
14591
14592impl OutOfMemoryProbe for frankensqlite::FrankenError {
14593 fn is_out_of_memory(&self) -> bool {
14594 matches!(self, frankensqlite::FrankenError::OutOfMemory)
14595 }
14596}
14597
/// Returns `true` when `message`, after trimming surrounding whitespace, is
/// exactly `out of memory` or `not enough memory` (ASCII case-insensitive).
///
/// Messages that merely contain one of those phrases do not match.
fn is_exact_out_of_memory_message(message: &str) -> bool {
    let trimmed = message.trim();
    ["out of memory", "not enough memory"]
        .iter()
        .any(|known| trimmed.eq_ignore_ascii_case(known))
}
14604
/// Session, message and character totals for a single day.
///
/// NOTE(review): presumably read back from the `daily_stats` table — confirm
/// against the query that builds it.
#[derive(Debug, Clone)]
pub struct DailyCount {
    pub day_id: i64,
    pub sessions: i64,
    pub messages: i64,
    pub chars: i64,
}
14617
/// Summary of an analytics rebuild: row counts per table plus timing.
///
/// NOTE(review): the producing function is outside this section; field
/// meanings are taken from their names — confirm there.
#[derive(Debug, Clone)]
pub struct AnalyticsRebuildResult {
    pub message_metrics_rows: usize,
    pub usage_hourly_rows: usize,
    pub usage_daily_rows: usize,
    pub usage_models_daily_rows: usize,
    pub elapsed_ms: u64,
    pub messages_per_sec: f64,
}
14628
/// Outcome of rebuilding the `daily_stats` table.
#[derive(Debug, Clone)]
pub struct DailyStatsRebuildResult {
    /// Number of rows in `daily_stats` after the rebuild.
    pub rows_created: i64,
    /// Sum of `session_count` over the `('all', 'all')` agent/source rows.
    pub total_sessions: i64,
}
14635
/// Counts of rows removed by an agent archive purge.
///
/// NOTE(review): the purge itself is outside this section — confirm what is
/// counted against the producer.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AgentArchivePurgeResult {
    pub conversations_deleted: usize,
    pub messages_deleted: usize,
}
14642
/// Health snapshot of the materialized `daily_stats` data.
///
/// NOTE(review): the producer is outside this section. `drift` presumably
/// compares `conversation_count` with `materialized_total` — confirm the
/// exact definition there.
#[derive(Debug, Clone)]
pub struct DailyStatsHealth {
    pub populated: bool,
    pub row_count: i64,
    pub oldest_update_ms: Option<i64>,
    pub conversation_count: i64,
    pub materialized_total: i64,
    pub drift: i64,
}
14653
/// Batch size for FTS5 operations.
///
/// NOTE(review): not referenced in this section — confirm which insert path
/// consumes it.
const FTS5_BATCH_SIZE: usize = 100;
14662
/// Message fields carried through an FTS rebuild.
///
/// NOTE(review): constructed outside this section; presumably one scanned
/// `messages` row — confirm against the rebuild query.
#[derive(Debug, Clone)]
struct FtsRebuildMessageRow {
    rowid: i64,
    message_id: i64,
    conversation_id: i64,
    content: String,
    created_at: Option<i64>,
}
14671
/// Conversation-level fields needed when building FTS entries.
///
/// NOTE(review): constructed outside this section; presumably a projection of
/// a `conversations` row — confirm against the rebuild query.
#[derive(Debug, Clone)]
struct FtsConversationProjection {
    title: String,
    agent_id: Option<i64>,
    workspace_id: Option<i64>,
    source_path: String,
}
14679
/// One document to insert into the full-text index.
///
/// See [`FtsEntry::from_message`] for how the fields are derived from a
/// message and its conversation.
#[derive(Debug, Clone)]
pub struct FtsEntry {
    /// Message content.
    pub content: String,
    /// Conversation title; empty when the conversation has none.
    pub title: String,
    /// Agent slug of the conversation.
    pub agent: String,
    /// Workspace path as a string; empty when the conversation has none.
    pub workspace: String,
    /// Source path of the conversation as a string.
    pub source_path: String,
    /// Message timestamp, falling back to the conversation start time.
    pub created_at: Option<i64>,
    pub message_id: i64,
}
14691
14692impl FtsEntry {
14693 pub fn from_message(message_id: i64, msg: &Message, conv: &Conversation) -> Self {
14695 FtsEntry {
14696 content: msg.content.clone(),
14697 title: conv.title.clone().unwrap_or_default(),
14698 agent: conv.agent_slug.clone(),
14699 workspace: conv
14700 .workspace
14701 .as_ref()
14702 .map(|p| p.to_string_lossy().into_owned())
14703 .unwrap_or_default(),
14704 source_path: path_to_string(&conv.source_path),
14705 created_at: msg.created_at.or(conv.started_at),
14706 message_id,
14707 }
14708 }
14709}
14710
/// Document-count limit for a pending FTS entry batch.
///
/// NOTE(review): not referenced in this section — presumably the threshold at
/// which pending entries are flushed; confirm against the caller.
const FTS_ENTRY_BATCH_MAX_DOCS: usize = 512;
/// Character limit (1 MiB worth) for a pending FTS entry batch.
///
/// NOTE(review): not referenced in this section — presumably a flush
/// threshold alongside the document limit; confirm against the caller.
const FTS_ENTRY_BATCH_MAX_CHARS: usize = 1024 * 1024;

/// Batch size returned by `fts_rebuild_batch_size` when
/// `CASS_FTS_REBUILD_BATCH_SIZE` is unset or invalid.
const FTS_REBUILD_BATCH_SIZE_DEFAULT: usize = 5_000;
14719
14720fn fts_rebuild_batch_size() -> usize {
14723 dotenvy::var("CASS_FTS_REBUILD_BATCH_SIZE")
14724 .ok()
14725 .and_then(|v| v.parse::<usize>().ok())
14726 .filter(|&n| n > 0)
14727 .unwrap_or(FTS_REBUILD_BATCH_SIZE_DEFAULT)
14728}
14729
14730fn flush_pending_fts_entries(
14731 storage: &FrankenStorage,
14732 tx: &FrankenTransaction<'_>,
14733 entries: &mut Vec<FtsEntry>,
14734 pending_chars: &mut usize,
14735 inserted_total: &mut usize,
14736) -> Result<()> {
14737 if entries.is_empty() {
14738 return Ok(());
14739 }
14740
14741 if storage.fts_messages_present_cached(tx) {
14742 *inserted_total += franken_batch_insert_fts(tx, entries)?;
14743 }
14744 entries.clear();
14745 *pending_chars = 0;
14746 Ok(())
14747}
14748
/// Converts a path to an owned `String`, replacing invalid UTF-8 lossily.
fn path_to_string<P: AsRef<Path>>(p: P) -> String {
    let path: &Path = p.as_ref();
    String::from(path.to_string_lossy())
}
14752
14753fn role_str(role: &MessageRole) -> String {
14754 role_as_str(role).to_owned()
14755}
14756
14757fn role_as_str(role: &MessageRole) -> &str {
14758 match role {
14759 MessageRole::User => "user",
14760 MessageRole::Agent => "agent",
14761 MessageRole::Tool => "tool",
14762 MessageRole::System => "system",
14763 MessageRole::Other(v) => v.as_str(),
14764 }
14765}
14766
14767fn agent_kind_str(kind: AgentKind) -> String {
14768 match kind {
14769 AgentKind::Cli => "cli".into(),
14770 AgentKind::VsCode => "vscode".into(),
14771 AgentKind::Hybrid => "hybrid".into(),
14772 }
14773}
14774
14775#[cfg(test)]
14780mod tests {
14781 use super::*;
14782 use serial_test::serial;
14783 use tempfile::TempDir;
14784
    /// Restores one environment variable to its earlier state when dropped.
    ///
    /// Created by `set_env_var`.
    struct EnvGuard {
        /// Name of the environment variable being guarded.
        key: &'static str,
        /// Value observed before the override; `None` means the variable is
        /// removed again on drop.
        previous: Option<String>,
    }
14789
14790 impl Drop for EnvGuard {
14791 fn drop(&mut self) {
14792 if let Some(value) = &self.previous {
14793 unsafe {
14795 std::env::set_var(self.key, value);
14796 }
14797 } else {
14798 unsafe {
14800 std::env::remove_var(self.key);
14801 }
14802 }
14803 }
14804 }
14805
14806 fn set_env_var(key: &'static str, value: impl AsRef<str>) -> EnvGuard {
14807 let previous = dotenvy::var(key).ok();
14808 unsafe {
14810 std::env::set_var(key, value.as_ref());
14811 }
14812 EnvGuard { key, previous }
14813 }
14814
14815 #[test]
14816 fn doctor_mutation_open_guard_only_targets_canonical_archive_db() {
14817 let dir = TempDir::new().unwrap();
14818 let canonical = dir.path().join("agent_search.db");
14819 let scratch = dir.path().join("scratch.db");
14820
14821 assert_eq!(
14822 doctor_mutation_lock_path_for_db_open(&canonical),
14823 Some(dir.path().join("doctor/locks/doctor-repair.lock"))
14824 );
14825 assert_eq!(doctor_mutation_lock_path_for_db_open(&scratch), None);
14826 }
14827
14828 #[test]
14829 fn doctor_lock_metadata_pid_detection_is_exact() {
14830 let current = std::process::id();
14831
14832 assert!(doctor_lock_metadata_pid_is_current_process(&format!(
14833 "schema_version=1\npid={current}\nmode=safe_auto_run\n"
14834 )));
14835 assert!(!doctor_lock_metadata_pid_is_current_process(
14836 "schema_version=1\npid=not-a-pid\n"
14837 ));
14838 assert!(!doctor_lock_metadata_pid_is_current_process(&format!(
14839 "pid={}\n",
14840 current.saturating_add(1)
14841 )));
14842 }
14843
14844 #[test]
14845 fn doctor_storage_open_refuses_active_doctor_mutation_lock_from_other_process() {
14846 use std::io::Write as _;
14847
14848 let dir = TempDir::new().unwrap();
14849 let db_path = dir.path().join("agent_search.db");
14850 {
14851 let storage = FrankenStorage::open(&db_path).unwrap();
14852 storage.close().unwrap();
14853 }
14854
14855 let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
14856 let mut lock_file = fs::OpenOptions::new()
14857 .create(true)
14858 .truncate(false)
14859 .read(true)
14860 .write(true)
14861 .open(&lock_path)
14862 .unwrap();
14863 fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
14864 lock_file.set_len(0).unwrap();
14865 lock_file.write_all(b"schema_version=1\npid=1\n").unwrap();
14866 lock_file.sync_all().unwrap();
14867
14868 let err =
14869 open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
14870 .expect_err("active doctor mutation lock must block canonical DB opens");
14871 let message = err.to_string();
14872 assert!(
14873 message.contains("doctor mutation lock") && message.contains("active"),
14874 "error should identify the active doctor mutation lock: {message}"
14875 );
14876
14877 fs2::FileExt::unlock(&lock_file).unwrap();
14878 }
14879
14880 #[test]
14881 fn doctor_storage_open_allows_current_doctor_process_probe() {
14882 use std::io::Write as _;
14883
14884 let dir = TempDir::new().unwrap();
14885 let db_path = dir.path().join("agent_search.db");
14886 {
14887 let storage = FrankenStorage::open(&db_path).unwrap();
14888 storage.close().unwrap();
14889 }
14890
14891 let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
14892 let mut lock_file = fs::OpenOptions::new()
14893 .create(true)
14894 .truncate(false)
14895 .read(true)
14896 .write(true)
14897 .open(&lock_path)
14898 .unwrap();
14899 fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
14900 lock_file.set_len(0).unwrap();
14901 write!(lock_file, "schema_version=1\npid={}\n", std::process::id()).unwrap();
14902 lock_file.sync_all().unwrap();
14903
14904 let conn =
14905 open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
14906 .expect(
14907 "doctor process must be able to run post-repair read probes under its own lock",
14908 );
14909 drop(conn);
14910
14911 fs2::FileExt::unlock(&lock_file).unwrap();
14912 }
14913
14914 #[test]
14915 fn autocommit_retain_disable_tries_compat_then_canonical_pragma() {
14916 let mut attempts = Vec::new();
14917
14918 let selected = disable_autocommit_retain(|pragma| {
14919 attempts.push(pragma);
14920 if pragma == "PRAGMA fsqlite.autocommit_retain = OFF;" {
14921 Err("compat namespace unavailable")
14922 } else {
14923 Ok(())
14924 }
14925 })
14926 .expect("canonical pragma should disable autocommit retain");
14927
14928 assert_eq!(selected, "PRAGMA autocommit_retain = OFF;");
14929 assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
14930 }
14931
14932 #[test]
14933 fn autocommit_retain_disable_fails_closed_when_no_pragma_works() {
14934 let mut attempts = Vec::new();
14935
14936 let err = disable_autocommit_retain(|pragma| {
14937 attempts.push(pragma);
14938 Err("unsupported pragma")
14939 })
14940 .expect_err("unsupported autocommit retain controls should fail closed");
14941
14942 assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
14943 let message = err.to_string();
14944 assert!(
14945 message.contains("refusing to keep a long-lived MVCC connection"),
14946 "error should force callers away from unbounded snapshot retention: {message}"
14947 );
14948 assert!(
14949 message.contains("PRAGMA fsqlite.autocommit_retain = OFF;")
14950 && message.contains("PRAGMA autocommit_retain = OFF;"),
14951 "error should preserve attempted PRAGMAs for diagnostics: {message}"
14952 );
14953 }
14954
14955 fn rusqlite_test_fixture_conn(db_path: &Path) -> rusqlite::Connection {
14964 rusqlite::Connection::open(db_path).expect("open rusqlite test fixture connection")
14965 }
14966
14967 fn seed_historical_db_direct(
14968 db_path: &Path,
14969 conversations: &[crate::model::types::Conversation],
14970 ) {
14971 if let Some(parent) = db_path.parent() {
14972 fs::create_dir_all(parent).unwrap();
14973 }
14974
14975 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
14976 conn.execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA).unwrap();
14977 conn.execute_compat(
14978 "INSERT INTO agents(id, slug, name, version, kind, created_at, updated_at)
14979 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
14980 fparams![1_i64, "codex", "Codex", "0.2.3", "cli", 0_i64, 0_i64],
14981 )
14982 .unwrap();
14983
14984 let mut next_message_id = 1_i64;
14985 for (conv_index, conv) in conversations.iter().enumerate() {
14986 let conversation_id = i64::try_from(conv_index + 1).unwrap();
14987 let workspace_id = conv.workspace.as_ref().map(|workspace| {
14988 let workspace_id = conversation_id;
14989 let workspace_path = workspace.to_string_lossy().into_owned();
14990 conn.execute_compat(
14991 "INSERT INTO workspaces(id, path, display_name) VALUES(?1, ?2, ?3)",
14992 fparams![
14993 workspace_id,
14994 workspace_path.as_str(),
14995 workspace_path.as_str()
14996 ],
14997 )
14998 .unwrap();
14999 workspace_id
15000 });
15001 let source_path = conv.source_path.to_string_lossy().into_owned();
15002 let metadata_json = conv.metadata_json.to_string();
15003 conn.execute_compat(
15004 "INSERT INTO conversations (
15005 id, agent_id, workspace_id, source_id, external_id, title, source_path,
15006 started_at, ended_at, approx_tokens, metadata_json, origin_host
15007 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
15008 fparams![
15009 conversation_id,
15010 1_i64,
15011 workspace_id,
15012 conv.source_id.as_str(),
15013 conv.external_id.as_deref(),
15014 conv.title.as_deref(),
15015 source_path.as_str(),
15016 conv.started_at,
15017 conv.ended_at,
15018 conv.approx_tokens,
15019 metadata_json.as_str(),
15020 conv.origin_host.as_deref()
15021 ],
15022 )
15023 .unwrap();
15024
15025 for msg in &conv.messages {
15026 let extra_json = msg.extra_json.to_string();
15027 let role = role_str(&msg.role);
15028 conn.execute_compat(
15029 "INSERT INTO messages(
15030 id, conversation_id, idx, role, author, created_at, content, extra_json
15031 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
15032 fparams![
15033 next_message_id,
15034 conversation_id,
15035 msg.idx,
15036 role.as_str(),
15037 msg.author.as_deref(),
15038 msg.created_at,
15039 msg.content.as_str(),
15040 extra_json.as_str()
15041 ],
15042 )
15043 .unwrap();
15044 next_message_id += 1;
15045 }
15046 }
15047 }
15048
15049 #[test]
15054 fn is_user_data_file_detects_bookmarks() {
15055 assert!(is_user_data_file(Path::new("/data/bookmarks.db")));
15056 assert!(is_user_data_file(Path::new("bookmarks.db")));
15057 }
15058
15059 #[test]
15060 fn is_user_data_file_detects_tui_state() {
15061 assert!(is_user_data_file(Path::new("/data/tui_state.json")));
15062 }
15063
15064 #[test]
15065 fn is_user_data_file_detects_sources_toml() {
15066 assert!(is_user_data_file(Path::new("/config/sources.toml")));
15067 }
15068
15069 #[test]
15070 fn is_user_data_file_detects_env() {
15071 assert!(is_user_data_file(Path::new(".env")));
15072 }
15073
15074 #[test]
15075 fn is_user_data_file_rejects_other_files() {
15076 assert!(!is_user_data_file(Path::new("index.db")));
15077 assert!(!is_user_data_file(Path::new("conversations.db")));
15078 assert!(!is_user_data_file(Path::new("random.txt")));
15079 }
15080
15081 #[test]
15086 fn create_backup_returns_none_for_nonexistent() {
15087 let dir = TempDir::new().unwrap();
15088 let db_path = dir.path().join("nonexistent.db");
15089 let result = create_backup(&db_path).unwrap();
15090 assert!(result.is_none());
15091 }
15092
15093 #[test]
15094 fn create_backup_creates_named_file() {
15095 let dir = TempDir::new().unwrap();
15096 let db_path = dir.path().join("test.db");
15097 std::fs::write(&db_path, b"test data").unwrap();
15098
15099 let backup_path = create_backup(&db_path).unwrap();
15100 assert!(backup_path.is_some());
15101 let backup = backup_path.unwrap();
15102 assert!(backup.exists());
15103 assert!(
15104 backup
15105 .file_name()
15106 .unwrap()
15107 .to_str()
15108 .unwrap()
15109 .contains("backup")
15110 );
15111 }
15112
15113 #[test]
15114 fn create_backup_paths_are_unique() {
15115 let dir = TempDir::new().unwrap();
15116 let db_path = dir.path().join("test.db");
15117 std::fs::write(&db_path, b"test data").unwrap();
15118
15119 let first = create_backup(&db_path).unwrap().unwrap();
15120 let second = create_backup(&db_path).unwrap().unwrap();
15121
15122 assert_ne!(first, second);
15123 assert!(first.exists());
15124 assert!(second.exists());
15125 }
15126
15127 #[test]
15128 fn lexical_rebuild_messages_query_uses_conversation_idx_access_path() {
15129 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
15130 use std::path::PathBuf;
15131
15132 let dir = TempDir::new().unwrap();
15133 let db_path = dir.path().join("agent_search.db");
15134 let storage = SqliteStorage::open(&db_path).unwrap();
15135
15136 let agent = Agent {
15137 id: None,
15138 slug: "claude_code".into(),
15139 name: "Claude Code".into(),
15140 version: None,
15141 kind: AgentKind::Cli,
15142 };
15143 let agent_id = storage.ensure_agent(&agent).unwrap();
15144 let conversation = Conversation {
15145 id: None,
15146 agent_slug: "claude_code".into(),
15147 workspace: Some(PathBuf::from("/tmp/workspace")),
15148 external_id: Some("conv-1".into()),
15149 title: Some("Lexical rebuild".into()),
15150 source_path: PathBuf::from("/tmp/conv-1.jsonl"),
15151 started_at: Some(1_700_000_000_000),
15152 ended_at: Some(1_700_000_000_100),
15153 approx_tokens: None,
15154 metadata_json: serde_json::Value::Null,
15155 messages: vec![
15156 Message {
15157 id: None,
15158 idx: 0,
15159 role: MessageRole::User,
15160 author: Some("user".into()),
15161 created_at: Some(1_700_000_000_010),
15162 content: "first".into(),
15163 extra_json: serde_json::Value::Null,
15164 snippets: Vec::new(),
15165 },
15166 Message {
15167 id: None,
15168 idx: 1,
15169 role: MessageRole::Agent,
15170 author: Some("assistant".into()),
15171 created_at: Some(1_700_000_000_020),
15172 content: "second".into(),
15173 extra_json: serde_json::Value::Null,
15174 snippets: Vec::new(),
15175 },
15176 ],
15177 source_id: LOCAL_SOURCE_ID.into(),
15178 origin_host: None,
15179 };
15180 storage
15181 .insert_conversation_tree(agent_id, None, &conversation)
15182 .unwrap();
15183 let conversation_id = storage
15184 .conn
15185 .query_row_map(
15186 "SELECT id FROM conversations WHERE external_id = ?1",
15187 fparams!["conv-1"],
15188 |row| row.get_typed::<i64>(0),
15189 )
15190 .unwrap();
15191
15192 let opcodes: Vec<String> = storage
15193 .conn
15194 .query_map_collect(
15195 "EXPLAIN \
15196 SELECT id, idx, role, author, created_at, content \
15197 FROM messages \
15198 WHERE conversation_id = ?1 ORDER BY idx",
15199 fparams![conversation_id],
15200 |row| row.get_typed(1),
15201 )
15202 .unwrap();
15203
15204 assert!(
15205 opcodes.iter().any(|opcode| opcode == "SeekGE"),
15206 "expected lexical rebuild message fetch to seek into the conversation_id/idx access path, got {opcodes:?}"
15207 );
15208 assert!(
15209 !opcodes.iter().any(|opcode| opcode == "SorterOpen"),
15210 "expected lexical rebuild message fetch to avoid sorter temp b-trees, got {opcodes:?}"
15211 );
15212 }
15213
15214 #[test]
15215 fn schema_check_rebuild_classification_ignores_transient_errors() {
15216 assert!(!schema_check_error_requires_rebuild(
15217 &frankensqlite::FrankenError::Busy
15218 ));
15219 assert!(!schema_check_error_requires_rebuild(
15220 &frankensqlite::FrankenError::DatabaseLocked {
15221 path: PathBuf::from("/tmp/test.db"),
15222 }
15223 ));
15224 assert!(!schema_check_error_requires_rebuild(
15225 &frankensqlite::FrankenError::CannotOpen {
15226 path: PathBuf::from("/tmp/test.db"),
15227 }
15228 ));
15229 assert!(!schema_check_error_requires_rebuild(
15230 &frankensqlite::FrankenError::Io(std::io::Error::other("disk hiccup"))
15231 ));
15232 }
15233
15234 #[test]
15235 fn schema_check_rebuild_classification_keeps_corruption_errors() {
15236 assert!(schema_check_error_requires_rebuild(
15237 &frankensqlite::FrankenError::DatabaseCorrupt {
15238 detail: "bad header".to_string(),
15239 }
15240 ));
15241 assert!(schema_check_error_requires_rebuild(
15242 &frankensqlite::FrankenError::WalCorrupt {
15243 detail: "bad wal".to_string(),
15244 }
15245 ));
15246 assert!(schema_check_error_requires_rebuild(
15247 &frankensqlite::FrankenError::NotADatabase {
15248 path: PathBuf::from("/tmp/test.db"),
15249 }
15250 ));
15251 assert!(schema_check_error_requires_rebuild(
15252 &frankensqlite::FrankenError::ShortRead {
15253 expected: 4096,
15254 actual: 64,
15255 }
15256 ));
15257 }
15258
15259 #[test]
15260 fn create_backup_refuses_raw_copy_after_retryable_vacuum_errors() {
15261 let retryable_errors = [
15262 frankensqlite::FrankenError::Busy,
15263 frankensqlite::FrankenError::BusyRecovery,
15264 frankensqlite::FrankenError::BusySnapshot {
15265 conflicting_pages: "1,2".to_string(),
15266 },
15267 frankensqlite::FrankenError::DatabaseLocked {
15268 path: PathBuf::from("/tmp/test.db"),
15269 },
15270 frankensqlite::FrankenError::LockFailed {
15271 detail: "fcntl lock still held".to_string(),
15272 },
15273 frankensqlite::FrankenError::WriteConflict { page: 7, holder: 9 },
15274 frankensqlite::FrankenError::SerializationFailure { page: 11 },
15275 frankensqlite::FrankenError::Internal("database is locked".to_string()),
15276 ];
15277
15278 for err in retryable_errors {
15279 assert!(
15280 backup_vacuum_error_requires_consistent_retry(&err),
15281 "retryable VACUUM failure must not fall back to raw bundle copy: {err}"
15282 );
15283 }
15284
15285 assert!(!backup_vacuum_error_requires_consistent_retry(
15286 &frankensqlite::FrankenError::NotADatabase {
15287 path: PathBuf::from("/tmp/test.db")
15288 }
15289 ));
15290 assert!(!backup_vacuum_error_requires_consistent_retry(
15291 &frankensqlite::FrankenError::DatabaseCorrupt {
15292 detail: "bad header".to_string()
15293 }
15294 ));
15295 }
15296
15297 #[test]
15298 fn create_backup_uses_hidden_vacuum_stage_path() {
15299 let backup_path = PathBuf::from("/tmp/test.db.backup.123.456.0");
15300 let stage_path = vacuum_stage_backup_path(&backup_path);
15301 let stage_name = stage_path
15302 .file_name()
15303 .and_then(|name| name.to_str())
15304 .unwrap_or_default();
15305
15306 assert!(stage_name.starts_with('.'));
15307 assert!(stage_name.ends_with(".vacuum-in-progress"));
15308 assert!(
15309 !is_backup_root_name(stage_name, "test.db.backup."),
15310 "incomplete VACUUM output must not be discoverable as a backup root"
15311 );
15312 }
15313
15314 #[test]
15315 fn create_backup_preserves_content() {
15316 let dir = TempDir::new().unwrap();
15317 let db_path = dir.path().join("test.db");
15318 let original_content = b"test database content 12345";
15319 std::fs::write(&db_path, original_content).unwrap();
15320
15321 let backup_path = create_backup(&db_path).unwrap().unwrap();
15322 let backup_content = std::fs::read(&backup_path).unwrap();
15323 assert_eq!(backup_content, original_content);
15324 }
15325
15326 #[test]
15327 fn create_backup_copies_sidecars_when_present() {
15328 let dir = TempDir::new().unwrap();
15329 let db_path = dir.path().join("test.db");
15330 std::fs::write(&db_path, b"db").unwrap();
15331 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15332 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15333
15334 let backup_path = create_backup(&db_path).unwrap().unwrap();
15335
15336 assert_eq!(
15337 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15338 b"wal"
15339 );
15340 assert_eq!(
15341 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15342 b"shm"
15343 );
15344 }
15345
15346 #[test]
15347 #[cfg(unix)]
15348 fn create_backup_rejects_symlink_root_during_raw_fallback() {
15349 use std::os::unix::fs::symlink;
15350
15351 let dir = TempDir::new().unwrap();
15352 let outside_db = dir.path().join("outside.db");
15353 let db_path = dir.path().join("test.db");
15354 std::fs::write(&outside_db, b"not sqlite").unwrap();
15355 symlink(&outside_db, &db_path).unwrap();
15356
15357 let err = create_backup(&db_path).unwrap_err();
15358
15359 assert!(
15360 err.to_string().contains("bundle symlink"),
15361 "unexpected error: {err:#}"
15362 );
15363 assert_eq!(std::fs::read(&outside_db).unwrap(), b"not sqlite");
15364 let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
15365 .unwrap()
15366 .filter_map(|entry| entry.ok())
15367 .map(|entry| entry.file_name().to_string_lossy().into_owned())
15368 .filter(|name| name.starts_with("test.db.backup."))
15369 .collect();
15370 assert!(
15371 backup_roots.is_empty(),
15372 "symlinked backup source must not publish backup roots: {backup_roots:?}"
15373 );
15374 }
15375
15376 #[test]
15377 #[cfg(unix)]
15378 fn create_backup_rejects_symlink_sidecar_without_partial_backup() {
15379 use std::os::unix::fs::symlink;
15380
15381 let dir = TempDir::new().unwrap();
15382 let db_path = dir.path().join("test.db");
15383 let outside_wal = dir.path().join("outside.wal");
15384 let wal_path = database_sidecar_path(&db_path, "-wal");
15385 std::fs::write(&db_path, b"not sqlite").unwrap();
15386 std::fs::write(&outside_wal, b"outside wal").unwrap();
15387 symlink(&outside_wal, &wal_path).unwrap();
15388
15389 let err = create_backup(&db_path).unwrap_err();
15390
15391 assert!(
15392 err.to_string().contains("bundle symlink"),
15393 "unexpected error: {err:#}"
15394 );
15395 assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
15396 let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
15397 .unwrap()
15398 .filter_map(|entry| entry.ok())
15399 .map(|entry| entry.file_name().to_string_lossy().into_owned())
15400 .filter(|name| name.starts_with("test.db.backup."))
15401 .collect();
15402 assert!(
15403 backup_roots.is_empty(),
15404 "sidecar preflight failure must not leave a partial backup root: {backup_roots:?}"
15405 );
15406 }
15407
15408 #[test]
15413 fn cleanup_old_backups_keeps_recent() {
15414 let dir = TempDir::new().unwrap();
15415 let db_path = dir.path().join("test.db");
15416
15417 for i in 0..5 {
15419 let backup_name = format!("test.db.backup.{}", 1000 + i);
15420 std::fs::write(dir.path().join(&backup_name), format!("backup {i}")).unwrap();
15421 }
15422
15423 cleanup_old_backups(&db_path, 3).unwrap();
15424
15425 let backups: Vec<_> = std::fs::read_dir(dir.path())
15427 .unwrap()
15428 .filter_map(|e| e.ok())
15429 .filter(|e| e.file_name().to_str().unwrap_or("").contains("backup"))
15430 .collect();
15431
15432 assert_eq!(backups.len(), 3);
15433 }
15434
15435 #[test]
15436 fn cleanup_old_backups_ignores_wal_and_shm_sidecars() {
15437 let dir = TempDir::new().unwrap();
15438 let db_path = dir.path().join("test.db");
15439
15440 for i in 0..3 {
15441 let backup_name = format!("test.db.backup.{}", 1000 + i);
15442 let backup_path = dir.path().join(&backup_name);
15443 std::fs::write(&backup_path, format!("backup {i}")).unwrap();
15444 std::fs::write(format!("{}-wal", backup_path.display()), b"wal").unwrap();
15445 std::fs::write(format!("{}-shm", backup_path.display()), b"shm").unwrap();
15446 std::thread::sleep(std::time::Duration::from_millis(20));
15447 }
15448
15449 cleanup_old_backups(&db_path, 2).unwrap();
15450
15451 let mut roots = Vec::new();
15452 let mut wals = Vec::new();
15453 let mut shms = Vec::new();
15454 for entry in std::fs::read_dir(dir.path())
15455 .unwrap()
15456 .filter_map(|e| e.ok())
15457 {
15458 let name = entry.file_name().to_string_lossy().into_owned();
15459 if name.ends_with("-wal") {
15460 wals.push(name);
15461 } else if name.ends_with("-shm") {
15462 shms.push(name);
15463 } else if name.contains("backup") {
15464 roots.push(name);
15465 }
15466 }
15467
15468 assert_eq!(roots.len(), 2, "should keep two backup roots");
15469 assert_eq!(
15470 wals.len(),
15471 2,
15472 "should keep WAL sidecars only for retained backups"
15473 );
15474 assert_eq!(
15475 shms.len(),
15476 2,
15477 "should keep SHM sidecars only for retained backups"
15478 );
15479 }
15480
15481 #[test]
15482 fn move_database_bundle_moves_database_and_sidecars() {
15483 let dir = TempDir::new().unwrap();
15484 let db_path = dir.path().join("test.db");
15485 let backup_path = dir.path().join("test.db.corrupt");
15486
15487 std::fs::write(&db_path, b"db").unwrap();
15488 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15489 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15490
15491 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15492 assert_eq!(
15493 moved,
15494 DatabaseBundleMoveResult {
15495 database: true,
15496 wal: true,
15497 shm: true
15498 }
15499 );
15500 assert!(moved.moved_any());
15501
15502 assert!(!db_path.exists());
15503 assert!(!database_sidecar_path(&db_path, "-wal").exists());
15504 assert!(!database_sidecar_path(&db_path, "-shm").exists());
15505
15506 assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
15507 assert_eq!(
15508 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15509 b"wal"
15510 );
15511 assert_eq!(
15512 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15513 b"shm"
15514 );
15515 }
15516
15517 #[test]
15518 fn move_database_bundle_preserves_orphan_sidecars_without_main_db() {
15519 let dir = TempDir::new().unwrap();
15520 let db_path = dir.path().join("test.db");
15521 let backup_path = dir.path().join("test.db.corrupt");
15522
15523 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15524 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15525
15526 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15527 assert_eq!(
15528 moved,
15529 DatabaseBundleMoveResult {
15530 database: false,
15531 wal: true,
15532 shm: true
15533 }
15534 );
15535 assert!(moved.moved_any());
15536 assert!(!db_path.exists());
15537 assert!(!database_sidecar_path(&db_path, "-wal").exists());
15538 assert!(!database_sidecar_path(&db_path, "-shm").exists());
15539 assert_eq!(
15540 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15541 b"wal"
15542 );
15543 assert_eq!(
15544 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15545 b"shm"
15546 );
15547 }
15548
15549 #[test]
15550 #[cfg(unix)]
15551 fn move_database_bundle_moves_dangling_symlink_database_root() {
15552 use std::os::unix::fs::symlink;
15553
15554 let dir = TempDir::new().unwrap();
15555 let db_path = dir.path().join("test.db");
15556 let backup_path = dir.path().join("test.db.corrupt");
15557 let missing_target = dir.path().join("missing-target.db");
15558
15559 symlink(&missing_target, &db_path).unwrap();
15560
15561 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15562
15563 assert_eq!(
15564 moved,
15565 DatabaseBundleMoveResult {
15566 database: true,
15567 wal: false,
15568 shm: false
15569 }
15570 );
15571 assert!(std::fs::symlink_metadata(&db_path).is_err());
15572 assert!(
15573 std::fs::symlink_metadata(&backup_path)
15574 .unwrap()
15575 .file_type()
15576 .is_symlink()
15577 );
15578 assert!(!missing_target.exists());
15579 }
15580
15581 #[test]
15582 #[cfg(unix)]
15583 fn move_database_bundle_moves_dangling_symlink_sidecars_without_main_db() {
15584 use std::os::unix::fs::symlink;
15585
15586 let dir = TempDir::new().unwrap();
15587 let db_path = dir.path().join("test.db");
15588 let backup_path = dir.path().join("test.db.corrupt");
15589 let missing_wal_target = dir.path().join("missing-wal");
15590 let missing_shm_target = dir.path().join("missing-shm");
15591 let wal_path = database_sidecar_path(&db_path, "-wal");
15592 let shm_path = database_sidecar_path(&db_path, "-shm");
15593
15594 symlink(&missing_wal_target, &wal_path).unwrap();
15595 symlink(&missing_shm_target, &shm_path).unwrap();
15596
15597 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15598
15599 assert_eq!(
15600 moved,
15601 DatabaseBundleMoveResult {
15602 database: false,
15603 wal: true,
15604 shm: true
15605 }
15606 );
15607 assert!(std::fs::symlink_metadata(&wal_path).is_err());
15608 assert!(std::fs::symlink_metadata(&shm_path).is_err());
15609 assert!(
15610 std::fs::symlink_metadata(database_sidecar_path(&backup_path, "-wal"))
15611 .unwrap()
15612 .file_type()
15613 .is_symlink()
15614 );
15615 assert!(
15616 std::fs::symlink_metadata(database_sidecar_path(&backup_path, "-shm"))
15617 .unwrap()
15618 .file_type()
15619 .is_symlink()
15620 );
15621 assert!(!missing_wal_target.exists());
15622 assert!(!missing_shm_target.exists());
15623 }
15624
15625 #[test]
15626 fn copy_database_bundle_copies_database_and_sidecars() {
15627 let dir = TempDir::new().unwrap();
15628 let db_path = dir.path().join("test.db");
15629 let copied_path = dir.path().join("copy.db");
15630
15631 std::fs::write(&db_path, b"db").unwrap();
15632 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15633 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15634
15635 copy_database_bundle(&db_path, &copied_path).unwrap();
15636
15637 assert_eq!(std::fs::read(&copied_path).unwrap(), b"db");
15638 assert_eq!(
15639 std::fs::read(database_sidecar_path(&copied_path, "-wal")).unwrap(),
15640 b"wal"
15641 );
15642 assert_eq!(
15643 std::fs::read(database_sidecar_path(&copied_path, "-shm")).unwrap(),
15644 b"shm"
15645 );
15646 assert_eq!(std::fs::read(&db_path).unwrap(), b"db");
15647 }
15648
15649 #[test]
15650 fn copy_database_bundle_creates_destination_parent() {
15651 let dir = TempDir::new().unwrap();
15652 let db_path = dir.path().join("test.db");
15653 let copied_path = dir.path().join("nested/copies/copy.db");
15654
15655 std::fs::write(&db_path, b"db").unwrap();
15656 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15657
15658 copy_database_bundle(&db_path, &copied_path).unwrap();
15659
15660 assert!(copied_path.parent().unwrap().is_dir());
15661 assert_eq!(std::fs::read(&copied_path).unwrap(), b"db");
15662 assert_eq!(
15663 std::fs::read(database_sidecar_path(&copied_path, "-wal")).unwrap(),
15664 b"wal"
15665 );
15666 }
15667
15668 #[test]
15669 #[cfg(unix)]
15670 fn copy_database_bundle_rejects_symlink_source_root() {
15671 use std::os::unix::fs::symlink;
15672
15673 let dir = TempDir::new().unwrap();
15674 let outside_db = dir.path().join("outside.db");
15675 let db_path = dir.path().join("test.db");
15676 let copied_path = dir.path().join("copy.db");
15677
15678 std::fs::write(&outside_db, b"outside").unwrap();
15679 symlink(&outside_db, &db_path).unwrap();
15680
15681 let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();
15682
15683 assert!(
15684 err.to_string().contains("bundle symlink"),
15685 "unexpected error: {err:#}"
15686 );
15687 assert!(!copied_path.exists());
15688 assert_eq!(std::fs::read(&outside_db).unwrap(), b"outside");
15689 }
15690
15691 #[test]
15692 #[cfg(unix)]
15693 fn copy_database_bundle_rejects_symlink_sidecar() {
15694 use std::os::unix::fs::symlink;
15695
15696 let dir = TempDir::new().unwrap();
15697 let db_path = dir.path().join("test.db");
15698 let copied_path = dir.path().join("copy.db");
15699 let outside_wal = dir.path().join("outside.wal");
15700 let wal_path = database_sidecar_path(&db_path, "-wal");
15701
15702 std::fs::write(&db_path, b"db").unwrap();
15703 std::fs::write(&outside_wal, b"outside wal").unwrap();
15704 symlink(&outside_wal, &wal_path).unwrap();
15705
15706 let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();
15707
15708 assert!(
15709 err.to_string().contains("bundle symlink"),
15710 "unexpected error: {err:#}"
15711 );
15712 assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
15713 assert!(!copied_path.exists());
15714 assert!(!database_sidecar_path(&copied_path, "-wal").exists());
15715 }
15716
15717 #[test]
15718 fn move_database_bundle_creates_destination_parent_and_moves_sidecars() {
15719 let dir = TempDir::new().unwrap();
15720 let db_path = dir.path().join("test.db");
15721 let backup_path = dir.path().join("nested/backups/test.db.corrupt");
15722
15723 std::fs::write(&db_path, b"db").unwrap();
15724 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15725 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15726
15727 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15728 assert_eq!(
15729 moved,
15730 DatabaseBundleMoveResult {
15731 database: true,
15732 wal: true,
15733 shm: true
15734 }
15735 );
15736 assert!(backup_path.parent().unwrap().is_dir());
15737 assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
15738 assert_eq!(
15739 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15740 b"wal"
15741 );
15742 assert_eq!(
15743 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15744 b"shm"
15745 );
15746 }
15747
/// Sidecars left behind without a main database file must still be removed,
/// and the call must succeed even though the main file never existed.
#[test]
fn remove_database_files_removes_orphan_sidecars_without_main_db() {
    let tmp = TempDir::new().unwrap();
    let main_db = tmp.path().join("test.db");

    // Only the sidecars are created; the main database file is intentionally absent.
    for suffix in ["-wal", "-shm"] {
        let contents = suffix.trim_start_matches('-');
        std::fs::write(database_sidecar_path(&main_db, suffix), contents).unwrap();
    }

    remove_database_files(&main_db).unwrap();

    assert!(!main_db.exists());
    for suffix in ["-wal", "-shm"] {
        assert!(!database_sidecar_path(&main_db, suffix).exists());
    }
}
15762
/// A directory whose name looks like a backup must neither count toward the
/// retention limit nor be deleted by `cleanup_old_backups`.
#[test]
fn cleanup_old_backups_ignores_backup_named_directories() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Three regular backup files: test.db.backup.1000 ..= test.db.backup.1002.
    for (i, stamp) in (1000..1003).enumerate() {
        let backup_file = tmp.path().join(format!("test.db.backup.{stamp}"));
        std::fs::write(backup_file, format!("backup {i}")).unwrap();
    }
    // A decoy directory that shares the backup prefix.
    std::fs::create_dir(tmp.path().join("test.db.backup.directory")).unwrap();

    cleanup_old_backups(&db_path, 2).unwrap();

    // Split the surviving backup-prefixed entries into directories and files.
    let (backup_dirs, backup_files): (Vec<_>, Vec<_>) = std::fs::read_dir(tmp.path())
        .unwrap()
        .flatten()
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .starts_with("test.db.backup.")
        })
        .partition(|entry| entry.path().is_dir());

    assert_eq!(
        backup_files.len(),
        2,
        "only real backup files count toward retention"
    );
    assert_eq!(
        backup_dirs.len(),
        1,
        "backup-named directories should be ignored"
    );
}
15804
/// Opening a path that does not exist yet must create the database file.
#[test]
fn open_creates_new_database() {
    let tmp = TempDir::new().unwrap();
    let fresh_path = tmp.path().join("new.db");
    assert!(!fresh_path.exists());

    let storage = SqliteStorage::open(&fresh_path).unwrap();
    let created = fresh_path.exists();
    assert!(created);

    storage.close().unwrap();
}
15819
/// A read-only open of a path that was never created must return an error.
#[test]
fn open_readonly_fails_for_nonexistent() {
    let tmp = TempDir::new().unwrap();
    let missing = tmp.path().join("nonexistent.db");

    assert!(SqliteStorage::open_readonly(&missing).is_err());
}
15827
/// Once a database has been created by a normal open, a read-only open must
/// succeed and be able to report the schema version.
#[test]
fn open_readonly_succeeds_for_existing() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // Create the database, then release the writer handle straight away.
    drop(SqliteStorage::open(&db_path).unwrap());

    let readonly = SqliteStorage::open_readonly(&db_path).unwrap();
    assert!(readonly.schema_version().is_ok());
}
15841
/// Opening an already-current database a second time must leave the schema
/// version untouched.
#[test]
fn reopen_existing_current_schema_is_idempotent() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // First open creates the database at the current schema version.
    let first = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(first.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(first);

    let second = SqliteStorage::open(&db_path).unwrap();
    let version_after_reopen = second.schema_version().unwrap();
    assert_eq!(
        version_after_reopen, CURRENT_SCHEMA_VERSION,
        "reopening current schema DB should be idempotent"
    );
}
15861
/// `open_or_rebuild` on a database that is already at the current schema
/// version must open it normally instead of returning an error.
#[test]
fn open_or_rebuild_current_schema_does_not_trigger_rebuild() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // Create the database through the regular open path first.
    let initial = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(initial.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(initial);

    let reopened = SqliteStorage::open_or_rebuild(&db_path)
        .expect("current schema DB should open without rebuild");
    let version = reopened.schema_version().unwrap();
    assert_eq!(version, CURRENT_SCHEMA_VERSION);
}
15878
/// Pointing `open_or_rebuild` at a directory must surface the underlying
/// database/IO error rather than a rebuild request, and must leave the
/// directory in place.
#[test]
fn open_or_rebuild_does_not_treat_non_database_paths_as_corruption() {
    let tmp = TempDir::new().unwrap();
    let dir_as_db = tmp.path().join("db_dir");
    std::fs::create_dir(&dir_as_db).unwrap();

    let failure = match SqliteStorage::open_or_rebuild(&dir_as_db) {
        Ok(_) => panic!("directory path must not open as a database"),
        Err(err) => err,
    };

    // Only the plain open-failure variants are acceptable here.
    match failure {
        MigrationError::RebuildRequired { reason, .. } => {
            panic!("should not rebuild non-database path: {reason}")
        }
        MigrationError::Other(msg) => {
            panic!("should preserve underlying open error, got Other: {msg}")
        }
        MigrationError::Database(_) | MigrationError::Io(_) => {}
    }

    assert!(
        dir_as_db.is_dir(),
        "non-database directory must be left in place"
    );
}
15903
/// A freshly created database must report exactly `CURRENT_SCHEMA_VERSION`.
///
/// The previous `version >= 5` check would keep passing even if a fresh
/// database stopped short of the latest migration, which contradicts the
/// test's name.
#[test]
fn schema_version_returns_current() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "fresh database should be at CURRENT_SCHEMA_VERSION"
    );
}
15916
/// A fresh database must be at the current schema version and expose the
/// analytics tables, their expected columns and indexes, the conversation
/// tail columns, no `fts_messages` object, and a clean integrity check.
#[test]
fn migration_v13_creates_analytics_tables() {
    // Collects column 1 (the name column) of `PRAGMA <pragma>(<table>)`.
    fn pragma_names(conn: &FrankenConnection, pragma: &str, table: &str) -> Vec<String> {
        conn.query_map_collect(
            &format!("PRAGMA {pragma}({table})"),
            fparams![],
            |row: &FrankenRow| row.get_typed(1),
        )
        .unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Schema version must match CURRENT_SCHEMA_VERSION after migration"
    );

    let conn = storage.raw();

    // Table name -> columns that must be present.
    let expected_columns: [(&str, &[&str]); 4] = [
        (
            "message_metrics",
            &[
                "message_id",
                "hour_id",
                "day_id",
                "content_tokens_est",
                "model_name",
                "model_family",
                "model_tier",
                "provider",
                "api_input_tokens",
                "has_plan",
                "agent_slug",
                "role",
                "api_data_source",
            ],
        ),
        (
            "usage_hourly",
            &[
                "hour_id",
                "plan_message_count",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_coverage_message_count",
                "content_tokens_est_user",
                "api_thinking_tokens_total",
            ],
        ),
        (
            "usage_daily",
            &[
                "day_id",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_thinking_tokens_total",
                "content_tokens_est_assistant",
                "message_count",
            ],
        ),
        (
            "usage_models_daily",
            &[
                "day_id",
                "model_family",
                "model_tier",
                "message_count",
                "api_tokens_total",
                "api_coverage_message_count",
            ],
        ),
    ];
    for (table, columns) in expected_columns.iter() {
        let present = pragma_names(conn, "table_info", table);
        for expected in columns.iter() {
            assert!(
                present.iter().any(|col| col.as_str() == *expected),
                "{table} missing column: {expected}"
            );
        }
    }

    // (table, substring an index name must contain, failure message).
    let expected_indexes = [
        (
            "message_metrics",
            "idx_mm_hour",
            "message_metrics must have hour index",
        ),
        (
            "message_metrics",
            "idx_mm_agent_day",
            "message_metrics must have agent+day index",
        ),
        (
            "message_metrics",
            "idx_mm_model_family_day",
            "message_metrics must have model_family+day index",
        ),
        (
            "usage_hourly",
            "idx_uh_agent",
            "usage_hourly must have agent index",
        ),
        (
            "usage_daily",
            "idx_ud_agent",
            "usage_daily must have agent index",
        ),
        (
            "usage_models_daily",
            "idx_umd_model_day",
            "usage_models_daily must have model+day index",
        ),
    ];
    for &(table, needle, message) in expected_indexes.iter() {
        let indexes = pragma_names(conn, "index_list", table);
        assert!(indexes.iter().any(|name| name.contains(needle)), "{message}");
    }

    let conversation_cols = pragma_names(conn, "table_info", "conversations");
    let has_tail_columns = ["last_message_idx", "last_message_created_at"]
        .iter()
        .all(|wanted| conversation_cols.iter().any(|col| col.as_str() == *wanted));
    assert!(
        has_tail_columns,
        "fresh schema must include V15 tail columns without ALTER TABLE on conversations"
    );

    let fts_schema_rows: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row: &FrankenRow| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        fts_schema_rows, 0,
        "fresh schema should not create and immediately drop derived fts_messages"
    );

    let integrity: Vec<String> = conn
        .query_map_collect("PRAGMA integrity_check;", fparams![], |row: &FrankenRow| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(
        integrity,
        vec!["ok".to_string()],
        "fresh schema must pass SQLite integrity_check"
    );
}
16094
/// Converting a timestamp to an hour id and back must land at or before the
/// timestamp and within the same hour; the hour id must also agree with the
/// day id when divided by 24.
#[test]
fn hour_id_round_trip() {
    const HOUR_MS: i64 = 3_600_000;
    let ts_ms = 1_770_508_800_000_i64;

    let day_id = SqliteStorage::day_id_from_millis(ts_ms);
    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    assert_eq!(hour_id / 24, day_id, "hour_id/24 should equal day_id");

    let hour_start_ms = SqliteStorage::millis_from_hour_id(hour_id);
    let same_hour = hour_start_ms <= ts_ms && ts_ms - hour_start_ms < HOUR_MS;
    assert!(same_hour, "Round-trip should land within the same hour");
}
16112
/// For a timestamp of -1 ms, the day and hour ids must equal the floored
/// (Euclidean) division of the offset from the 2020 epoch constant, using
/// -1 s as the expected whole-second value.
#[test]
fn day_and_hour_ids_floor_negative_millis() {
    const EPOCH_2020_SECS: i64 = 1_577_836_800;
    const SECS_PER_DAY: i64 = 86_400;
    const SECS_PER_HOUR: i64 = 3_600;

    let ts_ms = -1_i64;
    // -1 ms is expected to be treated as second -1, not second 0.
    let secs_since_2020 = -1_i64 - EPOCH_2020_SECS;

    let day_id = SqliteStorage::day_id_from_millis(ts_ms);
    assert_eq!(day_id, secs_since_2020.div_euclid(SECS_PER_DAY));

    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    assert_eq!(hour_id, secs_since_2020.div_euclid(SECS_PER_HOUR));
}
16130
/// Builds a database by hand with the V1..V10 migration scripts applied and
/// `schema_version` recorded as 10, then checks that a normal open brings it
/// to the current schema and that all four analytics tables exist.
#[test]
fn migration_v13_from_v10() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    {
        let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
        conn.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
        conn.execute_batch("CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);")
            .unwrap();
        conn.execute("INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', '10')")
            .unwrap();

        let mut tx = conn.transaction().unwrap();
        // Applied in the same order as the original hand-written sequence.
        for migration in [
            MIGRATION_V1,
            MIGRATION_V2,
            MIGRATION_V4,
            MIGRATION_V5,
            MIGRATION_V6,
            MIGRATION_V7,
            MIGRATION_V8,
            MIGRATION_V9,
            MIGRATION_V10,
        ] {
            tx.execute_batch(migration).unwrap();
        }
        // Re-assert the version marker after the scripts have run.
        tx.execute("UPDATE meta SET value = '10' WHERE key = 'schema_version'")
            .unwrap();
        tx.commit().unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    let storage = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Should have migrated from v10 to the current schema"
    );

    let analytics_tables: i64 = storage
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('message_metrics', 'usage_hourly', 'usage_daily', 'usage_models_daily')",
            &[],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(analytics_tables, 4, "All 4 analytics tables should exist");
}
16182
/// Ingests one three-message conversation (user, assistant with API usage and
/// a plan, user) and checks the resulting `message_metrics` rows plus the
/// hourly, daily and per-model daily rollups.
#[test]
fn analytics_ingest_populates_metrics_and_rollups() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One `message_metrics` row, limited to the columns this test inspects.
    struct MetricRow {
        hour_id: i64,
        day_id: i64,
        role: String,
        content_tokens_est: i64,
        has_plan: i64,
        api_data_source: String,
        model_family: String,
        model_tier: String,
        provider: String,
    }

    // Runs a query taking a single `day_id` parameter and returns column 0.
    fn day_scalar(conn: &FrankenConnection, sql: &str, day_id: i64) -> i64 {
        conn.query_row_map(sql, fparams![day_id], |row: &FrankenRow| {
            row.get_typed::<i64>(0)
        })
        .unwrap()
    }

    const USER_GREETING: &str = "Hello, can you help me with a plan?";

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // Usage payload attached to the assistant message only.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 200,
                "cache_creation_input_tokens": 30,
                "service_tier": "standard"
            }
        }
    });

    // Builds a message whose timestamp is `offset_ms` after `ts_ms`.
    let message = |idx,
                   role: MessageRole,
                   offset_ms: i64,
                   content: &str,
                   extra_json: serde_json::Value| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(ts_ms + offset_ms),
        content: content.into(),
        extra_json,
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-conv-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            message(
                0,
                MessageRole::User,
                0,
                USER_GREETING,
                serde_json::Value::Null,
            ),
            message(
                1,
                MessageRole::Agent,
                30_000,
                "## Plan\n\n1. First step\n2. Second step\n3. Third step",
                usage_json,
            ),
            message(
                2,
                MessageRole::User,
                60_000,
                "Great, let's proceed!",
                serde_json::Value::Null,
            ),
        ],
        source_id: "local".into(),
        origin_host: None,
    };

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), 3);

    let conn = storage.raw();

    // --- message_metrics -------------------------------------------------
    let mm_count: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM message_metrics",
            fparams![],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(mm_count, 3, "Should have 3 message_metrics rows");

    let rows: Vec<MetricRow> = conn
        .query_map_collect(
            "SELECT hour_id, day_id, role, content_tokens_est, has_plan, api_data_source, model_family, model_tier, provider FROM message_metrics ORDER BY message_id",
            fparams![],
            |row: &FrankenRow| {
                Ok(MetricRow {
                    hour_id: row.get_typed(0)?,
                    day_id: row.get_typed(1)?,
                    role: row.get_typed(2)?,
                    content_tokens_est: row.get_typed(3)?,
                    has_plan: row.get_typed(4)?,
                    api_data_source: row.get_typed(5)?,
                    model_family: row.get_typed(6)?,
                    model_tier: row.get_typed(7)?,
                    provider: row.get_typed(8)?,
                })
            },
        )
        .unwrap();

    assert_eq!(rows.len(), 3);
    let (first_user, assistant, second_user) = (&rows[0], &rows[1], &rows[2]);

    assert_eq!(first_user.hour_id, expected_hour);
    assert_eq!(first_user.day_id, expected_day);
    assert_eq!(first_user.role, "user");
    assert_eq!(
        assistant.has_plan, 1,
        "Assistant message with plan should have has_plan=1"
    );
    assert_eq!(
        assistant.api_data_source, "api",
        "Claude Code assistant message should have api data source"
    );
    assert_eq!(first_user.api_data_source, "estimated");
    assert_eq!(second_user.api_data_source, "estimated");
    assert_eq!(assistant.model_family, "claude");
    assert_eq!(assistant.model_tier, "opus");
    assert_eq!(assistant.provider, "anthropic");
    assert_eq!(first_user.model_family, "unknown");
    // The stored estimate is expected to be the content length divided by four.
    let user_chars = USER_GREETING.len() as i64;
    assert_eq!(first_user.content_tokens_est, user_chars / 4);

    // --- usage_hourly ----------------------------------------------------
    let hourly: [i64; 7] = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
                    plan_content_tokens_est_total, plan_api_tokens_total, api_coverage_message_count
             FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok([
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                ])
            },
        )
        .unwrap();
    let [uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api, uh_api_cov] = hourly;
    assert_eq!(uh_msg, 3, "Hourly rollup should have 3 messages");
    assert_eq!(uh_user, 2, "Hourly rollup should have 2 user messages");
    assert_eq!(uh_asst, 1, "Hourly rollup should have 1 assistant message");
    assert_eq!(uh_plan, 1, "Hourly rollup should have 1 plan message");
    assert!(
        uh_plan_content > 0,
        "Hourly rollup should include plan content tokens"
    );
    assert!(
        uh_plan_api > 0,
        "Hourly rollup should include plan API tokens"
    );
    assert_eq!(
        uh_api_cov, 1,
        "Hourly rollup should have 1 API-covered message"
    );

    // --- usage_daily -----------------------------------------------------
    let [ud_msg, ud_api_cov]: [i64; 2] = conn
        .query_row_map(
            "SELECT message_count, api_coverage_message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok([row.get_typed(0)?, row.get_typed(1)?]),
        )
        .unwrap();
    assert_eq!(ud_msg, 3, "Daily rollup should match hourly");
    assert_eq!(
        ud_api_cov, 1,
        "Daily api_coverage should be 1 (only assistant msg has real API data)"
    );

    let api_only_input = day_scalar(
        conn,
        "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE day_id = ? AND api_data_source = 'api'",
        expected_day,
    );
    assert_eq!(
        api_only_input, 100,
        "Only API-sourced input tokens should be 100"
    );

    // Cross-check the daily rollup against sums computed from message_metrics.
    let mm_total_content_est = day_scalar(
        conn,
        "SELECT SUM(content_tokens_est) FROM message_metrics WHERE day_id = ?",
        expected_day,
    );
    let mm_plan_content_est = day_scalar(
        conn,
        "SELECT COALESCE(SUM(content_tokens_est), 0) FROM message_metrics WHERE day_id = ? AND has_plan = 1",
        expected_day,
    );
    let mm_plan_api_total = day_scalar(
        conn,
        "SELECT COALESCE(SUM(COALESCE(api_input_tokens, 0) + COALESCE(api_output_tokens, 0) + COALESCE(api_cache_read_tokens, 0) + COALESCE(api_cache_creation_tokens, 0) + COALESCE(api_thinking_tokens, 0)), 0)
         FROM message_metrics WHERE day_id = ? AND has_plan = 1 AND api_data_source = 'api'",
        expected_day,
    );
    let ud_content_est = day_scalar(
        conn,
        "SELECT content_tokens_est_total FROM usage_daily WHERE day_id = ?",
        expected_day,
    );
    let [ud_plan_content_est, ud_plan_api_total]: [i64; 2] = conn
        .query_row_map(
            "SELECT plan_content_tokens_est_total, plan_api_tokens_total FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok([row.get_typed(0)?, row.get_typed(1)?]),
        )
        .unwrap();
    assert_eq!(
        mm_total_content_est, ud_content_est,
        "Daily rollup content_tokens_est_total must equal SUM of message_metrics"
    );
    assert_eq!(
        mm_plan_content_est, ud_plan_content_est,
        "Daily rollup plan_content_tokens_est_total must equal planned message_metrics content sum"
    );
    assert_eq!(
        mm_plan_api_total, ud_plan_api_total,
        "Daily rollup plan_api_tokens_total must equal planned message_metrics API token sum"
    );

    // --- usage_models_daily ----------------------------------------------
    let claude_bucket: [i64; 5] = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, api_tokens_total, api_coverage_message_count
             FROM usage_models_daily
             WHERE day_id = ? AND model_family = 'claude' AND model_tier = 'opus'",
            fparams![expected_day],
            |row: &FrankenRow| {
                Ok([
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                ])
            },
        )
        .unwrap();
    let [claude_msg, claude_user, claude_asst, claude_api_total, claude_api_cov] = claude_bucket;
    assert_eq!(claude_msg, 1);
    assert_eq!(claude_user, 0);
    assert_eq!(claude_asst, 1);
    // 100 input + 50 output + 200 cache read + 30 cache creation.
    assert_eq!(claude_api_total, 380);
    assert_eq!(claude_api_cov, 1);

    let unknown_msg = day_scalar(
        conn,
        "SELECT message_count FROM usage_models_daily
         WHERE day_id = ? AND model_family = 'unknown' AND model_tier = 'unknown'",
        expected_day,
    );
    assert_eq!(
        unknown_msg, 2,
        "user messages should land in unknown model bucket"
    );
}
16495
/// Spot-checks `has_plan_heuristic` against texts that must be recognised as
/// plans and texts that must not.
#[test]
fn has_plan_heuristic_detects_plans() {
    let plans = [
        "## Plan\n\n1. First step\n2. Second step",
        "# Plan\nHere is what we will do:\n1. Step one\n2. Step two",
        "Plan:\n- Gather baseline\n- Implement changes\n- Validate with tests",
        "Next steps:\n1. Update schema\n2. Rebuild rollups",
    ];
    for text in plans.iter().copied() {
        assert!(has_plan_heuristic(text));
    }

    let non_plans = [
        "Hello world",
        "Short",
        "This is a regular message without plans",
        // Numbered lines inside fenced tool output must not count as a plan.
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. install\\n2. run\"}\n```",
    ];
    for text in non_plans.iter().copied() {
        assert!(!has_plan_heuristic(text));
    }
}
16519
/// The same plan text counts as a plan for the assistant-like roles
/// ("assistant", "agent", "Assistant") and never for "user" or "tool".
#[test]
fn has_plan_for_role_only_counts_assistant_messages() {
    let plan_text = "## Plan\n1. First\n2. Second";

    for role in ["assistant", "agent", "Assistant"].iter().copied() {
        assert!(has_plan_for_role(role, plan_text));
    }
    for role in ["user", "tool"].iter().copied() {
        assert!(!has_plan_for_role(role, plan_text));
    }
}
16529
/// Records two plan messages in the same bucket, one with
/// `api_data_source = "estimated"` and one with `"api"`, and checks that only
/// the API-sourced message contributes to the API token totals and coverage
/// count while both contribute to the plan message/content counts.
#[test]
fn api_rollups_require_api_data_source() {
    // Both fixtures are assistant plan messages sharing hour/day/agent/source;
    // only the fields that differ between them are macro parameters.
    macro_rules! plan_entry {
        ($id:expr, $chars:expr, $est:expr, $input:expr, $output:expr, $source:expr) => {
            MessageMetricsEntry {
                message_id: $id,
                created_at_ms: 0,
                hour_id: 1,
                day_id: 1,
                agent_slug: "codex".into(),
                workspace_id: 0,
                source_id: "local".into(),
                role: "assistant".into(),
                content_chars: $chars,
                content_tokens_est: $est,
                model_name: None,
                model_family: "unknown".into(),
                model_tier: "unknown".into(),
                provider: "unknown".into(),
                api_input_tokens: Some($input),
                api_output_tokens: Some($output),
                api_cache_read_tokens: Some(0),
                api_cache_creation_tokens: Some(0),
                api_thinking_tokens: Some(0),
                api_service_tier: None,
                api_data_source: $source.into(),
                tool_call_count: 0,
                has_tool_calls: false,
                has_plan: true,
            }
        };
    }

    let mut rollups = AnalyticsRollupAggregator::new();
    // Carries token numbers but is flagged as estimated.
    rollups.record(&plan_entry!(1, 120, 30, 100, 50, "estimated"));
    // Flagged as real API data.
    rollups.record(&plan_entry!(2, 80, 20, 40, 10, "api"));

    let bucket_key = (1_i64, "codex".to_string(), 0_i64, "local".to_string());
    let model_key = (
        1_i64,
        "codex".to_string(),
        0_i64,
        "local".to_string(),
        "unknown".to_string(),
        "unknown".to_string(),
    );
    let hourly = rollups
        .hourly
        .get(&bucket_key)
        .expect("hourly rollup key must exist");
    let daily = rollups
        .daily
        .get(&bucket_key)
        .expect("daily rollup key must exist");
    let models_daily = rollups
        .models_daily
        .get(&model_key)
        .expect("model rollup key must exist");

    // Plan counters include both messages (30 + 20 estimated content tokens).
    assert_eq!(hourly.plan_message_count, 2);
    assert_eq!(hourly.plan_content_tokens_est_total, 50);

    // Plan API totals include only the "api" message (40 + 10).
    assert_eq!(hourly.plan_api_tokens_total, 50);
    assert_eq!(daily.plan_api_tokens_total, 50);
    assert_eq!(models_daily.plan_api_tokens_total, 50);

    // General API totals and coverage likewise exclude the estimated message.
    assert_eq!(hourly.api_tokens_total, 50);
    assert_eq!(hourly.api_input_tokens_total, 40);
    assert_eq!(hourly.api_output_tokens_total, 10);
    assert_eq!(hourly.api_coverage_message_count, 1);
    assert_eq!(daily.api_tokens_total, 50);
    assert_eq!(models_daily.api_tokens_total, 50);
}
16621
/// Measures `has_plan_heuristic` on a small hand-picked corpus and requires
/// recall of at least 0.80 and a false-positive rate of at most 0.20.
#[test]
fn has_plan_heuristic_curated_corpus_thresholds() {
    let positives = [
        "## Plan\n1. Inspect current schema\n2. Add migration\n3. Verify rebuild",
        "Plan:\n1) Reproduce\n2) Patch\n3) Add tests",
        "Implementation plan:\n- Parse inputs\n- Update rollups\n- Run checks",
        "Next steps:\n1. Reserve file\n2. Implement\n3. Report status",
        "# Plan\n1. Gather requirements\n2. Ship changes",
        "Action plan:\n- Identify root cause\n- Fix it\n- Validate",
    ];
    let negatives = [
        "The plan is to move fast and fix things later.",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. ls\\n2. cat\"}\n```",
        "stdout:\n1. Build started\n2. Build finished\nexit code: 0",
        "I can help with that request. Let me know if you want details.",
        "Here is a list:\n- apples\n- oranges",
        "Status update: completed tasks and blockers below.",
    ];

    // Fraction of a corpus that the heuristic flags as a plan.
    let hit_rate = |corpus: &[&str]| {
        let hits = corpus.iter().filter(|msg| has_plan_heuristic(msg)).count();
        hits as f64 / corpus.len() as f64
    };

    let recall = hit_rate(&positives[..]);
    let false_positive_rate = hit_rate(&negatives[..]);

    assert!(
        recall >= 0.80,
        "plan heuristic recall too low: got {recall:.2}"
    );
    assert!(
        false_positive_rate <= 0.20,
        "plan heuristic false-positive rate too high: got {false_positive_rate:.2}"
    );
}
16665
16666 #[test]
16667 fn rebuild_analytics_repopulates_from_messages() {
16668 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
16669 use std::path::PathBuf;
16670
16671 let dir = TempDir::new().unwrap();
16672 let db_path = dir.path().join("test.db");
16673 let storage = SqliteStorage::open(&db_path).unwrap();
16674
16675 let agent = Agent {
16677 id: None,
16678 slug: "claude_code".into(),
16679 name: "Claude Code".into(),
16680 version: Some("1.0".into()),
16681 kind: AgentKind::Cli,
16682 };
16683 let agent_id = storage.ensure_agent(&agent).unwrap();
16684
16685 let ts_ms = 1_770_551_400_000_i64;
16687 let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
16688 let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);
16689
16690 let usage_json = serde_json::json!({
16691 "message": {
16692 "model": "claude-opus-4-6",
16693 "usage": {
16694 "input_tokens": 100,
16695 "output_tokens": 50,
16696 "cache_read_input_tokens": 200,
16697 "cache_creation_input_tokens": 30,
16698 "service_tier": "standard"
16699 }
16700 }
16701 });
16702
16703 let conv = Conversation {
16704 id: None,
16705 agent_slug: "claude_code".into(),
16706 workspace: None,
16707 external_id: Some("test-rebuild-1".into()),
16708 title: Some("Test conversation".into()),
16709 source_path: PathBuf::from("/tmp/test.jsonl"),
16710 started_at: Some(ts_ms),
16711 ended_at: Some(ts_ms + 60_000),
16712 approx_tokens: None,
16713 metadata_json: serde_json::Value::Null,
16714 messages: vec![
16715 Message {
16716 id: None,
16717 idx: 0,
16718 role: MessageRole::User,
16719 author: None,
16720 created_at: Some(ts_ms),
16721 content: "Hello, can you help me with a plan?".into(),
16722 extra_json: serde_json::Value::Null,
16723 snippets: vec![],
16724 },
16725 Message {
16726 id: None,
16727 idx: 1,
16728 role: MessageRole::Agent,
16729 author: None,
16730 created_at: Some(ts_ms + 30_000),
16731 content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
16732 extra_json: usage_json,
16733 snippets: vec![],
16734 },
16735 Message {
16736 id: None,
16737 idx: 2,
16738 role: MessageRole::User,
16739 author: None,
16740 created_at: Some(ts_ms + 60_000),
16741 content: "Great, let's proceed!".into(),
16742 extra_json: serde_json::Value::Null,
16743 snippets: vec![],
16744 },
16745 ],
16746 source_id: "local".into(),
16747 origin_host: None,
16748 };
16749
16750 storage
16751 .insert_conversations_batched(&[(agent_id, None, &conv)])
16752 .unwrap();
16753
16754 let conn = storage.raw();
16756 let orig_mm: i64 = conn
16757 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16758 row.get_typed(0)
16759 })
16760 .unwrap();
16761 let orig_hourly: i64 = conn
16762 .query_row_map("SELECT COUNT(*) FROM usage_hourly", &[], |row| {
16763 row.get_typed(0)
16764 })
16765 .unwrap();
16766 let orig_daily: i64 = conn
16767 .query_row_map("SELECT COUNT(*) FROM usage_daily", &[], |row| {
16768 row.get_typed(0)
16769 })
16770 .unwrap();
16771 let orig_models_daily: i64 = conn
16772 .query_row_map("SELECT COUNT(*) FROM usage_models_daily", &[], |row| {
16773 row.get_typed(0)
16774 })
16775 .unwrap();
16776 let orig_api_input: i64 = conn
16777 .query_row_map(
16778 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
16779 &[],
16780 |row: &FrankenRow| row.get_typed(0),
16781 )
16782 .unwrap();
16783
16784 assert_eq!(orig_mm, 3);
16785 assert!(orig_hourly > 0);
16786 assert!(orig_daily > 0);
16787 assert!(orig_models_daily > 0);
16788
16789 conn.execute("DELETE FROM message_metrics").unwrap();
16791 conn.execute("DELETE FROM usage_hourly").unwrap();
16792 conn.execute("DELETE FROM usage_daily").unwrap();
16793 conn.execute("DELETE FROM usage_models_daily").unwrap();
16794
16795 let zero: i64 = conn
16797 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16798 row.get_typed(0)
16799 })
16800 .unwrap();
16801 assert_eq!(zero, 0);
16802
16803 let result = storage.rebuild_analytics().unwrap();
16805
16806 assert_eq!(result.message_metrics_rows, 3);
16807 assert!(result.usage_hourly_rows > 0);
16808 assert!(result.usage_daily_rows > 0);
16809 assert!(result.usage_models_daily_rows > 0);
16810 assert!(
16811 result.elapsed_ms < 10_000,
16812 "Rebuild should be fast for 3 msgs"
16813 );
16814
16815 let conn = storage.raw();
16817 let rebuilt_mm: i64 = conn
16818 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16819 row.get_typed(0)
16820 })
16821 .unwrap();
16822 assert_eq!(
16823 rebuilt_mm, orig_mm,
16824 "Rebuilt message_metrics count should match"
16825 );
16826
16827 let rebuilt_hourly: i64 = conn
16828 .query_row_map("SELECT COUNT(*) FROM usage_hourly", &[], |row| {
16829 row.get_typed(0)
16830 })
16831 .unwrap();
16832 assert_eq!(
16833 rebuilt_hourly, orig_hourly,
16834 "Rebuilt hourly rows should match"
16835 );
16836
16837 let rebuilt_daily: i64 = conn
16838 .query_row_map("SELECT COUNT(*) FROM usage_daily", &[], |row| {
16839 row.get_typed(0)
16840 })
16841 .unwrap();
16842 assert_eq!(rebuilt_daily, orig_daily, "Rebuilt daily rows should match");
16843
16844 let rebuilt_models_daily: i64 = conn
16845 .query_row_map("SELECT COUNT(*) FROM usage_models_daily", &[], |row| {
16846 row.get_typed(0)
16847 })
16848 .unwrap();
16849 assert_eq!(
16850 rebuilt_models_daily, orig_models_daily,
16851 "Rebuilt model rollup rows should match"
16852 );
16853
16854 let rebuilt_api_input: i64 = conn
16856 .query_row_map(
16857 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
16858 &[],
16859 |row: &FrankenRow| row.get_typed(0),
16860 )
16861 .unwrap();
16862 assert_eq!(
16863 rebuilt_api_input, orig_api_input,
16864 "Rebuilt API input tokens should match original"
16865 );
16866
16867 let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api): (
16869 i64,
16870 i64,
16871 i64,
16872 i64,
16873 i64,
16874 i64,
16875 ) = conn
16876 .query_row_map(
16877 "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
16878 plan_content_tokens_est_total, plan_api_tokens_total
16879 FROM usage_hourly WHERE hour_id = ?",
16880 fparams![expected_hour],
16881 |row: &FrankenRow| {
16882 Ok((
16883 row.get_typed(0)?,
16884 row.get_typed(1)?,
16885 row.get_typed(2)?,
16886 row.get_typed(3)?,
16887 row.get_typed(4)?,
16888 row.get_typed(5)?,
16889 ))
16890 },
16891 )
16892 .unwrap();
16893 assert_eq!(uh_msg, 3);
16894 assert_eq!(uh_user, 2);
16895 assert_eq!(uh_asst, 1);
16896 assert_eq!(uh_plan, 1);
16897 assert!(uh_plan_content > 0);
16898 assert!(uh_plan_api > 0);
16899
16900 let ud_msg: i64 = conn
16901 .query_row_map(
16902 "SELECT message_count FROM usage_daily WHERE day_id = ?",
16903 fparams![expected_day],
16904 |row| row.get_typed(0),
16905 )
16906 .unwrap();
16907 assert_eq!(ud_msg, 3);
16908 }
16909
/// Inserts one conversation whose two messages are each slightly longer than
/// half of `FTS_ENTRY_BATCH_MAX_CHARS`, then checks that every message is
/// present in both `messages` and `fts_messages`.
///
/// NOTE(review): the combined payload exceeds the batch character limit, which
/// presumably forces an intermediate FTS flush — confirm against the batching
/// implementation.
#[test]
fn insert_conversations_batched_flushes_large_fts_batches() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();
    storage
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency before insert");

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Each message body is just over half of the batch limit, so two of them
    // cannot fit into a single batch.
    let filler = "y".repeat((FTS_ENTRY_BATCH_MAX_CHARS / 2) + 1);
    let mut messages = Vec::with_capacity(2);
    for idx in 0_i64..2 {
        messages.push(Message {
            id: None,
            idx,
            role: MessageRole::Agent,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("{idx}-{filler}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("fts-large-batch".into()),
        title: Some("FTS Large Batch".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };
    let expected_rows = conv.messages.len();

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), expected_rows);

    // Shared single-value COUNT(*) reader for the two tables under test.
    let count_rows = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let stored_messages = count_rows("SELECT COUNT(*) FROM messages");
    let stored_fts_rows = count_rows("SELECT COUNT(*) FROM fts_messages");

    assert_eq!(stored_messages, expected_rows as i64);
    assert_eq!(stored_fts_rows, expected_rows as i64);
}
16986
16987 fn make_profiled_storage_remote_conversation(
16988 external_id: i64,
16989 msg_count: usize,
16990 ) -> Conversation {
16991 Conversation {
16992 id: None,
16993 agent_slug: "codex".into(),
16994 workspace: Some(PathBuf::from("/ws/profiled-storage-remote")),
16995 external_id: Some(format!("profiled-storage-remote-{external_id}")),
16996 title: Some(format!(
16997 "Profiled storage remote conversation {external_id}"
16998 )),
16999 source_path: PathBuf::from(format!("/log/profiled-storage-remote-{external_id}.jsonl")),
17000 started_at: Some(10_000 + external_id * 100),
17001 ended_at: Some(10_000 + external_id * 100 + msg_count as i64),
17002 approx_tokens: Some(msg_count as i64 * 32),
17003 metadata_json: serde_json::json!({ "bench": true }),
17004 messages: (0..msg_count)
17005 .map(|idx| Message {
17006 id: None,
17007 idx: idx as i64,
17008 role: if idx % 2 == 0 {
17009 MessageRole::User
17010 } else {
17011 MessageRole::Agent
17012 },
17013 author: Some("tester".into()),
17014 created_at: Some(20_000 + external_id * 100 + idx as i64),
17015 content: format!(
17016 "profiled storage remote content ext={external_id} idx={idx} {}",
17017 "x".repeat(64)
17018 ),
17019 extra_json: serde_json::json!({ "idx": idx }),
17020 snippets: Vec::new(),
17021 })
17022 .collect(),
17023 source_id: "profiled-storage-remote-source".into(),
17024 origin_host: Some("builder-profile".into()),
17025 }
17026 }
17027
17028 fn make_profiled_append_remote_merge_conversation(
17029 external_id: i64,
17030 msg_count: usize,
17031 ) -> Conversation {
17032 let base_ts = 100_000 + external_id * 1_000;
17033 Conversation {
17034 id: None,
17035 agent_slug: "codex".into(),
17036 workspace: Some(PathBuf::from("/ws/profiled-append-remote")),
17037 external_id: Some(format!("profiled-append-remote-{external_id}")),
17038 title: Some(format!("Profiled append remote conversation {external_id}")),
17039 source_path: PathBuf::from(format!("/log/profiled-append-remote-{external_id}.jsonl")),
17040 started_at: Some(base_ts),
17041 ended_at: Some(base_ts + msg_count as i64),
17042 approx_tokens: Some(msg_count as i64 * 50),
17043 metadata_json: serde_json::json!({ "bench": true }),
17044 messages: (0..msg_count)
17045 .map(|idx| Message {
17046 id: None,
17047 idx: idx as i64,
17048 role: if idx % 2 == 0 {
17049 MessageRole::User
17050 } else {
17051 MessageRole::Agent
17052 },
17053 author: Some(format!("model-{}", external_id % 5)),
17054 created_at: Some(base_ts + idx as i64),
17055 content: format!(
17056 "Profiled append remote conversation {} message {}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
17057 external_id, idx
17058 ),
17059 extra_json: serde_json::json!({ "bench": true }),
17060 snippets: Vec::new(),
17061 })
17062 .collect(),
17063 source_id: "profiled-append-remote-source".into(),
17064 origin_host: Some("builder-profile".into()),
17065 }
17066 }
17067
/// Inserts a fresh five-message conversation with one snippet per message and
/// verifies that every snippet row joins back to a message of that
/// conversation via `snippets.message_id = messages.id`.
#[test]
fn insert_conversation_tree_batched_new_message_ids_match_snippet_rows() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("batched-message-ids.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_id = storage
        .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
        .unwrap();

    // Give every message exactly one snippet so the join count must equal the
    // message count.
    let mut conv = make_profiled_storage_remote_conversation(42, 5);
    conv.messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, msg)| {
            let snippet = Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/file_{idx}.rs"))),
                start_line: Some((idx + 1) as i64),
                end_line: Some((idx + 2) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn snippet_{idx}() {{}}")),
            };
            msg.snippets.push(snippet);
        });

    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();
    let expected_rows = conv.messages.len() as i64;

    let stored_messages: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
            fparams![outcome.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let snippets_joined_to_messages: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*)
             FROM snippets s
             JOIN messages m ON s.message_id = m.id
             WHERE m.conversation_id = ?1",
            fparams![outcome.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();

    assert_eq!(stored_messages, expected_rows);
    assert_eq!(snippets_joined_to_messages, expected_rows);
}
17123
/// Inserts a two-message conversation, then re-inserts the same external id
/// with five messages, and verifies that only indices 2..=4 are appended and
/// that each message keeps exactly the snippet it was first stored with.
#[test]
fn insert_conversation_tree_batched_appended_message_ids_match_snippet_rows() {
    /// Pushes one snippet onto every message of `conv`. `stem` names the
    /// snippet file and function; `first_line` is the start line for index 0.
    fn attach_snippets(conv: &mut Conversation, stem: &str, first_line: usize) {
        for (idx, msg) in conv.messages.iter_mut().enumerate() {
            msg.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/{stem}_{idx}.rs"))),
                start_line: Some((idx + first_line) as i64),
                end_line: Some((idx + first_line + 1) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn {stem}_{idx}() {{}}")),
            });
        }
    }

    let tmp = TempDir::new().unwrap();
    let storage =
        SqliteStorage::open(&tmp.path().join("batched-append-message-ids.db")).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_id = storage
        .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
        .unwrap();

    // First pass: two messages, snippets named `append_initial_*`.
    let mut initial = make_profiled_storage_remote_conversation(77, 2);
    attach_snippets(&mut initial, "append_initial", 1);
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();
    assert_eq!(first.inserted_indices, vec![0, 1]);

    // Second pass: same external id, five messages, snippets named
    // `append_full_*`. Only the three new messages should be stored.
    let mut appended = make_profiled_storage_remote_conversation(77, 5);
    attach_snippets(&mut appended, "append_full", 10);
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3, 4]);

    let stored_messages: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
            fparams![first.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let snippet_paths_by_idx: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT m.idx, s.file_path
             FROM snippets s
             JOIN messages m ON s.message_id = m.id
             WHERE m.conversation_id = ?1
             ORDER BY m.idx, s.id",
            fparams![first.conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();

    let expected_paths = vec![
        (0, "src/append_initial_0.rs".to_string()),
        (1, "src/append_initial_1.rs".to_string()),
        (2, "src/append_full_2.rs".to_string()),
        (3, "src/append_full_3.rs".to_string()),
        (4, "src/append_full_4.rs".to_string()),
    ];
    assert_eq!(stored_messages, 5);
    assert_eq!(snippet_paths_by_idx, expected_paths);
}
17208
/// Deletes a conversation's row from `conversation_external_tail_lookup` by
/// hand, re-inserts the same external id with more messages, and verifies that
/// the insert reuses the existing conversation, appends only the new messages,
/// and restores a lookup row that mirrors `conversation_tail_state`.
#[test]
fn insert_conversation_tree_rehydrates_external_lookup_after_manual_clear() {
    let tmp = TempDir::new().unwrap();
    let storage =
        SqliteStorage::open(&tmp.path().join("external-lookup-rehydrate.db")).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_id = storage
        .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
        .unwrap();

    // Seed the conversation and confirm the lookup row points at it.
    let initial = make_profiled_storage_remote_conversation(88, 2);
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();
    let external_id = initial.external_id.as_deref().unwrap();
    let lookup_key = conversation_external_lookup_key(&initial.source_id, agent_id, external_id);
    let seeded_lookup_id: i64 = storage
        .conn
        .query_row_map(
            "SELECT conversation_id
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(seeded_lookup_id, first.conversation_id);

    // Remove the lookup row behind the storage layer's back.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_external_tail_lookup WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
        )
        .unwrap();

    // Re-inserting with four messages must append indices 2 and 3 to the
    // original conversation rather than creating a duplicate.
    let appended = make_profiled_storage_remote_conversation(88, 4);
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3]);

    let matching_conversations: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*)
             FROM conversations
             WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
            fparams![initial.source_id.as_str(), agent_id, external_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let restored_lookup: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(
            "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
            |row| {
                let conversation_id = row.get_typed(0)?;
                let ended_at = row.get_typed(1)?;
                let last_message_idx = row.get_typed(2)?;
                let last_message_created_at = row.get_typed(3)?;
                Ok((
                    conversation_id,
                    ended_at,
                    last_message_idx,
                    last_message_created_at,
                ))
            },
        )
        .unwrap();
    let tail_state: (Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(
            "SELECT ended_at, last_message_idx, last_message_created_at
             FROM conversation_tail_state
             WHERE conversation_id = ?1",
            fparams![first.conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
        )
        .unwrap();

    assert_eq!(matching_conversations, 1);

    // The rebuilt lookup row must carry the same tail values as the tail-state
    // table for this conversation.
    let (tail_ended_at, tail_last_idx, tail_last_created_at) = tail_state;
    assert_eq!(
        restored_lookup,
        (
            first.conversation_id,
            tail_ended_at,
            tail_last_idx,
            tail_last_created_at
        )
    );

    // Both tail timestamps are expected to equal the created_at of the final
    // appended message (index 3).
    let final_created_at = appended.messages[3].created_at;
    assert_eq!(tail_state, (final_created_at, Some(3), final_created_at));
}
17317
/// Wipes `daily_stats` after a first insert and verifies that a second insert
/// recreates the rows, counting only the second conversation (one session, two
/// messages) in the `all`/`all` aggregate.
#[test]
fn insert_conversation_tree_recreates_daily_stats_after_manual_clear() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();
    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_id = storage
        .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
        .unwrap();

    // The first conversation's stats are thrown away right after the insert.
    let discarded = make_profiled_storage_remote_conversation(0, 3);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &discarded)
        .unwrap();
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    // The second conversation is the only one the recreated rows can reflect.
    let counted = make_profiled_storage_remote_conversation(1, 2);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &counted)
        .unwrap();

    let stats_rows: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let all_totals: (i64, i64) = storage
        .conn
        .query_row_map(
            "SELECT session_count, message_count
             FROM daily_stats
             WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (session_count, message_count) = all_totals;

    assert_eq!(stats_rows, 4);
    assert_eq!(session_count, 1);
    assert_eq!(message_count, 2);
}
17373
/// Profiles repeated inserts of new conversations from one remote source.
///
/// For each `(msg_count, iterations)` scenario, one unprofiled conversation is
/// inserted first, then `iterations` further conversations are inserted with
/// profiling. The profile counters must match the workload and the summed
/// per-stage durations must not exceed the recorded total.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_steady_state_remote_reuse() {
    let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    for (msg_count, iterations) in [(5usize, 80usize), (20, 50), (50, 24)] {
        let tmp = TempDir::new().unwrap();
        let storage =
            SqliteStorage::open(&tmp.path().join(format!("profile-{msg_count}.db"))).unwrap();
        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace_id = storage
            .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
            .unwrap();

        // Unprofiled first insert (external id 0), kept out of the measurements.
        let warmup = make_profiled_storage_remote_conversation(0, msg_count);
        storage
            .insert_conversation_tree(agent_id, Some(workspace_id), &warmup)
            .unwrap();

        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in 1..=iterations {
            let conv = make_profiled_storage_remote_conversation(external_id as i64, msg_count);
            storage
                .insert_conversation_tree_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &conv,
                    &mut profile,
                )
                .unwrap();
        }

        let expected_messages = iterations * msg_count;
        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, expected_messages);
        assert_eq!(profile.inserted_messages, expected_messages);

        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted stage durations cannot exceed total duration"
        );

        profile.log_summary(&format!("remote_reuse_{msg_count}_msgs"));
    }
}
17436
/// Profiles appends to existing conversations from one remote source.
///
/// For each `(msg_count, iterations)` scenario, `iterations` conversations are
/// seeded with `msg_count` messages, then each is re-submitted with
/// `msg_count * 2` messages through the profiled append path. The profile must
/// report every submitted message but only the new half as inserted, and the
/// summed per-stage durations must not exceed the recorded total.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_append_remote_source_merge() {
    let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    for (msg_count, iterations) in [(5usize, 80usize), (20, 50), (50, 24)] {
        let tmp = TempDir::new().unwrap();
        let storage =
            SqliteStorage::open(&tmp.path().join(format!("append-profile-{msg_count}.db")))
                .unwrap();
        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace_id = storage
            .ensure_workspace(&PathBuf::from("/ws/profiled-append-remote"), None)
            .unwrap();

        // Seed phase: store the first half of every conversation, unprofiled.
        for external_id in 0..iterations {
            let seed =
                make_profiled_append_remote_merge_conversation(external_id as i64, msg_count);
            storage
                .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
                .unwrap();
        }

        // Append phase: resubmit each conversation at double length.
        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in 0..iterations {
            let grown = make_profiled_append_remote_merge_conversation(
                external_id as i64,
                msg_count * 2,
            );
            storage
                .append_existing_conversation_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &grown,
                    &mut profile,
                )
                .unwrap();
        }

        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, iterations * msg_count * 2);
        assert_eq!(profile.inserted_messages, iterations * msg_count);

        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.existing_idx_lookup_duration
            + profile.existing_replay_lookup_duration
            + profile.dedupe_filter_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted append stage durations cannot exceed total duration"
        );

        profile.log_summary(&format!("append_remote_merge_{msg_count}_msgs"));
    }
}
17510
/// Hand-builds two agents, two conversations, three messages and their
/// `message_metrics` rows with raw SQL, clears `daily_stats`, and verifies that
/// `rebuild_daily_stats` restores two sessions, zero drift, and three messages
/// in the `all`/`all` aggregate.
#[test]
fn rebuild_daily_stats_recomputes_materialized_totals_without_monolithic_group_by() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();
    let started_at = 1_700_000_000_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    // Agents: (id, slug, name).
    let insert_agent_sql =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    for (agent_row_id, slug, name) in [(1_i64, "codex", "Codex"), (2_i64, "claude", "Claude")] {
        storage
            .conn
            .execute_compat(insert_agent_sql, fparams![agent_row_id, slug, name, "cli"])
            .unwrap();
    }

    // Conversations: (id, agent_id, external_id, title, source_path, ended_at).
    let insert_conversation_sql = "INSERT INTO conversations (
             id, agent_id, workspace_id, source_id, external_id, title, source_path,
             started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)";
    for (conversation_id, owner_agent_id, external_id, title, source_path, ended_at) in [
        (
            1_i64,
            1_i64,
            "daily-a",
            "Daily A",
            "/tmp/daily-a.jsonl",
            started_at + 200,
        ),
        (
            2_i64,
            2_i64,
            "daily-b",
            "Daily B",
            "/tmp/daily-b.jsonl",
            started_at + 300,
        ),
    ] {
        storage
            .conn
            .execute_compat(
                insert_conversation_sql,
                fparams![
                    conversation_id,
                    owner_agent_id,
                    LOCAL_SOURCE_ID,
                    external_id,
                    title,
                    source_path,
                    started_at,
                    ended_at,
                    "{}"
                ],
            )
            .unwrap();
    }

    // Messages: (id, conversation_id, idx, role, created_at, content).
    let insert_message_sql = "INSERT INTO messages (
             id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    for (message_id, conversation_id, idx, role, created_at, content) in [
        (1_i64, 1_i64, 0_i64, "user", started_at, "hello"),
        (2_i64, 1_i64, 1_i64, "assistant", started_at + 100, "response"),
        (3_i64, 2_i64, 0_i64, "user", started_at + 50, "abc"),
    ] {
        storage
            .conn
            .execute_compat(
                insert_message_sql,
                fparams![message_id, conversation_id, idx, role, created_at, content],
            )
            .unwrap();
    }

    // Metrics: (message_id, agent_slug, role, content_chars). Every row shares
    // the same hour/day bucket and carries zeroed API token columns.
    let insert_metric_sql = "INSERT INTO message_metrics (
             message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
             role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
             api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
             api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
             model_name, model_family, model_tier, provider
         ) VALUES (
             ?1, ?2, ?3, ?4, ?5, ?6, ?7,
             ?8, ?9, ?10, ?11, ?12,
             ?13, ?14, ?15,
             ?16, ?17, ?18, ?19, ?20,
             ?21, ?22, ?23, ?24
         )";
    let metric_rows = [
        (1_i64, "codex", "user", 5_i64),
        (2_i64, "codex", "assistant", 8_i64),
        (3_i64, "claude", "user", 3_i64),
    ];
    for (message_id, agent_slug, role, content_len) in metric_rows {
        storage
            .conn
            .execute_compat(
                insert_metric_sql,
                fparams![
                    message_id,
                    started_at,
                    hour_id,
                    day_id,
                    agent_slug,
                    0_i64,
                    LOCAL_SOURCE_ID,
                    role,
                    content_len,
                    content_len / 4,
                    0_i64,
                    0_i64,
                    0_i64,
                    0_i64,
                    0_i64,
                    "",
                    "estimated",
                    0_i64,
                    0_i64,
                    0_i64,
                    "",
                    "unknown",
                    "unknown",
                    "unknown"
                ],
            )
            .unwrap();
    }

    // Start the rebuild from an empty materialized table.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 2);

    let health = storage.daily_stats_health().unwrap();
    assert_eq!(health.conversation_count, 2);
    assert_eq!(health.materialized_total, 2);
    assert_eq!(health.drift, 0);

    let all_message_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT message_count FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(all_message_count, 3);
}
17677
/// Stores a multi-byte UTF-8 message together with a `message_metrics` row
/// whose `content_chars` is the content's byte length, and verifies that
/// `total_chars` in the `all`/`all` aggregate equals that byte length both
/// after the inline stats update and after `rebuild_daily_stats`.
#[test]
fn rebuild_daily_stats_preserves_byte_counts_with_message_metrics() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // `str::len` is the UTF-8 byte length, which is larger than the character
    // count for this content.
    let content = "ASCII🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    let insert_agent_sql =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    storage
        .conn
        .execute_compat(insert_agent_sql, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();

    let insert_conversation_sql = "INSERT INTO conversations (
             id, agent_id, workspace_id, source_id, external_id, title, source_path,
             started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_conversation_sql,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-metrics",
                "Unicode Metrics",
                "/tmp/unicode-metrics.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();

    let insert_message_sql = "INSERT INTO messages (
             id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_message_sql,
            fparams![1_i64, 1_i64, 0_i64, "user", started_at, content],
        )
        .unwrap();

    let insert_metric_sql = "INSERT INTO message_metrics (
             message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
             role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
             api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
             api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
             model_name, model_family, model_tier, provider
         ) VALUES (
             ?1, ?2, ?3, ?4, ?5, ?6, ?7,
             ?8, ?9, ?10, ?11, ?12,
             ?13, ?14, ?15,
             ?16, ?17, ?18, ?19, ?20,
             ?21, ?22, ?23, ?24
         )";
    storage
        .conn
        .execute_compat(
            insert_metric_sql,
            fparams![
                1_i64,
                started_at,
                hour_id,
                day_id,
                "tester",
                0_i64,
                LOCAL_SOURCE_ID,
                "user",
                expected_bytes,
                expected_bytes / 4,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                "",
                "estimated",
                0_i64,
                0_i64,
                0_i64,
                "",
                "unknown",
                "unknown",
                "unknown"
            ],
        )
        .unwrap();

    // Apply the inline stats delta in its own transaction.
    {
        let mut tx = storage.conn.transaction().unwrap();
        let delta = StatsDelta {
            session_count_delta: 1,
            message_count_delta: 1,
            total_chars_delta: expected_bytes,
        };
        franken_update_daily_stats_in_tx(
            &storage,
            &tx,
            "tester",
            LOCAL_SOURCE_ID,
            Some(started_at),
            delta,
        )
        .unwrap();
        tx.commit().unwrap();
    }

    // Reads `total_chars` from the aggregate row covering all agents/sources.
    let read_all_total_chars = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
                fparams![],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    let inline_total = read_all_total_chars();
    assert_eq!(inline_total, expected_bytes);

    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total = read_all_total_chars();
    assert_eq!(rebuilt_total, expected_bytes);
}
17812
/// Stores a multi-byte UTF-8 message without any `message_metrics` row and
/// verifies that `rebuild_daily_stats` still reports `total_chars` equal to the
/// content's byte length in the `all`/`all` aggregate.
///
/// NOTE(review): with no metrics rows present the rebuild presumably derives
/// the totals from the raw `messages` table — confirm against
/// `rebuild_daily_stats`.
#[test]
fn rebuild_daily_stats_raw_fallback_preserves_byte_counts() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // `str::len` is the UTF-8 byte length, not the character count.
    let content = "fallback🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;

    let insert_agent_sql =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    storage
        .conn
        .execute_compat(insert_agent_sql, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();

    let insert_conversation_sql = "INSERT INTO conversations (
             id, agent_id, workspace_id, source_id, external_id, title, source_path,
             started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_conversation_sql,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-fallback",
                "Unicode Fallback",
                "/tmp/unicode-fallback.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();

    let insert_message_sql = "INSERT INTO messages (
             id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_message_sql,
            fparams![1_i64, 1_i64, 0_i64, "assistant", started_at, content],
        )
        .unwrap();

    // Apply the inline stats delta in its own transaction.
    {
        let mut tx = storage.conn.transaction().unwrap();
        let delta = StatsDelta {
            session_count_delta: 1,
            message_count_delta: 1,
            total_chars_delta: expected_bytes,
        };
        franken_update_daily_stats_in_tx(
            &storage,
            &tx,
            "tester",
            LOCAL_SOURCE_ID,
            Some(started_at),
            delta,
        )
        .unwrap();
        tx.commit().unwrap();
    }

    // Discard the inline result so the assertion below reflects the rebuild only.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total: i64 = storage
        .conn
        .query_row_map(
            "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(rebuilt_total, expected_bytes);
}
17890
/// Two batch items sharing one external id must resolve to a single
/// conversation row; the second item contributes only the indices the first
/// one did not already supply.
///
/// Row counts and ids are read through the default plan, `NOT INDEXED`, and
/// `INDEXED BY idx_conversations_source_id`, both on the original handle and
/// on a freshly reopened one.
#[test]
fn insert_conversations_batched_appends_duplicate_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message timestamps advance by 100 per index from the session start.
    let message = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let shared_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session".into()),
        title: Some("Shared Session".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // The second pass repeats the first two messages and appends two more.
    let first_pass = shared_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
    ]);
    let second_pass = shared_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
        message(2, MessageRole::User, "third"),
        message(3, MessageRole::Agent, "fourth"),
    ]);

    let outcomes = storage
        .insert_conversations_batched(&[
            (agent_id, None, &first_pass),
            (agent_id, None, &second_pass),
        ])
        .unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[1].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    // Small query helpers: one scalar count, one ordered id list.
    let count_rows = |store: &SqliteStorage, sql: &str| -> i64 {
        let total: i64 = store
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap();
        total
    };
    let collect_ids = |store: &SqliteStorage, sql: &str| -> Vec<i64> {
        let ids: Vec<i64> = store
            .conn
            .query_map_collect(sql, fparams![], |row| row.get_typed(0))
            .unwrap();
        ids
    };

    // Counts as seen by the handle that performed the inserts.
    let conversation_count = count_rows(&storage, "SELECT COUNT(*) FROM conversations");
    let conversation_count_not_indexed =
        count_rows(&storage, "SELECT COUNT(*) FROM conversations NOT INDEXED");
    let conversation_count_source_index = count_rows(
        &storage,
        "SELECT COUNT(*) FROM conversations INDEXED BY idx_conversations_source_id",
    );
    let message_count = count_rows(&storage, "SELECT COUNT(*) FROM messages");

    // The same questions asked through a second handle on the same file.
    let reopened_storage = SqliteStorage::open(&database_file).unwrap();
    let reopened_conversation_count =
        count_rows(&reopened_storage, "SELECT COUNT(*) FROM conversations");
    let reopened_conversation_count_not_indexed = count_rows(
        &reopened_storage,
        "SELECT COUNT(*) FROM conversations NOT INDEXED",
    );
    let reopened_conversation_ids = collect_ids(
        &reopened_storage,
        "SELECT id FROM conversations ORDER BY id",
    );
    let reopened_conversation_ids_not_indexed = collect_ids(
        &reopened_storage,
        "SELECT id FROM conversations NOT INDEXED ORDER BY id",
    );
    let reopened_conversation_ids_source_index = collect_ids(
        &reopened_storage,
        "SELECT id FROM conversations INDEXED BY idx_conversations_source_id ORDER BY id",
    );

    let expected_ids = vec![outcomes[0].conversation_id];
    assert_eq!(reopened_conversation_ids, expected_ids);
    assert_eq!(reopened_conversation_ids_not_indexed, expected_ids);
    assert_eq!(reopened_conversation_ids_source_index, expected_ids);
    assert_eq!(reopened_conversation_count, 1);
    assert_eq!(reopened_conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_source_index, 1);
    assert_eq!(conversation_count, 1);
    assert_eq!(message_count, 4);
}
18082
/// After a conversation has been inserted inside a transaction, calling
/// `franken_insert_conversation_or_get_existing_after_miss` for the same
/// conversation in that transaction must report the existing row id rather
/// than inserting a second row.
#[test]
fn franken_insert_conversation_or_get_existing_recovers_unique_conflict() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("recover-duplicate".into()),
        title: Some("Recover Duplicate".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: "local".into(),
        origin_host: None,
    };

    // Both inserts run in the same, never-committed transaction.
    let tx = storage.conn.transaction().unwrap();
    let inserted_id = franken_insert_conversation(&tx, agent_id, None, &conv)
        .unwrap()
        .expect("first insert should succeed");

    let merge_key = conversation_merge_key(agent_id, &conv);
    let status = franken_insert_conversation_or_get_existing_after_miss(
        &tx, agent_id, None, &conv, &merge_key,
    )
    .unwrap();

    // The second attempt has to resolve to the row created above.
    let existing_id = match status {
        ConversationInsertStatus::Existing(existing_id) => existing_id,
        ConversationInsertStatus::Inserted(new_id) => {
            panic!("expected existing conversation id, got freshly inserted {new_id}");
        }
    };
    assert_eq!(existing_id, inserted_id);

    let rows_in_tx: i64 = tx
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(rows_in_tx, 1);
}
18157
/// Batched insert where the first item carries only the later indices (2, 3)
/// and the second item carries 0, 1 and a repeat of 3: the second item must
/// insert just 0 and 1, and the stored indices must end up as 0..=3.
#[test]
fn insert_conversations_batched_merges_duplicate_external_id_with_gaps() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message timestamps advance by 100 per index from the session start.
    let message = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let gap_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session-gap".into()),
        title: Some("Shared Session Gap".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let tail_only = gap_session(vec![
        message(2, MessageRole::User, "third"),
        message(3, MessageRole::Agent, "fourth"),
    ]);
    let head_plus_overlap = gap_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
        message(3, MessageRole::Agent, "fourth"),
    ]);

    let outcomes = storage
        .insert_conversations_batched(&[
            (agent_id, None, &tail_only),
            (agent_id, None, &head_plus_overlap),
        ])
        .unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[1].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    let persisted_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted_indices, vec![0, 1, 2, 3]);
}
18263
/// A conversation with messages at idx 0 and idx 20 is stored first. A batch
/// then re-submits idx 0 unchanged and idx 20 with different content; neither
/// batch item may insert anything, and the originally stored rows must remain.
#[test]
fn insert_conversations_batched_refreshes_partial_pending_message_lookup() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Index 0 is the user turn; every other index is an agent turn.
    let message_at = |idx: i64, content: &str| {
        let role = match idx {
            0 => MessageRole::User,
            _ => MessageRole::Agent,
        };
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    };
    let session_with = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("partial-cache-session".into()),
        title: Some("Partial cache session".into()),
        source_path: PathBuf::from("/tmp/partial-cache.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Store the reference rows through the single-conversation path first.
    let canonical = session_with(vec![
        message_at(0, "canonical zero"),
        message_at(20, "canonical twenty"),
    ]);
    storage
        .insert_conversation_tree(agent_id, None, &canonical)
        .unwrap();

    let exact_prefix = session_with(vec![message_at(0, "canonical zero")]);
    let conflicting_tail = session_with(vec![message_at(20, "conflicting twenty")]);

    let outcomes = storage
        .insert_conversations_batched(&[
            (agent_id, None, &exact_prefix),
            (agent_id, None, &conflicting_tail),
        ])
        .unwrap();

    assert_eq!(outcomes.len(), 2);
    assert!(outcomes[0].inserted_indices.is_empty());
    assert!(
        outcomes[1].inserted_indices.is_empty(),
        "the second batch item must refresh the partial pending lookup and retain the canonical idx=20 row"
    );

    let persisted: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected = vec![
        (0, String::from("canonical zero")),
        (20, String::from("canonical twenty")),
    ];
    assert_eq!(persisted, expected);
}
18354
/// Submitting the same 64-message conversation twice through
/// `insert_conversations_batched` must insert every message the first time and
/// nothing the second time, leaving one conversation and 64 message rows.
#[test]
fn insert_conversations_batched_reprocessing_conversation_is_idempotent() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    const MESSAGE_COUNT: i64 = 64;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Alternate user/agent turns: even indices are user messages.
    let mut messages = Vec::with_capacity(MESSAGE_COUNT as usize);
    for idx in 0..MESSAGE_COUNT {
        let is_user_turn = idx % 2 == 0;
        messages.push(Message {
            id: None,
            idx,
            role: if is_user_turn {
                MessageRole::User
            } else {
                MessageRole::Agent
            },
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("large-reprocess-session".into()),
        title: Some("Large Reprocess Session".into()),
        source_path: PathBuf::from("/tmp/large-reprocess-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_000 + MESSAGE_COUNT - 1),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Same single-item batch, submitted twice in a row.
    let first = storage
        .insert_conversations_batched(&[(agent_id, None, &conversation)])
        .unwrap();
    let second = storage
        .insert_conversations_batched(&[(agent_id, None, &conversation)])
        .unwrap();

    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 1);
    assert_eq!(first[0].inserted_indices.len(), MESSAGE_COUNT as usize);
    assert!(
        second[0].inserted_indices.is_empty(),
        "full reprocessing of a large conversation must not attempt duplicate idx inserts"
    );
    assert_eq!(first[0].conversation_id, second[0].conversation_id);

    let conn = &storage.conn;
    let stored_conversations: i64 = conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let stored_messages: i64 = conn
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(stored_conversations, 1);
    assert_eq!(stored_messages, MESSAGE_COUNT);
}
18440
/// Inserts ten conversations with distinct external ids from rayon chunks of
/// three, each chunk opening its own writer handle, and checks that every
/// conversation gets its own row with all three messages inserted.
///
/// Each insert (including the agent/workspace lookups that precede it) is
/// retried with jittered backoff when it fails with one of the `FrankenError`
/// variants listed in `retryable_franken_error`.
#[test]
fn parallel_insert_conversation_tree_keeps_unique_external_ids_distinct() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use frankensqlite::compat::{ConnectionExt, RowExt};
    use rand::RngExt;
    use rayon::prelude::*;

    /// True when `err` itself, or failing that its root cause, is one of the
    /// `FrankenError` variants this test retries.
    fn retryable_franken_error(err: &anyhow::Error) -> bool {
        use frankensqlite::FrankenError;

        let franken = match err.downcast_ref::<FrankenError>() {
            Some(inner) => inner,
            None => match err.root_cause().downcast_ref::<FrankenError>() {
                Some(inner) => inner,
                None => return false,
            },
        };
        matches!(
            franken,
            FrankenError::Busy
                | FrankenError::BusyRecovery
                | FrankenError::BusySnapshot { .. }
                | FrankenError::WriteConflict { .. }
                | FrankenError::SerializationFailure { .. }
        )
    }

    /// Runs `f` until it succeeds, fails with a non-retryable error, or has
    /// been retried `MAX_RETRIES` times (so at most `MAX_RETRIES + 1` calls).
    /// The sleep between attempts is the current backoff plus a random jitter
    /// of up to the same amount; the backoff doubles up to 512 ms.
    fn with_retry<T>(mut f: impl FnMut() -> anyhow::Result<T>) -> anyhow::Result<T> {
        const MAX_RETRIES: u32 = 24;

        let mut rng = rand::rng();
        let mut backoff_ms = 4_u64;
        let mut retries_used = 0_u32;
        loop {
            let err = match f() {
                Ok(value) => return Ok(value),
                Err(err) => err,
            };
            if retries_used >= MAX_RETRIES || !retryable_franken_error(&err) {
                return Err(err);
            }
            retries_used += 1;
            let sleep_ms = backoff_ms + rng.random_range(0..=backoff_ms);
            std::thread::sleep(Duration::from_millis(sleep_ms));
            backoff_ms = (backoff_ms * 2).min(512);
        }
    }

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir
        .path()
        .join("parallel_insert_conversation_tree.db");
    // Open and immediately drop one handle before any writer is opened.
    drop(FrankenStorage::open(&database_file).unwrap());

    // Ten conversations spread over three agent slugs, three messages each.
    let conversations: Vec<NormalizedConversation> = (0..10)
        .map(|i| {
            let base_ts = 1_000 + i * 100;
            let messages = (0..3)
                .map(|j| {
                    let role = if j % 2 == 0 { "user" } else { "assistant" };
                    NormalizedMessage {
                        idx: j,
                        role: role.to_string(),
                        author: Some("tester".into()),
                        created_at: Some(base_ts + j * 10),
                        content: format!("parallel-distinct-test conv={i} msg={j}"),
                        extra: serde_json::json!({}),
                        snippets: vec![],
                        invocations: Vec::new(),
                    }
                })
                .collect();
            NormalizedConversation {
                agent_slug: format!("agent-{}", i % 3),
                external_id: Some(format!("conv-{i}")),
                title: Some(format!("Conversation {i}")),
                workspace: Some(PathBuf::from(format!("/ws/{i}"))),
                source_path: PathBuf::from(format!("/log/{i}.jsonl")),
                started_at: Some(base_ts),
                ended_at: Some(base_ts + 50),
                metadata: serde_json::json!({}),
                messages,
            }
        })
        .collect();

    let mut outcomes: Vec<(String, i64, Vec<i64>)> = conversations
        .par_chunks(3)
        .map(|chunk| {
            // One writer handle and one pair of id caches per chunk.
            let storage = FrankenStorage::open_writer(&database_file).unwrap();
            let mut agent_cache: HashMap<String, i64> = HashMap::new();
            let mut workspace_cache: HashMap<PathBuf, i64> = HashMap::new();
            let mut chunk_outcomes = Vec::with_capacity(chunk.len());

            for conv in chunk {
                let agent_slug = conv.agent_slug.clone();
                let workspace = conv.workspace.clone();
                let external_id = conv.external_id.clone().expect("external id");
                let internal = map_to_internal(conv);

                let outcome = with_retry(|| {
                    let agent_id = match agent_cache.get(&agent_slug) {
                        Some(&cached) => cached,
                        None => {
                            let agent = Agent {
                                id: None,
                                slug: agent_slug.clone(),
                                name: agent_slug.clone(),
                                version: None,
                                kind: AgentKind::Cli,
                            };
                            let id = storage.ensure_agent(&agent)?;
                            agent_cache.insert(agent_slug.clone(), id);
                            id
                        }
                    };
                    let workspace_id = match &workspace {
                        None => None,
                        Some(path) => match workspace_cache.get(path) {
                            Some(&cached) => Some(cached),
                            None => {
                                let id = storage.ensure_workspace(path, None)?;
                                workspace_cache.insert(path.clone(), id);
                                Some(id)
                            }
                        },
                    };
                    storage.insert_conversation_tree(agent_id, workspace_id, &internal)
                })
                .unwrap();

                chunk_outcomes.push((
                    external_id,
                    outcome.conversation_id,
                    outcome.inserted_indices,
                ));
            }

            storage.close().unwrap();
            chunk_outcomes
        })
        .flatten()
        .collect();
    // Order by external id so failure output is stable across runs.
    outcomes.sort_by(|(lhs, ..), (rhs, ..)| lhs.cmp(rhs));

    assert!(
        outcomes
            .iter()
            .all(|(_, _, inserted_indices)| inserted_indices.as_slice() == [0, 1, 2]),
        "unique external ids must not be routed through the existing-conversation merge path: {outcomes:?}"
    );

    let distinct_ids: HashSet<i64> = outcomes.iter().map(|outcome| outcome.1).collect();
    assert_eq!(
        distinct_ids.len(),
        conversations.len(),
        "unique external ids must produce distinct conversation ids: {outcomes:?}"
    );

    // Read everything back through a fresh handle.
    let reader = FrankenStorage::open(&database_file).unwrap();
    let stored_rows: Vec<(i64, String)> = reader
        .raw()
        .query_map_collect(
            "SELECT id, external_id FROM conversations ORDER BY id",
            &[],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let stored_count: i64 = reader
        .raw()
        .query_row_map("SELECT COUNT(*) FROM conversations", &[], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(
        stored_count as usize,
        conversations.len(),
        "parallel distinct inserts must persist one row per external id; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
    assert_eq!(
        stored_rows.len(),
        conversations.len(),
        "parallel distinct inserts must remain visible after reopening; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
}
18618
/// Same gap scenario as the batched variant, driven through
/// `insert_conversation_tree` on conversations produced by `map_to_internal`:
/// the first call stores idx 2 and 3, the second call supplies 0, 1 and a
/// repeat of 3 and must insert only 0 and 1 into the same conversation.
#[test]
fn insert_conversation_tree_merges_duplicate_external_id_with_gaps() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one normalized message; the timestamp is passed in explicitly.
    let message = |idx, role: &str, created_at, content: &str| NormalizedMessage {
        idx,
        role: role.into(),
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra: serde_json::Value::Null,
        snippets: Vec::new(),
        invocations: Vec::new(),
    };
    let tree_session = |messages: Vec<NormalizedMessage>| NormalizedConversation {
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("tree-gap-session".into()),
        title: Some("Tree Gap Session".into()),
        source_path: PathBuf::from("/tmp/tree.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        metadata: serde_json::Value::Null,
        messages,
    };

    let tail_only = map_to_internal(&tree_session(vec![
        message(2, "user", 1_700_000_000_200, "third"),
        message(3, "assistant", 1_700_000_000_300, "fourth"),
    ]));
    let head_plus_overlap = map_to_internal(&tree_session(vec![
        message(0, "user", 1_700_000_000_000, "first"),
        message(1, "assistant", 1_700_000_000_100, "second"),
        message(3, "assistant", 1_700_000_000_300, "fourth"),
    ]));

    let first = storage
        .insert_conversation_tree(agent_id, None, &tail_only)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &head_plus_overlap)
        .unwrap();

    assert_eq!(first.inserted_indices, vec![2, 3]);
    assert_eq!(second.inserted_indices, vec![0, 1]);
    assert_eq!(first.conversation_id, second.conversation_id);

    let persisted_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted_indices, vec![0, 1, 2, 3]);
}
18725
/// A brand-new conversation whose message list contains idx 0 twice must be
/// stored with the first idx-0 message only; the later duplicate is dropped
/// and idx 1 is stored normally.
#[test]
fn insert_conversation_tree_skips_duplicate_message_indices_for_new_conversation() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // The second entry reuses idx 0 with a later timestamp and other content.
    let messages = vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first canonical"),
        message(
            0,
            MessageRole::User,
            1_700_000_000_001,
            "duplicate idx should be skipped",
        ),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ];
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("duplicate-new-session".into()),
        title: Some("Duplicate New Session".into()),
        source_path: PathBuf::from("/tmp/duplicate-new-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    assert_eq!(outcome.inserted_indices, vec![0, 1]);

    let persisted: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected = vec![
        (0, String::from("first canonical")),
        (1, String::from("second")),
    ];
    assert_eq!(persisted, expected);
}
18813
/// Two conversations with no external id but the same source path must merge
/// into one conversation: the second call, which overlaps on idx 1 and adds
/// idx 2, inserts only idx 2.
#[test]
fn insert_conversation_tree_merges_duplicate_source_path_without_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let temp_dir = TempDir::new().unwrap();
    let database_file = temp_dir.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message timestamps advance by 100 per index from the session start.
    let message = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    // `external_id` is deliberately absent; only the source path is shared.
    let path_keyed_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Source Path Merge".into()),
        source_path: PathBuf::from("/tmp/shared-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let initial = path_keyed_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
    ]);
    let follow_up = path_keyed_session(vec![
        message(1, MessageRole::Agent, "second"),
        message(2, MessageRole::User, "third"),
    ]);

    let first = storage
        .insert_conversation_tree(agent_id, None, &initial)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &follow_up)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(first.inserted_indices, vec![0, 1]);
    assert_eq!(second.inserted_indices, vec![2]);

    let persisted_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted_indices, vec![0, 1, 2]);
}
18918
18919 #[test]
18920 fn insert_conversation_tree_merges_source_path_duplicates_with_start_drift() {
18921 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18922 use std::path::PathBuf;
18923
18924 let dir = TempDir::new().unwrap();
18925 let db_path = dir.path().join("test.db");
18926 let storage = SqliteStorage::open(&db_path).unwrap();
18927
18928 let agent = Agent {
18929 id: None,
18930 slug: "codex".into(),
18931 name: "Codex".into(),
18932 version: Some("0.2.3".into()),
18933 kind: AgentKind::Cli,
18934 };
18935 let agent_id = storage.ensure_agent(&agent).unwrap();
18936
18937 let base_conv = |started_at: Option<i64>, messages: Vec<Message>| Conversation {
18938 id: None,
18939 agent_slug: "codex".into(),
18940 workspace: Some(PathBuf::from("/tmp/workspace")),
18941 external_id: None,
18942 title: Some("Drift Merge".into()),
18943 source_path: PathBuf::from("/tmp/drift-session.jsonl"),
18944 started_at,
18945 ended_at: Some(1_700_000_000_999),
18946 approx_tokens: None,
18947 metadata_json: serde_json::Value::Null,
18948 messages,
18949 source_id: "local".into(),
18950 origin_host: None,
18951 };
18952
18953 let first = storage
18954 .insert_conversation_tree(
18955 agent_id,
18956 None,
18957 &base_conv(
18958 Some(1_700_000_000_000),
18959 vec![
18960 Message {
18961 id: None,
18962 idx: 0,
18963 role: MessageRole::User,
18964 author: None,
18965 created_at: Some(1_700_000_000_000),
18966 content: "first".into(),
18967 extra_json: serde_json::Value::Null,
18968 snippets: Vec::new(),
18969 },
18970 Message {
18971 id: None,
18972 idx: 1,
18973 role: MessageRole::Agent,
18974 author: None,
18975 created_at: Some(1_700_000_000_100),
18976 content: "second".into(),
18977 extra_json: serde_json::Value::Null,
18978 snippets: Vec::new(),
18979 },
18980 ],
18981 ),
18982 )
18983 .unwrap();
18984
18985 let second = storage
18986 .insert_conversation_tree(
18987 agent_id,
18988 None,
18989 &base_conv(
18990 Some(1_700_000_004_000),
18991 vec![
18992 Message {
18993 id: None,
18994 idx: 1,
18995 role: MessageRole::Agent,
18996 author: None,
18997 created_at: Some(1_700_000_000_100),
18998 content: "second".into(),
18999 extra_json: serde_json::Value::Null,
19000 snippets: Vec::new(),
19001 },
19002 Message {
19003 id: None,
19004 idx: 2,
19005 role: MessageRole::User,
19006 author: None,
19007 created_at: Some(1_700_000_004_200),
19008 content: "third".into(),
19009 extra_json: serde_json::Value::Null,
19010 snippets: Vec::new(),
19011 },
19012 ],
19013 ),
19014 )
19015 .unwrap();
19016
19017 assert_eq!(first.conversation_id, second.conversation_id);
19018 assert_eq!(second.inserted_indices, vec![2]);
19019 }
19020
19021 #[test]
19022 fn insert_conversation_tree_keeps_single_message_overlap_sessions_separate() {
19023 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19024 use std::path::PathBuf;
19025
19026 let dir = TempDir::new().unwrap();
19027 let db_path = dir.path().join("test.db");
19028 let storage = SqliteStorage::open(&db_path).unwrap();
19029
19030 let agent = Agent {
19031 id: None,
19032 slug: "codex".into(),
19033 name: "Codex".into(),
19034 version: Some("0.2.3".into()),
19035 kind: AgentKind::Cli,
19036 };
19037 let agent_id = storage.ensure_agent(&agent).unwrap();
19038
19039 let make_conv = |started_at: i64, idx: i64, content: &str| Conversation {
19040 id: None,
19041 agent_slug: "codex".into(),
19042 workspace: Some(PathBuf::from("/tmp/workspace")),
19043 external_id: None,
19044 title: Some("Partial overlap".into()),
19045 source_path: PathBuf::from("/tmp/reused-session.jsonl"),
19046 started_at: Some(started_at),
19047 ended_at: Some(started_at + 500),
19048 approx_tokens: None,
19049 metadata_json: serde_json::Value::Null,
19050 messages: vec![Message {
19051 id: None,
19052 idx,
19053 role: MessageRole::User,
19054 author: None,
19055 created_at: Some(started_at),
19056 content: content.into(),
19057 extra_json: serde_json::Value::Null,
19058 snippets: Vec::new(),
19059 }],
19060 source_id: "local".into(),
19061 origin_host: None,
19062 };
19063
19064 storage
19065 .insert_conversation_tree(
19066 agent_id,
19067 None,
19068 &Conversation {
19069 messages: vec![
19070 Message {
19071 id: None,
19072 idx: 0,
19073 role: MessageRole::User,
19074 author: None,
19075 created_at: Some(1_700_000_000_000),
19076 content: "shared opener".into(),
19077 extra_json: serde_json::Value::Null,
19078 snippets: Vec::new(),
19079 },
19080 Message {
19081 id: None,
19082 idx: 1,
19083 role: MessageRole::Agent,
19084 author: None,
19085 created_at: Some(1_700_000_000_100),
19086 content: "first session unique".into(),
19087 extra_json: serde_json::Value::Null,
19088 snippets: Vec::new(),
19089 },
19090 ],
19091 ..make_conv(1_700_000_000_000, 0, "unused")
19092 },
19093 )
19094 .unwrap();
19095 storage
19096 .insert_conversation_tree(
19097 agent_id,
19098 None,
19099 &make_conv(1_700_000_900_000, 0, "shared opener"),
19100 )
19101 .unwrap();
19102
19103 let conversation_count: i64 = storage
19104 .conn
19105 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
19106 row.get_typed(0)
19107 })
19108 .unwrap();
19109 assert_eq!(conversation_count, 2);
19110 }
19111
19112 #[test]
19113 fn insert_conversation_tree_keeps_distinct_source_path_sessions_separate() {
19114 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19115 use std::path::PathBuf;
19116
19117 let dir = TempDir::new().unwrap();
19118 let db_path = dir.path().join("test.db");
19119 let storage = SqliteStorage::open(&db_path).unwrap();
19120
19121 let agent = Agent {
19122 id: None,
19123 slug: "codex".into(),
19124 name: "Codex".into(),
19125 version: Some("0.2.3".into()),
19126 kind: AgentKind::Cli,
19127 };
19128 let agent_id = storage.ensure_agent(&agent).unwrap();
19129
19130 let make_conv = |started_at: i64, created_at: i64, content: &str| Conversation {
19131 id: None,
19132 agent_slug: "codex".into(),
19133 workspace: Some(PathBuf::from("/tmp/workspace")),
19134 external_id: None,
19135 title: Some("Same Path Different Session".into()),
19136 source_path: PathBuf::from("/tmp/reused-session.jsonl"),
19137 started_at: Some(started_at),
19138 ended_at: Some(started_at + 500),
19139 approx_tokens: None,
19140 metadata_json: serde_json::Value::Null,
19141 messages: vec![Message {
19142 id: None,
19143 idx: 0,
19144 role: MessageRole::User,
19145 author: None,
19146 created_at: Some(created_at),
19147 content: content.into(),
19148 extra_json: serde_json::Value::Null,
19149 snippets: Vec::new(),
19150 }],
19151 source_id: "local".into(),
19152 origin_host: None,
19153 };
19154
19155 storage
19156 .insert_conversation_tree(
19157 agent_id,
19158 None,
19159 &make_conv(1_700_000_000_000, 1_700_000_000_000, "first session"),
19160 )
19161 .unwrap();
19162 storage
19163 .insert_conversation_tree(
19164 agent_id,
19165 None,
19166 &make_conv(1_700_000_900_000, 1_700_000_900_000, "second session"),
19167 )
19168 .unwrap();
19169
19170 let conversation_count: i64 = storage
19171 .conn
19172 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
19173 row.get_typed(0)
19174 })
19175 .unwrap();
19176 assert_eq!(conversation_count, 2);
19177 }
19178
19179 #[test]
19180 fn insert_conversation_tree_merges_replay_equivalent_messages_with_shifted_idx() {
19181 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19182 use std::path::PathBuf;
19183
19184 let dir = TempDir::new().unwrap();
19185 let db_path = dir.path().join("test.db");
19186 let storage = SqliteStorage::open(&db_path).unwrap();
19187
19188 let agent = Agent {
19189 id: None,
19190 slug: "codex".into(),
19191 name: "Codex".into(),
19192 version: Some("0.2.3".into()),
19193 kind: AgentKind::Cli,
19194 };
19195 let agent_id = storage.ensure_agent(&agent).unwrap();
19196
19197 let make_conv = |started_at: i64, messages: Vec<Message>| Conversation {
19198 id: None,
19199 agent_slug: "codex".into(),
19200 workspace: Some(PathBuf::from("/tmp/workspace")),
19201 external_id: None,
19202 title: Some("Shifted replay".into()),
19203 source_path: PathBuf::from("/tmp/replay-session.jsonl"),
19204 started_at: Some(started_at),
19205 ended_at: Some(started_at + 500),
19206 approx_tokens: None,
19207 metadata_json: serde_json::Value::Null,
19208 messages,
19209 source_id: "local".into(),
19210 origin_host: None,
19211 };
19212
19213 let first = storage
19214 .insert_conversation_tree(
19215 agent_id,
19216 None,
19217 &make_conv(
19218 1_700_000_000_000,
19219 vec![
19220 Message {
19221 id: None,
19222 idx: 0,
19223 role: MessageRole::User,
19224 author: None,
19225 created_at: Some(1_700_000_000_000),
19226 content: "first".into(),
19227 extra_json: serde_json::Value::Null,
19228 snippets: Vec::new(),
19229 },
19230 Message {
19231 id: None,
19232 idx: 1,
19233 role: MessageRole::Agent,
19234 author: None,
19235 created_at: Some(1_700_000_000_100),
19236 content: "second".into(),
19237 extra_json: serde_json::Value::Null,
19238 snippets: Vec::new(),
19239 },
19240 ],
19241 ),
19242 )
19243 .unwrap();
19244
19245 let second = storage
19246 .insert_conversation_tree(
19247 agent_id,
19248 None,
19249 &make_conv(
19250 1_700_000_900_000,
19251 vec![
19252 Message {
19253 id: None,
19254 idx: 10,
19255 role: MessageRole::User,
19256 author: None,
19257 created_at: Some(1_700_000_000_000),
19258 content: "first".into(),
19259 extra_json: serde_json::Value::Null,
19260 snippets: Vec::new(),
19261 },
19262 Message {
19263 id: None,
19264 idx: 11,
19265 role: MessageRole::Agent,
19266 author: None,
19267 created_at: Some(1_700_000_000_100),
19268 content: "second".into(),
19269 extra_json: serde_json::Value::Null,
19270 snippets: Vec::new(),
19271 },
19272 Message {
19273 id: None,
19274 idx: 12,
19275 role: MessageRole::User,
19276 author: None,
19277 created_at: Some(1_700_000_000_200),
19278 content: "third".into(),
19279 extra_json: serde_json::Value::Null,
19280 snippets: Vec::new(),
19281 },
19282 ],
19283 ),
19284 )
19285 .unwrap();
19286
19287 assert_eq!(first.conversation_id, second.conversation_id);
19288 assert_eq!(second.inserted_indices, vec![12]);
19289
19290 let stored_indices: Vec<i64> = storage
19291 .conn
19292 .query_map_collect(
19293 "SELECT idx FROM messages WHERE conversation_id = ?1 ORDER BY idx",
19294 fparams![first.conversation_id],
19295 |row| row.get_typed(0),
19296 )
19297 .unwrap();
19298 assert_eq!(stored_indices, vec![0, 1, 12]);
19299 }
19300
19301 #[test]
19302 fn salvage_historical_databases_imports_backups_once_and_merges_overlap() {
19303 use crate::model::types::{Conversation, Message, MessageRole};
19304 use std::path::PathBuf;
19305
19306 fn base_conv(source_path: &str, messages: Vec<Message>) -> Conversation {
19307 Conversation {
19308 id: None,
19309 agent_slug: "codex".into(),
19310 workspace: Some(PathBuf::from("/tmp/workspace")),
19311 external_id: None,
19312 title: Some("Recovered".into()),
19313 source_path: PathBuf::from(source_path),
19314 started_at: Some(1_700_000_000_000),
19315 ended_at: Some(1_700_000_000_999),
19316 approx_tokens: None,
19317 metadata_json: serde_json::Value::Null,
19318 messages,
19319 source_id: "local".into(),
19320 origin_host: None,
19321 }
19322 }
19323
19324 let dir = TempDir::new().unwrap();
19325 let canonical_db = dir.path().join("agent_search.db");
19326 let storage = SqliteStorage::open(&canonical_db).unwrap();
19327
19328 let overlapping_a = base_conv(
19329 "/tmp/shared-history.jsonl",
19330 vec![
19331 Message {
19332 id: None,
19333 idx: 0,
19334 role: MessageRole::User,
19335 author: None,
19336 created_at: Some(1_700_000_000_000),
19337 content: "first".into(),
19338 extra_json: serde_json::Value::Null,
19339 snippets: Vec::new(),
19340 },
19341 Message {
19342 id: None,
19343 idx: 1,
19344 role: MessageRole::Agent,
19345 author: None,
19346 created_at: Some(1_700_000_000_100),
19347 content: "second".into(),
19348 extra_json: serde_json::Value::Null,
19349 snippets: Vec::new(),
19350 },
19351 ],
19352 );
19353 let overlapping_b = base_conv(
19354 "/tmp/shared-history.jsonl",
19355 vec![
19356 Message {
19357 id: None,
19358 idx: 1,
19359 role: MessageRole::Agent,
19360 author: None,
19361 created_at: Some(1_700_000_000_100),
19362 content: "second".into(),
19363 extra_json: serde_json::Value::Null,
19364 snippets: Vec::new(),
19365 },
19366 Message {
19367 id: None,
19368 idx: 2,
19369 role: MessageRole::User,
19370 author: None,
19371 created_at: Some(1_700_000_000_200),
19372 content: "third".into(),
19373 extra_json: serde_json::Value::Null,
19374 snippets: Vec::new(),
19375 },
19376 ],
19377 );
19378 let unique = Conversation {
19379 source_path: PathBuf::from("/tmp/unique-history.jsonl"),
19380 messages: vec![Message {
19381 id: None,
19382 idx: 0,
19383 role: MessageRole::User,
19384 author: None,
19385 created_at: Some(1_700_000_001_000),
19386 content: "unique".into(),
19387 extra_json: serde_json::Value::Null,
19388 snippets: Vec::new(),
19389 }],
19390 started_at: Some(1_700_000_001_000),
19391 ended_at: Some(1_700_000_001_100),
19392 ..base_conv("/tmp/unique-history.jsonl", Vec::new())
19393 };
19394
19395 seed_historical_db_direct(
19396 &dir.path()
19397 .join("backups/agent_search.db.20260322T020200.bak"),
19398 std::slice::from_ref(&overlapping_a),
19399 );
19400 seed_historical_db_direct(
19401 &dir.path().join("agent_search.corrupt.20260324_212907"),
19402 &[overlapping_b, unique],
19403 );
19404
19405 let first = storage.salvage_historical_databases(&canonical_db).unwrap();
19406 assert_eq!(first.bundles_considered, 2);
19407 assert_eq!(first.bundles_imported, 2);
19408 assert_eq!(first.messages_imported, 4);
19409
19410 let conversations = storage.list_conversations(10, 0).unwrap();
19411 assert_eq!(conversations.len(), 2);
19412
19413 let shared_id = conversations
19414 .iter()
19415 .find(|conv| conv.source_path == std::path::Path::new("/tmp/shared-history.jsonl"))
19416 .and_then(|conv| conv.id)
19417 .unwrap();
19418 let shared_indices: Vec<i64> = storage
19419 .fetch_messages(shared_id)
19420 .unwrap()
19421 .into_iter()
19422 .map(|msg| msg.idx)
19423 .collect();
19424 assert_eq!(shared_indices, vec![0, 1, 2]);
19425
19426 let second = storage.salvage_historical_databases(&canonical_db).unwrap();
19427 assert_eq!(second.bundles_imported, 0);
19428 assert_eq!(second.messages_imported, 0);
19429 }
19430
19431 #[test]
19432 fn salvage_historical_databases_normalizes_host_only_remote_provenance() {
19433 use crate::model::types::{Conversation, Message, MessageRole};
19434 use std::path::PathBuf;
19435
19436 let dir = TempDir::new().unwrap();
19437 let canonical_db = dir.path().join("agent_search.db");
19438 let storage = SqliteStorage::open(&canonical_db).unwrap();
19439
19440 let host_only_remote = Conversation {
19441 id: None,
19442 agent_slug: "codex".into(),
19443 workspace: Some(PathBuf::from("/tmp/workspace")),
19444 external_id: None,
19445 title: Some("Recovered Host Only Remote".into()),
19446 source_path: PathBuf::from("/tmp/host-only-history.jsonl"),
19447 started_at: Some(1_700_000_000_000),
19448 ended_at: Some(1_700_000_000_999),
19449 approx_tokens: None,
19450 metadata_json: serde_json::Value::Null,
19451 messages: vec![Message {
19452 id: None,
19453 idx: 0,
19454 role: MessageRole::User,
19455 author: None,
19456 created_at: Some(1_700_000_000_000),
19457 content: "host-only remote".into(),
19458 extra_json: serde_json::Value::Null,
19459 snippets: Vec::new(),
19460 }],
19461 source_id: " ".into(),
19462 origin_host: Some("builder-5".into()),
19463 };
19464
19465 let historical_db = dir
19466 .path()
19467 .join("backups/agent_search.db.20260322T020200.bak");
19468 seed_historical_db_direct(&historical_db, std::slice::from_ref(&host_only_remote));
19469
19470 let historical_conn =
19471 FrankenConnection::open(historical_db.to_string_lossy().into_owned()).unwrap();
19472 historical_conn
19473 .execute_compat(
19474 "INSERT INTO sources(id, kind, host_label, created_at, updated_at) VALUES(?1, ?2, ?3, ?4, ?5)",
19475 fparams![" ", "ssh", "builder-5", 0_i64, 0_i64],
19476 )
19477 .unwrap();
19478 historical_conn
19479 .execute_compat(
19480 "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE source_path = ?3",
19481 fparams![" ", "builder-5", "/tmp/host-only-history.jsonl"],
19482 )
19483 .unwrap();
19484 historical_conn
19485 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams!["builder-5"])
19486 .unwrap();
19487 drop(historical_conn);
19488
19489 let first = storage.salvage_historical_databases(&canonical_db).unwrap();
19490 assert_eq!(first.bundles_imported, 1);
19491 assert_eq!(first.messages_imported, 1);
19492
19493 let source_ids = storage.get_source_ids().unwrap();
19494 assert_eq!(source_ids, vec!["builder-5".to_string()]);
19495
19496 let conversations = storage.list_conversations(10, 0).unwrap();
19497 assert_eq!(conversations.len(), 1);
19498 assert_eq!(conversations[0].source_id, "builder-5");
19499 assert_eq!(conversations[0].origin_host.as_deref(), Some("builder-5"));
19500 }
19501
19502 #[test]
19503 fn historical_salvage_retry_splits_single_conversation_until_it_fits() {
19504 use crate::model::types::{Conversation, Message, MessageRole};
19505 use std::path::PathBuf;
19506
19507 let mut attempts: Vec<Vec<usize>> = Vec::new();
19508 let entry = HistoricalBatchEntry {
19509 source_row_id: 77,
19510 agent_id: 1,
19511 workspace_id: None,
19512 conversation: Conversation {
19513 id: None,
19514 agent_slug: "gemini".into(),
19515 workspace: Some(PathBuf::from("/tmp/workspace")),
19516 external_id: Some("conv-77".into()),
19517 title: Some("Large recovered conversation".into()),
19518 source_path: PathBuf::from("/tmp/history.jsonl"),
19519 started_at: Some(1_700_000_000_000),
19520 ended_at: Some(1_700_000_000_999),
19521 approx_tokens: None,
19522 metadata_json: serde_json::Value::Null,
19523 messages: (0..4)
19524 .map(|idx| Message {
19525 id: None,
19526 idx,
19527 role: MessageRole::User,
19528 author: None,
19529 created_at: Some(1_700_000_000_000 + idx),
19530 content: format!("message-{idx}"),
19531 extra_json: serde_json::Value::Null,
19532 snippets: Vec::new(),
19533 })
19534 .collect(),
19535 source_id: LOCAL_SOURCE_ID.into(),
19536 origin_host: None,
19537 },
19538 };
19539
19540 let totals = SqliteStorage::import_historical_batch_with_retry(
19541 std::slice::from_ref(&entry),
19542 &mut |batch| {
19543 attempts.push(
19544 batch
19545 .iter()
19546 .map(|entry| entry.conversation.messages.len())
19547 .collect(),
19548 );
19549 let total_messages: usize = batch
19550 .iter()
19551 .map(|entry| entry.conversation.messages.len())
19552 .sum();
19553 if total_messages > 1 {
19554 Err(anyhow!("out of memory"))
19555 } else {
19556 Ok(HistoricalBatchImportTotals {
19557 inserted_source_rows: batch.len(),
19558 inserted_messages: total_messages,
19559 })
19560 }
19561 },
19562 )
19563 .unwrap();
19564
19565 assert_eq!(
19566 totals,
19567 HistoricalBatchImportTotals {
19568 inserted_source_rows: 1,
19569 inserted_messages: 4,
19570 }
19571 );
19572 assert_eq!(attempts.first().cloned(), Some(vec![4]));
19573 assert!(
19574 attempts.iter().filter(|sizes| sizes == &&vec![1]).count() >= 4,
19575 "expected recursive fallback to reach one-message slices"
19576 );
19577 }
19578
19579 #[test]
19580 fn salvage_historical_databases_resumes_from_progress_checkpoint() {
19581 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19582 use std::path::PathBuf;
19583
19584 fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
19585 Conversation {
19586 id: None,
19587 agent_slug: "codex".into(),
19588 workspace: Some(PathBuf::from("/tmp/workspace")),
19589 external_id: Some(format!("conv-{idx_seed}")),
19590 title: Some(format!("Recovered {idx_seed}")),
19591 source_path: PathBuf::from(source_path),
19592 started_at: Some(1_700_000_000_000 + idx_seed),
19593 ended_at: Some(1_700_000_000_100 + idx_seed),
19594 approx_tokens: None,
19595 metadata_json: serde_json::Value::Null,
19596 messages: vec![Message {
19597 id: None,
19598 idx: 0,
19599 role: MessageRole::User,
19600 author: None,
19601 created_at: Some(1_700_000_000_000 + idx_seed),
19602 content: format!("message-{idx_seed}"),
19603 extra_json: serde_json::Value::Null,
19604 snippets: Vec::new(),
19605 }],
19606 source_id: LOCAL_SOURCE_ID.into(),
19607 origin_host: None,
19608 }
19609 }
19610
19611 let dir = TempDir::new().unwrap();
19612 let canonical_db = dir.path().join("agent_search.db");
19613 let backup_db = dir
19614 .path()
19615 .join("backups/agent_search.db.20260322T020200.bak");
19616 let storage = SqliteStorage::open(&canonical_db).unwrap();
19617 let conv_a = make_conv("/tmp/one.jsonl", 1);
19618 let conv_b = make_conv("/tmp/two.jsonl", 2);
19619 let conv_c = make_conv("/tmp/three.jsonl", 3);
19620 seed_historical_db_direct(
19621 &backup_db,
19622 &[conv_a.clone(), conv_b.clone(), conv_c.clone()],
19623 );
19624
19625 let agent = Agent {
19626 id: None,
19627 slug: "codex".into(),
19628 name: "Codex".into(),
19629 version: Some("0.2.3".into()),
19630 kind: AgentKind::Cli,
19631 };
19632 let agent_id = storage.ensure_agent(&agent).unwrap();
19633 storage
19634 .insert_conversation_tree(agent_id, None, &conv_a)
19635 .unwrap();
19636
19637 let bundle = discover_historical_database_bundles(&canonical_db)
19638 .into_iter()
19639 .find(|bundle| bundle.root_path == backup_db)
19640 .unwrap();
19641 let first_row_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
19642 .unwrap()
19643 .query_row_map(
19644 "SELECT id FROM conversations WHERE source_path = ?1",
19645 fparams!["/tmp/one.jsonl"],
19646 |row| row.get_typed(0),
19647 )
19648 .unwrap();
19649 storage
19650 .record_historical_bundle_progress(&bundle, "direct-readonly", first_row_id, 50, 99)
19651 .unwrap();
19652
19653 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
19654 assert_eq!(outcome.bundles_imported, 1);
19655 assert_eq!(outcome.conversations_imported, 52);
19656 assert_eq!(outcome.messages_imported, 101);
19657 assert_eq!(storage.list_conversations(10, 0).unwrap().len(), 3);
19658
19659 let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
19660 let progress_left: Option<String> = storage
19661 .conn
19662 .query_row_map(
19663 "SELECT value FROM meta WHERE key = ?1",
19664 fparams![progress_key.as_str()],
19665 |row| row.get_typed(0),
19666 )
19667 .optional()
19668 .unwrap();
19669 assert!(
19670 progress_left.is_none(),
19671 "completed salvage should clear bundle progress"
19672 );
19673
19674 let second = storage.salvage_historical_databases(&canonical_db).unwrap();
19675 assert_eq!(second.bundles_imported, 0);
19676 assert_eq!(second.messages_imported, 0);
19677 }
19678
19679 #[test]
19680 fn salvage_historical_databases_skips_bundle_when_checkpoint_covers_backup() {
19681 use crate::model::types::{Conversation, Message, MessageRole};
19687 use std::path::PathBuf;
19688
19689 fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
19690 Conversation {
19691 id: None,
19692 agent_slug: "codex".into(),
19693 workspace: Some(PathBuf::from("/tmp/workspace")),
19694 external_id: Some(format!("conv-{idx_seed}")),
19695 title: Some(format!("Recovered {idx_seed}")),
19696 source_path: PathBuf::from(source_path),
19697 started_at: Some(1_700_000_000_000 + idx_seed),
19698 ended_at: Some(1_700_000_000_100 + idx_seed),
19699 approx_tokens: None,
19700 metadata_json: serde_json::Value::Null,
19701 messages: vec![Message {
19702 id: None,
19703 idx: 0,
19704 role: MessageRole::User,
19705 author: None,
19706 created_at: Some(1_700_000_000_000 + idx_seed),
19707 content: format!("message-{idx_seed}"),
19708 extra_json: serde_json::Value::Null,
19709 snippets: Vec::new(),
19710 }],
19711 source_id: LOCAL_SOURCE_ID.into(),
19712 origin_host: None,
19713 }
19714 }
19715
19716 let dir = TempDir::new().unwrap();
19717 let canonical_db = dir.path().join("agent_search.db");
19718 let backup_db = dir
19719 .path()
19720 .join("backups/agent_search.db.20260322T020200.bak");
19721 let storage = SqliteStorage::open(&canonical_db).unwrap();
19722 seed_historical_db_direct(
19723 &backup_db,
19724 &[
19725 make_conv("/tmp/one.jsonl", 1),
19726 make_conv("/tmp/two.jsonl", 2),
19727 make_conv("/tmp/three.jsonl", 3),
19728 ],
19729 );
19730
19731 let bundle = discover_historical_database_bundles(&canonical_db)
19732 .into_iter()
19733 .find(|bundle| bundle.root_path == backup_db)
19734 .unwrap();
19735
19736 let backup_max_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
19738 .unwrap()
19739 .query_row_map(
19740 "SELECT COALESCE(MAX(id), 0) FROM conversations",
19741 fparams![],
19742 |row| row.get_typed(0),
19743 )
19744 .unwrap();
19745 assert!(backup_max_id > 0, "seeded backup should have conversations");
19746 storage
19747 .record_historical_bundle_progress(&bundle, "direct-readonly", backup_max_id, 3, 3)
19748 .unwrap();
19749
19750 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
19751 assert_eq!(
19752 outcome.bundles_imported, 0,
19753 "fully-checkpointed bundle must not be re-scanned"
19754 );
19755 assert_eq!(outcome.conversations_imported, 0);
19756 assert_eq!(outcome.messages_imported, 0);
19757 assert_eq!(
19758 storage.list_conversations(10, 0).unwrap().len(),
19759 0,
19760 "skip path must not import anything"
19761 );
19762 assert!(
19763 storage.historical_bundle_already_imported(&bundle).unwrap(),
19764 "skipped bundle must be ledgered as salvaged so future runs short-circuit"
19765 );
19766
19767 let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
19768 let progress_left: Option<String> = storage
19769 .conn
19770 .query_row_map(
19771 "SELECT value FROM meta WHERE key = ?1",
19772 fparams![progress_key.as_str()],
19773 |row| row.get_typed(0),
19774 )
19775 .optional()
19776 .unwrap();
19777 assert!(
19778 progress_left.is_none(),
19779 "skip path must clear the bundle progress checkpoint"
19780 );
19781 }
19782
19783 #[test]
19784 fn list_conversations_for_lexical_rebuild_uses_stable_id_order() {
19785 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19786 use std::path::PathBuf;
19787
19788 let dir = TempDir::new().unwrap();
19789 let db_path = dir.path().join("agent_search.db");
19790 let storage = SqliteStorage::open(&db_path).unwrap();
19791 let agent = Agent {
19792 id: None,
19793 slug: "codex".into(),
19794 name: "Codex".into(),
19795 version: Some("0.2.3".into()),
19796 kind: AgentKind::Cli,
19797 };
19798 let agent_id = storage.ensure_agent(&agent).unwrap();
19799
19800 let make_conv = |source_path: &str, started_at: i64| Conversation {
19801 id: None,
19802 agent_slug: "codex".into(),
19803 workspace: Some(PathBuf::from("/tmp/workspace")),
19804 external_id: Some(source_path.to_string()),
19805 title: Some(source_path.to_string()),
19806 source_path: PathBuf::from(source_path),
19807 started_at: Some(started_at),
19808 ended_at: Some(started_at + 1),
19809 approx_tokens: None,
19810 metadata_json: serde_json::Value::Null,
19811 messages: vec![Message {
19812 id: None,
19813 idx: 0,
19814 role: MessageRole::User,
19815 author: None,
19816 created_at: Some(started_at),
19817 content: format!("message for {source_path}"),
19818 extra_json: serde_json::Value::Null,
19819 snippets: Vec::new(),
19820 }],
19821 source_id: LOCAL_SOURCE_ID.into(),
19822 origin_host: None,
19823 };
19824
19825 let conv_a = make_conv("/tmp/a.jsonl", 3_000);
19826 let conv_b = make_conv("/tmp/b.jsonl", 1_000);
19827 let conv_c = make_conv("/tmp/c.jsonl", 2_000);
19828
19829 storage
19830 .insert_conversation_tree(agent_id, None, &conv_a)
19831 .unwrap();
19832 storage
19833 .insert_conversation_tree(agent_id, None, &conv_b)
19834 .unwrap();
19835 storage
19836 .insert_conversation_tree(agent_id, None, &conv_c)
19837 .unwrap();
19838
19839 let user_order: Vec<PathBuf> = storage
19840 .list_conversations(10, 0)
19841 .unwrap()
19842 .into_iter()
19843 .map(|conv| conv.source_path)
19844 .collect();
19845 assert_eq!(
19846 user_order,
19847 vec![
19848 PathBuf::from("/tmp/a.jsonl"),
19849 PathBuf::from("/tmp/c.jsonl"),
19850 PathBuf::from("/tmp/b.jsonl"),
19851 ]
19852 );
19853
19854 let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
19855 let rebuild_order: Vec<PathBuf> = storage
19856 .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
19857 .unwrap()
19858 .into_iter()
19859 .map(|conv| conv.source_path)
19860 .collect();
19861 assert_eq!(
19862 rebuild_order,
19863 vec![
19864 PathBuf::from("/tmp/a.jsonl"),
19865 PathBuf::from("/tmp/b.jsonl"),
19866 PathBuf::from("/tmp/c.jsonl"),
19867 ]
19868 );
19869
19870 let first_page = storage
19871 .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
19872 .unwrap();
19873 let first_page_paths: Vec<PathBuf> = first_page
19874 .iter()
19875 .map(|conv| conv.source_path.clone())
19876 .collect();
19877 assert_eq!(
19878 first_page_paths,
19879 vec![PathBuf::from("/tmp/a.jsonl"), PathBuf::from("/tmp/b.jsonl")]
19880 );
19881
19882 let second_page = storage
19883 .list_conversations_for_lexical_rebuild_after_id(
19884 2,
19885 first_page
19886 .last()
19887 .and_then(|conv| conv.id)
19888 .expect("first page should include an id"),
19889 &agent_slugs,
19890 &workspace_paths,
19891 )
19892 .unwrap();
19893 let second_page_paths: Vec<PathBuf> = second_page
19894 .iter()
19895 .map(|conv| conv.source_path.clone())
19896 .collect();
19897 assert_eq!(second_page_paths, vec![PathBuf::from("/tmp/c.jsonl")]);
19898
19899 let bounded_page = storage
19900 .list_conversations_for_lexical_rebuild_after_id_through_id(
19901 10,
19902 0,
19903 first_page
19904 .last()
19905 .and_then(|conv| conv.id)
19906 .expect("first page should include an id"),
19907 &agent_slugs,
19908 &workspace_paths,
19909 )
19910 .unwrap();
19911 let bounded_paths: Vec<PathBuf> = bounded_page
19912 .iter()
19913 .map(|conv| conv.source_path.clone())
19914 .collect();
19915 assert_eq!(
19916 bounded_paths,
19917 vec![PathBuf::from("/tmp/a.jsonl"), PathBuf::from("/tmp/b.jsonl")]
19918 );
19919 }
19920
19921 #[test]
19922 fn keyset_traversal_handles_sparse_holey_conversation_ids() {
19923 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19924 use std::path::PathBuf;
19925
19926 let dir = TempDir::new().unwrap();
19927 let db_path = dir.path().join("agent_search.db");
19928 let storage = SqliteStorage::open(&db_path).unwrap();
19929 let agent = Agent {
19930 id: None,
19931 slug: "codex".into(),
19932 name: "Codex".into(),
19933 version: Some("0.2.3".into()),
19934 kind: AgentKind::Cli,
19935 };
19936 let agent_id = storage.ensure_agent(&agent).unwrap();
19937
19938 let make_conv = |label: &str, ts: i64| Conversation {
19939 id: None,
19940 agent_slug: "codex".into(),
19941 workspace: Some(PathBuf::from("/tmp/workspace")),
19942 external_id: Some(label.to_string()),
19943 title: Some(label.to_string()),
19944 source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
19945 started_at: Some(ts),
19946 ended_at: Some(ts + 1),
19947 approx_tokens: None,
19948 metadata_json: serde_json::Value::Null,
19949 messages: vec![Message {
19950 id: None,
19951 idx: 0,
19952 role: MessageRole::User,
19953 author: None,
19954 created_at: Some(ts),
19955 content: format!("msg for {label}"),
19956 extra_json: serde_json::Value::Null,
19957 snippets: Vec::new(),
19958 }],
19959 source_id: LOCAL_SOURCE_ID.into(),
19960 origin_host: None,
19961 };
19962
19963 for i in 0..6 {
19964 storage
19965 .insert_conversation_tree(
19966 agent_id,
19967 None,
19968 &make_conv(&format!("conv-{i}"), 1000 + i),
19969 )
19970 .unwrap();
19971 }
19972
19973 storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
19974 storage
19975 .conn
19976 .execute_compat("DELETE FROM conversations WHERE id IN (2, 4)", fparams![])
19977 .unwrap();
19978 storage
19979 .conn
19980 .execute_compat(
19981 "DELETE FROM messages WHERE conversation_id IN (2, 4)",
19982 fparams![],
19983 )
19984 .unwrap();
19985 storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();
19986
19987 let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
19988
19989 let page1 = storage
19990 .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
19991 .unwrap();
19992 assert_eq!(page1.len(), 2);
19993 let page1_ids: Vec<i64> = page1.iter().map(|c| c.id.unwrap()).collect();
19994 assert_eq!(page1_ids, vec![1, 3]);
19995
19996 let page2 = storage
19997 .list_conversations_for_lexical_rebuild_after_id(
19998 2,
19999 *page1_ids.last().unwrap(),
20000 &agent_slugs,
20001 &workspace_paths,
20002 )
20003 .unwrap();
20004 assert_eq!(page2.len(), 2);
20005 let page2_ids: Vec<i64> = page2.iter().map(|c| c.id.unwrap()).collect();
20006 assert_eq!(page2_ids, vec![5, 6]);
20007
20008 let page3 = storage
20009 .list_conversations_for_lexical_rebuild_after_id(
20010 2,
20011 *page2_ids.last().unwrap(),
20012 &agent_slugs,
20013 &workspace_paths,
20014 )
20015 .unwrap();
20016 assert!(page3.is_empty());
20017
20018 let all_ids: Vec<i64> = page1_ids.iter().chain(page2_ids.iter()).copied().collect();
20019 assert_eq!(all_ids, vec![1, 3, 5, 6]);
20020 }
20021
20022 #[test]
20023 fn keyset_traversal_through_id_with_sparse_ranges() {
20024 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20025 use std::path::PathBuf;
20026
20027 let dir = TempDir::new().unwrap();
20028 let db_path = dir.path().join("agent_search.db");
20029 let storage = SqliteStorage::open(&db_path).unwrap();
20030 let agent = Agent {
20031 id: None,
20032 slug: "codex".into(),
20033 name: "Codex".into(),
20034 version: Some("0.2.3".into()),
20035 kind: AgentKind::Cli,
20036 };
20037 let agent_id = storage.ensure_agent(&agent).unwrap();
20038
20039 let make_conv = |label: &str, ts: i64| Conversation {
20040 id: None,
20041 agent_slug: "codex".into(),
20042 workspace: Some(PathBuf::from("/tmp/workspace")),
20043 external_id: Some(label.to_string()),
20044 title: Some(label.to_string()),
20045 source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
20046 started_at: Some(ts),
20047 ended_at: Some(ts + 1),
20048 approx_tokens: None,
20049 metadata_json: serde_json::Value::Null,
20050 messages: vec![Message {
20051 id: None,
20052 idx: 0,
20053 role: MessageRole::User,
20054 author: None,
20055 created_at: Some(ts),
20056 content: format!("msg for {label}"),
20057 extra_json: serde_json::Value::Null,
20058 snippets: Vec::new(),
20059 }],
20060 source_id: LOCAL_SOURCE_ID.into(),
20061 origin_host: None,
20062 };
20063
20064 for i in 0..10 {
20065 storage
20066 .insert_conversation_tree(
20067 agent_id,
20068 None,
20069 &make_conv(&format!("conv-{i}"), 1000 + i),
20070 )
20071 .unwrap();
20072 }
20073
20074 storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
20075 storage
20076 .conn
20077 .execute_compat(
20078 "DELETE FROM conversations WHERE id IN (3, 5, 7, 8)",
20079 fparams![],
20080 )
20081 .unwrap();
20082 storage
20083 .conn
20084 .execute_compat(
20085 "DELETE FROM messages WHERE conversation_id IN (3, 5, 7, 8)",
20086 fparams![],
20087 )
20088 .unwrap();
20089 storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();
20090
20091 let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
20092
20093 let through_5 = storage
20094 .list_conversations_for_lexical_rebuild_after_id_through_id(
20095 100,
20096 0,
20097 5,
20098 &agent_slugs,
20099 &workspace_paths,
20100 )
20101 .unwrap();
20102 let through_5_ids: Vec<i64> = through_5.iter().map(|c| c.id.unwrap()).collect();
20103 assert_eq!(through_5_ids, vec![1, 2, 4]);
20104
20105 let after_4_through_10 = storage
20106 .list_conversations_for_lexical_rebuild_after_id_through_id(
20107 100,
20108 4,
20109 10,
20110 &agent_slugs,
20111 &workspace_paths,
20112 )
20113 .unwrap();
20114 let ids: Vec<i64> = after_4_through_10.iter().map(|c| c.id.unwrap()).collect();
20115 assert_eq!(ids, vec![6, 9, 10]);
20116
20117 let after_10 = storage
20118 .list_conversations_for_lexical_rebuild_after_id_through_id(
20119 100,
20120 10,
20121 20,
20122 &agent_slugs,
20123 &workspace_paths,
20124 )
20125 .unwrap();
20126 assert!(after_10.is_empty());
20127 }
20128
20129 #[test]
20130 fn list_conversation_footprints_for_lexical_rebuild_estimates_bytes_and_keeps_empty_conversations()
20131 {
20132 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20133 use std::path::PathBuf;
20134
20135 let dir = TempDir::new().unwrap();
20136 let db_path = dir.path().join("agent_search.db");
20137 let storage = SqliteStorage::open(&db_path).unwrap();
20138 let agent = Agent {
20139 id: None,
20140 slug: "codex".into(),
20141 name: "Codex".into(),
20142 version: Some("0.2.3".into()),
20143 kind: AgentKind::Cli,
20144 };
20145 let agent_id = storage.ensure_agent(&agent).unwrap();
20146
20147 let insert = |external_id: &str, base_ts: i64, messages: Vec<Message>| {
20148 storage
20149 .insert_conversation_tree(
20150 agent_id,
20151 None,
20152 &Conversation {
20153 id: None,
20154 agent_slug: "codex".into(),
20155 workspace: Some(PathBuf::from("/tmp/workspace")),
20156 external_id: Some(external_id.to_string()),
20157 title: Some(external_id.to_string()),
20158 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
20159 started_at: Some(base_ts),
20160 ended_at: Some(base_ts + 100),
20161 approx_tokens: None,
20162 metadata_json: serde_json::Value::Null,
20163 messages,
20164 source_id: LOCAL_SOURCE_ID.into(),
20165 origin_host: None,
20166 },
20167 )
20168 .unwrap()
20169 .conversation_id
20170 };
20171
20172 let ascii_id = insert(
20173 "footprint-ascii",
20174 1_700_000_000_000,
20175 vec![
20176 Message {
20177 id: None,
20178 idx: 0,
20179 role: MessageRole::User,
20180 author: None,
20181 created_at: Some(1_700_000_000_001),
20182 content: "abc".into(),
20183 extra_json: serde_json::Value::Null,
20184 snippets: Vec::new(),
20185 },
20186 Message {
20187 id: None,
20188 idx: 1,
20189 role: MessageRole::Agent,
20190 author: None,
20191 created_at: Some(1_700_000_000_002),
20192 content: "defg".into(),
20193 extra_json: serde_json::Value::Null,
20194 snippets: Vec::new(),
20195 },
20196 ],
20197 );
20198 let empty_id = insert("footprint-empty", 1_700_000_001_000, Vec::new());
20199 let utf8_id = insert(
20200 "footprint-utf8",
20201 1_700_000_002_000,
20202 vec![Message {
20203 id: None,
20204 idx: 0,
20205 role: MessageRole::Tool,
20206 author: None,
20207 created_at: Some(1_700_000_002_001),
20208 content: "hé🙂".into(),
20209 extra_json: serde_json::Value::Null,
20210 snippets: Vec::new(),
20211 }],
20212 );
20213 let sparse_id = insert(
20214 "footprint-sparse",
20215 1_700_000_003_000,
20216 vec![Message {
20217 id: None,
20218 idx: 10,
20219 role: MessageRole::User,
20220 author: None,
20221 created_at: Some(1_700_000_003_010),
20222 content: "sparse".into(),
20223 extra_json: serde_json::Value::Null,
20224 snippets: Vec::new(),
20225 }],
20226 );
20227 storage
20228 .conn
20229 .execute_compat(
20230 "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
20231 fparams![utf8_id],
20232 )
20233 .unwrap();
20234
20235 let footprints = storage
20236 .list_conversation_footprints_for_lexical_rebuild()
20237 .unwrap();
20238 assert_eq!(
20239 footprints,
20240 vec![
20241 LexicalRebuildConversationFootprintRow {
20242 conversation_id: ascii_id,
20243 message_count: 2,
20244 message_bytes: 2 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
20245 },
20246 LexicalRebuildConversationFootprintRow {
20247 conversation_id: empty_id,
20248 message_count: 0,
20249 message_bytes: 0,
20250 },
20251 LexicalRebuildConversationFootprintRow {
20252 conversation_id: utf8_id,
20253 message_count: 1,
20254 message_bytes: LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
20255 },
20256 LexicalRebuildConversationFootprintRow {
20257 conversation_id: sparse_id,
20258 message_count: 11,
20259 message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
20260 },
20261 ]
20262 );
20263 }
20264
20265 #[test]
20266 fn list_conversation_footprints_for_lexical_rebuild_falls_back_for_missing_tail_cache() {
20267 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20268 use std::path::PathBuf;
20269
20270 let dir = TempDir::new().unwrap();
20271 let db_path = dir.path().join("agent_search.db");
20272 let storage = SqliteStorage::open(&db_path).unwrap();
20273 let agent = Agent {
20274 id: None,
20275 slug: "codex".into(),
20276 name: "Codex".into(),
20277 version: Some("0.2.3".into()),
20278 kind: AgentKind::Cli,
20279 };
20280 let agent_id = storage.ensure_agent(&agent).unwrap();
20281 let conversation_id = storage
20282 .insert_conversation_tree(
20283 agent_id,
20284 None,
20285 &Conversation {
20286 id: None,
20287 agent_slug: "codex".into(),
20288 workspace: Some(PathBuf::from("/tmp/workspace")),
20289 external_id: Some("footprint-missing-tail".to_string()),
20290 title: Some("footprint-missing-tail".to_string()),
20291 source_path: PathBuf::from("/tmp/footprint-missing-tail.jsonl"),
20292 started_at: Some(1_700_000_000_000),
20293 ended_at: Some(1_700_000_000_100),
20294 approx_tokens: None,
20295 metadata_json: serde_json::Value::Null,
20296 messages: vec![Message {
20297 id: None,
20298 idx: 10,
20299 role: MessageRole::User,
20300 author: None,
20301 created_at: Some(1_700_000_000_010),
20302 content: "legacy sparse tail".into(),
20303 extra_json: serde_json::Value::Null,
20304 snippets: Vec::new(),
20305 }],
20306 source_id: LOCAL_SOURCE_ID.into(),
20307 origin_host: None,
20308 },
20309 )
20310 .unwrap()
20311 .conversation_id;
20312
20313 storage
20314 .conn
20315 .execute_compat(
20316 "UPDATE conversations
20317 SET last_message_idx = NULL, last_message_created_at = NULL
20318 WHERE id = ?1",
20319 fparams![conversation_id],
20320 )
20321 .unwrap();
20322 storage
20323 .conn
20324 .execute_compat(
20325 "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
20326 fparams![conversation_id],
20327 )
20328 .unwrap();
20329
20330 let footprints = storage
20331 .list_conversation_footprints_for_lexical_rebuild()
20332 .unwrap();
20333
20334 assert_eq!(
20335 footprints,
20336 vec![LexicalRebuildConversationFootprintRow {
20337 conversation_id,
20338 message_count: 11,
20339 message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
20340 }],
20341 "missing tail-cache metadata should fall back to messages MAX(idx) instead of treating legacy conversations as empty"
20342 );
20343 }
20344
20345 #[test]
20346 fn list_conversation_footprints_for_lexical_rebuild_raises_stale_low_tail_cache() {
20347 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20348 use std::path::PathBuf;
20349
20350 let dir = TempDir::new().unwrap();
20351 let db_path = dir.path().join("agent_search.db");
20352 let storage = SqliteStorage::open(&db_path).unwrap();
20353 let agent = Agent {
20354 id: None,
20355 slug: "codex".into(),
20356 name: "Codex".into(),
20357 version: Some("0.2.3".into()),
20358 kind: AgentKind::Cli,
20359 };
20360 let agent_id = storage.ensure_agent(&agent).unwrap();
20361 let conversation_id = storage
20362 .insert_conversation_tree(
20363 agent_id,
20364 None,
20365 &Conversation {
20366 id: None,
20367 agent_slug: "codex".into(),
20368 workspace: Some(PathBuf::from("/tmp/workspace")),
20369 external_id: Some("footprint-stale-tail".to_string()),
20370 title: Some("footprint-stale-tail".to_string()),
20371 source_path: PathBuf::from("/tmp/footprint-stale-tail.jsonl"),
20372 started_at: Some(1_700_000_000_000),
20373 ended_at: Some(1_700_000_000_100),
20374 approx_tokens: None,
20375 metadata_json: serde_json::Value::Null,
20376 messages: (0..3)
20377 .map(|idx| Message {
20378 id: None,
20379 idx,
20380 role: MessageRole::User,
20381 author: None,
20382 created_at: Some(1_700_000_000_010 + idx),
20383 content: format!("message {idx}"),
20384 extra_json: serde_json::Value::Null,
20385 snippets: Vec::new(),
20386 })
20387 .collect(),
20388 source_id: LOCAL_SOURCE_ID.into(),
20389 origin_host: None,
20390 },
20391 )
20392 .unwrap()
20393 .conversation_id;
20394
20395 storage
20396 .conn
20397 .execute_compat(
20398 "UPDATE conversations
20399 SET last_message_idx = 0, last_message_created_at = 1700000000010
20400 WHERE id = ?1",
20401 fparams![conversation_id],
20402 )
20403 .unwrap();
20404 storage
20405 .conn
20406 .execute_compat(
20407 "UPDATE conversation_tail_state
20408 SET last_message_idx = 0, last_message_created_at = 1700000000010
20409 WHERE conversation_id = ?1",
20410 fparams![conversation_id],
20411 )
20412 .unwrap();
20413
20414 let footprints = storage
20415 .list_conversation_footprints_for_lexical_rebuild()
20416 .unwrap();
20417
20418 assert_eq!(
20419 footprints,
20420 vec![LexicalRebuildConversationFootprintRow {
20421 conversation_id,
20422 message_count: 3,
20423 message_bytes: 3 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
20424 }],
20425 "stale-low tail caches must not under-plan lexical shards and trip doc>plan invariants"
20426 );
20427 }
20428
20429 #[test]
20430 fn list_conversation_footprints_for_lexical_rebuild_tolerates_missing_tail_state_table() {
20431 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20432 use std::path::PathBuf;
20433
20434 let dir = TempDir::new().unwrap();
20435 let db_path = dir.path().join("agent_search.db");
20436 let storage = SqliteStorage::open(&db_path).unwrap();
20437 let agent = Agent {
20438 id: None,
20439 slug: "codex".into(),
20440 name: "Codex".into(),
20441 version: Some("0.2.3".into()),
20442 kind: AgentKind::Cli,
20443 };
20444 let agent_id = storage.ensure_agent(&agent).unwrap();
20445 let conversation_id = storage
20446 .insert_conversation_tree(
20447 agent_id,
20448 None,
20449 &Conversation {
20450 id: None,
20451 agent_slug: "codex".into(),
20452 workspace: Some(PathBuf::from("/tmp/workspace")),
20453 external_id: Some("footprint-missing-tail-table".to_string()),
20454 title: Some("footprint-missing-tail-table".to_string()),
20455 source_path: PathBuf::from("/tmp/footprint-missing-tail-table.jsonl"),
20456 started_at: Some(1_700_000_000_000),
20457 ended_at: Some(1_700_000_000_100),
20458 approx_tokens: None,
20459 metadata_json: serde_json::Value::Null,
20460 messages: vec![Message {
20461 id: None,
20462 idx: 10,
20463 role: MessageRole::User,
20464 author: None,
20465 created_at: Some(1_700_000_000_010),
20466 content: "legacy sparse tail without hot table".into(),
20467 extra_json: serde_json::Value::Null,
20468 snippets: Vec::new(),
20469 }],
20470 source_id: LOCAL_SOURCE_ID.into(),
20471 origin_host: None,
20472 },
20473 )
20474 .unwrap()
20475 .conversation_id;
20476
20477 storage
20478 .conn
20479 .execute_compat(
20480 "UPDATE conversations
20481 SET last_message_idx = NULL, last_message_created_at = NULL
20482 WHERE id = ?1",
20483 fparams![conversation_id],
20484 )
20485 .unwrap();
20486 storage
20487 .conn
20488 .execute_compat("DROP TABLE conversation_tail_state", fparams![])
20489 .unwrap();
20490
20491 let footprints = storage
20492 .list_conversation_footprints_for_lexical_rebuild()
20493 .unwrap();
20494
20495 assert_eq!(
20496 footprints,
20497 vec![LexicalRebuildConversationFootprintRow {
20498 conversation_id,
20499 message_count: 11,
20500 message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
20501 }],
20502 "read-only lexical self-heal must tolerate pre-tail-cache databases and use messages MAX(idx)"
20503 );
20504 }
20505
20506 #[test]
20507 fn list_conversation_footprints_for_lexical_rebuild_tolerates_legacy_search_demo_fixture() {
20508 let fixture_db = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
20509 .join("tests")
20510 .join("fixtures")
20511 .join("search_demo_data")
20512 .join("agent_search.db");
20513 let storage = FrankenStorage::open_readonly(&fixture_db).unwrap();
20514
20515 let footprints = storage
20516 .list_conversation_footprints_for_lexical_rebuild()
20517 .unwrap();
20518
20519 assert!(
20520 !footprints.is_empty(),
20521 "search self-heal should be able to plan a lexical rebuild from the legacy search demo fixture"
20522 );
20523 assert!(
20524 footprints
20525 .iter()
20526 .all(|footprint| footprint.message_count > 0),
20527 "legacy fixture conversations should derive message counts from messages when tail caches are absent"
20528 );
20529 }
20530
20531 #[test]
20532 fn lexical_rebuild_listing_normalizes_host_only_remote_source_from_blank_source_id() {
20533 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20534 use std::path::PathBuf;
20535
20536 let dir = TempDir::new().unwrap();
20537 let db_path = dir.path().join("agent_search.db");
20538 let storage = SqliteStorage::open(&db_path).unwrap();
20539 let agent = Agent {
20540 id: None,
20541 slug: "codex".into(),
20542 name: "Codex".into(),
20543 version: Some("0.2.3".into()),
20544 kind: AgentKind::Cli,
20545 };
20546 let agent_id = storage.ensure_agent(&agent).unwrap();
20547 let conversation = Conversation {
20548 id: None,
20549 agent_slug: "codex".into(),
20550 workspace: Some(PathBuf::from("/tmp/workspace")),
20551 external_id: Some("legacy-blank-source".into()),
20552 title: Some("Legacy blank source".into()),
20553 source_path: PathBuf::from("/tmp/legacy-blank-source.jsonl"),
20554 started_at: Some(1_700_000_000_000),
20555 ended_at: Some(1_700_000_000_100),
20556 approx_tokens: None,
20557 metadata_json: serde_json::Value::Null,
20558 messages: vec![Message {
20559 id: None,
20560 idx: 0,
20561 role: MessageRole::User,
20562 author: None,
20563 created_at: Some(1_700_000_000_000),
20564 content: "hello".into(),
20565 extra_json: serde_json::Value::Null,
20566 snippets: Vec::new(),
20567 }],
20568 source_id: LOCAL_SOURCE_ID.into(),
20569 origin_host: None,
20570 };
20571
20572 let conversation_id = storage
20573 .insert_conversation_tree(agent_id, None, &conversation)
20574 .unwrap()
20575 .conversation_id;
20576 storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
20577 storage
20578 .conn
20579 .execute_compat(
20580 "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE id = ?3",
20581 fparams![" ", "dev@laptop", conversation_id],
20582 )
20583 .unwrap();
20584 storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();
20585
20586 let listed = storage.list_conversations(10, 0).unwrap();
20587 assert_eq!(listed.len(), 1);
20588 assert_eq!(listed[0].source_id, "dev@laptop");
20589 assert_eq!(listed[0].origin_host.as_deref(), Some("dev@laptop"));
20590
20591 let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
20592 let rebuild_listed = storage
20593 .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
20594 .unwrap();
20595 assert_eq!(rebuild_listed.len(), 1);
20596 assert_eq!(rebuild_listed[0].source_id, "dev@laptop");
20597 assert_eq!(rebuild_listed[0].origin_host.as_deref(), Some("dev@laptop"));
20598 }
20599
    /// End-to-end check of `seed_canonical_from_best_historical_bundle`.
    ///
    /// Builds a backup database under `backups/` holding one conversation, runtime
    /// meta values and a stale salvage marker, downgrades it to look like schema
    /// version 13 with two `fts_messages` rows in `sqlite_master`, and then seeds an
    /// empty canonical database from it. Afterwards the canonical database must hold
    /// the conversation and message, have no scan/index/embedding progress recorded,
    /// carry exactly one salvage marker, and expose a single queryable `fts_messages`
    /// schema row.
    #[test]
    fn seed_canonical_from_best_historical_bundle_copies_data_and_resets_runtime_meta() {
        use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
        use std::path::PathBuf;

        let dir = TempDir::new().unwrap();
        let canonical_db = dir.path().join("agent_search.db");
        let source_db = dir
            .path()
            .join("backups/agent_search.db.20260322T020200.bak");

        fs::create_dir_all(source_db.parent().unwrap()).unwrap();

        // Populate the historical bundle through the normal storage API.
        let source = SqliteStorage::open(&source_db).unwrap();
        let agent = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        };
        let agent_id = source.ensure_agent(&agent).unwrap();
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("seed-conv".into()),
            title: Some("Historical seed".into()),
            source_path: PathBuf::from("/tmp/historical-seed.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::json!({"seed": true}),
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::Agent,
                author: Some("assistant".into()),
                created_at: Some(1_700_000_000_050),
                content: "seeded message".into(),
                extra_json: serde_json::json!({"usage": {"total_tokens": 12}}),
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        source
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
        // Runtime bookkeeping in the bundle; the assertions further down expect none
        // of these three values to be present in the seeded canonical database.
        source.set_last_scan_ts(123).unwrap();
        source.set_last_indexed_at(456).unwrap();
        source.set_last_embedded_message_id(789).unwrap();
        // A salvage marker already present in the bundle. The test later expects
        // exactly one `historical_bundle_salvaged:` key in the canonical database.
        // NOTE(review): which key survives is not asserted here — confirm intent.
        source
            .conn
            .execute_compat(
                "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
                fparams!["historical_bundle_salvaged:stale", "{\"stale\":true}"],
            )
            .unwrap();
        drop(source);

        // Two differing FTS5 definitions for the same table name: the first is
        // contentless (`content=''`), the second adds a `message_id UNINDEXED` column
        // and has no `content` option.
        let legacy_v13_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, content='', tokenize='porter')";
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        let legacy = rusqlite_test_fixture_conn(&source_db);
        // Rewind the recorded schema version to 13, forget migration 14, and enable
        // direct edits to `sqlite_master` for the inserts that follow.
        legacy
            .execute_batch(
                "UPDATE meta SET value = '13' WHERE key = 'schema_version';
                 DELETE FROM _schema_migrations WHERE version = 14;
                 PRAGMA writable_schema = ON;",
            )
            .unwrap();
        legacy
            .execute(
                "DELETE FROM meta WHERE key = ?1",
                [FTS_FRANKEN_REBUILD_META_KEY],
            )
            .unwrap();
        // First hand-written schema row for `fts_messages` (rootpage 0).
        legacy
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [legacy_v13_fts_sql],
            )
            .unwrap();
        // Second row with the same name, producing the duplicate.
        legacy
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [duplicate_legacy_fts_sql],
            )
            .unwrap();
        legacy
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
        drop(legacy);

        {
            // Sanity-check the fixture on a fresh connection before exercising the seed.
            // NOTE(review): writable_schema is switched on before reading; presumably so
            // the connection accepts the hand-edited schema — confirm.
            let verify = rusqlite_test_fixture_conn(&source_db);
            verify
                .execute_batch("PRAGMA writable_schema = ON;")
                .unwrap();
            let fts_entries: i64 = verify
                .query_row(
                    "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                    [],
                    |row| row.get(0),
                )
                .unwrap();
            assert_eq!(
                fts_entries, 2,
                "test fixture should reproduce the duplicate legacy fts_messages rows"
            );
            let msg_count: i64 = verify
                .query_row("SELECT COUNT(*) FROM messages", [], |row| row.get(0))
                .unwrap();
            assert_eq!(msg_count, 1);
        }

        // Create the canonical database (no conversations inserted) and close it.
        let fresh = SqliteStorage::open(&canonical_db).unwrap();
        drop(fresh);

        // Seed and check the reported import counts.
        let outcome = seed_canonical_from_best_historical_bundle(&canonical_db)
            .unwrap()
            .unwrap();
        assert_eq!(outcome.bundles_imported, 1);
        assert_eq!(outcome.conversations_imported, 1);
        assert_eq!(outcome.messages_imported, 1);

        // The seeded file must be readable through a plain read-only connection.
        let readonly = open_franken_with_flags(
            &canonical_db.to_string_lossy(),
            FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
        )
        .unwrap();
        let readonly_message_count: i64 = readonly
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        assert_eq!(readonly_message_count, 1);

        // Reopen through the storage layer: data is present, runtime meta is not.
        let seeded = SqliteStorage::open(&canonical_db).unwrap();
        assert_eq!(
            seeded
                .count_sessions_in_range(None, None, None, None)
                .unwrap()
                .0,
            1
        );
        let message_count: i64 = seeded
            .conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        assert_eq!(message_count, 1);
        assert_eq!(seeded.get_last_scan_ts().unwrap(), None);
        assert_eq!(seeded.get_last_embedded_message_id().unwrap(), None);

        // `last_indexed_at` is checked via the raw meta row rather than a getter.
        let last_indexed: Option<String> = seeded
            .conn
            .query_row_map(
                "SELECT value FROM meta WHERE key = 'last_indexed_at'",
                fparams![],
                |row| row.get_typed(0),
            )
            .optional()
            .unwrap();
        assert!(last_indexed.is_none());

        let salvage_keys: Vec<String> = seeded
            .conn
            .query_map_collect(
                "SELECT key FROM meta WHERE key LIKE 'historical_bundle_salvaged:%' ORDER BY key",
                fparams![],
                |row| row.get_typed(0),
            )
            .unwrap();
        assert_eq!(salvage_keys.len(), 1);

        // A second read-only open, taken while `seeded` is still alive, must see one
        // `fts_messages` schema row and the single message.
        let reopened_readonly = open_franken_with_flags(
            &canonical_db.to_string_lossy(),
            FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
        )
        .unwrap();
        let reopened_fts_entries: i64 = reopened_readonly
            .query_row_map(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                fparams![],
                |row| row.get_typed(0),
            )
            .unwrap();
        assert_eq!(
            reopened_fts_entries, 1,
            "seeded canonical db should keep a single stock-SQLite fts_messages schema row"
        );
        let reopened_message_count: i64 = reopened_readonly
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        assert_eq!(reopened_message_count, 1);

        // Finally open through FrankenStorage: the schema version is current, and after
        // the explicit FTS consistency pass there is still one schema row and the
        // table can be queried.
        let franken_seeded = FrankenStorage::open(&canonical_db).unwrap();
        assert_eq!(
            franken_seeded.schema_version().unwrap(),
            CURRENT_SCHEMA_VERSION
        );
        franken_seeded
            .ensure_search_fallback_fts_consistency()
            .expect("ensure FTS consistency after seed");
        let post_franken_schema_rows: i64 = franken_seeded
            .raw()
            .query_row_map(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                fparams![],
                |row| row.get_typed(0),
            )
            .unwrap();
        assert_eq!(post_franken_schema_rows, 1);
        assert!(
            franken_seeded
                .raw()
                .query("SELECT rowid FROM fts_messages LIMIT 1")
                .is_ok()
        );
    }
20846
20847 #[test]
20848 fn failed_baseline_seed_preserves_existing_canonical_bundle() {
20849 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
20850 use std::path::PathBuf;
20851
20852 let dir = TempDir::new().unwrap();
20853 let canonical_db = dir.path().join("agent_search.db");
20854 let source_db = dir
20855 .path()
20856 .join("backups/agent_search.db.20260325T120000Z.bad-seed.bak");
20857
20858 fs::create_dir_all(source_db.parent().unwrap()).unwrap();
20859
20860 let canonical = SqliteStorage::open(&canonical_db).unwrap();
20861 canonical
20862 .conn
20863 .execute_compat(
20864 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
20865 fparams!["sentinel", "keep-me"],
20866 )
20867 .unwrap();
20868 drop(canonical);
20869
20870 let source = SqliteStorage::open(&source_db).unwrap();
20871 let agent = Agent {
20872 id: None,
20873 slug: "codex".into(),
20874 name: "Codex".into(),
20875 version: Some("0.2.3".into()),
20876 kind: AgentKind::Cli,
20877 };
20878 let agent_id = source.ensure_agent(&agent).unwrap();
20879 let conversation = Conversation {
20880 id: None,
20881 agent_slug: "codex".into(),
20882 workspace: Some(PathBuf::from("/tmp/workspace")),
20883 external_id: Some("bad-seed-conv".into()),
20884 title: Some("Bad seed".into()),
20885 source_path: PathBuf::from("/tmp/bad-seed.jsonl"),
20886 started_at: Some(1_700_000_000_000),
20887 ended_at: Some(1_700_000_000_100),
20888 approx_tokens: Some(42),
20889 metadata_json: serde_json::json!({"seed": "bad"}),
20890 messages: vec![Message {
20891 id: None,
20892 idx: 0,
20893 role: MessageRole::Agent,
20894 author: Some("assistant".into()),
20895 created_at: Some(1_700_000_000_050),
20896 content: "this seed should fail".into(),
20897 extra_json: serde_json::Value::Null,
20898 snippets: Vec::new(),
20899 }],
20900 source_id: LOCAL_SOURCE_ID.into(),
20901 origin_host: None,
20902 };
20903 source
20904 .insert_conversation_tree(agent_id, None, &conversation)
20905 .unwrap();
20906 drop(source);
20907
20908 let legacy = FrankenConnection::open(source_db.to_string_lossy().into_owned()).unwrap();
20909 legacy
20910 .execute("UPDATE meta SET value = '12' WHERE key = 'schema_version'")
20911 .unwrap();
20912 drop(legacy);
20913
20914 let err = seed_canonical_from_best_historical_bundle(&canonical_db).unwrap_err();
20915 assert!(
20916 err.to_string()
20917 .contains("schema_version 12 is too old for baseline import"),
20918 "unexpected seed error: {err:#}"
20919 );
20920
20921 let reopened = SqliteStorage::open(&canonical_db).unwrap();
20922 let sentinel: Option<String> = reopened
20923 .conn
20924 .query_row_map(
20925 "SELECT value FROM meta WHERE key = 'sentinel'",
20926 fparams![],
20927 |row| row.get_typed(0),
20928 )
20929 .optional()
20930 .unwrap();
20931 assert_eq!(sentinel.as_deref(), Some("keep-me"));
20932
20933 let conversation_count: i64 = reopened
20934 .conn
20935 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
20936 row.get_typed(0)
20937 })
20938 .unwrap();
20939 assert_eq!(conversation_count, 0);
20940
20941 let readonly = open_franken_with_flags(
20942 &canonical_db.to_string_lossy(),
20943 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
20944 )
20945 .unwrap();
20946 let readonly_conversation_count: i64 = readonly
20947 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
20948 row.get_typed(0)
20949 })
20950 .unwrap();
20951 assert_eq!(readonly_conversation_count, 0);
20952 }
20953
20954 #[test]
20955 fn fetch_messages_for_lexical_rebuild_skips_extra_json() {
20956 let dir = TempDir::new().unwrap();
20957 let db_path = dir.path().join("test.db");
20958 let storage = SqliteStorage::open(&db_path).unwrap();
20959
20960 let agent = Agent {
20961 id: None,
20962 slug: "codex".into(),
20963 name: "Codex".into(),
20964 version: Some("0.2.3".into()),
20965 kind: AgentKind::Cli,
20966 };
20967 let agent_id = storage.ensure_agent(&agent).unwrap();
20968
20969 let conversation = Conversation {
20970 id: None,
20971 agent_slug: "codex".into(),
20972 workspace: Some(PathBuf::from("/tmp/workspace")),
20973 external_id: Some("lexical-rebuild-test".into()),
20974 title: Some("Lexical rebuild".into()),
20975 source_path: PathBuf::from("/tmp/lexical-rebuild.jsonl"),
20976 started_at: Some(1_700_000_000_000),
20977 ended_at: Some(1_700_000_000_100),
20978 approx_tokens: Some(42),
20979 metadata_json: serde_json::Value::Null,
20980 messages: vec![Message {
20981 id: None,
20982 idx: 0,
20983 role: MessageRole::Agent,
20984 author: Some("assistant".into()),
20985 created_at: Some(1_700_000_000_050),
20986 content: "indexed text".into(),
20987 extra_json: serde_json::json!({
20988 "usage": { "total_tokens": 1234 },
20989 "irrelevant_blob": "still preserved in canonical storage"
20990 }),
20991 snippets: Vec::new(),
20992 }],
20993 source_id: LOCAL_SOURCE_ID.into(),
20994 origin_host: None,
20995 };
20996
20997 let inserted = storage
20998 .insert_conversation_tree(agent_id, None, &conversation)
20999 .unwrap();
21000 let conversation_id = inserted.conversation_id;
21001
21002 let stored = storage.fetch_messages(conversation_id).unwrap();
21003 assert_eq!(stored.len(), 1);
21004 assert!(!stored[0].extra_json.is_null());
21005
21006 let lexical = storage
21007 .fetch_messages_for_lexical_rebuild(conversation_id)
21008 .unwrap();
21009 assert_eq!(lexical.len(), 1);
21010 assert_eq!(lexical[0].content, "indexed text");
21011 assert_eq!(lexical[0].author.as_deref(), Some("assistant"));
21012 assert!(lexical[0].extra_json.is_null());
21013 }
21014
21015 #[test]
21016 fn fetch_messages_for_lexical_rebuild_batch_groups_and_orders_messages() {
21017 let dir = TempDir::new().unwrap();
21018 let db_path = dir.path().join("test.db");
21019 let storage = SqliteStorage::open(&db_path).unwrap();
21020
21021 let agent = Agent {
21022 id: None,
21023 slug: "codex".into(),
21024 name: "Codex".into(),
21025 version: Some("0.2.3".into()),
21026 kind: AgentKind::Cli,
21027 };
21028 let agent_id = storage.ensure_agent(&agent).unwrap();
21029
21030 let first = Conversation {
21031 id: None,
21032 agent_slug: "codex".into(),
21033 workspace: Some(PathBuf::from("/tmp/workspace")),
21034 external_id: Some("lexical-batch-1".into()),
21035 title: Some("Lexical batch 1".into()),
21036 source_path: PathBuf::from("/tmp/lexical-batch-1.jsonl"),
21037 started_at: Some(1_700_000_000_000),
21038 ended_at: Some(1_700_000_000_100),
21039 approx_tokens: Some(42),
21040 metadata_json: serde_json::Value::Null,
21041 messages: vec![
21042 Message {
21043 id: None,
21044 idx: 0,
21045 role: MessageRole::User,
21046 author: Some("user".into()),
21047 created_at: Some(1_700_000_000_010),
21048 content: "first-a".into(),
21049 extra_json: serde_json::json!({"opaque": true}),
21050 snippets: Vec::new(),
21051 },
21052 Message {
21053 id: None,
21054 idx: 1,
21055 role: MessageRole::Agent,
21056 author: Some("assistant".into()),
21057 created_at: Some(1_700_000_000_020),
21058 content: "first-b".into(),
21059 extra_json: serde_json::json!({"opaque": true}),
21060 snippets: Vec::new(),
21061 },
21062 ],
21063 source_id: LOCAL_SOURCE_ID.into(),
21064 origin_host: None,
21065 };
21066
21067 let second = Conversation {
21068 id: None,
21069 agent_slug: "codex".into(),
21070 workspace: Some(PathBuf::from("/tmp/workspace")),
21071 external_id: Some("lexical-batch-2".into()),
21072 title: Some("Lexical batch 2".into()),
21073 source_path: PathBuf::from("/tmp/lexical-batch-2.jsonl"),
21074 started_at: Some(1_700_000_000_200),
21075 ended_at: Some(1_700_000_000_300),
21076 approx_tokens: Some(84),
21077 metadata_json: serde_json::Value::Null,
21078 messages: vec![Message {
21079 id: None,
21080 idx: 0,
21081 role: MessageRole::Tool,
21082 author: Some("tool".into()),
21083 created_at: Some(1_700_000_000_210),
21084 content: "second-a".into(),
21085 extra_json: serde_json::json!({"opaque": true}),
21086 snippets: Vec::new(),
21087 }],
21088 source_id: LOCAL_SOURCE_ID.into(),
21089 origin_host: None,
21090 };
21091 let third = Conversation {
21092 external_id: Some("lexical-batch-3".into()),
21093 title: Some("Lexical batch 3".into()),
21094 source_path: PathBuf::from("/tmp/lexical-batch-3.jsonl"),
21095 messages: vec![Message {
21096 id: None,
21097 idx: 0,
21098 role: MessageRole::System,
21099 author: Some("system".into()),
21100 created_at: Some(1_700_000_000_410),
21101 content: "third-a".into(),
21102 extra_json: serde_json::json!({"opaque": true}),
21103 snippets: Vec::new(),
21104 }],
21105 ..second.clone()
21106 };
21107
21108 let first_id = storage
21109 .insert_conversation_tree(agent_id, None, &first)
21110 .unwrap()
21111 .conversation_id;
21112 let second_id = storage
21113 .insert_conversation_tree(agent_id, None, &second)
21114 .unwrap()
21115 .conversation_id;
21116 let third_id = storage
21117 .insert_conversation_tree(agent_id, None, &third)
21118 .unwrap()
21119 .conversation_id;
21120
21121 let lexical = storage
21122 .fetch_messages_for_lexical_rebuild_batch(&[third_id, first_id], None, None)
21123 .unwrap();
21124
21125 let first_messages = lexical.get(&first_id).expect("first conversation");
21126 assert_eq!(first_messages.len(), 2);
21127 assert_eq!(first_messages[0].content, "first-a");
21128 assert_eq!(first_messages[1].content, "first-b");
21129 assert!(
21130 first_messages
21131 .iter()
21132 .all(|message| message.extra_json.is_null())
21133 );
21134
21135 assert!(
21136 !lexical.contains_key(&second_id),
21137 "batch fetch must exclude conversations not requested by the caller"
21138 );
21139
21140 let third_messages = lexical.get(&third_id).expect("third conversation");
21141 assert_eq!(third_messages.len(), 1);
21142 assert_eq!(third_messages[0].content, "third-a");
21143 assert!(third_messages[0].extra_json.is_null());
21144 }
21145
21146 #[test]
21147 fn fetch_messages_for_lexical_rebuild_batch_enforces_content_byte_guardrail() {
21148 let dir = TempDir::new().unwrap();
21149 let db_path = dir.path().join("test.db");
21150 let storage = SqliteStorage::open(&db_path).unwrap();
21151
21152 let agent = Agent {
21153 id: None,
21154 slug: "codex".into(),
21155 name: "Codex".into(),
21156 version: Some("0.2.3".into()),
21157 kind: AgentKind::Cli,
21158 };
21159 let agent_id = storage.ensure_agent(&agent).unwrap();
21160
21161 let conversation = Conversation {
21162 id: None,
21163 agent_slug: "codex".into(),
21164 workspace: Some(PathBuf::from("/tmp/workspace")),
21165 external_id: Some("lexical-batch-guard".into()),
21166 title: Some("Lexical batch guard".into()),
21167 source_path: PathBuf::from("/tmp/lexical-batch-guard.jsonl"),
21168 started_at: Some(1_700_000_000_000),
21169 ended_at: Some(1_700_000_000_100),
21170 approx_tokens: Some(42),
21171 metadata_json: serde_json::Value::Null,
21172 messages: vec![
21173 Message {
21174 id: None,
21175 idx: 0,
21176 role: MessageRole::User,
21177 author: Some("user".into()),
21178 created_at: Some(1_700_000_000_010),
21179 content: "123456".into(),
21180 extra_json: serde_json::Value::Null,
21181 snippets: Vec::new(),
21182 },
21183 Message {
21184 id: None,
21185 idx: 1,
21186 role: MessageRole::Agent,
21187 author: Some("assistant".into()),
21188 created_at: Some(1_700_000_000_020),
21189 content: "abcdef".into(),
21190 extra_json: serde_json::Value::Null,
21191 snippets: Vec::new(),
21192 },
21193 ],
21194 source_id: LOCAL_SOURCE_ID.into(),
21195 origin_host: None,
21196 };
21197
21198 let conversation_id = storage
21199 .insert_conversation_tree(agent_id, None, &conversation)
21200 .unwrap()
21201 .conversation_id;
21202
21203 let error = storage
21204 .fetch_messages_for_lexical_rebuild_batch(&[conversation_id], Some(10), Some(8))
21205 .expect_err("guardrail should reject oversized batch content");
21206
21207 let message = format!("{error:#}");
21208 assert!(
21209 message.contains("content-byte guardrail"),
21210 "expected guardrail reason in error, got {message}"
21211 );
21212 }
21213
21214 #[test]
21215 fn fetch_messages_handles_manual_rows_inserted_via_raw_connection() {
21216 let dir = TempDir::new().unwrap();
21217 let db_path = dir.path().join("manual-rows.db");
21218 let storage = FrankenStorage::open(&db_path).unwrap();
21219 let conn = storage.raw();
21220
21221 conn.execute(
21222 "INSERT INTO agents (id, slug, name, kind, created_at, updated_at)
21223 VALUES (1, 'claude_code', 'Claude Code', 'local', 0, 0)",
21224 )
21225 .unwrap();
21226 conn.execute(
21227 "INSERT INTO conversations
21228 (id, agent_id, external_id, title, source_path, source_id, started_at)
21229 VALUES (1, 1, 'manual-ext', 'Manual Session', '/tmp/manual.jsonl', 'local', 200)",
21230 )
21231 .unwrap();
21232 conn.execute(
21233 "INSERT INTO messages
21234 (id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
21235 VALUES (1, 1, 0, 'user', 'tester', 1700000000000, 'manual body', '{\"k\":1}', NULL)",
21236 )
21237 .unwrap();
21238
21239 let lexical = storage.fetch_messages_for_lexical_rebuild(1).unwrap();
21240 assert_eq!(lexical.len(), 1);
21241 assert_eq!(lexical[0].content, "manual body");
21242
21243 let full = storage.fetch_messages(1).unwrap();
21244 assert_eq!(full.len(), 1);
21245 assert_eq!(full[0].content, "manual body");
21246 assert_eq!(full[0].author.as_deref(), Some("tester"));
21247 assert_eq!(full[0].extra_json, serde_json::json!({ "k": 1 }));
21248 }
21249
21250 #[test]
21251 fn lexical_rebuild_batch_messages_query_avoids_sorter_temp_btrees() {
21252 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21253 use std::path::PathBuf;
21254
21255 let dir = TempDir::new().unwrap();
21256 let db_path = dir.path().join("agent_search.db");
21257 let storage = SqliteStorage::open(&db_path).unwrap();
21258
21259 let agent = Agent {
21260 id: None,
21261 slug: "claude_code".into(),
21262 name: "Claude Code".into(),
21263 version: None,
21264 kind: AgentKind::Cli,
21265 };
21266 let agent_id = storage.ensure_agent(&agent).unwrap();
21267
21268 for (external_id, base_ts) in [
21269 ("conv-1", 1_700_000_000_000_i64),
21270 ("conv-2", 1_700_000_001_000_i64),
21271 ] {
21272 let conversation = Conversation {
21273 id: None,
21274 agent_slug: "claude_code".into(),
21275 workspace: Some(PathBuf::from("/tmp/workspace")),
21276 external_id: Some(external_id.to_string()),
21277 title: Some("Lexical rebuild".into()),
21278 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21279 started_at: Some(base_ts),
21280 ended_at: Some(base_ts + 100),
21281 approx_tokens: None,
21282 metadata_json: serde_json::Value::Null,
21283 messages: vec![
21284 Message {
21285 id: None,
21286 idx: 0,
21287 role: MessageRole::User,
21288 author: Some("user".into()),
21289 created_at: Some(base_ts + 10),
21290 content: format!("{external_id}-first"),
21291 extra_json: serde_json::Value::Null,
21292 snippets: Vec::new(),
21293 },
21294 Message {
21295 id: None,
21296 idx: 1,
21297 role: MessageRole::Agent,
21298 author: Some("assistant".into()),
21299 created_at: Some(base_ts + 20),
21300 content: format!("{external_id}-second"),
21301 extra_json: serde_json::Value::Null,
21302 snippets: Vec::new(),
21303 },
21304 ],
21305 source_id: LOCAL_SOURCE_ID.into(),
21306 origin_host: None,
21307 };
21308 storage
21309 .insert_conversation_tree(agent_id, None, &conversation)
21310 .unwrap();
21311 }
21312
21313 let conversation_ids: Vec<i64> = storage
21314 .conn
21315 .query_map_collect(
21316 "SELECT id FROM conversations ORDER BY id",
21317 fparams![],
21318 |row| row.get_typed(0),
21319 )
21320 .unwrap();
21321 assert_eq!(conversation_ids.len(), 2);
21322
21323 let plan_details: Vec<String> = storage
21324 .conn
21325 .query_map_collect(
21326 "EXPLAIN QUERY PLAN \
21327 SELECT conversation_id, id, idx, role, author, created_at, content \
21328 FROM messages \
21329 WHERE conversation_id IN (?1, ?2) \
21330 ORDER BY conversation_id ASC, idx ASC",
21331 fparams![conversation_ids[0], conversation_ids[1]],
21332 |row| row.get_typed(3),
21333 )
21334 .unwrap();
21335
21336 assert!(
21337 plan_details
21338 .iter()
21339 .any(|detail| detail.contains("sqlite_autoindex_messages_1")),
21340 "expected batched lexical rebuild fetch to use the conversation_id/idx composite index, got {plan_details:?}"
21341 );
21342 assert!(
21343 !plan_details
21344 .iter()
21345 .any(|detail| detail.contains("TEMP B-TREE")),
21346 "expected batched lexical rebuild fetch to avoid sorter temp b-trees, got {plan_details:?}"
21347 );
21348 }
21349
21350 #[test]
21351 fn stream_messages_for_lexical_rebuild_groups_and_orders_messages() {
21352 let dir = TempDir::new().unwrap();
21353 let db_path = dir.path().join("test.db");
21354 let storage = SqliteStorage::open(&db_path).unwrap();
21355
21356 let agent = Agent {
21357 id: None,
21358 slug: "codex".into(),
21359 name: "Codex".into(),
21360 version: Some("0.2.3".into()),
21361 kind: AgentKind::Cli,
21362 };
21363 let agent_id = storage.ensure_agent(&agent).unwrap();
21364
21365 let first = Conversation {
21366 id: None,
21367 agent_slug: "codex".into(),
21368 workspace: Some(PathBuf::from("/tmp/workspace")),
21369 external_id: Some("lexical-stream-1".into()),
21370 title: Some("Lexical stream 1".into()),
21371 source_path: PathBuf::from("/tmp/lexical-stream-1.jsonl"),
21372 started_at: Some(1_700_000_000_000),
21373 ended_at: Some(1_700_000_000_100),
21374 approx_tokens: Some(42),
21375 metadata_json: serde_json::Value::Null,
21376 messages: vec![
21377 Message {
21378 id: None,
21379 idx: 0,
21380 role: MessageRole::User,
21381 author: Some("user".into()),
21382 created_at: Some(1_700_000_000_010),
21383 content: "first-a".into(),
21384 extra_json: serde_json::json!({"opaque": true}),
21385 snippets: Vec::new(),
21386 },
21387 Message {
21388 id: None,
21389 idx: 1,
21390 role: MessageRole::Agent,
21391 author: Some("assistant".into()),
21392 created_at: Some(1_700_000_000_020),
21393 content: "first-b".into(),
21394 extra_json: serde_json::json!({"opaque": true}),
21395 snippets: Vec::new(),
21396 },
21397 ],
21398 source_id: LOCAL_SOURCE_ID.into(),
21399 origin_host: None,
21400 };
21401
21402 let second = Conversation {
21403 id: None,
21404 agent_slug: "codex".into(),
21405 workspace: Some(PathBuf::from("/tmp/workspace")),
21406 external_id: Some("lexical-stream-2".into()),
21407 title: Some("Lexical stream 2".into()),
21408 source_path: PathBuf::from("/tmp/lexical-stream-2.jsonl"),
21409 started_at: Some(1_700_000_000_200),
21410 ended_at: Some(1_700_000_000_300),
21411 approx_tokens: Some(84),
21412 metadata_json: serde_json::Value::Null,
21413 messages: vec![Message {
21414 id: None,
21415 idx: 0,
21416 role: MessageRole::Tool,
21417 author: Some("tool".into()),
21418 created_at: Some(1_700_000_000_210),
21419 content: "second-a".into(),
21420 extra_json: serde_json::json!({"opaque": true}),
21421 snippets: Vec::new(),
21422 }],
21423 source_id: LOCAL_SOURCE_ID.into(),
21424 origin_host: None,
21425 };
21426
21427 let first_id = storage
21428 .insert_conversation_tree(agent_id, None, &first)
21429 .unwrap()
21430 .conversation_id;
21431 let second_id = storage
21432 .insert_conversation_tree(agent_id, None, &second)
21433 .unwrap()
21434 .conversation_id;
21435
21436 let mut streamed = Vec::new();
21437 storage
21438 .stream_messages_for_lexical_rebuild_from_conversation_id(first_id, |row| {
21439 streamed.push((
21440 row.conversation_id,
21441 row.idx,
21442 row.role,
21443 row.author,
21444 row.content,
21445 ));
21446 Ok(())
21447 })
21448 .unwrap();
21449
21450 assert_eq!(
21451 streamed,
21452 vec![
21453 (
21454 first_id,
21455 0,
21456 "user".to_string(),
21457 Some("user".to_string()),
21458 "first-a".to_string(),
21459 ),
21460 (
21461 first_id,
21462 1,
21463 "agent".to_string(),
21464 Some("assistant".to_string()),
21465 "first-b".to_string(),
21466 ),
21467 (
21468 second_id,
21469 0,
21470 "tool".to_string(),
21471 Some("tool".to_string()),
21472 "second-a".to_string(),
21473 ),
21474 ]
21475 );
21476 }
21477
21478 #[test]
21479 fn stream_messages_for_lexical_rebuild_between_conversation_ids_respects_upper_bound() {
21480 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21481 use std::path::PathBuf;
21482
21483 let dir = TempDir::new().unwrap();
21484 let db_path = dir.path().join("agent_search.db");
21485 let storage = SqliteStorage::open(&db_path).unwrap();
21486
21487 let agent = Agent {
21488 id: None,
21489 slug: "claude_code".into(),
21490 name: "Claude Code".into(),
21491 version: Some("1.2.3".into()),
21492 kind: AgentKind::Cli,
21493 };
21494 let agent_id = storage.ensure_agent(&agent).unwrap();
21495
21496 let first = Conversation {
21497 id: None,
21498 agent_slug: "claude_code".into(),
21499 workspace: Some(PathBuf::from("/tmp/workspace")),
21500 external_id: Some("lexical-range-1".into()),
21501 title: Some("Lexical range 1".into()),
21502 source_path: PathBuf::from("/tmp/lexical-range-1.jsonl"),
21503 started_at: Some(1_700_000_000_000),
21504 ended_at: Some(1_700_000_000_100),
21505 approx_tokens: Some(42),
21506 metadata_json: serde_json::Value::Null,
21507 messages: vec![Message {
21508 id: None,
21509 idx: 0,
21510 role: MessageRole::User,
21511 author: Some("user".into()),
21512 created_at: Some(1_700_000_000_010),
21513 content: "first-only".into(),
21514 extra_json: serde_json::json!({"opaque": true}),
21515 snippets: Vec::new(),
21516 }],
21517 source_id: LOCAL_SOURCE_ID.into(),
21518 origin_host: None,
21519 };
21520
21521 let second = Conversation {
21522 id: None,
21523 agent_slug: "claude_code".into(),
21524 workspace: Some(PathBuf::from("/tmp/workspace")),
21525 external_id: Some("lexical-range-2".into()),
21526 title: Some("Lexical range 2".into()),
21527 source_path: PathBuf::from("/tmp/lexical-range-2.jsonl"),
21528 started_at: Some(1_700_000_000_200),
21529 ended_at: Some(1_700_000_000_300),
21530 approx_tokens: Some(84),
21531 metadata_json: serde_json::Value::Null,
21532 messages: vec![Message {
21533 id: None,
21534 idx: 0,
21535 role: MessageRole::Tool,
21536 author: Some("tool".into()),
21537 created_at: Some(1_700_000_000_210),
21538 content: "second-should-not-appear".into(),
21539 extra_json: serde_json::json!({"opaque": true}),
21540 snippets: Vec::new(),
21541 }],
21542 source_id: LOCAL_SOURCE_ID.into(),
21543 origin_host: None,
21544 };
21545
21546 let first_id = storage
21547 .insert_conversation_tree(agent_id, None, &first)
21548 .unwrap()
21549 .conversation_id;
21550 let second_id = storage
21551 .insert_conversation_tree(agent_id, None, &second)
21552 .unwrap()
21553 .conversation_id;
21554
21555 let mut streamed = Vec::new();
21556 storage
21557 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21558 first_id,
21559 first_id,
21560 |row| {
21561 streamed.push((row.conversation_id, row.idx, row.content));
21562 Ok(())
21563 },
21564 )
21565 .unwrap();
21566
21567 assert_eq!(streamed, vec![(first_id, 0, "first-only".to_string())]);
21568 assert!(
21569 streamed
21570 .iter()
21571 .all(|(conversation_id, _, _)| *conversation_id != second_id),
21572 "upper bound should exclude later conversation ids"
21573 );
21574 }
21575
21576 #[test]
21577 fn stream_messages_for_lexical_rebuild_between_conversation_ids_handles_mixed_ranges() {
21578 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21579 use std::path::PathBuf;
21580
21581 let dir = TempDir::new().unwrap();
21582 let db_path = dir.path().join("agent_search.db");
21583 let storage = SqliteStorage::open(&db_path).unwrap();
21584
21585 let claude_agent_id = storage
21586 .ensure_agent(&Agent {
21587 id: None,
21588 slug: "claude_code".into(),
21589 name: "Claude Code".into(),
21590 version: None,
21591 kind: AgentKind::Cli,
21592 })
21593 .unwrap();
21594 let aider_agent_id = storage
21595 .ensure_agent(&Agent {
21596 id: None,
21597 slug: "aider".into(),
21598 name: "Aider".into(),
21599 version: None,
21600 kind: AgentKind::Cli,
21601 })
21602 .unwrap();
21603
21604 type MessageSpec = (i64, MessageRole, Option<String>, Option<i64>, String);
21605
21606 let mut expected = Vec::new();
21607 let mut first_conversation_id = None;
21608 let mut last_conversation_id = None;
21609 let mut insert_conversation =
21610 |agent_id: i64,
21611 external_id: &str,
21612 title: &str,
21613 source_path: &str,
21614 started_at: i64,
21615 message_specs: Vec<MessageSpec>| {
21616 let conversation = Conversation {
21617 id: None,
21618 agent_slug: if agent_id == aider_agent_id {
21619 "aider".into()
21620 } else {
21621 "claude_code".into()
21622 },
21623 workspace: Some(PathBuf::from("/tmp/workspace")),
21624 external_id: Some(external_id.to_string()),
21625 title: Some(title.to_string()),
21626 source_path: PathBuf::from(source_path),
21627 started_at: Some(started_at),
21628 ended_at: Some(started_at + 100),
21629 approx_tokens: None,
21630 metadata_json: serde_json::Value::Null,
21631 messages: message_specs
21632 .iter()
21633 .map(|(idx, role, author, created_at, content)| Message {
21634 id: None,
21635 idx: *idx,
21636 role: role.clone(),
21637 author: author.clone(),
21638 created_at: *created_at,
21639 content: content.clone(),
21640 extra_json: serde_json::Value::Null,
21641 snippets: Vec::new(),
21642 })
21643 .collect(),
21644 source_id: LOCAL_SOURCE_ID.into(),
21645 origin_host: None,
21646 };
21647 let conversation_id = storage
21648 .insert_conversation_tree(agent_id, None, &conversation)
21649 .unwrap()
21650 .conversation_id;
21651 if first_conversation_id.is_none() {
21652 first_conversation_id = Some(conversation_id);
21653 }
21654 last_conversation_id = Some(conversation_id);
21655 expected.extend(message_specs.into_iter().map(
21656 |(idx, role, author, created_at, content)| {
21657 (
21658 conversation_id,
21659 idx,
21660 match role {
21661 MessageRole::User => "user".to_string(),
21662 MessageRole::Agent => "agent".to_string(),
21663 MessageRole::Tool => "tool".to_string(),
21664 MessageRole::System => "system".to_string(),
21665 MessageRole::Other(other) => other,
21666 },
21667 author,
21668 created_at,
21669 content,
21670 )
21671 },
21672 ));
21673 };
21674
21675 for (label, base_ts) in [
21676 ("alpha", 1_700_000_000_000_i64),
21677 ("beta", 1_700_000_001_000_i64),
21678 ("gamma", 1_700_000_002_000_i64),
21679 ("delta", 1_700_000_003_000_i64),
21680 ("epsilon", 1_700_000_004_000_i64),
21681 ] {
21682 insert_conversation(
21683 claude_agent_id,
21684 &format!("lexical-{label}"),
21685 &format!("Lexical {label}"),
21686 &format!("/tmp/{label}.jsonl"),
21687 base_ts,
21688 vec![
21689 (
21690 0,
21691 MessageRole::User,
21692 None,
21693 Some(base_ts + 10),
21694 format!("{label}_content"),
21695 ),
21696 (
21697 1,
21698 MessageRole::Agent,
21699 None,
21700 Some(base_ts + 20),
21701 format!("{label}_content_response"),
21702 ),
21703 ],
21704 );
21705 }
21706
21707 insert_conversation(
21708 aider_agent_id,
21709 "lexical-aider-history",
21710 "Aider Chat: coding_agent_session_search",
21711 "/tmp/.aider.chat.history.md",
21712 1_764_619_673_394,
21713 vec![
21714 (
21715 0,
21716 MessageRole::System,
21717 Some("system".to_string()),
21718 None,
21719 "# aider chat started at 2025-12-01 20:07:47".to_string(),
21720 ),
21721 (
21722 1,
21723 MessageRole::User,
21724 Some("user".to_string()),
21725 None,
21726 "/tmp/workspace/.venv/bin/aider --no-git --message hello world".to_string(),
21727 ),
21728 ],
21729 );
21730 insert_conversation(
21731 aider_agent_id,
21732 "lexical-aider-fixture",
21733 "Aider Chat: aider",
21734 "/tmp/tests/fixtures/aider/.aider.chat.history.md",
21735 1_764_621_401_399,
21736 vec![
21737 (
21738 0,
21739 MessageRole::User,
21740 Some("user".to_string()),
21741 None,
21742 "/add src/main.rs".to_string(),
21743 ),
21744 (
21745 1,
21746 MessageRole::Agent,
21747 Some("assistant".to_string()),
21748 None,
21749 "Added src/main.rs to the chat.
21750
21751#### /add src/main.rs"
21752 .to_string(),
21753 ),
21754 (
21755 2,
21756 MessageRole::User,
21757 Some("user".to_string()),
21758 None,
21759 "Please refactor.".to_string(),
21760 ),
21761 (
21762 3,
21763 MessageRole::Agent,
21764 Some("assistant".to_string()),
21765 None,
21766 "Sure, here is the code.".to_string(),
21767 ),
21768 ],
21769 );
21770
21771 let mut streamed = Vec::new();
21772 storage
21773 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21774 first_conversation_id.unwrap(),
21775 last_conversation_id.unwrap(),
21776 |row| {
21777 streamed.push((
21778 row.conversation_id,
21779 row.idx,
21780 row.role,
21781 row.author,
21782 row.created_at,
21783 row.content,
21784 ));
21785 Ok(())
21786 },
21787 )
21788 .unwrap();
21789
21790 assert_eq!(streamed, expected);
21791 }
21792
21793 #[test]
21794 fn lexical_rebuild_stream_queries_use_rowid_and_per_conversation_probes() {
21795 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21796 use std::path::PathBuf;
21797
21798 let dir = TempDir::new().unwrap();
21799 let db_path = dir.path().join("agent_search.db");
21800 let storage = SqliteStorage::open(&db_path).unwrap();
21801
21802 let agent = Agent {
21803 id: None,
21804 slug: "claude_code".into(),
21805 name: "Claude Code".into(),
21806 version: None,
21807 kind: AgentKind::Cli,
21808 };
21809 let agent_id = storage.ensure_agent(&agent).unwrap();
21810
21811 for (external_id, base_ts) in [
21812 ("conv-1", 1_700_000_000_000_i64),
21813 ("conv-2", 1_700_000_001_000_i64),
21814 ] {
21815 let conversation = Conversation {
21816 id: None,
21817 agent_slug: "claude_code".into(),
21818 workspace: Some(PathBuf::from("/tmp/workspace")),
21819 external_id: Some(external_id.to_string()),
21820 title: Some("Lexical rebuild".into()),
21821 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21822 started_at: Some(base_ts),
21823 ended_at: Some(base_ts + 100),
21824 approx_tokens: None,
21825 metadata_json: serde_json::Value::Null,
21826 messages: vec![
21827 Message {
21828 id: None,
21829 idx: 0,
21830 role: MessageRole::User,
21831 author: Some("user".into()),
21832 created_at: Some(base_ts + 10),
21833 content: format!("{external_id}-first"),
21834 extra_json: serde_json::Value::Null,
21835 snippets: Vec::new(),
21836 },
21837 Message {
21838 id: None,
21839 idx: 1,
21840 role: MessageRole::Agent,
21841 author: Some("assistant".into()),
21842 created_at: Some(base_ts + 20),
21843 content: format!("{external_id}-second"),
21844 extra_json: serde_json::Value::Null,
21845 snippets: Vec::new(),
21846 },
21847 ],
21848 source_id: LOCAL_SOURCE_ID.into(),
21849 origin_host: None,
21850 };
21851 storage
21852 .insert_conversation_tree(agent_id, None, &conversation)
21853 .unwrap();
21854 }
21855
21856 let first_id: i64 = storage
21857 .conn
21858 .query_row_map(
21859 "SELECT id FROM conversations ORDER BY id LIMIT 1",
21860 fparams![],
21861 |row| row.get_typed(0),
21862 )
21863 .unwrap();
21864 let last_id: i64 = storage
21865 .conn
21866 .query_row_map(
21867 "SELECT id FROM conversations ORDER BY id DESC LIMIT 1",
21868 fparams![],
21869 |row| row.get_typed(0),
21870 )
21871 .unwrap();
21872
21873 let conversation_plan_details: Vec<String> = storage
21874 .conn
21875 .query_map_collect(
21876 "EXPLAIN QUERY PLAN SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
21877 fparams![first_id, last_id],
21878 |row| row.get_typed(3),
21879 )
21880 .unwrap();
21881 assert!(
21882 !conversation_plan_details
21883 .iter()
21884 .any(|detail| detail.contains("TEMP B-TREE")),
21885 "expected streamed lexical rebuild conversation listing to avoid sorter temp b-trees, got {conversation_plan_details:?}"
21886 );
21887
21888 let message_plan_details: Vec<String> = storage
21889 .conn
21890 .query_map_collect(
21891 "EXPLAIN QUERY PLAN SELECT id, idx, role, author, created_at, content FROM messages INDEXED BY sqlite_autoindex_messages_1 WHERE conversation_id = ?1 ORDER BY idx",
21892 fparams![first_id],
21893 |row| row.get_typed(3),
21894 )
21895 .unwrap();
21896 assert!(
21897 message_plan_details
21898 .iter()
21899 .any(|detail| detail.contains("sqlite_autoindex_messages_1")
21900 || detail.contains("idx_messages_conv_idx")),
21901 "expected per-conversation lexical rebuild fetch to use the conversation_id/idx index, got {message_plan_details:?}"
21902 );
21903 assert!(
21904 !message_plan_details
21905 .iter()
21906 .any(|detail| detail.contains("TEMP B-TREE")),
21907 "expected per-conversation lexical rebuild fetch to avoid sorter temp b-trees, got {message_plan_details:?}"
21908 );
21909 }
21910
21911 #[test]
21912 fn discover_historical_database_bundles_prefers_larger_archives_first() {
21913 let dir = TempDir::new().unwrap();
21914 let canonical_db = dir.path().join("agent_search.db");
21915 fs::write(&canonical_db, b"canonical").unwrap();
21916
21917 let smaller = dir.path().join("agent_search.corrupt.small");
21918 fs::write(&smaller, vec![0_u8; 32]).unwrap();
21919
21920 let backups_dir = dir.path().join("backups");
21921 fs::create_dir_all(&backups_dir).unwrap();
21922 let larger = backups_dir.join("agent_search.db.20260322T020200.bak");
21923 fs::write(&larger, vec![0_u8; 128]).unwrap();
21924
21925 let bundles = discover_historical_database_bundles(&canonical_db);
21926 let ordered_paths: Vec<PathBuf> =
21927 bundles.into_iter().map(|bundle| bundle.root_path).collect();
21928
21929 assert_eq!(ordered_paths, vec![larger, smaller]);
21930 }
21931
21932 #[test]
21933 fn discover_historical_database_bundles_prefers_queryable_direct_bundles_first() {
21934 let dir = TempDir::new().unwrap();
21935 let canonical_db = dir.path().join("agent_search.db");
21936 fs::write(&canonical_db, b"canonical").unwrap();
21937
21938 let larger_corrupt = dir.path().join("agent_search.corrupt.20260324_212907");
21939 fs::write(&larger_corrupt, vec![0_u8; 4096]).unwrap();
21940
21941 let backups_dir = dir.path().join("backups");
21942 fs::create_dir_all(&backups_dir).unwrap();
21943 let smaller_healthy = backups_dir.join("agent_search.db.20260322T020200.bak");
21944 let conn = FrankenConnection::open(smaller_healthy.to_string_lossy().into_owned()).unwrap();
21945 conn.execute_batch(
21946 "CREATE TABLE conversations (id INTEGER PRIMARY KEY, source_path TEXT);
21947 CREATE TABLE messages (
21948 id INTEGER PRIMARY KEY,
21949 conversation_id INTEGER NOT NULL,
21950 idx INTEGER NOT NULL,
21951 content TEXT
21952 );
21953 INSERT INTO conversations(id, source_path) VALUES (1, '/tmp/history.jsonl');
21954 INSERT INTO messages(id, conversation_id, idx, content)
21955 VALUES (1, 1, 0, 'seed');",
21956 )
21957 .unwrap();
21958 drop(conn);
21959
21960 let bundles = discover_historical_database_bundles(&canonical_db);
21961 let ordered_paths: Vec<PathBuf> = bundles
21962 .iter()
21963 .map(|bundle| bundle.root_path.clone())
21964 .collect();
21965
21966 assert_eq!(ordered_paths, vec![smaller_healthy, larger_corrupt]);
21967 assert!(bundles[0].supports_direct_readonly);
21968 assert!(!bundles[1].supports_direct_readonly);
21969 }
21970
21971 #[test]
21972 fn salvage_historical_databases_skips_unreadable_quarantined_bundles() {
21973 let dir = TempDir::new().unwrap();
21974 let canonical_db = dir.path().join("agent_search.db");
21975 let storage = SqliteStorage::open(&canonical_db).unwrap();
21976
21977 let quarantined = dir.path().join("agent_search.corrupt.20260324_212907");
21978 fs::write(&quarantined, b"not a sqlite database").unwrap();
21979
21980 let discovered: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
21981 .into_iter()
21982 .map(|bundle| bundle.root_path)
21983 .collect();
21984 assert_eq!(discovered, vec![quarantined]);
21985
21986 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
21987 assert_eq!(outcome.bundles_considered, 1);
21988 assert_eq!(outcome.bundles_imported, 0);
21989 assert_eq!(outcome.conversations_imported, 0);
21990 assert_eq!(outcome.messages_imported, 0);
21991 assert!(storage.list_conversations(10, 0).unwrap().is_empty());
21992 }
21993
/// Databases named `agent_search.db` under `repair-lab/` and `snapshots/`
/// are discovered, while a `agent_search.rebuild-test.db` scratch file in the
/// same directory is not reported.
#[test]
fn discover_historical_database_bundles_includes_repair_lab_and_snapshots_named_roots() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    let canonical_db = root.join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // repair-lab/live-copy holds one candidate plus one scratch file.
    let lab_dir = root.join("repair-lab").join("live-copy");
    fs::create_dir_all(&lab_dir).unwrap();
    let repair_lab_db = lab_dir.join("agent_search.db");
    fs::write(&repair_lab_db, vec![0_u8; 96]).unwrap();
    let rebuild_scratch = lab_dir.join("agent_search.rebuild-test.db");
    fs::write(rebuild_scratch, vec![0_u8; 192]).unwrap();

    // snapshots/<timestamp> holds a second candidate.
    let snapshot_dir = root.join("snapshots").join("20260324T013201Z");
    fs::create_dir_all(&snapshot_dir).unwrap();
    let snapshot_db = snapshot_dir.join("agent_search.db");
    fs::write(&snapshot_db, vec![0_u8; 64]).unwrap();

    let discovered: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .map(|bundle| bundle.root_path)
        .collect();

    assert!(discovered.contains(&repair_lab_db));
    assert!(discovered.contains(&snapshot_db));

    let scratch_listed = discovered.iter().any(|path| {
        path.file_name().and_then(|name| name.to_str()) == Some("agent_search.rebuild-test.db")
    });
    assert!(!scratch_listed);
}
22028
/// Checks that bundle discovery orders a clean backup ahead of a repair-lab
/// replay database whose schema metadata has been tampered with.
///
/// The replay database is created normally and then rewritten through a raw
/// fixture connection so that it reports schema version 13, has no record of
/// migration 14, has no `FTS_FRANKEN_REBUILD_META_KEY` meta row, and carries a
/// hand-inserted `fts_messages` row in `sqlite_master`.
#[test]
fn discover_historical_database_bundles_prefers_healthy_backup_over_replay_priority() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let dir = TempDir::new().unwrap();
    let canonical_db = dir.path().join("agent_search.db");
    // The canonical path only needs to exist; its content is never opened as a database here.
    fs::write(&canonical_db, b"canonical").unwrap();

    // Replay bundle: a real database seeded with one conversation.
    let replay_dir = dir
        .path()
        .join("repair-lab")
        .join("replay-20260324T070101Z");
    fs::create_dir_all(&replay_dir).unwrap();
    let replay_db = replay_dir.join("agent_search.db");
    let replay_storage = SqliteStorage::open(&replay_db).unwrap();
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = replay_storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("replay-conv".into()),
        title: Some("Replay bundle".into()),
        source_path: PathBuf::from("/tmp/replay.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_050),
            content: "replay message".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    replay_storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    // Close the storage handle before reopening the file with the fixture connection.
    drop(replay_storage);

    // Downgrade the replay database's recorded schema state and open the
    // schema table for direct writes.
    let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
    let replay_legacy = rusqlite_test_fixture_conn(&replay_db);
    replay_legacy
        .execute_batch(
            "UPDATE meta SET value = '13' WHERE key = 'schema_version';
             DELETE FROM _schema_migrations WHERE version = 14;
             PRAGMA writable_schema = ON;",
        )
        .unwrap();
    replay_legacy
        .execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
    // Insert a legacy-style `fts_messages` schema row directly into sqlite_master.
    replay_legacy
        .execute(
            "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
             VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
            [duplicate_legacy_fts_sql],
        )
        .unwrap();
    replay_legacy
        .execute_batch("PRAGMA writable_schema = OFF;")
        .unwrap();
    drop(replay_legacy);

    // Clean backup: same agent and conversation, no tampering.
    let backups_dir = dir.path().join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let clean_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
    let clean_storage = SqliteStorage::open(&clean_backup).unwrap();
    let clean_agent_id = clean_storage.ensure_agent(&agent).unwrap();
    clean_storage
        .insert_conversation_tree(clean_agent_id, None, &conversation)
        .unwrap();
    drop(clean_storage);

    let bundles = discover_historical_database_bundles(&canonical_db);
    let ordered_paths: Vec<PathBuf> = bundles
        .iter()
        .map(|bundle| bundle.root_path.clone())
        .collect();

    // The clean backup must be ranked first, the tampered replay second.
    assert_eq!(ordered_paths[0], clean_backup);
    assert_eq!(ordered_paths[1], replay_db);
    assert_eq!(
        bundles[0].probe.schema_version,
        Some(CURRENT_SCHEMA_VERSION)
    );
    // NOTE(review): the clean backup is expected to expose zero FTS schema rows
    // and a non-queryable FTS table — presumably the current schema does not
    // materialize `fts_messages` at open time; confirm against the migrations.
    assert_eq!(bundles[0].probe.fts_schema_rows, Some(0));
    assert!(!bundles[0].probe.fts_queryable);
    // The replay probe must reflect the downgraded version and the inserted schema row.
    assert_eq!(bundles[1].probe.schema_version, Some(13));
    assert_eq!(bundles[1].probe.fts_schema_rows, Some(1));
}
22146
/// Checks that `ensure_fts_consistency_via_rusqlite` reports an incremental
/// catch-up when one message row was added after the FTS table was built.
///
/// The test seeds one message, rebuilds FTS, inserts a second message row
/// directly into `messages`, and then expects exactly one row to be caught up
/// out of two in total.
#[test]
fn ensure_fts_consistency_via_rusqlite_catches_up_missing_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("fts-catchup.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("fts-catchup".into()),
        title: Some("FTS catchup".into()),
        source_path: PathBuf::from("/tmp/fts-catchup.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_050),
            content: "initial message".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    // Close the storage handle before the rebuild helper reopens the file by path.
    drop(storage);

    // Build the FTS table while only the first message exists.
    rebuild_fts_via_rusqlite(&db_path).unwrap();

    // Append a second message row with explicit id 2, bypassing the storage API.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let conversation_id: i64 = conn
        .query_row_map("SELECT id FROM conversations LIMIT 1", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    conn.execute_compat(
        "INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(2, ?1, 1, 'assistant', 'assistant', 1700000000060, 'authentication catchup', NULL, NULL)",
        fparams![conversation_id],
    )
    .unwrap();
    drop(conn);

    // Only the new row should be inserted; the total then covers both messages.
    let repair = ensure_fts_consistency_via_rusqlite(&db_path).unwrap();
    assert_eq!(
        repair,
        FtsConsistencyRepair::IncrementalCatchUp {
            inserted_rows: 1,
            total_rows: 2
        }
    );

    // The FTS table must now hold exactly one entry with rowid 2.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let auth_rows: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM fts_messages WHERE rowid = 2",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(auth_rows, 1);
}
22226
/// Checks that `rebuild_fts_via_rusqlite` leaves a single `fts_messages`
/// schema row when the database starts out with two.
///
/// The second row is created by writing a legacy `CREATE VIRTUAL TABLE`
/// statement straight into `sqlite_master` under `PRAGMA writable_schema`.
#[test]
fn rebuild_fts_via_rusqlite_cleans_duplicate_legacy_schema_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("fts-duplicate-rebuild.db");

    // Seed a database with one conversation containing one message.
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws")),
        external_id: Some("retro".into()),
        title: Some("retro".into()),
        source_path: PathBuf::from("/tmp/retro.jsonl"),
        started_at: Some(42),
        ended_at: Some(42),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(42),
            content: "retro investigation".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    drop(storage);
    // Create the regular FTS schema first so the manual insert below yields a duplicate.
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    // Add a second `fts_messages` schema row by hand and confirm there are now two.
    let conn = rusqlite_test_fixture_conn(&db_path);
    conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
    conn.execute(
        "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
         VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
        ["CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')"],
    )
    .unwrap();
    conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
    let duplicate_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(duplicate_rows, 2);
    drop(conn);

    // The rebuild should index the single seeded message.
    let inserted = rebuild_fts_via_rusqlite(&db_path).unwrap();
    assert_eq!(inserted, 1);

    // After the rebuild exactly one FTS schema row and one indexed row must remain.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let schema_rows = franken_fts_schema_rows(&conn).unwrap();
    assert_eq!(
        schema_rows, 1,
        "DROP TABLE should leave one clean FTS schema"
    );
    let match_count: i64 = conn
        .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(match_count, 1);
}
22308
/// Registering a previously unseen agent yields a positive row id.
#[test]
fn ensure_agent_creates_new() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let new_agent = Agent {
        slug: "test_agent".into(),
        name: "Test Agent".into(),
        kind: AgentKind::Cli,
        version: Some("1.0".into()),
        id: None,
    };

    let assigned_id = storage.ensure_agent(&new_agent).unwrap();
    assert!(assigned_id > 0);
}
22330
/// Calling `ensure_agent` twice with the same agent returns the same id.
#[test]
fn ensure_agent_returns_existing_id() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        slug: "codex".into(),
        name: "Codex".into(),
        kind: AgentKind::Cli,
        version: None,
        id: None,
    };

    let first_id = storage.ensure_agent(&codex).unwrap();
    let second_id = storage.ensure_agent(&codex).unwrap();
    assert_eq!(first_id, second_id);
}
22349
/// Re-registering an agent with identical metadata leaves the stored
/// `updated_at` value unchanged.
#[test]
fn ensure_agent_unchanged_preserves_updated_at() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        slug: "codex".into(),
        name: "Codex".into(),
        kind: AgentKind::Cli,
        version: Some("1.0".into()),
        id: None,
    };

    // Reads the persisted `updated_at` for the agent's slug.
    let read_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM agents WHERE slug = ?1",
                fparams![codex.slug.as_str()],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    storage.ensure_agent(&codex).unwrap();
    let before = read_updated_at();

    // Let the clock advance so an unwanted timestamp refresh would be visible.
    std::thread::sleep(std::time::Duration::from_millis(5));

    storage.ensure_agent(&codex).unwrap();
    let after = read_updated_at();

    assert_eq!(after, before);
}
22387
/// Changing an agent's name and version under the same slug keeps the id and
/// persists the new metadata.
#[test]
fn ensure_agent_changed_metadata_updates_cached_slug() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let mut codex = Agent {
        slug: "codex".into(),
        name: "Codex".into(),
        kind: AgentKind::Cli,
        version: Some("1.0".into()),
        id: None,
    };
    let first_id = storage.ensure_agent(&codex).unwrap();

    // Same slug, new display name and version.
    codex.name = "Codex CLI".into();
    codex.version = Some("1.1".into());
    let second_id = storage.ensure_agent(&codex).unwrap();

    let (stored_name, stored_version): (String, Option<String>) = storage
        .conn
        .query_row_map(
            "SELECT name, version FROM agents WHERE slug = ?1",
            fparams![codex.slug.as_str()],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();

    assert_eq!(first_id, second_id);
    assert_eq!(stored_name, "Codex CLI");
    assert_eq!(stored_version.as_deref(), Some("1.1"));
}
22419
/// An agent registered through `ensure_agent` shows up in `list_agents`.
#[test]
fn list_agents_returns_inserted() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let registered = Agent {
        slug: "new_agent".into(),
        name: "New Agent".into(),
        kind: AgentKind::VsCode,
        version: None,
        id: None,
    };
    storage.ensure_agent(&registered).unwrap();

    let listed = storage.list_agents().unwrap();
    let found = listed.iter().any(|entry| entry.slug == "new_agent");
    assert!(found);
}
22438
/// Registering a previously unseen workspace path yields a positive row id.
#[test]
fn ensure_workspace_creates_new() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let workspace_path = Path::new("/home/user/project");
    let workspace_id = storage
        .ensure_workspace(workspace_path, Some("My Project"))
        .unwrap();

    assert!(workspace_id > 0);
}
22454
/// Calling `ensure_workspace` twice for the same path returns the same id.
#[test]
fn ensure_workspace_returns_existing() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let workspace_path = Path::new("/home/user/myproject");
    let first_id = storage.ensure_workspace(workspace_path, None).unwrap();
    let second_id = storage.ensure_workspace(workspace_path, None).unwrap();

    assert_eq!(first_id, second_id);
}
22466
/// Re-registering a workspace path with a different display name keeps the id
/// and persists the new display name.
#[test]
fn ensure_workspace_changed_display_name_updates_cached_path() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let path = Path::new("/home/user/myproject");
    let before_id = storage.ensure_workspace(path, Some("Before")).unwrap();
    let after_id = storage.ensure_workspace(path, Some("After")).unwrap();
    assert_eq!(before_id, after_id);

    // Read the stored name back directly from the table.
    let stored_name: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT display_name FROM workspaces WHERE path = ?1",
            fparams![path.to_string_lossy().as_ref()],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(stored_name.as_deref(), Some("After"));
}
22489
/// A workspace registered through `ensure_workspace` shows up in
/// `list_workspaces`.
#[test]
fn list_workspaces_returns_inserted() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let workspace_path = Path::new("/test/workspace");
    storage
        .ensure_workspace(workspace_path, Some("Test WS"))
        .unwrap();

    let listed = storage.list_workspaces().unwrap();
    let found = listed
        .iter()
        .any(|w| w.path.to_str() == Some("/test/workspace"));
    assert!(found);
}
22507
/// A source written through `upsert_source` can be read back by id with its
/// host label intact.
#[test]
fn upsert_source_creates_new() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let laptop = Source {
        id: "test-laptop".into(),
        kind: SourceKind::Ssh,
        host_label: Some("test.local".into()),
        machine_id: Some("test-machine-id".into()),
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&laptop).unwrap();

    let stored = storage
        .get_source("test-laptop")
        .unwrap()
        .expect("upserted source should be readable");
    assert_eq!(stored.host_label, Some("test.local".into()));
}
22534
/// Upserting a source whose id already exists overwrites the stored host
/// label and platform instead of keeping the first version.
#[test]
fn upsert_source_updates_existing() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    // First version: no platform recorded.
    let source1 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Original Label".into()),
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&source1).unwrap();

    // Second version under the same id: new label and a platform.
    let source2 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Updated Label".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: Some(SqliteStorage::now_millis()),
    };
    storage.upsert_source(&source2).unwrap();

    let fetched = storage.get_source("my-source").unwrap().unwrap();
    assert_eq!(fetched.host_label, Some("Updated Label".into()));
    // Pin the exact platform value: `is_some()` alone would also pass if the
    // upsert stored some unrelated string.
    assert_eq!(fetched.platform, Some("linux".into()));
}
22569
/// Upserting an identical source a second time leaves every stored field,
/// including both timestamps, unchanged.
#[test]
fn upsert_source_unchanged_preserves_updated_at() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let stable = Source {
        id: "stable-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("builder.local".into()),
        platform: Some("linux".into()),
        config_json: Some(serde_json::json!({"role": "bench"})),
        machine_id: None,
        created_at: None,
        updated_at: None,
    };

    storage.upsert_source(&stable).unwrap();
    let first_read = storage.get_source("stable-source").unwrap().unwrap();

    // Let the clock advance so an unwanted timestamp refresh would be visible.
    std::thread::sleep(std::time::Duration::from_millis(5));

    storage.upsert_source(&stable).unwrap();
    let second_read = storage.get_source("stable-source").unwrap().unwrap();

    assert_eq!(second_read.created_at, first_read.created_at);
    assert_eq!(second_read.updated_at, first_read.updated_at);
    assert_eq!(second_read.host_label, first_read.host_label);
    assert_eq!(second_read.platform, first_read.platform);
    assert_eq!(second_read.config_json, first_read.config_json);
}
22600
/// After a remote source row is deleted, `ensure_source_for_conversation`
/// creates it again with the conversation's origin host as its label.
#[test]
fn ensure_source_for_conversation_recreates_remote_source_after_delete() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: Some("tester".into()),
        created_at: Some(1_700_000_000_000),
        content: "cache recreate".into(),
        extra_json: serde_json::json!({}),
        snippets: Vec::new(),
    };
    let remote_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws/cache-recreate")),
        external_id: Some("cache-recreate".into()),
        title: Some("Cache Recreate".into()),
        source_path: PathBuf::from("/log/cache-recreate.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: Some(16),
        metadata_json: serde_json::json!({}),
        messages: vec![only_message],
        source_id: "cache-remote-source".into(),
        origin_host: Some("builder-cache".into()),
    };

    // First pass creates the source row.
    storage
        .ensure_source_for_conversation(&remote_conversation)
        .unwrap();
    assert!(storage.get_source("cache-remote-source").unwrap().is_some());

    // Remove it again and confirm it is gone.
    let was_deleted = storage.delete_source("cache-remote-source", false).unwrap();
    assert!(was_deleted);
    assert!(storage.get_source("cache-remote-source").unwrap().is_none());

    // Second pass must bring the row back.
    storage
        .ensure_source_for_conversation(&remote_conversation)
        .unwrap();
    let recreated = storage
        .get_source("cache-remote-source")
        .unwrap()
        .expect("source row should be recreated");
    assert_eq!(recreated.host_label.as_deref(), Some("builder-cache"));
}
22651
/// Deleting an existing source reports success and makes later lookups
/// return `None`.
#[test]
fn delete_source_removes_entry() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let doomed = Source {
        id: "to-delete".into(),
        kind: SourceKind::Local,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
        host_label: None,
        machine_id: None,
        platform: None,
        config_json: None,
    };
    storage.upsert_source(&doomed).unwrap();

    let was_deleted = storage.delete_source("to-delete", false).unwrap();
    assert!(was_deleted);

    let lookup = storage.get_source("to-delete").unwrap();
    assert!(lookup.is_none());
}
22676
/// Attempting to delete the built-in local source returns an error.
#[test]
fn delete_source_cannot_delete_local() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let attempt = storage.delete_source(LOCAL_SOURCE_ID, false);
    assert!(attempt.is_err());
}
22686
/// A freshly opened database already lists the local source.
#[test]
fn list_sources_includes_local() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let listed = storage.list_sources().unwrap();
    let has_local = listed.iter().any(|s| s.id == LOCAL_SOURCE_ID);
    assert!(has_local);
}
22696
/// A conversation with a blank `source_id` and no origin host is stored under
/// the local source id, and no source row is created for the blank id.
#[test]
fn insert_conversation_tree_blank_local_source_normalizes_to_local_id() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let blank_source_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-local-source".into()),
        title: Some("Blank local source".into()),
        source_path: tmp.path().join("blank-local.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &blank_source_conversation)
        .unwrap();

    // The blank id must not have been persisted as its own source.
    assert!(storage.get_source(" ").unwrap().is_none());

    let local_source = storage
        .get_source(LOCAL_SOURCE_ID)
        .unwrap()
        .expect("local source row should exist");
    assert_eq!(local_source.kind, SourceKind::Local);
    assert_eq!(local_source.host_label, None);

    // The stored conversation is attributed to the local source.
    let stored = storage.list_conversations(10, 0).unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0].source_id, LOCAL_SOURCE_ID);
    assert_eq!(stored[0].origin_host, None);
}
22755
/// Inserting local conversations repeatedly leaves the local source row's
/// `updated_at` at the value it had before any insert.
#[test]
fn repeated_local_inserts_do_not_touch_bootstrap_source_row() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Reads the current `updated_at` of the local source row.
    let read_local_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM sources WHERE id = ?1",
                fparams![LOCAL_SOURCE_ID],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    // Builds a one-message local conversation distinguished by `suffix`.
    let local_conversation = |external_id: &str, suffix: &str| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some(external_id.into()),
        title: Some(format!("Local source {suffix}")),
        source_path: tmp.path().join(format!("local-{suffix}.jsonl")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000),
            content: format!("hello-{suffix}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let baseline = read_local_updated_at();

    // Sleep before each insert so a refreshed timestamp would differ.
    std::thread::sleep(std::time::Duration::from_millis(5));
    let first = local_conversation("local-source-1", "one");
    storage
        .insert_conversation_tree(agent_id, None, &first)
        .unwrap();
    let after_first = read_local_updated_at();

    std::thread::sleep(std::time::Duration::from_millis(5));
    let second = local_conversation("local-source-2", "two");
    storage
        .insert_conversation_tree(agent_id, None, &second)
        .unwrap();
    let after_second = read_local_updated_at();

    assert_eq!(after_first, baseline);
    assert_eq!(after_second, baseline);
}
22835
/// A conversation with a blank `source_id` but an origin host is stored under
/// a source whose id and label are that origin host.
#[test]
fn insert_conversation_tree_blank_remote_source_normalizes_to_origin_host() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let remote_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-remote-source".into()),
        title: Some("Blank remote source".into()),
        source_path: tmp.path().join("blank-remote.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: Some("user@work-laptop".into()),
    };

    storage
        .insert_conversation_tree(agent_id, None, &remote_conversation)
        .unwrap();

    // The blank id must not have been persisted as its own source.
    assert!(storage.get_source(" ").unwrap().is_none());

    let remote_source = storage
        .get_source("user@work-laptop")
        .unwrap()
        .expect("normalized remote source row should exist");
    assert_eq!(remote_source.kind, SourceKind::Ssh);
    assert_eq!(remote_source.host_label.as_deref(), Some("user@work-laptop"));

    // The stored conversation carries the normalized id and the origin host.
    let stored = storage.list_conversations(10, 0).unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0].source_id, "user@work-laptop");
    assert_eq!(stored[0].origin_host.as_deref(), Some("user@work-laptop"));
}
22897
/// The batched insert path applies the same rule as the single insert path:
/// a blank `source_id` with an origin host is stored under that host.
#[test]
fn insert_conversations_batched_normalizes_host_only_remote_source_id() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let remote_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("batched-blank-remote-source".into()),
        title: Some("Batched blank remote source".into()),
        source_path: tmp.path().join("batched-blank-remote.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: Some("user@batch-host".into()),
    };

    storage
        .insert_conversations_batched(&[(agent_id, None, &remote_conversation)])
        .unwrap();

    // The blank id must not have been persisted as its own source.
    assert!(storage.get_source(" ").unwrap().is_none());

    let remote_source = storage
        .get_source("user@batch-host")
        .unwrap()
        .expect("normalized batched remote source row should exist");
    assert_eq!(remote_source.kind, SourceKind::Ssh);
    assert_eq!(remote_source.host_label.as_deref(), Some("user@batch-host"));

    // The stored conversation carries the normalized id and the origin host.
    let stored = storage.list_conversations(10, 0).unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0].source_id, "user@batch-host");
    assert_eq!(stored[0].origin_host.as_deref(), Some("user@batch-host"));
}
22959
/// `get_source_ids` returns registered remote ids but leaves out the local
/// source id.
#[test]
fn get_source_ids_excludes_local() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let remote = Source {
        id: "remote-1".into(),
        kind: SourceKind::Ssh,
        host_label: Some("server".into()),
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
        machine_id: None,
        platform: None,
        config_json: None,
    };
    storage.upsert_source(&remote).unwrap();

    let ids = storage.get_source_ids().unwrap();
    let local_listed = ids.contains(&LOCAL_SOURCE_ID.to_string());
    let remote_listed = ids.contains(&"remote-1".to_string());
    assert!(!local_listed);
    assert!(remote_listed);
}
22983
/// A freshly opened database has no last-scan timestamp recorded.
#[test]
fn get_last_scan_ts_returns_none_initially() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let initial_ts = storage.get_last_scan_ts().unwrap();
    assert!(initial_ts.is_none());
}
22997
/// A timestamp written with `set_last_scan_ts` is returned unchanged by
/// `get_last_scan_ts`.
#[test]
fn set_and_get_last_scan_ts() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let written_ts = 1700000000000_i64;
    storage.set_last_scan_ts(written_ts).unwrap();

    let read_back = storage.get_last_scan_ts().unwrap();
    assert_eq!(read_back, Some(written_ts));
}
23010
/// `now_millis` returns a value strictly between two fixed bounds
/// (2020-01-01 and 2100-01-01 UTC, expressed as epoch milliseconds).
#[test]
fn now_millis_returns_reasonable_value() {
    let lower_bound = 1_577_836_800_000;
    let upper_bound = 4_102_444_800_000;

    let now = SqliteStorage::now_millis();
    assert!(now > lower_bound);
    assert!(now < upper_bound);
}
23023
23024 #[test]
23029 fn msgpack_roundtrip_basic_object() {
23030 let value = serde_json::json!({
23031 "key": "value",
23032 "number": 42,
23033 "nested": { "inner": true }
23034 });
23035
23036 let bytes = serialize_json_to_msgpack(&value).expect("should serialize");
23037 let recovered = deserialize_msgpack_to_json(&bytes);
23038
23039 assert_eq!(value, recovered);
23040 }
23041
23042 #[test]
23043 fn msgpack_returns_none_for_null() {
23044 let value = serde_json::Value::Null;
23045 assert!(serialize_json_to_msgpack(&value).is_none());
23046 }
23047
23048 #[test]
23049 fn message_insert_stores_null_extra_json_as_sql_null() {
23050 let dir = TempDir::new().unwrap();
23051 let db_path = dir.path().join("test.db");
23052 let storage = SqliteStorage::open(&db_path).unwrap();
23053 let agent_id = storage
23054 .ensure_agent(&Agent {
23055 id: None,
23056 slug: "codex".into(),
23057 name: "Codex".into(),
23058 version: None,
23059 kind: AgentKind::Cli,
23060 })
23061 .unwrap();
23062 let conversation = Conversation {
23063 id: None,
23064 agent_slug: "codex".into(),
23065 workspace: None,
23066 external_id: Some("null-extra-json".into()),
23067 title: Some("Null extra_json".into()),
23068 source_path: PathBuf::from("/tmp/null-extra-json.jsonl"),
23069 started_at: Some(1_700_000_000_000),
23070 ended_at: Some(1_700_000_000_001),
23071 approx_tokens: None,
23072 metadata_json: serde_json::Value::Null,
23073 messages: vec![Message {
23074 id: None,
23075 idx: 0,
23076 role: MessageRole::User,
23077 author: None,
23078 created_at: Some(1_700_000_000_000),
23079 content: "null metadata message".into(),
23080 extra_json: serde_json::Value::Null,
23081 snippets: Vec::new(),
23082 }],
23083 source_id: LOCAL_SOURCE_ID.into(),
23084 origin_host: None,
23085 };
23086
23087 let conversation_id = storage
23088 .insert_conversation_tree(agent_id, None, &conversation)
23089 .unwrap()
23090 .conversation_id;
23091
23092 let (extra_json, extra_bin): (Option<String>, Option<Vec<u8>>) = storage
23093 .conn
23094 .query_row_map(
23095 "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
23096 fparams![conversation_id],
23097 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23098 )
23099 .unwrap();
23100 assert!(extra_json.is_none());
23101 assert!(extra_bin.is_none());
23102
23103 let stored = storage.fetch_messages(conversation_id).unwrap();
23104 assert!(stored[0].extra_json.is_null());
23105 }
23106
23107 #[test]
23108 fn message_insert_stores_nonempty_extra_json_as_msgpack_only() {
23109 let dir = TempDir::new().unwrap();
23110 let db_path = dir.path().join("test.db");
23111 let storage = SqliteStorage::open(&db_path).unwrap();
23112 let agent_id = storage
23113 .ensure_agent(&Agent {
23114 id: None,
23115 slug: "codex".into(),
23116 name: "Codex".into(),
23117 version: None,
23118 kind: AgentKind::Cli,
23119 })
23120 .unwrap();
23121 let extra_json = serde_json::json!({ "idx": 7, "kind": "profile" });
23122 let conversation = Conversation {
23123 id: None,
23124 agent_slug: "codex".into(),
23125 workspace: None,
23126 external_id: Some("msgpack-extra-json".into()),
23127 title: Some("MessagePack extra_json".into()),
23128 source_path: PathBuf::from("/tmp/msgpack-extra-json.jsonl"),
23129 started_at: Some(1_700_000_000_000),
23130 ended_at: Some(1_700_000_000_001),
23131 approx_tokens: None,
23132 metadata_json: serde_json::Value::Null,
23133 messages: vec![Message {
23134 id: None,
23135 idx: 0,
23136 role: MessageRole::User,
23137 author: None,
23138 created_at: Some(1_700_000_000_000),
23139 content: "msgpack metadata message".into(),
23140 extra_json: extra_json.clone(),
23141 snippets: Vec::new(),
23142 }],
23143 source_id: LOCAL_SOURCE_ID.into(),
23144 origin_host: None,
23145 };
23146
23147 let conversation_id = storage
23148 .insert_conversation_tree(agent_id, None, &conversation)
23149 .unwrap()
23150 .conversation_id;
23151
23152 let (extra_json_text, extra_bin): (Option<String>, Option<Vec<u8>>) = storage
23153 .conn
23154 .query_row_map(
23155 "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
23156 fparams![conversation_id],
23157 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23158 )
23159 .unwrap();
23160 assert!(extra_json_text.is_none());
23161 assert!(extra_bin.is_some());
23162
23163 let stored = storage.fetch_messages(conversation_id).unwrap();
23164 assert_eq!(stored[0].extra_json, extra_json);
23165 }
23166
23167 #[test]
23168 fn conversation_insert_preserves_null_metadata_json_as_json_null() {
23169 let dir = TempDir::new().unwrap();
23170 let db_path = dir.path().join("test.db");
23171 let storage = SqliteStorage::open(&db_path).unwrap();
23172 let agent_id = storage
23173 .ensure_agent(&Agent {
23174 id: None,
23175 slug: "codex".into(),
23176 name: "Codex".into(),
23177 version: None,
23178 kind: AgentKind::Cli,
23179 })
23180 .unwrap();
23181 let conversation = Conversation {
23182 id: None,
23183 agent_slug: "codex".into(),
23184 workspace: None,
23185 external_id: Some("null-conversation-metadata".into()),
23186 title: Some("Null conversation metadata".into()),
23187 source_path: PathBuf::from("/tmp/null-conversation-metadata.jsonl"),
23188 started_at: Some(1_700_000_000_000),
23189 ended_at: Some(1_700_000_000_001),
23190 approx_tokens: None,
23191 metadata_json: serde_json::Value::Null,
23192 messages: vec![Message {
23193 id: None,
23194 idx: 0,
23195 role: MessageRole::User,
23196 author: None,
23197 created_at: Some(1_700_000_000_000),
23198 content: "null conversation metadata message".into(),
23199 extra_json: serde_json::Value::Null,
23200 snippets: Vec::new(),
23201 }],
23202 source_id: LOCAL_SOURCE_ID.into(),
23203 origin_host: None,
23204 };
23205
23206 storage
23207 .insert_conversation_tree(agent_id, None, &conversation)
23208 .unwrap();
23209
23210 let (metadata_json, metadata_bin): (Option<String>, Option<Vec<u8>>) = storage
23211 .conn
23212 .query_row_map(
23213 "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
23214 fparams!["null-conversation-metadata"],
23215 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23216 )
23217 .unwrap();
23218 assert_eq!(metadata_json.as_deref(), Some("null"));
23219 assert!(metadata_bin.is_none());
23220
23221 let listed = storage.list_conversations(10, 0).unwrap();
23222 assert!(listed[0].metadata_json.is_null());
23223 }
23224
23225 #[test]
23226 fn conversation_insert_stores_nonempty_metadata_as_msgpack_only() {
23227 let dir = TempDir::new().unwrap();
23228 let db_path = dir.path().join("test.db");
23229 let storage = SqliteStorage::open(&db_path).unwrap();
23230 let agent_id = storage
23231 .ensure_agent(&Agent {
23232 id: None,
23233 slug: "codex".into(),
23234 name: "Codex".into(),
23235 version: None,
23236 kind: AgentKind::Cli,
23237 })
23238 .unwrap();
23239 let metadata_json = serde_json::json!({ "bench": true, "source": "profile" });
23240 let conversation = Conversation {
23241 id: None,
23242 agent_slug: "codex".into(),
23243 workspace: None,
23244 external_id: Some("msgpack-conversation-metadata".into()),
23245 title: Some("MessagePack conversation metadata".into()),
23246 source_path: PathBuf::from("/tmp/msgpack-conversation-metadata.jsonl"),
23247 started_at: Some(1_700_000_000_000),
23248 ended_at: Some(1_700_000_000_001),
23249 approx_tokens: None,
23250 metadata_json: metadata_json.clone(),
23251 messages: vec![Message {
23252 id: None,
23253 idx: 0,
23254 role: MessageRole::User,
23255 author: None,
23256 created_at: Some(1_700_000_000_000),
23257 content: "msgpack conversation metadata message".into(),
23258 extra_json: serde_json::Value::Null,
23259 snippets: Vec::new(),
23260 }],
23261 source_id: LOCAL_SOURCE_ID.into(),
23262 origin_host: None,
23263 };
23264
23265 storage
23266 .insert_conversation_tree(agent_id, None, &conversation)
23267 .unwrap();
23268
23269 let (metadata_text, metadata_bin): (Option<String>, Option<Vec<u8>>) = storage
23270 .conn
23271 .query_row_map(
23272 "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
23273 fparams!["msgpack-conversation-metadata"],
23274 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
23275 )
23276 .unwrap();
23277 assert!(metadata_text.is_none());
23278 assert!(metadata_bin.is_some());
23279
23280 let listed = storage.list_conversations(10, 0).unwrap();
23281 assert_eq!(listed[0].metadata_json, metadata_json);
23282 }
23283
23284 #[test]
23285 fn msgpack_returns_none_for_empty_object() {
23286 let value = serde_json::json!({});
23287 assert!(serialize_json_to_msgpack(&value).is_none());
23288 }
23289
23290 #[test]
23291 fn parse_historical_json_column_preserves_large_payloads_as_raw_json() {
23292 let raw = format!("{{\"blob\":\"{}\"}}", "x".repeat(1_000_000));
23293
23294 let value = parse_historical_json_column(Some(raw.clone()));
23295
23296 assert_eq!(historical_raw_json(&value), Some(raw.as_str()));
23297 assert_eq!(json_value_size_hint(&value), raw.len());
23298 }
23299
23300 #[test]
23301 fn parse_historical_json_column_preserves_small_payloads_as_raw_json() {
23302 let raw = String::from("{\"ok\":true,\"n\":1}");
23303
23304 let value = parse_historical_json_column(Some(raw.clone()));
23305
23306 assert_eq!(historical_raw_json(&value), Some(raw.as_str()));
23307 }
23308
23309 #[test]
23310 fn msgpack_serializes_non_empty_array() {
23311 let value = serde_json::json!([1, 2, 3]);
23312 let bytes = serialize_json_to_msgpack(&value).expect("should serialize array");
23313 let recovered = deserialize_msgpack_to_json(&bytes);
23314 assert_eq!(value, recovered);
23315 }
23316
23317 #[test]
23318 fn msgpack_smaller_than_json() {
23319 let value = serde_json::json!({
23320 "field_name_one": "some_value",
23321 "field_name_two": 123456,
23322 "field_name_three": [1, 2, 3, 4, 5],
23323 "field_name_four": { "nested": true }
23324 });
23325
23326 let json_bytes = serde_json::to_vec(&value).unwrap();
23327 let msgpack_bytes = serialize_json_to_msgpack(&value).unwrap();
23328
23329 assert!(
23331 msgpack_bytes.len() < json_bytes.len(),
23332 "MessagePack ({} bytes) should be smaller than JSON ({} bytes)",
23333 msgpack_bytes.len(),
23334 json_bytes.len()
23335 );
23336 }
23337
23338 #[test]
23339 fn migration_v7_adds_binary_columns() {
23340 let dir = TempDir::new().unwrap();
23341 let db_path = dir.path().join("test.db");
23342 let storage = SqliteStorage::open(&db_path).unwrap();
23343
23344 let has_metadata_bin = storage
23346 .raw()
23347 .query("PRAGMA table_info(conversations)")
23348 .unwrap()
23349 .iter()
23350 .any(|row| row.get_typed::<String>(1).unwrap() == "metadata_bin");
23351 assert!(
23352 has_metadata_bin,
23353 "conversations should have metadata_bin column"
23354 );
23355
23356 let has_extra_bin = storage
23358 .raw()
23359 .query("PRAGMA table_info(messages)")
23360 .unwrap()
23361 .iter()
23362 .any(|row| row.get_typed::<String>(1).unwrap() == "extra_bin");
23363 assert!(has_extra_bin, "messages should have extra_bin column");
23364 }
23365
23366 #[test]
23367 fn insert_conversation_tree_rehydrates_append_tail_state_cache_after_manual_clear() {
23368 let dir = TempDir::new().unwrap();
23369 let db_path = dir.path().join("append-tail-state-cache.db");
23370 let storage = SqliteStorage::open(&db_path).unwrap();
23371 let agent_id = storage
23372 .ensure_agent(&Agent {
23373 id: None,
23374 slug: "codex".into(),
23375 name: "Codex".into(),
23376 version: None,
23377 kind: AgentKind::Cli,
23378 })
23379 .unwrap();
23380 let workspace = PathBuf::from("/ws/profiled-append-remote");
23381 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
23382
23383 let initial = make_profiled_append_remote_merge_conversation(11, 5);
23384 let insert_outcome = storage
23385 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
23386 .unwrap();
23387 let conversation_id = insert_outcome.conversation_id;
23388
23389 let initial_tail: (Option<i64>, Option<i64>, Option<i64>) = storage
23390 .raw()
23391 .query_row_map(
23392 "SELECT ended_at, last_message_idx, last_message_created_at
23393 FROM conversation_tail_state
23394 WHERE conversation_id = ?1",
23395 fparams![conversation_id],
23396 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
23397 )
23398 .unwrap();
23399 assert_eq!(initial_tail, (Some(111_005), Some(4), Some(111_004)));
23400
23401 storage
23402 .raw()
23403 .execute_compat(
23404 "UPDATE conversations SET ended_at = ?1 WHERE id = ?2",
23405 fparams![111_999_i64, conversation_id],
23406 )
23407 .unwrap();
23408 storage
23409 .raw()
23410 .execute_compat(
23411 "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
23412 fparams![conversation_id],
23413 )
23414 .unwrap();
23415
23416 let appended = make_profiled_append_remote_merge_conversation(11, 10);
23417 let append_outcome = storage
23418 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
23419 .unwrap();
23420 assert_eq!(append_outcome.inserted_indices, vec![5, 6, 7, 8, 9]);
23421
23422 let final_tail: (Option<i64>, Option<i64>, Option<i64>) = storage
23423 .raw()
23424 .query_row_map(
23425 "SELECT ended_at, last_message_idx, last_message_created_at
23426 FROM conversation_tail_state
23427 WHERE conversation_id = ?1",
23428 fparams![conversation_id],
23429 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
23430 )
23431 .unwrap();
23432 assert_eq!(final_tail, (Some(111_999), Some(9), Some(111_009)));
23433 }
23434
23435 #[test]
23436 fn msgpack_deserialize_empty_returns_default() {
23437 let recovered = deserialize_msgpack_to_json(&[]);
23438 assert_eq!(recovered, serde_json::Value::Object(serde_json::Map::new()));
23439 }
23440
23441 #[test]
23442 fn msgpack_deserialize_garbage_returns_default() {
23443 let recovered = deserialize_msgpack_to_json(&[0x85]);
23446 assert_eq!(recovered, serde_json::Value::Object(serde_json::Map::new()));
23447 }
23448
23449 #[test]
23450 fn stats_aggregator_collects_and_expands() {
23451 let mut agg = StatsAggregator::new();
23452 assert!(agg.is_empty());
23453
23454 agg.record("claude", "local", 100, 5, 500);
23457 agg.record("codex", "local", 100, 3, 300);
23459 agg.record("claude", "local", 101, 2, 200);
23461
23462 assert!(!agg.is_empty());
23463 assert_eq!(agg.raw_entry_count(), 3);
23464
23465 let entries = agg.expand();
23466 assert_eq!(entries.len(), 10);
23494
23495 let day100_all = entries
23497 .iter()
23498 .find(|(d, a, s, _)| *d == 100 && a == "all" && s == "all")
23499 .unwrap();
23500 assert_eq!(day100_all.3.session_count_delta, 2);
23501 assert_eq!(day100_all.3.message_count_delta, 8);
23502 assert_eq!(day100_all.3.total_chars_delta, 800);
23503 }
23504
23505 #[test]
23510 fn lazy_franken_db_not_open_before_get() {
23511 let dir = TempDir::new().unwrap();
23512 let db_path = dir.path().join("lazy_test.db");
23513
23514 let _storage = SqliteStorage::open(&db_path).unwrap();
23516
23517 let lazy = LazyFrankenDb::new(db_path);
23518 assert!(
23519 !lazy.is_open(),
23520 "LazyFrankenDb must not open on construction"
23521 );
23522 }
23523
23524 #[test]
23525 fn lazy_franken_db_opens_on_first_get() {
23526 let dir = TempDir::new().unwrap();
23527 let db_path = dir.path().join("lazy_test.db");
23528
23529 let _storage = SqliteStorage::open(&db_path).unwrap();
23531 drop(_storage);
23532
23533 let lazy = LazyFrankenDb::new(db_path);
23534 assert!(!lazy.is_open());
23535
23536 let conn = lazy.get("test").expect("should open successfully");
23537 let count: i64 = conn
23538 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
23539 r.get_typed(0)
23540 })
23541 .unwrap();
23542 assert_eq!(count, 0);
23543 drop(conn);
23544
23545 assert!(lazy.is_open(), "LazyFrankenDb must be open after get()");
23546 }
23547
23548 #[test]
23549 fn lazy_franken_db_reuses_connection() {
23550 let dir = TempDir::new().unwrap();
23551 let db_path = dir.path().join("lazy_test.db");
23552 let _storage = SqliteStorage::open(&db_path).unwrap();
23553 drop(_storage);
23554
23555 let lazy = LazyFrankenDb::new(db_path);
23556
23557 {
23559 let conn = lazy.get("first").unwrap();
23560 conn.execute_batch("CREATE TABLE IF NOT EXISTS test_tbl (id INTEGER)")
23561 .unwrap();
23562 }
23563
23564 {
23566 let conn = lazy.get("second").unwrap();
23567 let count: i64 = conn
23568 .query_row_map("SELECT COUNT(*) FROM test_tbl", fparams![], |r| {
23569 r.get_typed(0)
23570 })
23571 .unwrap();
23572 assert_eq!(count, 0);
23573 }
23574 }
23575
23576 #[test]
23577 fn lazy_franken_db_not_found_error() {
23578 let dir = TempDir::new().unwrap();
23579 let db_path = dir.path().join("nonexistent.db");
23580
23581 let lazy = LazyFrankenDb::new(db_path);
23582 let result = lazy.get("test");
23583 assert!(result.is_err());
23584 assert!(
23585 matches!(result.unwrap_err(), LazyDbError::NotFound(_)),
23586 "should return NotFound for missing DB"
23587 );
23588 }
23589
23590 #[test]
23591 fn lazy_franken_db_path_accessor() {
23592 let path = PathBuf::from("/tmp/test_lazy.db");
23593 let lazy = LazyFrankenDb::new(path.clone());
23594 assert_eq!(lazy.path(), path.as_path());
23595 }
23596
23597 #[test]
23602 fn sql_like_match_basic_patterns() {
23603 assert!(sql_like_match("claude-opus-4-20250101", "claude-opus-4%"));
23604 assert!(sql_like_match("claude-opus-4", "claude-opus-4%"));
23605 assert!(!sql_like_match("claude-sonnet-4", "claude-opus-4%"));
23606
23607 assert!(sql_like_match("gemini-2.0-flash-001", "gemini-2%flash%"));
23609 assert!(sql_like_match("gemini-2-flash", "gemini-2%flash%"));
23610 assert!(!sql_like_match("gemini-2-pro", "gemini-2%flash%"));
23611
23612 assert!(sql_like_match("hello", "hello"));
23614 assert!(!sql_like_match("hello!", "hello"));
23615
23616 assert!(sql_like_match("gpt-4o", "gpt-4_"));
23618 assert!(!sql_like_match("gpt-4oo", "gpt-4_"));
23619
23620 assert!(sql_like_match("Claude-Opus-4", "claude-opus-4%"));
23622 }
23623
23624 #[test]
23625 fn date_str_to_day_id_converts_correctly() {
23626 assert_eq!(date_str_to_day_id("2025-10-01").unwrap(), 2100);
23628 assert_eq!(date_str_to_day_id("2024-04-01").unwrap(), 1552);
23630 assert!(date_str_to_day_id("invalid").is_err());
23631 }
23632
23633 #[test]
23634 fn pricing_table_lookup_selects_matching_entry() {
23635 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
23636 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
23637 let table = PricingTable {
23638 entries: vec![
23639 PricingEntry {
23640 model_pattern: "claude-opus-4%".into(),
23641 provider: "anthropic".into(),
23642 input_cost_per_mtok: 15.0,
23643 output_cost_per_mtok: 75.0,
23644 cache_read_cost_per_mtok: Some(1.5),
23645 cache_creation_cost_per_mtok: Some(18.75),
23646 effective_day_id: effective_day,
23647 },
23648 PricingEntry {
23649 model_pattern: "claude-sonnet-4%".into(),
23650 provider: "anthropic".into(),
23651 input_cost_per_mtok: 3.0,
23652 output_cost_per_mtok: 15.0,
23653 cache_read_cost_per_mtok: Some(0.3),
23654 cache_creation_cost_per_mtok: Some(3.75),
23655 effective_day_id: effective_day,
23656 },
23657 ],
23658 };
23659
23660 let result = table.lookup("claude-opus-4-20260101", lookup_day);
23661 assert!(result.is_some());
23662 assert_eq!(result.unwrap().input_cost_per_mtok, 15.0);
23663
23664 let result = table.lookup("claude-sonnet-4-latest", lookup_day);
23665 assert!(result.is_some());
23666 assert_eq!(result.unwrap().input_cost_per_mtok, 3.0);
23667
23668 assert!(table.lookup("unknown-model", lookup_day).is_none());
23669 }
23670
23671 #[test]
23672 fn pricing_table_lookup_respects_effective_date() {
23673 let effective_day_1 = date_str_to_day_id("2025-10-01").unwrap();
23674 let effective_day_2 = date_str_to_day_id("2026-01-01").unwrap();
23675 let table = PricingTable {
23676 entries: vec![
23677 PricingEntry {
23678 model_pattern: "claude-opus-4%".into(),
23679 provider: "anthropic".into(),
23680 input_cost_per_mtok: 15.0,
23681 output_cost_per_mtok: 75.0,
23682 cache_read_cost_per_mtok: None,
23683 cache_creation_cost_per_mtok: None,
23684 effective_day_id: effective_day_1,
23685 },
23686 PricingEntry {
23687 model_pattern: "claude-opus-4%".into(),
23688 provider: "anthropic".into(),
23689 input_cost_per_mtok: 12.0,
23690 output_cost_per_mtok: 60.0,
23691 cache_read_cost_per_mtok: None,
23692 cache_creation_cost_per_mtok: None,
23693 effective_day_id: effective_day_2,
23694 },
23695 ],
23696 };
23697
23698 let result = table.lookup("claude-opus-4", date_str_to_day_id("2025-11-01").unwrap());
23700 assert!(result.is_some());
23701 assert_eq!(result.unwrap().input_cost_per_mtok, 15.0);
23702
23703 let result = table.lookup("claude-opus-4", date_str_to_day_id("2026-02-01").unwrap());
23705 assert!(result.is_some());
23706 assert_eq!(result.unwrap().input_cost_per_mtok, 12.0);
23707
23708 assert!(
23710 table
23711 .lookup("claude-opus-4", date_str_to_day_id("2024-01-01").unwrap())
23712 .is_none()
23713 );
23714 }
23715
23716 #[test]
23717 fn pricing_table_lookup_specificity_tiebreak() {
23718 let effective_day = date_str_to_day_id("2025-01-01").unwrap();
23719 let lookup_day = date_str_to_day_id("2026-01-01").unwrap();
23720 let table = PricingTable {
23721 entries: vec![
23722 PricingEntry {
23723 model_pattern: "gpt-4%".into(),
23724 provider: "openai".into(),
23725 input_cost_per_mtok: 10.0,
23726 output_cost_per_mtok: 30.0,
23727 cache_read_cost_per_mtok: None,
23728 cache_creation_cost_per_mtok: None,
23729 effective_day_id: effective_day,
23730 },
23731 PricingEntry {
23732 model_pattern: "gpt-4-turbo%".into(),
23733 provider: "openai".into(),
23734 input_cost_per_mtok: 5.0,
23735 output_cost_per_mtok: 15.0,
23736 cache_read_cost_per_mtok: None,
23737 cache_creation_cost_per_mtok: None,
23738 effective_day_id: effective_day,
23739 },
23740 ],
23741 };
23742
23743 let result = table.lookup("gpt-4-turbo-2025", lookup_day);
23745 assert!(result.is_some());
23746 assert_eq!(result.unwrap().input_cost_per_mtok, 5.0);
23747
23748 let result = table.lookup("gpt-4o", lookup_day);
23750 assert!(result.is_some());
23751 assert_eq!(result.unwrap().input_cost_per_mtok, 10.0);
23752 }
23753
23754 #[test]
23755 fn pricing_table_compute_cost_basic() {
23756 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
23757 let table = PricingTable {
23758 entries: vec![PricingEntry {
23759 model_pattern: "claude-opus-4%".into(),
23760 provider: "anthropic".into(),
23761 input_cost_per_mtok: 15.0,
23762 output_cost_per_mtok: 75.0,
23763 cache_read_cost_per_mtok: Some(1.5),
23764 cache_creation_cost_per_mtok: Some(18.75),
23765 effective_day_id: effective_day,
23766 }],
23767 };
23768
23769 let cost = table.compute_cost(
23770 Some("claude-opus-4-latest"),
23771 date_str_to_day_id("2026-02-06").unwrap(),
23772 Some(1000),
23773 Some(500),
23774 None,
23775 None,
23776 );
23777 assert!(cost.is_some());
23778 assert!((cost.unwrap() - 0.0525).abs() < 1e-10);
23780 }
23781
23782 #[test]
23783 fn pricing_table_compute_cost_with_cache() {
23784 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
23785 let table = PricingTable {
23786 entries: vec![PricingEntry {
23787 model_pattern: "claude-opus-4%".into(),
23788 provider: "anthropic".into(),
23789 input_cost_per_mtok: 15.0,
23790 output_cost_per_mtok: 75.0,
23791 cache_read_cost_per_mtok: Some(1.5),
23792 cache_creation_cost_per_mtok: Some(18.75),
23793 effective_day_id: effective_day,
23794 }],
23795 };
23796
23797 let cost = table.compute_cost(
23798 Some("claude-opus-4-latest"),
23799 date_str_to_day_id("2026-02-06").unwrap(),
23800 Some(1_000_000),
23801 Some(100_000),
23802 Some(500_000),
23803 Some(200_000),
23804 );
23805 assert!(cost.is_some());
23806 assert!((cost.unwrap() - 16.5).abs() < 1e-10);
23812 }
23813
23814 #[test]
23815 fn pricing_table_compute_cost_returns_none_for_unknown_model() {
23816 let effective_day = date_str_to_day_id("2025-10-01").unwrap();
23817 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
23818 let table = PricingTable {
23819 entries: vec![PricingEntry {
23820 model_pattern: "claude-opus-4%".into(),
23821 provider: "anthropic".into(),
23822 input_cost_per_mtok: 15.0,
23823 output_cost_per_mtok: 75.0,
23824 cache_read_cost_per_mtok: None,
23825 cache_creation_cost_per_mtok: None,
23826 effective_day_id: effective_day,
23827 }],
23828 };
23829
23830 assert!(
23831 table
23832 .compute_cost(
23833 Some("unknown-model"),
23834 lookup_day,
23835 Some(1000),
23836 Some(500),
23837 None,
23838 None
23839 )
23840 .is_none()
23841 );
23842 assert!(
23843 table
23844 .compute_cost(None, lookup_day, Some(1000), Some(500), None, None)
23845 .is_none()
23846 );
23847 assert!(
23848 table
23849 .compute_cost(Some("claude-opus-4"), lookup_day, None, None, None, None)
23850 .is_none()
23851 );
23852 }
23853
23854 #[test]
23855 fn pricing_table_load_from_db() {
23856 let dir = TempDir::new().unwrap();
23857 let db_path = dir.path().join("test.db");
23858 let storage = SqliteStorage::open(&db_path).unwrap();
23859
23860 let table = PricingTable::load(&storage.conn).unwrap();
23861 assert!(!table.is_empty());
23862
23863 let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
23864
23865 let opus = table.lookup("claude-opus-4-latest", lookup_day);
23866 assert!(opus.is_some());
23867 assert_eq!(opus.unwrap().input_cost_per_mtok, 15.0);
23868
23869 let flash = table.lookup("gemini-2.0-flash-001", lookup_day);
23870 assert!(flash.is_some());
23871 assert_eq!(flash.unwrap().input_cost_per_mtok, 0.075);
23872 }
23873
23874 #[test]
23875 fn pricing_table_load_rejects_invalid_effective_date() {
23876 let dir = TempDir::new().unwrap();
23877 let db_path = dir.path().join("test.db");
23878 let storage = SqliteStorage::open(&db_path).unwrap();
23879
23880 storage
23881 .conn
23882 .execute_compat(
23883 "INSERT INTO model_pricing (
23884 model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
23885 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
23886 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
23887 fparams![
23888 "broken-model%",
23889 "test",
23890 1.0_f64,
23891 2.0_f64,
23892 Option::<f64>::None,
23893 Option::<f64>::None,
23894 "not-a-date"
23895 ],
23896 )
23897 .unwrap();
23898
23899 let err = PricingTable::load(&storage.conn).unwrap_err();
23900 assert!(err.to_string().contains("invalid effective_date"));
23901 }
23902
23903 #[test]
23904 fn pricing_diagnostics_tracks_coverage() {
23905 let mut diag = PricingDiagnostics::default();
23906 diag.record_priced();
23907 diag.record_priced();
23908 diag.record_unpriced(Some("custom-model-v1"));
23909 diag.record_unpriced(Some("custom-model-v1"));
23910 diag.record_unpriced(None);
23911
23912 assert_eq!(diag.priced_count, 2);
23913 assert_eq!(diag.unpriced_count, 3);
23914 assert_eq!(diag.unknown_models.len(), 2);
23915 assert_eq!(diag.unknown_models["custom-model-v1"], 2);
23916 assert_eq!(diag.unknown_models["(none)"], 1);
23917 }
23918
23919 fn franken_storage_in_memory() -> FrankenStorage {
23929 let conn = FrankenConnection::open(":memory:").unwrap();
23930 let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));
23931 storage.run_migrations().unwrap();
23932 storage.apply_config().unwrap();
23933 storage
23934 }
23935
23936 #[test]
23937 fn franken_migrations_create_all_tables() {
23938 let storage = franken_storage_in_memory();
23939
23940 let version = storage.schema_version().unwrap();
23942 assert_eq!(
23943 version, CURRENT_SCHEMA_VERSION,
23944 "fresh FrankenStorage should be at current schema version"
23945 );
23946
23947 let rows = storage
23949 .raw()
23950 .query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
23951 .unwrap();
23952 let table_names: Vec<String> = rows
23953 .iter()
23954 .filter_map(|r| r.get_typed::<String>(0).ok())
23955 .collect();
23956
23957 for required in [
23958 "meta",
23959 "agents",
23960 "workspaces",
23961 "conversations",
23962 "messages",
23963 "snippets",
23964 "tags",
23965 "conversation_tags",
23966 ] {
23967 assert!(
23968 table_names.contains(&required.to_string()),
23969 "missing table: {required}"
23970 );
23971 }
23972
23973 assert!(
23975 table_names.contains(&"sources".to_string()),
23976 "missing sources table"
23977 );
23978
23979 assert!(
23981 table_names.contains(&"daily_stats".to_string()),
23982 "missing daily_stats table"
23983 );
23984
23985 assert!(
23987 table_names.contains(&"embedding_jobs".to_string()),
23988 "missing embedding_jobs table"
23989 );
23990
23991 for analytics_table in ["message_metrics", "usage_hourly", "usage_daily"] {
23993 assert!(
23994 table_names.contains(&analytics_table.to_string()),
23995 "missing table: {analytics_table}"
23996 );
23997 }
23998 assert!(
23999 table_names.contains(&"conversation_tail_state".to_string()),
24000 "missing conversation_tail_state table"
24001 );
24002 assert!(
24003 table_names.contains(&"conversation_external_lookup".to_string()),
24004 "missing conversation_external_lookup table"
24005 );
24006 assert!(
24007 table_names.contains(&"conversation_external_tail_lookup".to_string()),
24008 "missing conversation_external_tail_lookup table"
24009 );
24010
24011 let rows = storage
24014 .raw()
24015 .query("SELECT COUNT(*) FROM _schema_migrations;")
24016 .unwrap();
24017 let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
24018 assert_eq!(
24019 count,
24020 (13..=CURRENT_SCHEMA_VERSION).count() as i64,
24021 "_schema_migrations should record the V13 base schema and post-V13 migrations"
24022 );
24023
24024 let rows = storage
24026 .raw()
24027 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24028 .unwrap();
24029 let versions: Vec<i64> = rows
24030 .iter()
24031 .map(|row| row.get_typed(0))
24032 .collect::<std::result::Result<_, _>>()
24033 .unwrap();
24034 assert_eq!(
24035 versions,
24036 (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
24037 "_schema_migrations should contain v13 through current"
24038 );
24039 }
24040
24041 #[test]
24042 fn franken_migrations_idempotent() {
24043 let storage = franken_storage_in_memory();
24044 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24045
24046 storage.run_migrations().unwrap();
24048 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24049 }
24050
24051 #[test]
24052 fn migration_v20_backfills_conversation_external_tail_lookup() {
24053 let storage = franken_storage_in_memory();
24054 let agent_id = storage
24055 .ensure_agent(&Agent {
24056 id: None,
24057 slug: "codex".into(),
24058 name: "Codex".into(),
24059 version: None,
24060 kind: AgentKind::Cli,
24061 })
24062 .unwrap();
24063 let workspace_id = storage
24064 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
24065 .unwrap();
24066 let mut conv = make_profiled_storage_remote_conversation(1919, 2);
24067 conv.source_id = "profiled-storage-remote-source-東京".into();
24068 conv.external_id = Some("profiled-storage-remote-☃-1919".into());
24069 let outcome = storage
24070 .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
24071 .unwrap();
24072 let external_id = conv.external_id.as_deref().unwrap();
24073 let lookup_key = conversation_external_lookup_key(&conv.source_id, agent_id, external_id);
24074
24075 storage
24076 .raw()
24077 .execute("DELETE FROM conversation_external_tail_lookup")
24078 .unwrap();
24079 storage
24080 .raw()
24081 .execute("DELETE FROM _schema_migrations WHERE version = 20")
24082 .unwrap();
24083 storage
24084 .raw()
24085 .execute_compat(
24086 "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
24087 fparams!["19"],
24088 )
24089 .unwrap();
24090
24091 storage.run_migrations().unwrap();
24092
24093 let backfilled: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
24094 .raw()
24095 .query_row_map(
24096 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
24097 FROM conversation_external_tail_lookup
24098 WHERE lookup_key = ?1",
24099 fparams![lookup_key.as_str()],
24100 |row| {
24101 Ok((
24102 row.get_typed(0)?,
24103 row.get_typed(1)?,
24104 row.get_typed(2)?,
24105 row.get_typed(3)?,
24106 ))
24107 },
24108 )
24109 .unwrap();
24110 assert_eq!(
24111 backfilled,
24112 (
24113 outcome.conversation_id,
24114 conv.ended_at,
24115 Some(1),
24116 conv.messages[1].created_at
24117 )
24118 );
24119 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24120 }
24121
24122 #[test]
24123 fn migration_v15_creates_lazy_tail_state_cache() {
24124 let conn = FrankenConnection::open(":memory:").unwrap();
24125 conn.execute_batch(
24126 "CREATE TABLE conversations (
24127 id INTEGER PRIMARY KEY,
24128 ended_at INTEGER
24129 );
24130 CREATE TABLE messages (
24131 id INTEGER PRIMARY KEY,
24132 conversation_id INTEGER NOT NULL,
24133 idx INTEGER NOT NULL,
24134 created_at INTEGER
24135 );
24136 INSERT INTO conversations(id, ended_at) VALUES
24137 (1, 1710000000300),
24138 (2, NULL);
24139 INSERT INTO messages(id, conversation_id, idx, created_at) VALUES
24140 (10, 1, 0, 1710000000100),
24141 (11, 1, 1, 1710000000200),
24142 (12, 2, 0, 1710000000400);",
24143 )
24144 .unwrap();
24145
24146 conn.execute(
24147 "CREATE TABLE _schema_migrations (
24148 version INTEGER PRIMARY KEY,
24149 name TEXT NOT NULL,
24150 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
24151 );",
24152 )
24153 .unwrap();
24154
24155 assert!(
24156 apply_conversation_tail_state_cache_migration(&conn).unwrap(),
24157 "v15 migration should apply once"
24158 );
24159 assert!(
24160 !apply_conversation_tail_state_cache_migration(&conn).unwrap(),
24161 "v15 migration should be idempotent once recorded"
24162 );
24163
24164 let columns = conn.query("PRAGMA table_info(conversations);").unwrap();
24165 let column_names: HashSet<String> = columns
24166 .iter()
24167 .map(|row| row.get_typed(1))
24168 .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
24169 .unwrap();
24170 assert!(column_names.contains("last_message_idx"));
24171 assert!(column_names.contains("last_message_created_at"));
24172
24173 let tail_rows: i64 = conn
24174 .query("SELECT COUNT(*) FROM conversation_tail_state;")
24175 .unwrap()
24176 .first()
24177 .unwrap()
24178 .get_typed(0)
24179 .unwrap();
24180 assert_eq!(
24181 tail_rows, 0,
24182 "v15 should create the cache without an open-time message scan"
24183 );
24184
24185 let applied: i64 = conn
24186 .query("SELECT COUNT(*) FROM _schema_migrations WHERE version = 15;")
24187 .unwrap()
24188 .first()
24189 .unwrap()
24190 .get_typed(0)
24191 .unwrap();
24192 assert_eq!(applied, 1);
24193 }
24194
24195 #[test]
24196 fn schema_repair_adds_missing_conversations_token_columns() {
24197 let conn = FrankenConnection::open(":memory:").unwrap();
24198 conn.execute_batch(
24199 "CREATE TABLE conversations (
24200 id INTEGER PRIMARY KEY,
24201 agent_id INTEGER NOT NULL,
24202 source_path TEXT NOT NULL
24203 );",
24204 )
24205 .unwrap();
24206 let storage = FrankenStorage::new(conn, std::path::PathBuf::from(":memory:"));
24207
24208 storage.repair_missing_conversation_token_columns().unwrap();
24209 storage.repair_missing_conversation_token_columns().unwrap();
24210
24211 let columns = franken_table_column_names(&storage.conn, "conversations").unwrap();
24212 for &(column_name, _) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
24213 assert!(
24214 columns.contains(column_name),
24215 "schema repair should add conversations.{column_name}"
24216 );
24217 }
24218 }
24219
24220 #[test]
24221 fn franken_meta_schema_version_in_sync() {
24222 let storage = franken_storage_in_memory();
24223
24224 let rows = storage
24226 .raw()
24227 .query("SELECT value FROM meta WHERE key = 'schema_version';")
24228 .unwrap();
24229 let meta_version: String = rows.first().unwrap().get_typed(0).unwrap();
24230 assert_eq!(
24231 meta_version,
24232 CURRENT_SCHEMA_VERSION.to_string(),
24233 "meta.schema_version should match CURRENT_SCHEMA_VERSION"
24234 );
24235 }
24236
24237 #[test]
24238 fn franken_transition_from_meta_version() {
24239 let dir = TempDir::new().unwrap();
24240 let db_path = dir.path().join("test_transition.db");
24241
24242 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24245 conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
24246 .unwrap();
24247 conn.execute("INSERT INTO meta(key, value) VALUES('schema_version', '10');")
24248 .unwrap();
24249 conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
24251 .unwrap();
24252 drop(conn);
24253
24254 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24256 transition_from_meta_version(&conn).unwrap();
24257
24258 let rows = conn
24260 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24261 .unwrap();
24262 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24263 assert_eq!(
24264 versions,
24265 (1..=10).collect::<Vec<i64>>(),
24266 "transition should backfill versions 1..=10"
24267 );
24268 }
24269
24270 #[test]
24271 fn franken_transition_from_current_meta_backfills_current_schema_marker() {
24272 let dir = TempDir::new().unwrap();
24273 let db_path = dir.path().join("test_current_transition.db");
24274
24275 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24276 conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
24277 .unwrap();
24278 conn.execute_compat(
24279 "INSERT INTO meta(key, value) VALUES('schema_version', ?1);",
24280 &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
24281 )
24282 .unwrap();
24283 conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
24284 .unwrap();
24285 drop(conn);
24286
24287 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24288 transition_from_meta_version(&conn).unwrap();
24289
24290 let rows = conn
24291 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24292 .unwrap();
24293 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24294 assert_eq!(
24295 versions,
24296 (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
24297 "current meta schema marker should backfill every known migration"
24298 );
24299 }
24300
24301 #[test]
24302 fn franken_transition_skips_when_already_done() {
24303 let dir = TempDir::new().unwrap();
24304 let db_path = dir.path().join("test_transition_skip.db");
24305
24306 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24308 conn.execute(
24309 "CREATE TABLE _schema_migrations (version INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL DEFAULT 'now');",
24310 ).unwrap();
24311 conn.execute("INSERT INTO _schema_migrations (version, name) VALUES (1, 'test');")
24312 .unwrap();
24313
24314 transition_from_meta_version(&conn).unwrap();
24316
24317 let rows = conn
24319 .query("SELECT COUNT(*) FROM _schema_migrations;")
24320 .unwrap();
24321 let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
24322 assert_eq!(
24323 count, 1,
24324 "transition should not re-run on already-transitioned DB"
24325 );
24326 }
24327
24328 #[test]
24329 fn franken_transition_fresh_db_is_noop() {
24330 let dir = TempDir::new().unwrap();
24331 let db_path = dir.path().join("test_fresh_noop.db");
24332
24333 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24335 transition_from_meta_version(&conn).unwrap();
24336
24337 let res = conn.query("SELECT * FROM \"_schema_migrations\";");
24339 assert!(
24340 res.is_err(),
24341 "transition should not create _schema_migrations on fresh DB"
24342 );
24343 }
24344
24345 #[test]
24346 fn franken_transition_with_fts_virtual_table_succeeds() {
24347 let dir = TempDir::new().unwrap();
24348 let db_path = dir.path().join("test_transition_with_fts.db");
24349
24350 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24351 conn.execute_batch(
24352 "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
24353 INSERT INTO meta(key, value) VALUES('schema_version', '13');
24354 CREATE TABLE conversations (id INTEGER PRIMARY KEY);
24355 CREATE VIRTUAL TABLE fts_messages USING fts5(
24356 content,
24357 title,
24358 agent,
24359 workspace,
24360 source_path,
24361 created_at,
24362 content='',
24363 tokenize='porter unicode61'
24364 );",
24365 )
24366 .unwrap();
24367 drop(conn);
24368
24369 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24370 transition_from_meta_version(&conn).unwrap();
24371
24372 let rows = conn
24373 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24374 .unwrap();
24375 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24376 assert_eq!(versions, (1..=13).collect::<Vec<i64>>());
24377 }
24378
24379 #[test]
24380 fn franken_storage_open_legacy_v13_with_fts_virtual_table_succeeds() {
24381 let dir = TempDir::new().unwrap();
24382 let db_path = dir.path().join("test_open_legacy_v13_with_fts.db");
24383
24384 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24385 conn.execute_batch(
24386 "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
24387 INSERT INTO meta(key, value) VALUES('schema_version', '13');
24388 CREATE TABLE agents (
24389 id INTEGER PRIMARY KEY,
24390 slug TEXT NOT NULL
24391 );
24392 CREATE TABLE workspaces (
24393 id INTEGER PRIMARY KEY,
24394 path TEXT NOT NULL
24395 );
24396 CREATE TABLE sources (
24397 id TEXT PRIMARY KEY,
24398 kind TEXT NOT NULL,
24399 host_label TEXT,
24400 machine_id TEXT,
24401 platform TEXT,
24402 config_json TEXT,
24403 created_at INTEGER NOT NULL,
24404 updated_at INTEGER NOT NULL
24405 );
24406 CREATE TABLE conversations (
24407 id INTEGER PRIMARY KEY,
24408 agent_id INTEGER NOT NULL,
24409 workspace_id INTEGER,
24410 source_id TEXT NOT NULL DEFAULT 'local',
24411 external_id TEXT,
24412 title TEXT,
24413 source_path TEXT NOT NULL,
24414 started_at INTEGER,
24415 ended_at INTEGER
24416 );
24417 CREATE TABLE messages (
24418 id INTEGER PRIMARY KEY,
24419 conversation_id INTEGER NOT NULL,
24420 idx INTEGER NOT NULL,
24421 role TEXT NOT NULL,
24422 author TEXT,
24423 created_at INTEGER,
24424 content TEXT NOT NULL,
24425 extra_json TEXT,
24426 extra_bin BLOB
24427 );
24428 INSERT INTO agents(id, slug) VALUES (1, 'codex');
24429 INSERT INTO workspaces(id, path) VALUES (1, '/data/projects/coding_agent_session_search');
24430 INSERT INTO sources(id, kind, host_label, created_at, updated_at)
24431 VALUES ('local', 'local', NULL, 1710000000000, 1710000000000);
24432 INSERT INTO conversations(
24433 id,
24434 agent_id,
24435 workspace_id,
24436 source_id,
24437 external_id,
24438 title,
24439 source_path,
24440 started_at
24441 )
24442 VALUES (
24443 1,
24444 1,
24445 1,
24446 'local',
24447 'legacy-session',
24448 'legacy session',
24449 '/tmp/legacy.jsonl',
24450 1710000000000
24451 );
24452 INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content)
24453 VALUES (1, 1, 0, 'user', 'tester', 1710000000000, 'legacy content');
24454 CREATE VIRTUAL TABLE fts_messages USING fts5(
24455 content,
24456 title,
24457 agent,
24458 workspace,
24459 source_path,
24460 created_at,
24461 message_id,
24462 content='',
24463 tokenize='porter unicode61'
24464 );",
24465 )
24466 .unwrap();
24467 drop(conn);
24468
24469 let storage = FrankenStorage::open(&db_path).unwrap();
24470 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24471
24472 let rows = storage
24473 .raw()
24474 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24475 .unwrap();
24476 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24477 assert_eq!(versions, (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>());
24478 }
24479
    /// Injects a second `fts_messages` row into `sqlite_master` and checks that the
    /// storage layer recovers.
    ///
    /// After the injection the test expects: reopening reports the current schema
    /// version without recreating the `FTS_FRANKEN_REBUILD_META_KEY` meta row;
    /// `ensure_search_fallback_fts_consistency` leaves one FTS schema row (as counted
    /// by `franken_fts_schema_rows`); and `fts_messages` holds as many rows as
    /// `messages`.
    #[test]
    fn franken_storage_open_repairs_duplicate_fts_messages_schema_rows() {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join("test_open_repairs_duplicate_fts_schema.db");

        // Create the database and store one conversation containing one message.
        let storage = FrankenStorage::open(&db_path).unwrap();
        let agent = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&agent).unwrap();
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("dup-fts-schema".into()),
            title: Some("Duplicate FTS schema".into()),
            source_path: PathBuf::from("/tmp/dup-fts-schema.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: Some("user".into()),
                created_at: Some(1_700_000_000_050),
                content: "message that should remain queryable".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
        drop(storage);
        // NOTE(review): helper is defined elsewhere; by its name it creates the FTS
        // schema through rusqlite so a real `fts_messages` row exists — confirm.
        materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

        // With writable_schema enabled, add a second `fts_messages` row (rootpage 0)
        // carrying an older-style definition, and remove the rebuild marker from meta.
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        let conn = rusqlite_test_fixture_conn(&db_path);
        conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
        conn.execute(
            "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
             VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
            [duplicate_legacy_fts_sql],
        )
        .unwrap();
        conn.execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
        conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();

        // Sanity check: the corruption is in place before the reopen.
        let duplicate_rows: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(duplicate_rows, 2);
        drop(conn);

        // Reopening must succeed and must not write the rebuild marker back.
        let reopened = FrankenStorage::open(&db_path).unwrap();
        assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
        let generation_rows: Vec<String> = reopened
            .raw()
            .query_map_collect(
                "SELECT value FROM meta WHERE key = ?1",
                fparams![FTS_FRANKEN_REBUILD_META_KEY],
                |row| row.get_typed(0),
            )
            .unwrap();
        assert_eq!(
            generation_rows.len(),
            0,
            "canonical open should not eagerly rewrite FTS repair metadata"
        );
        // The explicit consistency pass is expected to collapse the duplicate; the
        // result is verified through a separate, freshly opened connection.
        reopened.ensure_search_fallback_fts_consistency().unwrap();
        let repaired = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
        assert_eq!(franken_fts_schema_rows(&repaired).unwrap(), 1);

        // Every stored message should have a matching FTS row.
        let total_messages: i64 = reopened
            .raw()
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        let total_fts_rows: i64 = reopened
            .raw()
            .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        assert_eq!(total_fts_rows, total_messages);
    }
24585
24586 #[test]
24587 fn franken_storage_open_fresh_db_keeps_single_franken_fts_schema_row() {
24588 let dir = TempDir::new().unwrap();
24589 let db_path = dir.path().join("fresh-franken-storage-open.db");
24590
24591 let storage = FrankenStorage::open(&db_path).unwrap();
24592 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24593
24594 storage
24601 .ensure_search_fallback_fts_consistency()
24602 .expect("ensure FTS consistency after fresh open");
24603 drop(storage);
24604
24605 let c_reader = FrankenConnection::open(db_path.to_string_lossy().into_owned())
24606 .expect("open DB via frankensqlite for sqlite_master inspection");
24607 assert_eq!(
24608 franken_fts_schema_rows(&c_reader).unwrap(),
24609 1,
24610 "exactly one fts_messages schema row should exist after ensure_search_fallback_fts_consistency"
24611 );
24612 drop(c_reader);
24613
24614 let storage = FrankenStorage::open(&db_path).unwrap();
24615 assert!(
24616 storage
24617 .raw()
24618 .query("SELECT rowid FROM fts_messages LIMIT 1")
24619 .is_ok(),
24620 "fts_messages must be queryable through frankensqlite after open"
24621 );
24622 }
24623
24624 #[test]
24625 fn franken_storage_open_repairs_missing_analytics_tables_when_version_markers_lie() {
24626 let dir = TempDir::new().unwrap();
24627 let db_path = dir.path().join("test_repair_missing_analytics.db");
24628
24629 {
24630 let storage = FrankenStorage::open(&db_path).unwrap();
24631 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24632 }
24633
24634 {
24635 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24636 for table in &[
24637 "usage_models_daily",
24638 "usage_daily",
24639 "usage_hourly",
24640 "message_metrics",
24641 "token_daily_stats",
24642 "token_usage",
24643 "model_pricing",
24644 "embedding_jobs",
24645 "daily_stats",
24646 ] {
24647 conn.execute(&format!("DROP TABLE IF EXISTS {table}"))
24648 .unwrap();
24649 }
24650 conn.execute_compat(
24651 "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
24652 &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
24653 )
24654 .unwrap();
24655 }
24656
24657 let repaired = FrankenStorage::open(&db_path).unwrap();
24658 assert_eq!(repaired.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24659
24660 let analytics_count: i64 = repaired
24661 .raw()
24662 .query_row_map(
24663 "SELECT COUNT(*) FROM sqlite_master
24664 WHERE type='table'
24665 AND name IN (
24666 'daily_stats',
24667 'embedding_jobs',
24668 'token_usage',
24669 'token_daily_stats',
24670 'model_pricing',
24671 'message_metrics',
24672 'usage_hourly',
24673 'usage_daily',
24674 'usage_models_daily'
24675 )",
24676 &[],
24677 |row| row.get_typed(0),
24678 )
24679 .unwrap();
24680 assert_eq!(
24681 analytics_count, 9,
24682 "open() should recreate the missing analytics tables even when schema_version already says current"
24683 );
24684 }
24685
24686 #[test]
24687 fn current_schema_repair_batches_cover_every_required_probe() {
24688 let missing_tables: Vec<&'static str> = REQUIRED_CURRENT_SCHEMA_TABLE_PROBES
24689 .iter()
24690 .map(|(table_name, _)| *table_name)
24691 .collect();
24692
24693 let batches = current_schema_repair_batches_for_missing_tables(&missing_tables).unwrap();
24694 let covered_tables: HashSet<&'static str> = batches
24695 .iter()
24696 .flat_map(|batch| batch.tables.iter().copied())
24697 .collect();
24698
24699 for table_name in missing_tables {
24700 assert!(
24701 covered_tables.contains(table_name),
24702 "missing repair coverage for {table_name}"
24703 );
24704 }
24705 }
24706
24707 #[test]
24708 fn current_schema_repair_batches_do_not_replay_core_schema_bootstrap() {
24709 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
24710 assert!(
24711 !batch.sql.contains("CREATE TABLE IF NOT EXISTS meta"),
24712 "repair batch {} should not recreate meta",
24713 batch.name
24714 );
24715 assert!(
24716 !batch.sql.contains("CREATE TABLE IF NOT EXISTS agents"),
24717 "repair batch {} should not recreate agents",
24718 batch.name
24719 );
24720 assert!(
24721 !batch.sql.contains("CREATE TABLE IF NOT EXISTS workspaces"),
24722 "repair batch {} should not recreate workspaces",
24723 batch.name
24724 );
24725 assert!(
24726 !batch
24727 .sql
24728 .contains("CREATE TABLE IF NOT EXISTS conversations"),
24729 "repair batch {} should not recreate conversations",
24730 batch.name
24731 );
24732 assert!(
24733 !batch.sql.contains("CREATE TABLE IF NOT EXISTS messages"),
24734 "repair batch {} should not recreate messages",
24735 batch.name
24736 );
24737 assert!(
24738 !batch.sql.contains("CREATE TABLE IF NOT EXISTS snippets"),
24739 "repair batch {} should not recreate snippets",
24740 batch.name
24741 );
24742 assert!(
24743 !batch.sql.contains("CREATE VIRTUAL TABLE fts_messages"),
24744 "repair batch {} should not recreate FTS tables",
24745 batch.name
24746 );
24747 assert!(
24748 !batch.sql.contains("DROP TABLE"),
24749 "repair batch {} should never drop tables",
24750 batch.name
24751 );
24752 }
24753 }
24754
24755 #[test]
24756 fn build_cass_migrations_applies_combined_v13() {
24757 let conn = FrankenConnection::open(":memory:").unwrap();
24758 let base_result = build_cass_migrations_before_tail_cache()
24759 .run(&conn)
24760 .unwrap();
24761 assert!(apply_conversation_tail_state_cache_migration(&conn).unwrap());
24762 let post_result = build_cass_migrations_after_tail_cache().run(&conn).unwrap();
24763
24764 assert!(base_result.was_fresh);
24765 let mut applied = base_result.applied;
24766 applied.push(15);
24767 applied.extend(post_result.applied);
24768 assert_eq!(
24769 applied,
24770 (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
24771 "should apply combined V13 plus additive post-V13 migrations"
24772 );
24773 let current: i64 = conn
24774 .query("SELECT MAX(version) FROM _schema_migrations;")
24775 .unwrap()
24776 .first()
24777 .unwrap()
24778 .get_typed(0)
24779 .unwrap();
24780 assert_eq!(current, CURRENT_SCHEMA_VERSION);
24781 }
24782
24783 #[test]
24784 fn franken_insert_conversations_batched_populates_analytics_rollups() {
24785 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
24786 use frankensqlite::compat::{ConnectionExt, RowExt};
24787 use std::path::PathBuf;
24788
24789 let dir = TempDir::new().unwrap();
24790 let db_path = dir.path().join("franken-index.db");
24791 let storage = FrankenStorage::open(&db_path).unwrap();
24792
24793 let agent = Agent {
24794 id: None,
24795 slug: "claude_code".into(),
24796 name: "Claude Code".into(),
24797 version: Some("1.0".into()),
24798 kind: AgentKind::Cli,
24799 };
24800 let agent_id = storage.ensure_agent(&agent).unwrap();
24801
24802 let ts_ms = 1_770_551_400_000_i64;
24803 let usage_json = serde_json::json!({
24804 "message": {
24805 "model": "claude-opus-4-6",
24806 "usage": {
24807 "input_tokens": 100,
24808 "output_tokens": 50,
24809 "cache_read_input_tokens": 25,
24810 "cache_creation_input_tokens": 10,
24811 "service_tier": "standard"
24812 }
24813 }
24814 });
24815
24816 let conv = Conversation {
24817 id: None,
24818 agent_slug: "claude_code".into(),
24819 workspace: Some(PathBuf::from("/tmp/workspace")),
24820 external_id: Some("franken-batch-upsert".into()),
24821 title: Some("Franken batch upsert".into()),
24822 source_path: PathBuf::from("/tmp/franken.jsonl"),
24823 started_at: Some(ts_ms),
24824 ended_at: Some(ts_ms + 60_000),
24825 approx_tokens: None,
24826 metadata_json: serde_json::Value::Null,
24827 messages: vec![
24828 Message {
24829 id: None,
24830 idx: 0,
24831 role: MessageRole::User,
24832 author: None,
24833 created_at: Some(ts_ms),
24834 content: "Please make a plan.".into(),
24835 extra_json: serde_json::Value::Null,
24836 snippets: vec![],
24837 },
24838 Message {
24839 id: None,
24840 idx: 1,
24841 role: MessageRole::Agent,
24842 author: None,
24843 created_at: Some(ts_ms + 30_000),
24844 content: "## Plan\n\n1. Reproduce\n2. Patch\n3. Verify".into(),
24845 extra_json: usage_json,
24846 snippets: vec![],
24847 },
24848 ],
24849 source_id: "local".into(),
24850 origin_host: None,
24851 };
24852
24853 let outcomes = storage
24854 .insert_conversations_batched(&[(agent_id, None, &conv)])
24855 .unwrap();
24856 assert_eq!(outcomes.len(), 1);
24857 assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
24858
24859 let conn = storage.raw();
24860 let daily_stats_rows: i64 = conn
24861 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
24862 row.get_typed(0)
24863 })
24864 .unwrap();
24865 let token_daily_rows: i64 = conn
24866 .query_row_map(
24867 "SELECT COUNT(*) FROM token_daily_stats",
24868 fparams![],
24869 |row| row.get_typed(0),
24870 )
24871 .unwrap();
24872 let usage_daily_rows: i64 = conn
24873 .query_row_map("SELECT COUNT(*) FROM usage_daily", fparams![], |row| {
24874 row.get_typed(0)
24875 })
24876 .unwrap();
24877 let model_daily_rows: i64 = conn
24878 .query_row_map(
24879 "SELECT COUNT(*) FROM usage_models_daily",
24880 fparams![],
24881 |row| row.get_typed(0),
24882 )
24883 .unwrap();
24884
24885 assert!(daily_stats_rows > 0, "daily_stats should be populated");
24886 assert!(
24887 token_daily_rows > 0,
24888 "token_daily_stats should be populated"
24889 );
24890 assert!(usage_daily_rows > 0, "usage_daily should be populated");
24891 assert!(
24892 model_daily_rows > 0,
24893 "usage_models_daily should be populated"
24894 );
24895 }
24896
24897 #[test]
24902 fn connection_manager_creates_readers() {
24903 let dir = TempDir::new().unwrap();
24904 let db_path = dir.path().join("cm.db");
24905
24906 let fs = FrankenStorage::open(&db_path).unwrap();
24908 drop(fs);
24909
24910 let config = ConnectionManagerConfig {
24911 reader_count: 3,
24912 max_writers: 2,
24913 };
24914 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
24915 assert_eq!(mgr.reader_count(), 3);
24916 assert_eq!(mgr.max_writers(), 2);
24917 }
24918
24919 #[test]
24920 fn connection_manager_clamps_zero_writer_limit_to_prevent_deadlock() {
24921 let dir = TempDir::new().unwrap();
24922 let db_path = dir.path().join("cm.db");
24923
24924 let fs = FrankenStorage::open(&db_path).unwrap();
24925 drop(fs);
24926
24927 let mgr = std::sync::Arc::new(
24928 FrankenConnectionManager::new(
24929 &db_path,
24930 ConnectionManagerConfig {
24931 reader_count: 0,
24932 max_writers: 0,
24933 },
24934 )
24935 .unwrap(),
24936 );
24937 assert_eq!(mgr.reader_count(), 1);
24938 assert_eq!(mgr.max_writers(), 1);
24939
24940 let (tx, rx) = std::sync::mpsc::channel();
24941 let mgr_for_thread = std::sync::Arc::clone(&mgr);
24942 std::thread::spawn(move || {
24943 let result = mgr_for_thread.writer().map(|mut guard| {
24944 guard.mark_committed();
24945 });
24946 tx.send(result.is_ok()).expect("writer result send");
24947 });
24948
24949 assert!(
24950 rx.recv_timeout(Duration::from_secs(10)).unwrap(),
24951 "writer acquisition should not block forever when configured with zero writer slots"
24952 );
24953 }
24954
24955 #[test]
24956 fn connection_manager_reader_round_robin() {
24957 let dir = TempDir::new().unwrap();
24958 let db_path = dir.path().join("cm.db");
24959
24960 let fs = FrankenStorage::open(&db_path).unwrap();
24961 drop(fs);
24962
24963 let config = ConnectionManagerConfig {
24964 reader_count: 2,
24965 max_writers: 1,
24966 };
24967 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
24968
24969 let idx_before = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
24971 let _r1 = mgr.reader();
24972 let idx_after = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
24973 assert_eq!(idx_after, idx_before + 1, "reader index should advance");
24974 }
24975
24976 #[test]
24977 fn connection_manager_writer_reads_and_writes() {
24978 use frankensqlite::compat::RowExt;
24979
24980 let dir = TempDir::new().unwrap();
24981 let db_path = dir.path().join("cm.db");
24982
24983 let fs = FrankenStorage::open(&db_path).unwrap();
24984 drop(fs);
24985
24986 let mgr = FrankenConnectionManager::new(&db_path, Default::default()).unwrap();
24987
24988 {
24990 let mut guard = mgr.writer().unwrap();
24991 guard
24992 .storage()
24993 .raw()
24994 .execute("CREATE TABLE IF NOT EXISTS cm_test (id INTEGER PRIMARY KEY, val TEXT)")
24995 .unwrap();
24996 guard
24997 .storage()
24998 .raw()
24999 .execute("INSERT INTO cm_test (val) VALUES ('hello')")
25000 .unwrap();
25001 guard.mark_committed();
25002 }
25003
25004 let reader_guard = mgr.reader();
25006 let rows = reader_guard.query("SELECT val FROM cm_test").unwrap();
25007 assert_eq!(rows.len(), 1);
25008 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "hello");
25009 }
25010
25011 #[test]
25012 fn connection_manager_writer_guard_drops_releases_slot() {
25013 let dir = TempDir::new().unwrap();
25014 let db_path = dir.path().join("cm.db");
25015
25016 let fs = FrankenStorage::open(&db_path).unwrap();
25017 drop(fs);
25018
25019 let config = ConnectionManagerConfig {
25020 reader_count: 1,
25021 max_writers: 1,
25022 };
25023 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25024
25025 {
25027 let mut guard = mgr.writer().unwrap();
25028 guard.mark_committed();
25029 }
25030
25031 let mut guard2 = mgr.writer().unwrap();
25033 guard2.mark_committed();
25034 }
25035
25036 #[test]
25037 fn connection_manager_concurrent_writer_works() {
25038 use frankensqlite::compat::RowExt;
25039
25040 let dir = TempDir::new().unwrap();
25041 let db_path = dir.path().join("cm.db");
25042
25043 let fs = FrankenStorage::open(&db_path).unwrap();
25044 drop(fs);
25045
25046 let config = ConnectionManagerConfig {
25047 reader_count: 1,
25048 max_writers: 2,
25049 };
25050 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25051
25052 {
25053 let mut guard = mgr.concurrent_writer().unwrap();
25054 guard
25055 .storage()
25056 .raw()
25057 .execute("CREATE TABLE IF NOT EXISTS cm_conc (id INTEGER PRIMARY KEY, val TEXT)")
25058 .unwrap();
25059 guard
25060 .storage()
25061 .raw()
25062 .execute("INSERT INTO cm_conc (val) VALUES ('concurrent')")
25063 .unwrap();
25064 guard.mark_committed();
25065 }
25066
25067 let reader_guard = mgr.reader();
25068 let rows = reader_guard.query("SELECT val FROM cm_conc").unwrap();
25069 assert_eq!(rows.len(), 1);
25070 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "concurrent");
25071 }
25072
25073 #[test]
25074 fn connection_manager_default_config() {
25075 let config = ConnectionManagerConfig::default();
25076 assert_eq!(config.reader_count, 4);
25077 assert!(config.max_writers > 0);
25078 }
25079
/// Seeds one conversation each for the `openclaw` and `codex` agents, purges
/// `openclaw`, rebuilds the derived tables, and then checks that only the
/// `codex` data is still visible through the storage API and the raw tables.
#[test]
fn purge_agent_archive_data_removes_only_target_agent_and_rebuilds_derived_tables() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Registers `agent_slug` via `ensure_agent` and inserts one conversation
    // with a user message followed by an agent message. All identifying
    // strings are derived from `agent_slug` and `marker`.
    fn seed_conversation(storage: &FrankenStorage, agent_slug: &str, marker: &str) {
        let external = format!("{agent_slug}-{marker}");
        let label = format!("{agent_slug} {marker}");

        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: agent_slug.into(),
                name: agent_slug.into(),
                version: None,
                kind: AgentKind::Cli,
            })
            .unwrap();

        let user_turn = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_010),
            content: format!("{label} user"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let assistant_turn = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_020),
            content: format!("{label} assistant"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };

        let conversation = Conversation {
            id: None,
            agent_slug: agent_slug.into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            // Built before `external` is moved into `external_id` below.
            source_path: PathBuf::from(format!("/tmp/{external}.jsonl")),
            external_id: Some(external),
            title: Some(label),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![user_turn, assistant_turn],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };

        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Runs a parameterless query and returns column 0 of its row as `i64`.
    fn scalar_i64(storage: &FrankenStorage, sql: &str) -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    // The first entry is the purge target, the second must survive.
    for (slug, marker) in [("openclaw", "purge-target"), ("codex", "keep-target")] {
        seed_conversation(&storage, slug, marker);
    }

    let purge = storage.purge_agent_archive_data("openclaw").unwrap();
    assert_eq!(purge.conversations_deleted, 1);
    assert_eq!(purge.messages_deleted, 2);

    // Derived tables are rebuilt explicitly before they are inspected.
    storage.rebuild_fts().unwrap();
    storage.rebuild_analytics().unwrap();
    storage.rebuild_daily_stats().unwrap();
    storage.rebuild_token_daily_stats().unwrap();

    let agents = storage.list_agents().unwrap();
    assert_eq!(agents.len(), 1);
    assert_eq!(agents[0].slug, "codex");
    assert_eq!(storage.total_conversation_count().unwrap(), 1);
    assert_eq!(storage.total_message_count().unwrap(), 2);

    // Only the two surviving messages should be indexed.
    assert_eq!(scalar_i64(&storage, "SELECT COUNT(*) FROM fts_messages"), 2);

    // The 'all'/'all' rollup should account for the one remaining session.
    let total_daily_sessions = scalar_i64(
        &storage,
        "SELECT COALESCE(SUM(session_count), 0)
         FROM daily_stats
         WHERE agent_slug = 'all' AND source_id = 'all'",
    );
    assert_eq!(total_daily_sessions, 1);

    // No token rollup rows may remain for the purged agent.
    let openclaw_token_rows = scalar_i64(
        &storage,
        "SELECT COUNT(*) FROM token_daily_stats WHERE agent_slug = 'openclaw'",
    );
    assert_eq!(openclaw_token_rows, 0);
}
25187
/// Seeds one valid agent/conversation/message chain plus rows that reference
/// ids this test never inserts, runs `cleanup_orphan_fk_rows`, and checks that
/// only the valid rows remain. A second run must report nothing.
#[test]
fn cleanup_orphan_fk_rows_removes_orphans_and_is_noop_on_clean_db() {
    // Runs a parameterless count query and returns column 0 as `i64`.
    fn count_rows(storage: &FrankenStorage, sql: &str) -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("orphan_fk_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    // FK enforcement is switched off while seeding and back on afterwards.
    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();

    // The valid chain: agent 1 -> conversation 1 -> message 1.
    let valid_rows = [
        "INSERT INTO agents(id, slug, name, kind, created_at, updated_at) \
         VALUES(1, 'test-agent', 'Test Agent', 'cli', 0, 0)",
        "INSERT INTO conversations(id, agent_id, source_id, source_path, started_at) \
         VALUES(1, 1, 'local', '/tmp/real.jsonl', 0)",
        "INSERT INTO messages(id, conversation_id, idx, role, content) \
         VALUES(1, 1, 0, 'user', 'real message')",
    ];
    for sql in valid_rows {
        storage.raw().execute_compat(sql, fparams![]).unwrap();
    }

    // Messages pointing at conversation ids (99_999 and 0) that this test
    // never inserts.
    let orphan_messages: [(i64, i64, i64); 3] = [(101, 99_999, 0), (102, 0, 0), (103, 0, 1)];
    for (message_id, conversation_id, position) in orphan_messages {
        storage
            .raw()
            .execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, content) \
                 VALUES(?1, ?2, ?3, 'user', 'orphan message')",
                fparams![message_id, conversation_id, position],
            )
            .unwrap();
    }

    // One metric row and one token row for the real message and for two of
    // the orphan messages.
    for owner_id in [1_i64, 101_i64, 102_i64] {
        storage
            .raw()
            .execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 13, 2)",
                fparams![owner_id],
            )
            .unwrap();
        storage
            .raw()
            .execute_compat(
                "INSERT INTO token_usage(
                    message_id, conversation_id, agent_id, timestamp_ms, day_id,
                    role, content_chars
                 ) VALUES(?1, 1, 1, 0, 0, 'user', 13)",
                fparams![owner_id],
            )
            .unwrap();
    }

    // A snippet whose message id (99999) was never inserted.
    storage
        .raw()
        .execute_compat(
            "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text) \
             VALUES(99999, '/tmp/orphan-snippet.rs', 1, 2, 'rust', 'fn main() {}')",
            fparams![],
        )
        .unwrap();

    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    // Sanity-check the seeded state before cleanup.
    assert_eq!(count_rows(&storage, "SELECT COUNT(*) FROM messages"), 4);
    assert_eq!(count_rows(&storage, "SELECT COUNT(*) FROM snippets"), 1);
    assert_eq!(
        count_rows(&storage, "SELECT COUNT(*) FROM message_metrics"),
        3
    );
    assert_eq!(count_rows(&storage, "SELECT COUNT(*) FROM token_usage"), 3);

    let report = storage.cleanup_orphan_fk_rows().unwrap();

    // Only the rows attached to the valid chain should survive.
    let messages_after = count_rows(&storage, "SELECT COUNT(*) FROM messages");
    assert_eq!(messages_after, 1, "real message must be preserved");
    let snippets_after = count_rows(&storage, "SELECT COUNT(*) FROM snippets");
    assert_eq!(snippets_after, 0);
    let metrics_after = count_rows(&storage, "SELECT COUNT(*) FROM message_metrics");
    assert_eq!(metrics_after, 1, "real message metric must be preserved");
    let token_usage_after = count_rows(&storage, "SELECT COUNT(*) FROM token_usage");
    assert_eq!(token_usage_after, 1, "real token row must be preserved");

    // The report counts 3 messages + 1 snippet. The metric/token rows that
    // vanished alongside the orphan messages are not part of the total.
    // NOTE(review): presumably they are removed by cascade - confirm against
    // the schema.
    assert_eq!(report.total, 4, "report total: {:?}", report);
    let deleted_for = |wanted: &str| {
        report
            .per_table
            .iter()
            .find(|(table, _)| *table == wanted)
            .map(|(_, deleted)| *deleted)
    };
    assert_eq!(deleted_for("messages"), Some(3));
    assert_eq!(deleted_for("snippets"), Some(1));

    // A second pass over the now-clean database must report nothing.
    let second = storage.cleanup_orphan_fk_rows().unwrap();
    assert_eq!(second.total, 0);
    assert!(second.per_table.is_empty());
}
25371
/// Seeds `ORPHAN_FK_ID_CHUNK_SIZE + 3` messages that point at conversation ids
/// this test never inserts (each with one metric row), then checks that a
/// single `cleanup_orphan_fk_rows` call removes all of them.
#[test]
fn cleanup_orphan_fk_rows_handles_more_than_one_delete_chunk() {
    // Runs a parameterless count query and returns column 0 as `i64`.
    fn count_rows(storage: &FrankenStorage, sql: &str) -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("orphan_fk_chunked_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();
    // Three more orphans than the chunk-size constant.
    let orphan_count = ORPHAN_FK_ID_CHUNK_SIZE + 3;

    // FK enforcement is off only while the orphan rows are being inserted.
    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
    {
        let mut tx = storage.raw().transaction().unwrap();
        for idx in 0..orphan_count {
            let offset = i64::try_from(idx).unwrap();
            let message_id = 10_000_i64 + offset;
            let conversation_id = 20_000_i64 + offset;
            tx.execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, content) \
                 VALUES(?1, ?2, 0, 'user', 'orphan message')",
                fparams![message_id, conversation_id],
            )
            .unwrap();
            tx.execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 14, 2)",
                fparams![message_id],
            )
            .unwrap();
        }
        tx.commit().unwrap();
    }
    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    let report = storage.cleanup_orphan_fk_rows().unwrap();
    let expected = i64::try_from(orphan_count).unwrap();

    // The total and the `messages` entry must both equal the orphan count.
    assert_eq!(report.total, expected);
    let messages_deleted = report
        .per_table
        .iter()
        .find(|(name, _)| *name == "messages")
        .map(|(_, deleted)| *deleted);
    assert_eq!(messages_deleted, Some(expected));

    // Both tables must be empty afterwards.
    assert_eq!(count_rows(&storage, "SELECT COUNT(*) FROM messages"), 0);
    assert_eq!(
        count_rows(&storage, "SELECT COUNT(*) FROM message_metrics"),
        0
    );
}
25428
/// Seeds `ORPHAN_FK_ID_CHUNK_SIZE * 2 + 5` metric rows whose message ids are
/// never inserted, then checks that `cleanup_orphan_fk_rows` removes them all
/// and reports them under a single `message_metrics` entry.
#[test]
fn cleanup_orphan_fk_rows_pages_direct_child_orphans() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("direct_orphan_fk_paged_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();
    // Two times the chunk-size constant, plus five extra rows.
    let orphan_count = (ORPHAN_FK_ID_CHUNK_SIZE * 2) + 5;

    // FK enforcement is off only while the orphan rows are being inserted.
    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
    {
        let mut tx = storage.raw().transaction().unwrap();
        for idx in 0..orphan_count {
            let orphan_message_id = 50_000_i64 + i64::try_from(idx).unwrap();
            tx.execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 21, 3)",
                fparams![orphan_message_id],
            )
            .unwrap();
        }
        tx.commit().unwrap();
    }
    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    let report = storage.cleanup_orphan_fk_rows().unwrap();
    let expected = i64::try_from(orphan_count).unwrap();

    assert_eq!(report.total, expected);

    // Collect every report entry for the table once, then check both the
    // summed count and the number of entries.
    let metric_entries: Vec<i64> = report
        .per_table
        .iter()
        .filter(|(name, _)| *name == "message_metrics")
        .map(|(_, deleted)| *deleted)
        .collect();
    assert_eq!(metric_entries.iter().sum::<i64>(), expected);
    assert_eq!(
        metric_entries.len(),
        1,
        "paged cleanup should aggregate report entries by table: {report:?}"
    );

    let metrics_after: i64 = storage
        .raw()
        .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(metrics_after, 0);
}
25481}